From 37ed9c199ca639565f6ce88105f9e39e898d82d0 Mon Sep 17 00:00:00 2001
From: Stephen Hines
Date: Mon, 1 Dec 2014 14:51:49 -0800
Subject: Update aosp/master LLVM for rebase to r222494.

Change-Id: Ic787f5e0124df789bd26f3f24680f45e678eef2d
---
lib/Analysis/AliasAnalysis.cpp | 135 +- lib/Analysis/AliasAnalysisEvaluator.cpp | 8 +- lib/Analysis/AliasSetTracker.cpp | 145 +- lib/Analysis/Analysis.cpp | 4 +- lib/Analysis/Android.mk | 6 +- lib/Analysis/AssumptionTracker.cpp | 110 + lib/Analysis/BasicAliasAnalysis.cpp | 259 +- lib/Analysis/BlockFrequencyInfoImpl.cpp | 23 +- lib/Analysis/CFG.cpp | 4 +- lib/Analysis/CFGPrinter.cpp | 16 +- lib/Analysis/CFLAliasAnalysis.cpp | 1013 +++ lib/Analysis/CMakeLists.txt | 4 + lib/Analysis/CaptureTracking.cpp | 89 +- lib/Analysis/CodeMetrics.cpp | 80 +- lib/Analysis/ConstantFolding.cpp | 124 +- lib/Analysis/DependenceAnalysis.cpp | 73 +- lib/Analysis/DominanceFrontier.cpp | 135 +- lib/Analysis/FunctionTargetTransformInfo.cpp | 50 + lib/Analysis/IPA/CallGraph.cpp | 5 +- lib/Analysis/IPA/CallGraphSCCPass.cpp | 9 +- lib/Analysis/IPA/InlineCost.cpp | 35 +- lib/Analysis/IVUsers.cpp | 8 +- lib/Analysis/InstructionSimplify.cpp | 687 +- lib/Analysis/JumpInstrTableInfo.cpp | 17 +- lib/Analysis/LazyCallGraph.cpp | 10 +- lib/Analysis/LazyValueInfo.cpp | 282 +- lib/Analysis/LibCallSemantics.cpp | 4 +- lib/Analysis/Lint.cpp | 30 +- lib/Analysis/Loads.cpp | 182 +- lib/Analysis/LoopInfo.cpp | 4 +- lib/Analysis/LoopPass.cpp | 11 + lib/Analysis/MemoryBuiltins.cpp | 10 +- lib/Analysis/MemoryDependenceAnalysis.cpp | 193 +- lib/Analysis/NoAliasAnalysis.cpp | 5 +- lib/Analysis/PHITransAddr.cpp | 6 +- lib/Analysis/PtrUseVisitor.cpp | 2 +- lib/Analysis/RegionInfo.cpp | 828 +-- lib/Analysis/RegionPass.cpp | 4 +- lib/Analysis/RegionPrinter.cpp | 51 +- lib/Analysis/ScalarEvolution.cpp | 1269 ++-- lib/Analysis/ScalarEvolutionAliasAnalysis.cpp | 4 +- lib/Analysis/ScalarEvolutionExpander.cpp | 8 +- lib/Analysis/ScalarEvolutionNormalization.cpp | 2 +- lib/Analysis/ScopedNoAliasAA.cpp | 245 + lib/Analysis/StratifiedSets.h | 692 ++ lib/Analysis/TargetTransformInfo.cpp | 51 +- lib/Analysis/TypeBasedAliasAnalysis.cpp | 35 +- lib/Analysis/ValueTracking.cpp | 1097 +++- lib/AsmParser/LLLexer.cpp | 18 +- lib/AsmParser/LLLexer.h | 10 +- lib/AsmParser/LLParser.cpp | 472 +- lib/AsmParser/LLParser.h | 46 +- lib/AsmParser/LLToken.h | 12 +- lib/AsmParser/Parser.cpp | 44 +- lib/Bitcode/Reader/BitReader.cpp | 7 +- lib/Bitcode/Reader/BitcodeReader.cpp | 779 ++- lib/Bitcode/Reader/BitcodeReader.h | 62 +- lib/Bitcode/Reader/BitstreamReader.cpp | 89 +- lib/Bitcode/Writer/BitWriter.cpp | 14 +- lib/Bitcode/Writer/BitcodeWriter.cpp | 174 +- lib/Bitcode/Writer/ValueEnumerator.cpp | 333 +- lib/Bitcode/Writer/ValueEnumerator.h | 16 +- lib/CodeGen/AggressiveAntiDepBreaker.cpp | 34 +- lib/CodeGen/AggressiveAntiDepBreaker.h | 65 +- lib/CodeGen/AllocationOrder.h | 4 +- lib/CodeGen/Analysis.cpp | 51 +- lib/CodeGen/Android.mk | 7 +- lib/CodeGen/AntiDepBreaker.h | 24 +- lib/CodeGen/AsmPrinter/ARMException.cpp | 6 +- lib/CodeGen/AsmPrinter/AddressPool.h | 4 +- lib/CodeGen/AsmPrinter/Android.mk | 1 + lib/CodeGen/AsmPrinter/AsmPrinter.cpp | 364 +- lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp | 34 +- lib/CodeGen/AsmPrinter/AsmPrinterHandler.h | 4 +- lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp | 11 +- lib/CodeGen/AsmPrinter/ByteStreamer.h | 4 +- lib/CodeGen/AsmPrinter/CMakeLists.txt | 1 + lib/CodeGen/AsmPrinter/DIE.cpp | 32 +- lib/CodeGen/AsmPrinter/DIE.h | 8 +- lib/CodeGen/AsmPrinter/DIEHash.cpp | 2 +-
lib/CodeGen/AsmPrinter/DIEHash.h | 4 +- .../AsmPrinter/DbgValueHistoryCalculator.cpp | 97 +- lib/CodeGen/AsmPrinter/DbgValueHistoryCalculator.h | 6 +- lib/CodeGen/AsmPrinter/DebugLocEntry.h | 144 +- lib/CodeGen/AsmPrinter/DebugLocList.h | 8 +- lib/CodeGen/AsmPrinter/DwarfAccelTable.cpp | 16 +- lib/CodeGen/AsmPrinter/DwarfAccelTable.h | 12 +- lib/CodeGen/AsmPrinter/DwarfCFIException.cpp | 2 +- lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp | 862 +++ lib/CodeGen/AsmPrinter/DwarfCompileUnit.h | 250 + lib/CodeGen/AsmPrinter/DwarfDebug.cpp | 1128 ++-- lib/CodeGen/AsmPrinter/DwarfDebug.h | 203 +- lib/CodeGen/AsmPrinter/DwarfException.h | 37 +- lib/CodeGen/AsmPrinter/DwarfFile.cpp | 60 +- lib/CodeGen/AsmPrinter/DwarfFile.h | 45 +- lib/CodeGen/AsmPrinter/DwarfStringPool.cpp | 8 +- lib/CodeGen/AsmPrinter/DwarfStringPool.h | 13 +- lib/CodeGen/AsmPrinter/DwarfUnit.cpp | 668 +- lib/CodeGen/AsmPrinter/DwarfUnit.h | 228 +- lib/CodeGen/AsmPrinter/EHStreamer.cpp | 18 +- lib/CodeGen/AsmPrinter/EHStreamer.h | 4 +- lib/CodeGen/AsmPrinter/ErlangGCPrinter.cpp | 4 +- lib/CodeGen/AsmPrinter/OcamlGCPrinter.cpp | 4 +- lib/CodeGen/AsmPrinter/Win64Exception.cpp | 2 +- lib/CodeGen/AsmPrinter/Win64Exception.h | 52 + lib/CodeGen/AsmPrinter/WinCodeViewLineTables.cpp | 167 +- lib/CodeGen/AsmPrinter/WinCodeViewLineTables.h | 4 +- lib/CodeGen/AtomicExpandLoadLinkedPass.cpp | 380 -- lib/CodeGen/AtomicExpandPass.cpp | 563 ++ lib/CodeGen/BasicTargetTransformInfo.cpp | 39 +- lib/CodeGen/BranchFolding.cpp | 92 +- lib/CodeGen/BranchFolding.h | 28 +- lib/CodeGen/CMakeLists.txt | 9 +- lib/CodeGen/CalcSpillWeights.cpp | 12 +- lib/CodeGen/CallingConvLower.cpp | 14 +- lib/CodeGen/CodeGen.cpp | 3 +- lib/CodeGen/CodeGenPrepare.cpp | 795 ++- lib/CodeGen/CriticalAntiDepBreaker.cpp | 51 +- lib/CodeGen/CriticalAntiDepBreaker.h | 27 +- lib/CodeGen/DFAPacketizer.cpp | 21 +- lib/CodeGen/DeadMachineInstructionElim.cpp | 22 +- lib/CodeGen/DwarfEHPrepare.cpp | 8 +- lib/CodeGen/EarlyIfConversion.cpp | 14 +- lib/CodeGen/ErlangGC.cpp | 3 +- lib/CodeGen/ExecutionDepsFix.cpp | 9 +- lib/CodeGen/ExpandISelPseudos.cpp | 4 +- lib/CodeGen/ExpandPostRAPseudos.cpp | 7 +- lib/CodeGen/ForwardControlFlowIntegrity.cpp | 374 ++ lib/CodeGen/GCMetadata.cpp | 2 +- lib/CodeGen/GCStrategy.cpp | 5 +- lib/CodeGen/GlobalMerge.cpp | 5 +- lib/CodeGen/IfConversion.cpp | 30 +- lib/CodeGen/InlineSpiller.cpp | 68 +- lib/CodeGen/InterferenceCache.h | 4 +- lib/CodeGen/IntrinsicLowering.cpp | 33 +- lib/CodeGen/JITCodeEmitter.cpp | 14 - lib/CodeGen/JumpInstrTables.cpp | 17 +- lib/CodeGen/LLVMTargetMachine.cpp | 65 +- lib/CodeGen/LexicalScopes.cpp | 11 +- lib/CodeGen/LiveDebugVariables.cpp | 74 +- lib/CodeGen/LiveDebugVariables.h | 6 +- lib/CodeGen/LiveInterval.cpp | 2 +- lib/CodeGen/LiveIntervalAnalysis.cpp | 16 +- lib/CodeGen/LiveRangeCalc.h | 4 +- lib/CodeGen/LiveRangeEdit.cpp | 7 +- lib/CodeGen/LiveRegMatrix.cpp | 3 +- lib/CodeGen/LiveStackAnalysis.cpp | 5 +- lib/CodeGen/LiveVariables.cpp | 262 +- lib/CodeGen/LocalStackSlotAllocation.cpp | 9 +- lib/CodeGen/MachineBasicBlock.cpp | 48 +- lib/CodeGen/MachineBlockPlacement.cpp | 9 +- lib/CodeGen/MachineCSE.cpp | 35 +- lib/CodeGen/MachineCodeEmitter.cpp | 14 - lib/CodeGen/MachineCombiner.cpp | 435 ++ lib/CodeGen/MachineCopyPropagation.cpp | 5 +- lib/CodeGen/MachineDominanceFrontier.cpp | 54 + lib/CodeGen/MachineDominators.cpp | 2 + lib/CodeGen/MachineFunction.cpp | 132 +- lib/CodeGen/MachineFunctionAnalysis.cpp | 3 +- lib/CodeGen/MachineInstr.cpp | 131 +- lib/CodeGen/MachineInstrBundle.cpp | 19 +- lib/CodeGen/MachineLICM.cpp | 35 +- 
lib/CodeGen/MachineModuleInfo.cpp | 8 +- lib/CodeGen/MachineRegionInfo.cpp | 140 + lib/CodeGen/MachineRegisterInfo.cpp | 27 +- lib/CodeGen/MachineSSAUpdater.cpp | 4 +- lib/CodeGen/MachineScheduler.cpp | 43 +- lib/CodeGen/MachineSink.cpp | 177 +- lib/CodeGen/MachineTraceMetrics.cpp | 76 +- lib/CodeGen/MachineVerifier.cpp | 77 +- lib/CodeGen/OptimizePHIs.cpp | 7 +- lib/CodeGen/PHIElimination.cpp | 10 +- lib/CodeGen/PHIEliminationUtils.h | 4 +- lib/CodeGen/Passes.cpp | 26 +- lib/CodeGen/PeepholeOptimizer.cpp | 795 ++- lib/CodeGen/PostRASchedulerList.cpp | 64 +- lib/CodeGen/ProcessImplicitDefs.cpp | 5 +- lib/CodeGen/PrologEpilogInserter.cpp | 47 +- lib/CodeGen/PrologEpilogInserter.h | 4 +- lib/CodeGen/PseudoSourceValue.cpp | 8 +- lib/CodeGen/RegAllocBase.cpp | 3 +- lib/CodeGen/RegAllocBase.h | 6 +- lib/CodeGen/RegAllocBasic.cpp | 3 +- lib/CodeGen/RegAllocFast.cpp | 36 +- lib/CodeGen/RegAllocGreedy.cpp | 30 +- lib/CodeGen/RegAllocPBQP.cpp | 777 ++- lib/CodeGen/RegisterClassInfo.cpp | 7 +- lib/CodeGen/RegisterCoalescer.cpp | 108 +- lib/CodeGen/RegisterCoalescer.h | 50 +- lib/CodeGen/RegisterPressure.cpp | 3 +- lib/CodeGen/RegisterScavenging.cpp | 131 +- lib/CodeGen/ScheduleDAG.cpp | 9 +- lib/CodeGen/ScheduleDAGInstrs.cpp | 28 +- lib/CodeGen/ScheduleDAGPrinter.cpp | 1 - lib/CodeGen/ScoreboardHazardRecognizer.cpp | 2 +- lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 2257 ++++--- lib/CodeGen/SelectionDAG/FastISel.cpp | 1339 ++-- lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp | 117 +- lib/CodeGen/SelectionDAG/InstrEmitter.cpp | 48 +- lib/CodeGen/SelectionDAG/InstrEmitter.h | 5 +- lib/CodeGen/SelectionDAG/LegalizeDAG.cpp | 453 +- lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp | 125 +- lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp | 68 +- lib/CodeGen/SelectionDAG/LegalizeTypes.h | 18 +- lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp | 12 +- lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp | 66 +- lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp | 96 +- lib/CodeGen/SelectionDAG/ResourcePriorityQueue.cpp | 50 +- lib/CodeGen/SelectionDAG/SDNodeDbgValue.h | 58 +- lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp | 32 +- lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp | 80 +- lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp | 38 +- lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h | 4 +- lib/CodeGen/SelectionDAG/ScheduleDAGVLIW.cpp | 7 +- lib/CodeGen/SelectionDAG/SelectionDAG.cpp | 477 +- lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp | 1021 +-- lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h | 43 +- lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp | 14 +- lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp | 147 +- lib/CodeGen/SelectionDAG/TargetLowering.cpp | 132 +- lib/CodeGen/ShadowStackGC.cpp | 2 +- lib/CodeGen/SjLjEHPrepare.cpp | 53 +- lib/CodeGen/SpillPlacement.cpp | 54 +- lib/CodeGen/SpillPlacement.h | 13 +- lib/CodeGen/Spiller.cpp | 184 - lib/CodeGen/Spiller.h | 9 +- lib/CodeGen/SplitKit.cpp | 33 +- lib/CodeGen/SplitKit.h | 4 +- lib/CodeGen/StackColoring.cpp | 2 +- lib/CodeGen/StackMapLivenessAnalysis.cpp | 4 +- lib/CodeGen/StackMaps.cpp | 32 +- lib/CodeGen/StackProtector.cpp | 9 +- lib/CodeGen/StackSlotColoring.cpp | 4 +- lib/CodeGen/TailDuplication.cpp | 9 +- lib/CodeGen/TargetFrameLoweringImpl.cpp | 6 +- lib/CodeGen/TargetInstrInfo.cpp | 102 +- lib/CodeGen/TargetLoweringBase.cpp | 87 +- lib/CodeGen/TargetLoweringObjectFileImpl.cpp | 158 +- lib/CodeGen/TargetOptionsImpl.cpp | 7 + lib/CodeGen/TargetRegisterInfo.cpp | 11 +- lib/CodeGen/TargetSchedule.cpp | 33 +- lib/CodeGen/TwoAddressInstructionPass.cpp | 28 
+- lib/CodeGen/UnreachableBlockElim.cpp | 11 +- lib/CodeGen/VirtRegMap.cpp | 13 +- lib/DebugInfo/Android.mk | 1 + lib/DebugInfo/CMakeLists.txt | 1 + lib/DebugInfo/DIContext.cpp | 2 +- lib/DebugInfo/DWARFAbbreviationDeclaration.h | 4 +- lib/DebugInfo/DWARFAcceleratorTable.cpp | 133 + lib/DebugInfo/DWARFAcceleratorTable.h | 51 + lib/DebugInfo/DWARFCompileUnit.h | 13 +- lib/DebugInfo/DWARFContext.cpp | 244 +- lib/DebugInfo/DWARFContext.h | 91 +- lib/DebugInfo/DWARFDebugAbbrev.cpp | 4 +- lib/DebugInfo/DWARFDebugAbbrev.h | 4 +- lib/DebugInfo/DWARFDebugArangeSet.h | 4 +- lib/DebugInfo/DWARFDebugAranges.h | 4 +- lib/DebugInfo/DWARFDebugFrame.cpp | 3 +- lib/DebugInfo/DWARFDebugFrame.h | 4 +- lib/DebugInfo/DWARFDebugInfoEntry.cpp | 115 +- lib/DebugInfo/DWARFDebugInfoEntry.h | 17 +- lib/DebugInfo/DWARFDebugLine.cpp | 38 +- lib/DebugInfo/DWARFDebugLine.h | 12 +- lib/DebugInfo/DWARFDebugLoc.h | 4 +- lib/DebugInfo/DWARFDebugRangeList.h | 4 +- lib/DebugInfo/DWARFFormValue.cpp | 44 +- lib/DebugInfo/DWARFRelocMap.h | 6 +- lib/DebugInfo/DWARFSection.h | 24 + lib/DebugInfo/DWARFTypeUnit.h | 13 +- lib/DebugInfo/DWARFUnit.cpp | 45 +- lib/DebugInfo/DWARFUnit.h | 104 +- lib/ExecutionEngine/CMakeLists.txt | 2 +- lib/ExecutionEngine/ExecutionEngine.cpp | 172 +- lib/ExecutionEngine/ExecutionEngineBindings.cpp | 22 +- .../IntelJITEvents/IntelJITEventListener.cpp | 94 +- lib/ExecutionEngine/IntelJITEvents/LLVMBuild.txt | 1 + lib/ExecutionEngine/Interpreter/CMakeLists.txt | 2 +- .../Interpreter/ExternalFunctions.cpp | 35 +- lib/ExecutionEngine/Interpreter/Interpreter.cpp | 13 +- lib/ExecutionEngine/Interpreter/Interpreter.h | 84 +- lib/ExecutionEngine/JIT/Android.mk | 17 - lib/ExecutionEngine/JIT/CMakeLists.txt | 8 - lib/ExecutionEngine/JIT/JIT.cpp | 695 -- lib/ExecutionEngine/JIT/JIT.h | 229 - lib/ExecutionEngine/JIT/JITEmitter.cpp | 1256 ---- lib/ExecutionEngine/JIT/JITMemoryManager.cpp | 904 --- lib/ExecutionEngine/JIT/LLVMBuild.txt | 22 - lib/ExecutionEngine/JIT/Makefile | 38 - lib/ExecutionEngine/JITEventListener.cpp | 15 + lib/ExecutionEngine/LLVMBuild.txt | 2 +- lib/ExecutionEngine/MCJIT/MCJIT.cpp | 186 +- lib/ExecutionEngine/MCJIT/MCJIT.h | 47 +- lib/ExecutionEngine/Makefile | 2 +- .../OProfileJIT/OProfileJITEventListener.cpp | 90 - lib/ExecutionEngine/RTDyldMemoryManager.cpp | 23 +- lib/ExecutionEngine/RuntimeDyld/GDBRegistrar.cpp | 8 +- lib/ExecutionEngine/RuntimeDyld/JITRegistrar.h | 6 +- .../RuntimeDyld/ObjectImageCommon.h | 23 +- lib/ExecutionEngine/RuntimeDyld/RuntimeDyld.cpp | 271 +- .../RuntimeDyld/RuntimeDyldChecker.cpp | 1348 ++-- .../RuntimeDyld/RuntimeDyldCheckerImpl.h | 76 + lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp | 175 +- lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.h | 10 +- lib/ExecutionEngine/RuntimeDyld/RuntimeDyldImpl.h | 38 +- .../RuntimeDyld/RuntimeDyldMachO.cpp | 919 +-- lib/ExecutionEngine/RuntimeDyld/RuntimeDyldMachO.h | 174 +- .../RuntimeDyld/Targets/RuntimeDyldMachOAArch64.h | 405 ++ .../RuntimeDyld/Targets/RuntimeDyldMachOARM.h | 277 + .../RuntimeDyld/Targets/RuntimeDyldMachOI386.h | 261 + .../RuntimeDyld/Targets/RuntimeDyldMachOX86_64.h | 136 + lib/ExecutionEngine/TargetSelect.cpp | 5 +- lib/IR/Android.mk | 1 + lib/IR/AsmWriter.cpp | 278 +- lib/IR/AsmWriter.h | 11 +- lib/IR/AttributeImpl.h | 26 +- lib/IR/Attributes.cpp | 110 +- lib/IR/AutoUpgrade.cpp | 211 +- lib/IR/BasicBlock.cpp | 50 +- lib/IR/CMakeLists.txt | 1 + lib/IR/ConstantFold.cpp | 29 +- lib/IR/ConstantFold.h | 4 +- lib/IR/Constants.cpp | 509 +- lib/IR/ConstantsContext.h | 628 +- lib/IR/Core.cpp | 120 +- 
lib/IR/DIBuilder.cpp | 1271 ++-- lib/IR/DataLayout.cpp | 34 +- lib/IR/DebugInfo.cpp | 583 +- lib/IR/DebugLoc.cpp | 10 +- lib/IR/DiagnosticInfo.cpp | 29 +- lib/IR/DiagnosticPrinter.cpp | 2 +- lib/IR/Function.cpp | 122 +- lib/IR/GCOV.cpp | 13 +- lib/IR/Globals.cpp | 27 +- lib/IR/IRBuilder.cpp | 44 +- lib/IR/InlineAsm.cpp | 4 + lib/IR/Instruction.cpp | 26 +- lib/IR/Instructions.cpp | 78 +- lib/IR/LLVMContext.cpp | 65 +- lib/IR/LLVMContextImpl.cpp | 18 +- lib/IR/LLVMContextImpl.h | 89 +- lib/IR/LeaksContext.h | 6 +- lib/IR/LegacyPassManager.cpp | 14 +- lib/IR/MDBuilder.cpp | 22 +- lib/IR/Mangler.cpp | 96 +- lib/IR/Metadata.cpp | 295 +- lib/IR/Module.cpp | 58 +- lib/IR/PassManager.cpp | 2 +- lib/IR/PassRegistry.cpp | 56 +- lib/IR/SymbolTableListTraitsImpl.h | 4 +- lib/IR/Type.cpp | 45 +- lib/IR/TypeFinder.cpp | 2 +- lib/IR/Use.cpp | 2 +- lib/IR/UseListOrder.cpp | 43 + lib/IR/User.cpp | 3 - lib/IR/Value.cpp | 120 +- lib/IR/ValueSymbolTable.cpp | 25 +- lib/IR/Verifier.cpp | 198 +- lib/IRReader/IRReader.cpp | 56 +- lib/LTO/LLVMBuild.txt | 2 +- lib/LTO/LTOCodeGenerator.cpp | 113 +- lib/LTO/LTOModule.cpp | 233 +- lib/Linker/LinkModules.cpp | 913 +-- lib/MC/Android.mk | 4 +- lib/MC/CMakeLists.txt | 5 +- lib/MC/ConstantPools.cpp | 17 +- lib/MC/ELFObjectWriter.cpp | 98 +- lib/MC/LLVMBuild.txt | 2 +- lib/MC/MCAnalysis/Android.mk | 37 - lib/MC/MCAnalysis/CMakeLists.txt | 8 - lib/MC/MCAnalysis/LLVMBuild.txt | 5 - lib/MC/MCAnalysis/MCAtom.cpp | 114 - lib/MC/MCAnalysis/MCFunction.cpp | 76 - lib/MC/MCAnalysis/MCModule.cpp | 142 - lib/MC/MCAnalysis/MCModuleYAML.cpp | 464 -- lib/MC/MCAnalysis/MCObjectDisassembler.cpp | 574 -- lib/MC/MCAnalysis/MCObjectSymbolizer.cpp | 268 - lib/MC/MCAnalysis/Makefile | 14 - lib/MC/MCAsmInfo.cpp | 6 +- lib/MC/MCAsmInfoCOFF.cpp | 1 - lib/MC/MCAsmInfoDarwin.cpp | 4 +- lib/MC/MCAsmInfoELF.cpp | 9 + lib/MC/MCAsmStreamer.cpp | 24 +- lib/MC/MCAssembler.cpp | 76 +- lib/MC/MCContext.cpp | 62 +- lib/MC/MCDisassembler.cpp | 39 - lib/MC/MCDisassembler/Android.mk | 26 +- lib/MC/MCDisassembler/CMakeLists.txt | 3 + lib/MC/MCDisassembler/Disassembler.cpp | 65 +- lib/MC/MCDisassembler/Disassembler.h | 4 +- lib/MC/MCDisassembler/MCDisassembler.cpp | 39 + lib/MC/MCDisassembler/MCExternalSymbolizer.cpp | 198 + lib/MC/MCDisassembler/MCRelocationInfo.cpp | 39 + lib/MC/MCDwarf.cpp | 252 +- lib/MC/MCELF.cpp | 4 +- lib/MC/MCELFObjectTargetWriter.cpp | 3 +- lib/MC/MCELFStreamer.cpp | 57 +- lib/MC/MCExpr.cpp | 91 +- lib/MC/MCExternalSymbolizer.cpp | 198 - lib/MC/MCMachOStreamer.cpp | 10 +- lib/MC/MCNullStreamer.cpp | 1 - lib/MC/MCObjectFileInfo.cpp | 59 +- lib/MC/MCObjectStreamer.cpp | 71 +- lib/MC/MCParser/AsmLexer.cpp | 18 +- lib/MC/MCParser/AsmParser.cpp | 207 +- lib/MC/MCParser/COFFAsmParser.cpp | 4 + lib/MC/MCParser/DarwinAsmParser.cpp | 8 +- lib/MC/MCParser/ELFAsmParser.cpp | 2 +- lib/MC/MCParser/MCAsmLexer.cpp | 4 + lib/MC/MCParser/MCAsmParser.cpp | 2 +- lib/MC/MCRelocationInfo.cpp | 39 - lib/MC/MCSectionCOFF.cpp | 18 +- lib/MC/MCSectionELF.cpp | 4 +- lib/MC/MCStreamer.cpp | 325 +- lib/MC/MCSubtargetInfo.cpp | 12 +- lib/MC/MCTargetOptions.cpp | 6 +- lib/MC/MCWin64EH.cpp | 173 +- lib/MC/MCWinEH.cpp | 84 + lib/MC/MachObjectWriter.cpp | 82 +- lib/MC/Makefile | 2 +- lib/MC/StringTableBuilder.cpp | 45 +- lib/MC/SubtargetFeature.cpp | 18 +- lib/MC/WinCOFFObjectWriter.cpp | 338 +- lib/MC/WinCOFFStreamer.cpp | 33 +- lib/Object/Archive.cpp | 40 +- lib/Object/Binary.cpp | 37 +- lib/Object/COFFObjectFile.cpp | 1007 ++- lib/Object/COFFYAML.cpp | 106 + lib/Object/ELF.cpp | 12 + lib/Object/ELFObjectFile.cpp | 
37 +- lib/Object/ELFYAML.cpp | 159 +- lib/Object/Error.cpp | 10 +- lib/Object/IRObjectFile.cpp | 74 +- lib/Object/LLVMBuild.txt | 2 +- lib/Object/MachOObjectFile.cpp | 1379 ++-- lib/Object/MachOUniversal.cpp | 33 +- lib/Object/Object.cpp | 60 +- lib/Object/ObjectFile.cpp | 30 +- lib/Object/RecordStreamer.h | 4 +- lib/Object/SymbolicFile.cpp | 37 +- lib/Option/ArgList.cpp | 9 + lib/Option/OptTable.cpp | 18 +- lib/Option/Option.cpp | 12 +- lib/ProfileData/Android.mk | 12 +- lib/ProfileData/CMakeLists.txt | 6 + lib/ProfileData/CoverageMapping.cpp | 475 ++ lib/ProfileData/CoverageMappingReader.cpp | 553 ++ lib/ProfileData/CoverageMappingWriter.cpp | 187 + lib/ProfileData/InstrProf.cpp | 6 +- lib/ProfileData/InstrProfIndexed.h | 8 +- lib/ProfileData/InstrProfReader.cpp | 100 +- lib/ProfileData/InstrProfWriter.cpp | 56 +- lib/ProfileData/LLVMBuild.txt | 2 +- lib/ProfileData/SampleProf.cpp | 51 + lib/ProfileData/SampleProfReader.cpp | 399 ++ lib/ProfileData/SampleProfWriter.cpp | 126 + lib/Support/APFloat.cpp | 72 +- lib/Support/APInt.cpp | 63 +- lib/Support/Android.mk | 6 +- lib/Support/CMakeLists.txt | 70 +- lib/Support/CommandLine.cpp | 80 +- lib/Support/DataStream.cpp | 4 +- lib/Support/Debug.cpp | 9 +- lib/Support/Disassembler.cpp | 74 - lib/Support/Dwarf.cpp | 105 +- lib/Support/ErrorHandling.cpp | 9 +- lib/Support/FileOutputBuffer.cpp | 19 +- lib/Support/FileUtilities.cpp | 16 +- lib/Support/GraphWriter.cpp | 5 +- lib/Support/Host.cpp | 4 +- lib/Support/IncludeFile.cpp | 20 - lib/Support/LineIterator.cpp | 55 +- lib/Support/LockFileManager.cpp | 8 +- lib/Support/MD5.cpp | 48 +- lib/Support/MathExtras.cpp | 32 + lib/Support/MemoryBuffer.cpp | 149 +- lib/Support/MemoryObject.cpp | 19 - lib/Support/Options.cpp | 33 + lib/Support/Path.cpp | 173 +- lib/Support/ScaledNumber.cpp | 3 + lib/Support/SmallPtrSet.cpp | 27 +- lib/Support/SourceMgr.cpp | 11 +- lib/Support/SpecialCaseList.cpp | 32 +- lib/Support/StreamableMemoryObject.cpp | 140 - lib/Support/StreamingMemoryObject.cpp | 127 + lib/Support/StringRef.cpp | 22 +- lib/Support/StringRefMemoryObject.cpp | 29 - lib/Support/TimeValue.cpp | 6 - lib/Support/Timer.cpp | 8 +- lib/Support/ToolOutputFile.cpp | 15 +- lib/Support/Triple.cpp | 283 +- lib/Support/Unix/Host.inc | 6 +- lib/Support/Unix/Path.inc | 138 +- lib/Support/Unix/Process.inc | 108 +- lib/Support/Unix/Program.inc | 79 +- lib/Support/Unix/RWMutex.inc | 12 +- lib/Support/Unix/Signals.inc | 266 +- lib/Support/Unix/TimeValue.inc | 2 +- lib/Support/Unix/Unix.h | 4 +- lib/Support/Windows/DynamicLibrary.inc | 72 +- lib/Support/Windows/Host.inc | 2 +- lib/Support/Windows/Path.inc | 231 +- lib/Support/Windows/Process.inc | 101 +- lib/Support/Windows/Program.inc | 184 +- lib/Support/Windows/RWMutex.inc | 8 +- lib/Support/Windows/ThreadLocal.inc | 2 +- lib/Support/Windows/WindowsSupport.h | 3 + lib/Support/Windows/explicit_symbols.inc | 30 + lib/Support/YAMLParser.cpp | 60 +- lib/Support/YAMLTraits.cpp | 59 +- lib/Support/raw_ostream.cpp | 111 +- lib/TableGen/Main.cpp | 23 +- lib/TableGen/Record.cpp | 52 +- lib/TableGen/TGLexer.cpp | 3 +- lib/TableGen/TGLexer.h | 17 +- lib/TableGen/TGParser.cpp | 96 +- lib/TableGen/TGParser.h | 4 +- lib/Target/AArch64/AArch64.h | 9 +- lib/Target/AArch64/AArch64A57FPLoadBalancing.cpp | 6 +- lib/Target/AArch64/AArch64AddressTypePromotion.cpp | 6 +- lib/Target/AArch64/AArch64AdvSIMDScalarPass.cpp | 12 +- lib/Target/AArch64/AArch64AsmPrinter.cpp | 16 +- lib/Target/AArch64/AArch64BranchRelaxation.cpp | 11 +- lib/Target/AArch64/AArch64CallingConvention.td | 41 +- 
.../AArch64/AArch64CleanupLocalDynamicTLSPass.cpp | 4 +- lib/Target/AArch64/AArch64CollectLOH.cpp | 52 +- lib/Target/AArch64/AArch64ConditionOptimizer.cpp | 422 ++ lib/Target/AArch64/AArch64ConditionalCompares.cpp | 12 +- .../AArch64/AArch64DeadRegisterDefinitionsPass.cpp | 9 +- lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp | 16 +- lib/Target/AArch64/AArch64FastISel.cpp | 4532 ++++++++++--- lib/Target/AArch64/AArch64FrameLowering.cpp | 39 +- lib/Target/AArch64/AArch64FrameLowering.h | 4 +- lib/Target/AArch64/AArch64ISelDAGToDAG.cpp | 247 +- lib/Target/AArch64/AArch64ISelLowering.cpp | 1314 +++- lib/Target/AArch64/AArch64ISelLowering.h | 37 +- lib/Target/AArch64/AArch64InstrAtomics.td | 9 +- lib/Target/AArch64/AArch64InstrFormats.td | 72 +- lib/Target/AArch64/AArch64InstrInfo.cpp | 952 ++- lib/Target/AArch64/AArch64InstrInfo.h | 40 +- lib/Target/AArch64/AArch64InstrInfo.td | 450 +- lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp | 16 +- lib/Target/AArch64/AArch64MCInstLower.cpp | 3 +- lib/Target/AArch64/AArch64MCInstLower.h | 6 +- lib/Target/AArch64/AArch64MachineCombinerPattern.h | 42 + lib/Target/AArch64/AArch64MachineFunctionInfo.h | 6 +- lib/Target/AArch64/AArch64PBQPRegAlloc.cpp | 383 ++ lib/Target/AArch64/AArch64PBQPRegAlloc.h | 38 + lib/Target/AArch64/AArch64PerfectShuffle.h | 5 + lib/Target/AArch64/AArch64PromoteConstant.cpp | 6 +- lib/Target/AArch64/AArch64RegisterInfo.cpp | 12 +- lib/Target/AArch64/AArch64RegisterInfo.h | 6 +- lib/Target/AArch64/AArch64RegisterInfo.td | 5 +- lib/Target/AArch64/AArch64SchedA57.td | 371 +- lib/Target/AArch64/AArch64SchedA57WriteRes.td | 52 +- lib/Target/AArch64/AArch64SelectionDAGInfo.cpp | 3 +- lib/Target/AArch64/AArch64SelectionDAGInfo.h | 4 +- lib/Target/AArch64/AArch64StorePairSuppress.cpp | 14 +- lib/Target/AArch64/AArch64Subtarget.cpp | 34 +- lib/Target/AArch64/AArch64Subtarget.h | 38 +- lib/Target/AArch64/AArch64TargetMachine.cpp | 95 +- lib/Target/AArch64/AArch64TargetMachine.h | 34 +- lib/Target/AArch64/AArch64TargetObjectFile.h | 4 +- lib/Target/AArch64/AArch64TargetTransformInfo.cpp | 78 +- lib/Target/AArch64/Android.mk | 2 + lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp | 229 +- lib/Target/AArch64/CMakeLists.txt | 4 +- .../AArch64/Disassembler/AArch64Disassembler.cpp | 67 +- .../AArch64/Disassembler/AArch64Disassembler.h | 11 +- .../Disassembler/AArch64ExternalSymbolizer.h | 4 +- lib/Target/AArch64/Disassembler/LLVMBuild.txt | 2 +- .../AArch64/InstPrinter/AArch64InstPrinter.cpp | 18 +- .../AArch64/InstPrinter/AArch64InstPrinter.h | 9 +- lib/Target/AArch64/LLVMBuild.txt | 2 +- .../AArch64/MCTargetDesc/AArch64AddressingModes.h | 101 +- .../AArch64/MCTargetDesc/AArch64AsmBackend.cpp | 13 +- .../AArch64/MCTargetDesc/AArch64ELFStreamer.cpp | 70 +- .../AArch64/MCTargetDesc/AArch64ELFStreamer.h | 8 +- .../AArch64/MCTargetDesc/AArch64FixupKinds.h | 4 +- .../AArch64/MCTargetDesc/AArch64MCAsmInfo.cpp | 5 +- lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.h | 4 +- .../AArch64/MCTargetDesc/AArch64MCCodeEmitter.cpp | 2 +- lib/Target/AArch64/MCTargetDesc/AArch64MCExpr.cpp | 5 +- lib/Target/AArch64/MCTargetDesc/AArch64MCExpr.h | 7 +- .../AArch64/MCTargetDesc/AArch64MCTargetDesc.cpp | 47 +- .../AArch64/MCTargetDesc/AArch64MCTargetDesc.h | 15 +- .../MCTargetDesc/AArch64MachObjectWriter.cpp | 7 +- .../AArch64/MCTargetDesc/AArch64TargetStreamer.cpp | 7 +- .../AArch64/TargetInfo/AArch64TargetInfo.cpp | 13 +- lib/Target/AArch64/Utils/AArch64BaseInfo.cpp | 36 +- lib/Target/AArch64/Utils/AArch64BaseInfo.h | 13 +- lib/Target/ARM/A15SDOptimizer.cpp | 6 
+- lib/Target/ARM/ARM.h | 9 +- lib/Target/ARM/ARM.td | 27 +- lib/Target/ARM/ARMAsmPrinter.cpp | 58 +- lib/Target/ARM/ARMAsmPrinter.h | 4 +- lib/Target/ARM/ARMBaseInstrInfo.cpp | 254 +- lib/Target/ARM/ARMBaseInstrInfo.h | 72 +- lib/Target/ARM/ARMBaseRegisterInfo.cpp | 106 +- lib/Target/ARM/ARMBaseRegisterInfo.h | 14 +- lib/Target/ARM/ARMCallingConv.h | 21 +- lib/Target/ARM/ARMCodeEmitter.cpp | 1909 ------ lib/Target/ARM/ARMConstantIslandPass.cpp | 41 +- lib/Target/ARM/ARMConstantPoolValue.h | 4 +- lib/Target/ARM/ARMExpandPseudoInsts.cpp | 7 +- lib/Target/ARM/ARMFPUName.def | 1 + lib/Target/ARM/ARMFPUName.h | 6 +- lib/Target/ARM/ARMFastISel.cpp | 153 +- lib/Target/ARM/ARMFeatures.h | 4 +- lib/Target/ARM/ARMFrameLowering.cpp | 297 +- lib/Target/ARM/ARMFrameLowering.h | 4 +- lib/Target/ARM/ARMHazardRecognizer.cpp | 4 +- lib/Target/ARM/ARMHazardRecognizer.h | 6 +- lib/Target/ARM/ARMISelDAGToDAG.cpp | 86 +- lib/Target/ARM/ARMISelLowering.cpp | 636 +- lib/Target/ARM/ARMISelLowering.h | 36 +- lib/Target/ARM/ARMInstrFormats.td | 10 + lib/Target/ARM/ARMInstrInfo.cpp | 50 +- lib/Target/ARM/ARMInstrInfo.h | 8 +- lib/Target/ARM/ARMInstrInfo.td | 109 +- lib/Target/ARM/ARMInstrNEON.td | 28 +- lib/Target/ARM/ARMInstrThumb.td | 14 +- lib/Target/ARM/ARMInstrThumb2.td | 128 +- lib/Target/ARM/ARMInstrVFP.td | 79 +- lib/Target/ARM/ARMJITInfo.cpp | 344 - lib/Target/ARM/ARMJITInfo.h | 177 - lib/Target/ARM/ARMLoadStoreOptimizer.cpp | 273 +- lib/Target/ARM/ARMMachineFunctionInfo.h | 34 +- lib/Target/ARM/ARMPerfectShuffle.h | 5 + lib/Target/ARM/ARMRegisterInfo.h | 4 +- lib/Target/ARM/ARMRelocations.h | 62 - lib/Target/ARM/ARMSelectionDAGInfo.cpp | 2 +- lib/Target/ARM/ARMSelectionDAGInfo.h | 4 +- lib/Target/ARM/ARMSubtarget.cpp | 139 +- lib/Target/ARM/ARMSubtarget.h | 67 +- lib/Target/ARM/ARMTargetMachine.cpp | 64 +- lib/Target/ARM/ARMTargetMachine.h | 36 +- lib/Target/ARM/ARMTargetObjectFile.h | 4 +- lib/Target/ARM/ARMTargetTransformInfo.cpp | 31 +- lib/Target/ARM/Android.mk | 2 - lib/Target/ARM/AsmParser/ARMAsmParser.cpp | 479 +- lib/Target/ARM/CMakeLists.txt | 5 +- lib/Target/ARM/Disassembler/ARMDisassembler.cpp | 409 +- lib/Target/ARM/Disassembler/LLVMBuild.txt | 2 +- lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp | 156 +- lib/Target/ARM/InstPrinter/ARMInstPrinter.h | 6 +- lib/Target/ARM/MCTargetDesc/ARMAddressingModes.h | 51 +- lib/Target/ARM/MCTargetDesc/ARMArchName.h | 6 +- lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp | 432 +- lib/Target/ARM/MCTargetDesc/ARMAsmBackend.h | 69 + lib/Target/ARM/MCTargetDesc/ARMAsmBackendDarwin.h | 33 + lib/Target/ARM/MCTargetDesc/ARMAsmBackendELF.h | 27 + lib/Target/ARM/MCTargetDesc/ARMAsmBackendWinCOFF.h | 26 + lib/Target/ARM/MCTargetDesc/ARMBaseInfo.h | 4 +- lib/Target/ARM/MCTargetDesc/ARMELFObjectWriter.cpp | 8 +- lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp | 17 +- lib/Target/ARM/MCTargetDesc/ARMFixupKinds.h | 4 +- lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.cpp | 2 - lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.h | 4 +- lib/Target/ARM/MCTargetDesc/ARMMCExpr.cpp | 6 - lib/Target/ARM/MCTargetDesc/ARMMCExpr.h | 11 +- lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp | 206 +- lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.h | 4 +- .../ARM/MCTargetDesc/ARMMachObjectWriter.cpp | 2 +- lib/Target/ARM/MCTargetDesc/ARMTargetStreamer.cpp | 2 +- lib/Target/ARM/MCTargetDesc/ARMUnwindOpAsm.h | 6 +- lib/Target/ARM/MCTargetDesc/LLVMBuild.txt | 2 +- lib/Target/ARM/MLxExpansionPass.cpp | 4 +- lib/Target/ARM/Makefile | 2 +- lib/Target/ARM/Thumb1FrameLowering.cpp | 110 +- 
lib/Target/ARM/Thumb1FrameLowering.h | 4 +- lib/Target/ARM/Thumb1InstrInfo.cpp | 34 +- lib/Target/ARM/Thumb1InstrInfo.h | 9 +- lib/Target/ARM/Thumb1RegisterInfo.cpp | 391 +- lib/Target/ARM/Thumb1RegisterInfo.h | 6 +- lib/Target/ARM/Thumb2ITBlockPass.cpp | 7 +- lib/Target/ARM/Thumb2InstrInfo.cpp | 9 + lib/Target/ARM/Thumb2InstrInfo.h | 10 +- lib/Target/ARM/Thumb2RegisterInfo.cpp | 2 +- lib/Target/ARM/Thumb2RegisterInfo.h | 6 +- lib/Target/ARM/Thumb2SizeReduction.cpp | 6 +- lib/Target/Android.mk | 1 - lib/Target/CMakeLists.txt | 1 - lib/Target/CppBackend/CPPTargetMachine.h | 16 +- lib/Target/Hexagon/CMakeLists.txt | 25 +- lib/Target/Hexagon/Disassembler/CMakeLists.txt | 3 + .../Hexagon/Disassembler/HexagonDisassembler.cpp | 114 + lib/Target/Hexagon/Disassembler/LLVMBuild.txt | 23 + lib/Target/Hexagon/Disassembler/Makefile | 16 + lib/Target/Hexagon/Hexagon.h | 4 +- lib/Target/Hexagon/HexagonAsmPrinter.cpp | 2 +- lib/Target/Hexagon/HexagonAsmPrinter.h | 4 +- lib/Target/Hexagon/HexagonCFGOptimizer.cpp | 2 +- lib/Target/Hexagon/HexagonCallingConvLower.cpp | 6 +- lib/Target/Hexagon/HexagonCallingConvLower.h | 4 +- lib/Target/Hexagon/HexagonCopyToCombine.cpp | 4 +- lib/Target/Hexagon/HexagonExpandPredSpillCode.cpp | 18 +- lib/Target/Hexagon/HexagonFixupHwLoops.cpp | 2 +- lib/Target/Hexagon/HexagonFrameLowering.cpp | 19 +- lib/Target/Hexagon/HexagonFrameLowering.h | 4 +- lib/Target/Hexagon/HexagonHardwareLoops.cpp | 11 +- lib/Target/Hexagon/HexagonISelDAGToDAG.cpp | 22 +- lib/Target/Hexagon/HexagonISelLowering.cpp | 41 +- lib/Target/Hexagon/HexagonISelLowering.h | 4 +- lib/Target/Hexagon/HexagonInstrFormats.td | 19 +- lib/Target/Hexagon/HexagonInstrInfo.cpp | 39 +- lib/Target/Hexagon/HexagonInstrInfo.h | 9 +- lib/Target/Hexagon/HexagonInstrInfo.td | 99 +- lib/Target/Hexagon/HexagonInstrInfoV4.td | 62 +- lib/Target/Hexagon/HexagonIntrinsics.td | 5 + lib/Target/Hexagon/HexagonIntrinsicsV4.td | 9 +- lib/Target/Hexagon/HexagonMachineFunctionInfo.h | 4 +- lib/Target/Hexagon/HexagonMachineScheduler.cpp | 10 +- lib/Target/Hexagon/HexagonMachineScheduler.h | 20 +- lib/Target/Hexagon/HexagonNewValueJump.cpp | 6 +- lib/Target/Hexagon/HexagonPeephole.cpp | 6 +- lib/Target/Hexagon/HexagonRegisterInfo.cpp | 14 +- lib/Target/Hexagon/HexagonRegisterInfo.h | 4 +- lib/Target/Hexagon/HexagonRegisterInfo.td | 53 +- lib/Target/Hexagon/HexagonSelectionDAGInfo.h | 4 +- .../Hexagon/HexagonSplitConst32AndConst64.cpp | 15 +- lib/Target/Hexagon/HexagonSplitTFRCondSets.cpp | 2 +- lib/Target/Hexagon/HexagonSubtarget.h | 26 +- lib/Target/Hexagon/HexagonTargetMachine.cpp | 3 + lib/Target/Hexagon/HexagonTargetMachine.h | 31 +- lib/Target/Hexagon/HexagonTargetObjectFile.cpp | 5 +- lib/Target/Hexagon/HexagonTargetObjectFile.h | 4 +- lib/Target/Hexagon/HexagonVLIWPacketizer.cpp | 114 +- .../Hexagon/HexagonVarargsCallingConvention.h | 20 +- lib/Target/Hexagon/InstPrinter/CMakeLists.txt | 3 - .../Hexagon/InstPrinter/HexagonInstPrinter.cpp | 204 - .../Hexagon/InstPrinter/HexagonInstPrinter.h | 87 - lib/Target/Hexagon/InstPrinter/LLVMBuild.txt | 23 - lib/Target/Hexagon/InstPrinter/Makefile | 15 - lib/Target/Hexagon/LLVMBuild.txt | 4 +- lib/Target/Hexagon/MCTargetDesc/CMakeLists.txt | 6 + .../Hexagon/MCTargetDesc/HexagonAsmBackend.cpp | 74 + lib/Target/Hexagon/MCTargetDesc/HexagonBaseInfo.h | 15 +- .../MCTargetDesc/HexagonELFObjectWriter.cpp | 62 + .../Hexagon/MCTargetDesc/HexagonInstPrinter.cpp | 254 + .../Hexagon/MCTargetDesc/HexagonInstPrinter.h | 87 + .../Hexagon/MCTargetDesc/HexagonMCAsmInfo.cpp | 1 - 
lib/Target/Hexagon/MCTargetDesc/HexagonMCAsmInfo.h | 4 +- .../Hexagon/MCTargetDesc/HexagonMCCodeEmitter.cpp | 88 + .../Hexagon/MCTargetDesc/HexagonMCCodeEmitter.h | 60 + lib/Target/Hexagon/MCTargetDesc/HexagonMCInst.cpp | 11 +- lib/Target/Hexagon/MCTargetDesc/HexagonMCInst.h | 4 +- .../Hexagon/MCTargetDesc/HexagonMCTargetDesc.cpp | 61 +- .../Hexagon/MCTargetDesc/HexagonMCTargetDesc.h | 26 +- lib/Target/Hexagon/MCTargetDesc/LLVMBuild.txt | 2 +- lib/Target/Hexagon/Makefile | 16 +- lib/Target/MSP430/InstPrinter/MSP430InstPrinter.h | 4 +- lib/Target/MSP430/MCTargetDesc/MSP430MCAsmInfo.h | 4 +- .../MSP430/MCTargetDesc/MSP430MCTargetDesc.cpp | 2 +- .../MSP430/MCTargetDesc/MSP430MCTargetDesc.h | 4 +- lib/Target/MSP430/MSP430.h | 4 +- lib/Target/MSP430/MSP430BranchSelector.cpp | 3 +- lib/Target/MSP430/MSP430CallingConv.td | 2 +- lib/Target/MSP430/MSP430FrameLowering.cpp | 69 +- lib/Target/MSP430/MSP430FrameLowering.h | 4 +- lib/Target/MSP430/MSP430ISelDAGToDAG.cpp | 6 +- lib/Target/MSP430/MSP430ISelLowering.cpp | 46 +- lib/Target/MSP430/MSP430ISelLowering.h | 6 +- lib/Target/MSP430/MSP430InstrInfo.cpp | 2 +- lib/Target/MSP430/MSP430InstrInfo.h | 4 +- lib/Target/MSP430/MSP430InstrInfo.td | 232 +- lib/Target/MSP430/MSP430MCInstLower.cpp | 5 +- lib/Target/MSP430/MSP430MCInstLower.h | 4 +- lib/Target/MSP430/MSP430MachineFunctionInfo.h | 4 +- lib/Target/MSP430/MSP430RegisterInfo.cpp | 48 +- lib/Target/MSP430/MSP430RegisterInfo.h | 6 +- lib/Target/MSP430/MSP430RegisterInfo.td | 38 +- lib/Target/MSP430/MSP430SelectionDAGInfo.h | 4 +- lib/Target/MSP430/MSP430Subtarget.cpp | 2 +- lib/Target/MSP430/MSP430Subtarget.h | 22 +- lib/Target/MSP430/MSP430TargetMachine.cpp | 4 + lib/Target/MSP430/MSP430TargetMachine.h | 30 +- lib/Target/Mips/Android.mk | 4 +- lib/Target/Mips/AsmParser/MipsAsmParser.cpp | 1289 +++- lib/Target/Mips/CMakeLists.txt | 7 +- lib/Target/Mips/Disassembler/LLVMBuild.txt | 2 +- lib/Target/Mips/Disassembler/MipsDisassembler.cpp | 308 +- lib/Target/Mips/InstPrinter/MipsInstPrinter.cpp | 22 + lib/Target/Mips/InstPrinter/MipsInstPrinter.h | 5 +- lib/Target/Mips/LLVMBuild.txt | 2 +- lib/Target/Mips/MCTargetDesc/Android.mk | 1 + lib/Target/Mips/MCTargetDesc/CMakeLists.txt | 1 + .../Mips/MCTargetDesc/MipsABIFlagsSection.cpp | 6 + lib/Target/Mips/MCTargetDesc/MipsABIFlagsSection.h | 15 +- lib/Target/Mips/MCTargetDesc/MipsAsmBackend.cpp | 7 +- lib/Target/Mips/MCTargetDesc/MipsAsmBackend.h | 4 +- lib/Target/Mips/MCTargetDesc/MipsBaseInfo.h | 4 +- .../Mips/MCTargetDesc/MipsELFObjectWriter.cpp | 6 +- lib/Target/Mips/MCTargetDesc/MipsELFStreamer.cpp | 64 +- lib/Target/Mips/MCTargetDesc/MipsELFStreamer.h | 46 +- lib/Target/Mips/MCTargetDesc/MipsFixupKinds.h | 6 +- lib/Target/Mips/MCTargetDesc/MipsMCAsmInfo.cpp | 1 - lib/Target/Mips/MCTargetDesc/MipsMCAsmInfo.h | 4 +- lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.cpp | 129 + lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.h | 36 +- lib/Target/Mips/MCTargetDesc/MipsMCExpr.cpp | 5 +- lib/Target/Mips/MCTargetDesc/MipsMCExpr.h | 7 +- lib/Target/Mips/MCTargetDesc/MipsMCNaCl.h | 7 +- lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.cpp | 9 +- lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.h | 4 +- .../Mips/MCTargetDesc/MipsNaClELFStreamer.cpp | 4 +- lib/Target/Mips/MCTargetDesc/MipsOptionRecord.cpp | 92 + .../Mips/MCTargetDesc/MipsTargetStreamer.cpp | 341 +- lib/Target/Mips/Makefile | 2 +- lib/Target/Mips/MicroMipsInstrFPU.td | 8 +- lib/Target/Mips/MicroMipsInstrFormats.td | 196 + lib/Target/Mips/MicroMipsInstrInfo.td | 291 +- lib/Target/Mips/Mips.h | 6 +- 
lib/Target/Mips/Mips.td | 10 +- lib/Target/Mips/Mips16FrameLowering.cpp | 8 +- lib/Target/Mips/Mips16FrameLowering.h | 4 +- lib/Target/Mips/Mips16HardFloat.cpp | 2 +- lib/Target/Mips/Mips16HardFloat.h | 7 +- lib/Target/Mips/Mips16HardFloatInfo.h | 4 +- lib/Target/Mips/Mips16ISelDAGToDAG.cpp | 15 +- lib/Target/Mips/Mips16ISelDAGToDAG.h | 4 +- lib/Target/Mips/Mips16ISelLowering.cpp | 70 +- lib/Target/Mips/Mips16ISelLowering.h | 22 +- lib/Target/Mips/Mips16InstrFormats.td | 2 +- lib/Target/Mips/Mips16InstrInfo.cpp | 74 +- lib/Target/Mips/Mips16InstrInfo.h | 6 +- lib/Target/Mips/Mips16InstrInfo.td | 4 +- lib/Target/Mips/Mips16RegisterInfo.cpp | 8 +- lib/Target/Mips/Mips16RegisterInfo.h | 4 +- lib/Target/Mips/Mips32r6InstrFormats.td | 2 +- lib/Target/Mips/Mips32r6InstrInfo.td | 4 +- lib/Target/Mips/Mips64InstrInfo.td | 24 +- lib/Target/Mips/Mips64r6InstrInfo.td | 4 +- lib/Target/Mips/MipsABIInfo.cpp | 45 + lib/Target/Mips/MipsABIInfo.h | 61 + lib/Target/Mips/MipsAnalyzeImmediate.cpp | 3 +- lib/Target/Mips/MipsAnalyzeImmediate.h | 4 +- lib/Target/Mips/MipsAsmPrinter.cpp | 80 +- lib/Target/Mips/MipsAsmPrinter.h | 18 +- lib/Target/Mips/MipsCCState.cpp | 142 + lib/Target/Mips/MipsCCState.h | 136 + lib/Target/Mips/MipsCallingConv.td | 236 +- lib/Target/Mips/MipsCodeEmitter.cpp | 434 -- lib/Target/Mips/MipsConstantIslandPass.cpp | 24 +- lib/Target/Mips/MipsDelaySlotFiller.cpp | 56 +- lib/Target/Mips/MipsFastISel.cpp | 1118 +++- lib/Target/Mips/MipsFrameLowering.cpp | 7 +- lib/Target/Mips/MipsFrameLowering.h | 7 +- lib/Target/Mips/MipsISelDAGToDAG.h | 6 +- lib/Target/Mips/MipsISelLowering.cpp | 1079 ++-- lib/Target/Mips/MipsISelLowering.h | 174 +- lib/Target/Mips/MipsInstrFPU.td | 48 +- lib/Target/Mips/MipsInstrFormats.td | 2 +- lib/Target/Mips/MipsInstrInfo.cpp | 50 +- lib/Target/Mips/MipsInstrInfo.h | 14 +- lib/Target/Mips/MipsInstrInfo.td | 183 +- lib/Target/Mips/MipsJITInfo.cpp | 286 - lib/Target/Mips/MipsJITInfo.h | 71 - lib/Target/Mips/MipsLongBranch.cpp | 20 +- lib/Target/Mips/MipsMCInstLower.h | 4 +- lib/Target/Mips/MipsMSAInstrInfo.td | 2 +- lib/Target/Mips/MipsMachineFunction.cpp | 12 +- lib/Target/Mips/MipsMachineFunction.h | 19 +- lib/Target/Mips/MipsModuleISelDAGToDAG.cpp | 2 +- lib/Target/Mips/MipsModuleISelDAGToDAG.h | 12 +- lib/Target/Mips/MipsOptimizePICCall.cpp | 2 +- lib/Target/Mips/MipsOptionRecord.h | 78 + lib/Target/Mips/MipsOs16.h | 8 +- lib/Target/Mips/MipsRegisterInfo.cpp | 12 +- lib/Target/Mips/MipsRegisterInfo.h | 4 +- lib/Target/Mips/MipsRegisterInfo.td | 32 +- lib/Target/Mips/MipsRelocations.h | 41 - lib/Target/Mips/MipsSEFrameLowering.cpp | 199 +- lib/Target/Mips/MipsSEFrameLowering.h | 4 +- lib/Target/Mips/MipsSEISelDAGToDAG.cpp | 3 +- lib/Target/Mips/MipsSEISelDAGToDAG.h | 4 +- lib/Target/Mips/MipsSEISelLowering.cpp | 186 +- lib/Target/Mips/MipsSEISelLowering.h | 22 +- lib/Target/Mips/MipsSEInstrInfo.cpp | 97 +- lib/Target/Mips/MipsSEInstrInfo.h | 6 +- lib/Target/Mips/MipsSERegisterInfo.cpp | 4 +- lib/Target/Mips/MipsSERegisterInfo.h | 4 +- lib/Target/Mips/MipsSelectionDAGInfo.h | 4 +- lib/Target/Mips/MipsSubtarget.cpp | 175 +- lib/Target/Mips/MipsSubtarget.h | 107 +- lib/Target/Mips/MipsTargetMachine.cpp | 101 +- lib/Target/Mips/MipsTargetMachine.h | 56 +- lib/Target/Mips/MipsTargetObjectFile.cpp | 73 +- lib/Target/Mips/MipsTargetObjectFile.h | 23 +- lib/Target/Mips/MipsTargetStreamer.h | 71 +- lib/Target/NVPTX/CMakeLists.txt | 24 +- lib/Target/NVPTX/InstPrinter/NVPTXInstPrinter.cpp | 4 +- lib/Target/NVPTX/InstPrinter/NVPTXInstPrinter.h | 4 +- 
lib/Target/NVPTX/LLVMBuild.txt | 2 +- lib/Target/NVPTX/MCTargetDesc/NVPTXBaseInfo.h | 15 +- lib/Target/NVPTX/MCTargetDesc/NVPTXMCAsmInfo.cpp | 4 +- lib/Target/NVPTX/MCTargetDesc/NVPTXMCAsmInfo.h | 8 +- lib/Target/NVPTX/MCTargetDesc/NVPTXMCTargetDesc.h | 4 +- lib/Target/NVPTX/ManagedStringPool.h | 4 +- lib/Target/NVPTX/NVPTX.h | 6 +- lib/Target/NVPTX/NVPTXAllocaHoisting.h | 6 +- lib/Target/NVPTX/NVPTXAsmPrinter.cpp | 277 +- lib/Target/NVPTX/NVPTXAsmPrinter.h | 21 +- lib/Target/NVPTX/NVPTXFrameLowering.cpp | 10 +- lib/Target/NVPTX/NVPTXFrameLowering.h | 4 +- lib/Target/NVPTX/NVPTXGenericToNVVM.cpp | 15 +- lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp | 1873 +++++- lib/Target/NVPTX/NVPTXISelDAGToDAG.h | 14 +- lib/Target/NVPTX/NVPTXISelLowering.cpp | 1529 ++++- lib/Target/NVPTX/NVPTXISelLowering.h | 324 +- lib/Target/NVPTX/NVPTXInstrFormats.td | 24 +- lib/Target/NVPTX/NVPTXInstrInfo.h | 4 +- lib/Target/NVPTX/NVPTXInstrInfo.td | 37 +- lib/Target/NVPTX/NVPTXIntrinsics.td | 4040 ++++++++++-- lib/Target/NVPTX/NVPTXLowerAggrCopies.h | 4 +- lib/Target/NVPTX/NVPTXLowerStructArgs.cpp | 134 + lib/Target/NVPTX/NVPTXMCExpr.h | 7 +- lib/Target/NVPTX/NVPTXMachineFunctionInfo.h | 5 + lib/Target/NVPTX/NVPTXPrologEpilogPass.cpp | 9 +- lib/Target/NVPTX/NVPTXRegisterInfo.cpp | 4 +- lib/Target/NVPTX/NVPTXRegisterInfo.h | 4 +- lib/Target/NVPTX/NVPTXRegisterInfo.td | 4 +- lib/Target/NVPTX/NVPTXReplaceImageHandles.cpp | 276 +- lib/Target/NVPTX/NVPTXSection.h | 4 +- lib/Target/NVPTX/NVPTXSubtarget.cpp | 3 +- lib/Target/NVPTX/NVPTXSubtarget.h | 31 +- lib/Target/NVPTX/NVPTXTargetMachine.cpp | 14 + lib/Target/NVPTX/NVPTXTargetMachine.h | 39 +- lib/Target/NVPTX/NVPTXTargetObjectFile.h | 10 +- lib/Target/NVPTX/NVPTXTargetTransformInfo.cpp | 115 + lib/Target/NVPTX/NVPTXUtilities.cpp | 6 +- lib/Target/NVPTX/NVPTXUtilities.h | 4 +- lib/Target/NVPTX/NVPTXutil.h | 4 +- lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp | 262 +- lib/Target/PowerPC/CMakeLists.txt | 5 +- lib/Target/PowerPC/Disassembler/LLVMBuild.txt | 2 +- .../PowerPC/Disassembler/PPCDisassembler.cpp | 28 +- lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp | 18 +- lib/Target/PowerPC/InstPrinter/PPCInstPrinter.h | 5 +- lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp | 26 + .../PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp | 75 +- lib/Target/PowerPC/MCTargetDesc/PPCFixupKinds.h | 4 +- lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.cpp | 6 +- lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.h | 8 +- .../PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp | 57 + lib/Target/PowerPC/MCTargetDesc/PPCMCExpr.cpp | 73 +- lib/Target/PowerPC/MCTargetDesc/PPCMCExpr.h | 11 +- .../PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp | 64 +- lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.h | 4 +- .../PowerPC/MCTargetDesc/PPCMachObjectWriter.cpp | 4 +- lib/Target/PowerPC/MCTargetDesc/PPCPredicates.h | 4 +- lib/Target/PowerPC/Makefile | 2 +- lib/Target/PowerPC/PPC.h | 23 +- lib/Target/PowerPC/PPC.td | 31 +- lib/Target/PowerPC/PPCAsmPrinter.cpp | 440 +- lib/Target/PowerPC/PPCBranchSelector.cpp | 3 +- lib/Target/PowerPC/PPCCTRLoops.cpp | 7 +- lib/Target/PowerPC/PPCCallingConv.td | 37 +- lib/Target/PowerPC/PPCCodeEmitter.cpp | 293 - lib/Target/PowerPC/PPCFastISel.cpp | 125 +- lib/Target/PowerPC/PPCFrameLowering.cpp | 60 +- lib/Target/PowerPC/PPCFrameLowering.h | 19 +- lib/Target/PowerPC/PPCHazardRecognizers.h | 12 +- lib/Target/PowerPC/PPCISelDAGToDAG.cpp | 329 +- lib/Target/PowerPC/PPCISelLowering.cpp | 1466 +++-- lib/Target/PowerPC/PPCISelLowering.h | 82 +- lib/Target/PowerPC/PPCInstr64Bit.td | 27 +- 
lib/Target/PowerPC/PPCInstrAltivec.td | 245 +- lib/Target/PowerPC/PPCInstrBuilder.h | 4 +- lib/Target/PowerPC/PPCInstrFormats.td | 70 +- lib/Target/PowerPC/PPCInstrInfo.cpp | 49 +- lib/Target/PowerPC/PPCInstrInfo.h | 6 +- lib/Target/PowerPC/PPCInstrInfo.td | 425 +- lib/Target/PowerPC/PPCInstrSPE.td | 447 ++ lib/Target/PowerPC/PPCInstrVSX.td | 110 +- lib/Target/PowerPC/PPCJITInfo.cpp | 482 -- lib/Target/PowerPC/PPCJITInfo.h | 46 - lib/Target/PowerPC/PPCMCInstLower.cpp | 22 +- lib/Target/PowerPC/PPCMachineFunctionInfo.cpp | 9 + lib/Target/PowerPC/PPCMachineFunctionInfo.h | 19 +- lib/Target/PowerPC/PPCPerfectShuffle.h | 5 + lib/Target/PowerPC/PPCRegisterInfo.cpp | 82 +- lib/Target/PowerPC/PPCRegisterInfo.h | 4 +- lib/Target/PowerPC/PPCRelocations.h | 56 - lib/Target/PowerPC/PPCSchedule.td | 1 + lib/Target/PowerPC/PPCSelectionDAGInfo.h | 4 +- lib/Target/PowerPC/PPCSubtarget.cpp | 140 +- lib/Target/PowerPC/PPCSubtarget.h | 90 +- lib/Target/PowerPC/PPCTargetMachine.cpp | 94 +- lib/Target/PowerPC/PPCTargetMachine.h | 42 +- lib/Target/PowerPC/PPCTargetObjectFile.h | 4 +- lib/Target/PowerPC/PPCTargetStreamer.h | 6 +- lib/Target/PowerPC/PPCTargetTransformInfo.cpp | 69 +- lib/Target/R600/AMDGPU.h | 20 +- lib/Target/R600/AMDGPU.td | 43 +- lib/Target/R600/AMDGPUAlwaysInlinePass.cpp | 66 + lib/Target/R600/AMDGPUAsmPrinter.cpp | 154 +- lib/Target/R600/AMDGPUAsmPrinter.h | 24 +- lib/Target/R600/AMDGPUCallingConv.td | 32 +- lib/Target/R600/AMDGPUFrameLowering.h | 6 +- lib/Target/R600/AMDGPUISelDAGToDAG.cpp | 489 +- lib/Target/R600/AMDGPUISelLowering.cpp | 964 ++- lib/Target/R600/AMDGPUISelLowering.h | 98 +- lib/Target/R600/AMDGPUInstrInfo.cpp | 56 +- lib/Target/R600/AMDGPUInstrInfo.h | 20 +- lib/Target/R600/AMDGPUInstrInfo.td | 60 +- lib/Target/R600/AMDGPUInstructions.td | 125 +- lib/Target/R600/AMDGPUIntrinsicInfo.cpp | 2 +- lib/Target/R600/AMDGPUIntrinsicInfo.h | 8 +- lib/Target/R600/AMDGPUIntrinsics.td | 3 - lib/Target/R600/AMDGPUMCInstLower.cpp | 23 +- lib/Target/R600/AMDGPUMCInstLower.h | 6 +- lib/Target/R600/AMDGPUMachineFunction.cpp | 8 +- lib/Target/R600/AMDGPUMachineFunction.h | 19 +- lib/Target/R600/AMDGPUPromoteAlloca.cpp | 48 +- lib/Target/R600/AMDGPURegisterInfo.h | 6 +- lib/Target/R600/AMDGPUSubtarget.cpp | 79 +- lib/Target/R600/AMDGPUSubtarget.h | 80 +- lib/Target/R600/AMDGPUTargetMachine.cpp | 95 +- lib/Target/R600/AMDGPUTargetMachine.h | 35 +- lib/Target/R600/AMDGPUTargetTransformInfo.cpp | 58 +- lib/Target/R600/AMDILCFGStructurizer.cpp | 5 +- lib/Target/R600/AsmParser/AMDGPUAsmParser.cpp | 320 + lib/Target/R600/AsmParser/CMakeLists.txt | 3 + lib/Target/R600/AsmParser/LLVMBuild.txt | 23 + lib/Target/R600/AsmParser/Makefile | 15 + lib/Target/R600/CMakeLists.txt | 7 +- lib/Target/R600/CaymanInstructions.td | 2 + lib/Target/R600/EvergreenInstructions.td | 76 +- lib/Target/R600/InstPrinter/AMDGPUInstPrinter.cpp | 224 +- lib/Target/R600/InstPrinter/AMDGPUInstPrinter.h | 20 +- lib/Target/R600/LLVMBuild.txt | 5 +- lib/Target/R600/MCTargetDesc/AMDGPUAsmBackend.cpp | 57 +- .../R600/MCTargetDesc/AMDGPUELFObjectWriter.cpp | 3 +- lib/Target/R600/MCTargetDesc/AMDGPUFixupKinds.h | 34 + lib/Target/R600/MCTargetDesc/AMDGPUMCAsmInfo.cpp | 22 +- lib/Target/R600/MCTargetDesc/AMDGPUMCAsmInfo.h | 18 +- lib/Target/R600/MCTargetDesc/AMDGPUMCCodeEmitter.h | 12 +- .../R600/MCTargetDesc/AMDGPUMCTargetDesc.cpp | 9 +- lib/Target/R600/MCTargetDesc/AMDGPUMCTargetDesc.h | 6 +- lib/Target/R600/MCTargetDesc/SIMCCodeEmitter.cpp | 58 +- lib/Target/R600/Makefile | 4 +- lib/Target/R600/R600ClauseMergePass.cpp | 3 +- 
lib/Target/R600/R600ControlFlowFinalizer.cpp | 25 +- lib/Target/R600/R600Defines.h | 6 +- lib/Target/R600/R600EmitClauseMarkers.cpp | 3 +- lib/Target/R600/R600ExpandSpecialInstrs.cpp | 3 +- lib/Target/R600/R600ISelLowering.cpp | 178 +- lib/Target/R600/R600ISelLowering.h | 6 +- lib/Target/R600/R600InstrFormats.td | 3 + lib/Target/R600/R600InstrInfo.cpp | 35 +- lib/Target/R600/R600InstrInfo.h | 13 +- lib/Target/R600/R600Instructions.td | 39 +- lib/Target/R600/R600MachineFunctionInfo.h | 6 +- lib/Target/R600/R600MachineScheduler.cpp | 33 +- lib/Target/R600/R600MachineScheduler.h | 4 +- lib/Target/R600/R600OptimizeVectorRegisters.cpp | 8 +- lib/Target/R600/R600Packetizer.cpp | 15 +- lib/Target/R600/R600RegisterInfo.h | 6 +- lib/Target/R600/SIDefines.h | 41 +- lib/Target/R600/SIFixSGPRCopies.cpp | 70 +- lib/Target/R600/SIFixSGPRLiveRanges.cpp | 147 +- lib/Target/R600/SIISelLowering.cpp | 1507 +++-- lib/Target/R600/SIISelLowering.h | 56 +- lib/Target/R600/SIInsertWaits.cpp | 13 +- lib/Target/R600/SIInstrFormats.td | 376 +- lib/Target/R600/SIInstrInfo.cpp | 1521 ++++- lib/Target/R600/SIInstrInfo.h | 129 +- lib/Target/R600/SIInstrInfo.td | 1161 +++- lib/Target/R600/SIInstructions.td | 2761 ++++---- lib/Target/R600/SIIntrinsics.td | 85 +- lib/Target/R600/SILoadStoreOptimizer.cpp | 417 ++ lib/Target/R600/SILowerControlFlow.cpp | 70 +- lib/Target/R600/SILowerI1Copies.cpp | 45 +- lib/Target/R600/SIMachineFunctionInfo.cpp | 94 +- lib/Target/R600/SIMachineFunctionInfo.h | 39 +- lib/Target/R600/SIRegisterInfo.cpp | 351 +- lib/Target/R600/SIRegisterInfo.h | 62 +- lib/Target/R600/SIRegisterInfo.td | 44 +- lib/Target/R600/SIShrinkInstructions.cpp | 271 + lib/Target/R600/SITypeRewriter.cpp | 2 +- lib/Target/README.txt | 84 +- lib/Target/Sparc/AsmParser/SparcAsmParser.cpp | 8 +- lib/Target/Sparc/CMakeLists.txt | 5 +- lib/Target/Sparc/DelaySlotFiller.cpp | 9 +- lib/Target/Sparc/Disassembler/LLVMBuild.txt | 2 +- .../Sparc/Disassembler/SparcDisassembler.cpp | 61 +- lib/Target/Sparc/InstPrinter/SparcInstPrinter.h | 4 +- lib/Target/Sparc/MCTargetDesc/SparcFixupKinds.h | 4 +- lib/Target/Sparc/MCTargetDesc/SparcMCAsmInfo.cpp | 4 +- lib/Target/Sparc/MCTargetDesc/SparcMCAsmInfo.h | 4 +- lib/Target/Sparc/MCTargetDesc/SparcMCExpr.cpp | 5 +- lib/Target/Sparc/MCTargetDesc/SparcMCExpr.h | 7 +- .../Sparc/MCTargetDesc/SparcMCTargetDesc.cpp | 6 +- lib/Target/Sparc/MCTargetDesc/SparcMCTargetDesc.h | 4 +- lib/Target/Sparc/Makefile | 2 +- lib/Target/Sparc/README.txt | 2 - lib/Target/Sparc/Sparc.h | 6 +- lib/Target/Sparc/SparcAsmPrinter.cpp | 5 +- lib/Target/Sparc/SparcCodeEmitter.cpp | 280 - lib/Target/Sparc/SparcFrameLowering.cpp | 6 +- lib/Target/Sparc/SparcFrameLowering.h | 4 +- lib/Target/Sparc/SparcISelDAGToDAG.cpp | 15 +- lib/Target/Sparc/SparcISelLowering.cpp | 63 +- lib/Target/Sparc/SparcISelLowering.h | 4 +- lib/Target/Sparc/SparcInstrInfo.h | 4 +- lib/Target/Sparc/SparcInstrInfo.td | 2 +- lib/Target/Sparc/SparcInstrVIS.td | 34 +- lib/Target/Sparc/SparcJITInfo.cpp | 326 - lib/Target/Sparc/SparcJITInfo.h | 67 - lib/Target/Sparc/SparcMachineFunctionInfo.h | 4 +- lib/Target/Sparc/SparcRegisterInfo.cpp | 6 +- lib/Target/Sparc/SparcRegisterInfo.h | 4 +- lib/Target/Sparc/SparcRelocations.h | 56 - lib/Target/Sparc/SparcSelectionDAGInfo.h | 4 +- lib/Target/Sparc/SparcSubtarget.h | 25 +- lib/Target/Sparc/SparcTargetMachine.cpp | 18 +- lib/Target/Sparc/SparcTargetMachine.h | 29 +- lib/Target/Sparc/SparcTargetObjectFile.h | 4 +- lib/Target/Sparc/SparcTargetStreamer.h | 4 +- lib/Target/SystemZ/AsmParser/SystemZAsmParser.cpp | 8 +- 
lib/Target/SystemZ/CMakeLists.txt | 2 +- lib/Target/SystemZ/Disassembler/LLVMBuild.txt | 2 +- .../SystemZ/Disassembler/SystemZDisassembler.cpp | 21 +- .../SystemZ/InstPrinter/SystemZInstPrinter.h | 4 +- lib/Target/SystemZ/LLVMBuild.txt | 2 +- .../SystemZ/MCTargetDesc/SystemZMCAsmInfo.cpp | 7 - lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmInfo.h | 7 +- lib/Target/SystemZ/MCTargetDesc/SystemZMCFixups.h | 4 +- .../SystemZ/MCTargetDesc/SystemZMCTargetDesc.cpp | 15 +- .../SystemZ/MCTargetDesc/SystemZMCTargetDesc.h | 4 +- lib/Target/SystemZ/Makefile | 1 - lib/Target/SystemZ/SystemZ.h | 4 +- lib/Target/SystemZ/SystemZAsmPrinter.cpp | 5 +- lib/Target/SystemZ/SystemZAsmPrinter.h | 4 +- lib/Target/SystemZ/SystemZCallingConv.h | 4 +- lib/Target/SystemZ/SystemZConstantPoolValue.h | 4 +- lib/Target/SystemZ/SystemZElimCompare.cpp | 4 +- lib/Target/SystemZ/SystemZFrameLowering.cpp | 16 +- lib/Target/SystemZ/SystemZFrameLowering.h | 4 +- lib/Target/SystemZ/SystemZISelDAGToDAG.cpp | 12 +- lib/Target/SystemZ/SystemZISelLowering.cpp | 54 +- lib/Target/SystemZ/SystemZISelLowering.h | 11 +- lib/Target/SystemZ/SystemZInstrBuilder.h | 4 +- lib/Target/SystemZ/SystemZInstrInfo.cpp | 2 +- lib/Target/SystemZ/SystemZInstrInfo.h | 4 +- lib/Target/SystemZ/SystemZLongBranch.cpp | 2 +- lib/Target/SystemZ/SystemZMCInstLower.h | 4 +- lib/Target/SystemZ/SystemZMachineFunctionInfo.h | 4 +- lib/Target/SystemZ/SystemZRegisterInfo.cpp | 8 +- lib/Target/SystemZ/SystemZRegisterInfo.h | 4 +- lib/Target/SystemZ/SystemZSelectionDAGInfo.h | 4 +- lib/Target/SystemZ/SystemZShortenInst.cpp | 2 +- lib/Target/SystemZ/SystemZSubtarget.h | 22 +- lib/Target/SystemZ/SystemZTargetMachine.cpp | 5 +- lib/Target/SystemZ/SystemZTargetMachine.h | 28 +- lib/Target/Target.cpp | 2 +- lib/Target/TargetJITInfo.cpp | 14 - lib/Target/TargetLibraryInfo.cpp | 16 +- lib/Target/TargetLoweringObjectFile.cpp | 28 +- lib/Target/TargetMachine.cpp | 25 +- lib/Target/TargetMachineC.cpp | 15 +- lib/Target/TargetSubtargetInfo.cpp | 13 +- lib/Target/X86/Android.mk | 3 - lib/Target/X86/AsmParser/CMakeLists.txt | 3 + lib/Target/X86/AsmParser/LLVMBuild.txt | 2 +- lib/Target/X86/AsmParser/X86AsmInstrumentation.cpp | 981 ++- lib/Target/X86/AsmParser/X86AsmInstrumentation.h | 28 +- lib/Target/X86/AsmParser/X86AsmParser.cpp | 463 +- lib/Target/X86/AsmParser/X86AsmParserCommon.h | 10 +- lib/Target/X86/AsmParser/X86Operand.h | 23 +- lib/Target/X86/CMakeLists.txt | 3 - lib/Target/X86/Disassembler/LLVMBuild.txt | 2 +- lib/Target/X86/Disassembler/X86Disassembler.cpp | 80 +- lib/Target/X86/Disassembler/X86Disassembler.h | 16 +- .../X86/Disassembler/X86DisassemblerDecoder.cpp | 121 +- .../X86/Disassembler/X86DisassemblerDecoder.h | 4 +- .../Disassembler/X86DisassemblerDecoderCommon.h | 27 +- lib/Target/X86/InstPrinter/X86ATTInstPrinter.cpp | 28 +- lib/Target/X86/InstPrinter/X86ATTInstPrinter.h | 15 +- lib/Target/X86/InstPrinter/X86InstComments.cpp | 266 +- lib/Target/X86/InstPrinter/X86InstComments.h | 6 +- lib/Target/X86/InstPrinter/X86IntelInstPrinter.h | 4 +- lib/Target/X86/MCTargetDesc/LLVMBuild.txt | 2 +- lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp | 28 +- lib/Target/X86/MCTargetDesc/X86BaseInfo.h | 131 +- lib/Target/X86/MCTargetDesc/X86ELFObjectWriter.cpp | 2 +- lib/Target/X86/MCTargetDesc/X86FixupKinds.h | 4 +- lib/Target/X86/MCTargetDesc/X86MCAsmInfo.cpp | 24 +- lib/Target/X86/MCTargetDesc/X86MCAsmInfo.h | 6 +- lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp | 120 +- lib/Target/X86/MCTargetDesc/X86MCTargetDesc.cpp | 14 +- lib/Target/X86/MCTargetDesc/X86MCTargetDesc.h | 4 
+- .../X86/MCTargetDesc/X86MachObjectWriter.cpp | 13 +- lib/Target/X86/MCTargetDesc/X86WinCOFFStreamer.cpp | 13 +- lib/Target/X86/README.txt | 177 - lib/Target/X86/Utils/LLVMBuild.txt | 2 +- lib/Target/X86/Utils/X86ShuffleDecode.cpp | 177 + lib/Target/X86/Utils/X86ShuffleDecode.h | 31 +- lib/Target/X86/X86.h | 14 +- lib/Target/X86/X86.td | 70 +- lib/Target/X86/X86AsmPrinter.cpp | 30 +- lib/Target/X86/X86AsmPrinter.h | 78 +- lib/Target/X86/X86AtomicExpandPass.cpp | 287 - lib/Target/X86/X86CallingConv.h | 17 +- lib/Target/X86/X86CallingConv.td | 81 +- lib/Target/X86/X86CodeEmitter.cpp | 1498 ----- lib/Target/X86/X86FastISel.cpp | 950 ++- lib/Target/X86/X86FixupLEAs.cpp | 13 +- lib/Target/X86/X86FloatingPoint.cpp | 392 +- lib/Target/X86/X86FrameLowering.cpp | 244 +- lib/Target/X86/X86FrameLowering.h | 9 +- lib/Target/X86/X86ISelDAGToDAG.cpp | 216 +- lib/Target/X86/X86ISelLowering.cpp | 6818 +++++++++++++++----- lib/Target/X86/X86ISelLowering.h | 249 +- lib/Target/X86/X86InstrAVX512.td | 2791 ++++---- lib/Target/X86/X86InstrArithmetic.td | 48 +- lib/Target/X86/X86InstrBuilder.h | 4 +- lib/Target/X86/X86InstrCompiler.td | 126 +- lib/Target/X86/X86InstrExtension.td | 14 +- lib/Target/X86/X86InstrFPStack.td | 3 - lib/Target/X86/X86InstrFormats.td | 93 +- lib/Target/X86/X86InstrFragmentsSIMD.td | 31 +- lib/Target/X86/X86InstrInfo.cpp | 464 +- lib/Target/X86/X86InstrInfo.h | 9 +- lib/Target/X86/X86InstrInfo.td | 33 +- lib/Target/X86/X86InstrMMX.td | 25 +- lib/Target/X86/X86InstrSGX.td | 24 + lib/Target/X86/X86InstrSSE.td | 627 +- lib/Target/X86/X86InstrSystem.td | 12 +- lib/Target/X86/X86IntrinsicsInfo.h | 320 + lib/Target/X86/X86JITInfo.cpp | 588 -- lib/Target/X86/X86JITInfo.h | 79 - lib/Target/X86/X86MCInstLower.cpp | 415 +- lib/Target/X86/X86MachineFunctionInfo.h | 25 +- lib/Target/X86/X86PadShortFunction.cpp | 5 +- lib/Target/X86/X86RegisterInfo.cpp | 35 +- lib/Target/X86/X86RegisterInfo.h | 4 +- lib/Target/X86/X86RegisterInfo.td | 41 +- lib/Target/X86/X86Relocations.h | 52 - lib/Target/X86/X86SchedHaswell.td | 1885 +++++- lib/Target/X86/X86SchedSandyBridge.td | 1 + lib/Target/X86/X86Schedule.td | 21 +- lib/Target/X86/X86ScheduleAtom.td | 6 + lib/Target/X86/X86ScheduleBtVer2.td | 341 + lib/Target/X86/X86ScheduleSLM.td | 2 + lib/Target/X86/X86SelectionDAGInfo.cpp | 37 +- lib/Target/X86/X86SelectionDAGInfo.h | 9 +- lib/Target/X86/X86Subtarget.cpp | 99 +- lib/Target/X86/X86Subtarget.h | 100 +- lib/Target/X86/X86TargetMachine.cpp | 91 +- lib/Target/X86/X86TargetMachine.h | 37 +- lib/Target/X86/X86TargetObjectFile.cpp | 63 + lib/Target/X86/X86TargetObjectFile.h | 9 +- lib/Target/X86/X86TargetTransformInfo.cpp | 112 +- lib/Target/X86/X86VZeroUpper.cpp | 2 +- lib/Target/XCore/Disassembler/LLVMBuild.txt | 2 +- .../XCore/Disassembler/XCoreDisassembler.cpp | 57 +- lib/Target/XCore/InstPrinter/XCoreInstPrinter.h | 4 +- lib/Target/XCore/MCTargetDesc/XCoreMCAsmInfo.cpp | 1 - lib/Target/XCore/MCTargetDesc/XCoreMCAsmInfo.h | 4 +- .../XCore/MCTargetDesc/XCoreMCTargetDesc.cpp | 8 +- lib/Target/XCore/MCTargetDesc/XCoreMCTargetDesc.h | 4 +- lib/Target/XCore/XCore.h | 4 +- lib/Target/XCore/XCoreAsmPrinter.cpp | 4 +- lib/Target/XCore/XCoreFrameLowering.cpp | 26 +- lib/Target/XCore/XCoreFrameLowering.h | 6 +- lib/Target/XCore/XCoreFrameToArgsOffsetElim.cpp | 3 +- lib/Target/XCore/XCoreISelLowering.cpp | 60 +- lib/Target/XCore/XCoreISelLowering.h | 6 +- lib/Target/XCore/XCoreInstrInfo.cpp | 9 +- lib/Target/XCore/XCoreInstrInfo.h | 4 +- lib/Target/XCore/XCoreInstrInfo.td | 4 +- lib/Target/XCore/XCoreMCInstLower.h | 4 
+- lib/Target/XCore/XCoreMachineFunctionInfo.h | 6 +- lib/Target/XCore/XCoreRegisterInfo.cpp | 17 +- lib/Target/XCore/XCoreRegisterInfo.h | 4 +- lib/Target/XCore/XCoreSelectionDAGInfo.cpp | 2 +- lib/Target/XCore/XCoreSelectionDAGInfo.h | 4 +- lib/Target/XCore/XCoreSubtarget.h | 22 +- lib/Target/XCore/XCoreTargetMachine.cpp | 11 + lib/Target/XCore/XCoreTargetMachine.h | 27 +- lib/Target/XCore/XCoreTargetObjectFile.cpp | 11 +- lib/Target/XCore/XCoreTargetObjectFile.h | 7 +- lib/Target/XCore/XCoreTargetStreamer.h | 4 +- lib/Target/XCore/XCoreTargetTransformInfo.cpp | 6 +- lib/Transforms/Hello/CMakeLists.txt | 4 + lib/Transforms/IPO/ArgumentPromotion.cpp | 129 +- lib/Transforms/IPO/ConstantMerge.cpp | 2 +- lib/Transforms/IPO/DeadArgumentElimination.cpp | 28 +- lib/Transforms/IPO/ExtractGV.cpp | 10 +- lib/Transforms/IPO/FunctionAttrs.cpp | 30 +- lib/Transforms/IPO/GlobalDCE.cpp | 51 +- lib/Transforms/IPO/GlobalOpt.cpp | 109 +- lib/Transforms/IPO/InlineAlways.cpp | 4 + lib/Transforms/IPO/InlineSimple.cpp | 4 + lib/Transforms/IPO/Inliner.cpp | 20 +- lib/Transforms/IPO/Internalize.cpp | 4 +- lib/Transforms/IPO/MergeFunctions.cpp | 91 +- lib/Transforms/IPO/PassManagerBuilder.cpp | 210 +- lib/Transforms/IPO/StripSymbols.cpp | 27 +- lib/Transforms/InstCombine/InstCombine.h | 54 +- lib/Transforms/InstCombine/InstCombineAddSub.cpp | 188 +- lib/Transforms/InstCombine/InstCombineAndOrXor.cpp | 372 +- lib/Transforms/InstCombine/InstCombineCalls.cpp | 193 +- lib/Transforms/InstCombine/InstCombineCasts.cpp | 129 +- lib/Transforms/InstCombine/InstCombineCompares.cpp | 227 +- .../InstCombine/InstCombineLoadStoreAlloca.cpp | 192 +- .../InstCombine/InstCombineMulDivRem.cpp | 333 +- lib/Transforms/InstCombine/InstCombinePHI.cpp | 12 +- lib/Transforms/InstCombine/InstCombineSelect.cpp | 45 +- lib/Transforms/InstCombine/InstCombineShifts.cpp | 53 +- .../InstCombine/InstCombineSimplifyDemanded.cpp | 77 +- lib/Transforms/InstCombine/InstCombineWorklist.h | 4 +- .../InstCombine/InstructionCombining.cpp | 252 +- .../Instrumentation/AddressSanitizer.cpp | 361 +- lib/Transforms/Instrumentation/Android.mk | 1 + lib/Transforms/Instrumentation/CMakeLists.txt | 1 + .../Instrumentation/DataFlowSanitizer.cpp | 282 +- lib/Transforms/Instrumentation/DebugIR.cpp | 8 +- lib/Transforms/Instrumentation/DebugIR.h | 6 +- lib/Transforms/Instrumentation/GCOVProfiling.cpp | 14 +- lib/Transforms/Instrumentation/Instrumentation.cpp | 1 + lib/Transforms/Instrumentation/MemorySanitizer.cpp | 140 +- .../Instrumentation/SanitizerCoverage.cpp | 294 + lib/Transforms/Instrumentation/ThreadSanitizer.cpp | 42 +- lib/Transforms/ObjCARC/ARCRuntimeEntryPoints.h | 6 +- lib/Transforms/ObjCARC/Android.mk | 3 +- lib/Transforms/ObjCARC/CMakeLists.txt | 1 + lib/Transforms/ObjCARC/DependencyAnalysis.cpp | 10 +- lib/Transforms/ObjCARC/DependencyAnalysis.h | 10 +- lib/Transforms/ObjCARC/ObjCARC.cpp | 1 + lib/Transforms/ObjCARC/ObjCARC.h | 20 +- lib/Transforms/ObjCARC/ObjCARCAliasAnalysis.cpp | 6 +- lib/Transforms/ObjCARC/ObjCARCAliasAnalysis.h | 6 +- lib/Transforms/ObjCARC/ObjCARCContract.cpp | 13 +- lib/Transforms/ObjCARC/ObjCARCOpts.cpp | 83 +- lib/Transforms/ObjCARC/ProvenanceAnalysis.cpp | 4 +- lib/Transforms/ObjCARC/ProvenanceAnalysis.h | 6 +- .../ObjCARC/ProvenanceAnalysisEvaluator.cpp | 92 + lib/Transforms/Scalar/ADCE.cpp | 2 +- lib/Transforms/Scalar/AlignmentFromAssumptions.cpp | 428 ++ lib/Transforms/Scalar/Android.mk | 3 + lib/Transforms/Scalar/CMakeLists.txt | 2 + lib/Transforms/Scalar/ConstantHoisting.cpp | 4 +- 
.../Scalar/CorrelatedValuePropagation.cpp | 18 +- lib/Transforms/Scalar/DeadStoreElimination.cpp | 9 +- lib/Transforms/Scalar/EarlyCSE.cpp | 29 +- lib/Transforms/Scalar/GVN.cpp | 72 +- lib/Transforms/Scalar/IndVarSimplify.cpp | 179 +- lib/Transforms/Scalar/JumpThreading.cpp | 131 +- lib/Transforms/Scalar/LICM.cpp | 64 +- lib/Transforms/Scalar/LLVMBuild.txt | 2 +- lib/Transforms/Scalar/LoadCombine.cpp | 25 +- lib/Transforms/Scalar/LoopDeletion.cpp | 5 +- lib/Transforms/Scalar/LoopInstSimplify.cpp | 10 +- lib/Transforms/Scalar/LoopRerollPass.cpp | 58 +- lib/Transforms/Scalar/LoopRotation.cpp | 48 +- lib/Transforms/Scalar/LoopStrengthReduce.cpp | 72 +- lib/Transforms/Scalar/LoopUnrollPass.cpp | 155 +- lib/Transforms/Scalar/LoopUnswitch.cpp | 23 +- lib/Transforms/Scalar/MemCpyOptimizer.cpp | 66 +- lib/Transforms/Scalar/MergedLoadStoreMotion.cpp | 604 ++ lib/Transforms/Scalar/PartiallyInlineLibCalls.cpp | 4 + lib/Transforms/Scalar/Reassociate.cpp | 469 +- lib/Transforms/Scalar/SCCP.cpp | 8 +- lib/Transforms/Scalar/SROA.cpp | 539 +- lib/Transforms/Scalar/SampleProfile.cpp | 570 +- lib/Transforms/Scalar/Scalar.cpp | 19 + lib/Transforms/Scalar/ScalarReplAggregates.cpp | 35 +- lib/Transforms/Scalar/Scalarizer.cpp | 33 +- .../Scalar/SeparateConstOffsetFromGEP.cpp | 391 +- lib/Transforms/Scalar/SimplifyCFGPass.cpp | 27 +- lib/Transforms/Scalar/Sink.cpp | 11 +- lib/Transforms/Scalar/StructurizeCFG.cpp | 6 +- lib/Transforms/Scalar/TailRecursionElimination.cpp | 44 +- lib/Transforms/Utils/AddDiscriminators.cpp | 6 +- lib/Transforms/Utils/Android.mk | 2 + lib/Transforms/Utils/BasicBlockUtils.cpp | 29 +- lib/Transforms/Utils/BreakCriticalEdges.cpp | 24 +- lib/Transforms/Utils/BuildLibCalls.cpp | 56 +- lib/Transforms/Utils/CMakeLists.txt | 7 +- lib/Transforms/Utils/CloneModule.cpp | 9 + lib/Transforms/Utils/CtorUtils.cpp | 71 +- lib/Transforms/Utils/FlattenCFG.cpp | 9 +- lib/Transforms/Utils/GlobalStatus.cpp | 7 +- lib/Transforms/Utils/InlineFunction.cpp | 486 +- lib/Transforms/Utils/IntegerDivision.cpp | 16 +- lib/Transforms/Utils/Local.cpp | 136 +- lib/Transforms/Utils/LoopSimplify.cpp | 51 +- lib/Transforms/Utils/LoopUnroll.cpp | 81 +- lib/Transforms/Utils/LoopUnrollRuntime.cpp | 257 +- lib/Transforms/Utils/LowerSwitch.cpp | 48 +- lib/Transforms/Utils/Mem2Reg.cpp | 6 +- lib/Transforms/Utils/ModuleUtils.cpp | 2 +- lib/Transforms/Utils/PromoteMemoryToRegister.cpp | 35 +- lib/Transforms/Utils/SimplifyCFG.cpp | 786 ++- lib/Transforms/Utils/SimplifyIndVar.cpp | 23 +- lib/Transforms/Utils/SimplifyInstructions.cpp | 6 +- lib/Transforms/Utils/SimplifyLibCalls.cpp | 3661 +++++------ lib/Transforms/Utils/SymbolRewriter.cpp | 525 ++ lib/Transforms/Utils/ValueMapper.cpp | 6 +- lib/Transforms/Vectorize/BBVectorize.cpp | 37 +- lib/Transforms/Vectorize/LoopVectorize.cpp | 921 ++- lib/Transforms/Vectorize/SLPVectorizer.cpp | 1304 +++- 1404 files changed, 100310 insertions(+), 57214 deletions(-) create mode 100644 lib/Analysis/AssumptionTracker.cpp create mode 100644 lib/Analysis/CFLAliasAnalysis.cpp create mode 100644 lib/Analysis/FunctionTargetTransformInfo.cpp create mode 100644 lib/Analysis/ScopedNoAliasAA.cpp create mode 100644 lib/Analysis/StratifiedSets.h create mode 100644 lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp create mode 100644 lib/CodeGen/AsmPrinter/DwarfCompileUnit.h create mode 100644 lib/CodeGen/AsmPrinter/Win64Exception.h delete mode 100644 lib/CodeGen/AtomicExpandLoadLinkedPass.cpp create mode 100644 lib/CodeGen/AtomicExpandPass.cpp create mode 100644 lib/CodeGen/ForwardControlFlowIntegrity.cpp 
delete mode 100644 lib/CodeGen/JITCodeEmitter.cpp delete mode 100644 lib/CodeGen/MachineCodeEmitter.cpp create mode 100644 lib/CodeGen/MachineCombiner.cpp create mode 100644 lib/CodeGen/MachineDominanceFrontier.cpp create mode 100644 lib/CodeGen/MachineRegionInfo.cpp delete mode 100644 lib/CodeGen/Spiller.cpp create mode 100644 lib/DebugInfo/DWARFAcceleratorTable.cpp create mode 100644 lib/DebugInfo/DWARFAcceleratorTable.h create mode 100644 lib/DebugInfo/DWARFSection.h delete mode 100644 lib/ExecutionEngine/JIT/Android.mk delete mode 100644 lib/ExecutionEngine/JIT/CMakeLists.txt delete mode 100644 lib/ExecutionEngine/JIT/JIT.cpp delete mode 100644 lib/ExecutionEngine/JIT/JIT.h delete mode 100644 lib/ExecutionEngine/JIT/JITEmitter.cpp delete mode 100644 lib/ExecutionEngine/JIT/JITMemoryManager.cpp delete mode 100644 lib/ExecutionEngine/JIT/LLVMBuild.txt delete mode 100644 lib/ExecutionEngine/JIT/Makefile create mode 100644 lib/ExecutionEngine/JITEventListener.cpp create mode 100644 lib/ExecutionEngine/RuntimeDyld/RuntimeDyldCheckerImpl.h create mode 100644 lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldMachOAArch64.h create mode 100644 lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldMachOARM.h create mode 100644 lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldMachOI386.h create mode 100644 lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldMachOX86_64.h create mode 100644 lib/IR/UseListOrder.cpp delete mode 100644 lib/MC/MCAnalysis/Android.mk delete mode 100644 lib/MC/MCAnalysis/CMakeLists.txt delete mode 100644 lib/MC/MCAnalysis/LLVMBuild.txt delete mode 100644 lib/MC/MCAnalysis/MCAtom.cpp delete mode 100644 lib/MC/MCAnalysis/MCFunction.cpp delete mode 100644 lib/MC/MCAnalysis/MCModule.cpp delete mode 100644 lib/MC/MCAnalysis/MCModuleYAML.cpp delete mode 100644 lib/MC/MCAnalysis/MCObjectDisassembler.cpp delete mode 100644 lib/MC/MCAnalysis/MCObjectSymbolizer.cpp delete mode 100644 lib/MC/MCAnalysis/Makefile delete mode 100644 lib/MC/MCDisassembler.cpp create mode 100644 lib/MC/MCDisassembler/MCDisassembler.cpp create mode 100644 lib/MC/MCDisassembler/MCExternalSymbolizer.cpp create mode 100644 lib/MC/MCDisassembler/MCRelocationInfo.cpp delete mode 100644 lib/MC/MCExternalSymbolizer.cpp delete mode 100644 lib/MC/MCRelocationInfo.cpp create mode 100644 lib/MC/MCWinEH.cpp create mode 100644 lib/ProfileData/CoverageMapping.cpp create mode 100644 lib/ProfileData/CoverageMappingReader.cpp create mode 100644 lib/ProfileData/CoverageMappingWriter.cpp create mode 100644 lib/ProfileData/SampleProf.cpp create mode 100644 lib/ProfileData/SampleProfReader.cpp create mode 100644 lib/ProfileData/SampleProfWriter.cpp delete mode 100644 lib/Support/Disassembler.cpp delete mode 100644 lib/Support/IncludeFile.cpp create mode 100644 lib/Support/MathExtras.cpp create mode 100644 lib/Support/Options.cpp delete mode 100644 lib/Support/StreamableMemoryObject.cpp create mode 100644 lib/Support/StreamingMemoryObject.cpp delete mode 100644 lib/Support/StringRefMemoryObject.cpp create mode 100644 lib/Target/AArch64/AArch64ConditionOptimizer.cpp create mode 100644 lib/Target/AArch64/AArch64MachineCombinerPattern.h create mode 100644 lib/Target/AArch64/AArch64PBQPRegAlloc.cpp create mode 100644 lib/Target/AArch64/AArch64PBQPRegAlloc.h delete mode 100644 lib/Target/ARM/ARMCodeEmitter.cpp delete mode 100644 lib/Target/ARM/ARMJITInfo.cpp delete mode 100644 lib/Target/ARM/ARMJITInfo.h delete mode 100644 lib/Target/ARM/ARMRelocations.h create mode 100644 lib/Target/ARM/MCTargetDesc/ARMAsmBackend.h create mode 
100644 lib/Target/ARM/MCTargetDesc/ARMAsmBackendDarwin.h create mode 100644 lib/Target/ARM/MCTargetDesc/ARMAsmBackendELF.h create mode 100644 lib/Target/ARM/MCTargetDesc/ARMAsmBackendWinCOFF.h create mode 100644 lib/Target/Hexagon/Disassembler/CMakeLists.txt create mode 100644 lib/Target/Hexagon/Disassembler/HexagonDisassembler.cpp create mode 100644 lib/Target/Hexagon/Disassembler/LLVMBuild.txt create mode 100644 lib/Target/Hexagon/Disassembler/Makefile delete mode 100644 lib/Target/Hexagon/InstPrinter/CMakeLists.txt delete mode 100644 lib/Target/Hexagon/InstPrinter/HexagonInstPrinter.cpp delete mode 100644 lib/Target/Hexagon/InstPrinter/HexagonInstPrinter.h delete mode 100644 lib/Target/Hexagon/InstPrinter/LLVMBuild.txt delete mode 100644 lib/Target/Hexagon/InstPrinter/Makefile create mode 100644 lib/Target/Hexagon/MCTargetDesc/HexagonAsmBackend.cpp create mode 100644 lib/Target/Hexagon/MCTargetDesc/HexagonELFObjectWriter.cpp create mode 100644 lib/Target/Hexagon/MCTargetDesc/HexagonInstPrinter.cpp create mode 100644 lib/Target/Hexagon/MCTargetDesc/HexagonInstPrinter.h create mode 100644 lib/Target/Hexagon/MCTargetDesc/HexagonMCCodeEmitter.cpp create mode 100644 lib/Target/Hexagon/MCTargetDesc/HexagonMCCodeEmitter.h create mode 100644 lib/Target/Mips/MCTargetDesc/MipsOptionRecord.cpp create mode 100644 lib/Target/Mips/MipsABIInfo.cpp create mode 100644 lib/Target/Mips/MipsABIInfo.h create mode 100644 lib/Target/Mips/MipsCCState.cpp create mode 100644 lib/Target/Mips/MipsCCState.h delete mode 100644 lib/Target/Mips/MipsCodeEmitter.cpp delete mode 100644 lib/Target/Mips/MipsJITInfo.cpp delete mode 100644 lib/Target/Mips/MipsJITInfo.h create mode 100644 lib/Target/Mips/MipsOptionRecord.h delete mode 100644 lib/Target/Mips/MipsRelocations.h create mode 100644 lib/Target/NVPTX/NVPTXLowerStructArgs.cpp create mode 100644 lib/Target/NVPTX/NVPTXTargetTransformInfo.cpp delete mode 100644 lib/Target/PowerPC/PPCCodeEmitter.cpp create mode 100644 lib/Target/PowerPC/PPCInstrSPE.td delete mode 100644 lib/Target/PowerPC/PPCJITInfo.cpp delete mode 100644 lib/Target/PowerPC/PPCJITInfo.h delete mode 100644 lib/Target/PowerPC/PPCRelocations.h create mode 100644 lib/Target/R600/AMDGPUAlwaysInlinePass.cpp create mode 100644 lib/Target/R600/AsmParser/AMDGPUAsmParser.cpp create mode 100644 lib/Target/R600/AsmParser/CMakeLists.txt create mode 100644 lib/Target/R600/AsmParser/LLVMBuild.txt create mode 100644 lib/Target/R600/AsmParser/Makefile create mode 100644 lib/Target/R600/MCTargetDesc/AMDGPUFixupKinds.h create mode 100644 lib/Target/R600/SILoadStoreOptimizer.cpp create mode 100644 lib/Target/R600/SIShrinkInstructions.cpp delete mode 100644 lib/Target/Sparc/SparcCodeEmitter.cpp delete mode 100644 lib/Target/Sparc/SparcJITInfo.cpp delete mode 100644 lib/Target/Sparc/SparcJITInfo.h delete mode 100644 lib/Target/Sparc/SparcRelocations.h delete mode 100644 lib/Target/TargetJITInfo.cpp delete mode 100644 lib/Target/X86/X86AtomicExpandPass.cpp delete mode 100644 lib/Target/X86/X86CodeEmitter.cpp create mode 100644 lib/Target/X86/X86InstrSGX.td create mode 100644 lib/Target/X86/X86IntrinsicsInfo.h delete mode 100644 lib/Target/X86/X86JITInfo.cpp delete mode 100644 lib/Target/X86/X86JITInfo.h delete mode 100644 lib/Target/X86/X86Relocations.h create mode 100644 lib/Target/X86/X86ScheduleBtVer2.td create mode 100644 lib/Transforms/Instrumentation/SanitizerCoverage.cpp create mode 100644 lib/Transforms/ObjCARC/ProvenanceAnalysisEvaluator.cpp create mode 100644 lib/Transforms/Scalar/AlignmentFromAssumptions.cpp create 
mode 100644 lib/Transforms/Scalar/MergedLoadStoreMotion.cpp create mode 100644 lib/Transforms/Utils/SymbolRewriter.cpp (limited to 'lib') diff --git a/lib/Analysis/AliasAnalysis.cpp b/lib/Analysis/AliasAnalysis.cpp index 5cde979..5171a45 100644 --- a/lib/Analysis/AliasAnalysis.cpp +++ b/lib/Analysis/AliasAnalysis.cpp @@ -196,17 +196,21 @@ AliasAnalysis::getModRefInfo(ImmutableCallSite CS1, ImmutableCallSite CS2) { if (!Arg->getType()->isPointerTy()) continue; ModRefResult ArgMask; - Location CS1Loc = - getArgLocation(CS1, (unsigned) std::distance(CS1.arg_begin(), I), - ArgMask); - if ((getModRefInfo(CS2, CS1Loc) & ArgMask) != NoModRef) { - R = Mask; + Location CS1Loc = getArgLocation( + CS1, (unsigned)std::distance(CS1.arg_begin(), I), ArgMask); + // ArgMask indicates what CS1 might do to CS1Loc; if CS1 might Mod + // CS1Loc, then we care about either a Mod or a Ref by CS2. If CS1 + // might Ref, then we care only about a Mod by CS2. + ModRefResult ArgR = getModRefInfo(CS2, CS1Loc); + if (((ArgMask & Mod) != NoModRef && (ArgR & ModRef) != NoModRef) || + ((ArgMask & Ref) != NoModRef && (ArgR & Mod) != NoModRef)) + R = ModRefResult((R | ArgMask) & Mask); + + if (R == Mask) break; - } } } - if (R == NoModRef) - return R; + return R; } // If this is the end of the chain, don't forward. @@ -247,61 +251,73 @@ AliasAnalysis::getModRefBehavior(const Function *F) { //===----------------------------------------------------------------------===// AliasAnalysis::Location AliasAnalysis::getLocation(const LoadInst *LI) { + AAMDNodes AATags; + LI->getAAMetadata(AATags); + return Location(LI->getPointerOperand(), - getTypeStoreSize(LI->getType()), - LI->getMetadata(LLVMContext::MD_tbaa)); + getTypeStoreSize(LI->getType()), AATags); } AliasAnalysis::Location AliasAnalysis::getLocation(const StoreInst *SI) { + AAMDNodes AATags; + SI->getAAMetadata(AATags); + return Location(SI->getPointerOperand(), - getTypeStoreSize(SI->getValueOperand()->getType()), - SI->getMetadata(LLVMContext::MD_tbaa)); + getTypeStoreSize(SI->getValueOperand()->getType()), AATags); } AliasAnalysis::Location AliasAnalysis::getLocation(const VAArgInst *VI) { - return Location(VI->getPointerOperand(), - UnknownSize, - VI->getMetadata(LLVMContext::MD_tbaa)); + AAMDNodes AATags; + VI->getAAMetadata(AATags); + + return Location(VI->getPointerOperand(), UnknownSize, AATags); } AliasAnalysis::Location AliasAnalysis::getLocation(const AtomicCmpXchgInst *CXI) { + AAMDNodes AATags; + CXI->getAAMetadata(AATags); + return Location(CXI->getPointerOperand(), getTypeStoreSize(CXI->getCompareOperand()->getType()), - CXI->getMetadata(LLVMContext::MD_tbaa)); + AATags); } AliasAnalysis::Location AliasAnalysis::getLocation(const AtomicRMWInst *RMWI) { + AAMDNodes AATags; + RMWI->getAAMetadata(AATags); + return Location(RMWI->getPointerOperand(), - getTypeStoreSize(RMWI->getValOperand()->getType()), - RMWI->getMetadata(LLVMContext::MD_tbaa)); + getTypeStoreSize(RMWI->getValOperand()->getType()), AATags); } -AliasAnalysis::Location +AliasAnalysis::Location AliasAnalysis::getLocationForSource(const MemTransferInst *MTI) { uint64_t Size = UnknownSize; if (ConstantInt *C = dyn_cast(MTI->getLength())) Size = C->getValue().getZExtValue(); - // memcpy/memmove can have TBAA tags. For memcpy, they apply + // memcpy/memmove can have AA tags. For memcpy, they apply // to both the source and the destination. 
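The getLocation() hunks above are one face of a patch-wide switch: an AliasAnalysis::Location no longer carries a single !tbaa MDNode but an AAMDNodes bundle filled in by Instruction::getAAMetadata(), so !alias.scope and !noalias metadata travel with every query as well. A minimal sketch of how a caller builds a load Location under the new interface; illustrative only, not part of the patch, it simply restates the API the hunks use:

    #include "llvm/Analysis/AliasAnalysis.h"
    #include "llvm/IR/Instructions.h"
    using namespace llvm;

    static AliasAnalysis::Location getLoadLocation(AliasAnalysis &AA,
                                                   const LoadInst *LI) {
      AAMDNodes AATags;
      LI->getAAMetadata(AATags); // gathers !tbaa, !alias.scope and !noalias
      return AliasAnalysis::Location(LI->getPointerOperand(),
                                     AA.getTypeStoreSize(LI->getType()),
                                     AATags);
    }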
- MDNode *TBAATag = MTI->getMetadata(LLVMContext::MD_tbaa); + AAMDNodes AATags; + MTI->getAAMetadata(AATags); - return Location(MTI->getRawSource(), Size, TBAATag); + return Location(MTI->getRawSource(), Size, AATags); } -AliasAnalysis::Location +AliasAnalysis::Location AliasAnalysis::getLocationForDest(const MemIntrinsic *MTI) { uint64_t Size = UnknownSize; if (ConstantInt *C = dyn_cast(MTI->getLength())) Size = C->getValue().getZExtValue(); - // memcpy/memmove can have TBAA tags. For memcpy, they apply + // memcpy/memmove can have AA tags. For memcpy, they apply // to both the source and the destination. - MDNode *TBAATag = MTI->getMetadata(LLVMContext::MD_tbaa); - - return Location(MTI->getRawDest(), Size, TBAATag); + AAMDNodes AATags; + MTI->getAAMetadata(AATags); + + return Location(MTI->getRawDest(), Size, AATags); } @@ -383,53 +399,6 @@ AliasAnalysis::getModRefInfo(const AtomicRMWInst *RMW, const Location &Loc) { return ModRef; } -namespace { - /// Only find pointer captures which happen before the given instruction. Uses - /// the dominator tree to determine whether one instruction is before another. - /// Only support the case where the Value is defined in the same basic block - /// as the given instruction and the use. - struct CapturesBefore : public CaptureTracker { - CapturesBefore(const Instruction *I, DominatorTree *DT) - : BeforeHere(I), DT(DT), Captured(false) {} - - void tooManyUses() override { Captured = true; } - - bool shouldExplore(const Use *U) override { - Instruction *I = cast(U->getUser()); - BasicBlock *BB = I->getParent(); - // We explore this usage only if the usage can reach "BeforeHere". - // If use is not reachable from entry, there is no need to explore. - if (BeforeHere != I && !DT->isReachableFromEntry(BB)) - return false; - // If the value is defined in the same basic block as use and BeforeHere, - // there is no need to explore the use if BeforeHere dominates use. - // Check whether there is a path from I to BeforeHere. - if (BeforeHere != I && DT->dominates(BeforeHere, I) && - !isPotentiallyReachable(I, BeforeHere, DT)) - return false; - return true; - } - - bool captured(const Use *U) override { - Instruction *I = cast(U->getUser()); - BasicBlock *BB = I->getParent(); - // Same logic as in shouldExplore. - if (BeforeHere != I && !DT->isReachableFromEntry(BB)) - return false; - if (BeforeHere != I && DT->dominates(BeforeHere, I) && - !isPotentiallyReachable(I, BeforeHere, DT)) - return false; - Captured = true; - return true; - } - - const Instruction *BeforeHere; - DominatorTree *DT; - - bool Captured; - }; -} - // FIXME: this is really just shoring-up a deficiency in alias analysis. // BasicAA isn't willing to spend linear time determining whether an alloca // was captured before or after this particular call, while we are. However, @@ -449,9 +418,9 @@ AliasAnalysis::callCapturesBefore(const Instruction *I, if (!CS.getInstruction() || CS.getInstruction() == Object) return AliasAnalysis::ModRef; - CapturesBefore CB(I, DT); - llvm::PointerMayBeCaptured(Object, &CB); - if (CB.Captured) + if (llvm::PointerMayBeCapturedBefore(Object, /* ReturnCaptures */ true, + /* StoreCaptures */ true, I, DT, + /* include Object */ true)) return AliasAnalysis::ModRef; unsigned ArgNo = 0; @@ -470,7 +439,7 @@ AliasAnalysis::callCapturesBefore(const Instruction *I, // assume that the call could touch the pointer, even though it doesn't // escape. 
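In callCapturesBefore() above, the file-local CapturesBefore tracker is deleted and the query now goes through the shared helper from llvm/Analysis/CaptureTracking.h, which asks whether the object may already have escaped by the time instruction I executes. A hedged sketch of that call, wrapped in a helper of my own naming purely for readability:

    #include "llvm/Analysis/CaptureTracking.h"
    #include "llvm/IR/Dominators.h"
    using namespace llvm;

    // True if Object may be captured before (or by) instruction I, ordering
    // instructions with the dominator tree DT; mirrors the new call site in
    // AliasAnalysis::callCapturesBefore().
    static bool mayBeCapturedBefore(const Value *Object, const Instruction *I,
                                    DominatorTree *DT) {
      return PointerMayBeCapturedBefore(Object, /* ReturnCaptures */ true,
                                        /* StoreCaptures */ true, I, DT,
                                        /* IncludeI */ true);
    }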
if (isNoAlias(AliasAnalysis::Location(*CI), - AliasAnalysis::Location(Object))) + AliasAnalysis::Location(Object))) continue; if (CS.doesNotAccessMemory(ArgNo)) continue; @@ -577,3 +546,13 @@ bool llvm::isIdentifiedObject(const Value *V) { return A->hasNoAliasAttr() || A->hasByValAttr(); return false; } + +/// isIdentifiedFunctionLocal - Return true if V is umabigously identified +/// at the function-level. Different IdentifiedFunctionLocals can't alias. +/// Further, an IdentifiedFunctionLocal can not alias with any function +/// arguments other than itself, which is not necessarily true for +/// IdentifiedObjects. +bool llvm::isIdentifiedFunctionLocal(const Value *V) +{ + return isa(V) || isNoAliasCall(V) || isNoAliasArgument(V); +} diff --git a/lib/Analysis/AliasAnalysisEvaluator.cpp b/lib/Analysis/AliasAnalysisEvaluator.cpp index d9fa5a5..fe4bd4c 100644 --- a/lib/Analysis/AliasAnalysisEvaluator.cpp +++ b/lib/Analysis/AliasAnalysisEvaluator.cpp @@ -43,7 +43,7 @@ static cl::opt PrintMod("print-mod", cl::ReallyHidden); static cl::opt PrintRef("print-ref", cl::ReallyHidden); static cl::opt PrintModRef("print-modref", cl::ReallyHidden); -static cl::opt EvalTBAA("evaluate-tbaa", cl::ReallyHidden); +static cl::opt EvalAAMD("evaluate-aa-metadata", cl::ReallyHidden); namespace { class AAEval : public FunctionPass { @@ -153,9 +153,9 @@ bool AAEval::runOnFunction(Function &F) { for (inst_iterator I = inst_begin(F), E = inst_end(F); I != E; ++I) { if (I->getType()->isPointerTy()) // Add all pointer instructions. Pointers.insert(&*I); - if (EvalTBAA && isa(&*I)) + if (EvalAAMD && isa(&*I)) Loads.insert(&*I); - if (EvalTBAA && isa(&*I)) + if (EvalAAMD && isa(&*I)) Stores.insert(&*I); Instruction &Inst = *I; if (CallSite CS = cast(&Inst)) { @@ -213,7 +213,7 @@ bool AAEval::runOnFunction(Function &F) { } } - if (EvalTBAA) { + if (EvalAAMD) { // iterate over all pairs of load, store for (SetVector::iterator I1 = Loads.begin(), E = Loads.end(); I1 != E; ++I1) { diff --git a/lib/Analysis/AliasSetTracker.cpp b/lib/Analysis/AliasSetTracker.cpp index a45fe23..45442b0 100644 --- a/lib/Analysis/AliasSetTracker.cpp +++ b/lib/Analysis/AliasSetTracker.cpp @@ -47,18 +47,21 @@ void AliasSet::mergeSetIn(AliasSet &AS, AliasSetTracker &AST) { // If the pointers are not a must-alias pair, this set becomes a may alias. if (AA.alias(AliasAnalysis::Location(L->getValue(), L->getSize(), - L->getTBAAInfo()), + L->getAAInfo()), AliasAnalysis::Location(R->getValue(), R->getSize(), - R->getTBAAInfo())) + R->getAAInfo())) != AliasAnalysis::MustAlias) AliasTy = MayAlias; } + bool ASHadUnknownInsts = !AS.UnknownInsts.empty(); if (UnknownInsts.empty()) { // Merge call sites... 
- if (!AS.UnknownInsts.empty()) + if (ASHadUnknownInsts) { std::swap(UnknownInsts, AS.UnknownInsts); - } else if (!AS.UnknownInsts.empty()) { + addRef(); + } + } else if (ASHadUnknownInsts) { UnknownInsts.insert(UnknownInsts.end(), AS.UnknownInsts.begin(), AS.UnknownInsts.end()); AS.UnknownInsts.clear(); } @@ -76,6 +79,8 @@ void AliasSet::mergeSetIn(AliasSet &AS, AliasSetTracker &AST) { AS.PtrListEnd = &AS.PtrList; assert(*AS.PtrListEnd == nullptr && "End of list is not null?"); } + if (ASHadUnknownInsts) + AS.dropRef(AST); } void AliasSetTracker::removeAliasSet(AliasSet *AS) { @@ -92,7 +97,7 @@ void AliasSet::removeFromTracker(AliasSetTracker &AST) { } void AliasSet::addPointer(AliasSetTracker &AST, PointerRec &Entry, - uint64_t Size, const MDNode *TBAAInfo, + uint64_t Size, const AAMDNodes &AAInfo, bool KnownMustAlias) { assert(!Entry.hasAliasSet() && "Entry already in set!"); @@ -102,17 +107,17 @@ void AliasSet::addPointer(AliasSetTracker &AST, PointerRec &Entry, AliasAnalysis &AA = AST.getAliasAnalysis(); AliasAnalysis::AliasResult Result = AA.alias(AliasAnalysis::Location(P->getValue(), P->getSize(), - P->getTBAAInfo()), - AliasAnalysis::Location(Entry.getValue(), Size, TBAAInfo)); + P->getAAInfo()), + AliasAnalysis::Location(Entry.getValue(), Size, AAInfo)); if (Result != AliasAnalysis::MustAlias) AliasTy = MayAlias; else // First entry of must alias must have maximum size! - P->updateSizeAndTBAAInfo(Size, TBAAInfo); + P->updateSizeAndAAInfo(Size, AAInfo); assert(Result != AliasAnalysis::NoAlias && "Cannot be part of must set!"); } Entry.setAliasSet(this); - Entry.updateSizeAndTBAAInfo(Size, TBAAInfo); + Entry.updateSizeAndAAInfo(Size, AAInfo); // Add it to the end of the list... assert(*PtrListEnd == nullptr && "End of list is not null?"); @@ -123,6 +128,8 @@ void AliasSet::addPointer(AliasSetTracker &AST, PointerRec &Entry, } void AliasSet::addUnknownInst(Instruction *I, AliasAnalysis &AA) { + if (UnknownInsts.empty()) + addRef(); UnknownInsts.push_back(I); if (!I->mayWriteToMemory()) { @@ -140,7 +147,7 @@ void AliasSet::addUnknownInst(Instruction *I, AliasAnalysis &AA) { /// alias one of the members in the set. /// bool AliasSet::aliasesPointer(const Value *Ptr, uint64_t Size, - const MDNode *TBAAInfo, + const AAMDNodes &AAInfo, AliasAnalysis &AA) const { if (AliasTy == MustAlias) { assert(UnknownInsts.empty() && "Illegal must alias set!"); @@ -151,23 +158,23 @@ bool AliasSet::aliasesPointer(const Value *Ptr, uint64_t Size, assert(SomePtr && "Empty must-alias set??"); return AA.alias(AliasAnalysis::Location(SomePtr->getValue(), SomePtr->getSize(), - SomePtr->getTBAAInfo()), - AliasAnalysis::Location(Ptr, Size, TBAAInfo)); + SomePtr->getAAInfo()), + AliasAnalysis::Location(Ptr, Size, AAInfo)); } // If this is a may-alias set, we have to check all of the pointers in the set // to be sure it doesn't alias the set... for (iterator I = begin(), E = end(); I != E; ++I) - if (AA.alias(AliasAnalysis::Location(Ptr, Size, TBAAInfo), + if (AA.alias(AliasAnalysis::Location(Ptr, Size, AAInfo), AliasAnalysis::Location(I.getPointer(), I.getSize(), - I.getTBAAInfo()))) + I.getAAInfo()))) return true; // Check the unknown instructions... 
if (!UnknownInsts.empty()) { for (unsigned i = 0, e = UnknownInsts.size(); i != e; ++i) if (AA.getModRefInfo(UnknownInsts[i], - AliasAnalysis::Location(Ptr, Size, TBAAInfo)) != + AliasAnalysis::Location(Ptr, Size, AAInfo)) != AliasAnalysis::NoModRef) return true; } @@ -190,7 +197,7 @@ bool AliasSet::aliasesUnknownInst(Instruction *Inst, AliasAnalysis &AA) const { for (iterator I = begin(), E = end(); I != E; ++I) if (AA.getModRefInfo(Inst, AliasAnalysis::Location(I.getPointer(), I.getSize(), - I.getTBAAInfo())) != + I.getAAInfo())) != AliasAnalysis::NoModRef) return true; @@ -216,15 +223,16 @@ void AliasSetTracker::clear() { /// AliasSet *AliasSetTracker::findAliasSetForPointer(const Value *Ptr, uint64_t Size, - const MDNode *TBAAInfo) { + const AAMDNodes &AAInfo) { AliasSet *FoundSet = nullptr; - for (iterator I = begin(), E = end(); I != E; ++I) { - if (I->Forward || !I->aliasesPointer(Ptr, Size, TBAAInfo, AA)) continue; + for (iterator I = begin(), E = end(); I != E;) { + iterator Cur = I++; + if (Cur->Forward || !Cur->aliasesPointer(Ptr, Size, AAInfo, AA)) continue; if (!FoundSet) { // If this is the first alias set ptr can go into. - FoundSet = I; // Remember it. + FoundSet = Cur; // Remember it. } else { // Otherwise, we must merge the sets. - FoundSet->mergeSetIn(*I, *this); // Merge in contents. + FoundSet->mergeSetIn(*Cur, *this); // Merge in contents. } } @@ -235,25 +243,30 @@ AliasSet *AliasSetTracker::findAliasSetForPointer(const Value *Ptr, /// this alias set, false otherwise. This does not modify the AST object or /// alias sets. bool AliasSetTracker::containsPointer(Value *Ptr, uint64_t Size, - const MDNode *TBAAInfo) const { + const AAMDNodes &AAInfo) const { for (const_iterator I = begin(), E = end(); I != E; ++I) - if (!I->Forward && I->aliasesPointer(Ptr, Size, TBAAInfo, AA)) + if (!I->Forward && I->aliasesPointer(Ptr, Size, AAInfo, AA)) return true; return false; } - +bool AliasSetTracker::containsUnknown(Instruction *Inst) const { + for (const_iterator I = begin(), E = end(); I != E; ++I) + if (!I->Forward && I->aliasesUnknownInst(Inst, AA)) + return true; + return false; +} AliasSet *AliasSetTracker::findAliasSetForUnknownInst(Instruction *Inst) { AliasSet *FoundSet = nullptr; - for (iterator I = begin(), E = end(); I != E; ++I) { - if (I->Forward || !I->aliasesUnknownInst(Inst, AA)) + for (iterator I = begin(), E = end(); I != E;) { + iterator Cur = I++; + if (Cur->Forward || !Cur->aliasesUnknownInst(Inst, AA)) continue; - if (!FoundSet) // If this is the first alias set ptr can go into. - FoundSet = I; // Remember it. - else if (!I->Forward) // Otherwise, we must merge the sets. - FoundSet->mergeSetIn(*I, *this); // Merge in contents. + FoundSet = Cur; // Remember it. + else if (!Cur->Forward) // Otherwise, we must merge the sets. + FoundSet->mergeSetIn(*Cur, *this); // Merge in contents. } return FoundSet; } @@ -264,67 +277,75 @@ AliasSet *AliasSetTracker::findAliasSetForUnknownInst(Instruction *Inst) { /// getAliasSetForPointer - Return the alias set that the specified pointer /// lives in. AliasSet &AliasSetTracker::getAliasSetForPointer(Value *Pointer, uint64_t Size, - const MDNode *TBAAInfo, + const AAMDNodes &AAInfo, bool *New) { AliasSet::PointerRec &Entry = getEntryFor(Pointer); // Check to see if the pointer is already known. if (Entry.hasAliasSet()) { - Entry.updateSizeAndTBAAInfo(Size, TBAAInfo); + Entry.updateSizeAndAAInfo(Size, AAInfo); // Return the set! 
return *Entry.getAliasSet(*this)->getForwardedTarget(*this); } - if (AliasSet *AS = findAliasSetForPointer(Pointer, Size, TBAAInfo)) { + if (AliasSet *AS = findAliasSetForPointer(Pointer, Size, AAInfo)) { // Add it to the alias set it aliases. - AS->addPointer(*this, Entry, Size, TBAAInfo); + AS->addPointer(*this, Entry, Size, AAInfo); return *AS; } if (New) *New = true; // Otherwise create a new alias set to hold the loaded pointer. AliasSets.push_back(new AliasSet()); - AliasSets.back().addPointer(*this, Entry, Size, TBAAInfo); + AliasSets.back().addPointer(*this, Entry, Size, AAInfo); return AliasSets.back(); } -bool AliasSetTracker::add(Value *Ptr, uint64_t Size, const MDNode *TBAAInfo) { +bool AliasSetTracker::add(Value *Ptr, uint64_t Size, const AAMDNodes &AAInfo) { bool NewPtr; - addPointer(Ptr, Size, TBAAInfo, AliasSet::NoModRef, NewPtr); + addPointer(Ptr, Size, AAInfo, AliasSet::NoModRef, NewPtr); return NewPtr; } bool AliasSetTracker::add(LoadInst *LI) { if (LI->getOrdering() > Monotonic) return addUnknown(LI); + + AAMDNodes AAInfo; + LI->getAAMetadata(AAInfo); + AliasSet::AccessType ATy = AliasSet::Refs; bool NewPtr; AliasSet &AS = addPointer(LI->getOperand(0), AA.getTypeStoreSize(LI->getType()), - LI->getMetadata(LLVMContext::MD_tbaa), - ATy, NewPtr); + AAInfo, ATy, NewPtr); if (LI->isVolatile()) AS.setVolatile(); return NewPtr; } bool AliasSetTracker::add(StoreInst *SI) { if (SI->getOrdering() > Monotonic) return addUnknown(SI); + + AAMDNodes AAInfo; + SI->getAAMetadata(AAInfo); + AliasSet::AccessType ATy = AliasSet::Mods; bool NewPtr; Value *Val = SI->getOperand(0); AliasSet &AS = addPointer(SI->getOperand(1), AA.getTypeStoreSize(Val->getType()), - SI->getMetadata(LLVMContext::MD_tbaa), - ATy, NewPtr); + AAInfo, ATy, NewPtr); if (SI->isVolatile()) AS.setVolatile(); return NewPtr; } bool AliasSetTracker::add(VAArgInst *VAAI) { + AAMDNodes AAInfo; + VAAI->getAAMetadata(AAInfo); + bool NewPtr; addPointer(VAAI->getOperand(0), AliasAnalysis::UnknownSize, - VAAI->getMetadata(LLVMContext::MD_tbaa), - AliasSet::ModRef, NewPtr); + AAInfo, AliasSet::ModRef, NewPtr); return NewPtr; } @@ -382,7 +403,7 @@ void AliasSetTracker::add(const AliasSetTracker &AST) { bool X; for (AliasSet::iterator ASI = AS.begin(), E = AS.end(); ASI != E; ++ASI) { AliasSet &NewAS = addPointer(ASI.getPointer(), ASI.getSize(), - ASI.getTBAAInfo(), + ASI.getAAInfo(), (AliasSet::AccessType)AS.AccessTy, X); if (AS.isVolatile()) NewAS.setVolatile(); } @@ -393,6 +414,8 @@ void AliasSetTracker::add(const AliasSetTracker &AST) { /// tracker. void AliasSetTracker::remove(AliasSet &AS) { // Drop all call sites. + if (!AS.UnknownInsts.empty()) + AS.dropRef(*this); AS.UnknownInsts.clear(); // Clear the alias set. 
@@ -419,8 +442,8 @@ void AliasSetTracker::remove(AliasSet &AS) { } bool -AliasSetTracker::remove(Value *Ptr, uint64_t Size, const MDNode *TBAAInfo) { - AliasSet *AS = findAliasSetForPointer(Ptr, Size, TBAAInfo); +AliasSetTracker::remove(Value *Ptr, uint64_t Size, const AAMDNodes &AAInfo) { + AliasSet *AS = findAliasSetForPointer(Ptr, Size, AAInfo); if (!AS) return false; remove(*AS); return true; @@ -428,8 +451,11 @@ AliasSetTracker::remove(Value *Ptr, uint64_t Size, const MDNode *TBAAInfo) { bool AliasSetTracker::remove(LoadInst *LI) { uint64_t Size = AA.getTypeStoreSize(LI->getType()); - const MDNode *TBAAInfo = LI->getMetadata(LLVMContext::MD_tbaa); - AliasSet *AS = findAliasSetForPointer(LI->getOperand(0), Size, TBAAInfo); + + AAMDNodes AAInfo; + LI->getAAMetadata(AAInfo); + + AliasSet *AS = findAliasSetForPointer(LI->getOperand(0), Size, AAInfo); if (!AS) return false; remove(*AS); return true; @@ -437,17 +463,22 @@ bool AliasSetTracker::remove(LoadInst *LI) { bool AliasSetTracker::remove(StoreInst *SI) { uint64_t Size = AA.getTypeStoreSize(SI->getOperand(0)->getType()); - const MDNode *TBAAInfo = SI->getMetadata(LLVMContext::MD_tbaa); - AliasSet *AS = findAliasSetForPointer(SI->getOperand(1), Size, TBAAInfo); + + AAMDNodes AAInfo; + SI->getAAMetadata(AAInfo); + + AliasSet *AS = findAliasSetForPointer(SI->getOperand(1), Size, AAInfo); if (!AS) return false; remove(*AS); return true; } bool AliasSetTracker::remove(VAArgInst *VAAI) { + AAMDNodes AAInfo; + VAAI->getAAMetadata(AAInfo); + AliasSet *AS = findAliasSetForPointer(VAAI->getOperand(0), - AliasAnalysis::UnknownSize, - VAAI->getMetadata(LLVMContext::MD_tbaa)); + AliasAnalysis::UnknownSize, AAInfo); if (!AS) return false; remove(*AS); return true; @@ -489,10 +520,10 @@ void AliasSetTracker::deleteValue(Value *PtrVal) { if (Instruction *Inst = dyn_cast(PtrVal)) { if (Inst->mayReadOrWriteMemory()) { // Scan all the alias sets to see if this call site is contained. 
- for (iterator I = begin(), E = end(); I != E; ++I) { - if (I->Forward) continue; - - I->removeUnknownInst(Inst); + for (iterator I = begin(), E = end(); I != E;) { + iterator Cur = I++; + if (!Cur->Forward) + Cur->removeUnknownInst(*this, Inst); } } } @@ -536,7 +567,7 @@ void AliasSetTracker::copyValue(Value *From, Value *To) { I = PointerMap.find_as(From); AliasSet *AS = I->second->getAliasSet(*this); AS->addPointer(*this, Entry, I->second->getSize(), - I->second->getTBAAInfo(), + I->second->getAAInfo(), true); } diff --git a/lib/Analysis/Analysis.cpp b/lib/Analysis/Analysis.cpp index ade940a..f64bf0e 100644 --- a/lib/Analysis/Analysis.cpp +++ b/lib/Analysis/Analysis.cpp @@ -34,6 +34,7 @@ void llvm::initializeAnalysis(PassRegistry &Registry) { initializeCFGPrinterPass(Registry); initializeCFGOnlyViewerPass(Registry); initializeCFGOnlyPrinterPass(Registry); + initializeCFLAliasAnalysisPass(Registry); initializeDependenceAnalysisPass(Registry); initializeDelinearizationPass(Registry); initializeDominanceFrontierPass(Registry); @@ -57,7 +58,7 @@ void llvm::initializeAnalysis(PassRegistry &Registry) { initializeMemoryDependenceAnalysisPass(Registry); initializeModuleDebugInfoPrinterPass(Registry); initializePostDominatorTreePass(Registry); - initializeRegionInfoPass(Registry); + initializeRegionInfoPassPass(Registry); initializeRegionViewerPass(Registry); initializeRegionPrinterPass(Registry); initializeRegionOnlyViewerPass(Registry); @@ -66,6 +67,7 @@ void llvm::initializeAnalysis(PassRegistry &Registry) { initializeScalarEvolutionAliasAnalysisPass(Registry); initializeTargetTransformInfoAnalysisGroup(Registry); initializeTypeBasedAliasAnalysisPass(Registry); + initializeScopedNoAliasAAPass(Registry); } void LLVMInitializeAnalysis(LLVMPassRegistryRef R) { diff --git a/lib/Analysis/Android.mk b/lib/Analysis/Android.mk index 4e435a1..8770fa7 100644 --- a/lib/Analysis/Android.mk +++ b/lib/Analysis/Android.mk @@ -7,12 +7,15 @@ analysis_SRC_FILES := \ AliasDebugger.cpp \ AliasSetTracker.cpp \ Analysis.cpp \ + AssumptionTracker.cpp \ BasicAliasAnalysis.cpp \ BlockFrequencyInfo.cpp \ BlockFrequencyInfoImpl.cpp \ BranchProbabilityInfo.cpp \ CFG.cpp \ CFGPrinter.cpp \ + CFLAliasAnalysis.cpp \ + CGSCCPassManager.cpp \ CaptureTracking.cpp \ CodeMetrics.cpp \ ConstantFolding.cpp \ @@ -21,7 +24,7 @@ analysis_SRC_FILES := \ DependenceAnalysis.cpp \ DomPrinter.cpp \ DominanceFrontier.cpp \ - CGSCCPassManager.cpp \ + FunctionTargetTransformInfo.cpp \ IVUsers.cpp \ InstCount.cpp \ InstructionSimplify.cpp \ @@ -51,6 +54,7 @@ analysis_SRC_FILES := \ ScalarEvolutionAliasAnalysis.cpp \ ScalarEvolutionExpander.cpp \ ScalarEvolutionNormalization.cpp \ + ScopedNoAliasAA.cpp \ SparsePropagation.cpp \ TargetTransformInfo.cpp \ Trace.cpp \ diff --git a/lib/Analysis/AssumptionTracker.cpp b/lib/Analysis/AssumptionTracker.cpp new file mode 100644 index 0000000..775ce1d --- /dev/null +++ b/lib/Analysis/AssumptionTracker.cpp @@ -0,0 +1,110 @@ +//===- AssumptionTracker.cpp - Track @llvm.assume -------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains a pass that keeps track of @llvm.assume intrinsics in +// the functions of a module. 
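AssumptionTracker is the new immutable pass that caches every @llvm.assume call per function, so clients such as BasicAliasAnalysis and the ValueTracking helpers can consult assumptions without rescanning the IR on each query. For orientation, this is how a pass takes the dependency and fetches the tracker, with the template arguments spelled out; MyPass is a placeholder name and the sketch is illustrative, not code from this patch (the BasicAliasAnalysis hunks later in the patch add the same two lines):

    #include "llvm/Analysis/AssumptionTracker.h"
    #include "llvm/IR/Function.h"
    #include "llvm/Pass.h"
    using namespace llvm;

    namespace {
    struct MyPass : FunctionPass {
      static char ID;
      MyPass() : FunctionPass(ID) {}
      void getAnalysisUsage(AnalysisUsage &AU) const override {
        AU.addRequired<AssumptionTracker>(); // make the cached assumptions available
      }
      bool runOnFunction(Function &F) override {
        AssumptionTracker *AT = &getAnalysis<AssumptionTracker>();
        (void)AT; // thread AT into helpers such as MaskedValueIsZero()
        return false;
      }
    };
    }
    char MyPass::ID = 0;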
+// +//===----------------------------------------------------------------------===// + +#include "llvm/Analysis/AssumptionTracker.h" +#include "llvm/IR/CallSite.h" +#include "llvm/IR/Dominators.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/IntrinsicInst.h" +#include "llvm/IR/PatternMatch.h" +#include "llvm/Support/Debug.h" +using namespace llvm; +using namespace llvm::PatternMatch; + +void AssumptionTracker::FunctionCallbackVH::deleted() { + AT->forgetCachedAssumptions(cast(getValPtr())); + // 'this' now dangles! +} + +void AssumptionTracker::forgetCachedAssumptions(Function *F) { + auto I = CachedAssumeCalls.find_as(F); + if (I != CachedAssumeCalls.end()) + CachedAssumeCalls.erase(I); +} + +void AssumptionTracker::CallCallbackVH::deleted() { + assert(F && "delete callback called on dummy handle"); + FunctionCallsMap::iterator I = AT->CachedAssumeCalls.find_as(F); + assert(I != AT->CachedAssumeCalls.end() && + "Function cleared from the map without removing the values?"); + + I->second->erase(*this); + // 'this' now dangles! +} + +AssumptionTracker::FunctionCallsMap::iterator +AssumptionTracker::scanFunction(Function *F) { + auto IP = CachedAssumeCalls.insert(std::make_pair( + FunctionCallbackVH(F, this), llvm::make_unique())); + assert(IP.second && "Scanning function already in the map?"); + + FunctionCallsMap::iterator I = IP.first; + + // Go through all instructions in all blocks, add all calls to @llvm.assume + // to our cache. + for (BasicBlock &B : *F) + for (Instruction &II : B) + if (match(&II, m_Intrinsic())) + I->second->insert(CallCallbackVH(&II, this)); + + return I; +} + +void AssumptionTracker::verifyAnalysis() const { +#ifndef NDEBUG + for (const auto &I : CachedAssumeCalls) { + for (const BasicBlock &B : cast(*I.first)) + for (const Instruction &II : B) { + if (match(&II, m_Intrinsic())) { + assert(I.second->find_as(&II) != I.second->end() && + "Assumption in scanned function not in cache"); + } + } + } +#endif +} + +void AssumptionTracker::registerAssumption(CallInst *CI) { + assert(match(CI, m_Intrinsic()) && + "Registered call does not call @llvm.assume"); + assert(CI->getParent() && + "Cannot register @llvm.assume call not in a basic block"); + + Function *F = CI->getParent()->getParent(); + assert(F && "Cannot register @llvm.assume call not in a function"); + + FunctionCallsMap::iterator I = CachedAssumeCalls.find_as(F); + if (I == CachedAssumeCalls.end()) { + // If this function has not already been scanned, then don't do anything + // here. This intrinsic will be found, if it still exists, if the list of + // assumptions in this function is requested at some later point. This + // maintains the following invariant: if a function is present in the + // cache, then its list of assumption intrinsic calls is complete. 
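The invariant spelled out above is what makes registerAssumption() matter for transforms: a function's cache entry is only trusted if it lists every assume call, so a pass that materializes a new @llvm.assume while the function is already cached has to report it. A hypothetical helper, assuming an IRBuilder positioned where the assumption should live; names and structure are mine, for illustration only:

    #include "llvm/Analysis/AssumptionTracker.h"
    #include "llvm/IR/IRBuilder.h"
    #include "llvm/IR/Intrinsics.h"
    #include "llvm/IR/Module.h"
    using namespace llvm;

    static void emitAssume(IRBuilder<> &Builder, Value *Cond,
                           AssumptionTracker *AT) {
      Module *M = Builder.GetInsertBlock()->getParent()->getParent();
      // Create "call void @llvm.assume(i1 %Cond)" at the current insert point.
      CallInst *CI = Builder.CreateCall(
          Intrinsic::getDeclaration(M, Intrinsic::assume), Cond);
      // Keep the per-function cache coherent; without this, a function whose
      // assumption list was already scanned would never see the new call.
      AT->registerAssumption(CI);
    }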
+ return; + } + + I->second->insert(CallCallbackVH(CI, this)); +} + +AssumptionTracker::AssumptionTracker() : ImmutablePass(ID) { + initializeAssumptionTrackerPass(*PassRegistry::getPassRegistry()); +} + +AssumptionTracker::~AssumptionTracker() {} + +INITIALIZE_PASS(AssumptionTracker, "assumption-tracker", "Assumption Tracker", + false, true) +char AssumptionTracker::ID = 0; + diff --git a/lib/Analysis/BasicAliasAnalysis.cpp b/lib/Analysis/BasicAliasAnalysis.cpp index c50dd4a..9aba0d3 100644 --- a/lib/Analysis/BasicAliasAnalysis.cpp +++ b/lib/Analysis/BasicAliasAnalysis.cpp @@ -17,6 +17,7 @@ #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallVector.h" #include "llvm/Analysis/AliasAnalysis.h" +#include "llvm/Analysis/AssumptionTracker.h" #include "llvm/Analysis/CFG.h" #include "llvm/Analysis/CaptureTracking.h" #include "llvm/Analysis/InstructionSimplify.h" @@ -156,17 +157,6 @@ static bool isObjectSize(const Value *V, uint64_t Size, return ObjectSize != AliasAnalysis::UnknownSize && ObjectSize == Size; } -/// isIdentifiedFunctionLocal - Return true if V is umabigously identified -/// at the function-level. Different IdentifiedFunctionLocals can't alias. -/// Further, an IdentifiedFunctionLocal can not alias with any function -/// arguments other than itself, which is not necessarily true for -/// IdentifiedObjects. -static bool isIdentifiedFunctionLocal(const Value *V) -{ - return isa(V) || isNoAliasCall(V) || isNoAliasArgument(V); -} - - //===----------------------------------------------------------------------===// // GetElementPtr Instruction Decomposition and Analysis //===----------------------------------------------------------------------===// @@ -205,7 +195,9 @@ namespace { /// represented in the result. static Value *GetLinearExpression(Value *V, APInt &Scale, APInt &Offset, ExtensionKind &Extension, - const DataLayout &DL, unsigned Depth) { + const DataLayout &DL, unsigned Depth, + AssumptionTracker *AT, + DominatorTree *DT) { assert(V->getType()->isIntegerTy() && "Not an integer value"); // Limit our recursion depth. @@ -215,6 +207,14 @@ static Value *GetLinearExpression(Value *V, APInt &Scale, APInt &Offset, return V; } + if (ConstantInt *Const = dyn_cast(V)) { + // if it's a constant, just convert it to an offset + // and remove the variable. + Offset += Const->getValue(); + assert(Scale == 0 && "Constant values don't have a scale"); + return V; + } + if (BinaryOperator *BOp = dyn_cast(V)) { if (ConstantInt *RHSC = dyn_cast(BOp->getOperand(1))) { switch (BOp->getOpcode()) { @@ -222,23 +222,24 @@ static Value *GetLinearExpression(Value *V, APInt &Scale, APInt &Offset, case Instruction::Or: // X|C == X+C if all the bits in C are unset in X. Otherwise we can't // analyze it. - if (!MaskedValueIsZero(BOp->getOperand(0), RHSC->getValue(), &DL)) + if (!MaskedValueIsZero(BOp->getOperand(0), RHSC->getValue(), &DL, 0, + AT, BOp, DT)) break; // FALL THROUGH. 
case Instruction::Add: V = GetLinearExpression(BOp->getOperand(0), Scale, Offset, Extension, - DL, Depth+1); + DL, Depth+1, AT, DT); Offset += RHSC->getValue(); return V; case Instruction::Mul: V = GetLinearExpression(BOp->getOperand(0), Scale, Offset, Extension, - DL, Depth+1); + DL, Depth+1, AT, DT); Offset *= RHSC->getValue(); Scale *= RHSC->getValue(); return V; case Instruction::Shl: V = GetLinearExpression(BOp->getOperand(0), Scale, Offset, Extension, - DL, Depth+1); + DL, Depth+1, AT, DT); Offset <<= RHSC->getValue().getLimitedValue(); Scale <<= RHSC->getValue().getLimitedValue(); return V; @@ -259,9 +260,12 @@ static Value *GetLinearExpression(Value *V, APInt &Scale, APInt &Offset, Extension = isa(V) ? EK_SignExt : EK_ZeroExt; Value *Result = GetLinearExpression(CastOp, Scale, Offset, Extension, - DL, Depth+1); + DL, Depth+1, AT, DT); Scale = Scale.zext(OldWidth); - Offset = Offset.zext(OldWidth); + + // We have to sign-extend even if Extension == EK_ZeroExt as we can't + // decompose a sign extension (i.e. zext(x - 1) != zext(x) - zext(-1)). + Offset = Offset.sext(OldWidth); return Result; } @@ -289,7 +293,8 @@ static Value *GetLinearExpression(Value *V, APInt &Scale, APInt &Offset, static const Value * DecomposeGEPExpression(const Value *V, int64_t &BaseOffs, SmallVectorImpl &VarIndices, - bool &MaxLookupReached, const DataLayout *DL) { + bool &MaxLookupReached, const DataLayout *DL, + AssumptionTracker *AT, DominatorTree *DT) { // Limit recursion depth to limit compile time in crazy cases. unsigned MaxLookup = MaxLookupSearchDepth; MaxLookupReached = false; @@ -309,7 +314,8 @@ DecomposeGEPExpression(const Value *V, int64_t &BaseOffs, return V; } - if (Op->getOpcode() == Instruction::BitCast) { + if (Op->getOpcode() == Instruction::BitCast || + Op->getOpcode() == Instruction::AddrSpaceCast) { V = Op->getOperand(0); continue; } @@ -319,7 +325,10 @@ DecomposeGEPExpression(const Value *V, int64_t &BaseOffs, // If it's not a GEP, hand it off to SimplifyInstruction to see if it // can come up with something. This matches what GetUnderlyingObject does. if (const Instruction *I = dyn_cast(V)) - // TODO: Get a DominatorTree and use it here. + // TODO: Get a DominatorTree and AssumptionTracker and use them here + // (these are both now available in this function, but this should be + // updated when GetUnderlyingObject is updated). TLI should be + // provided also. if (const Value *Simplified = SimplifyInstruction(const_cast(I), DL)) { V = Simplified; @@ -378,7 +387,7 @@ DecomposeGEPExpression(const Value *V, int64_t &BaseOffs, // Use GetLinearExpression to decompose the index into a C1*V+C2 form. APInt IndexScale(Width, 0), IndexOffset(Width, 0); Index = GetLinearExpression(Index, IndexScale, IndexOffset, Extension, - *DL, 0); + *DL, 0, AT, DT); // The GEP index scale ("Scale") scales C1*V+C2, yielding (C1*V+C2)*Scale. // This gives us an aggregate computation of (C1*Scale)*V + C2*Scale. 
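To make the Scale/Offset bookkeeping above concrete (numbers and names are mine, purely illustrative): if a GEP steps over 4-byte elements and its variable index is n = i + 3, GetLinearExpression() reports V = i, C1 = 1, C2 = 3; the element size then multiplies both parts, so the index contributes (1 * 4) * i to the variable side of the decomposition and 3 * 4 = 12 bytes to the constant base offset that aliasGEP() later compares against the access sizes.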
@@ -459,6 +468,7 @@ namespace { void getAnalysisUsage(AnalysisUsage &AU) const override { AU.addRequired(); + AU.addRequired(); AU.addRequired(); } @@ -466,8 +476,8 @@ namespace { assert(AliasCache.empty() && "AliasCache must be cleared after use!"); assert(notDifferentParent(LocA.Ptr, LocB.Ptr) && "BasicAliasAnalysis doesn't support interprocedural queries."); - AliasResult Alias = aliasCheck(LocA.Ptr, LocA.Size, LocA.TBAATag, - LocB.Ptr, LocB.Size, LocB.TBAATag); + AliasResult Alias = aliasCheck(LocA.Ptr, LocA.Size, LocA.AATags, + LocB.Ptr, LocB.Size, LocB.AATags); // AliasCache rarely has more than 1 or 2 elements, always use // shrink_and_clear so it quickly returns to the inline capacity of the // SmallDenseMap if it ever grows larger. @@ -481,10 +491,7 @@ namespace { const Location &Loc) override; ModRefResult getModRefInfo(ImmutableCallSite CS1, - ImmutableCallSite CS2) override { - // The AliasAnalysis base class has some smarts, lets use them. - return AliasAnalysis::getModRefInfo(CS1, CS2); - } + ImmutableCallSite CS2) override; /// pointsToConstantMemory - Chase pointers until we find a (constant /// global) or not. @@ -554,28 +561,28 @@ namespace { // aliasGEP - Provide a bunch of ad-hoc rules to disambiguate a GEP // instruction against another. AliasResult aliasGEP(const GEPOperator *V1, uint64_t V1Size, - const MDNode *V1TBAAInfo, + const AAMDNodes &V1AAInfo, const Value *V2, uint64_t V2Size, - const MDNode *V2TBAAInfo, + const AAMDNodes &V2AAInfo, const Value *UnderlyingV1, const Value *UnderlyingV2); // aliasPHI - Provide a bunch of ad-hoc rules to disambiguate a PHI // instruction against another. AliasResult aliasPHI(const PHINode *PN, uint64_t PNSize, - const MDNode *PNTBAAInfo, + const AAMDNodes &PNAAInfo, const Value *V2, uint64_t V2Size, - const MDNode *V2TBAAInfo); + const AAMDNodes &V2AAInfo); /// aliasSelect - Disambiguate a Select instruction against another value. AliasResult aliasSelect(const SelectInst *SI, uint64_t SISize, - const MDNode *SITBAAInfo, + const AAMDNodes &SIAAInfo, const Value *V2, uint64_t V2Size, - const MDNode *V2TBAAInfo); + const AAMDNodes &V2AAInfo); AliasResult aliasCheck(const Value *V1, uint64_t V1Size, - const MDNode *V1TBAATag, + AAMDNodes V1AATag, const Value *V2, uint64_t V2Size, - const MDNode *V2TBAATag); + AAMDNodes V2AATag); }; } // End of anonymous namespace @@ -584,6 +591,7 @@ char BasicAliasAnalysis::ID = 0; INITIALIZE_AG_PASS_BEGIN(BasicAliasAnalysis, AliasAnalysis, "basicaa", "Basic Alias Analysis (stateless AA impl)", false, true, false) +INITIALIZE_PASS_DEPENDENCY(AssumptionTracker) INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfo) INITIALIZE_AG_PASS_END(BasicAliasAnalysis, AliasAnalysis, "basicaa", "Basic Alias Analysis (stateless AA impl)", @@ -606,7 +614,7 @@ BasicAliasAnalysis::pointsToConstantMemory(const Location &Loc, bool OrLocal) { Worklist.push_back(Loc.Ptr); do { const Value *V = GetUnderlyingObject(Worklist.pop_back_val(), DL); - if (!Visited.insert(V)) { + if (!Visited.insert(V).second) { Visited.clear(); return AliasAnalysis::pointsToConstantMemory(Loc, OrLocal); } @@ -798,6 +806,14 @@ BasicAliasAnalysis::getArgLocation(ImmutableCallSite CS, unsigned ArgIdx, return Loc; } +static bool isAssumeIntrinsic(ImmutableCallSite CS) { + const IntrinsicInst *II = dyn_cast(CS.getInstruction()); + if (II && II->getIntrinsicID() == Intrinsic::assume) + return true; + + return false; +} + /// getModRefInfo - Check to see if the specified callsite can clobber the /// specified memory object. 
Since we only look at local properties of this /// function, we really can't say much about this query. We do, however, use @@ -850,10 +866,29 @@ BasicAliasAnalysis::getModRefInfo(ImmutableCallSite CS, return NoModRef; } + // While the assume intrinsic is marked as arbitrarily writing so that + // proper control dependencies will be maintained, it never aliases any + // particular memory location. + if (isAssumeIntrinsic(CS)) + return NoModRef; + // The AliasAnalysis base class has some smarts, lets use them. return AliasAnalysis::getModRefInfo(CS, Loc); } +AliasAnalysis::ModRefResult +BasicAliasAnalysis::getModRefInfo(ImmutableCallSite CS1, + ImmutableCallSite CS2) { + // While the assume intrinsic is marked as arbitrarily writing so that + // proper control dependencies will be maintained, it never aliases any + // particular memory location. + if (isAssumeIntrinsic(CS1) || isAssumeIntrinsic(CS2)) + return NoModRef; + + // The AliasAnalysis base class has some smarts, lets use them. + return AliasAnalysis::getModRefInfo(CS1, CS2); +} + /// aliasGEP - Provide a bunch of ad-hoc rules to disambiguate a GEP instruction /// against another pointer. We know that V1 is a GEP, but we don't know /// anything about V2. UnderlyingV1 is GetUnderlyingObject(GEP1, DL), @@ -861,30 +896,35 @@ BasicAliasAnalysis::getModRefInfo(ImmutableCallSite CS, /// AliasAnalysis::AliasResult BasicAliasAnalysis::aliasGEP(const GEPOperator *GEP1, uint64_t V1Size, - const MDNode *V1TBAAInfo, + const AAMDNodes &V1AAInfo, const Value *V2, uint64_t V2Size, - const MDNode *V2TBAAInfo, + const AAMDNodes &V2AAInfo, const Value *UnderlyingV1, const Value *UnderlyingV2) { int64_t GEP1BaseOffset; bool GEP1MaxLookupReached; SmallVector GEP1VariableIndices; + AssumptionTracker *AT = &getAnalysis(); + DominatorTreeWrapperPass *DTWP = + getAnalysisIfAvailable(); + DominatorTree *DT = DTWP ? &DTWP->getDomTree() : nullptr; + // If we have two gep instructions with must-alias or not-alias'ing base // pointers, figure out if the indexes to the GEP tell us anything about the // derived pointer. if (const GEPOperator *GEP2 = dyn_cast(V2)) { // Do the base pointers alias? - AliasResult BaseAlias = aliasCheck(UnderlyingV1, UnknownSize, nullptr, - UnderlyingV2, UnknownSize, nullptr); + AliasResult BaseAlias = aliasCheck(UnderlyingV1, UnknownSize, AAMDNodes(), + UnderlyingV2, UnknownSize, AAMDNodes()); // Check for geps of non-aliasing underlying pointers where the offsets are // identical. if ((BaseAlias == MayAlias) && V1Size == V2Size) { // Do the base pointers alias assuming type and size. AliasResult PreciseBaseAlias = aliasCheck(UnderlyingV1, V1Size, - V1TBAAInfo, UnderlyingV2, - V2Size, V2TBAAInfo); + V1AAInfo, UnderlyingV2, + V2Size, V2AAInfo); if (PreciseBaseAlias == NoAlias) { // See if the computed offset from the common pointer tells us about the // relation of the resulting pointer. @@ -893,10 +933,10 @@ BasicAliasAnalysis::aliasGEP(const GEPOperator *GEP1, uint64_t V1Size, SmallVector GEP2VariableIndices; const Value *GEP2BasePtr = DecomposeGEPExpression(GEP2, GEP2BaseOffset, GEP2VariableIndices, - GEP2MaxLookupReached, DL); + GEP2MaxLookupReached, DL, AT, DT); const Value *GEP1BasePtr = DecomposeGEPExpression(GEP1, GEP1BaseOffset, GEP1VariableIndices, - GEP1MaxLookupReached, DL); + GEP1MaxLookupReached, DL, AT, DT); // DecomposeGEPExpression and GetUnderlyingObject should return the // same result except when DecomposeGEPExpression has no DataLayout. 
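A second thread running through the BasicAliasAnalysis changes above is the special-casing of @llvm.assume: the intrinsic is modelled as arbitrarily writing memory only so that control dependencies are preserved, but it never touches a real location, so both the call-versus-location override and the new call-versus-call getModRefInfo() override short-circuit it to NoModRef. The helper they share, restated here with its dyn_cast template argument written out; same logic as the hunk above, repeated only for readability:

    #include "llvm/IR/CallSite.h"
    #include "llvm/IR/IntrinsicInst.h"
    using namespace llvm;

    static bool isAssumeIntrinsic(ImmutableCallSite CS) {
      const IntrinsicInst *II = dyn_cast<IntrinsicInst>(CS.getInstruction());
      return II && II->getIntrinsicID() == Intrinsic::assume;
    }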
if (GEP1BasePtr != UnderlyingV1 || GEP2BasePtr != UnderlyingV2) { @@ -925,14 +965,14 @@ BasicAliasAnalysis::aliasGEP(const GEPOperator *GEP1, uint64_t V1Size, // about the relation of the resulting pointer. const Value *GEP1BasePtr = DecomposeGEPExpression(GEP1, GEP1BaseOffset, GEP1VariableIndices, - GEP1MaxLookupReached, DL); + GEP1MaxLookupReached, DL, AT, DT); int64_t GEP2BaseOffset; bool GEP2MaxLookupReached; SmallVector GEP2VariableIndices; const Value *GEP2BasePtr = DecomposeGEPExpression(GEP2, GEP2BaseOffset, GEP2VariableIndices, - GEP2MaxLookupReached, DL); + GEP2MaxLookupReached, DL, AT, DT); // DecomposeGEPExpression and GetUnderlyingObject should return the // same result except when DecomposeGEPExpression has no DataLayout. @@ -959,8 +999,8 @@ BasicAliasAnalysis::aliasGEP(const GEPOperator *GEP1, uint64_t V1Size, if (V1Size == UnknownSize && V2Size == UnknownSize) return MayAlias; - AliasResult R = aliasCheck(UnderlyingV1, UnknownSize, nullptr, - V2, V2Size, V2TBAAInfo); + AliasResult R = aliasCheck(UnderlyingV1, UnknownSize, AAMDNodes(), + V2, V2Size, V2AAInfo); if (R != MustAlias) // If V2 may alias GEP base pointer, conservatively returns MayAlias. // If V2 is known not to alias GEP base pointer, then the two values @@ -971,7 +1011,7 @@ BasicAliasAnalysis::aliasGEP(const GEPOperator *GEP1, uint64_t V1Size, const Value *GEP1BasePtr = DecomposeGEPExpression(GEP1, GEP1BaseOffset, GEP1VariableIndices, - GEP1MaxLookupReached, DL); + GEP1MaxLookupReached, DL, AT, DT); // DecomposeGEPExpression and GetUnderlyingObject should return the // same result except when DecomposeGEPExpression has no DataLayout. @@ -1022,12 +1062,45 @@ BasicAliasAnalysis::aliasGEP(const GEPOperator *GEP1, uint64_t V1Size, } } - // Try to distinguish something like &A[i][1] against &A[42][0]. - // Grab the least significant bit set in any of the scales. if (!GEP1VariableIndices.empty()) { uint64_t Modulo = 0; - for (unsigned i = 0, e = GEP1VariableIndices.size(); i != e; ++i) - Modulo |= (uint64_t)GEP1VariableIndices[i].Scale; + bool AllPositive = true; + for (unsigned i = 0, e = GEP1VariableIndices.size(); i != e; ++i) { + + // Try to distinguish something like &A[i][1] against &A[42][0]. + // Grab the least significant bit set in any of the scales. We + // don't need std::abs here (even if the scale's negative) as we'll + // be ^'ing Modulo with itself later. + Modulo |= (uint64_t) GEP1VariableIndices[i].Scale; + + if (AllPositive) { + // If the Value could change between cycles, then any reasoning about + // the Value this cycle may not hold in the next cycle. We'll just + // give up if we can't determine conditions that hold for every cycle: + const Value *V = GEP1VariableIndices[i].V; + + bool SignKnownZero, SignKnownOne; + ComputeSignBit( + const_cast(V), + SignKnownZero, SignKnownOne, + DL, 0, AT, nullptr, DT); + + // Zero-extension widens the variable, and so forces the sign + // bit to zero. + bool IsZExt = GEP1VariableIndices[i].Extension == EK_ZeroExt; + SignKnownZero |= IsZExt; + SignKnownOne &= !IsZExt; + + // If the variable begins with a zero then we know it's + // positive, regardless of whether the value is signed or + // unsigned. 
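The sign-bit tracking that begins above feeds a new NoAlias early-out that follows just below: if every variable term Scale * V of the decomposed GEP is provably non-negative (via ComputeSignBit, with zero-extension forcing the sign bit to zero), the GEP cannot point below its base plus the constant offset. A worked instance with my own numbers: for q = p + 8 + 4*i where i is known non-negative, q is at least p + 8, so an access of size 8 or less that starts exactly at p cannot overlap an access through q, and the query returns NoAlias.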
+ int64_t Scale = GEP1VariableIndices[i].Scale; + AllPositive = + (SignKnownZero && Scale >= 0) || + (SignKnownOne && Scale < 0); + } + } + Modulo = Modulo ^ (Modulo & (Modulo - 1)); // We can compute the difference between the two addresses @@ -1037,6 +1110,12 @@ BasicAliasAnalysis::aliasGEP(const GEPOperator *GEP1, uint64_t V1Size, if (V1Size != UnknownSize && V2Size != UnknownSize && ModOffset >= V2Size && V1Size <= Modulo - ModOffset) return NoAlias; + + // If we know all the variables are positive, then GEP1 >= GEP1BasePtr. + // If GEP1BasePtr > V2 (GEP1BaseOffset > 0) then we know the pointers + // don't alias if V2Size can fit in the gap between V2 and GEP1BasePtr. + if (AllPositive && GEP1BaseOffset > 0 && V2Size <= (uint64_t) GEP1BaseOffset) + return NoAlias; } // Statically, we can see that the base objects are the same, but the @@ -1066,33 +1145,33 @@ MergeAliasResults(AliasAnalysis::AliasResult A, AliasAnalysis::AliasResult B) { /// instruction against another. AliasAnalysis::AliasResult BasicAliasAnalysis::aliasSelect(const SelectInst *SI, uint64_t SISize, - const MDNode *SITBAAInfo, + const AAMDNodes &SIAAInfo, const Value *V2, uint64_t V2Size, - const MDNode *V2TBAAInfo) { + const AAMDNodes &V2AAInfo) { // If the values are Selects with the same condition, we can do a more precise // check: just check for aliases between the values on corresponding arms. if (const SelectInst *SI2 = dyn_cast(V2)) if (SI->getCondition() == SI2->getCondition()) { AliasResult Alias = - aliasCheck(SI->getTrueValue(), SISize, SITBAAInfo, - SI2->getTrueValue(), V2Size, V2TBAAInfo); + aliasCheck(SI->getTrueValue(), SISize, SIAAInfo, + SI2->getTrueValue(), V2Size, V2AAInfo); if (Alias == MayAlias) return MayAlias; AliasResult ThisAlias = - aliasCheck(SI->getFalseValue(), SISize, SITBAAInfo, - SI2->getFalseValue(), V2Size, V2TBAAInfo); + aliasCheck(SI->getFalseValue(), SISize, SIAAInfo, + SI2->getFalseValue(), V2Size, V2AAInfo); return MergeAliasResults(ThisAlias, Alias); } // If both arms of the Select node NoAlias or MustAlias V2, then returns // NoAlias / MustAlias. Otherwise, returns MayAlias. AliasResult Alias = - aliasCheck(V2, V2Size, V2TBAAInfo, SI->getTrueValue(), SISize, SITBAAInfo); + aliasCheck(V2, V2Size, V2AAInfo, SI->getTrueValue(), SISize, SIAAInfo); if (Alias == MayAlias) return MayAlias; AliasResult ThisAlias = - aliasCheck(V2, V2Size, V2TBAAInfo, SI->getFalseValue(), SISize, SITBAAInfo); + aliasCheck(V2, V2Size, V2AAInfo, SI->getFalseValue(), SISize, SIAAInfo); return MergeAliasResults(ThisAlias, Alias); } @@ -1100,9 +1179,9 @@ BasicAliasAnalysis::aliasSelect(const SelectInst *SI, uint64_t SISize, // against another. AliasAnalysis::AliasResult BasicAliasAnalysis::aliasPHI(const PHINode *PN, uint64_t PNSize, - const MDNode *PNTBAAInfo, + const AAMDNodes &PNAAInfo, const Value *V2, uint64_t V2Size, - const MDNode *V2TBAAInfo) { + const AAMDNodes &V2AAInfo) { // Track phi nodes we have visited. We use this information when we determine // value equivalence. VisitedPhiBBs.insert(PN->getParent()); @@ -1112,8 +1191,8 @@ BasicAliasAnalysis::aliasPHI(const PHINode *PN, uint64_t PNSize, // on corresponding edges. 
if (const PHINode *PN2 = dyn_cast(V2)) if (PN2->getParent() == PN->getParent()) { - LocPair Locs(Location(PN, PNSize, PNTBAAInfo), - Location(V2, V2Size, V2TBAAInfo)); + LocPair Locs(Location(PN, PNSize, PNAAInfo), + Location(V2, V2Size, V2AAInfo)); if (PN > V2) std::swap(Locs.first, Locs.second); // Analyse the PHIs' inputs under the assumption that the PHIs are @@ -1131,9 +1210,9 @@ BasicAliasAnalysis::aliasPHI(const PHINode *PN, uint64_t PNSize, for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) { AliasResult ThisAlias = - aliasCheck(PN->getIncomingValue(i), PNSize, PNTBAAInfo, + aliasCheck(PN->getIncomingValue(i), PNSize, PNAAInfo, PN2->getIncomingValueForBlock(PN->getIncomingBlock(i)), - V2Size, V2TBAAInfo); + V2Size, V2AAInfo); Alias = MergeAliasResults(ThisAlias, Alias); if (Alias == MayAlias) break; @@ -1156,12 +1235,12 @@ BasicAliasAnalysis::aliasPHI(const PHINode *PN, uint64_t PNSize, // sides are PHI nodes. In which case, this is O(m x n) time where 'm' // and 'n' are the number of PHI sources. return MayAlias; - if (UniqueSrc.insert(PV1)) + if (UniqueSrc.insert(PV1).second) V1Srcs.push_back(PV1); } - AliasResult Alias = aliasCheck(V2, V2Size, V2TBAAInfo, - V1Srcs[0], PNSize, PNTBAAInfo); + AliasResult Alias = aliasCheck(V2, V2Size, V2AAInfo, + V1Srcs[0], PNSize, PNAAInfo); // Early exit if the check of the first PHI source against V2 is MayAlias. // Other results are not possible. if (Alias == MayAlias) @@ -1172,8 +1251,8 @@ BasicAliasAnalysis::aliasPHI(const PHINode *PN, uint64_t PNSize, for (unsigned i = 1, e = V1Srcs.size(); i != e; ++i) { Value *V = V1Srcs[i]; - AliasResult ThisAlias = aliasCheck(V2, V2Size, V2TBAAInfo, - V, PNSize, PNTBAAInfo); + AliasResult ThisAlias = aliasCheck(V2, V2Size, V2AAInfo, + V, PNSize, PNAAInfo); Alias = MergeAliasResults(ThisAlias, Alias); if (Alias == MayAlias) break; @@ -1187,9 +1266,9 @@ BasicAliasAnalysis::aliasPHI(const PHINode *PN, uint64_t PNSize, // AliasAnalysis::AliasResult BasicAliasAnalysis::aliasCheck(const Value *V1, uint64_t V1Size, - const MDNode *V1TBAAInfo, + AAMDNodes V1AAInfo, const Value *V2, uint64_t V2Size, - const MDNode *V2TBAAInfo) { + AAMDNodes V2AAInfo) { // If either of the memory references is empty, it doesn't matter what the // pointer values are. if (V1Size == 0 || V2Size == 0) @@ -1269,8 +1348,8 @@ BasicAliasAnalysis::aliasCheck(const Value *V1, uint64_t V1Size, // Check the cache before climbing up use-def chains. This also terminates // otherwise infinitely recursive queries. 
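The UniqueSrc.insert(PV1).second form above reflects the set-insert idiom this patch switches to in several files: insert() yields an (iterator, inserted) pair, and .second reports whether the element was actually new. A standalone sketch with std::set standing in for SmallPtrSet/SmallSet:

#include <cassert>
#include <set>

int main() {
  std::set<int> Visited;
  assert(Visited.insert(42).second);   // first time: newly inserted
  assert(!Visited.insert(42).second);  // already present, not inserted again
  return 0;
}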
- LocPair Locs(Location(V1, V1Size, V1TBAAInfo), - Location(V2, V2Size, V2TBAAInfo)); + LocPair Locs(Location(V1, V1Size, V1AAInfo), + Location(V2, V2Size, V2AAInfo)); if (V1 > V2) std::swap(Locs.first, Locs.second); std::pair Pair = @@ -1284,32 +1363,32 @@ BasicAliasAnalysis::aliasCheck(const Value *V1, uint64_t V1Size, std::swap(V1, V2); std::swap(V1Size, V2Size); std::swap(O1, O2); - std::swap(V1TBAAInfo, V2TBAAInfo); + std::swap(V1AAInfo, V2AAInfo); } if (const GEPOperator *GV1 = dyn_cast(V1)) { - AliasResult Result = aliasGEP(GV1, V1Size, V1TBAAInfo, V2, V2Size, V2TBAAInfo, O1, O2); + AliasResult Result = aliasGEP(GV1, V1Size, V1AAInfo, V2, V2Size, V2AAInfo, O1, O2); if (Result != MayAlias) return AliasCache[Locs] = Result; } if (isa(V2) && !isa(V1)) { std::swap(V1, V2); std::swap(V1Size, V2Size); - std::swap(V1TBAAInfo, V2TBAAInfo); + std::swap(V1AAInfo, V2AAInfo); } if (const PHINode *PN = dyn_cast(V1)) { - AliasResult Result = aliasPHI(PN, V1Size, V1TBAAInfo, - V2, V2Size, V2TBAAInfo); + AliasResult Result = aliasPHI(PN, V1Size, V1AAInfo, + V2, V2Size, V2AAInfo); if (Result != MayAlias) return AliasCache[Locs] = Result; } if (isa(V2) && !isa(V1)) { std::swap(V1, V2); std::swap(V1Size, V2Size); - std::swap(V1TBAAInfo, V2TBAAInfo); + std::swap(V1AAInfo, V2AAInfo); } if (const SelectInst *S1 = dyn_cast(V1)) { - AliasResult Result = aliasSelect(S1, V1Size, V1TBAAInfo, - V2, V2Size, V2TBAAInfo); + AliasResult Result = aliasSelect(S1, V1Size, V1AAInfo, + V2, V2Size, V2AAInfo); if (Result != MayAlias) return AliasCache[Locs] = Result; } @@ -1322,8 +1401,8 @@ BasicAliasAnalysis::aliasCheck(const Value *V1, uint64_t V1Size, return AliasCache[Locs] = PartialAlias; AliasResult Result = - AliasAnalysis::alias(Location(V1, V1Size, V1TBAAInfo), - Location(V2, V2Size, V2TBAAInfo)); + AliasAnalysis::alias(Location(V1, V1Size, V1AAInfo), + Location(V2, V2Size, V2AAInfo)); return AliasCache[Locs] = Result; } @@ -1348,10 +1427,8 @@ bool BasicAliasAnalysis::isValueEqualInPotentialCycles(const Value *V, // Make sure that the visited phis cannot reach the Value. This ensures that // the Values cannot come from different iterations of a potential cycle the // phi nodes could be involved in. - for (SmallPtrSet::iterator PI = VisitedPhiBBs.begin(), - PE = VisitedPhiBBs.end(); - PI != PE; ++PI) - if (isPotentiallyReachable((*PI)->begin(), Inst, DT, LI)) + for (auto *P : VisitedPhiBBs) + if (isPotentiallyReachable(P->begin(), Inst, DT, LI)) return false; return true; diff --git a/lib/Analysis/BlockFrequencyInfoImpl.cpp b/lib/Analysis/BlockFrequencyInfoImpl.cpp index 4fd2c11..06b8acd 100644 --- a/lib/Analysis/BlockFrequencyInfoImpl.cpp +++ b/lib/Analysis/BlockFrequencyInfoImpl.cpp @@ -14,18 +14,12 @@ #include "llvm/Analysis/BlockFrequencyInfoImpl.h" #include "llvm/ADT/SCCIterator.h" #include "llvm/Support/raw_ostream.h" -#include using namespace llvm; using namespace llvm::bfi_detail; #define DEBUG_TYPE "block-freq" -//===----------------------------------------------------------------------===// -// -// BlockMass implementation. -// -//===----------------------------------------------------------------------===// ScaledNumber BlockMass::toScaled() const { if (isFull()) return ScaledNumber(1, 0); @@ -46,11 +40,6 @@ raw_ostream &BlockMass::print(raw_ostream &OS) const { return OS; } -//===----------------------------------------------------------------------===// -// -// BlockFrequencyInfoImpl implementation. 
-// -//===----------------------------------------------------------------------===// namespace { typedef BlockFrequencyInfoImplBase::BlockNode BlockNode; @@ -87,7 +76,8 @@ struct DitheringDistributer { BlockMass takeMass(uint32_t Weight); }; -} + +} // end namespace DitheringDistributer::DitheringDistributer(Distribution &Dist, const BlockMass &Mass) { @@ -121,11 +111,7 @@ void Distribution::add(const BlockNode &Node, uint64_t Amount, Total = NewTotal; // Save the weight. - Weight W; - W.TargetNode = Node; - W.Amount = Amount; - W.Type = Type; - Weights.push_back(W); + Weights.push_back(Weight(Type, Node, Amount)); } static void combineWeight(Weight &W, const Weight &OtherW) { @@ -615,7 +601,8 @@ static void findIrreducibleHeaders( break; } } - assert(Headers.size() >= 2 && "Should be irreducible"); + assert(Headers.size() >= 2 && + "Expected irreducible CFG; -loop-info is likely invalid"); if (Headers.size() == InSCC.size()) { // Every block is a header. std::sort(Headers.begin(), Headers.end()); diff --git a/lib/Analysis/CFG.cpp b/lib/Analysis/CFG.cpp index 8ef5302..25e7bc0 100644 --- a/lib/Analysis/CFG.cpp +++ b/lib/Analysis/CFG.cpp @@ -45,7 +45,7 @@ void llvm::FindFunctionBackedges(const Function &F, bool FoundNew = false; while (I != succ_end(ParentBB)) { BB = *I++; - if (Visited.insert(BB)) { + if (Visited.insert(BB).second) { FoundNew = true; break; } @@ -141,7 +141,7 @@ static bool isPotentiallyReachableInner(SmallVectorImpl &Worklist, SmallSet Visited; do { BasicBlock *BB = Worklist.pop_back_val(); - if (!Visited.insert(BB)) + if (!Visited.insert(BB).second) continue; if (BB == StopBB) return true; diff --git a/lib/Analysis/CFGPrinter.cpp b/lib/Analysis/CFGPrinter.cpp index c2c19d6..89787f82 100644 --- a/lib/Analysis/CFGPrinter.cpp +++ b/lib/Analysis/CFGPrinter.cpp @@ -79,11 +79,11 @@ namespace { bool runOnFunction(Function &F) override { std::string Filename = "cfg." + F.getName().str() + ".dot"; errs() << "Writing '" << Filename << "'..."; - - std::string ErrorInfo; - raw_fd_ostream File(Filename.c_str(), ErrorInfo, sys::fs::F_Text); - if (ErrorInfo.empty()) + std::error_code EC; + raw_fd_ostream File(Filename, EC, sys::fs::F_Text); + + if (!EC) WriteGraph(File, (const Function*)&F); else errs() << " error opening file for writing!"; @@ -114,10 +114,10 @@ namespace { std::string Filename = "cfg." + F.getName().str() + ".dot"; errs() << "Writing '" << Filename << "'..."; - std::string ErrorInfo; - raw_fd_ostream File(Filename.c_str(), ErrorInfo, sys::fs::F_Text); - - if (ErrorInfo.empty()) + std::error_code EC; + raw_fd_ostream File(Filename, EC, sys::fs::F_Text); + + if (!EC) WriteGraph(File, (const Function*)&F, true); else errs() << " error opening file for writing!"; diff --git a/lib/Analysis/CFLAliasAnalysis.cpp b/lib/Analysis/CFLAliasAnalysis.cpp new file mode 100644 index 0000000..5f1b3d3 --- /dev/null +++ b/lib/Analysis/CFLAliasAnalysis.cpp @@ -0,0 +1,1013 @@ +//===- CFLAliasAnalysis.cpp - CFL-Based Alias Analysis Implementation ------==// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements a CFL-based context-insensitive alias analysis +// algorithm. It does not depend on types. 
The algorithm is a mixture of the one +// described in "Demand-driven alias analysis for C" by Xin Zheng and Radu +// Rugina, and "Fast algorithms for Dyck-CFL-reachability with applications to +// Alias Analysis" by Zhang Q, Lyu M R, Yuan H, and Su Z. -- to summarize the +// papers, we build a graph of the uses of a variable, where each node is a +// memory location, and each edge is an action that happened on that memory +// location. The "actions" can be one of Dereference, Reference, Assign, or +// Assign. +// +// Two variables are considered as aliasing iff you can reach one value's node +// from the other value's node and the language formed by concatenating all of +// the edge labels (actions) conforms to a context-free grammar. +// +// Because this algorithm requires a graph search on each query, we execute the +// algorithm outlined in "Fast algorithms..." (mentioned above) +// in order to transform the graph into sets of variables that may alias in +// ~nlogn time (n = number of variables.), which makes queries take constant +// time. +//===----------------------------------------------------------------------===// + +#include "StratifiedSets.h" +#include "llvm/Analysis/Passes.h" +#include "llvm/ADT/BitVector.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/Optional.h" +#include "llvm/ADT/None.h" +#include "llvm/Analysis/AliasAnalysis.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/InstVisitor.h" +#include "llvm/IR/ValueHandle.h" +#include "llvm/Pass.h" +#include "llvm/Support/Allocator.h" +#include "llvm/Support/Compiler.h" +#include "llvm/Support/ErrorHandling.h" +#include +#include +#include +#include + +using namespace llvm; + +// Try to go from a Value* to a Function*. Never returns nullptr. +static Optional parentFunctionOfValue(Value *); + +// Returns possible functions called by the Inst* into the given +// SmallVectorImpl. Returns true if targets found, false otherwise. +// This is templated because InvokeInst/CallInst give us the same +// set of functions that we care about, and I don't like repeating +// myself. +template +static bool getPossibleTargets(Inst *, SmallVectorImpl &); + +// Some instructions need to have their users tracked. Instructions like +// `add` require you to get the users of the Instruction* itself, other +// instructions like `store` require you to get the users of the first +// operand. This function gets the "proper" value to track for each +// type of instruction we support. +static Optional getTargetValue(Instruction *); + +// There are certain instructions (i.e. FenceInst, etc.) that we ignore. +// This notes that we should ignore those. +static bool hasUsefulEdges(Instruction *); + +const StratifiedIndex StratifiedLink::SetSentinel = + std::numeric_limits::max(); + +namespace { +// StratifiedInfo Attribute things. +typedef unsigned StratifiedAttr; +LLVM_CONSTEXPR unsigned MaxStratifiedAttrIndex = NumStratifiedAttrs; +LLVM_CONSTEXPR unsigned AttrAllIndex = 0; +LLVM_CONSTEXPR unsigned AttrGlobalIndex = 1; +LLVM_CONSTEXPR unsigned AttrFirstArgIndex = 2; +LLVM_CONSTEXPR unsigned AttrLastArgIndex = MaxStratifiedAttrIndex; +LLVM_CONSTEXPR unsigned AttrMaxNumArgs = AttrLastArgIndex - AttrFirstArgIndex; + +LLVM_CONSTEXPR StratifiedAttr AttrNone = 0; +LLVM_CONSTEXPR StratifiedAttr AttrAll = ~AttrNone; + +// \brief StratifiedSets call for knowledge of "direction", so this is how we +// represent that locally. 
+enum class Level { Same, Above, Below }; + +// \brief Edges can be one of four "weights" -- each weight must have an inverse +// weight (Assign has Assign; Reference has Dereference). +enum class EdgeType { + // The weight assigned when assigning from or to a value. For example, in: + // %b = getelementptr %a, 0 + // ...The relationships are %b assign %a, and %a assign %b. This used to be + // two edges, but having a distinction bought us nothing. + Assign, + + // The edge used when we have an edge going from some handle to a Value. + // Examples of this include: + // %b = load %a (%b Dereference %a) + // %b = extractelement %a, 0 (%a Dereference %b) + Dereference, + + // The edge used when our edge goes from a value to a handle that may have + // contained it at some point. Examples: + // %b = load %a (%a Reference %b) + // %b = extractelement %a, 0 (%b Reference %a) + Reference +}; + +// \brief Encodes the notion of a "use" +struct Edge { + // \brief Which value the edge is coming from + Value *From; + + // \brief Which value the edge is pointing to + Value *To; + + // \brief Edge weight + EdgeType Weight; + + // \brief Whether we aliased any external values along the way that may be + // invisible to the analysis (i.e. landingpad for exceptions, calls for + // interprocedural analysis, etc.) + StratifiedAttrs AdditionalAttrs; + + Edge(Value *From, Value *To, EdgeType W, StratifiedAttrs A) + : From(From), To(To), Weight(W), AdditionalAttrs(A) {} +}; + +// \brief Information we have about a function and would like to keep around +struct FunctionInfo { + StratifiedSets Sets; + // Lots of functions have < 4 returns. Adjust as necessary. + SmallVector ReturnedValues; + + FunctionInfo(StratifiedSets &&S, + SmallVector &&RV) + : Sets(std::move(S)), ReturnedValues(std::move(RV)) {} +}; + +struct CFLAliasAnalysis; + +struct FunctionHandle : public CallbackVH { + FunctionHandle(Function *Fn, CFLAliasAnalysis *CFLAA) + : CallbackVH(Fn), CFLAA(CFLAA) { + assert(Fn != nullptr); + assert(CFLAA != nullptr); + } + + virtual ~FunctionHandle() {} + + void deleted() override { removeSelfFromCache(); } + void allUsesReplacedWith(Value *) override { removeSelfFromCache(); } + +private: + CFLAliasAnalysis *CFLAA; + + void removeSelfFromCache(); +}; + +struct CFLAliasAnalysis : public ImmutablePass, public AliasAnalysis { +private: + /// \brief Cached mapping of Functions to their StratifiedSets. + /// If a function's sets are currently being built, it is marked + /// in the cache as an Optional without a value. This way, if we + /// have any kind of recursion, it is discernable from a function + /// that simply has empty sets. + DenseMap> Cache; + std::forward_list Handles; + +public: + static char ID; + + CFLAliasAnalysis() : ImmutablePass(ID) { + initializeCFLAliasAnalysisPass(*PassRegistry::getPassRegistry()); + } + + virtual ~CFLAliasAnalysis() {} + + void getAnalysisUsage(AnalysisUsage &AU) const override { + AliasAnalysis::getAnalysisUsage(AU); + } + + void *getAdjustedAnalysisPointer(const void *ID) override { + if (ID == &AliasAnalysis::ID) + return (AliasAnalysis *)this; + return this; + } + + /// \brief Inserts the given Function into the cache. + void scan(Function *Fn); + + void evict(Function *Fn) { Cache.erase(Fn); } + + /// \brief Ensures that the given function is available in the cache. + /// Returns the appropriate entry from the cache. 
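A standalone sketch of the caching scheme described for Cache above, ahead of the real ensureCached below: an optional-valued map entry distinguishes a function whose sets are still being built from one whose sets are built but empty (std::map and std::optional stand in for DenseMap and llvm::Optional; illustration only):

#include <cassert>
#include <map>
#include <optional>
#include <vector>

static std::map<int, std::optional<std::vector<int>>> Cache;

static const std::optional<std::vector<int>> &ensureCached(int Key) {
  auto It = Cache.find(Key);
  if (It == Cache.end()) {
    // Reserve the slot first; a recursive query now sees an entry with no
    // value and can tell it apart from a finished-but-empty result.
    It = Cache.emplace(Key, std::nullopt).first;
    It->second = std::vector<int>{Key};  // placeholder for the real analysis
  }
  return It->second;
}

int main() {
  assert(ensureCached(7).has_value());
  return 0;
}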
+ const Optional &ensureCached(Function *Fn) { + auto Iter = Cache.find(Fn); + if (Iter == Cache.end()) { + scan(Fn); + Iter = Cache.find(Fn); + assert(Iter != Cache.end()); + assert(Iter->second.hasValue()); + } + return Iter->second; + } + + AliasResult query(const Location &LocA, const Location &LocB); + + AliasResult alias(const Location &LocA, const Location &LocB) override { + if (LocA.Ptr == LocB.Ptr) { + if (LocA.Size == LocB.Size) { + return MustAlias; + } else { + return PartialAlias; + } + } + + // Comparisons between global variables and other constants should be + // handled by BasicAA. + if (isa(LocA.Ptr) && isa(LocB.Ptr)) { + return MayAlias; + } + + return query(LocA, LocB); + } + + void initializePass() override { InitializeAliasAnalysis(this); } +}; + +void FunctionHandle::removeSelfFromCache() { + assert(CFLAA != nullptr); + auto *Val = getValPtr(); + CFLAA->evict(cast(Val)); + setValPtr(nullptr); +} + +// \brief Gets the edges our graph should have, based on an Instruction* +class GetEdgesVisitor : public InstVisitor { + CFLAliasAnalysis &AA; + SmallVectorImpl &Output; + +public: + GetEdgesVisitor(CFLAliasAnalysis &AA, SmallVectorImpl &Output) + : AA(AA), Output(Output) {} + + void visitInstruction(Instruction &) { + llvm_unreachable("Unsupported instruction encountered"); + } + + void visitCastInst(CastInst &Inst) { + Output.push_back(Edge(&Inst, Inst.getOperand(0), EdgeType::Assign, + AttrNone)); + } + + void visitBinaryOperator(BinaryOperator &Inst) { + auto *Op1 = Inst.getOperand(0); + auto *Op2 = Inst.getOperand(1); + Output.push_back(Edge(&Inst, Op1, EdgeType::Assign, AttrNone)); + Output.push_back(Edge(&Inst, Op2, EdgeType::Assign, AttrNone)); + } + + void visitAtomicCmpXchgInst(AtomicCmpXchgInst &Inst) { + auto *Ptr = Inst.getPointerOperand(); + auto *Val = Inst.getNewValOperand(); + Output.push_back(Edge(Ptr, Val, EdgeType::Dereference, AttrNone)); + } + + void visitAtomicRMWInst(AtomicRMWInst &Inst) { + auto *Ptr = Inst.getPointerOperand(); + auto *Val = Inst.getValOperand(); + Output.push_back(Edge(Ptr, Val, EdgeType::Dereference, AttrNone)); + } + + void visitPHINode(PHINode &Inst) { + for (unsigned I = 0, E = Inst.getNumIncomingValues(); I != E; ++I) { + Value *Val = Inst.getIncomingValue(I); + Output.push_back(Edge(&Inst, Val, EdgeType::Assign, AttrNone)); + } + } + + void visitGetElementPtrInst(GetElementPtrInst &Inst) { + auto *Op = Inst.getPointerOperand(); + Output.push_back(Edge(&Inst, Op, EdgeType::Assign, AttrNone)); + for (auto I = Inst.idx_begin(), E = Inst.idx_end(); I != E; ++I) + Output.push_back(Edge(&Inst, *I, EdgeType::Assign, AttrNone)); + } + + void visitSelectInst(SelectInst &Inst) { + auto *Condition = Inst.getCondition(); + Output.push_back(Edge(&Inst, Condition, EdgeType::Assign, AttrNone)); + auto *TrueVal = Inst.getTrueValue(); + Output.push_back(Edge(&Inst, TrueVal, EdgeType::Assign, AttrNone)); + auto *FalseVal = Inst.getFalseValue(); + Output.push_back(Edge(&Inst, FalseVal, EdgeType::Assign, AttrNone)); + } + + void visitAllocaInst(AllocaInst &) {} + + void visitLoadInst(LoadInst &Inst) { + auto *Ptr = Inst.getPointerOperand(); + auto *Val = &Inst; + Output.push_back(Edge(Val, Ptr, EdgeType::Reference, AttrNone)); + } + + void visitStoreInst(StoreInst &Inst) { + auto *Ptr = Inst.getPointerOperand(); + auto *Val = Inst.getValueOperand(); + Output.push_back(Edge(Ptr, Val, EdgeType::Dereference, AttrNone)); + } + + void visitVAArgInst(VAArgInst &Inst) { + // We can't fully model va_arg here. 
For *Ptr = Inst.getOperand(0), it does + // two things: + // 1. Loads a value from *((T*)*Ptr). + // 2. Increments (stores to) *Ptr by some target-specific amount. + // For now, we'll handle this like a landingpad instruction (by placing the + // result in its own group, and having that group alias externals). + auto *Val = &Inst; + Output.push_back(Edge(Val, Val, EdgeType::Assign, AttrAll)); + } + + static bool isFunctionExternal(Function *Fn) { + return Fn->isDeclaration() || !Fn->hasLocalLinkage(); + } + + // Gets whether the sets at Index1 above, below, or equal to the sets at + // Index2. Returns None if they are not in the same set chain. + static Optional getIndexRelation(const StratifiedSets &Sets, + StratifiedIndex Index1, + StratifiedIndex Index2) { + if (Index1 == Index2) + return Level::Same; + + const auto *Current = &Sets.getLink(Index1); + while (Current->hasBelow()) { + if (Current->Below == Index2) + return Level::Below; + Current = &Sets.getLink(Current->Below); + } + + Current = &Sets.getLink(Index1); + while (Current->hasAbove()) { + if (Current->Above == Index2) + return Level::Above; + Current = &Sets.getLink(Current->Above); + } + + return NoneType(); + } + + bool + tryInterproceduralAnalysis(const SmallVectorImpl &Fns, + Value *FuncValue, + const iterator_range &Args) { + const unsigned ExpectedMaxArgs = 8; + const unsigned MaxSupportedArgs = 50; + assert(Fns.size() > 0); + + // I put this here to give us an upper bound on time taken by IPA. Is it + // really (realistically) needed? Keep in mind that we do have an n^2 algo. + if (std::distance(Args.begin(), Args.end()) > (int) MaxSupportedArgs) + return false; + + // Exit early if we'll fail anyway + for (auto *Fn : Fns) { + if (isFunctionExternal(Fn) || Fn->isVarArg()) + return false; + auto &MaybeInfo = AA.ensureCached(Fn); + if (!MaybeInfo.hasValue()) + return false; + } + + SmallVector Arguments(Args.begin(), Args.end()); + SmallVector Parameters; + for (auto *Fn : Fns) { + auto &Info = *AA.ensureCached(Fn); + auto &Sets = Info.Sets; + auto &RetVals = Info.ReturnedValues; + + Parameters.clear(); + for (auto &Param : Fn->args()) { + auto MaybeInfo = Sets.find(&Param); + // Did a new parameter somehow get added to the function/slip by? + if (!MaybeInfo.hasValue()) + return false; + Parameters.push_back(*MaybeInfo); + } + + // Adding an edge from argument -> return value for each parameter that + // may alias the return value + for (unsigned I = 0, E = Parameters.size(); I != E; ++I) { + auto &ParamInfo = Parameters[I]; + auto &ArgVal = Arguments[I]; + bool AddEdge = false; + StratifiedAttrs Externals; + for (unsigned X = 0, XE = RetVals.size(); X != XE; ++X) { + auto MaybeInfo = Sets.find(RetVals[X]); + if (!MaybeInfo.hasValue()) + return false; + + auto &RetInfo = *MaybeInfo; + auto RetAttrs = Sets.getLink(RetInfo.Index).Attrs; + auto ParamAttrs = Sets.getLink(ParamInfo.Index).Attrs; + auto MaybeRelation = + getIndexRelation(Sets, ParamInfo.Index, RetInfo.Index); + if (MaybeRelation.hasValue()) { + AddEdge = true; + Externals |= RetAttrs | ParamAttrs; + } + } + if (AddEdge) + Output.push_back(Edge(FuncValue, ArgVal, EdgeType::Assign, + StratifiedAttrs().flip())); + } + + if (Parameters.size() != Arguments.size()) + return false; + + // Adding edges between arguments for arguments that may end up aliasing + // each other. 
This is necessary for functions such as + // void foo(int** a, int** b) { *a = *b; } + // (Technically, the proper sets for this would be those below + // Arguments[I] and Arguments[X], but our algorithm will produce + // extremely similar, and equally correct, results either way) + for (unsigned I = 0, E = Arguments.size(); I != E; ++I) { + auto &MainVal = Arguments[I]; + auto &MainInfo = Parameters[I]; + auto &MainAttrs = Sets.getLink(MainInfo.Index).Attrs; + for (unsigned X = I + 1; X != E; ++X) { + auto &SubInfo = Parameters[X]; + auto &SubVal = Arguments[X]; + auto &SubAttrs = Sets.getLink(SubInfo.Index).Attrs; + auto MaybeRelation = + getIndexRelation(Sets, MainInfo.Index, SubInfo.Index); + + if (!MaybeRelation.hasValue()) + continue; + + auto NewAttrs = SubAttrs | MainAttrs; + Output.push_back(Edge(MainVal, SubVal, EdgeType::Assign, NewAttrs)); + } + } + } + return true; + } + + template void visitCallLikeInst(InstT &Inst) { + SmallVector Targets; + if (getPossibleTargets(&Inst, Targets)) { + if (tryInterproceduralAnalysis(Targets, &Inst, Inst.arg_operands())) + return; + // Cleanup from interprocedural analysis + Output.clear(); + } + + for (Value *V : Inst.arg_operands()) + Output.push_back(Edge(&Inst, V, EdgeType::Assign, AttrAll)); + } + + void visitCallInst(CallInst &Inst) { visitCallLikeInst(Inst); } + + void visitInvokeInst(InvokeInst &Inst) { visitCallLikeInst(Inst); } + + // Because vectors/aggregates are immutable and unaddressable, + // there's nothing we can do to coax a value out of them, other + // than calling Extract{Element,Value}. We can effectively treat + // them as pointers to arbitrary memory locations we can store in + // and load from. + void visitExtractElementInst(ExtractElementInst &Inst) { + auto *Ptr = Inst.getVectorOperand(); + auto *Val = &Inst; + Output.push_back(Edge(Val, Ptr, EdgeType::Reference, AttrNone)); + } + + void visitInsertElementInst(InsertElementInst &Inst) { + auto *Vec = Inst.getOperand(0); + auto *Val = Inst.getOperand(1); + Output.push_back(Edge(&Inst, Vec, EdgeType::Assign, AttrNone)); + Output.push_back(Edge(&Inst, Val, EdgeType::Dereference, AttrNone)); + } + + void visitLandingPadInst(LandingPadInst &Inst) { + // Exceptions come from "nowhere", from our analysis' perspective. + // So we place the instruction its own group, noting that said group may + // alias externals + Output.push_back(Edge(&Inst, &Inst, EdgeType::Assign, AttrAll)); + } + + void visitInsertValueInst(InsertValueInst &Inst) { + auto *Agg = Inst.getOperand(0); + auto *Val = Inst.getOperand(1); + Output.push_back(Edge(&Inst, Agg, EdgeType::Assign, AttrNone)); + Output.push_back(Edge(&Inst, Val, EdgeType::Dereference, AttrNone)); + } + + void visitExtractValueInst(ExtractValueInst &Inst) { + auto *Ptr = Inst.getAggregateOperand(); + Output.push_back(Edge(&Inst, Ptr, EdgeType::Reference, AttrNone)); + } + + void visitShuffleVectorInst(ShuffleVectorInst &Inst) { + auto *From1 = Inst.getOperand(0); + auto *From2 = Inst.getOperand(1); + Output.push_back(Edge(&Inst, From1, EdgeType::Assign, AttrNone)); + Output.push_back(Edge(&Inst, From2, EdgeType::Assign, AttrNone)); + } +}; + +// For a given instruction, we need to know which Value* to get the +// users of in order to build our graph. In some cases (i.e. add), +// we simply need the Instruction*. In other cases (i.e. store), +// finding the users of the Instruction* is useless; we need to find +// the users of the first operand. This handles determining which +// value to follow for us. 
+// +// Note: we *need* to keep this in sync with GetEdgesVisitor. Add +// something to GetEdgesVisitor, add it here -- remove something from +// GetEdgesVisitor, remove it here. +class GetTargetValueVisitor + : public InstVisitor { +public: + Value *visitInstruction(Instruction &Inst) { return &Inst; } + + Value *visitStoreInst(StoreInst &Inst) { return Inst.getPointerOperand(); } + + Value *visitAtomicCmpXchgInst(AtomicCmpXchgInst &Inst) { + return Inst.getPointerOperand(); + } + + Value *visitAtomicRMWInst(AtomicRMWInst &Inst) { + return Inst.getPointerOperand(); + } + + Value *visitInsertElementInst(InsertElementInst &Inst) { + return Inst.getOperand(0); + } + + Value *visitInsertValueInst(InsertValueInst &Inst) { + return Inst.getAggregateOperand(); + } +}; + +// Set building requires a weighted bidirectional graph. +template class WeightedBidirectionalGraph { +public: + typedef std::size_t Node; + +private: + const static Node StartNode = Node(0); + + struct Edge { + EdgeTypeT Weight; + Node Other; + + Edge(const EdgeTypeT &W, const Node &N) + : Weight(W), Other(N) {} + + bool operator==(const Edge &E) const { + return Weight == E.Weight && Other == E.Other; + } + + bool operator!=(const Edge &E) const { return !operator==(E); } + }; + + struct NodeImpl { + std::vector Edges; + }; + + std::vector NodeImpls; + + bool inbounds(Node NodeIndex) const { return NodeIndex < NodeImpls.size(); } + + const NodeImpl &getNode(Node N) const { return NodeImpls[N]; } + NodeImpl &getNode(Node N) { return NodeImpls[N]; } + +public: + // ----- Various Edge iterators for the graph ----- // + + // \brief Iterator for edges. Because this graph is bidirected, we don't + // allow modificaiton of the edges using this iterator. Additionally, the + // iterator becomes invalid if you add edges to or from the node you're + // getting the edges of. + struct EdgeIterator : public std::iterator> { + EdgeIterator(const typename std::vector::const_iterator &Iter) + : Current(Iter) {} + + EdgeIterator(NodeImpl &Impl) : Current(Impl.begin()) {} + + EdgeIterator &operator++() { + ++Current; + return *this; + } + + EdgeIterator operator++(int) { + EdgeIterator Copy(Current); + operator++(); + return Copy; + } + + std::tuple &operator*() { + Store = std::make_tuple(Current->Weight, Current->Other); + return Store; + } + + bool operator==(const EdgeIterator &Other) const { + return Current == Other.Current; + } + + bool operator!=(const EdgeIterator &Other) const { + return !operator==(Other); + } + + private: + typename std::vector::const_iterator Current; + std::tuple Store; + }; + + // Wrapper for EdgeIterator with begin()/end() calls. 
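A standalone sketch of the same begin()/end() wrapper idea, ahead of the real EdgeIterable below: a small object holds a pair of iterators and exposes only begin() and end(), so callers can range-for over a node's edges without touching the underlying vector (plain std types; illustration only):

#include <cstdio>
#include <vector>

struct IntRange {
  explicit IntRange(const std::vector<int> &V)
      : BeginIter(V.begin()), EndIter(V.end()) {}
  std::vector<int>::const_iterator begin() const { return BeginIter; }
  std::vector<int>::const_iterator end() const { return EndIter; }

private:
  std::vector<int>::const_iterator BeginIter, EndIter;
};

int main() {
  std::vector<int> Edges{1, 2, 3};
  for (int E : IntRange(Edges))
    std::printf("%d\n", E);
  return 0;
}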
+ struct EdgeIterable { + EdgeIterable(const std::vector &Edges) + : BeginIter(Edges.begin()), EndIter(Edges.end()) {} + + EdgeIterator begin() { return EdgeIterator(BeginIter); } + + EdgeIterator end() { return EdgeIterator(EndIter); } + + private: + typename std::vector::const_iterator BeginIter; + typename std::vector::const_iterator EndIter; + }; + + // ----- Actual graph-related things ----- // + + WeightedBidirectionalGraph() {} + + WeightedBidirectionalGraph(WeightedBidirectionalGraph &&Other) + : NodeImpls(std::move(Other.NodeImpls)) {} + + WeightedBidirectionalGraph & + operator=(WeightedBidirectionalGraph &&Other) { + NodeImpls = std::move(Other.NodeImpls); + return *this; + } + + Node addNode() { + auto Index = NodeImpls.size(); + auto NewNode = Node(Index); + NodeImpls.push_back(NodeImpl()); + return NewNode; + } + + void addEdge(Node From, Node To, const EdgeTypeT &Weight, + const EdgeTypeT &ReverseWeight) { + assert(inbounds(From)); + assert(inbounds(To)); + auto &FromNode = getNode(From); + auto &ToNode = getNode(To); + FromNode.Edges.push_back(Edge(Weight, To)); + ToNode.Edges.push_back(Edge(ReverseWeight, From)); + } + + EdgeIterable edgesFor(const Node &N) const { + const auto &Node = getNode(N); + return EdgeIterable(Node.Edges); + } + + bool empty() const { return NodeImpls.empty(); } + std::size_t size() const { return NodeImpls.size(); } + + // \brief Gets an arbitrary node in the graph as a starting point for + // traversal. + Node getEntryNode() { + assert(inbounds(StartNode)); + return StartNode; + } +}; + +typedef WeightedBidirectionalGraph> GraphT; +typedef DenseMap NodeMapT; +} + +// -- Setting up/registering CFLAA pass -- // +char CFLAliasAnalysis::ID = 0; + +INITIALIZE_AG_PASS(CFLAliasAnalysis, AliasAnalysis, "cfl-aa", + "CFL-Based AA implementation", false, true, false) + +ImmutablePass *llvm::createCFLAliasAnalysisPass() { + return new CFLAliasAnalysis(); +} + +//===----------------------------------------------------------------------===// +// Function declarations that require types defined in the namespace above +//===----------------------------------------------------------------------===// + +// Given an argument number, returns the appropriate Attr index to set. +static StratifiedAttr argNumberToAttrIndex(StratifiedAttr); + +// Given a Value, potentially return which AttrIndex it maps to. +static Optional valueToAttrIndex(Value *Val); + +// Gets the inverse of a given EdgeType. +static EdgeType flipWeight(EdgeType); + +// Gets edges of the given Instruction*, writing them to the SmallVector*. +static void argsToEdges(CFLAliasAnalysis &, Instruction *, + SmallVectorImpl &); + +// Gets the "Level" that one should travel in StratifiedSets +// given an EdgeType. +static Level directionOfEdgeType(EdgeType); + +// Builds the graph needed for constructing the StratifiedSets for the +// given function +static void buildGraphFrom(CFLAliasAnalysis &, Function *, + SmallVectorImpl &, NodeMapT &, GraphT &); + +// Builds the graph + StratifiedSets for a function. 
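A standalone sketch of the addEdge scheme above: every edge is recorded twice, once per endpoint, with the second copy carrying the inverse weight so the graph can be walked in either direction (plain std containers; illustration only):

#include <cassert>
#include <cstddef>
#include <utility>
#include <vector>

enum class W { Assign, Dereference, Reference };

static W flip(W X) {
  if (X == W::Dereference) return W::Reference;
  if (X == W::Reference) return W::Dereference;
  return W::Assign;  // Assign is its own inverse
}

struct Graph {
  std::vector<std::vector<std::pair<W, std::size_t>>> Adj;

  std::size_t addNode() {
    Adj.emplace_back();
    return Adj.size() - 1;
  }

  void addEdge(std::size_t From, std::size_t To, W Weight) {
    Adj[From].push_back({Weight, To});
    Adj[To].push_back({flip(Weight), From});  // reverse edge, inverse weight
  }
};

int main() {
  Graph G;
  std::size_t A = G.addNode(), B = G.addNode();
  G.addEdge(A, B, W::Dereference);
  assert(G.Adj[B].front().first == W::Reference);
  return 0;
}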
+static FunctionInfo buildSetsFrom(CFLAliasAnalysis &, Function *); + +static Optional parentFunctionOfValue(Value *Val) { + if (auto *Inst = dyn_cast(Val)) { + auto *Bb = Inst->getParent(); + return Bb->getParent(); + } + + if (auto *Arg = dyn_cast(Val)) + return Arg->getParent(); + return NoneType(); +} + +template +static bool getPossibleTargets(Inst *Call, + SmallVectorImpl &Output) { + if (auto *Fn = Call->getCalledFunction()) { + Output.push_back(Fn); + return true; + } + + // TODO: If the call is indirect, we might be able to enumerate all potential + // targets of the call and return them, rather than just failing. + return false; +} + +static Optional getTargetValue(Instruction *Inst) { + GetTargetValueVisitor V; + return V.visit(Inst); +} + +static bool hasUsefulEdges(Instruction *Inst) { + bool IsNonInvokeTerminator = + isa(Inst) && !isa(Inst); + return !isa(Inst) && !isa(Inst) && !IsNonInvokeTerminator; +} + +static Optional valueToAttrIndex(Value *Val) { + if (isa(Val)) + return AttrGlobalIndex; + + if (auto *Arg = dyn_cast(Val)) + if (!Arg->hasNoAliasAttr()) + return argNumberToAttrIndex(Arg->getArgNo()); + return NoneType(); +} + +static StratifiedAttr argNumberToAttrIndex(unsigned ArgNum) { + if (ArgNum > AttrMaxNumArgs) + return AttrAllIndex; + return ArgNum + AttrFirstArgIndex; +} + +static EdgeType flipWeight(EdgeType Initial) { + switch (Initial) { + case EdgeType::Assign: + return EdgeType::Assign; + case EdgeType::Dereference: + return EdgeType::Reference; + case EdgeType::Reference: + return EdgeType::Dereference; + } + llvm_unreachable("Incomplete coverage of EdgeType enum"); +} + +static void argsToEdges(CFLAliasAnalysis &Analysis, Instruction *Inst, + SmallVectorImpl &Output) { + GetEdgesVisitor v(Analysis, Output); + v.visit(Inst); +} + +static Level directionOfEdgeType(EdgeType Weight) { + switch (Weight) { + case EdgeType::Reference: + return Level::Above; + case EdgeType::Dereference: + return Level::Below; + case EdgeType::Assign: + return Level::Same; + } + llvm_unreachable("Incomplete switch coverage"); +} + +// Aside: We may remove graph construction entirely, because it doesn't really +// buy us much that we don't already have. I'd like to add interprocedural +// analysis prior to this however, in case that somehow requires the graph +// produced by this for efficient execution +static void buildGraphFrom(CFLAliasAnalysis &Analysis, Function *Fn, + SmallVectorImpl &ReturnedValues, + NodeMapT &Map, GraphT &Graph) { + const auto findOrInsertNode = [&Map, &Graph](Value *Val) { + auto Pair = Map.insert(std::make_pair(Val, GraphT::Node())); + auto &Iter = Pair.first; + if (Pair.second) { + auto NewNode = Graph.addNode(); + Iter->second = NewNode; + } + return Iter->second; + }; + + SmallVector Edges; + for (auto &Bb : Fn->getBasicBlockList()) { + for (auto &Inst : Bb.getInstList()) { + // We don't want the edges of most "return" instructions, but we *do* want + // to know what can be returned. + if (auto *Ret = dyn_cast(&Inst)) + ReturnedValues.push_back(Ret); + + if (!hasUsefulEdges(&Inst)) + continue; + + Edges.clear(); + argsToEdges(Analysis, &Inst, Edges); + + // In the case of an unused alloca (or similar), edges may be empty. Note + // that it exists so we can potentially answer NoAlias. 
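A standalone sketch of the findOrInsertNode idiom used in buildGraphFrom above: a single map insert both looks up and reserves the slot, and a fresh node id is allocated only when the insertion actually happened (std::map stands in for DenseMap; illustration only):

#include <cassert>
#include <cstddef>
#include <map>
#include <string>

static std::map<std::string, std::size_t> Map;
static std::size_t NumNodes = 0;

static std::size_t findOrInsertNode(const std::string &Val) {
  auto Pair = Map.insert({Val, 0});
  if (Pair.second)               // not seen before: allocate a fresh node id
    Pair.first->second = NumNodes++;
  return Pair.first->second;
}

int main() {
  std::size_t A = findOrInsertNode("a");
  assert(findOrInsertNode("a") == A);   // second lookup returns the same node
  assert(findOrInsertNode("b") != A);
  return 0;
}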
+ if (Edges.empty()) { + auto MaybeVal = getTargetValue(&Inst); + assert(MaybeVal.hasValue()); + auto *Target = *MaybeVal; + findOrInsertNode(Target); + continue; + } + + for (const Edge &E : Edges) { + auto To = findOrInsertNode(E.To); + auto From = findOrInsertNode(E.From); + auto FlippedWeight = flipWeight(E.Weight); + auto Attrs = E.AdditionalAttrs; + Graph.addEdge(From, To, std::make_pair(E.Weight, Attrs), + std::make_pair(FlippedWeight, Attrs)); + } + } + } +} + +static FunctionInfo buildSetsFrom(CFLAliasAnalysis &Analysis, Function *Fn) { + NodeMapT Map; + GraphT Graph; + SmallVector ReturnedValues; + + buildGraphFrom(Analysis, Fn, ReturnedValues, Map, Graph); + + DenseMap NodeValueMap; + NodeValueMap.resize(Map.size()); + for (const auto &Pair : Map) + NodeValueMap.insert(std::make_pair(Pair.second, Pair.first)); + + const auto findValueOrDie = [&NodeValueMap](GraphT::Node Node) { + auto ValIter = NodeValueMap.find(Node); + assert(ValIter != NodeValueMap.end()); + return ValIter->second; + }; + + StratifiedSetsBuilder Builder; + + SmallVector Worklist; + for (auto &Pair : Map) { + Worklist.clear(); + + auto *Value = Pair.first; + Builder.add(Value); + auto InitialNode = Pair.second; + Worklist.push_back(InitialNode); + while (!Worklist.empty()) { + auto Node = Worklist.pop_back_val(); + auto *CurValue = findValueOrDie(Node); + if (isa(CurValue) && !isa(CurValue)) + continue; + + for (const auto &EdgeTuple : Graph.edgesFor(Node)) { + auto Weight = std::get<0>(EdgeTuple); + auto Label = Weight.first; + auto &OtherNode = std::get<1>(EdgeTuple); + auto *OtherValue = findValueOrDie(OtherNode); + + if (isa(OtherValue) && !isa(OtherValue)) + continue; + + bool Added; + switch (directionOfEdgeType(Label)) { + case Level::Above: + Added = Builder.addAbove(CurValue, OtherValue); + break; + case Level::Below: + Added = Builder.addBelow(CurValue, OtherValue); + break; + case Level::Same: + Added = Builder.addWith(CurValue, OtherValue); + break; + } + + if (Added) { + auto Aliasing = Weight.second; + if (auto MaybeCurIndex = valueToAttrIndex(CurValue)) + Aliasing.set(*MaybeCurIndex); + if (auto MaybeOtherIndex = valueToAttrIndex(OtherValue)) + Aliasing.set(*MaybeOtherIndex); + Builder.noteAttributes(CurValue, Aliasing); + Builder.noteAttributes(OtherValue, Aliasing); + Worklist.push_back(OtherNode); + } + } + } + } + + // There are times when we end up with parameters not in our graph (i.e. if + // it's only used as the condition of a branch). Other bits of code depend on + // things that were present during construction being present in the graph. + // So, we add all present arguments here. 
+ for (auto &Arg : Fn->args()) { + Builder.add(&Arg); + } + + return FunctionInfo(Builder.build(), std::move(ReturnedValues)); +} + +void CFLAliasAnalysis::scan(Function *Fn) { + auto InsertPair = Cache.insert(std::make_pair(Fn, Optional())); + (void)InsertPair; + assert(InsertPair.second && + "Trying to scan a function that has already been cached"); + + FunctionInfo Info(buildSetsFrom(*this, Fn)); + Cache[Fn] = std::move(Info); + Handles.push_front(FunctionHandle(Fn, this)); +} + +AliasAnalysis::AliasResult +CFLAliasAnalysis::query(const AliasAnalysis::Location &LocA, + const AliasAnalysis::Location &LocB) { + auto *ValA = const_cast(LocA.Ptr); + auto *ValB = const_cast(LocB.Ptr); + + Function *Fn = nullptr; + auto MaybeFnA = parentFunctionOfValue(ValA); + auto MaybeFnB = parentFunctionOfValue(ValB); + if (!MaybeFnA.hasValue() && !MaybeFnB.hasValue()) { + llvm_unreachable("Don't know how to extract the parent function " + "from values A or B"); + } + + if (MaybeFnA.hasValue()) { + Fn = *MaybeFnA; + assert((!MaybeFnB.hasValue() || *MaybeFnB == *MaybeFnA) && + "Interprocedural queries not supported"); + } else { + Fn = *MaybeFnB; + } + + assert(Fn != nullptr); + auto &MaybeInfo = ensureCached(Fn); + assert(MaybeInfo.hasValue()); + + auto &Sets = MaybeInfo->Sets; + auto MaybeA = Sets.find(ValA); + if (!MaybeA.hasValue()) + return AliasAnalysis::MayAlias; + + auto MaybeB = Sets.find(ValB); + if (!MaybeB.hasValue()) + return AliasAnalysis::MayAlias; + + auto SetA = *MaybeA; + auto SetB = *MaybeB; + + if (SetA.Index == SetB.Index) + return AliasAnalysis::PartialAlias; + + auto AttrsA = Sets.getLink(SetA.Index).Attrs; + auto AttrsB = Sets.getLink(SetB.Index).Attrs; + // Stratified set attributes are used as markets to signify whether a member + // of a StratifiedSet (or a member of a set above the current set) has + // interacted with either arguments or globals. "Interacted with" meaning + // its value may be different depending on the value of an argument or + // global. The thought behind this is that, because arguments and globals + // may alias each other, if AttrsA and AttrsB have touched args/globals, + // we must conservatively say that they alias. However, if at least one of + // the sets has no values that could legally be altered by changing the value + // of an argument or global, then we don't have to be as conservative. 
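A standalone sketch of the attribute test that follows: each stratified set carries a bitset recording whether its members may have interacted with arguments or globals, and two distinct sets are reported as possibly aliasing only when both bitsets are non-empty (std::bitset stands in for StratifiedAttrs; illustration only):

#include <bitset>
#include <cassert>

static bool mayAliasByAttrs(const std::bitset<32> &AttrsA,
                            const std::bitset<32> &AttrsB) {
  return AttrsA.any() && AttrsB.any();
}

int main() {
  std::bitset<32> Clean, TouchedGlobal;
  TouchedGlobal.set(1);  // e.g. the global-variable attribute bit
  assert(!mayAliasByAttrs(Clean, TouchedGlobal));
  assert(mayAliasByAttrs(TouchedGlobal, TouchedGlobal));
  return 0;
}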
+ if (AttrsA.any() && AttrsB.any()) + return AliasAnalysis::MayAlias; + + return AliasAnalysis::NoAlias; +} diff --git a/lib/Analysis/CMakeLists.txt b/lib/Analysis/CMakeLists.txt index d1632fd..4e9664f 100644 --- a/lib/Analysis/CMakeLists.txt +++ b/lib/Analysis/CMakeLists.txt @@ -5,12 +5,14 @@ add_llvm_library(LLVMAnalysis AliasDebugger.cpp AliasSetTracker.cpp Analysis.cpp + AssumptionTracker.cpp BasicAliasAnalysis.cpp BlockFrequencyInfo.cpp BlockFrequencyInfoImpl.cpp BranchProbabilityInfo.cpp CFG.cpp CFGPrinter.cpp + CFLAliasAnalysis.cpp CGSCCPassManager.cpp CaptureTracking.cpp CostModel.cpp @@ -20,6 +22,7 @@ add_llvm_library(LLVMAnalysis DependenceAnalysis.cpp DomPrinter.cpp DominanceFrontier.cpp + FunctionTargetTransformInfo.cpp IVUsers.cpp InstCount.cpp InstructionSimplify.cpp @@ -53,6 +56,7 @@ add_llvm_library(LLVMAnalysis TargetTransformInfo.cpp Trace.cpp TypeBasedAliasAnalysis.cpp + ScopedNoAliasAA.cpp ValueTracking.cpp ) diff --git a/lib/Analysis/CaptureTracking.cpp b/lib/Analysis/CaptureTracking.cpp index 3708e60..a271729 100644 --- a/lib/Analysis/CaptureTracking.cpp +++ b/lib/Analysis/CaptureTracking.cpp @@ -20,8 +20,10 @@ #include "llvm/ADT/SmallVector.h" #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Analysis/CaptureTracking.h" +#include "llvm/Analysis/CFG.h" #include "llvm/IR/CallSite.h" #include "llvm/IR/Constants.h" +#include "llvm/IR/Dominators.h" #include "llvm/IR/Instructions.h" using namespace llvm; @@ -49,6 +51,65 @@ namespace { bool Captured; }; + + /// Only find pointer captures which happen before the given instruction. Uses + /// the dominator tree to determine whether one instruction is before another. + /// Only support the case where the Value is defined in the same basic block + /// as the given instruction and the use. + struct CapturesBefore : public CaptureTracker { + CapturesBefore(bool ReturnCaptures, const Instruction *I, DominatorTree *DT, + bool IncludeI) + : BeforeHere(I), DT(DT), ReturnCaptures(ReturnCaptures), + IncludeI(IncludeI), Captured(false) {} + + void tooManyUses() override { Captured = true; } + + bool shouldExplore(const Use *U) override { + Instruction *I = cast(U->getUser()); + if (BeforeHere == I && !IncludeI) + return false; + + BasicBlock *BB = I->getParent(); + // We explore this usage only if the usage can reach "BeforeHere". + // If use is not reachable from entry, there is no need to explore. + if (BeforeHere != I && !DT->isReachableFromEntry(BB)) + return false; + // If the value is defined in the same basic block as use and BeforeHere, + // there is no need to explore the use if BeforeHere dominates use. + // Check whether there is a path from I to BeforeHere. + if (BeforeHere != I && DT->dominates(BeforeHere, I) && + !isPotentiallyReachable(I, BeforeHere, DT)) + return false; + return true; + } + + bool captured(const Use *U) override { + if (isa(U->getUser()) && !ReturnCaptures) + return false; + + Instruction *I = cast(U->getUser()); + if (BeforeHere == I && !IncludeI) + return false; + + BasicBlock *BB = I->getParent(); + // Same logic as in shouldExplore. 
+ if (BeforeHere != I && !DT->isReachableFromEntry(BB)) + return false; + if (BeforeHere != I && DT->dominates(BeforeHere, I) && + !isPotentiallyReachable(I, BeforeHere, DT)) + return false; + Captured = true; + return true; + } + + const Instruction *BeforeHere; + DominatorTree *DT; + + bool ReturnCaptures; + bool IncludeI; + + bool Captured; + }; } /// PointerMayBeCaptured - Return true if this pointer value may be captured @@ -74,6 +135,32 @@ bool llvm::PointerMayBeCaptured(const Value *V, return SCT.Captured; } +/// PointerMayBeCapturedBefore - Return true if this pointer value may be +/// captured by the enclosing function (which is required to exist). If a +/// DominatorTree is provided, only captures which happen before the given +/// instruction are considered. This routine can be expensive, so consider +/// caching the results. The boolean ReturnCaptures specifies whether +/// returning the value (or part of it) from the function counts as capturing +/// it or not. The boolean StoreCaptures specified whether storing the value +/// (or part of it) into memory anywhere automatically counts as capturing it +/// or not. +bool llvm::PointerMayBeCapturedBefore(const Value *V, bool ReturnCaptures, + bool StoreCaptures, const Instruction *I, + DominatorTree *DT, bool IncludeI) { + assert(!isa(V) && + "It doesn't make sense to ask whether a global is captured."); + + if (!DT) + return PointerMayBeCaptured(V, ReturnCaptures, StoreCaptures); + + // TODO: See comment in PointerMayBeCaptured regarding what could be done + // with StoreCaptures. + + CapturesBefore CB(ReturnCaptures, I, DT, IncludeI); + PointerMayBeCaptured(V, &CB); + return CB.Captured; +} + /// TODO: Write a new FunctionPass AliasAnalysis so that it can keep /// a cache. Then we can move the code from BasicAliasAnalysis into /// that path, and remove this threshold. @@ -152,7 +239,7 @@ void llvm::PointerMayBeCaptured(const Value *V, CaptureTracker *Tracker) { if (Count++ >= Threshold) return Tracker->tooManyUses(); - if (Visited.insert(&UU)) + if (Visited.insert(&UU).second) if (Tracker->shouldExplore(&UU)) Worklist.push_back(&UU); } diff --git a/lib/Analysis/CodeMetrics.cpp b/lib/Analysis/CodeMetrics.cpp index 4c8a093..f29e4a2 100644 --- a/lib/Analysis/CodeMetrics.cpp +++ b/lib/Analysis/CodeMetrics.cpp @@ -11,23 +11,101 @@ // //===----------------------------------------------------------------------===// +#include "llvm/Analysis/AssumptionTracker.h" #include "llvm/Analysis/CodeMetrics.h" +#include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/TargetTransformInfo.h" +#include "llvm/Analysis/ValueTracking.h" #include "llvm/IR/CallSite.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/Function.h" #include "llvm/IR/IntrinsicInst.h" +#include "llvm/Support/Debug.h" + +#define DEBUG_TYPE "code-metrics" using namespace llvm; +static void completeEphemeralValues(SmallVector &WorkSet, + SmallPtrSetImpl &EphValues) { + SmallPtrSet Visited; + + // Make sure that all of the items in WorkSet are in our EphValues set. + EphValues.insert(WorkSet.begin(), WorkSet.end()); + + // Note: We don't speculate PHIs here, so we'll miss instruction chains kept + // alive only by ephemeral values. + + while (!WorkSet.empty()) { + const Value *V = WorkSet.front(); + WorkSet.erase(WorkSet.begin()); + + if (!Visited.insert(V).second) + continue; + + // If all uses of this value are ephemeral, then so is this value. 
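A standalone sketch of the propagation rule being implemented here: seed the set with the llvm.assume calls, then repeatedly admit any value all of whose users are already in the set, walking down through its operands. Plain integer ids and adjacency lists stand in for Values and use lists (illustration only):

#include <cassert>
#include <set>
#include <vector>

// Users[V]: values that use V.  Operands[V]: values V uses.
static std::set<int>
propagateEphemeral(std::vector<int> WorkSet,
                   const std::vector<std::vector<int>> &Users,
                   const std::vector<std::vector<int>> &Operands) {
  std::set<int> Eph(WorkSet.begin(), WorkSet.end());
  std::set<int> Visited;
  while (!WorkSet.empty()) {
    int V = WorkSet.back();
    WorkSet.pop_back();
    if (!Visited.insert(V).second)
      continue;
    bool AllUsersEph = true;
    for (int U : Users[V])
      if (!Eph.count(U)) {
        AllUsersEph = false;
        break;
      }
    if (!AllUsersEph)
      continue;
    Eph.insert(V);
    for (int Op : Operands[V])   // keep walking down the operand chain
      WorkSet.push_back(Op);
  }
  return Eph;
}

int main() {
  // Value 0 is the assume-like seed; it uses 1; 1 uses 2; but 2 also has a
  // non-ephemeral user 3, so 2 must stay out of the set.
  std::vector<std::vector<int>> Users    = {{}, {0}, {1, 3}, {}};
  std::vector<std::vector<int>> Operands = {{1}, {2}, {}, {}};
  std::set<int> Eph = propagateEphemeral({0}, Users, Operands);
  assert(Eph.count(0) && Eph.count(1) && !Eph.count(2));
  return 0;
}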
+ bool FoundNEUse = false; + for (const User *I : V->users()) + if (!EphValues.count(I)) { + FoundNEUse = true; + break; + } + + if (FoundNEUse) + continue; + + EphValues.insert(V); + DEBUG(dbgs() << "Ephemeral Value: " << *V << "\n"); + + if (const User *U = dyn_cast(V)) + for (const Value *J : U->operands()) { + if (isSafeToSpeculativelyExecute(J)) + WorkSet.push_back(J); + } + } +} + +// Find all ephemeral values. +void CodeMetrics::collectEphemeralValues(const Loop *L, AssumptionTracker *AT, + SmallPtrSetImpl &EphValues) { + SmallVector WorkSet; + + for (auto &I : AT->assumptions(L->getHeader()->getParent())) { + // Filter out call sites outside of the loop so we don't to a function's + // worth of work for each of its loops (and, in the common case, ephemeral + // values in the loop are likely due to @llvm.assume calls in the loop). + if (!L->contains(I->getParent())) + continue; + + WorkSet.push_back(I); + } + + completeEphemeralValues(WorkSet, EphValues); +} + +void CodeMetrics::collectEphemeralValues(const Function *F, AssumptionTracker *AT, + SmallPtrSetImpl &EphValues) { + SmallVector WorkSet; + + for (auto &I : AT->assumptions(const_cast(F))) + WorkSet.push_back(I); + + completeEphemeralValues(WorkSet, EphValues); +} + /// analyzeBasicBlock - Fill in the current structure with information gleaned /// from the specified block. void CodeMetrics::analyzeBasicBlock(const BasicBlock *BB, - const TargetTransformInfo &TTI) { + const TargetTransformInfo &TTI, + SmallPtrSetImpl &EphValues) { ++NumBlocks; unsigned NumInstsBeforeThisBB = NumInsts; for (BasicBlock::const_iterator II = BB->begin(), E = BB->end(); II != E; ++II) { + // Skip ephemeral values. + if (EphValues.count(II)) + continue; + // Special handling for calls. if (isa(II) || isa(II)) { ImmutableCallSite CS(cast(II)); diff --git a/lib/Analysis/ConstantFolding.cpp b/lib/Analysis/ConstantFolding.cpp index eb3e2c6..fd8f2ae 100644 --- a/lib/Analysis/ConstantFolding.cpp +++ b/lib/Analysis/ConstantFolding.cpp @@ -47,15 +47,16 @@ using namespace llvm; // Constant Folding internal helper functions //===----------------------------------------------------------------------===// -/// FoldBitCast - Constant fold bitcast, symbolically evaluating it with -/// DataLayout. This always returns a non-null constant, but it may be a +/// Constant fold bitcast, symbolically evaluating it with DataLayout. +/// This always returns a non-null constant, but it may be a /// ConstantExpr if unfoldable. static Constant *FoldBitCast(Constant *C, Type *DestTy, const DataLayout &TD) { // Catch the obvious splat cases. if (C->isNullValue() && !DestTy->isX86_MMXTy()) return Constant::getNullValue(DestTy); - if (C->isAllOnesValue() && !DestTy->isX86_MMXTy()) + if (C->isAllOnesValue() && !DestTy->isX86_MMXTy() && + !DestTy->isPtrOrPtrVectorTy()) // Don't get ones for ptr types! return Constant::getAllOnesValue(DestTy); // Handle a vector->integer cast. @@ -197,7 +198,7 @@ static Constant *FoldBitCast(Constant *C, Type *DestTy, // Handle: bitcast (<2 x i64> to <4 x i32>) unsigned Ratio = NumDstElt/NumSrcElt; - unsigned DstBitSize = DstEltTy->getPrimitiveSizeInBits(); + unsigned DstBitSize = TD.getTypeSizeInBits(DstEltTy); // Loop over each source value, expanding into multiple results. for (unsigned i = 0; i != NumSrcElt; ++i) { @@ -213,6 +214,15 @@ static Constant *FoldBitCast(Constant *C, Type *DestTy, ConstantInt::get(Src->getType(), ShiftAmt)); ShiftAmt += isLittleEndian ? 
DstBitSize : -DstBitSize; + // Truncate the element to an integer with the same pointer size and + // convert the element back to a pointer using a inttoptr. + if (DstEltTy->isPointerTy()) { + IntegerType *DstIntTy = Type::getIntNTy(C->getContext(), DstBitSize); + Constant *CE = ConstantExpr::getTrunc(Elt, DstIntTy); + Result.push_back(ConstantExpr::getIntToPtr(CE, DstEltTy)); + continue; + } + // Truncate and remember this piece. Result.push_back(ConstantExpr::getTrunc(Elt, DstEltTy)); } @@ -222,9 +232,8 @@ static Constant *FoldBitCast(Constant *C, Type *DestTy, } -/// IsConstantOffsetFromGlobal - If this constant is actually a constant offset -/// from a global, return the global and the constant. Because of -/// constantexprs, this function is recursive. +/// If this constant is a constant offset from a global, return the global and +/// the constant. Because of constantexprs, this function is recursive. static bool IsConstantOffsetFromGlobal(Constant *C, GlobalValue *&GV, APInt &Offset, const DataLayout &TD) { // Trivial case, constant is the global. @@ -240,7 +249,8 @@ static bool IsConstantOffsetFromGlobal(Constant *C, GlobalValue *&GV, // Look through ptr->int and ptr->ptr casts. if (CE->getOpcode() == Instruction::PtrToInt || - CE->getOpcode() == Instruction::BitCast) + CE->getOpcode() == Instruction::BitCast || + CE->getOpcode() == Instruction::AddrSpaceCast) return IsConstantOffsetFromGlobal(CE->getOperand(0), GV, Offset, TD); // i32* getelementptr ([5 x i32]* @a, i32 0, i32 5) @@ -263,10 +273,10 @@ static bool IsConstantOffsetFromGlobal(Constant *C, GlobalValue *&GV, return true; } -/// ReadDataFromGlobal - Recursive helper to read bits out of global. C is the -/// constant being copied out of. ByteOffset is an offset into C. CurPtr is the -/// pointer to copy results into and BytesLeft is the number of bytes left in -/// the CurPtr buffer. TD is the target data. +/// Recursive helper to read bits out of global. C is the constant being copied +/// out of. ByteOffset is an offset into C. CurPtr is the pointer to copy +/// results into and BytesLeft is the number of bytes left in +/// the CurPtr buffer. TD is the target data. static bool ReadDataFromGlobal(Constant *C, uint64_t ByteOffset, unsigned char *CurPtr, unsigned BytesLeft, const DataLayout &TD) { @@ -517,9 +527,8 @@ static Constant *ConstantFoldLoadThroughBitcast(ConstantExpr *CE, return nullptr; } -/// ConstantFoldLoadFromConstPtr - Return the value that a load from C would -/// produce if it is constant and determinable. If this is not determinable, -/// return null. +/// Return the value that a load from C would produce if it is constant and +/// determinable. If this is not determinable, return null. Constant *llvm::ConstantFoldLoadFromConstPtr(Constant *C, const DataLayout *TD) { // First, try the easy cases: @@ -609,7 +618,7 @@ static Constant *ConstantFoldLoadInst(const LoadInst *LI, const DataLayout *TD){ return nullptr; } -/// SymbolicallyEvaluateBinop - One of Op0/Op1 is a constant expression. +/// One of Op0/Op1 is a constant expression. /// Attempt to symbolically evaluate the result of a binary operator merging /// these together. If target data info is available, it is provided as DL, /// otherwise DL is null. @@ -666,9 +675,8 @@ static Constant *SymbolicallyEvaluateBinop(unsigned Opc, Constant *Op0, return nullptr; } -/// CastGEPIndices - If array indices are not pointer-sized integers, -/// explicitly cast them so that they aren't implicitly casted by the -/// getelementptr. 
+/// If array indices are not pointer-sized integers, explicitly cast them so +/// that they aren't implicitly casted by the getelementptr. static Constant *CastGEPIndices(ArrayRef Ops, Type *ResultTy, const DataLayout *TD, const TargetLibraryInfo *TLI) { @@ -723,8 +731,7 @@ static Constant* StripPtrCastKeepAS(Constant* Ptr) { return Ptr; } -/// SymbolicallyEvaluateGEP - If we can symbolically evaluate the specified GEP -/// constant expression, do so. +/// If we can symbolically evaluate the GEP constant expression, do so. static Constant *SymbolicallyEvaluateGEP(ArrayRef Ops, Type *ResultTy, const DataLayout *TD, const TargetLibraryInfo *TLI) { @@ -886,7 +893,7 @@ static Constant *SymbolicallyEvaluateGEP(ArrayRef Ops, // Constant Folding public APIs //===----------------------------------------------------------------------===// -/// ConstantFoldInstruction - Try to constant fold the specified instruction. +/// Try to constant fold the specified instruction. /// If successful, the constant result is returned, if not, null is returned. /// Note that this fails if not all of the operands are constant. Otherwise, /// this function can only fail when attempting to fold instructions like loads @@ -966,7 +973,7 @@ Constant *llvm::ConstantFoldInstruction(Instruction *I, static Constant * ConstantFoldConstantExpressionImpl(const ConstantExpr *CE, const DataLayout *TD, const TargetLibraryInfo *TLI, - SmallPtrSet &FoldedOps) { + SmallPtrSetImpl &FoldedOps) { SmallVector Ops; for (User::const_op_iterator i = CE->op_begin(), e = CE->op_end(); i != e; ++i) { @@ -974,7 +981,7 @@ ConstantFoldConstantExpressionImpl(const ConstantExpr *CE, const DataLayout *TD, // Recursively fold the ConstantExpr's operands. If we have already folded // a ConstantExpr, we don't have to process it again. if (ConstantExpr *NewCE = dyn_cast(NewC)) { - if (FoldedOps.insert(NewCE)) + if (FoldedOps.insert(NewCE).second) NewC = ConstantFoldConstantExpressionImpl(NewCE, TD, TLI, FoldedOps); } Ops.push_back(NewC); @@ -986,7 +993,7 @@ ConstantFoldConstantExpressionImpl(const ConstantExpr *CE, const DataLayout *TD, return ConstantFoldInstOperands(CE->getOpcode(), CE->getType(), Ops, TD, TLI); } -/// ConstantFoldConstantExpression - Attempt to fold the constant expression +/// Attempt to fold the constant expression /// using the specified DataLayout. If successful, the constant result is /// result is returned, if not, null is returned. Constant *llvm::ConstantFoldConstantExpression(const ConstantExpr *CE, @@ -996,7 +1003,7 @@ Constant *llvm::ConstantFoldConstantExpression(const ConstantExpr *CE, return ConstantFoldConstantExpressionImpl(CE, TD, TLI, FoldedOps); } -/// ConstantFoldInstOperands - Attempt to constant fold an instruction with the +/// Attempt to constant fold an instruction with the /// specified opcode and operands. If successful, the constant result is /// returned, if not, null is returned. Note that this function can fail when /// attempting to fold instructions like loads and stores, which have no @@ -1101,10 +1108,9 @@ Constant *llvm::ConstantFoldInstOperands(unsigned Opcode, Type *DestTy, } } -/// ConstantFoldCompareInstOperands - Attempt to constant fold a compare +/// Attempt to constant fold a compare /// instruction (icmp/fcmp) with the specified operands. If it fails, it /// returns a constant expression of the specified operands. 
-/// Constant *llvm::ConstantFoldCompareInstOperands(unsigned Predicate, Constant *Ops0, Constant *Ops1, const DataLayout *TD, @@ -1191,9 +1197,9 @@ Constant *llvm::ConstantFoldCompareInstOperands(unsigned Predicate, } -/// ConstantFoldLoadThroughGEPConstantExpr - Given a constant and a -/// getelementptr constantexpr, return the constant value being addressed by the -/// constant expression, or null if something is funny and we can't decide. +/// Given a constant and a getelementptr constantexpr, return the constant value +/// being addressed by the constant expression, or null if something is funny +/// and we can't decide. Constant *llvm::ConstantFoldLoadThroughGEPConstantExpr(Constant *C, ConstantExpr *CE) { if (!CE->getOperand(1)->isNullValue()) @@ -1209,10 +1215,9 @@ Constant *llvm::ConstantFoldLoadThroughGEPConstantExpr(Constant *C, return C; } -/// ConstantFoldLoadThroughGEPIndices - Given a constant and getelementptr -/// indices (with an *implied* zero pointer index that is not in the list), -/// return the constant value being addressed by a virtual load, or null if -/// something is funny and we can't decide. +/// Given a constant and getelementptr indices (with an *implied* zero pointer +/// index that is not in the list), return the constant value being addressed by +/// a virtual load, or null if something is funny and we can't decide. Constant *llvm::ConstantFoldLoadThroughGEPIndices(Constant *C, ArrayRef Indices) { // Loop over all of the operands, tracking down which value we are @@ -1230,11 +1235,12 @@ Constant *llvm::ConstantFoldLoadThroughGEPIndices(Constant *C, // Constant Folding for Calls // -/// canConstantFoldCallTo - Return true if its even possible to fold a call to -/// the specified function. +/// Return true if it's even possible to fold a call to the specified function. bool llvm::canConstantFoldCallTo(const Function *F) { switch (F->getIntrinsicID()) { case Intrinsic::fabs: + case Intrinsic::minnum: + case Intrinsic::maxnum: case Intrinsic::log: case Intrinsic::log2: case Intrinsic::log10: @@ -1320,7 +1326,7 @@ static Constant *GetConstantFoldFPValue(double V, Type *Ty) { } namespace { -/// llvm_fenv_clearexcept - Clear the floating-point exception state. +/// Clear the floating-point exception state. static inline void llvm_fenv_clearexcept() { #if defined(HAVE_FENV_H) && HAVE_DECL_FE_ALL_EXCEPT feclearexcept(FE_ALL_EXCEPT); @@ -1328,7 +1334,7 @@ static inline void llvm_fenv_clearexcept() { errno = 0; } -/// llvm_fenv_testexcept - Test if a floating-point exception was raised. +/// Test if a floating-point exception was raised. static inline bool llvm_fenv_testexcept() { int errno_val = errno; if (errno_val == ERANGE || errno_val == EDOM) @@ -1365,14 +1371,13 @@ static Constant *ConstantFoldBinaryFP(double (*NativeFP)(double, double), return GetConstantFoldFPValue(V, Ty); } -/// ConstantFoldConvertToInt - Attempt to an SSE floating point to integer -/// conversion of a constant floating point. If roundTowardZero is false, the -/// default IEEE rounding is used (toward nearest, ties to even). This matches -/// the behavior of the non-truncating SSE instructions in the default rounding -/// mode. The desired integer type Ty is used to select how many bits are -/// available for the result. Returns null if the conversion cannot be -/// performed, otherwise returns the Constant value resulting from the -/// conversion. +/// Attempt to fold an SSE floating point to integer conversion of a constant +/// floating point. 
If roundTowardZero is false, the default IEEE rounding is +/// used (toward nearest, ties to even). This matches the behavior of the +/// non-truncating SSE instructions in the default rounding mode. The desired +/// integer type Ty is used to select how many bits are available for the +/// result. Returns null if the conversion cannot be performed, otherwise +/// returns the Constant value resulting from the conversion. static Constant *ConstantFoldConvertToInt(const APFloat &Val, bool roundTowardZero, Type *Ty) { // All of these conversion intrinsics form an integer of at most 64bits. @@ -1519,8 +1524,14 @@ static Constant *ConstantFoldScalarCall(StringRef Name, unsigned IntrinsicID, (Ty->isHalfTy() || Ty->isFloatTy() || Ty->isDoubleTy())) { if (V >= -0.0) return ConstantFoldFP(sqrt, V, Ty); - else // Undefined - return Constant::getNullValue(Ty); + else { + // Unlike the sqrt definitions in C/C++, POSIX, and IEEE-754 - which + // all guarantee or favor returning NaN - the square root of a + // negative number is not defined for the LLVM sqrt intrinsic. + // This is because the intrinsic should only be emitted in place of + // libm's sqrt function when using "no-nans-fp-math". + return UndefValue::get(Ty); + } } break; case 's': @@ -1626,6 +1637,19 @@ static Constant *ConstantFoldScalarCall(StringRef Name, unsigned IntrinsicID, V1.copySign(V2); return ConstantFP::get(Ty->getContext(), V1); } + + if (IntrinsicID == Intrinsic::minnum) { + const APFloat &C1 = Op1->getValueAPF(); + const APFloat &C2 = Op2->getValueAPF(); + return ConstantFP::get(Ty->getContext(), minnum(C1, C2)); + } + + if (IntrinsicID == Intrinsic::maxnum) { + const APFloat &C1 = Op1->getValueAPF(); + const APFloat &C2 = Op2->getValueAPF(); + return ConstantFP::get(Ty->getContext(), maxnum(C1, C2)); + } + if (!TLI) return nullptr; if (Name == "pow" && TLI->has(LibFunc::pow)) @@ -1761,7 +1785,7 @@ static Constant *ConstantFoldVectorCall(StringRef Name, unsigned IntrinsicID, return ConstantVector::get(Result); } -/// ConstantFoldCall - Attempt to constant fold a call to the specified function +/// Attempt to constant fold a call to the specified function /// with the specified arguments, returning null if unsuccessful. 
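For the new minnum/maxnum folds above, the relevant semantics are the IEEE-754 minNum/maxNum rules: when exactly one operand is a quiet NaN, the other operand is returned. A reference sketch of that assumed behaviour in plain C++ (an illustration only, not LLVM's APFloat minnum/maxnum implementation):

#include <cmath>
#include <cstdio>

// IEEE-754 minNum as assumed by the fold: a quiet NaN loses to a number.
double minNum(double A, double B) {
  if (std::isnan(A)) return B;
  if (std::isnan(B)) return A;
  return A < B ? A : B;
}

int main() {
  std::printf("%g\n", minNum(1.0, std::nan("")));  // prints 1, not nan
  std::printf("%g\n", minNum(-0.5, 2.0));          // prints -0.5
  return 0;
}
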
Constant * llvm::ConstantFoldCall(Function *F, ArrayRef Operands, diff --git a/lib/Analysis/DependenceAnalysis.cpp b/lib/Analysis/DependenceAnalysis.cpp index d0784f1..092df5c 100644 --- a/lib/Analysis/DependenceAnalysis.cpp +++ b/lib/Analysis/DependenceAnalysis.cpp @@ -163,16 +163,15 @@ void dumpExampleDependence(raw_ostream &OS, Function *F, DstI != DstE; ++DstI) { if (isa(*DstI) || isa(*DstI)) { OS << "da analyze - "; - if (Dependence *D = DA->depends(&*SrcI, &*DstI, true)) { + if (auto D = DA->depends(&*SrcI, &*DstI, true)) { D->dump(OS); for (unsigned Level = 1; Level <= D->getLevels(); Level++) { if (D->isSplitable(Level)) { OS << "da analyze - split level = " << Level; - OS << ", iteration = " << *DA->getSplitIteration(D, Level); + OS << ", iteration = " << *DA->getSplitIteration(*D, Level); OS << "!\n"; } } - delete D; } else OS << "none!\n"; @@ -782,6 +781,25 @@ void DependenceAnalysis::collectCommonLoops(const SCEV *Expression, } } +void DependenceAnalysis::unifySubscriptType(Subscript *Pair) { + const SCEV *Src = Pair->Src; + const SCEV *Dst = Pair->Dst; + IntegerType *SrcTy = dyn_cast(Src->getType()); + IntegerType *DstTy = dyn_cast(Dst->getType()); + if (SrcTy == nullptr || DstTy == nullptr) { + assert(SrcTy == DstTy && "This function only unify integer types and " + "expect Src and Dst share the same type " + "otherwise."); + return; + } + if (SrcTy->getBitWidth() > DstTy->getBitWidth()) { + // Sign-extend Dst to typeof(Src) if typeof(Src) is wider than typeof(Dst). + Pair->Dst = SE->getSignExtendExpr(Dst, SrcTy); + } else if (SrcTy->getBitWidth() < DstTy->getBitWidth()) { + // Sign-extend Src to typeof(Dst) if typeof(Dst) is wider than typeof(Src). + Pair->Src = SE->getSignExtendExpr(Src, DstTy); + } +} // removeMatchingExtensions - Examines a subscript pair. 
// If the source and destination are identically sign (or zero) @@ -794,9 +812,11 @@ void DependenceAnalysis::removeMatchingExtensions(Subscript *Pair) { (isa(Src) && isa(Dst))) { const SCEVCastExpr *SrcCast = cast(Src); const SCEVCastExpr *DstCast = cast(Dst); - if (SrcCast->getType() == DstCast->getType()) { - Pair->Src = SrcCast->getOperand(); - Pair->Dst = DstCast->getOperand(); + const SCEV *SrcCastOp = SrcCast->getOperand(); + const SCEV *DstCastOp = DstCast->getOperand(); + if (SrcCastOp->getType() == DstCastOp->getType()) { + Pair->Src = SrcCastOp; + Pair->Dst = DstCastOp; } } } @@ -2957,15 +2977,11 @@ const SCEV *DependenceAnalysis::addToCoefficient(const SCEV *Expr, AddRec->getNoWrapFlags()); } if (SE->isLoopInvariant(AddRec, TargetLoop)) - return SE->getAddRecExpr(AddRec, - Value, - TargetLoop, - SCEV::FlagAnyWrap); - return SE->getAddRecExpr(addToCoefficient(AddRec->getStart(), - TargetLoop, Value), - AddRec->getStepRecurrence(*SE), - AddRec->getLoop(), - AddRec->getNoWrapFlags()); + return SE->getAddRecExpr(AddRec, Value, TargetLoop, SCEV::FlagAnyWrap); + return SE->getAddRecExpr( + addToCoefficient(AddRec->getStart(), TargetLoop, Value), + AddRec->getStepRecurrence(*SE), AddRec->getLoop(), + AddRec->getNoWrapFlags()); } @@ -3183,7 +3199,7 @@ void DependenceAnalysis::updateDirection(Dependence::DVEntry &Level, bool DependenceAnalysis::tryDelinearize(const SCEV *SrcSCEV, const SCEV *DstSCEV, SmallVectorImpl &Pair, - const SCEV *ElementSize) const { + const SCEV *ElementSize) { const SCEVUnknown *SrcBase = dyn_cast(SE->getPointerBase(SrcSCEV)); const SCEVUnknown *DstBase = @@ -3238,6 +3254,7 @@ bool DependenceAnalysis::tryDelinearize(const SCEV *SrcSCEV, for (int i = 0; i < size; ++i) { Pair[i].Src = SrcSubscripts[i]; Pair[i].Dst = DstSubscripts[i]; + unifySubscriptType(&Pair[i]); // FIXME: we should record the bounds SrcSizes[i] and DstSizes[i] that the // delinearization has found, and add these constraints to the dependence @@ -3277,9 +3294,9 @@ static void dumpSmallBitVector(SmallBitVector &BV) { // // Care is required to keep the routine below, getSplitIteration(), // up to date with respect to this routine. -Dependence *DependenceAnalysis::depends(Instruction *Src, - Instruction *Dst, - bool PossiblyLoopIndependent) { +std::unique_ptr +DependenceAnalysis::depends(Instruction *Src, Instruction *Dst, + bool PossiblyLoopIndependent) { if (Src == Dst) PossiblyLoopIndependent = false; @@ -3291,7 +3308,7 @@ Dependence *DependenceAnalysis::depends(Instruction *Src, if (!isLoadOrStore(Src) || !isLoadOrStore(Dst)) { // can only analyze simple loads and stores, i.e., no calls, invokes, etc. DEBUG(dbgs() << "can only handle simple loads and stores\n"); - return new Dependence(Src, Dst); + return make_unique(Src, Dst); } Value *SrcPtr = getPointerOperand(Src); @@ -3302,7 +3319,7 @@ Dependence *DependenceAnalysis::depends(Instruction *Src, case AliasAnalysis::PartialAlias: // cannot analyse objects if we don't understand their aliasing. DEBUG(dbgs() << "can't analyze may or partial alias\n"); - return new Dependence(Src, Dst); + return make_unique(Src, Dst); case AliasAnalysis::NoAlias: // If the objects noalias, they are distinct, accesses are independent. 
DEBUG(dbgs() << "no alias\n"); @@ -3346,6 +3363,7 @@ Dependence *DependenceAnalysis::depends(Instruction *Src, ++SrcIdx, ++DstIdx, ++P) { Pair[P].Src = SE->getSCEV(*SrcIdx); Pair[P].Dst = SE->getSCEV(*DstIdx); + unifySubscriptType(&Pair[P]); } } else { @@ -3675,9 +3693,9 @@ Dependence *DependenceAnalysis::depends(Instruction *Src, return nullptr; } - FullDependence *Final = new FullDependence(Result); + auto Final = make_unique(Result); Result.DV = nullptr; - return Final; + return std::move(Final); } @@ -3729,13 +3747,12 @@ Dependence *DependenceAnalysis::depends(Instruction *Src, // // breaks the dependence and allows us to vectorize/parallelize // both loops. -const SCEV *DependenceAnalysis::getSplitIteration(const Dependence *Dep, +const SCEV *DependenceAnalysis::getSplitIteration(const Dependence &Dep, unsigned SplitLevel) { - assert(Dep && "expected a pointer to a Dependence"); - assert(Dep->isSplitable(SplitLevel) && + assert(Dep.isSplitable(SplitLevel) && "Dep should be splitable at SplitLevel"); - Instruction *Src = Dep->getSrc(); - Instruction *Dst = Dep->getDst(); + Instruction *Src = Dep.getSrc(); + Instruction *Dst = Dep.getDst(); assert(Src->mayReadFromMemory() || Src->mayWriteToMemory()); assert(Dst->mayReadFromMemory() || Dst->mayWriteToMemory()); assert(isLoadOrStore(Src)); diff --git a/lib/Analysis/DominanceFrontier.cpp b/lib/Analysis/DominanceFrontier.cpp index 74594f8..7ba91bc 100644 --- a/lib/Analysis/DominanceFrontier.cpp +++ b/lib/Analysis/DominanceFrontier.cpp @@ -8,133 +8,50 @@ //===----------------------------------------------------------------------===// #include "llvm/Analysis/DominanceFrontier.h" -#include "llvm/ADT/SmallPtrSet.h" -#include "llvm/Support/Debug.h" -#include "llvm/Support/raw_ostream.h" +#include "llvm/Analysis/DominanceFrontierImpl.h" + using namespace llvm; +namespace llvm { +template class DominanceFrontierBase; +template class ForwardDominanceFrontierBase; +} + char DominanceFrontier::ID = 0; + INITIALIZE_PASS_BEGIN(DominanceFrontier, "domfrontier", "Dominance Frontier Construction", true, true) INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) INITIALIZE_PASS_END(DominanceFrontier, "domfrontier", "Dominance Frontier Construction", true, true) -namespace { - class DFCalculateWorkObject { - public: - DFCalculateWorkObject(BasicBlock *B, BasicBlock *P, - const DomTreeNode *N, - const DomTreeNode *PN) - : currentBB(B), parentBB(P), Node(N), parentNode(PN) {} - BasicBlock *currentBB; - BasicBlock *parentBB; - const DomTreeNode *Node; - const DomTreeNode *parentNode; - }; +DominanceFrontier::DominanceFrontier() + : FunctionPass(ID), + Base() { + initializeDominanceFrontierPass(*PassRegistry::getPassRegistry()); } -void DominanceFrontier::anchor() { } - -const DominanceFrontier::DomSetType & -DominanceFrontier::calculate(const DominatorTree &DT, - const DomTreeNode *Node) { - BasicBlock *BB = Node->getBlock(); - DomSetType *Result = nullptr; - - std::vector workList; - SmallPtrSet visited; - - workList.push_back(DFCalculateWorkObject(BB, nullptr, Node, nullptr)); - do { - DFCalculateWorkObject *currentW = &workList.back(); - assert (currentW && "Missing work object."); - - BasicBlock *currentBB = currentW->currentBB; - BasicBlock *parentBB = currentW->parentBB; - const DomTreeNode *currentNode = currentW->Node; - const DomTreeNode *parentNode = currentW->parentNode; - assert (currentBB && "Invalid work object. Missing current Basic Block"); - assert (currentNode && "Invalid work object. 
Missing current Node"); - DomSetType &S = Frontiers[currentBB]; - - // Visit each block only once. - if (visited.count(currentBB) == 0) { - visited.insert(currentBB); - - // Loop over CFG successors to calculate DFlocal[currentNode] - for (succ_iterator SI = succ_begin(currentBB), SE = succ_end(currentBB); - SI != SE; ++SI) { - // Does Node immediately dominate this successor? - if (DT[*SI]->getIDom() != currentNode) - S.insert(*SI); - } - } - - // At this point, S is DFlocal. Now we union in DFup's of our children... - // Loop through and visit the nodes that Node immediately dominates (Node's - // children in the IDomTree) - bool visitChild = false; - for (DomTreeNode::const_iterator NI = currentNode->begin(), - NE = currentNode->end(); NI != NE; ++NI) { - DomTreeNode *IDominee = *NI; - BasicBlock *childBB = IDominee->getBlock(); - if (visited.count(childBB) == 0) { - workList.push_back(DFCalculateWorkObject(childBB, currentBB, - IDominee, currentNode)); - visitChild = true; - } - } - - // If all children are visited or there is any child then pop this block - // from the workList. - if (!visitChild) { - - if (!parentBB) { - Result = &S; - break; - } - - DomSetType::const_iterator CDFI = S.begin(), CDFE = S.end(); - DomSetType &parentSet = Frontiers[parentBB]; - for (; CDFI != CDFE; ++CDFI) { - if (!DT.properlyDominates(parentNode, DT[*CDFI])) - parentSet.insert(*CDFI); - } - workList.pop_back(); - } +void DominanceFrontier::releaseMemory() { + Base.releaseMemory(); +} - } while (!workList.empty()); +bool DominanceFrontier::runOnFunction(Function &) { + releaseMemory(); + Base.analyze(getAnalysis().getDomTree()); + return false; +} - return *Result; +void DominanceFrontier::getAnalysisUsage(AnalysisUsage &AU) const { + AU.setPreservesAll(); + AU.addRequired(); } -void DominanceFrontierBase::print(raw_ostream &OS, const Module* ) const { - for (const_iterator I = begin(), E = end(); I != E; ++I) { - OS << " DomFrontier for BB "; - if (I->first) - I->first->printAsOperand(OS, false); - else - OS << " <>"; - OS << " is:\t"; - - const std::set &BBs = I->second; - - for (std::set::const_iterator I = BBs.begin(), E = BBs.end(); - I != E; ++I) { - OS << ' '; - if (*I) - (*I)->printAsOperand(OS, false); - else - OS << "<>"; - } - OS << "\n"; - } +void DominanceFrontier::print(raw_ostream &OS, const Module *) const { + Base.print(OS); } #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) -void DominanceFrontierBase::dump() const { +void DominanceFrontier::dump() const { print(dbgs()); } #endif - diff --git a/lib/Analysis/FunctionTargetTransformInfo.cpp b/lib/Analysis/FunctionTargetTransformInfo.cpp new file mode 100644 index 0000000..a686bec --- /dev/null +++ b/lib/Analysis/FunctionTargetTransformInfo.cpp @@ -0,0 +1,50 @@ +//===- llvm/Analysis/FunctionTargetTransformInfo.h --------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This pass wraps a TargetTransformInfo in a FunctionPass so that it can +// forward along the current Function so that we can make target specific +// decisions based on the particular subtarget specified for each Function. 
+// +//===----------------------------------------------------------------------===// + +#include "llvm/InitializePasses.h" +#include "llvm/Analysis/FunctionTargetTransformInfo.h" + +using namespace llvm; + +#define DEBUG_TYPE "function-tti" +static const char ftti_name[] = "Function TargetTransformInfo"; +INITIALIZE_PASS_BEGIN(FunctionTargetTransformInfo, "function_tti", ftti_name, false, true) +INITIALIZE_AG_DEPENDENCY(TargetTransformInfo) +INITIALIZE_PASS_END(FunctionTargetTransformInfo, "function_tti", ftti_name, false, true) +char FunctionTargetTransformInfo::ID = 0; + +namespace llvm { +FunctionPass *createFunctionTargetTransformInfoPass() { + return new FunctionTargetTransformInfo(); +} +} + +FunctionTargetTransformInfo::FunctionTargetTransformInfo() + : FunctionPass(ID), Fn(nullptr), TTI(nullptr) { + initializeFunctionTargetTransformInfoPass(*PassRegistry::getPassRegistry()); +} + +void FunctionTargetTransformInfo::getAnalysisUsage(AnalysisUsage &AU) const { + AU.setPreservesAll(); + AU.addRequired(); +} + +void FunctionTargetTransformInfo::releaseMemory() {} + +bool FunctionTargetTransformInfo::runOnFunction(Function &F) { + Fn = &F; + TTI = &getAnalysis(); + return false; +} diff --git a/lib/Analysis/IPA/CallGraph.cpp b/lib/Analysis/IPA/CallGraph.cpp index caec253..67cf7f8 100644 --- a/lib/Analysis/IPA/CallGraph.cpp +++ b/lib/Analysis/IPA/CallGraph.cpp @@ -267,7 +267,7 @@ INITIALIZE_PASS(CallGraphWrapperPass, "basiccg", "CallGraph Construction", char CallGraphWrapperPass::ID = 0; -void CallGraphWrapperPass::releaseMemory() { G.reset(nullptr); } +void CallGraphWrapperPass::releaseMemory() { G.reset(); } void CallGraphWrapperPass::print(raw_ostream &OS, const Module *) const { if (!G) { @@ -282,6 +282,3 @@ void CallGraphWrapperPass::print(raw_ostream &OS, const Module *) const { #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) void CallGraphWrapperPass::dump() const { print(dbgs(), nullptr); } #endif - -// Enuse that users of CallGraph.h also link with this file -DEFINING_FILE_FOR(CallGraph) diff --git a/lib/Analysis/IPA/CallGraphSCCPass.cpp b/lib/Analysis/IPA/CallGraphSCCPass.cpp index c27edbf..665aa7f 100644 --- a/lib/Analysis/IPA/CallGraphSCCPass.cpp +++ b/lib/Analysis/IPA/CallGraphSCCPass.cpp @@ -243,7 +243,14 @@ bool CGPassManager::RefreshCallGraph(CallGraphSCC &CurSCC, assert(!CallSites.count(I->first) && "Call site occurs in node multiple times"); - CallSites.insert(std::make_pair(I->first, I->second)); + + CallSite CS(I->first); + if (CS) { + Function *Callee = CS.getCalledFunction(); + // Ignore intrinsics because they're not really function calls. + if (!Callee || !(Callee->isIntrinsic())) + CallSites.insert(std::make_pair(I->first, I->second)); + } ++I; } diff --git a/lib/Analysis/IPA/InlineCost.cpp b/lib/Analysis/IPA/InlineCost.cpp index 8807529..85db278 100644 --- a/lib/Analysis/IPA/InlineCost.cpp +++ b/lib/Analysis/IPA/InlineCost.cpp @@ -17,7 +17,9 @@ #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" +#include "llvm/Analysis/AssumptionTracker.h" #include "llvm/Analysis/ConstantFolding.h" +#include "llvm/Analysis/CodeMetrics.h" #include "llvm/Analysis/InstructionSimplify.h" #include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/IR/CallSite.h" @@ -49,6 +51,9 @@ class CallAnalyzer : public InstVisitor { /// The TargetTransformInfo available for this compilation. const TargetTransformInfo &TTI; + /// The cache of @llvm.assume intrinsics. + AssumptionTracker *AT; + // The called function. 
Function &F; @@ -104,7 +109,7 @@ class CallAnalyzer : public InstVisitor { ConstantInt *stripAndComputeInBoundsConstantOffsets(Value *&V); // Custom analysis routines. - bool analyzeBlock(BasicBlock *BB); + bool analyzeBlock(BasicBlock *BB, SmallPtrSetImpl &EphValues); // Disable several entry points to the visitor so we don't accidentally use // them by declaring but not defining them here. @@ -141,8 +146,8 @@ class CallAnalyzer : public InstVisitor { public: CallAnalyzer(const DataLayout *DL, const TargetTransformInfo &TTI, - Function &Callee, int Threshold) - : DL(DL), TTI(TTI), F(Callee), Threshold(Threshold), Cost(0), + AssumptionTracker *AT, Function &Callee, int Threshold) + : DL(DL), TTI(TTI), AT(AT), F(Callee), Threshold(Threshold), Cost(0), IsCallerRecursive(false), IsRecursiveCall(false), ExposesReturnsTwice(false), HasDynamicAlloca(false), ContainsNoDuplicateCall(false), HasReturn(false), HasIndirectBr(false), @@ -778,7 +783,7 @@ bool CallAnalyzer::visitCallSite(CallSite CS) { // during devirtualization and so we want to give it a hefty bonus for // inlining, but cap that bonus in the event that inlining wouldn't pan // out. Pretend to inline the function, with a custom threshold. - CallAnalyzer CA(DL, TTI, *F, InlineConstants::IndirectCallThreshold); + CallAnalyzer CA(DL, TTI, AT, *F, InlineConstants::IndirectCallThreshold); if (CA.analyzeCall(CS)) { // We were able to inline the indirect call! Subtract the cost from the // bonus we want to apply, but don't go below zero. @@ -881,7 +886,8 @@ bool CallAnalyzer::visitInstruction(Instruction &I) { /// aborts early if the threshold has been exceeded or an impossible to inline /// construct has been detected. It returns false if inlining is no longer /// viable, and true if inlining remains viable. -bool CallAnalyzer::analyzeBlock(BasicBlock *BB) { +bool CallAnalyzer::analyzeBlock(BasicBlock *BB, + SmallPtrSetImpl &EphValues) { for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I) { // FIXME: Currently, the number of instructions in a function regardless of // our ability to simplify them during inline to constants or dead code, @@ -893,6 +899,10 @@ bool CallAnalyzer::analyzeBlock(BasicBlock *BB) { if (isa(I)) continue; + // Skip ephemeral values. + if (EphValues.count(I)) + continue; + ++NumInstructions; if (isa(I) || I->getType()->isVectorTy()) ++NumVectorInstructions; @@ -967,7 +977,7 @@ ConstantInt *CallAnalyzer::stripAndComputeInBoundsConstantOffsets(Value *&V) { break; } assert(V->getType()->isPointerTy() && "Unexpected operand type!"); - } while (Visited.insert(V)); + } while (Visited.insert(V).second); Type *IntPtrTy = DL->getIntPtrType(V->getContext()); return cast(ConstantInt::get(IntPtrTy, Offset)); @@ -1096,6 +1106,12 @@ bool CallAnalyzer::analyzeCall(CallSite CS) { NumConstantOffsetPtrArgs = ConstantOffsetPtrs.size(); NumAllocaArgs = SROAArgValues.size(); + // FIXME: If a caller has multiple calls to a callee, we end up recomputing + // the ephemeral values multiple times (and they're completely determined by + // the callee, so this is purely duplicate work). + SmallPtrSet EphValues; + CodeMetrics::collectEphemeralValues(&F, AT, EphValues); + // The worklist of live basic blocks in the callee *after* inlining. We avoid // adding basic blocks of the callee which can be proven to be dead for this // particular call site in order to get more accurate cost estimates. This @@ -1129,7 +1145,7 @@ bool CallAnalyzer::analyzeCall(CallSite CS) { // Analyze the cost of this block. 
If we blow through the threshold, this // returns false, and we can bail on out. - if (!analyzeBlock(BB)) { + if (!analyzeBlock(BB, EphValues)) { if (IsRecursiveCall || ExposesReturnsTwice || HasDynamicAlloca || HasIndirectBr) return false; @@ -1217,6 +1233,7 @@ void CallAnalyzer::dump() { INITIALIZE_PASS_BEGIN(InlineCostAnalysis, "inline-cost", "Inline Cost Analysis", true, true) INITIALIZE_AG_DEPENDENCY(TargetTransformInfo) +INITIALIZE_PASS_DEPENDENCY(AssumptionTracker) INITIALIZE_PASS_END(InlineCostAnalysis, "inline-cost", "Inline Cost Analysis", true, true) @@ -1228,12 +1245,14 @@ InlineCostAnalysis::~InlineCostAnalysis() {} void InlineCostAnalysis::getAnalysisUsage(AnalysisUsage &AU) const { AU.setPreservesAll(); + AU.addRequired(); AU.addRequired(); CallGraphSCCPass::getAnalysisUsage(AU); } bool InlineCostAnalysis::runOnSCC(CallGraphSCC &SCC) { TTI = &getAnalysis(); + AT = &getAnalysis(); return false; } @@ -1290,7 +1309,7 @@ InlineCost InlineCostAnalysis::getInlineCost(CallSite CS, Function *Callee, DEBUG(llvm::dbgs() << " Analyzing call of " << Callee->getName() << "...\n"); - CallAnalyzer CA(Callee->getDataLayout(), *TTI, *Callee, Threshold); + CallAnalyzer CA(Callee->getDataLayout(), *TTI, AT, *Callee, Threshold); bool ShouldInline = CA.analyzeCall(CS); DEBUG(CA.dump()); diff --git a/lib/Analysis/IVUsers.cpp b/lib/Analysis/IVUsers.cpp index 24655aa..6b5f370 100644 --- a/lib/Analysis/IVUsers.cpp +++ b/lib/Analysis/IVUsers.cpp @@ -84,7 +84,7 @@ static bool isInteresting(const SCEV *S, const Instruction *I, const Loop *L, /// form. static bool isSimplifiedLoopNest(BasicBlock *BB, const DominatorTree *DT, const LoopInfo *LI, - SmallPtrSet &SimpleLoopNests) { + SmallPtrSetImpl &SimpleLoopNests) { Loop *NearestLoop = nullptr; for (DomTreeNode *Rung = DT->getNode(BB); Rung; Rung = Rung->getIDom()) { @@ -112,10 +112,10 @@ static bool isSimplifiedLoopNest(BasicBlock *BB, const DominatorTree *DT, /// reducible SCEV, recursively add its users to the IVUsesByStride set and /// return true. Otherwise, return false. bool IVUsers::AddUsersImpl(Instruction *I, - SmallPtrSet &SimpleLoopNests) { + SmallPtrSetImpl &SimpleLoopNests) { // Add this IV user to the Processed set before returning false to ensure that // all IV users are members of the set. See IVUsers::isIVUserOrOperand. - if (!Processed.insert(I)) + if (!Processed.insert(I).second) return true; // Instruction already handled. if (!SE->isSCEVable(I->getType())) @@ -145,7 +145,7 @@ bool IVUsers::AddUsersImpl(Instruction *I, SmallPtrSet UniqueUsers; for (Use &U : I->uses()) { Instruction *User = cast(U.getUser()); - if (!UniqueUsers.insert(User)) + if (!UniqueUsers.insert(User).second) continue; // Do not infinitely recurse on PHI nodes. 
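The .insert(X).second rewrites scattered through these hunks (FoldedOps, Visited, Processed, UniqueUsers) all track the same interface change: the set insert now reports whether the element was newly added through the second member of a returned pair, std::set-style, where the old code read a plain bool. A minimal standalone illustration of the idiom, using std::set as a stand-in for LLVM's SmallPtrSet:

#include <cassert>
#include <set>

int main() {
  std::set<int> Visited;
  // insert() returns (iterator, inserted); "did we just add it?" is .second.
  assert(Visited.insert(42).second);   // first insertion reports true
  assert(!Visited.insert(42).second);  // duplicate insertion reports false
  return 0;
}
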
diff --git a/lib/Analysis/InstructionSimplify.cpp b/lib/Analysis/InstructionSimplify.cpp index bd42af1..f151a3a 100644 --- a/lib/Analysis/InstructionSimplify.cpp +++ b/lib/Analysis/InstructionSimplify.cpp @@ -41,14 +41,20 @@ enum { RecursionLimit = 3 }; STATISTIC(NumExpand, "Number of expansions"); STATISTIC(NumReassoc, "Number of reassociations"); +namespace { struct Query { const DataLayout *DL; const TargetLibraryInfo *TLI; const DominatorTree *DT; + AssumptionTracker *AT; + const Instruction *CxtI; Query(const DataLayout *DL, const TargetLibraryInfo *tli, - const DominatorTree *dt) : DL(DL), TLI(tli), DT(dt) {} + const DominatorTree *dt, AssumptionTracker *at = nullptr, + const Instruction *cxti = nullptr) + : DL(DL), TLI(tli), DT(dt), AT(at), CxtI(cxti) {} }; +} // end anonymous namespace static Value *SimplifyAndInst(Value *, Value *, const Query &, unsigned); static Value *SimplifyBinOp(unsigned, Value *, Value *, const Query &, @@ -575,9 +581,10 @@ static Value *SimplifyAddInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW, Value *llvm::SimplifyAddInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW, const DataLayout *DL, const TargetLibraryInfo *TLI, - const DominatorTree *DT) { - return ::SimplifyAddInst(Op0, Op1, isNSW, isNUW, Query (DL, TLI, DT), - RecursionLimit); + const DominatorTree *DT, AssumptionTracker *AT, + const Instruction *CxtI) { + return ::SimplifyAddInst(Op0, Op1, isNSW, isNUW, + Query (DL, TLI, DT, AT, CxtI), RecursionLimit); } /// \brief Compute the base pointer and cumulative constant offsets for V. @@ -624,7 +631,7 @@ static Constant *stripAndComputeConstantOffsets(const DataLayout *DL, } assert(V->getType()->getScalarType()->isPointerTy() && "Unexpected operand type!"); - } while (Visited.insert(V)); + } while (Visited.insert(V).second); Constant *OffsetIntPtr = ConstantInt::get(IntPtrTy, Offset); if (V->getType()->isVectorTy()) @@ -676,6 +683,18 @@ static Value *SimplifySubInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW, if (Op0 == Op1) return Constant::getNullValue(Op0->getType()); + // X - (0 - Y) -> X if the second sub is NUW. + // If Y != 0, 0 - Y is a poison value. + // If Y == 0, 0 - Y simplifies to 0. + if (BinaryOperator::isNeg(Op1)) { + if (const auto *BO = dyn_cast(Op1)) { + assert(BO->getOpcode() == Instruction::Sub && + "Expected a subtraction operator!"); + if (BO->hasNoUnsignedWrap()) + return Op0; + } + } + // (X + Y) - Z -> X + (Y - Z) or Y + (X - Z) if everything simplifies. // For example, (X + Y) - Y -> X; (Y + X) - Y -> X Value *X = nullptr, *Y = nullptr, *Z = Op1; @@ -769,9 +788,10 @@ static Value *SimplifySubInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW, Value *llvm::SimplifySubInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW, const DataLayout *DL, const TargetLibraryInfo *TLI, - const DominatorTree *DT) { - return ::SimplifySubInst(Op0, Op1, isNSW, isNUW, Query (DL, TLI, DT), - RecursionLimit); + const DominatorTree *DT, AssumptionTracker *AT, + const Instruction *CxtI) { + return ::SimplifySubInst(Op0, Op1, isNSW, isNUW, + Query (DL, TLI, DT, AT, CxtI), RecursionLimit); } /// Given operands for an FAdd, see if we can fold the result. 
If not, this @@ -947,28 +967,37 @@ static Value *SimplifyMulInst(Value *Op0, Value *Op1, const Query &Q, Value *llvm::SimplifyFAddInst(Value *Op0, Value *Op1, FastMathFlags FMF, const DataLayout *DL, const TargetLibraryInfo *TLI, - const DominatorTree *DT) { - return ::SimplifyFAddInst(Op0, Op1, FMF, Query (DL, TLI, DT), RecursionLimit); + const DominatorTree *DT, AssumptionTracker *AT, + const Instruction *CxtI) { + return ::SimplifyFAddInst(Op0, Op1, FMF, Query (DL, TLI, DT, AT, CxtI), + RecursionLimit); } Value *llvm::SimplifyFSubInst(Value *Op0, Value *Op1, FastMathFlags FMF, const DataLayout *DL, const TargetLibraryInfo *TLI, - const DominatorTree *DT) { - return ::SimplifyFSubInst(Op0, Op1, FMF, Query (DL, TLI, DT), RecursionLimit); + const DominatorTree *DT, AssumptionTracker *AT, + const Instruction *CxtI) { + return ::SimplifyFSubInst(Op0, Op1, FMF, Query (DL, TLI, DT, AT, CxtI), + RecursionLimit); } Value *llvm::SimplifyFMulInst(Value *Op0, Value *Op1, FastMathFlags FMF, const DataLayout *DL, const TargetLibraryInfo *TLI, - const DominatorTree *DT) { - return ::SimplifyFMulInst(Op0, Op1, FMF, Query (DL, TLI, DT), RecursionLimit); + const DominatorTree *DT, + AssumptionTracker *AT, + const Instruction *CxtI) { + return ::SimplifyFMulInst(Op0, Op1, FMF, Query (DL, TLI, DT, AT, CxtI), + RecursionLimit); } Value *llvm::SimplifyMulInst(Value *Op0, Value *Op1, const DataLayout *DL, const TargetLibraryInfo *TLI, - const DominatorTree *DT) { - return ::SimplifyMulInst(Op0, Op1, Query (DL, TLI, DT), RecursionLimit); + const DominatorTree *DT, AssumptionTracker *AT, + const Instruction *CxtI) { + return ::SimplifyMulInst(Op0, Op1, Query (DL, TLI, DT, AT, CxtI), + RecursionLimit); } /// SimplifyDiv - Given operands for an SDiv or UDiv, see if we can @@ -1028,6 +1057,16 @@ static Value *SimplifyDiv(Instruction::BinaryOps Opcode, Value *Op0, Value *Op1, (!isSigned && match(Op0, m_URem(m_Value(), m_Specific(Op1))))) return Constant::getNullValue(Op0->getType()); + // (X /u C1) /u C2 -> 0 if C1 * C2 overflow + ConstantInt *C1, *C2; + if (!isSigned && match(Op0, m_UDiv(m_Value(X), m_ConstantInt(C1))) && + match(Op1, m_ConstantInt(C2))) { + bool Overflow; + C1->getValue().umul_ov(C2->getValue(), Overflow); + if (Overflow) + return Constant::getNullValue(Op0->getType()); + } + // If the operation is with the result of a select instruction, check whether // operating on either branch of the select always yields the same value. 
if (isa(Op0) || isa(Op1)) @@ -1055,8 +1094,11 @@ static Value *SimplifySDivInst(Value *Op0, Value *Op1, const Query &Q, Value *llvm::SimplifySDivInst(Value *Op0, Value *Op1, const DataLayout *DL, const TargetLibraryInfo *TLI, - const DominatorTree *DT) { - return ::SimplifySDivInst(Op0, Op1, Query (DL, TLI, DT), RecursionLimit); + const DominatorTree *DT, + AssumptionTracker *AT, + const Instruction *CxtI) { + return ::SimplifySDivInst(Op0, Op1, Query (DL, TLI, DT, AT, CxtI), + RecursionLimit); } /// SimplifyUDivInst - Given operands for a UDiv, see if we can @@ -1071,8 +1113,11 @@ static Value *SimplifyUDivInst(Value *Op0, Value *Op1, const Query &Q, Value *llvm::SimplifyUDivInst(Value *Op0, Value *Op1, const DataLayout *DL, const TargetLibraryInfo *TLI, - const DominatorTree *DT) { - return ::SimplifyUDivInst(Op0, Op1, Query (DL, TLI, DT), RecursionLimit); + const DominatorTree *DT, + AssumptionTracker *AT, + const Instruction *CxtI) { + return ::SimplifyUDivInst(Op0, Op1, Query (DL, TLI, DT, AT, CxtI), + RecursionLimit); } static Value *SimplifyFDivInst(Value *Op0, Value *Op1, const Query &Q, @@ -1090,8 +1135,11 @@ static Value *SimplifyFDivInst(Value *Op0, Value *Op1, const Query &Q, Value *llvm::SimplifyFDivInst(Value *Op0, Value *Op1, const DataLayout *DL, const TargetLibraryInfo *TLI, - const DominatorTree *DT) { - return ::SimplifyFDivInst(Op0, Op1, Query (DL, TLI, DT), RecursionLimit); + const DominatorTree *DT, + AssumptionTracker *AT, + const Instruction *CxtI) { + return ::SimplifyFDivInst(Op0, Op1, Query (DL, TLI, DT, AT, CxtI), + RecursionLimit); } /// SimplifyRem - Given operands for an SRem or URem, see if we can @@ -1133,6 +1181,13 @@ static Value *SimplifyRem(Instruction::BinaryOps Opcode, Value *Op0, Value *Op1, if (Op0 == Op1) return Constant::getNullValue(Op0->getType()); + // (X % Y) % Y -> X % Y + if ((Opcode == Instruction::SRem && + match(Op0, m_SRem(m_Value(), m_Specific(Op1)))) || + (Opcode == Instruction::URem && + match(Op0, m_URem(m_Value(), m_Specific(Op1))))) + return Op0; + // If the operation is with the result of a select instruction, check whether // operating on either branch of the select always yields the same value. 
if (isa(Op0) || isa(Op1)) @@ -1160,8 +1215,11 @@ static Value *SimplifySRemInst(Value *Op0, Value *Op1, const Query &Q, Value *llvm::SimplifySRemInst(Value *Op0, Value *Op1, const DataLayout *DL, const TargetLibraryInfo *TLI, - const DominatorTree *DT) { - return ::SimplifySRemInst(Op0, Op1, Query (DL, TLI, DT), RecursionLimit); + const DominatorTree *DT, + AssumptionTracker *AT, + const Instruction *CxtI) { + return ::SimplifySRemInst(Op0, Op1, Query (DL, TLI, DT, AT, CxtI), + RecursionLimit); } /// SimplifyURemInst - Given operands for a URem, see if we can @@ -1176,8 +1234,11 @@ static Value *SimplifyURemInst(Value *Op0, Value *Op1, const Query &Q, Value *llvm::SimplifyURemInst(Value *Op0, Value *Op1, const DataLayout *DL, const TargetLibraryInfo *TLI, - const DominatorTree *DT) { - return ::SimplifyURemInst(Op0, Op1, Query (DL, TLI, DT), RecursionLimit); + const DominatorTree *DT, + AssumptionTracker *AT, + const Instruction *CxtI) { + return ::SimplifyURemInst(Op0, Op1, Query (DL, TLI, DT, AT, CxtI), + RecursionLimit); } static Value *SimplifyFRemInst(Value *Op0, Value *Op1, const Query &, @@ -1195,8 +1256,11 @@ static Value *SimplifyFRemInst(Value *Op0, Value *Op1, const Query &, Value *llvm::SimplifyFRemInst(Value *Op0, Value *Op1, const DataLayout *DL, const TargetLibraryInfo *TLI, - const DominatorTree *DT) { - return ::SimplifyFRemInst(Op0, Op1, Query (DL, TLI, DT), RecursionLimit); + const DominatorTree *DT, + AssumptionTracker *AT, + const Instruction *CxtI) { + return ::SimplifyFRemInst(Op0, Op1, Query (DL, TLI, DT, AT, CxtI), + RecursionLimit); } /// isUndefShift - Returns true if a shift by \c Amount always yields undef. @@ -1264,6 +1328,32 @@ static Value *SimplifyShift(unsigned Opcode, Value *Op0, Value *Op1, return nullptr; } +/// \brief Given operands for an Shl, LShr or AShr, see if we can +/// fold the result. If not, this returns null. +static Value *SimplifyRightShift(unsigned Opcode, Value *Op0, Value *Op1, + bool isExact, const Query &Q, + unsigned MaxRecurse) { + if (Value *V = SimplifyShift(Opcode, Op0, Op1, Q, MaxRecurse)) + return V; + + // X >> X -> 0 + if (Op0 == Op1) + return Constant::getNullValue(Op0->getType()); + + // The low bit cannot be shifted out of an exact shift if it is set. + if (isExact) { + unsigned BitWidth = Op0->getType()->getScalarSizeInBits(); + APInt Op0KnownZero(BitWidth, 0); + APInt Op0KnownOne(BitWidth, 0); + computeKnownBits(Op0, Op0KnownZero, Op0KnownOne, Q.DL, /*Depth=*/0, Q.AT, Q.CxtI, + Q.DT); + if (Op0KnownOne[0]) + return Op0; + } + + return nullptr; +} + /// SimplifyShlInst - Given operands for an Shl, see if we can /// fold the result. If not, this returns null. static Value *SimplifyShlInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW, @@ -1284,8 +1374,9 @@ static Value *SimplifyShlInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW, Value *llvm::SimplifyShlInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW, const DataLayout *DL, const TargetLibraryInfo *TLI, - const DominatorTree *DT) { - return ::SimplifyShlInst(Op0, Op1, isNSW, isNUW, Query (DL, TLI, DT), + const DominatorTree *DT, AssumptionTracker *AT, + const Instruction *CxtI) { + return ::SimplifyShlInst(Op0, Op1, isNSW, isNUW, Query (DL, TLI, DT, AT, CxtI), RecursionLimit); } @@ -1293,12 +1384,9 @@ Value *llvm::SimplifyShlInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW, /// fold the result. If not, this returns null. 
static Value *SimplifyLShrInst(Value *Op0, Value *Op1, bool isExact, const Query &Q, unsigned MaxRecurse) { - if (Value *V = SimplifyShift(Instruction::LShr, Op0, Op1, Q, MaxRecurse)) - return V; - - // X >> X -> 0 - if (Op0 == Op1) - return Constant::getNullValue(Op0->getType()); + if (Value *V = SimplifyRightShift(Instruction::LShr, Op0, Op1, isExact, Q, + MaxRecurse)) + return V; // undef >>l X -> 0 if (match(Op0, m_Undef())) @@ -1306,8 +1394,7 @@ static Value *SimplifyLShrInst(Value *Op0, Value *Op1, bool isExact, // (X << A) >> A -> X Value *X; - if (match(Op0, m_Shl(m_Value(X), m_Specific(Op1))) && - cast(Op0)->hasNoUnsignedWrap()) + if (match(Op0, m_NUWShl(m_Value(X), m_Specific(Op1)))) return X; return nullptr; @@ -1316,8 +1403,10 @@ static Value *SimplifyLShrInst(Value *Op0, Value *Op1, bool isExact, Value *llvm::SimplifyLShrInst(Value *Op0, Value *Op1, bool isExact, const DataLayout *DL, const TargetLibraryInfo *TLI, - const DominatorTree *DT) { - return ::SimplifyLShrInst(Op0, Op1, isExact, Query (DL, TLI, DT), + const DominatorTree *DT, + AssumptionTracker *AT, + const Instruction *CxtI) { + return ::SimplifyLShrInst(Op0, Op1, isExact, Query (DL, TLI, DT, AT, CxtI), RecursionLimit); } @@ -1325,13 +1414,10 @@ Value *llvm::SimplifyLShrInst(Value *Op0, Value *Op1, bool isExact, /// fold the result. If not, this returns null. static Value *SimplifyAShrInst(Value *Op0, Value *Op1, bool isExact, const Query &Q, unsigned MaxRecurse) { - if (Value *V = SimplifyShift(Instruction::AShr, Op0, Op1, Q, MaxRecurse)) + if (Value *V = SimplifyRightShift(Instruction::AShr, Op0, Op1, isExact, Q, + MaxRecurse)) return V; - // X >> X -> 0 - if (Op0 == Op1) - return Constant::getNullValue(Op0->getType()); - // all ones >>a X -> all ones if (match(Op0, m_AllOnes())) return Op0; @@ -1342,21 +1428,75 @@ static Value *SimplifyAShrInst(Value *Op0, Value *Op1, bool isExact, // (X << A) >> A -> X Value *X; - if (match(Op0, m_Shl(m_Value(X), m_Specific(Op1))) && - cast(Op0)->hasNoSignedWrap()) + if (match(Op0, m_NSWShl(m_Value(X), m_Specific(Op1)))) return X; + // Arithmetic shifting an all-sign-bit value is a no-op. + unsigned NumSignBits = ComputeNumSignBits(Op0, Q.DL, 0, Q.AT, Q.CxtI, Q.DT); + if (NumSignBits == Op0->getType()->getScalarSizeInBits()) + return Op0; + return nullptr; } Value *llvm::SimplifyAShrInst(Value *Op0, Value *Op1, bool isExact, const DataLayout *DL, const TargetLibraryInfo *TLI, - const DominatorTree *DT) { - return ::SimplifyAShrInst(Op0, Op1, isExact, Query (DL, TLI, DT), + const DominatorTree *DT, + AssumptionTracker *AT, + const Instruction *CxtI) { + return ::SimplifyAShrInst(Op0, Op1, isExact, Query (DL, TLI, DT, AT, CxtI), RecursionLimit); } +// Simplify (and (icmp ...) (icmp ...)) to true when we can tell that the range +// of possible values cannot be satisfied. 
+static Value *SimplifyAndOfICmps(ICmpInst *Op0, ICmpInst *Op1) { + ICmpInst::Predicate Pred0, Pred1; + ConstantInt *CI1, *CI2; + Value *V; + if (!match(Op0, m_ICmp(Pred0, m_Add(m_Value(V), m_ConstantInt(CI1)), + m_ConstantInt(CI2)))) + return nullptr; + + if (!match(Op1, m_ICmp(Pred1, m_Specific(V), m_Specific(CI1)))) + return nullptr; + + Type *ITy = Op0->getType(); + + auto *AddInst = cast(Op0->getOperand(0)); + bool isNSW = AddInst->hasNoSignedWrap(); + bool isNUW = AddInst->hasNoUnsignedWrap(); + + const APInt &CI1V = CI1->getValue(); + const APInt &CI2V = CI2->getValue(); + const APInt Delta = CI2V - CI1V; + if (CI1V.isStrictlyPositive()) { + if (Delta == 2) { + if (Pred0 == ICmpInst::ICMP_ULT && Pred1 == ICmpInst::ICMP_SGT) + return getFalse(ITy); + if (Pred0 == ICmpInst::ICMP_SLT && Pred1 == ICmpInst::ICMP_SGT && isNSW) + return getFalse(ITy); + } + if (Delta == 1) { + if (Pred0 == ICmpInst::ICMP_ULE && Pred1 == ICmpInst::ICMP_SGT) + return getFalse(ITy); + if (Pred0 == ICmpInst::ICMP_SLE && Pred1 == ICmpInst::ICMP_SGT && isNSW) + return getFalse(ITy); + } + } + if (CI1V.getBoolValue() && isNUW) { + if (Delta == 2) + if (Pred0 == ICmpInst::ICMP_ULT && Pred1 == ICmpInst::ICMP_UGT) + return getFalse(ITy); + if (Delta == 1) + if (Pred0 == ICmpInst::ICMP_ULE && Pred1 == ICmpInst::ICMP_UGT) + return getFalse(ITy); + } + + return nullptr; +} + /// SimplifyAndInst - Given operands for an And, see if we can /// fold the result. If not, this returns null. static Value *SimplifyAndInst(Value *Op0, Value *Op1, const Query &Q, @@ -1407,12 +1547,21 @@ static Value *SimplifyAndInst(Value *Op0, Value *Op1, const Query &Q, // A & (-A) = A if A is a power of two or zero. if (match(Op0, m_Neg(m_Specific(Op1))) || match(Op1, m_Neg(m_Specific(Op0)))) { - if (isKnownToBeAPowerOfTwo(Op0, /*OrZero*/true)) + if (isKnownToBeAPowerOfTwo(Op0, /*OrZero*/true, 0, Q.AT, Q.CxtI, Q.DT)) return Op0; - if (isKnownToBeAPowerOfTwo(Op1, /*OrZero*/true)) + if (isKnownToBeAPowerOfTwo(Op1, /*OrZero*/true, 0, Q.AT, Q.CxtI, Q.DT)) return Op1; } + if (auto *ICILHS = dyn_cast(Op0)) { + if (auto *ICIRHS = dyn_cast(Op1)) { + if (Value *V = SimplifyAndOfICmps(ICILHS, ICIRHS)) + return V; + if (Value *V = SimplifyAndOfICmps(ICIRHS, ICILHS)) + return V; + } + } + // Try some generic simplifications for associative operations. if (Value *V = SimplifyAssociativeBinOp(Instruction::And, Op0, Op1, Q, MaxRecurse)) @@ -1447,8 +1596,58 @@ static Value *SimplifyAndInst(Value *Op0, Value *Op1, const Query &Q, Value *llvm::SimplifyAndInst(Value *Op0, Value *Op1, const DataLayout *DL, const TargetLibraryInfo *TLI, - const DominatorTree *DT) { - return ::SimplifyAndInst(Op0, Op1, Query (DL, TLI, DT), RecursionLimit); + const DominatorTree *DT, AssumptionTracker *AT, + const Instruction *CxtI) { + return ::SimplifyAndInst(Op0, Op1, Query (DL, TLI, DT, AT, CxtI), + RecursionLimit); +} + +// Simplify (or (icmp ...) (icmp ...)) to true when we can tell that the union +// contains all possible values. 
+static Value *SimplifyOrOfICmps(ICmpInst *Op0, ICmpInst *Op1) { + ICmpInst::Predicate Pred0, Pred1; + ConstantInt *CI1, *CI2; + Value *V; + if (!match(Op0, m_ICmp(Pred0, m_Add(m_Value(V), m_ConstantInt(CI1)), + m_ConstantInt(CI2)))) + return nullptr; + + if (!match(Op1, m_ICmp(Pred1, m_Specific(V), m_Specific(CI1)))) + return nullptr; + + Type *ITy = Op0->getType(); + + auto *AddInst = cast(Op0->getOperand(0)); + bool isNSW = AddInst->hasNoSignedWrap(); + bool isNUW = AddInst->hasNoUnsignedWrap(); + + const APInt &CI1V = CI1->getValue(); + const APInt &CI2V = CI2->getValue(); + const APInt Delta = CI2V - CI1V; + if (CI1V.isStrictlyPositive()) { + if (Delta == 2) { + if (Pred0 == ICmpInst::ICMP_UGE && Pred1 == ICmpInst::ICMP_SLE) + return getTrue(ITy); + if (Pred0 == ICmpInst::ICMP_SGE && Pred1 == ICmpInst::ICMP_SLE && isNSW) + return getTrue(ITy); + } + if (Delta == 1) { + if (Pred0 == ICmpInst::ICMP_UGT && Pred1 == ICmpInst::ICMP_SLE) + return getTrue(ITy); + if (Pred0 == ICmpInst::ICMP_SGT && Pred1 == ICmpInst::ICMP_SLE && isNSW) + return getTrue(ITy); + } + } + if (CI1V.getBoolValue() && isNUW) { + if (Delta == 2) + if (Pred0 == ICmpInst::ICMP_UGE && Pred1 == ICmpInst::ICMP_ULE) + return getTrue(ITy); + if (Delta == 1) + if (Pred0 == ICmpInst::ICMP_UGT && Pred1 == ICmpInst::ICMP_ULE) + return getTrue(ITy); + } + + return nullptr; } /// SimplifyOrInst - Given operands for an Or, see if we can @@ -1508,6 +1707,15 @@ static Value *SimplifyOrInst(Value *Op0, Value *Op1, const Query &Q, (A == Op0 || B == Op0)) return Constant::getAllOnesValue(Op0->getType()); + if (auto *ICILHS = dyn_cast(Op0)) { + if (auto *ICIRHS = dyn_cast(Op1)) { + if (Value *V = SimplifyOrOfICmps(ICILHS, ICIRHS)) + return V; + if (Value *V = SimplifyOrOfICmps(ICIRHS, ICILHS)) + return V; + } + } + // Try some generic simplifications for associative operations. if (Value *V = SimplifyAssociativeBinOp(Instruction::Or, Op0, Op1, Q, MaxRecurse)) @@ -1540,18 +1748,22 @@ static Value *SimplifyOrInst(Value *Op0, Value *Op1, const Query &Q, if ((C2->getValue() & (C2->getValue() + 1)) == 0 && // C2 == 0+1+ match(A, m_Add(m_Value(V1), m_Value(V2)))) { // Add commutes, try both ways. - if (V1 == B && MaskedValueIsZero(V2, C2->getValue())) + if (V1 == B && MaskedValueIsZero(V2, C2->getValue(), Q.DL, + 0, Q.AT, Q.CxtI, Q.DT)) return A; - if (V2 == B && MaskedValueIsZero(V1, C2->getValue())) + if (V2 == B && MaskedValueIsZero(V1, C2->getValue(), Q.DL, + 0, Q.AT, Q.CxtI, Q.DT)) return A; } // Or commutes, try both ways. if ((C1->getValue() & (C1->getValue() + 1)) == 0 && match(B, m_Add(m_Value(V1), m_Value(V2)))) { // Add commutes, try both ways. 
- if (V1 == A && MaskedValueIsZero(V2, C1->getValue())) + if (V1 == A && MaskedValueIsZero(V2, C1->getValue(), Q.DL, + 0, Q.AT, Q.CxtI, Q.DT)) return B; - if (V2 == A && MaskedValueIsZero(V1, C1->getValue())) + if (V2 == A && MaskedValueIsZero(V1, C1->getValue(), Q.DL, + 0, Q.AT, Q.CxtI, Q.DT)) return B; } } @@ -1568,8 +1780,10 @@ static Value *SimplifyOrInst(Value *Op0, Value *Op1, const Query &Q, Value *llvm::SimplifyOrInst(Value *Op0, Value *Op1, const DataLayout *DL, const TargetLibraryInfo *TLI, - const DominatorTree *DT) { - return ::SimplifyOrInst(Op0, Op1, Query (DL, TLI, DT), RecursionLimit); + const DominatorTree *DT, AssumptionTracker *AT, + const Instruction *CxtI) { + return ::SimplifyOrInst(Op0, Op1, Query (DL, TLI, DT, AT, CxtI), + RecursionLimit); } /// SimplifyXorInst - Given operands for a Xor, see if we can @@ -1623,8 +1837,10 @@ static Value *SimplifyXorInst(Value *Op0, Value *Op1, const Query &Q, Value *llvm::SimplifyXorInst(Value *Op0, Value *Op1, const DataLayout *DL, const TargetLibraryInfo *TLI, - const DominatorTree *DT) { - return ::SimplifyXorInst(Op0, Op1, Query (DL, TLI, DT), RecursionLimit); + const DominatorTree *DT, AssumptionTracker *AT, + const Instruction *CxtI) { + return ::SimplifyXorInst(Op0, Op1, Query (DL, TLI, DT, AT, CxtI), + RecursionLimit); } static Type *GetCompareTy(Value *Op) { @@ -1878,40 +2094,46 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS, return getTrue(ITy); case ICmpInst::ICMP_EQ: case ICmpInst::ICMP_ULE: - if (isKnownNonZero(LHS, Q.DL)) + if (isKnownNonZero(LHS, Q.DL, 0, Q.AT, Q.CxtI, Q.DT)) return getFalse(ITy); break; case ICmpInst::ICMP_NE: case ICmpInst::ICMP_UGT: - if (isKnownNonZero(LHS, Q.DL)) + if (isKnownNonZero(LHS, Q.DL, 0, Q.AT, Q.CxtI, Q.DT)) return getTrue(ITy); break; case ICmpInst::ICMP_SLT: - ComputeSignBit(LHS, LHSKnownNonNegative, LHSKnownNegative, Q.DL); + ComputeSignBit(LHS, LHSKnownNonNegative, LHSKnownNegative, Q.DL, + 0, Q.AT, Q.CxtI, Q.DT); if (LHSKnownNegative) return getTrue(ITy); if (LHSKnownNonNegative) return getFalse(ITy); break; case ICmpInst::ICMP_SLE: - ComputeSignBit(LHS, LHSKnownNonNegative, LHSKnownNegative, Q.DL); + ComputeSignBit(LHS, LHSKnownNonNegative, LHSKnownNegative, Q.DL, + 0, Q.AT, Q.CxtI, Q.DT); if (LHSKnownNegative) return getTrue(ITy); - if (LHSKnownNonNegative && isKnownNonZero(LHS, Q.DL)) + if (LHSKnownNonNegative && isKnownNonZero(LHS, Q.DL, + 0, Q.AT, Q.CxtI, Q.DT)) return getFalse(ITy); break; case ICmpInst::ICMP_SGE: - ComputeSignBit(LHS, LHSKnownNonNegative, LHSKnownNegative, Q.DL); + ComputeSignBit(LHS, LHSKnownNonNegative, LHSKnownNegative, Q.DL, + 0, Q.AT, Q.CxtI, Q.DT); if (LHSKnownNegative) return getFalse(ITy); if (LHSKnownNonNegative) return getTrue(ITy); break; case ICmpInst::ICMP_SGT: - ComputeSignBit(LHS, LHSKnownNonNegative, LHSKnownNegative, Q.DL); + ComputeSignBit(LHS, LHSKnownNonNegative, LHSKnownNegative, Q.DL, + 0, Q.AT, Q.CxtI, Q.DT); if (LHSKnownNegative) return getFalse(ITy); - if (LHSKnownNonNegative && isKnownNonZero(LHS, Q.DL)) + if (LHSKnownNonNegative && isKnownNonZero(LHS, Q.DL, + 0, Q.AT, Q.CxtI, Q.DT)) return getTrue(ITy); break; } @@ -1958,13 +2180,39 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS, Lower = (-Upper) + 1; } } else if (match(LHS, m_SDiv(m_Value(), m_ConstantInt(CI2)))) { - // 'sdiv x, CI2' produces [INT_MIN / CI2, INT_MAX / CI2]. 
APInt IntMin = APInt::getSignedMinValue(Width); APInt IntMax = APInt::getSignedMaxValue(Width); - APInt Val = CI2->getValue().abs(); - if (!Val.isMinValue()) { + APInt Val = CI2->getValue(); + if (Val.isAllOnesValue()) { + // 'sdiv x, -1' produces [INT_MIN + 1, INT_MAX] + // where CI2 != -1 and CI2 != 0 and CI2 != 1 + Lower = IntMin + 1; + Upper = IntMax + 1; + } else if (Val.countLeadingZeros() < Width - 1) { + // 'sdiv x, CI2' produces [INT_MIN / CI2, INT_MAX / CI2] + // where CI2 != -1 and CI2 != 0 and CI2 != 1 Lower = IntMin.sdiv(Val); - Upper = IntMax.sdiv(Val) + 1; + Upper = IntMax.sdiv(Val); + if (Lower.sgt(Upper)) + std::swap(Lower, Upper); + Upper = Upper + 1; + assert(Upper != Lower && "Upper part of range has wrapped!"); + } + } else if (match(LHS, m_NUWShl(m_ConstantInt(CI2), m_Value()))) { + // 'shl nuw CI2, x' produces [CI2, CI2 << CLZ(CI2)] + Lower = CI2->getValue(); + Upper = Lower.shl(Lower.countLeadingZeros()) + 1; + } else if (match(LHS, m_NSWShl(m_ConstantInt(CI2), m_Value()))) { + if (CI2->isNegative()) { + // 'shl nsw CI2, x' produces [CI2 << CLO(CI2)-1, CI2] + unsigned ShiftAmount = CI2->getValue().countLeadingOnes() - 1; + Lower = CI2->getValue().shl(ShiftAmount); + Upper = CI2->getValue() + 1; + } else { + // 'shl nsw CI2, x' produces [CI2, CI2 << CLZ(CI2)-1] + unsigned ShiftAmount = CI2->getValue().countLeadingZeros() - 1; + Lower = CI2->getValue(); + Upper = CI2->getValue().shl(ShiftAmount) + 1; } } else if (match(LHS, m_LShr(m_Value(), m_ConstantInt(CI2)))) { // 'lshr x, CI2' produces [0, UINT_MAX >> CI2]. @@ -2174,25 +2422,6 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS, } } - // If a bit is known to be zero for A and known to be one for B, - // then A and B cannot be equal. - if (ICmpInst::isEquality(Pred)) { - if (ConstantInt *CI = dyn_cast(RHS)) { - uint32_t BitWidth = CI->getBitWidth(); - APInt LHSKnownZero(BitWidth, 0); - APInt LHSKnownOne(BitWidth, 0); - computeKnownBits(LHS, LHSKnownZero, LHSKnownOne); - APInt RHSKnownZero(BitWidth, 0); - APInt RHSKnownOne(BitWidth, 0); - computeKnownBits(RHS, RHSKnownZero, RHSKnownOne); - if (((LHSKnownOne & RHSKnownZero) != 0) || - ((LHSKnownZero & RHSKnownOne) != 0)) - return (Pred == ICmpInst::ICMP_EQ) - ? ConstantInt::getFalse(CI->getContext()) - : ConstantInt::getTrue(CI->getContext()); - } - } - // Special logic for binary operators. 
BinaryOperator *LBO = dyn_cast(LHS); BinaryOperator *RBO = dyn_cast(RHS); @@ -2286,7 +2515,8 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS, break; case ICmpInst::ICMP_SGT: case ICmpInst::ICMP_SGE: - ComputeSignBit(RHS, KnownNonNegative, KnownNegative, Q.DL); + ComputeSignBit(RHS, KnownNonNegative, KnownNegative, Q.DL, + 0, Q.AT, Q.CxtI, Q.DT); if (!KnownNonNegative) break; // fall-through @@ -2296,7 +2526,8 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS, return getFalse(ITy); case ICmpInst::ICMP_SLT: case ICmpInst::ICMP_SLE: - ComputeSignBit(RHS, KnownNonNegative, KnownNegative, Q.DL); + ComputeSignBit(RHS, KnownNonNegative, KnownNegative, Q.DL, + 0, Q.AT, Q.CxtI, Q.DT); if (!KnownNonNegative) break; // fall-through @@ -2315,7 +2546,8 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS, break; case ICmpInst::ICMP_SGT: case ICmpInst::ICMP_SGE: - ComputeSignBit(LHS, KnownNonNegative, KnownNegative, Q.DL); + ComputeSignBit(LHS, KnownNonNegative, KnownNegative, Q.DL, + 0, Q.AT, Q.CxtI, Q.DT); if (!KnownNonNegative) break; // fall-through @@ -2325,7 +2557,8 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS, return getTrue(ITy); case ICmpInst::ICMP_SLT: case ICmpInst::ICMP_SLE: - ComputeSignBit(LHS, KnownNonNegative, KnownNegative, Q.DL); + ComputeSignBit(LHS, KnownNonNegative, KnownNegative, Q.DL, + 0, Q.AT, Q.CxtI, Q.DT); if (!KnownNonNegative) break; // fall-through @@ -2345,6 +2578,41 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS, return getTrue(ITy); } + // handle: + // CI2 << X == CI + // CI2 << X != CI + // + // where CI2 is a power of 2 and CI isn't + if (auto *CI = dyn_cast(RHS)) { + const APInt *CI2Val, *CIVal = &CI->getValue(); + if (LBO && match(LBO, m_Shl(m_APInt(CI2Val), m_Value())) && + CI2Val->isPowerOf2()) { + if (!CIVal->isPowerOf2()) { + // CI2 << X can equal zero in some circumstances, + // this simplification is unsafe if CI is zero. + // + // We know it is safe if: + // - The shift is nsw, we can't shift out the one bit. + // - The shift is nuw, we can't shift out the one bit. + // - CI2 is one + // - CI isn't zero + if (LBO->hasNoSignedWrap() || LBO->hasNoUnsignedWrap() || + *CI2Val == 1 || !CI->isZero()) { + if (Pred == ICmpInst::ICMP_EQ) + return ConstantInt::getFalse(RHS->getContext()); + if (Pred == ICmpInst::ICMP_NE) + return ConstantInt::getTrue(RHS->getContext()); + } + } + if (CIVal->isSignBit() && *CI2Val == 1) { + if (Pred == ICmpInst::ICMP_UGT) + return ConstantInt::getFalse(RHS->getContext()); + if (Pred == ICmpInst::ICMP_ULE) + return ConstantInt::getTrue(RHS->getContext()); + } + } + } + if (MaxRecurse && LBO && RBO && LBO->getOpcode() == RBO->getOpcode() && LBO->getOperand(1) == RBO->getOperand(1)) { switch (LBO->getOpcode()) { @@ -2592,6 +2860,23 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS, } } + // If a bit is known to be zero for A and known to be one for B, + // then A and B cannot be equal. + if (ICmpInst::isEquality(Pred)) { + if (ConstantInt *CI = dyn_cast(RHS)) { + uint32_t BitWidth = CI->getBitWidth(); + APInt LHSKnownZero(BitWidth, 0); + APInt LHSKnownOne(BitWidth, 0); + computeKnownBits(LHS, LHSKnownZero, LHSKnownOne, Q.DL, /*Depth=*/0, Q.AT, + Q.CxtI, Q.DT); + const APInt &RHSVal = CI->getValue(); + if (((LHSKnownZero & RHSVal) != 0) || ((LHSKnownOne & ~RHSVal) != 0)) + return Pred == ICmpInst::ICMP_EQ + ? 
ConstantInt::getFalse(CI->getContext()) + : ConstantInt::getTrue(CI->getContext()); + } + } + // If the comparison is with the result of a select instruction, check whether // comparing with either branch of the select always yields the same value. if (isa(LHS) || isa(RHS)) @@ -2610,8 +2895,10 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS, Value *llvm::SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS, const DataLayout *DL, const TargetLibraryInfo *TLI, - const DominatorTree *DT) { - return ::SimplifyICmpInst(Predicate, LHS, RHS, Query (DL, TLI, DT), + const DominatorTree *DT, + AssumptionTracker *AT, + Instruction *CxtI) { + return ::SimplifyICmpInst(Predicate, LHS, RHS, Query (DL, TLI, DT, AT, CxtI), RecursionLimit); } @@ -2707,8 +2994,10 @@ static Value *SimplifyFCmpInst(unsigned Predicate, Value *LHS, Value *RHS, Value *llvm::SimplifyFCmpInst(unsigned Predicate, Value *LHS, Value *RHS, const DataLayout *DL, const TargetLibraryInfo *TLI, - const DominatorTree *DT) { - return ::SimplifyFCmpInst(Predicate, LHS, RHS, Query (DL, TLI, DT), + const DominatorTree *DT, + AssumptionTracker *AT, + const Instruction *CxtI) { + return ::SimplifyFCmpInst(Predicate, LHS, RHS, Query (DL, TLI, DT, AT, CxtI), RecursionLimit); } @@ -2746,9 +3035,11 @@ static Value *SimplifySelectInst(Value *CondVal, Value *TrueVal, Value *llvm::SimplifySelectInst(Value *Cond, Value *TrueVal, Value *FalseVal, const DataLayout *DL, const TargetLibraryInfo *TLI, - const DominatorTree *DT) { - return ::SimplifySelectInst(Cond, TrueVal, FalseVal, Query (DL, TLI, DT), - RecursionLimit); + const DominatorTree *DT, + AssumptionTracker *AT, + const Instruction *CxtI) { + return ::SimplifySelectInst(Cond, TrueVal, FalseVal, + Query (DL, TLI, DT, AT, CxtI), RecursionLimit); } /// SimplifyGEPInst - Given operands for an GetElementPtrInst, see if we can @@ -2756,29 +3047,72 @@ Value *llvm::SimplifySelectInst(Value *Cond, Value *TrueVal, Value *FalseVal, static Value *SimplifyGEPInst(ArrayRef Ops, const Query &Q, unsigned) { // The type of the GEP pointer operand. PointerType *PtrTy = cast(Ops[0]->getType()->getScalarType()); + unsigned AS = PtrTy->getAddressSpace(); // getelementptr P -> P. if (Ops.size() == 1) return Ops[0]; - if (isa(Ops[0])) { - // Compute the (pointer) type returned by the GEP instruction. - Type *LastType = GetElementPtrInst::getIndexedType(PtrTy, Ops.slice(1)); - Type *GEPTy = PointerType::get(LastType, PtrTy->getAddressSpace()); - if (VectorType *VT = dyn_cast(Ops[0]->getType())) - GEPTy = VectorType::get(GEPTy, VT->getNumElements()); + // Compute the (pointer) type returned by the GEP instruction. + Type *LastType = GetElementPtrInst::getIndexedType(PtrTy, Ops.slice(1)); + Type *GEPTy = PointerType::get(LastType, AS); + if (VectorType *VT = dyn_cast(Ops[0]->getType())) + GEPTy = VectorType::get(GEPTy, VT->getNumElements()); + + if (isa(Ops[0])) return UndefValue::get(GEPTy); - } if (Ops.size() == 2) { // getelementptr P, 0 -> P. if (match(Ops[1], m_Zero())) return Ops[0]; - // getelementptr P, N -> P if P points to a type of zero size. - if (Q.DL) { - Type *Ty = PtrTy->getElementType(); - if (Ty->isSized() && Q.DL->getTypeAllocSize(Ty) == 0) + + Type *Ty = PtrTy->getElementType(); + if (Q.DL && Ty->isSized()) { + Value *P; + uint64_t C; + uint64_t TyAllocSize = Q.DL->getTypeAllocSize(Ty); + // getelementptr P, N -> P if P points to a type of zero size. 
+ if (TyAllocSize == 0) return Ops[0]; + + // The following transforms are only safe if the ptrtoint cast + // doesn't truncate the pointers. + if (Ops[1]->getType()->getScalarSizeInBits() == + Q.DL->getPointerSizeInBits(AS)) { + auto PtrToIntOrZero = [GEPTy](Value *P) -> Value * { + if (match(P, m_Zero())) + return Constant::getNullValue(GEPTy); + Value *Temp; + if (match(P, m_PtrToInt(m_Value(Temp)))) + if (Temp->getType() == GEPTy) + return Temp; + return nullptr; + }; + + // getelementptr V, (sub P, V) -> P if P points to a type of size 1. + if (TyAllocSize == 1 && + match(Ops[1], m_Sub(m_Value(P), m_PtrToInt(m_Specific(Ops[0]))))) + if (Value *R = PtrToIntOrZero(P)) + return R; + + // getelementptr V, (ashr (sub P, V), C) -> Q + // if P points to a type of size 1 << C. + if (match(Ops[1], + m_AShr(m_Sub(m_Value(P), m_PtrToInt(m_Specific(Ops[0]))), + m_ConstantInt(C))) && + TyAllocSize == 1ULL << C) + if (Value *R = PtrToIntOrZero(P)) + return R; + + // getelementptr V, (sdiv (sub P, V), C) -> Q + // if P points to a type of size C. + if (match(Ops[1], + m_SDiv(m_Sub(m_Value(P), m_PtrToInt(m_Specific(Ops[0]))), + m_SpecificInt(TyAllocSize)))) + if (Value *R = PtrToIntOrZero(P)) + return R; + } } } @@ -2792,8 +3126,9 @@ static Value *SimplifyGEPInst(ArrayRef Ops, const Query &Q, unsigned) { Value *llvm::SimplifyGEPInst(ArrayRef Ops, const DataLayout *DL, const TargetLibraryInfo *TLI, - const DominatorTree *DT) { - return ::SimplifyGEPInst(Ops, Query (DL, TLI, DT), RecursionLimit); + const DominatorTree *DT, AssumptionTracker *AT, + const Instruction *CxtI) { + return ::SimplifyGEPInst(Ops, Query (DL, TLI, DT, AT, CxtI), RecursionLimit); } /// SimplifyInsertValueInst - Given operands for an InsertValueInst, see if we @@ -2829,8 +3164,11 @@ Value *llvm::SimplifyInsertValueInst(Value *Agg, Value *Val, ArrayRef Idxs, const DataLayout *DL, const TargetLibraryInfo *TLI, - const DominatorTree *DT) { - return ::SimplifyInsertValueInst(Agg, Val, Idxs, Query (DL, TLI, DT), + const DominatorTree *DT, + AssumptionTracker *AT, + const Instruction *CxtI) { + return ::SimplifyInsertValueInst(Agg, Val, Idxs, + Query (DL, TLI, DT, AT, CxtI), RecursionLimit); } @@ -2877,8 +3215,11 @@ static Value *SimplifyTruncInst(Value *Op, Type *Ty, const Query &Q, unsigned) { Value *llvm::SimplifyTruncInst(Value *Op, Type *Ty, const DataLayout *DL, const TargetLibraryInfo *TLI, - const DominatorTree *DT) { - return ::SimplifyTruncInst(Op, Ty, Query (DL, TLI, DT), RecursionLimit); + const DominatorTree *DT, + AssumptionTracker *AT, + const Instruction *CxtI) { + return ::SimplifyTruncInst(Op, Ty, Query (DL, TLI, DT, AT, CxtI), + RecursionLimit); } //=== Helper functions for higher up the class hierarchy. 
@@ -2950,8 +3291,10 @@ static Value *SimplifyBinOp(unsigned Opcode, Value *LHS, Value *RHS, Value *llvm::SimplifyBinOp(unsigned Opcode, Value *LHS, Value *RHS, const DataLayout *DL, const TargetLibraryInfo *TLI, - const DominatorTree *DT) { - return ::SimplifyBinOp(Opcode, LHS, RHS, Query (DL, TLI, DT), RecursionLimit); + const DominatorTree *DT, AssumptionTracker *AT, + const Instruction *CxtI) { + return ::SimplifyBinOp(Opcode, LHS, RHS, Query (DL, TLI, DT, AT, CxtI), + RecursionLimit); } /// SimplifyCmpInst - Given operands for a CmpInst, see if we can @@ -2965,8 +3308,9 @@ static Value *SimplifyCmpInst(unsigned Predicate, Value *LHS, Value *RHS, Value *llvm::SimplifyCmpInst(unsigned Predicate, Value *LHS, Value *RHS, const DataLayout *DL, const TargetLibraryInfo *TLI, - const DominatorTree *DT) { - return ::SimplifyCmpInst(Predicate, LHS, RHS, Query (DL, TLI, DT), + const DominatorTree *DT, AssumptionTracker *AT, + const Instruction *CxtI) { + return ::SimplifyCmpInst(Predicate, LHS, RHS, Query (DL, TLI, DT, AT, CxtI), RecursionLimit); } @@ -3041,23 +3385,26 @@ static Value *SimplifyCall(Value *V, IterTy ArgBegin, IterTy ArgEnd, Value *llvm::SimplifyCall(Value *V, User::op_iterator ArgBegin, User::op_iterator ArgEnd, const DataLayout *DL, const TargetLibraryInfo *TLI, - const DominatorTree *DT) { - return ::SimplifyCall(V, ArgBegin, ArgEnd, Query(DL, TLI, DT), + const DominatorTree *DT, AssumptionTracker *AT, + const Instruction *CxtI) { + return ::SimplifyCall(V, ArgBegin, ArgEnd, Query(DL, TLI, DT, AT, CxtI), RecursionLimit); } Value *llvm::SimplifyCall(Value *V, ArrayRef Args, const DataLayout *DL, const TargetLibraryInfo *TLI, - const DominatorTree *DT) { - return ::SimplifyCall(V, Args.begin(), Args.end(), Query(DL, TLI, DT), - RecursionLimit); + const DominatorTree *DT, AssumptionTracker *AT, + const Instruction *CxtI) { + return ::SimplifyCall(V, Args.begin(), Args.end(), + Query(DL, TLI, DT, AT, CxtI), RecursionLimit); } /// SimplifyInstruction - See if we can compute a simplified version of this /// instruction. If not, this returns null. 
Value *llvm::SimplifyInstruction(Instruction *I, const DataLayout *DL, const TargetLibraryInfo *TLI, - const DominatorTree *DT) { + const DominatorTree *DT, + AssumptionTracker *AT) { Value *Result; switch (I->getOpcode()) { @@ -3066,109 +3413,122 @@ Value *llvm::SimplifyInstruction(Instruction *I, const DataLayout *DL, break; case Instruction::FAdd: Result = SimplifyFAddInst(I->getOperand(0), I->getOperand(1), - I->getFastMathFlags(), DL, TLI, DT); + I->getFastMathFlags(), DL, TLI, DT, AT, I); break; case Instruction::Add: Result = SimplifyAddInst(I->getOperand(0), I->getOperand(1), cast(I)->hasNoSignedWrap(), cast(I)->hasNoUnsignedWrap(), - DL, TLI, DT); + DL, TLI, DT, AT, I); break; case Instruction::FSub: Result = SimplifyFSubInst(I->getOperand(0), I->getOperand(1), - I->getFastMathFlags(), DL, TLI, DT); + I->getFastMathFlags(), DL, TLI, DT, AT, I); break; case Instruction::Sub: Result = SimplifySubInst(I->getOperand(0), I->getOperand(1), cast(I)->hasNoSignedWrap(), cast(I)->hasNoUnsignedWrap(), - DL, TLI, DT); + DL, TLI, DT, AT, I); break; case Instruction::FMul: Result = SimplifyFMulInst(I->getOperand(0), I->getOperand(1), - I->getFastMathFlags(), DL, TLI, DT); + I->getFastMathFlags(), DL, TLI, DT, AT, I); break; case Instruction::Mul: - Result = SimplifyMulInst(I->getOperand(0), I->getOperand(1), DL, TLI, DT); + Result = SimplifyMulInst(I->getOperand(0), I->getOperand(1), + DL, TLI, DT, AT, I); break; case Instruction::SDiv: - Result = SimplifySDivInst(I->getOperand(0), I->getOperand(1), DL, TLI, DT); + Result = SimplifySDivInst(I->getOperand(0), I->getOperand(1), + DL, TLI, DT, AT, I); break; case Instruction::UDiv: - Result = SimplifyUDivInst(I->getOperand(0), I->getOperand(1), DL, TLI, DT); + Result = SimplifyUDivInst(I->getOperand(0), I->getOperand(1), + DL, TLI, DT, AT, I); break; case Instruction::FDiv: - Result = SimplifyFDivInst(I->getOperand(0), I->getOperand(1), DL, TLI, DT); + Result = SimplifyFDivInst(I->getOperand(0), I->getOperand(1), + DL, TLI, DT, AT, I); break; case Instruction::SRem: - Result = SimplifySRemInst(I->getOperand(0), I->getOperand(1), DL, TLI, DT); + Result = SimplifySRemInst(I->getOperand(0), I->getOperand(1), + DL, TLI, DT, AT, I); break; case Instruction::URem: - Result = SimplifyURemInst(I->getOperand(0), I->getOperand(1), DL, TLI, DT); + Result = SimplifyURemInst(I->getOperand(0), I->getOperand(1), + DL, TLI, DT, AT, I); break; case Instruction::FRem: - Result = SimplifyFRemInst(I->getOperand(0), I->getOperand(1), DL, TLI, DT); + Result = SimplifyFRemInst(I->getOperand(0), I->getOperand(1), + DL, TLI, DT, AT, I); break; case Instruction::Shl: Result = SimplifyShlInst(I->getOperand(0), I->getOperand(1), cast(I)->hasNoSignedWrap(), cast(I)->hasNoUnsignedWrap(), - DL, TLI, DT); + DL, TLI, DT, AT, I); break; case Instruction::LShr: Result = SimplifyLShrInst(I->getOperand(0), I->getOperand(1), cast(I)->isExact(), - DL, TLI, DT); + DL, TLI, DT, AT, I); break; case Instruction::AShr: Result = SimplifyAShrInst(I->getOperand(0), I->getOperand(1), cast(I)->isExact(), - DL, TLI, DT); + DL, TLI, DT, AT, I); break; case Instruction::And: - Result = SimplifyAndInst(I->getOperand(0), I->getOperand(1), DL, TLI, DT); + Result = SimplifyAndInst(I->getOperand(0), I->getOperand(1), + DL, TLI, DT, AT, I); break; case Instruction::Or: - Result = SimplifyOrInst(I->getOperand(0), I->getOperand(1), DL, TLI, DT); + Result = SimplifyOrInst(I->getOperand(0), I->getOperand(1), DL, TLI, DT, + AT, I); break; case Instruction::Xor: - Result = SimplifyXorInst(I->getOperand(0), 
I->getOperand(1), DL, TLI, DT); + Result = SimplifyXorInst(I->getOperand(0), I->getOperand(1), + DL, TLI, DT, AT, I); break; case Instruction::ICmp: Result = SimplifyICmpInst(cast(I)->getPredicate(), - I->getOperand(0), I->getOperand(1), DL, TLI, DT); + I->getOperand(0), I->getOperand(1), + DL, TLI, DT, AT, I); break; case Instruction::FCmp: Result = SimplifyFCmpInst(cast(I)->getPredicate(), - I->getOperand(0), I->getOperand(1), DL, TLI, DT); + I->getOperand(0), I->getOperand(1), + DL, TLI, DT, AT, I); break; case Instruction::Select: Result = SimplifySelectInst(I->getOperand(0), I->getOperand(1), - I->getOperand(2), DL, TLI, DT); + I->getOperand(2), DL, TLI, DT, AT, I); break; case Instruction::GetElementPtr: { SmallVector Ops(I->op_begin(), I->op_end()); - Result = SimplifyGEPInst(Ops, DL, TLI, DT); + Result = SimplifyGEPInst(Ops, DL, TLI, DT, AT, I); break; } case Instruction::InsertValue: { InsertValueInst *IV = cast(I); Result = SimplifyInsertValueInst(IV->getAggregateOperand(), IV->getInsertedValueOperand(), - IV->getIndices(), DL, TLI, DT); + IV->getIndices(), DL, TLI, DT, AT, I); break; } case Instruction::PHI: - Result = SimplifyPHINode(cast(I), Query (DL, TLI, DT)); + Result = SimplifyPHINode(cast(I), Query (DL, TLI, DT, AT, I)); break; case Instruction::Call: { CallSite CS(cast(I)); Result = SimplifyCall(CS.getCalledValue(), CS.arg_begin(), CS.arg_end(), - DL, TLI, DT); + DL, TLI, DT, AT, I); break; } case Instruction::Trunc: - Result = SimplifyTruncInst(I->getOperand(0), I->getType(), DL, TLI, DT); + Result = SimplifyTruncInst(I->getOperand(0), I->getType(), DL, TLI, DT, + AT, I); break; } @@ -3192,7 +3552,8 @@ Value *llvm::SimplifyInstruction(Instruction *I, const DataLayout *DL, static bool replaceAndRecursivelySimplifyImpl(Instruction *I, Value *SimpleV, const DataLayout *DL, const TargetLibraryInfo *TLI, - const DominatorTree *DT) { + const DominatorTree *DT, + AssumptionTracker *AT) { bool Simplified = false; SmallSetVector Worklist; @@ -3219,7 +3580,7 @@ static bool replaceAndRecursivelySimplifyImpl(Instruction *I, Value *SimpleV, I = Worklist[Idx]; // See if this instruction simplifies. 
- SimpleV = SimplifyInstruction(I, DL, TLI, DT); + SimpleV = SimplifyInstruction(I, DL, TLI, DT, AT); if (!SimpleV) continue; @@ -3245,15 +3606,17 @@ static bool replaceAndRecursivelySimplifyImpl(Instruction *I, Value *SimpleV, bool llvm::recursivelySimplifyInstruction(Instruction *I, const DataLayout *DL, const TargetLibraryInfo *TLI, - const DominatorTree *DT) { - return replaceAndRecursivelySimplifyImpl(I, nullptr, DL, TLI, DT); + const DominatorTree *DT, + AssumptionTracker *AT) { + return replaceAndRecursivelySimplifyImpl(I, nullptr, DL, TLI, DT, AT); } bool llvm::replaceAndRecursivelySimplify(Instruction *I, Value *SimpleV, const DataLayout *DL, const TargetLibraryInfo *TLI, - const DominatorTree *DT) { + const DominatorTree *DT, + AssumptionTracker *AT) { assert(I != SimpleV && "replaceAndRecursivelySimplify(X,X) is not valid!"); assert(SimpleV && "Must provide a simplified value."); - return replaceAndRecursivelySimplifyImpl(I, SimpleV, DL, TLI, DT); + return replaceAndRecursivelySimplifyImpl(I, SimpleV, DL, TLI, DT, AT); } diff --git a/lib/Analysis/JumpInstrTableInfo.cpp b/lib/Analysis/JumpInstrTableInfo.cpp index b5b4265..7aae2a5 100644 --- a/lib/Analysis/JumpInstrTableInfo.cpp +++ b/lib/Analysis/JumpInstrTableInfo.cpp @@ -17,6 +17,7 @@ #include "llvm/Analysis/Passes.h" #include "llvm/IR/Function.h" #include "llvm/IR/Type.h" +#include "llvm/Support/MathExtras.h" using namespace llvm; @@ -28,7 +29,21 @@ ImmutablePass *llvm::createJumpInstrTableInfoPass() { return new JumpInstrTableInfo(); } -JumpInstrTableInfo::JumpInstrTableInfo() : ImmutablePass(ID), Tables() { +ModulePass *llvm::createJumpInstrTableInfoPass(unsigned Bound) { + // This cast is always safe, since Bound is always in a subset of uint64_t. + uint64_t B = static_cast(Bound); + return new JumpInstrTableInfo(B); +} + +JumpInstrTableInfo::JumpInstrTableInfo(uint64_t ByteAlign) + : ImmutablePass(ID), Tables(), ByteAlignment(ByteAlign) { + if (!llvm::isPowerOf2_64(ByteAlign)) { + // Note that we don't explicitly handle overflow here, since we handle the 0 + // case explicitly when a caller actually tries to create jumptable entries, + // and this is the return value on overflow. + ByteAlignment = llvm::NextPowerOf2(ByteAlign); + } + initializeJumpInstrTableInfoPass(*PassRegistry::getPassRegistry()); } diff --git a/lib/Analysis/LazyCallGraph.cpp b/lib/Analysis/LazyCallGraph.cpp index e073616..767da4e 100644 --- a/lib/Analysis/LazyCallGraph.cpp +++ b/lib/Analysis/LazyCallGraph.cpp @@ -48,7 +48,7 @@ static void findCallees( } for (Value *Op : C->operand_values()) - if (Visited.insert(cast(Op))) + if (Visited.insert(cast(Op)).second) Worklist.push_back(cast(Op)); } } @@ -66,7 +66,7 @@ LazyCallGraph::Node::Node(LazyCallGraph &G, Function &F) for (Instruction &I : BB) for (Value *Op : I.operand_values()) if (Constant *C = dyn_cast(Op)) - if (Visited.insert(C)) + if (Visited.insert(C).second) Worklist.push_back(C); // We've collected all the constant (and thus potentially function or @@ -113,7 +113,7 @@ LazyCallGraph::LazyCallGraph(Module &M) : NextDFSNumber(0) { SmallPtrSet Visited; for (GlobalVariable &GV : M.globals()) if (GV.hasInitializer()) - if (Visited.insert(GV.getInitializer())) + if (Visited.insert(GV.getInitializer()).second) Worklist.push_back(GV.getInitializer()); DEBUG(dbgs() << " Adding functions referenced by global initializers to the " @@ -688,7 +688,7 @@ static void printNodes(raw_ostream &OS, LazyCallGraph::Node &N, SmallPtrSetImpl &Printed) { // Recurse depth first through the nodes. 
for (LazyCallGraph::Node &ChildN : N) - if (Printed.insert(&ChildN)) + if (Printed.insert(&ChildN).second) printNodes(OS, ChildN, Printed); OS << " Call edges in function: " << N.getFunction().getName() << "\n"; @@ -717,7 +717,7 @@ PreservedAnalyses LazyCallGraphPrinterPass::run(Module *M, SmallPtrSet Printed; for (LazyCallGraph::Node &N : G) - if (Printed.insert(&N)) + if (Printed.insert(&N).second) printNodes(OS, N, Printed); for (LazyCallGraph::SCC &SCC : G.postorder_sccs()) diff --git a/lib/Analysis/LazyValueInfo.cpp b/lib/Analysis/LazyValueInfo.cpp index 9f919f7..c712c9f 100644 --- a/lib/Analysis/LazyValueInfo.cpp +++ b/lib/Analysis/LazyValueInfo.cpp @@ -15,12 +15,14 @@ #include "llvm/Analysis/LazyValueInfo.h" #include "llvm/ADT/DenseSet.h" #include "llvm/ADT/STLExtras.h" +#include "llvm/Analysis/AssumptionTracker.h" #include "llvm/Analysis/ConstantFolding.h" #include "llvm/Analysis/ValueTracking.h" #include "llvm/IR/CFG.h" #include "llvm/IR/ConstantRange.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DataLayout.h" +#include "llvm/IR/Dominators.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/PatternMatch.h" @@ -38,6 +40,7 @@ using namespace PatternMatch; char LazyValueInfo::ID = 0; INITIALIZE_PASS_BEGIN(LazyValueInfo, "lazy-value-info", "Lazy Value Information Analysis", false, true) +INITIALIZE_PASS_DEPENDENCY(AssumptionTracker) INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfo) INITIALIZE_PASS_END(LazyValueInfo, "lazy-value-info", "Lazy Value Information Analysis", false, true) @@ -338,6 +341,13 @@ namespace { /// during a query. It basically emulates the callstack of the naive /// recursive value lookup process. std::stack > BlockValueStack; + + /// A pointer to the cache of @llvm.assume calls. + AssumptionTracker *AT; + /// An optional DL pointer. + const DataLayout *DL; + /// An optional DT pointer. + DominatorTree *DT; friend struct LVIValueHandle; @@ -364,7 +374,8 @@ namespace { LVILatticeVal getBlockValue(Value *Val, BasicBlock *BB); bool getEdgeValue(Value *V, BasicBlock *F, BasicBlock *T, - LVILatticeVal &Result); + LVILatticeVal &Result, + Instruction *CxtI = nullptr); bool hasBlockValue(Value *Val, BasicBlock *BB); // These methods process one work item and may add more. A false value @@ -377,6 +388,8 @@ namespace { PHINode *PN, BasicBlock *BB); bool solveBlockValueConstantRange(LVILatticeVal &BBLV, Instruction *BBI, BasicBlock *BB); + void mergeAssumeBlockValueConstantRange(Value *Val, LVILatticeVal &BBLV, + Instruction *BBI); void solve(); @@ -387,11 +400,18 @@ namespace { public: /// getValueInBlock - This is the query interface to determine the lattice /// value for the specified Value* at the end of the specified block. - LVILatticeVal getValueInBlock(Value *V, BasicBlock *BB); + LVILatticeVal getValueInBlock(Value *V, BasicBlock *BB, + Instruction *CxtI = nullptr); + + /// getValueAt - This is the query interface to determine the lattice + /// value for the specified Value* at the specified instruction (generally + /// from an assume intrinsic). + LVILatticeVal getValueAt(Value *V, Instruction *CxtI); /// getValueOnEdge - This is the query interface to determine the lattice /// value for the specified Value* that is true on the specified edge. 
- LVILatticeVal getValueOnEdge(Value *V, BasicBlock *FromBB,BasicBlock *ToBB); + LVILatticeVal getValueOnEdge(Value *V, BasicBlock *FromBB,BasicBlock *ToBB, + Instruction *CxtI = nullptr); /// threadEdge - This is the update interface to inform the cache that an /// edge from PredBB to OldSucc has been threaded to be from PredBB to @@ -408,6 +428,10 @@ namespace { ValueCache.clear(); OverDefinedCache.clear(); } + + LazyValueInfoCache(AssumptionTracker *AT, + const DataLayout *DL = nullptr, + DominatorTree *DT = nullptr) : AT(AT), DL(DL), DT(DT) {} }; } // end anonymous namespace @@ -500,7 +524,6 @@ bool LazyValueInfoCache::solveBlockValue(Value *Val, BasicBlock *BB) { // cache needs updating, i.e. if we have solve a new value or not. OverDefinedCacheUpdater ODCacheUpdater(Val, BB, BBLV, this); - // If we've already computed this block's value, return it. if (!BBLV.isUndefined()) { DEBUG(dbgs() << " reuse BB '" << BB->getName() << "' val=" << BBLV <<'\n'); @@ -669,7 +692,10 @@ bool LazyValueInfoCache::solveBlockValuePHINode(LVILatticeVal &BBLV, BasicBlock *PhiBB = PN->getIncomingBlock(i); Value *PhiVal = PN->getIncomingValue(i); LVILatticeVal EdgeResult; - EdgesMissing |= !getEdgeValue(PhiVal, PhiBB, BB, EdgeResult); + // Note that we can provide PN as the context value to getEdgeValue, even + // though the results will be cached, because PN is the value being used as + // the cache key in the caller. + EdgesMissing |= !getEdgeValue(PhiVal, PhiBB, BB, EdgeResult, PN); if (EdgesMissing) continue; @@ -694,6 +720,36 @@ bool LazyValueInfoCache::solveBlockValuePHINode(LVILatticeVal &BBLV, return true; } +static bool getValueFromFromCondition(Value *Val, ICmpInst *ICI, + LVILatticeVal &Result, + bool isTrueDest = true); + +// If we can determine a constant range for the value Val at the context +// provided by the instruction BBI, then merge it into BBLV. If we did find a +// constant range, return true. +void LazyValueInfoCache::mergeAssumeBlockValueConstantRange( + Value *Val, LVILatticeVal &BBLV, Instruction *BBI) { + BBI = BBI ? BBI : dyn_cast(Val); + if (!BBI) + return; + + for (auto &I : AT->assumptions(BBI->getParent()->getParent())) { + if (!isValidAssumeForContext(I, BBI, DL, DT)) + continue; + + Value *C = I->getArgOperand(0); + if (ICmpInst *ICI = dyn_cast(C)) { + LVILatticeVal Result; + if (getValueFromFromCondition(Val, ICI, Result)) { + if (BBLV.isOverdefined()) + BBLV = Result; + else + BBLV.mergeIn(Result); + } + } + } +} + bool LazyValueInfoCache::solveBlockValueConstantRange(LVILatticeVal &BBLV, Instruction *BBI, BasicBlock *BB) { @@ -704,6 +760,7 @@ bool LazyValueInfoCache::solveBlockValueConstantRange(LVILatticeVal &BBLV, } LVILatticeVal LHSVal = getBlockValue(BBI->getOperand(0), BB); + mergeAssumeBlockValueConstantRange(BBI->getOperand(0), LHSVal, BBI); if (!LHSVal.isConstantRange()) { BBLV.markOverdefined(); return true; @@ -775,6 +832,47 @@ bool LazyValueInfoCache::solveBlockValueConstantRange(LVILatticeVal &BBLV, return true; } +bool getValueFromFromCondition(Value *Val, ICmpInst *ICI, + LVILatticeVal &Result, bool isTrueDest) { + if (ICI && isa(ICI->getOperand(1))) { + if (ICI->isEquality() && ICI->getOperand(0) == Val) { + // We know that V has the RHS constant if this is a true SETEQ or + // false SETNE. 
+ if (isTrueDest == (ICI->getPredicate() == ICmpInst::ICMP_EQ)) + Result = LVILatticeVal::get(cast(ICI->getOperand(1))); + else + Result = LVILatticeVal::getNot(cast(ICI->getOperand(1))); + return true; + } + + // Recognize the range checking idiom that InstCombine produces. + // (X-C1) u< C2 --> [C1, C1+C2) + ConstantInt *NegOffset = nullptr; + if (ICI->getPredicate() == ICmpInst::ICMP_ULT) + match(ICI->getOperand(0), m_Add(m_Specific(Val), + m_ConstantInt(NegOffset))); + + ConstantInt *CI = dyn_cast(ICI->getOperand(1)); + if (CI && (ICI->getOperand(0) == Val || NegOffset)) { + // Calculate the range of values that would satisfy the comparison. + ConstantRange CmpRange(CI->getValue()); + ConstantRange TrueValues = + ConstantRange::makeICmpRegion(ICI->getPredicate(), CmpRange); + + if (NegOffset) // Apply the offset from above. + TrueValues = TrueValues.subtract(NegOffset->getValue()); + + // If we're interested in the false dest, invert the condition. + if (!isTrueDest) TrueValues = TrueValues.inverse(); + + Result = LVILatticeVal::getRange(TrueValues); + return true; + } + } + + return false; +} + /// \brief Compute the value of Val on the edge BBFrom -> BBTo. Returns false if /// Val is not constrained on the edge. static bool getEdgeValueLocal(Value *Val, BasicBlock *BBFrom, @@ -801,41 +899,8 @@ static bool getEdgeValueLocal(Value *Val, BasicBlock *BBFrom, // If the condition of the branch is an equality comparison, we may be // able to infer the value. ICmpInst *ICI = dyn_cast(BI->getCondition()); - if (ICI && isa(ICI->getOperand(1))) { - if (ICI->isEquality() && ICI->getOperand(0) == Val) { - // We know that V has the RHS constant if this is a true SETEQ or - // false SETNE. - if (isTrueDest == (ICI->getPredicate() == ICmpInst::ICMP_EQ)) - Result = LVILatticeVal::get(cast(ICI->getOperand(1))); - else - Result = LVILatticeVal::getNot(cast(ICI->getOperand(1))); - return true; - } - - // Recognize the range checking idiom that InstCombine produces. - // (X-C1) u< C2 --> [C1, C1+C2) - ConstantInt *NegOffset = nullptr; - if (ICI->getPredicate() == ICmpInst::ICMP_ULT) - match(ICI->getOperand(0), m_Add(m_Specific(Val), - m_ConstantInt(NegOffset))); - - ConstantInt *CI = dyn_cast(ICI->getOperand(1)); - if (CI && (ICI->getOperand(0) == Val || NegOffset)) { - // Calculate the range of values that would satisfy the comparison. - ConstantRange CmpRange(CI->getValue()); - ConstantRange TrueValues = - ConstantRange::makeICmpRegion(ICI->getPredicate(), CmpRange); - - if (NegOffset) // Apply the offset from above. - TrueValues = TrueValues.subtract(NegOffset->getValue()); - - // If we're interested in the false dest, invert the condition. - if (!isTrueDest) TrueValues = TrueValues.inverse(); - - Result = LVILatticeVal::getRange(TrueValues); - return true; - } - } + if (getValueFromFromCondition(Val, ICI, Result, isTrueDest)) + return true; } } @@ -869,7 +934,8 @@ static bool getEdgeValueLocal(Value *Val, BasicBlock *BBFrom, /// \brief Compute the value of Val on the edge BBFrom -> BBTo, or the value at /// the basic block if the edge does not constraint Val. bool LazyValueInfoCache::getEdgeValue(Value *Val, BasicBlock *BBFrom, - BasicBlock *BBTo, LVILatticeVal &Result) { + BasicBlock *BBTo, LVILatticeVal &Result, + Instruction *CxtI) { // If already a constant, there is nothing to compute. 
if (Constant *VC = dyn_cast(Val)) { Result = LVILatticeVal::get(VC); @@ -891,6 +957,10 @@ bool LazyValueInfoCache::getEdgeValue(Value *Val, BasicBlock *BBFrom, // Try to intersect ranges of the BB and the constraint on the edge. LVILatticeVal InBlock = getBlockValue(Val, BBFrom); + mergeAssumeBlockValueConstantRange(Val, InBlock, BBFrom->getTerminator()); + // See note on the use of the CxtI with mergeAssumeBlockValueConstantRange, + // and caching, below. + mergeAssumeBlockValueConstantRange(Val, InBlock, CxtI); if (!InBlock.isConstantRange()) return true; @@ -907,30 +977,54 @@ bool LazyValueInfoCache::getEdgeValue(Value *Val, BasicBlock *BBFrom, // if we couldn't compute the value on the edge, use the value from the BB Result = getBlockValue(Val, BBFrom); + mergeAssumeBlockValueConstantRange(Val, Result, BBFrom->getTerminator()); + // We can use the context instruction (generically the ultimate instruction + // the calling pass is trying to simplify) here, even though the result of + // this function is generally cached when called from the solve* functions + // (and that cached result might be used with queries using a different + // context instruction), because when this function is called from the solve* + // functions, the context instruction is not provided. When called from + // LazyValueInfoCache::getValueOnEdge, the context instruction is provided, + // but then the result is not cached. + mergeAssumeBlockValueConstantRange(Val, Result, CxtI); return true; } -LVILatticeVal LazyValueInfoCache::getValueInBlock(Value *V, BasicBlock *BB) { +LVILatticeVal LazyValueInfoCache::getValueInBlock(Value *V, BasicBlock *BB, + Instruction *CxtI) { DEBUG(dbgs() << "LVI Getting block end value " << *V << " at '" << BB->getName() << "'\n"); BlockValueStack.push(std::make_pair(BB, V)); solve(); LVILatticeVal Result = getBlockValue(V, BB); + mergeAssumeBlockValueConstantRange(V, Result, CxtI); + + DEBUG(dbgs() << " Result = " << Result << "\n"); + return Result; +} + +LVILatticeVal LazyValueInfoCache::getValueAt(Value *V, Instruction *CxtI) { + DEBUG(dbgs() << "LVI Getting value " << *V << " at '" + << CxtI->getName() << "'\n"); + + LVILatticeVal Result; + mergeAssumeBlockValueConstantRange(V, Result, CxtI); DEBUG(dbgs() << " Result = " << Result << "\n"); return Result; } LVILatticeVal LazyValueInfoCache:: -getValueOnEdge(Value *V, BasicBlock *FromBB, BasicBlock *ToBB) { +getValueOnEdge(Value *V, BasicBlock *FromBB, BasicBlock *ToBB, + Instruction *CxtI) { DEBUG(dbgs() << "LVI Getting edge value " << *V << " from '" << FromBB->getName() << "' to '" << ToBB->getName() << "'\n"); LVILatticeVal Result; - if (!getEdgeValue(V, FromBB, ToBB, Result)) { + if (!getEdgeValue(V, FromBB, ToBB, Result, CxtI)) { solve(); - bool WasFastQuery = getEdgeValue(V, FromBB, ToBB, Result); + bool WasFastQuery = getEdgeValue(V, FromBB, ToBB, Result, CxtI); (void)WasFastQuery; assert(WasFastQuery && "More work to do after problem solved?"); } @@ -1004,39 +1098,51 @@ void LazyValueInfoCache::threadEdge(BasicBlock *PredBB, BasicBlock *OldSucc, //===----------------------------------------------------------------------===// /// getCache - This lazily constructs the LazyValueInfoCache. 
-static LazyValueInfoCache &getCache(void *&PImpl) { +static LazyValueInfoCache &getCache(void *&PImpl, + AssumptionTracker *AT, + const DataLayout *DL = nullptr, + DominatorTree *DT = nullptr) { if (!PImpl) - PImpl = new LazyValueInfoCache(); + PImpl = new LazyValueInfoCache(AT, DL, DT); return *static_cast(PImpl); } bool LazyValueInfo::runOnFunction(Function &F) { - if (PImpl) - getCache(PImpl).clear(); + AT = &getAnalysis(); + + DominatorTreeWrapperPass *DTWP = + getAnalysisIfAvailable(); + DT = DTWP ? &DTWP->getDomTree() : nullptr; DataLayoutPass *DLP = getAnalysisIfAvailable(); DL = DLP ? &DLP->getDataLayout() : nullptr; TLI = &getAnalysis(); + if (PImpl) + getCache(PImpl, AT, DL, DT).clear(); + // Fully lazy. return false; } void LazyValueInfo::getAnalysisUsage(AnalysisUsage &AU) const { AU.setPreservesAll(); + AU.addRequired(); AU.addRequired(); } void LazyValueInfo::releaseMemory() { // If the cache was allocated, free it. if (PImpl) { - delete &getCache(PImpl); + delete &getCache(PImpl, AT); PImpl = nullptr; } } -Constant *LazyValueInfo::getConstant(Value *V, BasicBlock *BB) { - LVILatticeVal Result = getCache(PImpl).getValueInBlock(V, BB); +Constant *LazyValueInfo::getConstant(Value *V, BasicBlock *BB, + Instruction *CxtI) { + LVILatticeVal Result = + getCache(PImpl, AT, DL, DT).getValueInBlock(V, BB, CxtI); if (Result.isConstant()) return Result.getConstant(); @@ -1051,8 +1157,10 @@ Constant *LazyValueInfo::getConstant(Value *V, BasicBlock *BB) { /// getConstantOnEdge - Determine whether the specified value is known to be a /// constant on the specified edge. Return null if not. Constant *LazyValueInfo::getConstantOnEdge(Value *V, BasicBlock *FromBB, - BasicBlock *ToBB) { - LVILatticeVal Result = getCache(PImpl).getValueOnEdge(V, FromBB, ToBB); + BasicBlock *ToBB, + Instruction *CxtI) { + LVILatticeVal Result = + getCache(PImpl, AT, DL, DT).getValueOnEdge(V, FromBB, ToBB, CxtI); if (Result.isConstant()) return Result.getConstant(); @@ -1064,51 +1172,47 @@ Constant *LazyValueInfo::getConstantOnEdge(Value *V, BasicBlock *FromBB, return nullptr; } -/// getPredicateOnEdge - Determine whether the specified value comparison -/// with a constant is known to be true or false on the specified CFG edge. -/// Pred is a CmpInst predicate. -LazyValueInfo::Tristate -LazyValueInfo::getPredicateOnEdge(unsigned Pred, Value *V, Constant *C, - BasicBlock *FromBB, BasicBlock *ToBB) { - LVILatticeVal Result = getCache(PImpl).getValueOnEdge(V, FromBB, ToBB); - +static LazyValueInfo::Tristate +getPredicateResult(unsigned Pred, Constant *C, LVILatticeVal &Result, + const DataLayout *DL, TargetLibraryInfo *TLI) { + // If we know the value is a constant, evaluate the conditional. Constant *Res = nullptr; if (Result.isConstant()) { Res = ConstantFoldCompareInstOperands(Pred, Result.getConstant(), C, DL, TLI); if (ConstantInt *ResCI = dyn_cast(Res)) - return ResCI->isZero() ? False : True; - return Unknown; + return ResCI->isZero() ? 
LazyValueInfo::False : LazyValueInfo::True; + return LazyValueInfo::Unknown; } if (Result.isConstantRange()) { ConstantInt *CI = dyn_cast(C); - if (!CI) return Unknown; + if (!CI) return LazyValueInfo::Unknown; ConstantRange CR = Result.getConstantRange(); if (Pred == ICmpInst::ICMP_EQ) { if (!CR.contains(CI->getValue())) - return False; + return LazyValueInfo::False; if (CR.isSingleElement() && CR.contains(CI->getValue())) - return True; + return LazyValueInfo::True; } else if (Pred == ICmpInst::ICMP_NE) { if (!CR.contains(CI->getValue())) - return True; + return LazyValueInfo::True; if (CR.isSingleElement() && CR.contains(CI->getValue())) - return False; + return LazyValueInfo::False; } // Handle more complex predicates. ConstantRange TrueValues = ICmpInst::makeConstantRange((ICmpInst::Predicate)Pred, CI->getValue()); if (TrueValues.contains(CR)) - return True; + return LazyValueInfo::True; if (TrueValues.inverse().contains(CR)) - return False; - return Unknown; + return LazyValueInfo::False; + return LazyValueInfo::Unknown; } if (Result.isNotConstant()) { @@ -1120,26 +1224,48 @@ LazyValueInfo::getPredicateOnEdge(unsigned Pred, Value *V, Constant *C, Result.getNotConstant(), C, DL, TLI); if (Res->isNullValue()) - return False; + return LazyValueInfo::False; } else if (Pred == ICmpInst::ICMP_NE) { // !C1 != C -> true iff C1 == C. Res = ConstantFoldCompareInstOperands(ICmpInst::ICMP_NE, Result.getNotConstant(), C, DL, TLI); if (Res->isNullValue()) - return True; + return LazyValueInfo::True; } - return Unknown; + return LazyValueInfo::Unknown; } - return Unknown; + return LazyValueInfo::Unknown; +} + +/// getPredicateOnEdge - Determine whether the specified value comparison +/// with a constant is known to be true or false on the specified CFG edge. +/// Pred is a CmpInst predicate. +LazyValueInfo::Tristate +LazyValueInfo::getPredicateOnEdge(unsigned Pred, Value *V, Constant *C, + BasicBlock *FromBB, BasicBlock *ToBB, + Instruction *CxtI) { + LVILatticeVal Result = + getCache(PImpl, AT, DL, DT).getValueOnEdge(V, FromBB, ToBB, CxtI); + + return getPredicateResult(Pred, C, Result, DL, TLI); +} + +LazyValueInfo::Tristate +LazyValueInfo::getPredicateAt(unsigned Pred, Value *V, Constant *C, + Instruction *CxtI) { + LVILatticeVal Result = + getCache(PImpl, AT, DL, DT).getValueAt(V, CxtI); + + return getPredicateResult(Pred, C, Result, DL, TLI); } void LazyValueInfo::threadEdge(BasicBlock *PredBB, BasicBlock *OldSucc, BasicBlock *NewSucc) { - if (PImpl) getCache(PImpl).threadEdge(PredBB, OldSucc, NewSucc); + if (PImpl) getCache(PImpl, AT, DL, DT).threadEdge(PredBB, OldSucc, NewSucc); } void LazyValueInfo::eraseBlock(BasicBlock *BB) { - if (PImpl) getCache(PImpl).eraseBlock(BB); + if (PImpl) getCache(PImpl, AT, DL, DT).eraseBlock(BB); } diff --git a/lib/Analysis/LibCallSemantics.cpp b/lib/Analysis/LibCallSemantics.cpp index 7d4e254..23639e7 100644 --- a/lib/Analysis/LibCallSemantics.cpp +++ b/lib/Analysis/LibCallSemantics.cpp @@ -18,7 +18,7 @@ #include "llvm/IR/Function.h" using namespace llvm; -/// getMap - This impl pointer in ~LibCallInfo is actually a StringMap. This +/// This impl pointer in ~LibCallInfo is actually a StringMap. This /// helper does the cast. 
static StringMap *getMap(void *Ptr) { return static_cast *>(Ptr); @@ -38,7 +38,7 @@ const LibCallLocationInfo &LibCallInfo::getLocationInfo(unsigned LocID) const { } -/// getFunctionInfo - Return the LibCallFunctionInfo object corresponding to +/// Return the LibCallFunctionInfo object corresponding to /// the specified function if we have it. If not, return null. const LibCallFunctionInfo * LibCallInfo::getFunctionInfo(const Function *F) const { diff --git a/lib/Analysis/Lint.cpp b/lib/Analysis/Lint.cpp index b14f329..8ee9b8a 100644 --- a/lib/Analysis/Lint.cpp +++ b/lib/Analysis/Lint.cpp @@ -37,6 +37,7 @@ #include "llvm/Analysis/Lint.h" #include "llvm/ADT/STLExtras.h" #include "llvm/Analysis/AliasAnalysis.h" +#include "llvm/Analysis/AssumptionTracker.h" #include "llvm/Analysis/ConstantFolding.h" #include "llvm/Analysis/InstructionSimplify.h" #include "llvm/Analysis/Loads.h" @@ -96,11 +97,12 @@ namespace { Value *findValue(Value *V, bool OffsetOk) const; Value *findValueImpl(Value *V, bool OffsetOk, - SmallPtrSet &Visited) const; + SmallPtrSetImpl &Visited) const; public: Module *Mod; AliasAnalysis *AA; + AssumptionTracker *AT; DominatorTree *DT; const DataLayout *DL; TargetLibraryInfo *TLI; @@ -118,6 +120,7 @@ namespace { void getAnalysisUsage(AnalysisUsage &AU) const override { AU.setPreservesAll(); AU.addRequired(); + AU.addRequired(); AU.addRequired(); AU.addRequired(); } @@ -151,6 +154,7 @@ namespace { char Lint::ID = 0; INITIALIZE_PASS_BEGIN(Lint, "lint", "Statically lint-checks LLVM IR", false, true) +INITIALIZE_PASS_DEPENDENCY(AssumptionTracker) INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfo) INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) INITIALIZE_AG_DEPENDENCY(AliasAnalysis) @@ -175,6 +179,7 @@ INITIALIZE_PASS_END(Lint, "lint", "Statically lint-checks LLVM IR", bool Lint::runOnFunction(Function &F) { Mod = F.getParent(); AA = &getAnalysis(); + AT = &getAnalysis(); DT = &getAnalysis().getDomTree(); DataLayoutPass *DLP = getAnalysisIfAvailable(); DL = DLP ? &DLP->getDataLayout() : nullptr; @@ -504,7 +509,8 @@ void Lint::visitShl(BinaryOperator &I) { "Undefined result: Shift count out of range", &I); } -static bool isZero(Value *V, const DataLayout *DL) { +static bool isZero(Value *V, const DataLayout *DL, DominatorTree *DT, + AssumptionTracker *AT) { // Assume undef could be zero. 
if (isa(V)) return true; @@ -513,7 +519,8 @@ static bool isZero(Value *V, const DataLayout *DL) { if (!VecTy) { unsigned BitWidth = V->getType()->getIntegerBitWidth(); APInt KnownZero(BitWidth, 0), KnownOne(BitWidth, 0); - computeKnownBits(V, KnownZero, KnownOne, DL); + computeKnownBits(V, KnownZero, KnownOne, DL, + 0, AT, dyn_cast(V), DT); return KnownZero.isAllOnesValue(); } @@ -543,22 +550,22 @@ static bool isZero(Value *V, const DataLayout *DL) { } void Lint::visitSDiv(BinaryOperator &I) { - Assert1(!isZero(I.getOperand(1), DL), + Assert1(!isZero(I.getOperand(1), DL, DT, AT), "Undefined behavior: Division by zero", &I); } void Lint::visitUDiv(BinaryOperator &I) { - Assert1(!isZero(I.getOperand(1), DL), + Assert1(!isZero(I.getOperand(1), DL, DT, AT), "Undefined behavior: Division by zero", &I); } void Lint::visitSRem(BinaryOperator &I) { - Assert1(!isZero(I.getOperand(1), DL), + Assert1(!isZero(I.getOperand(1), DL, DT, AT), "Undefined behavior: Division by zero", &I); } void Lint::visitURem(BinaryOperator &I) { - Assert1(!isZero(I.getOperand(1), DL), + Assert1(!isZero(I.getOperand(1), DL, DT, AT), "Undefined behavior: Division by zero", &I); } @@ -622,9 +629,9 @@ Value *Lint::findValue(Value *V, bool OffsetOk) const { /// findValueImpl - Implementation helper for findValue. Value *Lint::findValueImpl(Value *V, bool OffsetOk, - SmallPtrSet &Visited) const { + SmallPtrSetImpl &Visited) const { // Detect self-referential values. - if (!Visited.insert(V)) + if (!Visited.insert(V).second) return UndefValue::get(V->getType()); // TODO: Look through sext or zext cast, when the result is known to @@ -638,7 +645,8 @@ Value *Lint::findValueImpl(Value *V, bool OffsetOk, BasicBlock *BB = L->getParent(); SmallPtrSet VisitedBlocks; for (;;) { - if (!VisitedBlocks.insert(BB)) break; + if (!VisitedBlocks.insert(BB).second) + break; if (Value *U = FindAvailableLoadedValue(L->getPointerOperand(), BB, BBI, 6, AA)) return findValueImpl(U, OffsetOk, Visited); @@ -678,7 +686,7 @@ Value *Lint::findValueImpl(Value *V, bool OffsetOk, // As a last resort, try SimplifyInstruction or constant folding. if (Instruction *Inst = dyn_cast(V)) { - if (Value *W = SimplifyInstruction(Inst, DL, TLI, DT)) + if (Value *W = SimplifyInstruction(Inst, DL, TLI, DT, AT)) return findValueImpl(W, OffsetOk, Visited); } else if (ConstantExpr *CE = dyn_cast(V)) { if (Value *W = ConstantFoldConstantExpression(CE, DL, TLI)) diff --git a/lib/Analysis/Loads.cpp b/lib/Analysis/Loads.cpp index 005d309..bb0d60e 100644 --- a/lib/Analysis/Loads.cpp +++ b/lib/Analysis/Loads.cpp @@ -22,25 +22,29 @@ #include "llvm/IR/Operator.h" using namespace llvm; -/// AreEquivalentAddressValues - Test if A and B will obviously have the same -/// value. This includes recognizing that %t0 and %t1 will have the same +/// \brief Test if A and B will obviously have the same value. +/// +/// This includes recognizing that %t0 and %t1 will have the same /// value in code like this: +/// \code /// %t0 = getelementptr \@a, 0, 3 /// store i32 0, i32* %t0 /// %t1 = getelementptr \@a, 0, 3 /// %t2 = load i32* %t1 +/// \endcode /// static bool AreEquivalentAddressValues(const Value *A, const Value *B) { // Test if the values are trivially equivalent. - if (A == B) return true; + if (A == B) + return true; // Test if the values come from identical arithmetic instructions. 
// Use isIdenticalToWhenDefined instead of isIdenticalTo because // this function is only used when one address use dominates the // other, which means that they'll always either have the same // value or one of them will have an undefined value. - if (isa(A) || isa(A) || - isa(A) || isa(A)) + if (isa(A) || isa(A) || isa(A) || + isa(A)) if (const Instruction *BI = dyn_cast(B)) if (cast(A)->isIdenticalToWhenDefined(BI)) return true; @@ -49,15 +53,19 @@ static bool AreEquivalentAddressValues(const Value *A, const Value *B) { return false; } -/// isSafeToLoadUnconditionally - Return true if we know that executing a load -/// from this value cannot trap. If it is not obviously safe to load from the -/// specified pointer, we do a quick local scan of the basic block containing -/// ScanFrom, to determine if the address is already accessed. +/// \brief Check if executing a load of this pointer value cannot trap. +/// +/// If it is not obviously safe to load from the specified pointer, we do +/// a quick local scan of the basic block containing \c ScanFrom, to determine +/// if the address is already accessed. +/// +/// This uses the pointee type to determine how many bytes need to be safe to +/// load from the pointer. bool llvm::isSafeToLoadUnconditionally(Value *V, Instruction *ScanFrom, - unsigned Align, const DataLayout *TD) { + unsigned Align, const DataLayout *DL) { int64_t ByteOffset = 0; Value *Base = V; - Base = GetPointerBaseWithConstantOffset(V, ByteOffset, TD); + Base = GetPointerBaseWithConstantOffset(V, ByteOffset, DL); if (ByteOffset < 0) // out of bounds return false; @@ -69,26 +77,29 @@ bool llvm::isSafeToLoadUnconditionally(Value *V, Instruction *ScanFrom, BaseType = AI->getAllocatedType(); BaseAlign = AI->getAlignment(); } else if (const GlobalVariable *GV = dyn_cast(Base)) { - // Global variables are safe to load from but their size cannot be - // guaranteed if they are overridden. + // Global variables are not necessarily safe to load from if they are + // overridden. Their size may change or they may be weak and require a test + // to determine if they were in fact provided. if (!GV->mayBeOverridden()) { BaseType = GV->getType()->getElementType(); BaseAlign = GV->getAlignment(); } } - if (BaseType && BaseType->isSized()) { - if (TD && BaseAlign == 0) - BaseAlign = TD->getPrefTypeAlignment(BaseType); + PointerType *AddrTy = cast(V->getType()); + uint64_t LoadSize = DL ? DL->getTypeStoreSize(AddrTy->getElementType()) : 0; - if (Align <= BaseAlign) { - if (!TD) - return true; // Loading directly from an alloca or global is OK. + // If we found a base allocated type from either an alloca or global variable, + // try to see if we are definitively within the allocated region. We need to + // know the size of the base type and the loaded type to do anything in this + // case, so only try this when we have the DataLayout available. + if (BaseType && BaseType->isSized() && DL) { + if (BaseAlign == 0) + BaseAlign = DL->getPrefTypeAlignment(BaseType); + if (Align <= BaseAlign) { // Check if the load is within the bounds of the underlying object. - PointerType *AddrTy = cast(V->getType()); - uint64_t LoadSize = TD->getTypeStoreSize(AddrTy->getElementType()); - if (ByteOffset + LoadSize <= TD->getTypeAllocSize(BaseType) && + if (ByteOffset + LoadSize <= DL->getTypeAllocSize(BaseType) && (Align == 0 || (ByteOffset % Align) == 0)) return true; } @@ -101,6 +112,10 @@ bool llvm::isSafeToLoadUnconditionally(Value *V, Instruction *ScanFrom, // the load entirely). 
BasicBlock::iterator BBI = ScanFrom, E = ScanFrom->getParent()->begin(); + // We can at least always strip pointer casts even though we can't use the + // base here. + V = V->stripPointerCasts(); + while (BBI != E) { --BBI; @@ -110,46 +125,62 @@ bool llvm::isSafeToLoadUnconditionally(Value *V, Instruction *ScanFrom, !isa(BBI)) return false; - if (LoadInst *LI = dyn_cast(BBI)) { - if (AreEquivalentAddressValues(LI->getOperand(0), V)) return true; - } else if (StoreInst *SI = dyn_cast(BBI)) { - if (AreEquivalentAddressValues(SI->getOperand(1), V)) return true; - } + Value *AccessedPtr; + if (LoadInst *LI = dyn_cast(BBI)) + AccessedPtr = LI->getPointerOperand(); + else if (StoreInst *SI = dyn_cast(BBI)) + AccessedPtr = SI->getPointerOperand(); + else + continue; + + // Handle trivial cases even w/o DataLayout or other work. + if (AccessedPtr == V) + return true; + + if (!DL) + continue; + + auto *AccessedTy = cast(AccessedPtr->getType()); + if (AreEquivalentAddressValues(AccessedPtr->stripPointerCasts(), V) && + LoadSize <= DL->getTypeStoreSize(AccessedTy->getElementType())) + return true; } return false; } -/// FindAvailableLoadedValue - Scan the ScanBB block backwards (starting at the -/// instruction before ScanFrom) checking to see if we have the value at the +/// \brief Scan the ScanBB block backwards to see if we have the value at the /// memory address *Ptr locally available within a small number of instructions. -/// If the value is available, return it. /// -/// If not, return the iterator for the last validated instruction that the -/// value would be live through. If we scanned the entire block and didn't find -/// something that invalidates *Ptr or provides it, ScanFrom would be left at -/// begin() and this returns null. ScanFrom could also be left +/// The scan starts from \c ScanFrom. \c MaxInstsToScan specifies the maximum +/// instructions to scan in the block. If it is set to \c 0, it will scan the whole +/// block. +/// +/// If the value is available, this function returns it. If not, it returns the +/// iterator for the last validated instruction that the value would be live +/// through. If we scanned the entire block and didn't find something that +/// invalidates \c *Ptr or provides it, \c ScanFrom is left at the last +/// instruction processed and this returns null. /// -/// MaxInstsToScan specifies the maximum instructions to scan in the block. If -/// it is set to 0, it will scan the whole block. You can also optionally -/// specify an alias analysis implementation, which makes this more precise. +/// You can also optionally specify an alias analysis implementation, which +/// makes this more precise. /// -/// If TBAATag is non-null and a load or store is found, the TBAA tag from the -/// load or store is recorded there. If there is no TBAA tag or if no access -/// is found, it is left unmodified. +/// If \c AATags is non-null and a load or store is found, the AA tags from the +/// load or store are recorded there. If there are no AA tags or if no access is +/// found, it is left unmodified. Value *llvm::FindAvailableLoadedValue(Value *Ptr, BasicBlock *ScanBB, BasicBlock::iterator &ScanFrom, unsigned MaxInstsToScan, - AliasAnalysis *AA, - MDNode **TBAATag) { - if (MaxInstsToScan == 0) MaxInstsToScan = ~0U; + AliasAnalysis *AA, AAMDNodes *AATags) { + if (MaxInstsToScan == 0) + MaxInstsToScan = ~0U; + + Type *AccessTy = cast(Ptr->getType())->getElementType(); // If we're using alias analysis to disambiguate get the size of *Ptr. 
- uint64_t AccessSize = 0; - if (AA) { - Type *AccessTy = cast(Ptr->getType())->getElementType(); - AccessSize = AA->getTypeStoreSize(AccessTy); - } - + uint64_t AccessSize = AA ? AA->getTypeStoreSize(AccessTy) : 0; + + Value *StrippedPtr = Ptr->stripPointerCasts(); + while (ScanFrom != ScanBB->begin()) { // We must ignore debug info directives when counting (otherwise they // would affect codegen). @@ -159,62 +190,71 @@ Value *llvm::FindAvailableLoadedValue(Value *Ptr, BasicBlock *ScanBB, // Restore ScanFrom to expected value in case next test succeeds ScanFrom++; - + // Don't scan huge blocks. - if (MaxInstsToScan-- == 0) return nullptr; - + if (MaxInstsToScan-- == 0) + return nullptr; + --ScanFrom; // If this is a load of Ptr, the loaded value is available. // (This is true even if the load is volatile or atomic, although // those cases are unlikely.) if (LoadInst *LI = dyn_cast(Inst)) - if (AreEquivalentAddressValues(LI->getOperand(0), Ptr)) { - if (TBAATag) *TBAATag = LI->getMetadata(LLVMContext::MD_tbaa); + if (AreEquivalentAddressValues( + LI->getPointerOperand()->stripPointerCasts(), StrippedPtr) && + CastInst::isBitCastable(LI->getType(), AccessTy)) { + if (AATags) + LI->getAAMetadata(*AATags); return LI; } - + if (StoreInst *SI = dyn_cast(Inst)) { + Value *StorePtr = SI->getPointerOperand()->stripPointerCasts(); // If this is a store through Ptr, the value is available! // (This is true even if the store is volatile or atomic, although // those cases are unlikely.) - if (AreEquivalentAddressValues(SI->getOperand(1), Ptr)) { - if (TBAATag) *TBAATag = SI->getMetadata(LLVMContext::MD_tbaa); + if (AreEquivalentAddressValues(StorePtr, StrippedPtr) && + CastInst::isBitCastable(SI->getValueOperand()->getType(), AccessTy)) { + if (AATags) + SI->getAAMetadata(*AATags); return SI->getOperand(0); } - - // If Ptr is an alloca and this is a store to a different alloca, ignore - // the store. This is a trivial form of alias analysis that is important - // for reg2mem'd code. - if ((isa(Ptr) || isa(Ptr)) && - (isa(SI->getOperand(1)) || - isa(SI->getOperand(1)))) + + // If both StrippedPtr and StorePtr reach all the way to an alloca or + // global and they are different, ignore the store. This is a trivial form + // of alias analysis that is important for reg2mem'd code. + if ((isa(StrippedPtr) || isa(StrippedPtr)) && + (isa(StorePtr) || isa(StorePtr)) && + StrippedPtr != StorePtr) continue; - + // If we have alias analysis and it says the store won't modify the loaded // value, ignore the store. if (AA && - (AA->getModRefInfo(SI, Ptr, AccessSize) & AliasAnalysis::Mod) == 0) + (AA->getModRefInfo(SI, StrippedPtr, AccessSize) & + AliasAnalysis::Mod) == 0) continue; - + // Otherwise the store that may or may not alias the pointer, bail out. ++ScanFrom; return nullptr; } - + // If this is some other instruction that may clobber Ptr, bail out. if (Inst->mayWriteToMemory()) { // If alias analysis claims that it really won't modify the load, // ignore it. if (AA && - (AA->getModRefInfo(Inst, Ptr, AccessSize) & AliasAnalysis::Mod) == 0) + (AA->getModRefInfo(Inst, StrippedPtr, AccessSize) & + AliasAnalysis::Mod) == 0) continue; - + // May modify the pointer, bail out. ++ScanFrom; return nullptr; } } - + // Got to the start of the block, we didn't find it, but are done for this // block. 
return nullptr; diff --git a/lib/Analysis/LoopInfo.cpp b/lib/Analysis/LoopInfo.cpp index 46c0eaa..b1f62c4 100644 --- a/lib/Analysis/LoopInfo.cpp +++ b/lib/Analysis/LoopInfo.cpp @@ -24,6 +24,7 @@ #include "llvm/IR/Constants.h" #include "llvm/IR/Dominators.h" #include "llvm/IR/Instructions.h" +#include "llvm/IR/LLVMContext.h" #include "llvm/IR/Metadata.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" @@ -307,7 +308,8 @@ bool Loop::isAnnotatedParallel() const { // directly or indirectly through another list metadata (in case of // nested parallel loops). The loop identifier metadata refers to // itself so we can check both cases with the same routine. - MDNode *loopIdMD = II->getMetadata("llvm.mem.parallel_loop_access"); + MDNode *loopIdMD = + II->getMetadata(LLVMContext::MD_mem_parallel_loop_access); if (!loopIdMD) return false; diff --git a/lib/Analysis/LoopPass.cpp b/lib/Analysis/LoopPass.cpp index 7bd866e..190abc7 100644 --- a/lib/Analysis/LoopPass.cpp +++ b/lib/Analysis/LoopPass.cpp @@ -76,6 +76,9 @@ void LPPassManager::deleteLoopFromQueue(Loop *L) { LI->updateUnloop(L); + // Notify passes that the loop is being deleted. + deleteSimpleAnalysisLoop(L); + // If L is current loop then skip rest of the passes and let // runOnFunction remove L from LQ. Otherwise, remove L from LQ now // and continue applying other passes on CurrentLoop. @@ -164,6 +167,14 @@ void LPPassManager::deleteSimpleAnalysisValue(Value *V, Loop *L) { } } +/// Invoke deleteAnalysisLoop hook for all passes. +void LPPassManager::deleteSimpleAnalysisLoop(Loop *L) { + for (unsigned Index = 0; Index < getNumContainedPasses(); ++Index) { + LoopPass *LP = getContainedPass(Index); + LP->deleteAnalysisLoop(L); + } +} + // Recurse through all subloops and all loops into LQ. static void addLoopIntoQueue(Loop *L, std::deque &LQ) { diff --git a/lib/Analysis/MemoryBuiltins.cpp b/lib/Analysis/MemoryBuiltins.cpp index 64d339f..08b41fe 100644 --- a/lib/Analysis/MemoryBuiltins.cpp +++ b/lib/Analysis/MemoryBuiltins.cpp @@ -332,7 +332,11 @@ const CallInst *llvm::isFreeCall(const Value *I, const TargetLibraryInfo *TLI) { TLIFn == LibFunc::ZdlPv || // operator delete(void*) TLIFn == LibFunc::ZdaPv) // operator delete[](void*) ExpectedNumParams = 1; - else if (TLIFn == LibFunc::ZdlPvRKSt9nothrow_t || // delete(void*, nothrow) + else if (TLIFn == LibFunc::ZdlPvj || // delete(void*, uint) + TLIFn == LibFunc::ZdlPvm || // delete(void*, ulong) + TLIFn == LibFunc::ZdlPvRKSt9nothrow_t || // delete(void*, nothrow) + TLIFn == LibFunc::ZdaPvj || // delete[](void*, uint) + TLIFn == LibFunc::ZdaPvm || // delete[](void*, ulong) TLIFn == LibFunc::ZdaPvRKSt9nothrow_t) // delete[](void*, nothrow) ExpectedNumParams = 2; else @@ -412,7 +416,7 @@ SizeOffsetType ObjectSizeOffsetVisitor::compute(Value *V) { if (Instruction *I = dyn_cast(V)) { // If we have already seen this instruction, bail out. Cycles can happen in // unreachable code after constant propagation. - if (!SeenInsts.insert(I)) + if (!SeenInsts.insert(I).second) return unknown(); if (GEPOperator *GEP = dyn_cast(V)) @@ -648,7 +652,7 @@ SizeOffsetEvalType ObjectSizeOffsetEvaluator::compute_(Value *V) { // Record the pointers that were handled in this run, so that they can be // cleaned later if something fails. We also use this set to break cycles that // can occur in dead code. 
- if (!SeenVals.insert(V)) { + if (!SeenVals.insert(V).second) { Result = unknown(); } else if (GEPOperator *GEP = dyn_cast(V)) { Result = visitGEPOperator(*GEP); diff --git a/lib/Analysis/MemoryDependenceAnalysis.cpp b/lib/Analysis/MemoryDependenceAnalysis.cpp index 9eaf109..187eada 100644 --- a/lib/Analysis/MemoryDependenceAnalysis.cpp +++ b/lib/Analysis/MemoryDependenceAnalysis.cpp @@ -18,6 +18,7 @@ #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/AliasAnalysis.h" +#include "llvm/Analysis/AssumptionTracker.h" #include "llvm/Analysis/InstructionSimplify.h" #include "llvm/Analysis/MemoryBuiltins.h" #include "llvm/Analysis/PHITransAddr.h" @@ -48,13 +49,17 @@ STATISTIC(NumCacheCompleteNonLocalPtr, "Number of block queries that were completely cached"); // Limit for the number of instructions to scan in a block. -static const int BlockScanLimit = 100; +static const unsigned int BlockScanLimit = 100; + +// Limit on the number of memdep results to process. +static const unsigned int NumResultsLimit = 100; char MemoryDependenceAnalysis::ID = 0; // Register this pass... INITIALIZE_PASS_BEGIN(MemoryDependenceAnalysis, "memdep", "Memory Dependence Analysis", false, true) +INITIALIZE_PASS_DEPENDENCY(AssumptionTracker) INITIALIZE_AG_DEPENDENCY(AliasAnalysis) INITIALIZE_PASS_END(MemoryDependenceAnalysis, "memdep", "Memory Dependence Analysis", false, true) @@ -83,11 +88,13 @@ void MemoryDependenceAnalysis::releaseMemory() { /// void MemoryDependenceAnalysis::getAnalysisUsage(AnalysisUsage &AU) const { AU.setPreservesAll(); + AU.addRequired(); AU.addRequiredTransitive(); } bool MemoryDependenceAnalysis::runOnFunction(Function &) { AA = &getAnalysis(); + AT = &getAnalysis(); DataLayoutPass *DLP = getAnalysisIfAvailable(); DL = DLP ? &DLP->getDataLayout() : nullptr; DominatorTreeWrapperPass *DTWP = @@ -158,29 +165,32 @@ AliasAnalysis::ModRefResult GetLocation(const Instruction *Inst, return AliasAnalysis::Mod; } - if (const IntrinsicInst *II = dyn_cast(Inst)) + if (const IntrinsicInst *II = dyn_cast(Inst)) { + AAMDNodes AAInfo; + switch (II->getIntrinsicID()) { case Intrinsic::lifetime_start: case Intrinsic::lifetime_end: case Intrinsic::invariant_start: + II->getAAMetadata(AAInfo); Loc = AliasAnalysis::Location(II->getArgOperand(1), cast(II->getArgOperand(0)) - ->getZExtValue(), - II->getMetadata(LLVMContext::MD_tbaa)); + ->getZExtValue(), AAInfo); // These intrinsics don't really modify the memory, but returning Mod // will allow them to be handled conservatively. return AliasAnalysis::Mod; case Intrinsic::invariant_end: + II->getAAMetadata(AAInfo); Loc = AliasAnalysis::Location(II->getArgOperand(2), cast(II->getArgOperand(1)) - ->getZExtValue(), - II->getMetadata(LLVMContext::MD_tbaa)); + ->getZExtValue(), AAInfo); // These intrinsics don't really modify the memory, but returning Mod // will allow them to be handled conservatively. return AliasAnalysis::Mod; default: break; } + } // Otherwise, just do the coarse-grained thing that always works. if (Inst->mayWriteToMemory()) @@ -367,6 +377,36 @@ getPointerDependencyFrom(const AliasAnalysis::Location &MemLoc, bool isLoad, int64_t MemLocOffset = 0; unsigned Limit = BlockScanLimit; bool isInvariantLoad = false; + + // We must be careful with atomic accesses, as they may allow another thread + // to touch this location, cloberring it. We are conservative: if the + // QueryInst is not a simple (non-atomic) memory access, we automatically + // return getClobber. 
+ // If it is simple, we know based on the results of + // "Compiler testing via a theory of sound optimisations in the C11/C++11 + // memory model" in PLDI 2013, that a non-atomic location can only be + // clobbered between a pair of a release and an acquire action, with no + // access to the location in between. + // Here is an example for giving the general intuition behind this rule. + // In the following code: + // store x 0; + // release action; [1] + // acquire action; [4] + // %val = load x; + // It is unsafe to replace %val by 0 because another thread may be running: + // acquire action; [2] + // store x 42; + // release action; [3] + // with synchronization from 1 to 2 and from 3 to 4, resulting in %val + // being 42. A key property of this program however is that if either + // 1 or 4 were missing, there would be a race between the store of 42 + // either the store of 0 or the load (making the whole progam racy). + // The paper mentionned above shows that the same property is respected + // by every program that can detect any optimisation of that kind: either + // it is racy (undefined) or there is a release followed by an acquire + // between the pair of accesses under consideration. + bool HasSeenAcquire = false; + if (isLoad && QueryInst) { LoadInst *LI = dyn_cast(QueryInst); if (LI && LI->getMetadata(LLVMContext::MD_invariant_load) != nullptr) @@ -404,10 +444,37 @@ getPointerDependencyFrom(const AliasAnalysis::Location &MemLoc, bool isLoad, // Values depend on loads if the pointers are must aliased. This means that // a load depends on another must aliased load from the same value. + // One exception is atomic loads: a value can depend on an atomic load that it + // does not alias with when this atomic load indicates that another thread may + // be accessing the location. if (LoadInst *LI = dyn_cast(Inst)) { // Atomic loads have complications involved. + // A Monotonic (or higher) load is OK if the query inst is itself not atomic. + // An Acquire (or higher) load sets the HasSeenAcquire flag, so that any + // release store will know to return getClobber. // FIXME: This is overly conservative. - if (!LI->isUnordered()) + if (!LI->isUnordered()) { + if (!QueryInst) + return MemDepResult::getClobber(LI); + if (auto *QueryLI = dyn_cast(QueryInst)) { + if (!QueryLI->isSimple()) + return MemDepResult::getClobber(LI); + } else if (auto *QuerySI = dyn_cast(QueryInst)) { + if (!QuerySI->isSimple()) + return MemDepResult::getClobber(LI); + } else if (QueryInst->mayReadOrWriteMemory()) { + return MemDepResult::getClobber(LI); + } + + if (isAtLeastAcquire(LI->getOrdering())) + HasSeenAcquire = true; + } + + // FIXME: this is overly conservative. + // While volatile access cannot be eliminated, they do not have to clobber + // non-aliasing locations, as normal accesses can for example be reordered + // with volatile accesses. + if (LI->isVolatile()) return MemDepResult::getClobber(LI); AliasAnalysis::Location LoadLoc = AA->getLocation(LI); @@ -466,8 +533,32 @@ getPointerDependencyFrom(const AliasAnalysis::Location &MemLoc, bool isLoad, if (StoreInst *SI = dyn_cast(Inst)) { // Atomic stores have complications involved. + // A Monotonic store is OK if the query inst is itself not atomic. + // A Release (or higher) store further requires that no acquire load + // has been seen. // FIXME: This is overly conservative. 
- if (!SI->isUnordered()) + if (!SI->isUnordered()) { + if (!QueryInst) + return MemDepResult::getClobber(SI); + if (auto *QueryLI = dyn_cast(QueryInst)) { + if (!QueryLI->isSimple()) + return MemDepResult::getClobber(SI); + } else if (auto *QuerySI = dyn_cast(QueryInst)) { + if (!QuerySI->isSimple()) + return MemDepResult::getClobber(SI); + } else if (QueryInst->mayReadOrWriteMemory()) { + return MemDepResult::getClobber(SI); + } + + if (HasSeenAcquire && isAtLeastRelease(SI->getOrdering())) + return MemDepResult::getClobber(SI); + } + + // FIXME: this is overly conservative. + // While volatile access cannot be eliminated, they do not have to clobber + // non-aliasing locations, as normal accesses can for example be reordered + // with volatile accesses. + if (SI->isVolatile()) return MemDepResult::getClobber(SI); // If alias analysis can tell that this store is guaranteed to not modify @@ -685,7 +776,7 @@ MemoryDependenceAnalysis::getNonLocalCallDependency(CallSite QueryCS) { DirtyBlocks.pop_back(); // Already processed this block? - if (!Visited.insert(DirtyBB)) + if (!Visited.insert(DirtyBB).second) continue; // Do a binary search to see if we already have an entry for this block in @@ -775,7 +866,7 @@ getNonLocalPointerDependency(const AliasAnalysis::Location &Loc, bool isLoad, "Can't get pointer deps of a non-pointer!"); Result.clear(); - PHITransAddr Address(const_cast(Loc.Ptr), DL); + PHITransAddr Address(const_cast(Loc.Ptr), DL, AT); // This is the set of blocks we've inspected, and the pointer we consider in // each block. Because of critical edges, we currently bail out if querying @@ -861,7 +952,7 @@ GetNonLocalInfoForBlock(const AliasAnalysis::Location &Loc, return Dep; } -/// SortNonLocalDepInfoCache - Sort the a NonLocalDepInfo cache, given a certain +/// SortNonLocalDepInfoCache - Sort the NonLocalDepInfo cache, given a certain /// number of elements in the array that are already properly ordered. This is /// optimized for the case when only a few entries are added. static void @@ -922,10 +1013,10 @@ getNonLocalPointerDepFromBB(const PHITransAddr &Pointer, // Set up a temporary NLPI value. If the map doesn't yet have an entry for // CacheKey, this value will be inserted as the associated value. Otherwise, // it'll be ignored, and we'll have to check to see if the cached size and - // tbaa tag are consistent with the current query. + // aa tags are consistent with the current query. NonLocalPointerInfo InitialNLPI; InitialNLPI.Size = Loc.Size; - InitialNLPI.TBAATag = Loc.TBAATag; + InitialNLPI.AATags = Loc.AATags; // Get the NLPI for CacheKey, inserting one into the map if it doesn't // already have one. @@ -955,21 +1046,21 @@ getNonLocalPointerDepFromBB(const PHITransAddr &Pointer, SkipFirstBlock); } - // If the query's TBAATag is inconsistent with the cached one, + // If the query's AATags are inconsistent with the cached one, // conservatively throw out the cached data and restart the query with // no tag if needed. 
- if (CacheInfo->TBAATag != Loc.TBAATag) { - if (CacheInfo->TBAATag) { + if (CacheInfo->AATags != Loc.AATags) { + if (CacheInfo->AATags) { CacheInfo->Pair = BBSkipFirstBlockPair(); - CacheInfo->TBAATag = nullptr; + CacheInfo->AATags = AAMDNodes(); for (NonLocalDepInfo::iterator DI = CacheInfo->NonLocalDeps.begin(), DE = CacheInfo->NonLocalDeps.end(); DI != DE; ++DI) if (Instruction *Inst = DI->getResult().getInst()) RemoveFromReverseMap(ReverseNonLocalPtrDeps, Inst, CacheKey); CacheInfo->NonLocalDeps.clear(); } - if (Loc.TBAATag) - return getNonLocalPointerDepFromBB(Pointer, Loc.getWithoutTBAATag(), + if (Loc.AATags) + return getNonLocalPointerDepFromBB(Pointer, Loc.getWithoutAATags(), isLoad, StartBB, Result, Visited, SkipFirstBlock); } @@ -1045,6 +1136,25 @@ getNonLocalPointerDepFromBB(const PHITransAddr &Pointer, while (!Worklist.empty()) { BasicBlock *BB = Worklist.pop_back_val(); + // If we do process a large number of blocks it becomes very expensive and + // likely it isn't worth worrying about + if (Result.size() > NumResultsLimit) { + Worklist.clear(); + // Sort it now (if needed) so that recursive invocations of + // getNonLocalPointerDepFromBB and other routines that could reuse the + // cache value will only see properly sorted cache arrays. + if (Cache && NumSortedEntries != Cache->size()) { + SortNonLocalDepInfoCache(*Cache, NumSortedEntries); + NumSortedEntries = Cache->size(); + } + // Since we bail out, the "Cache" set won't contain all of the + // results for the query. This is ok (we can still use it to accelerate + // specific block queries) but we can't do the fastpath "return all + // results from the set". Clear out the indicator for this. + CacheInfo->Pair = BBSkipFirstBlockPair(); + return true; + } + // Skip the first block if we have it. if (!SkipFirstBlock) { // Analyze the dependency of *Pointer in FromBB. See if we already have @@ -1369,14 +1479,11 @@ void MemoryDependenceAnalysis::removeInstruction(Instruction *RemInst) { ReverseDepMapType::iterator ReverseDepIt = ReverseLocalDeps.find(RemInst); if (ReverseDepIt != ReverseLocalDeps.end()) { - SmallPtrSet &ReverseDeps = ReverseDepIt->second; // RemInst can't be the terminator if it has local stuff depending on it. - assert(!ReverseDeps.empty() && !isa(RemInst) && + assert(!ReverseDepIt->second.empty() && !isa(RemInst) && "Nothing can locally depend on a terminator"); - for (SmallPtrSet::iterator I = ReverseDeps.begin(), - E = ReverseDeps.end(); I != E; ++I) { - Instruction *InstDependingOnRemInst = *I; + for (Instruction *InstDependingOnRemInst : ReverseDepIt->second) { assert(InstDependingOnRemInst != RemInst && "Already removed our local dep info"); @@ -1402,12 +1509,10 @@ void MemoryDependenceAnalysis::removeInstruction(Instruction *RemInst) { ReverseDepIt = ReverseNonLocalDeps.find(RemInst); if (ReverseDepIt != ReverseNonLocalDeps.end()) { - SmallPtrSet &Set = ReverseDepIt->second; - for (SmallPtrSet::iterator I = Set.begin(), E = Set.end(); - I != E; ++I) { - assert(*I != RemInst && "Already removed NonLocalDep info for RemInst"); + for (Instruction *I : ReverseDepIt->second) { + assert(I != RemInst && "Already removed NonLocalDep info for RemInst"); - PerInstNLInfo &INLD = NonLocalDeps[*I]; + PerInstNLInfo &INLD = NonLocalDeps[I]; // The information is now dirty! 
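For readers used to the single !tbaa node this file tracked before, the consistency check above now compares a small bundle of AA metadata. The sketch below shows the rough shape of that bundle; the exact member list of AAMDNodes is an assumption about this LLVM revision, so treat it as illustrative rather than the literal declaration.

// Approximate shape of what "CacheInfo->AATags != Loc.AATags" compares.
struct AAMDNodesSketch {
  llvm::MDNode *TBAA = nullptr;    // !tbaa (type-based alias info)
  llvm::MDNode *Scope = nullptr;   // !alias.scope
  llvm::MDNode *NoAlias = nullptr; // !noalias
  bool operator==(const AAMDNodesSketch &O) const {
    return TBAA == O.TBAA && Scope == O.Scope && NoAlias == O.NoAlias;
  }
  bool operator!=(const AAMDNodesSketch &O) const { return !(*this == O); }
  explicit operator bool() const { return TBAA || Scope || NoAlias; }
};

A mismatch in any of the tags now takes the same conservative path a TBAA mismatch did before: the cached deps are cleared and the query restarts with no tags.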
INLD.second = true; @@ -1419,7 +1524,7 @@ void MemoryDependenceAnalysis::removeInstruction(Instruction *RemInst) { DI->setResult(NewDirtyVal); if (Instruction *NextI = NewDirtyVal.getInst()) - ReverseDepsToAdd.push_back(std::make_pair(NextI, *I)); + ReverseDepsToAdd.push_back(std::make_pair(NextI, I)); } } @@ -1438,12 +1543,9 @@ void MemoryDependenceAnalysis::removeInstruction(Instruction *RemInst) { ReverseNonLocalPtrDepTy::iterator ReversePtrDepIt = ReverseNonLocalPtrDeps.find(RemInst); if (ReversePtrDepIt != ReverseNonLocalPtrDeps.end()) { - SmallPtrSet &Set = ReversePtrDepIt->second; SmallVector,8> ReversePtrDepsToAdd; - for (SmallPtrSet::iterator I = Set.begin(), - E = Set.end(); I != E; ++I) { - ValueIsLoadPair P = *I; + for (ValueIsLoadPair P : ReversePtrDepIt->second) { assert(P.getPointer() != RemInst && "Already removed NonLocalPointerDeps info for RemInst"); @@ -1484,8 +1586,10 @@ void MemoryDependenceAnalysis::removeInstruction(Instruction *RemInst) { DEBUG(verifyRemoved(RemInst)); } /// verifyRemoved - Verify that the specified instruction does not occur -/// in our internal data structures. +/// in our internal data structures. This function verifies by asserting in +/// debug builds. void MemoryDependenceAnalysis::verifyRemoved(Instruction *D) const { +#ifndef NDEBUG for (LocalDepMapType::const_iterator I = LocalDeps.begin(), E = LocalDeps.end(); I != E; ++I) { assert(I->first != D && "Inst occurs in data structures"); @@ -1514,18 +1618,16 @@ void MemoryDependenceAnalysis::verifyRemoved(Instruction *D) const { for (ReverseDepMapType::const_iterator I = ReverseLocalDeps.begin(), E = ReverseLocalDeps.end(); I != E; ++I) { assert(I->first != D && "Inst occurs in data structures"); - for (SmallPtrSet::const_iterator II = I->second.begin(), - EE = I->second.end(); II != EE; ++II) - assert(*II != D && "Inst occurs in data structures"); + for (Instruction *Inst : I->second) + assert(Inst != D && "Inst occurs in data structures"); } for (ReverseDepMapType::const_iterator I = ReverseNonLocalDeps.begin(), E = ReverseNonLocalDeps.end(); I != E; ++I) { assert(I->first != D && "Inst occurs in data structures"); - for (SmallPtrSet::const_iterator II = I->second.begin(), - EE = I->second.end(); II != EE; ++II) - assert(*II != D && "Inst occurs in data structures"); + for (Instruction *Inst : I->second) + assert(Inst != D && "Inst occurs in data structures"); } for (ReverseNonLocalPtrDepTy::const_iterator @@ -1533,11 +1635,10 @@ void MemoryDependenceAnalysis::verifyRemoved(Instruction *D) const { E = ReverseNonLocalPtrDeps.end(); I != E; ++I) { assert(I->first != D && "Inst occurs in rev NLPD map"); - for (SmallPtrSet::const_iterator II = I->second.begin(), - E = I->second.end(); II != E; ++II) - assert(*II != ValueIsLoadPair(D, false) && - *II != ValueIsLoadPair(D, true) && + for (ValueIsLoadPair P : I->second) + assert(P != ValueIsLoadPair(D, false) && + P != ValueIsLoadPair(D, true) && "Inst occurs in ReverseNonLocalPtrDeps map"); } - +#endif } diff --git a/lib/Analysis/NoAliasAnalysis.cpp b/lib/Analysis/NoAliasAnalysis.cpp index 139fa38..c214d3c 100644 --- a/lib/Analysis/NoAliasAnalysis.cpp +++ b/lib/Analysis/NoAliasAnalysis.cpp @@ -57,8 +57,9 @@ namespace { Location getArgLocation(ImmutableCallSite CS, unsigned ArgIdx, ModRefResult &Mask) override { Mask = ModRef; - return Location(CS.getArgument(ArgIdx), UnknownSize, - CS.getInstruction()->getMetadata(LLVMContext::MD_tbaa)); + AAMDNodes AATags; + CS->getAAMetadata(AATags); + return Location(CS.getArgument(ArgIdx), UnknownSize, 
AATags); } ModRefResult getModRefInfo(ImmutableCallSite CS, diff --git a/lib/Analysis/PHITransAddr.cpp b/lib/Analysis/PHITransAddr.cpp index bfe8642..b3d060a 100644 --- a/lib/Analysis/PHITransAddr.cpp +++ b/lib/Analysis/PHITransAddr.cpp @@ -228,7 +228,7 @@ Value *PHITransAddr::PHITranslateSubExpr(Value *V, BasicBlock *CurBB, return GEP; // Simplify the GEP to handle 'gep x, 0' -> x etc. - if (Value *V = SimplifyGEPInst(GEPOps, DL, TLI, DT)) { + if (Value *V = SimplifyGEPInst(GEPOps, DL, TLI, DT, AT)) { for (unsigned i = 0, e = GEPOps.size(); i != e; ++i) RemoveInstInputs(GEPOps[i], InstInputs); @@ -283,7 +283,7 @@ Value *PHITransAddr::PHITranslateSubExpr(Value *V, BasicBlock *CurBB, } // See if the add simplifies away. - if (Value *Res = SimplifyAddInst(LHS, RHS, isNSW, isNUW, DL, TLI, DT)) { + if (Value *Res = SimplifyAddInst(LHS, RHS, isNSW, isNUW, DL, TLI, DT, AT)) { // If we simplified the operands, the LHS is no longer an input, but Res // is. RemoveInstInputs(LHS, InstInputs); @@ -369,7 +369,7 @@ InsertPHITranslatedSubExpr(Value *InVal, BasicBlock *CurBB, SmallVectorImpl &NewInsts) { // See if we have a version of this value already available and dominating // PredBB. If so, there is no need to insert a new instance of it. - PHITransAddr Tmp(InVal, DL); + PHITransAddr Tmp(InVal, DL, AT); if (!Tmp.PHITranslateValue(CurBB, PredBB, &DT)) return Tmp.getAddr(); diff --git a/lib/Analysis/PtrUseVisitor.cpp b/lib/Analysis/PtrUseVisitor.cpp index 1b0f359..68c7535 100644 --- a/lib/Analysis/PtrUseVisitor.cpp +++ b/lib/Analysis/PtrUseVisitor.cpp @@ -17,7 +17,7 @@ using namespace llvm; void detail::PtrUseVisitorBase::enqueueUsers(Instruction &I) { for (Use &U : I.uses()) { - if (VisitedUses.insert(&U)) { + if (VisitedUses.insert(&U).second) { UseToVisit NewU = { UseToVisit::UseAndIsOffsetKnownPair(&U, IsOffsetKnown), Offset diff --git a/lib/Analysis/RegionInfo.cpp b/lib/Analysis/RegionInfo.cpp index 7f88ae1..08ebf0d 100644 --- a/lib/Analysis/RegionInfo.cpp +++ b/lib/Analysis/RegionInfo.cpp @@ -10,6 +10,7 @@ //===----------------------------------------------------------------------===// #include "llvm/Analysis/RegionInfo.h" +#include "llvm/Analysis/RegionInfoImpl.h" #include "llvm/ADT/PostOrderIterator.h" #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/LoopInfo.h" @@ -25,21 +26,26 @@ using namespace llvm; #define DEBUG_TYPE "region" +namespace llvm { +template class RegionBase>; +template class RegionNodeBase>; +template class RegionInfoBase>; +} + +STATISTIC(numRegions, "The # of regions"); +STATISTIC(numSimpleRegions, "The # of simple regions"); + // Always verify if expensive checking is enabled. 
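A pattern that recurs throughout this patch (the SeenVals check at the top of this section, VisitedUses just above, the Visited sets later in ScalarEvolution) is switching from testing insert()'s return value directly to testing .second. A minimal sketch of the idiom, assuming SmallPtrSet::insert now returns std::pair<iterator, bool> like the standard associative containers:

#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/IR/Value.h"

// Returns true only the first time V is seen; later visits are skipped.
static bool markVisited(llvm::SmallPtrSet<const llvm::Value *, 16> &Visited,
                        const llvm::Value *V) {
  // insert() yields {iterator, inserted}; .second is false on a repeat visit.
  return Visited.insert(V).second;
}

The same test previously relied on insert() returning a bare bool, which is why every call site touched here gains the trailing .second.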
-#ifdef XDEBUG -static bool VerifyRegionInfo = true; -#else -static bool VerifyRegionInfo = false; -#endif static cl::opt -VerifyRegionInfoX("verify-region-info", cl::location(VerifyRegionInfo), - cl::desc("Verify region info (time consuming)")); +VerifyRegionInfoX( + "verify-region-info", + cl::location(RegionInfoBase>::VerifyRegionInfo), + cl::desc("Verify region info (time consuming)")); -STATISTIC(numRegions, "The # of regions"); -STATISTIC(numSimpleRegions, "The # of simple regions"); -static cl::opt printStyle("print-region-style", +static cl::opt printStyleX("print-region-style", + cl::location(RegionInfo::printStyle), cl::Hidden, cl::desc("style of printing regions"), cl::values( @@ -49,812 +55,110 @@ static cl::opt printStyle("print-region-style", clEnumValN(Region::PrintRN, "rn", "print regions in detail with element_iterator"), clEnumValEnd)); -//===----------------------------------------------------------------------===// -/// Region Implementation -Region::Region(BasicBlock *Entry, BasicBlock *Exit, RegionInfo* RInfo, - DominatorTree *dt, Region *Parent) - : RegionNode(Parent, Entry, 1), RI(RInfo), DT(dt), exit(Exit) {} - -Region::~Region() { - // Free the cached nodes. - for (BBNodeMapT::iterator it = BBNodeMap.begin(), - ie = BBNodeMap.end(); it != ie; ++it) - delete it->second; - - // Only clean the cache for this Region. Caches of child Regions will be - // cleaned when the child Regions are deleted. - BBNodeMap.clear(); -} - -void Region::replaceEntry(BasicBlock *BB) { - entry.setPointer(BB); -} - -void Region::replaceExit(BasicBlock *BB) { - assert(exit && "No exit to replace!"); - exit = BB; -} - -void Region::replaceEntryRecursive(BasicBlock *NewEntry) { - std::vector RegionQueue; - BasicBlock *OldEntry = getEntry(); - - RegionQueue.push_back(this); - while (!RegionQueue.empty()) { - Region *R = RegionQueue.back(); - RegionQueue.pop_back(); - - R->replaceEntry(NewEntry); - for (Region::const_iterator RI = R->begin(), RE = R->end(); RI != RE; ++RI) - if ((*RI)->getEntry() == OldEntry) - RegionQueue.push_back(RI->get()); - } -} - -void Region::replaceExitRecursive(BasicBlock *NewExit) { - std::vector RegionQueue; - BasicBlock *OldExit = getExit(); - - RegionQueue.push_back(this); - while (!RegionQueue.empty()) { - Region *R = RegionQueue.back(); - RegionQueue.pop_back(); - - R->replaceExit(NewExit); - for (Region::const_iterator RI = R->begin(), RE = R->end(); RI != RE; ++RI) - if ((*RI)->getExit() == OldExit) - RegionQueue.push_back(RI->get()); - } -} - -bool Region::contains(const BasicBlock *B) const { - BasicBlock *BB = const_cast(B); - - if (!DT->getNode(BB)) - return false; - - BasicBlock *entry = getEntry(), *exit = getExit(); - - // Toplevel region. - if (!exit) - return true; - - return (DT->dominates(entry, BB) - && !(DT->dominates(exit, BB) && DT->dominates(entry, exit))); -} - -bool Region::contains(const Loop *L) const { - // BBs that are not part of any loop are element of the Loop - // described by the NULL pointer. This loop is not part of any region, - // except if the region describes the whole function. 
- if (!L) - return getExit() == nullptr; - - if (!contains(L->getHeader())) - return false; - - SmallVector ExitingBlocks; - L->getExitingBlocks(ExitingBlocks); - - for (SmallVectorImpl::iterator BI = ExitingBlocks.begin(), - BE = ExitingBlocks.end(); BI != BE; ++BI) - if (!contains(*BI)) - return false; - - return true; -} - -Loop *Region::outermostLoopInRegion(Loop *L) const { - if (!contains(L)) - return nullptr; - - while (L && contains(L->getParentLoop())) { - L = L->getParentLoop(); - } - - return L; -} - -Loop *Region::outermostLoopInRegion(LoopInfo *LI, BasicBlock* BB) const { - assert(LI && BB && "LI and BB cannot be null!"); - Loop *L = LI->getLoopFor(BB); - return outermostLoopInRegion(L); -} - -BasicBlock *Region::getEnteringBlock() const { - BasicBlock *entry = getEntry(); - BasicBlock *Pred; - BasicBlock *enteringBlock = nullptr; - - for (pred_iterator PI = pred_begin(entry), PE = pred_end(entry); PI != PE; - ++PI) { - Pred = *PI; - if (DT->getNode(Pred) && !contains(Pred)) { - if (enteringBlock) - return nullptr; - - enteringBlock = Pred; - } - } - - return enteringBlock; -} - -BasicBlock *Region::getExitingBlock() const { - BasicBlock *exit = getExit(); - BasicBlock *Pred; - BasicBlock *exitingBlock = nullptr; - - if (!exit) - return nullptr; - - for (pred_iterator PI = pred_begin(exit), PE = pred_end(exit); PI != PE; - ++PI) { - Pred = *PI; - if (contains(Pred)) { - if (exitingBlock) - return nullptr; - - exitingBlock = Pred; - } - } - - return exitingBlock; -} - -bool Region::isSimple() const { - return !isTopLevelRegion() && getEnteringBlock() && getExitingBlock(); -} - -std::string Region::getNameStr() const { - std::string exitName; - std::string entryName; - - if (getEntry()->getName().empty()) { - raw_string_ostream OS(entryName); - - getEntry()->printAsOperand(OS, false); - } else - entryName = getEntry()->getName(); - - if (getExit()) { - if (getExit()->getName().empty()) { - raw_string_ostream OS(exitName); - - getExit()->printAsOperand(OS, false); - } else - exitName = getExit()->getName(); - } else - exitName = ""; - - return entryName + " => " + exitName; -} - -void Region::verifyBBInRegion(BasicBlock *BB) const { - if (!contains(BB)) - llvm_unreachable("Broken region found!"); - - BasicBlock *entry = getEntry(), *exit = getExit(); - - for (succ_iterator SI = succ_begin(BB), SE = succ_end(BB); SI != SE; ++SI) - if (!contains(*SI) && exit != *SI) - llvm_unreachable("Broken region found!"); - - if (entry != BB) - for (pred_iterator SI = pred_begin(BB), SE = pred_end(BB); SI != SE; ++SI) - if (!contains(*SI)) - llvm_unreachable("Broken region found!"); -} - -void Region::verifyWalk(BasicBlock *BB, std::set *visited) const { - BasicBlock *exit = getExit(); - - visited->insert(BB); - - verifyBBInRegion(BB); - - for (succ_iterator SI = succ_begin(BB), SE = succ_end(BB); SI != SE; ++SI) - if (*SI != exit && visited->find(*SI) == visited->end()) - verifyWalk(*SI, visited); -} - -void Region::verifyRegion() const { - // Only do verification when user wants to, otherwise this expensive - // check will be invoked by PassManager. 
- if (!VerifyRegionInfo) return; - - std::set visited; - verifyWalk(getEntry(), &visited); -} - -void Region::verifyRegionNest() const { - for (Region::const_iterator RI = begin(), RE = end(); RI != RE; ++RI) - (*RI)->verifyRegionNest(); - - verifyRegion(); -} - -Region::element_iterator Region::element_begin() { - return GraphTraits::nodes_begin(this); -} - -Region::element_iterator Region::element_end() { - return GraphTraits::nodes_end(this); -} - -Region::const_element_iterator Region::element_begin() const { - return GraphTraits::nodes_begin(this); -} - -Region::const_element_iterator Region::element_end() const { - return GraphTraits::nodes_end(this); -} - -Region* Region::getSubRegionNode(BasicBlock *BB) const { - Region *R = RI->getRegionFor(BB); - - if (!R || R == this) - return nullptr; - - // If we pass the BB out of this region, that means our code is broken. - assert(contains(R) && "BB not in current region!"); - - while (contains(R->getParent()) && R->getParent() != this) - R = R->getParent(); - - if (R->getEntry() != BB) - return nullptr; - - return R; -} - -RegionNode* Region::getBBNode(BasicBlock *BB) const { - assert(contains(BB) && "Can get BB node out of this region!"); - - BBNodeMapT::const_iterator at = BBNodeMap.find(BB); - - if (at != BBNodeMap.end()) - return at->second; - - RegionNode *NewNode = new RegionNode(const_cast(this), BB); - BBNodeMap.insert(std::make_pair(BB, NewNode)); - return NewNode; -} - -RegionNode* Region::getNode(BasicBlock *BB) const { - assert(contains(BB) && "Can get BB node out of this region!"); - if (Region* Child = getSubRegionNode(BB)) - return Child->getNode(); - - return getBBNode(BB); -} - -void Region::transferChildrenTo(Region *To) { - for (iterator I = begin(), E = end(); I != E; ++I) { - (*I)->parent = To; - To->children.push_back(std::move(*I)); - } - children.clear(); -} - -void Region::addSubRegion(Region *SubRegion, bool moveChildren) { - assert(!SubRegion->parent && "SubRegion already has a parent!"); - assert(std::find_if(begin(), end(), [&](const std::unique_ptr &R) { - return R.get() == SubRegion; - }) == children.end() && - "Subregion already exists!"); - - SubRegion->parent = this; - children.push_back(std::unique_ptr(SubRegion)); - - if (!moveChildren) - return; - - assert(SubRegion->children.size() == 0 - && "SubRegions that contain children are not supported"); - - for (element_iterator I = element_begin(), E = element_end(); I != E; ++I) - if (!(*I)->isSubRegion()) { - BasicBlock *BB = (*I)->getNodeAs(); - - if (SubRegion->contains(BB)) - RI->setRegionFor(BB, SubRegion); - } - - std::vector> Keep; - for (iterator I = begin(), E = end(); I != E; ++I) - if (SubRegion->contains(I->get()) && I->get() != SubRegion) { - (*I)->parent = SubRegion; - SubRegion->children.push_back(std::move(*I)); - } else - Keep.push_back(std::move(*I)); - - children.clear(); - children.insert(children.begin(), - std::move_iterator(Keep.begin()), - std::move_iterator(Keep.end())); -} - - -Region *Region::removeSubRegion(Region *Child) { - assert(Child->parent == this && "Child is not a child of this region!"); - Child->parent = nullptr; - RegionSet::iterator I = std::find_if( - children.begin(), children.end(), - [&](const std::unique_ptr &R) { return R.get() == Child; }); - assert(I != children.end() && "Region does not exit. 
Unable to remove."); - children.erase(children.begin()+(I-begin())); - return Child; -} - -unsigned Region::getDepth() const { - unsigned Depth = 0; - - for (Region *R = parent; R != nullptr; R = R->parent) - ++Depth; - - return Depth; -} -Region *Region::getExpandedRegion() const { - unsigned NumSuccessors = exit->getTerminator()->getNumSuccessors(); - if (NumSuccessors == 0) - return nullptr; - - for (pred_iterator PI = pred_begin(getExit()), PE = pred_end(getExit()); - PI != PE; ++PI) - if (!DT->dominates(getEntry(), *PI)) - return nullptr; - - Region *R = RI->getRegionFor(exit); - - if (R->getEntry() != exit) { - if (exit->getTerminator()->getNumSuccessors() == 1) - return new Region(getEntry(), *succ_begin(exit), RI, DT); - else - return nullptr; - } - - while (R->getParent() && R->getParent()->getEntry() == exit) - R = R->getParent(); - - if (!DT->dominates(getEntry(), R->getExit())) - for (pred_iterator PI = pred_begin(getExit()), PE = pred_end(getExit()); - PI != PE; ++PI) - if (!DT->dominates(R->getExit(), *PI)) - return nullptr; - - return new Region(getEntry(), R->getExit(), RI, DT); -} - -void Region::print(raw_ostream &OS, bool print_tree, unsigned level, - enum PrintStyle Style) const { - if (print_tree) - OS.indent(level*2) << "[" << level << "] " << getNameStr(); - else - OS.indent(level*2) << getNameStr(); - - OS << "\n"; - - - if (Style != PrintNone) { - OS.indent(level*2) << "{\n"; - OS.indent(level*2 + 2); - - if (Style == PrintBB) { - for (const auto &BB : blocks()) - OS << BB->getName() << ", "; // TODO: remove the last "," - } else if (Style == PrintRN) { - for (const_element_iterator I = element_begin(), E = element_end(); I!=E; ++I) - OS << **I << ", "; // TODO: remove the last ", - } - - OS << "\n"; - } +//===----------------------------------------------------------------------===// +// Region implementation +// - if (print_tree) - for (const_iterator RI = begin(), RE = end(); RI != RE; ++RI) - (*RI)->print(OS, print_tree, level+1, Style); +Region::Region(BasicBlock *Entry, BasicBlock *Exit, + RegionInfo* RI, + DominatorTree *DT, Region *Parent) : + RegionBase>(Entry, Exit, RI, DT, Parent) { - if (Style != PrintNone) - OS.indent(level*2) << "} \n"; } -#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) -void Region::dump() const { - print(dbgs(), true, getDepth(), printStyle.getValue()); -} -#endif - -void Region::clearNodeCache() { - // Free the cached nodes. - for (BBNodeMapT::iterator I = BBNodeMap.begin(), - IE = BBNodeMap.end(); I != IE; ++I) - delete I->second; - - BBNodeMap.clear(); - for (Region::iterator RI = begin(), RE = end(); RI != RE; ++RI) - (*RI)->clearNodeCache(); -} +Region::~Region() { } //===----------------------------------------------------------------------===// // RegionInfo implementation // -bool RegionInfo::isCommonDomFrontier(BasicBlock *BB, BasicBlock *entry, - BasicBlock *exit) const { - for (pred_iterator PI = pred_begin(BB), PE = pred_end(BB); PI != PE; ++PI) { - BasicBlock *P = *PI; - if (DT->dominates(entry, P) && !DT->dominates(exit, P)) - return false; - } - return true; -} - -bool RegionInfo::isRegion(BasicBlock *entry, BasicBlock *exit) const { - assert(entry && exit && "entry and exit must not be null!"); - typedef DominanceFrontier::DomSetType DST; - - DST *entrySuccs = &DF->find(entry)->second; - - // Exit is the header of a loop that contains the entry. In this case, - // the dominance frontier must only contain the exit. 
- if (!DT->dominates(entry, exit)) { - for (DST::iterator SI = entrySuccs->begin(), SE = entrySuccs->end(); - SI != SE; ++SI) - if (*SI != exit && *SI != entry) - return false; - - return true; - } - - DST *exitSuccs = &DF->find(exit)->second; - - // Do not allow edges leaving the region. - for (DST::iterator SI = entrySuccs->begin(), SE = entrySuccs->end(); - SI != SE; ++SI) { - if (*SI == exit || *SI == entry) - continue; - if (exitSuccs->find(*SI) == exitSuccs->end()) - return false; - if (!isCommonDomFrontier(*SI, entry, exit)) - return false; - } - - // Do not allow edges pointing into the region. - for (DST::iterator SI = exitSuccs->begin(), SE = exitSuccs->end(); - SI != SE; ++SI) - if (DT->properlyDominates(entry, *SI) && *SI != exit) - return false; +RegionInfo::RegionInfo() : + RegionInfoBase>() { - - return true; -} - -void RegionInfo::insertShortCut(BasicBlock *entry, BasicBlock *exit, - BBtoBBMap *ShortCut) const { - assert(entry && exit && "entry and exit must not be null!"); - - BBtoBBMap::iterator e = ShortCut->find(exit); - - if (e == ShortCut->end()) - // No further region at exit available. - (*ShortCut)[entry] = exit; - else { - // We found a region e that starts at exit. Therefore (entry, e->second) - // is also a region, that is larger than (entry, exit). Insert the - // larger one. - BasicBlock *BB = e->second; - (*ShortCut)[entry] = BB; - } } -DomTreeNode* RegionInfo::getNextPostDom(DomTreeNode* N, - BBtoBBMap *ShortCut) const { - BBtoBBMap::iterator e = ShortCut->find(N->getBlock()); - - if (e == ShortCut->end()) - return N->getIDom(); +RegionInfo::~RegionInfo() { - return PDT->getNode(e->second)->getIDom(); -} - -bool RegionInfo::isTrivialRegion(BasicBlock *entry, BasicBlock *exit) const { - assert(entry && exit && "entry and exit must not be null!"); - - unsigned num_successors = succ_end(entry) - succ_begin(entry); - - if (num_successors <= 1 && exit == *(succ_begin(entry))) - return true; - - return false; } void RegionInfo::updateStatistics(Region *R) { ++numRegions; // TODO: Slow. Should only be enabled if -stats is used. - if (R->isSimple()) ++numSimpleRegions; -} - -Region *RegionInfo::createRegion(BasicBlock *entry, BasicBlock *exit) { - assert(entry && exit && "entry and exit must not be null!"); - - if (isTrivialRegion(entry, exit)) - return nullptr; - - Region *region = new Region(entry, exit, this, DT); - BBtoRegion.insert(std::make_pair(entry, region)); - - #ifdef XDEBUG - region->verifyRegion(); - #else - DEBUG(region->verifyRegion()); - #endif - - updateStatistics(region); - return region; -} - -void RegionInfo::findRegionsWithEntry(BasicBlock *entry, BBtoBBMap *ShortCut) { - assert(entry); - - DomTreeNode *N = PDT->getNode(entry); - - if (!N) - return; - - Region *lastRegion= nullptr; - BasicBlock *lastExit = entry; - - // As only a BasicBlock that postdominates entry can finish a region, walk the - // post dominance tree upwards. - while ((N = getNextPostDom(N, ShortCut))) { - BasicBlock *exit = N->getBlock(); - - if (!exit) - break; - - if (isRegion(entry, exit)) { - Region *newRegion = createRegion(entry, exit); - - if (lastRegion) - newRegion->addSubRegion(lastRegion); - - lastRegion = newRegion; - lastExit = exit; - } - - // This can never be a region, so stop the search. - if (!DT->dominates(entry, exit)) - break; - } - - // Tried to create regions from entry to lastExit. Next time take a - // shortcut from entry to lastExit. 
- if (lastExit != entry) - insertShortCut(entry, lastExit, ShortCut); + if (R->isSimple()) + ++numSimpleRegions; } -void RegionInfo::scanForRegions(Function &F, BBtoBBMap *ShortCut) { - BasicBlock *entry = &(F.getEntryBlock()); - DomTreeNode *N = DT->getNode(entry); - - // Iterate over the dominance tree in post order to start with the small - // regions from the bottom of the dominance tree. If the small regions are - // detected first, detection of bigger regions is faster, as we can jump - // over the small regions. - for (po_iterator FI = po_begin(N), FE = po_end(N); FI != FE; - ++FI) { - findRegionsWithEntry(FI->getBlock(), ShortCut); - } -} +void RegionInfo::recalculate(Function &F, DominatorTree *DT_, + PostDominatorTree *PDT_, DominanceFrontier *DF_) { + DT = DT_; + PDT = PDT_; + DF = DF_; -Region *RegionInfo::getTopMostParent(Region *region) { - while (region->parent) - region = region->getParent(); - - return region; + TopLevelRegion = new Region(&F.getEntryBlock(), nullptr, + this, DT, nullptr); + updateStatistics(TopLevelRegion); + calculate(F); } -void RegionInfo::buildRegionsTree(DomTreeNode *N, Region *region) { - BasicBlock *BB = N->getBlock(); - - // Passed region exit - while (BB == region->getExit()) - region = region->getParent(); - - BBtoRegionMap::iterator it = BBtoRegion.find(BB); - - // This basic block is a start block of a region. It is already in the - // BBtoRegion relation. Only the child basic blocks have to be updated. - if (it != BBtoRegion.end()) { - Region *newRegion = it->second; - region->addSubRegion(getTopMostParent(newRegion)); - region = newRegion; - } else { - BBtoRegion[BB] = region; - } +//===----------------------------------------------------------------------===// +// RegionInfoPass implementation +// - for (DomTreeNode::iterator CI = N->begin(), CE = N->end(); CI != CE; ++CI) - buildRegionsTree(*CI, region); +RegionInfoPass::RegionInfoPass() : FunctionPass(ID) { + initializeRegionInfoPassPass(*PassRegistry::getPassRegistry()); } -void RegionInfo::releaseMemory() { - BBtoRegion.clear(); - if (TopLevelRegion) - delete TopLevelRegion; - TopLevelRegion = nullptr; -} +RegionInfoPass::~RegionInfoPass() { -RegionInfo::RegionInfo() : FunctionPass(ID) { - initializeRegionInfoPass(*PassRegistry::getPassRegistry()); - TopLevelRegion = nullptr; } -RegionInfo::~RegionInfo() { +bool RegionInfoPass::runOnFunction(Function &F) { releaseMemory(); -} -void RegionInfo::Calculate(Function &F) { - // ShortCut a function where for every BB the exit of the largest region - // starting with BB is stored. These regions can be threated as single BBS. - // This improves performance on linear CFGs. 
- BBtoBBMap ShortCut; + auto DT = &getAnalysis().getDomTree(); + auto PDT = &getAnalysis(); + auto DF = &getAnalysis(); - scanForRegions(F, &ShortCut); - BasicBlock *BB = &F.getEntryBlock(); - buildRegionsTree(DT->getNode(BB), TopLevelRegion); + RI.recalculate(F, DT, PDT, DF); + return false; } -bool RegionInfo::runOnFunction(Function &F) { - releaseMemory(); - - DT = &getAnalysis().getDomTree(); - PDT = &getAnalysis(); - DF = &getAnalysis(); - - TopLevelRegion = new Region(&F.getEntryBlock(), nullptr, this, DT, nullptr); - updateStatistics(TopLevelRegion); - - Calculate(F); +void RegionInfoPass::releaseMemory() { + RI.releaseMemory(); +} - return false; +void RegionInfoPass::verifyAnalysis() const { + RI.verifyAnalysis(); } -void RegionInfo::getAnalysisUsage(AnalysisUsage &AU) const { +void RegionInfoPass::getAnalysisUsage(AnalysisUsage &AU) const { AU.setPreservesAll(); AU.addRequiredTransitive(); AU.addRequired(); AU.addRequired(); } -void RegionInfo::print(raw_ostream &OS, const Module *) const { - OS << "Region tree:\n"; - TopLevelRegion->print(OS, true, 0, printStyle.getValue()); - OS << "End region tree\n"; -} - -void RegionInfo::verifyAnalysis() const { - // Only do verification when user wants to, otherwise this expensive check - // will be invoked by PMDataManager::verifyPreservedAnalysis when - // a regionpass (marked PreservedAll) finish. - if (!VerifyRegionInfo) return; - - TopLevelRegion->verifyRegionNest(); -} - -// Region pass manager support. -Region *RegionInfo::getRegionFor(BasicBlock *BB) const { - BBtoRegionMap::const_iterator I= - BBtoRegion.find(BB); - return I != BBtoRegion.end() ? I->second : nullptr; -} - -void RegionInfo::setRegionFor(BasicBlock *BB, Region *R) { - BBtoRegion[BB] = R; -} - -Region *RegionInfo::operator[](BasicBlock *BB) const { - return getRegionFor(BB); +void RegionInfoPass::print(raw_ostream &OS, const Module *) const { + RI.print(OS); } -BasicBlock *RegionInfo::getMaxRegionExit(BasicBlock *BB) const { - BasicBlock *Exit = nullptr; - - while (true) { - // Get largest region that starts at BB. - Region *R = getRegionFor(BB); - while (R && R->getParent() && R->getParent()->getEntry() == BB) - R = R->getParent(); - - // Get the single exit of BB. - if (R && R->getEntry() == BB) - Exit = R->getExit(); - else if (++succ_begin(BB) == succ_end(BB)) - Exit = *succ_begin(BB); - else // No single exit exists. - return Exit; - - // Get largest region that starts at Exit. - Region *ExitR = getRegionFor(Exit); - while (ExitR && ExitR->getParent() - && ExitR->getParent()->getEntry() == Exit) - ExitR = ExitR->getParent(); - - for (pred_iterator PI = pred_begin(Exit), PE = pred_end(Exit); PI != PE; - ++PI) - if (!R->contains(*PI) && !ExitR->contains(*PI)) - break; - - // This stops infinite cycles. 
- if (DT->dominates(Exit, BB)) - break; - - BB = Exit; - } - - return Exit; -} - -Region* -RegionInfo::getCommonRegion(Region *A, Region *B) const { - assert (A && B && "One of the Regions is NULL"); - - if (A->contains(B)) return A; - - while (!B->contains(A)) - B = B->getParent(); - - return B; -} - -Region* -RegionInfo::getCommonRegion(SmallVectorImpl &Regions) const { - Region* ret = Regions.back(); - Regions.pop_back(); - - for (SmallVectorImpl::const_iterator I = Regions.begin(), - E = Regions.end(); I != E; ++I) - ret = getCommonRegion(ret, *I); - - return ret; -} - -Region* -RegionInfo::getCommonRegion(SmallVectorImpl &BBs) const { - Region* ret = getRegionFor(BBs.back()); - BBs.pop_back(); - - for (SmallVectorImpl::const_iterator I = BBs.begin(), - E = BBs.end(); I != E; ++I) - ret = getCommonRegion(ret, getRegionFor(*I)); - - return ret; +#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) +void RegionInfoPass::dump() const { + RI.dump(); } +#endif -void RegionInfo::splitBlock(BasicBlock* NewBB, BasicBlock *OldBB) -{ - Region *R = getRegionFor(OldBB); - - setRegionFor(NewBB, R); - - while (R->getEntry() == OldBB && !R->isTopLevelRegion()) { - R->replaceEntry(NewBB); - R = R->getParent(); - } - - setRegionFor(OldBB, R); -} +char RegionInfoPass::ID = 0; -char RegionInfo::ID = 0; -INITIALIZE_PASS_BEGIN(RegionInfo, "regions", +INITIALIZE_PASS_BEGIN(RegionInfoPass, "regions", "Detect single entry single exit regions", true, true) INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) INITIALIZE_PASS_DEPENDENCY(PostDominatorTree) INITIALIZE_PASS_DEPENDENCY(DominanceFrontier) -INITIALIZE_PASS_END(RegionInfo, "regions", +INITIALIZE_PASS_END(RegionInfoPass, "regions", "Detect single entry single exit regions", true, true) // Create methods available outside of this file, to use them @@ -863,7 +167,7 @@ INITIALIZE_PASS_END(RegionInfo, "regions", namespace llvm { FunctionPass *createRegionInfoPass() { - return new RegionInfo(); + return new RegionInfoPass(); } } diff --git a/lib/Analysis/RegionPass.cpp b/lib/Analysis/RegionPass.cpp index 71de144..de34b72 100644 --- a/lib/Analysis/RegionPass.cpp +++ b/lib/Analysis/RegionPass.cpp @@ -45,14 +45,14 @@ static void addRegionIntoQueue(Region &R, std::deque &RQ) { /// Pass Manager itself does not invalidate any analysis info. void RGPassManager::getAnalysisUsage(AnalysisUsage &Info) const { - Info.addRequired(); + Info.addRequired(); Info.setPreservesAll(); } /// run - Execute all of the passes scheduled for execution. Keep track of /// whether any of the passes modifies the function, and if so, return true. bool RGPassManager::runOnFunction(Function &F) { - RI = &getAnalysis(); + RI = &getAnalysis().getRegionInfo(); bool Changed = false; // Collect inherited analysis from Module level pass manager. 
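The RegionPass.cpp hunk above shows the client-side half of the split: region data now lives in a plain RegionInfo object owned by RegionInfoPass, and passes reach it through getRegionInfo(). A hypothetical client pass, written only to illustrate the new call pattern (the class name is invented):

#include "llvm/Analysis/RegionInfo.h"
#include "llvm/Pass.h"

namespace {
struct RegionClientSketch : public llvm::FunctionPass {
  static char ID;
  RegionClientSketch() : llvm::FunctionPass(ID) {}

  void getAnalysisUsage(llvm::AnalysisUsage &AU) const override {
    AU.addRequired<llvm::RegionInfoPass>();  // previously: RegionInfo itself
    AU.setPreservesAll();
  }

  bool runOnFunction(llvm::Function &F) override {
    llvm::RegionInfo &RI =
        getAnalysis<llvm::RegionInfoPass>().getRegionInfo();
    // Walk the region tree as before, starting from the top-level region.
    (void)RI.getTopLevelRegion();
    return false;
  }
};
char RegionClientSketch::ID = 0;
}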
diff --git a/lib/Analysis/RegionPrinter.cpp b/lib/Analysis/RegionPrinter.cpp index 893210a..ad83113 100644 --- a/lib/Analysis/RegionPrinter.cpp +++ b/lib/Analysis/RegionPrinter.cpp @@ -56,23 +56,24 @@ struct DOTGraphTraits : public DefaultDOTGraphTraits { }; template<> -struct DOTGraphTraits : public DOTGraphTraits { +struct DOTGraphTraits : public DOTGraphTraits { - DOTGraphTraits (bool isSimple=false) + DOTGraphTraits (bool isSimple = false) : DOTGraphTraits(isSimple) {} - static std::string getGraphName(RegionInfo *DT) { + static std::string getGraphName(RegionInfoPass *DT) { return "Region Graph"; } - std::string getNodeLabel(RegionNode *Node, RegionInfo *G) { + std::string getNodeLabel(RegionNode *Node, RegionInfoPass *G) { + RegionInfo &RI = G->getRegionInfo(); return DOTGraphTraits::getNodeLabel(Node, - G->getTopLevelRegion()); + reinterpret_cast(RI.getTopLevelRegion())); } std::string getEdgeAttributes(RegionNode *srcNode, - GraphTraits::ChildIteratorType CI, RegionInfo *RI) { - + GraphTraits::ChildIteratorType CI, RegionInfoPass *G) { + RegionInfo &RI = G->getRegionInfo(); RegionNode *destNode = *CI; if (srcNode->isSubRegion() || destNode->isSubRegion()) @@ -82,7 +83,7 @@ struct DOTGraphTraits : public DOTGraphTraits { BasicBlock *srcBB = srcNode->getNodeAs(); BasicBlock *destBB = destNode->getNodeAs(); - Region *R = RI->getRegionFor(destBB); + Region *R = RI.getRegionFor(destBB); while (R && R->getParent()) if (R->getParent()->getEntry() == destBB) @@ -98,7 +99,8 @@ struct DOTGraphTraits : public DOTGraphTraits { // Print the cluster of the subregions. This groups the single basic blocks // and adds a different background color for each group. - static void printRegionCluster(const Region &R, GraphWriter &GW, + static void printRegionCluster(const Region &R, + GraphWriter &GW, unsigned depth = 0) { raw_ostream &O = GW.getOStream(); O.indent(2 * depth) << "subgraph cluster_" << static_cast(&R) @@ -119,22 +121,23 @@ struct DOTGraphTraits : public DOTGraphTraits { for (Region::const_iterator RI = R.begin(), RE = R.end(); RI != RE; ++RI) printRegionCluster(**RI, GW, depth + 1); - RegionInfo *RI = R.getRegionInfo(); + const RegionInfo &RI = *static_cast(R.getRegionInfo()); for (const auto &BB : R.blocks()) - if (RI->getRegionFor(BB) == &R) + if (RI.getRegionFor(BB) == &R) O.indent(2 * (depth + 1)) << "Node" - << static_cast(RI->getTopLevelRegion()->getBBNode(BB)) + << static_cast(RI.getTopLevelRegion()->getBBNode(BB)) << ";\n"; O.indent(2 * depth) << "}\n"; } - static void addCustomGraphFeatures(const RegionInfo* RI, - GraphWriter &GW) { + static void addCustomGraphFeatures(const RegionInfoPass* RIP, + GraphWriter &GW) { + const RegionInfo &RI = RIP->getRegionInfo(); raw_ostream &O = GW.getOStream(); O << "\tcolorscheme = \"paired12\"\n"; - printRegionCluster(*RI->getTopLevelRegion(), GW, 4); + printRegionCluster(*RI.getTopLevelRegion(), GW, 4); } }; } //end namespace llvm @@ -142,28 +145,28 @@ struct DOTGraphTraits : public DOTGraphTraits { namespace { struct RegionViewer - : public DOTGraphTraitsViewer { + : public DOTGraphTraitsViewer { static char ID; - RegionViewer() : DOTGraphTraitsViewer("reg", ID){ + RegionViewer() : DOTGraphTraitsViewer("reg", ID){ initializeRegionViewerPass(*PassRegistry::getPassRegistry()); } }; char RegionViewer::ID = 0; struct RegionOnlyViewer - : public DOTGraphTraitsViewer { + : public DOTGraphTraitsViewer { static char ID; - RegionOnlyViewer() : DOTGraphTraitsViewer("regonly", ID) { + RegionOnlyViewer() : DOTGraphTraitsViewer("regonly", ID) { 
initializeRegionOnlyViewerPass(*PassRegistry::getPassRegistry()); } }; char RegionOnlyViewer::ID = 0; struct RegionPrinter - : public DOTGraphTraitsPrinter { + : public DOTGraphTraitsPrinter { static char ID; RegionPrinter() : - DOTGraphTraitsPrinter("reg", ID) { + DOTGraphTraitsPrinter("reg", ID) { initializeRegionPrinterPass(*PassRegistry::getPassRegistry()); } }; @@ -175,7 +178,7 @@ INITIALIZE_PASS(RegionPrinter, "dot-regions", INITIALIZE_PASS(RegionViewer, "view-regions", "View regions of function", true, true) - + INITIALIZE_PASS(RegionOnlyViewer, "view-regions-only", "View regions of function (with no function bodies)", true, true) @@ -183,10 +186,10 @@ INITIALIZE_PASS(RegionOnlyViewer, "view-regions-only", namespace { struct RegionOnlyPrinter - : public DOTGraphTraitsPrinter { + : public DOTGraphTraitsPrinter { static char ID; RegionOnlyPrinter() : - DOTGraphTraitsPrinter("reg", ID) { + DOTGraphTraitsPrinter("reg", ID) { initializeRegionOnlyPrinterPass(*PassRegistry::getPassRegistry()); } }; diff --git a/lib/Analysis/ScalarEvolution.cpp b/lib/Analysis/ScalarEvolution.cpp index 06dbde5..68549ef 100644 --- a/lib/Analysis/ScalarEvolution.cpp +++ b/lib/Analysis/ScalarEvolution.cpp @@ -1,4 +1,4 @@ -//===- ScalarEvolution.cpp - Scalar Evolution Analysis ----------*- C++ -*-===// +//===- ScalarEvolution.cpp - Scalar Evolution Analysis --------------------===// // // The LLVM Compiler Infrastructure // @@ -59,9 +59,11 @@ //===----------------------------------------------------------------------===// #include "llvm/Analysis/ScalarEvolution.h" +#include "llvm/ADT/Optional.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/Statistic.h" +#include "llvm/Analysis/AssumptionTracker.h" #include "llvm/Analysis/ConstantFolding.h" #include "llvm/Analysis/InstructionSimplify.h" #include "llvm/Analysis/LoopInfo.h" @@ -78,6 +80,7 @@ #include "llvm/IR/InstIterator.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/LLVMContext.h" +#include "llvm/IR/Metadata.h" #include "llvm/IR/Operator.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" @@ -113,6 +116,7 @@ VerifySCEV("verify-scev", INITIALIZE_PASS_BEGIN(ScalarEvolution, "scalar-evolution", "Scalar Evolution Analysis", false, true) +INITIALIZE_PASS_DEPENDENCY(AssumptionTracker) INITIALIZE_PASS_DEPENDENCY(LoopInfo) INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfo) @@ -671,7 +675,321 @@ static void GroupByComplexity(SmallVectorImpl &Ops, } } +static const APInt srem(const SCEVConstant *C1, const SCEVConstant *C2) { + APInt A = C1->getValue()->getValue(); + APInt B = C2->getValue()->getValue(); + uint32_t ABW = A.getBitWidth(); + uint32_t BBW = B.getBitWidth(); + + if (ABW > BBW) + B = B.sext(ABW); + else if (ABW < BBW) + A = A.sext(BBW); + + return APIntOps::srem(A, B); +} + +static const APInt sdiv(const SCEVConstant *C1, const SCEVConstant *C2) { + APInt A = C1->getValue()->getValue(); + APInt B = C2->getValue()->getValue(); + uint32_t ABW = A.getBitWidth(); + uint32_t BBW = B.getBitWidth(); + + if (ABW > BBW) + B = B.sext(ABW); + else if (ABW < BBW) + A = A.sext(BBW); + + return APIntOps::sdiv(A, B); +} + +static const APInt urem(const SCEVConstant *C1, const SCEVConstant *C2) { + APInt A = C1->getValue()->getValue(); + APInt B = C2->getValue()->getValue(); + uint32_t ABW = A.getBitWidth(); + uint32_t BBW = B.getBitWidth(); + if (ABW > BBW) + B = B.zext(ABW); + else if (ABW < BBW) + A = A.zext(BBW); + + return APIntOps::urem(A, B); +} + +static 
const APInt udiv(const SCEVConstant *C1, const SCEVConstant *C2) { + APInt A = C1->getValue()->getValue(); + APInt B = C2->getValue()->getValue(); + uint32_t ABW = A.getBitWidth(); + uint32_t BBW = B.getBitWidth(); + + if (ABW > BBW) + B = B.zext(ABW); + else if (ABW < BBW) + A = A.zext(BBW); + + return APIntOps::udiv(A, B); +} + +namespace { +struct FindSCEVSize { + int Size; + FindSCEVSize() : Size(0) {} + + bool follow(const SCEV *S) { + ++Size; + // Keep looking at all operands of S. + return true; + } + bool isDone() const { + return false; + } +}; +} + +// Returns the size of the SCEV S. +static inline int sizeOfSCEV(const SCEV *S) { + FindSCEVSize F; + SCEVTraversal ST(F); + ST.visitAll(S); + return F.Size; +} + +namespace { + +template +struct SCEVDivision : public SCEVVisitor { +public: + // Computes the Quotient and Remainder of the division of Numerator by + // Denominator. + static void divide(ScalarEvolution &SE, const SCEV *Numerator, + const SCEV *Denominator, const SCEV **Quotient, + const SCEV **Remainder) { + assert(Numerator && Denominator && "Uninitialized SCEV"); + + Derived D(SE, Numerator, Denominator); + + // Check for the trivial case here to avoid having to check for it in the + // rest of the code. + if (Numerator == Denominator) { + *Quotient = D.One; + *Remainder = D.Zero; + return; + } + + if (Numerator->isZero()) { + *Quotient = D.Zero; + *Remainder = D.Zero; + return; + } + + // Split the Denominator when it is a product. + if (const SCEVMulExpr *T = dyn_cast(Denominator)) { + const SCEV *Q, *R; + *Quotient = Numerator; + for (const SCEV *Op : T->operands()) { + divide(SE, *Quotient, Op, &Q, &R); + *Quotient = Q; + + // Bail out when the Numerator is not divisible by one of the terms of + // the Denominator. + if (!R->isZero()) { + *Quotient = D.Zero; + *Remainder = Numerator; + return; + } + } + *Remainder = D.Zero; + return; + } + + D.visit(Numerator); + *Quotient = D.Quotient; + *Remainder = D.Remainder; + } + + // Except in the trivial case described above, we do not know how to divide + // Expr by Denominator for the following functions with empty implementation. + void visitTruncateExpr(const SCEVTruncateExpr *Numerator) {} + void visitZeroExtendExpr(const SCEVZeroExtendExpr *Numerator) {} + void visitSignExtendExpr(const SCEVSignExtendExpr *Numerator) {} + void visitUDivExpr(const SCEVUDivExpr *Numerator) {} + void visitSMaxExpr(const SCEVSMaxExpr *Numerator) {} + void visitUMaxExpr(const SCEVUMaxExpr *Numerator) {} + void visitUnknown(const SCEVUnknown *Numerator) {} + void visitCouldNotCompute(const SCEVCouldNotCompute *Numerator) {} + + void visitAddRecExpr(const SCEVAddRecExpr *Numerator) { + const SCEV *StartQ, *StartR, *StepQ, *StepR; + assert(Numerator->isAffine() && "Numerator should be affine"); + divide(SE, Numerator->getStart(), Denominator, &StartQ, &StartR); + divide(SE, Numerator->getStepRecurrence(SE), Denominator, &StepQ, &StepR); + Quotient = SE.getAddRecExpr(StartQ, StepQ, Numerator->getLoop(), + Numerator->getNoWrapFlags()); + Remainder = SE.getAddRecExpr(StartR, StepR, Numerator->getLoop(), + Numerator->getNoWrapFlags()); + } + + void visitAddExpr(const SCEVAddExpr *Numerator) { + SmallVector Qs, Rs; + Type *Ty = Denominator->getType(); + + for (const SCEV *Op : Numerator->operands()) { + const SCEV *Q, *R; + divide(SE, Op, Denominator, &Q, &R); + + // Bail out if types do not match. 
+ if (Ty != Q->getType() || Ty != R->getType()) { + Quotient = Zero; + Remainder = Numerator; + return; + } + + Qs.push_back(Q); + Rs.push_back(R); + } + + if (Qs.size() == 1) { + Quotient = Qs[0]; + Remainder = Rs[0]; + return; + } + + Quotient = SE.getAddExpr(Qs); + Remainder = SE.getAddExpr(Rs); + } + + void visitMulExpr(const SCEVMulExpr *Numerator) { + SmallVector Qs; + Type *Ty = Denominator->getType(); + + bool FoundDenominatorTerm = false; + for (const SCEV *Op : Numerator->operands()) { + // Bail out if types do not match. + if (Ty != Op->getType()) { + Quotient = Zero; + Remainder = Numerator; + return; + } + + if (FoundDenominatorTerm) { + Qs.push_back(Op); + continue; + } + + // Check whether Denominator divides one of the product operands. + const SCEV *Q, *R; + divide(SE, Op, Denominator, &Q, &R); + if (!R->isZero()) { + Qs.push_back(Op); + continue; + } + + // Bail out if types do not match. + if (Ty != Q->getType()) { + Quotient = Zero; + Remainder = Numerator; + return; + } + + FoundDenominatorTerm = true; + Qs.push_back(Q); + } + + if (FoundDenominatorTerm) { + Remainder = Zero; + if (Qs.size() == 1) + Quotient = Qs[0]; + else + Quotient = SE.getMulExpr(Qs); + return; + } + + if (!isa(Denominator)) { + Quotient = Zero; + Remainder = Numerator; + return; + } + + // The Remainder is obtained by replacing Denominator by 0 in Numerator. + ValueToValueMap RewriteMap; + RewriteMap[cast(Denominator)->getValue()] = + cast(Zero)->getValue(); + Remainder = SCEVParameterRewriter::rewrite(Numerator, SE, RewriteMap, true); + + if (Remainder->isZero()) { + // The Quotient is obtained by replacing Denominator by 1 in Numerator. + RewriteMap[cast(Denominator)->getValue()] = + cast(One)->getValue(); + Quotient = + SCEVParameterRewriter::rewrite(Numerator, SE, RewriteMap, true); + return; + } + + // Quotient is (Numerator - Remainder) divided by Denominator. + const SCEV *Q, *R; + const SCEV *Diff = SE.getMinusSCEV(Numerator, Remainder); + if (sizeOfSCEV(Diff) > sizeOfSCEV(Numerator)) { + // This SCEV does not seem to simplify: fail the division here. + Quotient = Zero; + Remainder = Numerator; + return; + } + divide(SE, Diff, Denominator, &Q, &R); + assert(R == Zero && + "(Numerator - Remainder) should evenly divide Denominator"); + Quotient = Q; + } + +private: + SCEVDivision(ScalarEvolution &S, const SCEV *Numerator, + const SCEV *Denominator) + : SE(S), Denominator(Denominator) { + Zero = SE.getConstant(Denominator->getType(), 0); + One = SE.getConstant(Denominator->getType(), 1); + + // By default, we don't know how to divide Expr by Denominator. + // Providing the default here simplifies the rest of the code. 
+ Quotient = Zero; + Remainder = Numerator; + } + + ScalarEvolution &SE; + const SCEV *Denominator, *Quotient, *Remainder, *Zero, *One; + + friend struct SCEVSDivision; + friend struct SCEVUDivision; +}; + +struct SCEVSDivision : public SCEVDivision { + SCEVSDivision(ScalarEvolution &S, const SCEV *Numerator, + const SCEV *Denominator) + : SCEVDivision(S, Numerator, Denominator) {} + + void visitConstant(const SCEVConstant *Numerator) { + if (const SCEVConstant *D = dyn_cast(Denominator)) { + Quotient = SE.getConstant(sdiv(Numerator, D)); + Remainder = SE.getConstant(srem(Numerator, D)); + return; + } + } +}; + +struct SCEVUDivision : public SCEVDivision { + SCEVUDivision(ScalarEvolution &S, const SCEV *Numerator, + const SCEV *Denominator) + : SCEVDivision(S, Numerator, Denominator) {} + + void visitConstant(const SCEVConstant *Numerator) { + if (const SCEVConstant *D = dyn_cast(Denominator)) { + Quotient = SE.getConstant(udiv(Numerator, D)); + Remainder = SE.getConstant(urem(Numerator, D)); + return; + } + } +}; + +} //===----------------------------------------------------------------------===// // Simple SCEV method implementations @@ -2061,71 +2379,66 @@ const SCEV *ScalarEvolution::getMulExpr(SmallVectorImpl &Ops, // Okay, if there weren't any loop invariants to be folded, check to see if // there are multiple AddRec's with the same loop induction variable being // multiplied together. If so, we can fold them. + + // {A1,+,A2,+,...,+,An} * {B1,+,B2,+,...,+,Bn} + // = {x=1 in [ sum y=x..2x [ sum z=max(y-x, y-n)..min(x,n) [ + // choose(x, 2x)*choose(2x-y, x-z)*A_{y-z}*B_z + // ]]],+,...up to x=2n}. + // Note that the arguments to choose() are always integers with values + // known at compile time, never SCEV objects. + // + // The implementation avoids pointless extra computations when the two + // addrec's are of different length (mathematically, it's equivalent to + // an infinite stream of zeros on the right). + bool OpsModified = false; for (unsigned OtherIdx = Idx+1; - OtherIdx < Ops.size() && isa(Ops[OtherIdx]); + OtherIdx != Ops.size() && isa(Ops[OtherIdx]); ++OtherIdx) { - if (AddRecLoop != cast(Ops[OtherIdx])->getLoop()) + const SCEVAddRecExpr *OtherAddRec = + dyn_cast(Ops[OtherIdx]); + if (!OtherAddRec || OtherAddRec->getLoop() != AddRecLoop) continue; - // {A1,+,A2,+,...,+,An} * {B1,+,B2,+,...,+,Bn} - // = {x=1 in [ sum y=x..2x [ sum z=max(y-x, y-n)..min(x,n) [ - // choose(x, 2x)*choose(2x-y, x-z)*A_{y-z}*B_z - // ]]],+,...up to x=2n}. - // Note that the arguments to choose() are always integers with values - // known at compile time, never SCEV objects. - // - // The implementation avoids pointless extra computations when the two - // addrec's are of different length (mathematically, it's equivalent to - // an infinite stream of zeros on the right). 
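The choose()-based product formula restated in the comment above is dense; expanding the smallest non-trivial case makes it easier to check. The derivation below is an editorial aside, not part of the patch.

For two affine recurrences {A,+,B} and {C,+,D} over the same loop, the value
at iteration n is

  (A + B*n) * (C + D*n) = A*C + (A*D + B*C)*n + B*D*n^2.

A chain {s0,+,s1,+,s2} evaluates at iteration n to s0 + s1*n + s2*n*(n-1)/2,
so matching coefficients gives s0 = A*C, s1 = A*D + B*C + B*D, s2 = 2*B*D:

  {A,+,B} * {C,+,D} = {A*C, +, A*D + B*C + B*D, +, 2*B*D}

which is exactly what the nested x/y/z loops in the surrounding code produce
when both recurrences have two operands.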
- bool OpsModified = false; - for (; OtherIdx != Ops.size() && isa(Ops[OtherIdx]); - ++OtherIdx) { - const SCEVAddRecExpr *OtherAddRec = - dyn_cast(Ops[OtherIdx]); - if (!OtherAddRec || OtherAddRec->getLoop() != AddRecLoop) - continue; - - bool Overflow = false; - Type *Ty = AddRec->getType(); - bool LargerThan64Bits = getTypeSizeInBits(Ty) > 64; - SmallVector AddRecOps; - for (int x = 0, xe = AddRec->getNumOperands() + - OtherAddRec->getNumOperands() - 1; x != xe && !Overflow; ++x) { - const SCEV *Term = getConstant(Ty, 0); - for (int y = x, ye = 2*x+1; y != ye && !Overflow; ++y) { - uint64_t Coeff1 = Choose(x, 2*x - y, Overflow); - for (int z = std::max(y-x, y-(int)AddRec->getNumOperands()+1), - ze = std::min(x+1, (int)OtherAddRec->getNumOperands()); - z < ze && !Overflow; ++z) { - uint64_t Coeff2 = Choose(2*x - y, x-z, Overflow); - uint64_t Coeff; - if (LargerThan64Bits) - Coeff = umul_ov(Coeff1, Coeff2, Overflow); - else - Coeff = Coeff1*Coeff2; - const SCEV *CoeffTerm = getConstant(Ty, Coeff); - const SCEV *Term1 = AddRec->getOperand(y-z); - const SCEV *Term2 = OtherAddRec->getOperand(z); - Term = getAddExpr(Term, getMulExpr(CoeffTerm, Term1,Term2)); - } + bool Overflow = false; + Type *Ty = AddRec->getType(); + bool LargerThan64Bits = getTypeSizeInBits(Ty) > 64; + SmallVector AddRecOps; + for (int x = 0, xe = AddRec->getNumOperands() + + OtherAddRec->getNumOperands() - 1; x != xe && !Overflow; ++x) { + const SCEV *Term = getConstant(Ty, 0); + for (int y = x, ye = 2*x+1; y != ye && !Overflow; ++y) { + uint64_t Coeff1 = Choose(x, 2*x - y, Overflow); + for (int z = std::max(y-x, y-(int)AddRec->getNumOperands()+1), + ze = std::min(x+1, (int)OtherAddRec->getNumOperands()); + z < ze && !Overflow; ++z) { + uint64_t Coeff2 = Choose(2*x - y, x-z, Overflow); + uint64_t Coeff; + if (LargerThan64Bits) + Coeff = umul_ov(Coeff1, Coeff2, Overflow); + else + Coeff = Coeff1*Coeff2; + const SCEV *CoeffTerm = getConstant(Ty, Coeff); + const SCEV *Term1 = AddRec->getOperand(y-z); + const SCEV *Term2 = OtherAddRec->getOperand(z); + Term = getAddExpr(Term, getMulExpr(CoeffTerm, Term1,Term2)); } - AddRecOps.push_back(Term); - } - if (!Overflow) { - const SCEV *NewAddRec = getAddRecExpr(AddRecOps, AddRec->getLoop(), - SCEV::FlagAnyWrap); - if (Ops.size() == 2) return NewAddRec; - Ops[Idx] = NewAddRec; - Ops.erase(Ops.begin() + OtherIdx); --OtherIdx; - OpsModified = true; - AddRec = dyn_cast(NewAddRec); - if (!AddRec) - break; } + AddRecOps.push_back(Term); + } + if (!Overflow) { + const SCEV *NewAddRec = getAddRecExpr(AddRecOps, AddRec->getLoop(), + SCEV::FlagAnyWrap); + if (Ops.size() == 2) return NewAddRec; + Ops[Idx] = NewAddRec; + Ops.erase(Ops.begin() + OtherIdx); --OtherIdx; + OpsModified = true; + AddRec = dyn_cast(NewAddRec); + if (!AddRec) + break; } - if (OpsModified) - return getMulExpr(Ops); } + if (OpsModified) + return getMulExpr(Ops); // Otherwise couldn't fold anything into this recurrence. Move onto the // next one. @@ -3082,7 +3395,8 @@ ScalarEvolution::ForgetSymbolicName(Instruction *PN, const SCEV *SymName) { Visited.insert(PN); while (!Worklist.empty()) { Instruction *I = Worklist.pop_back_val(); - if (!Visited.insert(I)) continue; + if (!Visited.insert(I).second) + continue; ValueExprMapType::iterator It = ValueExprMap.find_as(static_cast(I)); @@ -3263,7 +3577,7 @@ const SCEV *ScalarEvolution::createNodeForPHI(PHINode *PN) { // PHI's incoming blocks are in a different loop, in which case doing so // risks breaking LCSSA form. 
Instcombine would normally zap these, but // it doesn't have DominatorTree information, so it may miss cases. - if (Value *V = SimplifyInstruction(PN, DL, TLI, DT)) + if (Value *V = SimplifyInstruction(PN, DL, TLI, DT, AT)) if (LI->replacementPreservesLCSSAForm(PN, V)) return getSCEV(V); @@ -3395,7 +3709,7 @@ ScalarEvolution::GetMinTrailingZeros(const SCEV *S) { // For a SCEVUnknown, ask ValueTracking. unsigned BitWidth = getTypeSizeInBits(U->getType()); APInt Zeros(BitWidth, 0), Ones(BitWidth, 0); - computeKnownBits(U->getValue(), Zeros, Ones); + computeKnownBits(U->getValue(), Zeros, Ones, DL, 0, AT, nullptr, DT); return Zeros.countTrailingOnes(); } @@ -3403,6 +3717,31 @@ ScalarEvolution::GetMinTrailingZeros(const SCEV *S) { return 0; } +/// GetRangeFromMetadata - Helper method to assign a range to V from +/// metadata present in the IR. +static Optional GetRangeFromMetadata(Value *V) { + if (Instruction *I = dyn_cast(V)) { + if (MDNode *MD = I->getMetadata(LLVMContext::MD_range)) { + ConstantRange TotalRange( + cast(I->getType())->getBitWidth(), false); + + unsigned NumRanges = MD->getNumOperands() / 2; + assert(NumRanges >= 1); + + for (unsigned i = 0; i < NumRanges; ++i) { + ConstantInt *Lower = cast(MD->getOperand(2*i + 0)); + ConstantInt *Upper = cast(MD->getOperand(2*i + 1)); + ConstantRange Range(Lower->getValue(), Upper->getValue()); + TotalRange = TotalRange.unionWith(Range); + } + + return TotalRange; + } + } + + return None; +} + /// getUnsignedRange - Determine the unsigned range for a particular SCEV. /// ConstantRange @@ -3532,9 +3871,14 @@ ScalarEvolution::getUnsignedRange(const SCEV *S) { } if (const SCEVUnknown *U = dyn_cast(S)) { + // Check if the IR explicitly contains !range metadata. + Optional MDRange = GetRangeFromMetadata(U->getValue()); + if (MDRange.hasValue()) + ConservativeResult = ConservativeResult.intersectWith(MDRange.getValue()); + // For a SCEVUnknown, ask ValueTracking. APInt Zeros(BitWidth, 0), Ones(BitWidth, 0); - computeKnownBits(U->getValue(), Zeros, Ones, DL); + computeKnownBits(U->getValue(), Zeros, Ones, DL, 0, AT, nullptr, DT); if (Ones == ~Zeros + 1) return setUnsignedRange(U, ConservativeResult); return setUnsignedRange(U, @@ -3683,10 +4027,15 @@ ScalarEvolution::getSignedRange(const SCEV *S) { } if (const SCEVUnknown *U = dyn_cast(S)) { + // Check if the IR explicitly contains !range metadata. + Optional MDRange = GetRangeFromMetadata(U->getValue()); + if (MDRange.hasValue()) + ConservativeResult = ConservativeResult.intersectWith(MDRange.getValue()); + // For a SCEVUnknown, ask ValueTracking. 
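GetRangeFromMetadata above folds every (Lower, Upper) pair of a !range annotation into a single ConstantRange via unionWith. A concrete editorial example for an i8 value annotated with the two half-open ranges [0,2) and [64,66); the header path and the metadata values are assumptions made for illustration:

#include "llvm/IR/ConstantRange.h"  // assumed header location at this revision

llvm::ConstantRange rangeFromExampleMetadata() {
  llvm::ConstantRange R1(llvm::APInt(8, 0), llvm::APInt(8, 2));    // [0, 2)
  llvm::ConstantRange R2(llvm::APInt(8, 64), llvm::APInt(8, 66));  // [64, 66)
  // unionWith returns the smallest single range covering both pieces, so the
  // summary handed to getUnsignedRange/getSignedRange is deliberately coarse.
  return R1.unionWith(R2);
}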
if (!U->getValue()->getType()->isIntegerTy() && !DL) return setSignedRange(U, ConservativeResult); - unsigned NS = ComputeNumSignBits(U->getValue(), DL); + unsigned NS = ComputeNumSignBits(U->getValue(), DL, 0, AT, nullptr, DT); if (NS <= 1) return setSignedRange(U, ConservativeResult); return setSignedRange(U, ConservativeResult.intersectWith( @@ -3793,7 +4142,8 @@ const SCEV *ScalarEvolution::createSCEV(Value *V) { unsigned TZ = A.countTrailingZeros(); unsigned BitWidth = A.getBitWidth(); APInt KnownZero(BitWidth, 0), KnownOne(BitWidth, 0); - computeKnownBits(U->getOperand(0), KnownZero, KnownOne, DL); + computeKnownBits(U->getOperand(0), KnownZero, KnownOne, DL, + 0, AT, nullptr, DT); APInt EffectiveMask = APInt::getLowBitsSet(BitWidth, BitWidth - LZ - TZ).shl(TZ); @@ -4070,6 +4420,14 @@ const SCEV *ScalarEvolution::createSCEV(Value *V) { // Iteration Count Computation Code // +unsigned ScalarEvolution::getSmallConstantTripCount(Loop *L) { + if (BasicBlock *ExitingBB = L->getExitingBlock()) + return getSmallConstantTripCount(L, ExitingBB); + + // No trip count information for multiple exits. + return 0; +} + /// getSmallConstantTripCount - Returns the maximum trip count of this loop as a /// normal unsigned value. Returns 0 if the trip count is unknown or not /// constant. Will also return 0 if the maximum trip count is very large (>= @@ -4080,19 +4438,13 @@ const SCEV *ScalarEvolution::createSCEV(Value *V) { /// before taking the branch. For loops with multiple exits, it may not be the /// number times that the loop header executes because the loop may exit /// prematurely via another branch. -/// -/// FIXME: We conservatively call getBackedgeTakenCount(L) instead of -/// getExitCount(L, ExitingBlock) to compute a safe trip count considering all -/// loop exits. getExitCount() may return an exact count for this branch -/// assuming no-signed-wrap. The number of well-defined iterations may actually -/// be higher than this trip count if this exit test is skipped and the loop -/// exits via a different branch. Ideally, getExitCount() would know whether it -/// depends on a NSW assumption, and we would only fall back to a conservative -/// trip count in that case. -unsigned ScalarEvolution:: -getSmallConstantTripCount(Loop *L, BasicBlock * /*ExitingBlock*/) { +unsigned ScalarEvolution::getSmallConstantTripCount(Loop *L, + BasicBlock *ExitingBlock) { + assert(ExitingBlock && "Must pass a non-null exiting block!"); + assert(L->isLoopExiting(ExitingBlock) && + "Exiting block must actually branch out of the loop!"); const SCEVConstant *ExitCount = - dyn_cast(getBackedgeTakenCount(L)); + dyn_cast(getExitCount(L, ExitingBlock)); if (!ExitCount) return 0; @@ -4106,6 +4458,14 @@ getSmallConstantTripCount(Loop *L, BasicBlock * /*ExitingBlock*/) { return ((unsigned)ExitConst->getZExtValue()) + 1; } +unsigned ScalarEvolution::getSmallConstantTripMultiple(Loop *L) { + if (BasicBlock *ExitingBB = L->getExitingBlock()) + return getSmallConstantTripMultiple(L, ExitingBB); + + // No trip multiple information for multiple exits. + return 0; +} + /// getSmallConstantTripMultiple - Returns the largest constant divisor of the /// trip count of this loop as a normal unsigned value, if possible. This /// means that the actual trip count is always a multiple of the returned @@ -4118,9 +4478,13 @@ getSmallConstantTripCount(Loop *L, BasicBlock * /*ExitingBlock*/) { /// /// As explained in the comments for getSmallConstantTripCount, this assumes /// that control exits the loop via ExitingBlock. 
-unsigned ScalarEvolution:: -getSmallConstantTripMultiple(Loop *L, BasicBlock * /*ExitingBlock*/) { - const SCEV *ExitCount = getBackedgeTakenCount(L); +unsigned +ScalarEvolution::getSmallConstantTripMultiple(Loop *L, + BasicBlock *ExitingBlock) { + assert(ExitingBlock && "Must pass a non-null exiting block!"); + assert(L->isLoopExiting(ExitingBlock) && + "Exiting block must actually branch out of the loop!"); + const SCEV *ExitCount = getExitCount(L, ExitingBlock); if (ExitCount == getCouldNotCompute()) return 1; @@ -4230,7 +4594,8 @@ ScalarEvolution::getBackedgeTakenInfo(const Loop *L) { SmallPtrSet Visited; while (!Worklist.empty()) { Instruction *I = Worklist.pop_back_val(); - if (!Visited.insert(I)) continue; + if (!Visited.insert(I).second) + continue; ValueExprMapType::iterator It = ValueExprMap.find_as(static_cast(I)); @@ -4282,7 +4647,8 @@ void ScalarEvolution::forgetLoop(const Loop *L) { SmallPtrSet Visited; while (!Worklist.empty()) { Instruction *I = Worklist.pop_back_val(); - if (!Visited.insert(I)) continue; + if (!Visited.insert(I).second) + continue; ValueExprMapType::iterator It = ValueExprMap.find_as(static_cast(I)); @@ -4316,7 +4682,8 @@ void ScalarEvolution::forgetValue(Value *V) { SmallPtrSet Visited; while (!Worklist.empty()) { I = Worklist.pop_back_val(); - if (!Visited.insert(I)) continue; + if (!Visited.insert(I).second) + continue; ValueExprMapType::iterator It = ValueExprMap.find_as(static_cast(I)); @@ -4467,20 +4834,12 @@ ScalarEvolution::ComputeBackedgeTakenCount(const Loop *L) { // non-exiting iterations. Partition the loop exits into two kinds: // LoopMustExits and LoopMayExits. // - // A LoopMustExit meets two requirements: - // - // (a) Its ExitLimit.MustExit flag must be set which indicates that the exit - // test condition cannot be skipped (the tested variable has unit stride or - // the test is less-than or greater-than, rather than a strict inequality). - // - // (b) It must dominate the loop latch, hence must be tested on every loop - // iteration. - // - // If any computable LoopMustExit is found, then MaxBECount is the minimum - // EL.Max of computable LoopMustExits. Otherwise, MaxBECount is - // conservatively the maximum EL.Max, where CouldNotCompute is considered - // greater than any computable EL.Max. - if (EL.MustExit && EL.Max != getCouldNotCompute() && Latch && + // If the exit dominates the loop latch, it is a LoopMustExit otherwise it + // is a LoopMayExit. If any computable LoopMustExit is found, then + // MaxBECount is the minimum EL.Max of computable LoopMustExits. Otherwise, + // MaxBECount is conservatively the maximum EL.Max, where CouldNotCompute is + // considered greater than any computable EL.Max. + if (EL.Max != getCouldNotCompute() && Latch && DT->dominates(ExitBB, Latch)) { if (!MustExitMaxBECount) MustExitMaxBECount = EL.Max; @@ -4567,18 +4926,19 @@ ScalarEvolution::ComputeExitLimit(const Loop *L, BasicBlock *ExitingBlock) { return getCouldNotCompute(); } + bool IsOnlyExit = (L->getExitingBlock() != nullptr); TerminatorInst *Term = ExitingBlock->getTerminator(); if (BranchInst *BI = dyn_cast(Term)) { assert(BI->isConditional() && "If unconditional, it can't be in loop!"); // Proceed to the next level to examine the exit condition expression. 
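To pin down the terminology used by the getSmallConstantTripCount/getSmallConstantTripMultiple hunks above, here is a small self-contained check by direct simulation (ordinary C++, not SCEV): for an exit whose backedge executes N times, the exit count is N and the reported trip count is N + 1, and a trip multiple is simply a divisor that every possible trip count is guaranteed to respect.

#include <cassert>

int main() {
  unsigned BodyExecutions = 0, BackedgesTaken = 0;
  for (unsigned i = 0; i != 12; i += 3) {
    ++BodyExecutions;
    if (i + 3 != 12)          // the branch back to the header is the backedge
      ++BackedgesTaken;
  }
  unsigned ExitCount = BackedgesTaken;  // what the per-exit exit count models: 3
  unsigned TripCount = ExitCount + 1;   // the trip count described above: 4
  assert(BodyExecutions == TripCount);
  // Any divisor of the trip count is a correct (if conservative) trip multiple.
  assert(TripCount % 4 == 0 && TripCount % 2 == 0 && TripCount % 1 == 0);
  return 0;
}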
return ComputeExitLimitFromCond(L, BI->getCondition(), BI->getSuccessor(0), BI->getSuccessor(1), - /*IsSubExpr=*/false); + /*ControlsExit=*/IsOnlyExit); } if (SwitchInst *SI = dyn_cast(Term)) return ComputeExitLimitFromSingleExitSwitch(L, SI, Exit, - /*IsSubExpr=*/false); + /*ControlsExit=*/IsOnlyExit); return getCouldNotCompute(); } @@ -4587,28 +4947,27 @@ ScalarEvolution::ComputeExitLimit(const Loop *L, BasicBlock *ExitingBlock) { /// backedge of the specified loop will execute if its exit condition /// were a conditional branch of ExitCond, TBB, and FBB. /// -/// @param IsSubExpr is true if ExitCond does not directly control the exit -/// branch. In this case, we cannot assume that the loop only exits when the -/// condition is true and cannot infer that failing to meet the condition prior -/// to integer wraparound results in undefined behavior. +/// @param ControlsExit is true if ExitCond directly controls the exit +/// branch. In this case, we can assume that the loop exits only if the +/// condition is true and can infer that failing to meet the condition prior to +/// integer wraparound results in undefined behavior. ScalarEvolution::ExitLimit ScalarEvolution::ComputeExitLimitFromCond(const Loop *L, Value *ExitCond, BasicBlock *TBB, BasicBlock *FBB, - bool IsSubExpr) { + bool ControlsExit) { // Check if the controlling expression for this loop is an And or Or. if (BinaryOperator *BO = dyn_cast(ExitCond)) { if (BO->getOpcode() == Instruction::And) { // Recurse on the operands of the and. bool EitherMayExit = L->contains(TBB); ExitLimit EL0 = ComputeExitLimitFromCond(L, BO->getOperand(0), TBB, FBB, - IsSubExpr || EitherMayExit); + ControlsExit && !EitherMayExit); ExitLimit EL1 = ComputeExitLimitFromCond(L, BO->getOperand(1), TBB, FBB, - IsSubExpr || EitherMayExit); + ControlsExit && !EitherMayExit); const SCEV *BECount = getCouldNotCompute(); const SCEV *MaxBECount = getCouldNotCompute(); - bool MustExit = false; if (EitherMayExit) { // Both conditions must be true for the loop to continue executing. // Choose the less conservative count. @@ -4623,7 +4982,6 @@ ScalarEvolution::ComputeExitLimitFromCond(const Loop *L, MaxBECount = EL0.Max; else MaxBECount = getUMinFromMismatchedTypes(EL0.Max, EL1.Max); - MustExit = EL0.MustExit || EL1.MustExit; } else { // Both conditions must be true at the same time for the loop to exit. // For now, be conservative. @@ -4632,21 +4990,19 @@ ScalarEvolution::ComputeExitLimitFromCond(const Loop *L, MaxBECount = EL0.Max; if (EL0.Exact == EL1.Exact) BECount = EL0.Exact; - MustExit = EL0.MustExit && EL1.MustExit; } - return ExitLimit(BECount, MaxBECount, MustExit); + return ExitLimit(BECount, MaxBECount); } if (BO->getOpcode() == Instruction::Or) { // Recurse on the operands of the or. bool EitherMayExit = L->contains(FBB); ExitLimit EL0 = ComputeExitLimitFromCond(L, BO->getOperand(0), TBB, FBB, - IsSubExpr || EitherMayExit); + ControlsExit && !EitherMayExit); ExitLimit EL1 = ComputeExitLimitFromCond(L, BO->getOperand(1), TBB, FBB, - IsSubExpr || EitherMayExit); + ControlsExit && !EitherMayExit); const SCEV *BECount = getCouldNotCompute(); const SCEV *MaxBECount = getCouldNotCompute(); - bool MustExit = false; if (EitherMayExit) { // Both conditions must be false for the loop to continue executing. // Choose the less conservative count. 
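A tiny standalone illustration (plain C++) of the combination rule the ComputeExitLimitFromCond hunk above implements for an exit test built from two and'ed comparisons: when either operand becoming false can take the exit, the exact count is the smaller of the two per-operand counts, and neither operand by itself controls the exit, which is why the recursion passes ControlsExit && !EitherMayExit. The or'ed case below is the dual. Iteration counts stand in for exact backedge-taken counts here.

#include <algorithm>
#include <cassert>

int main() {
  unsigned Iterations = 0;
  // The loop keeps running only while both comparisons hold, so it stops at
  // the earlier of the two bounds.
  for (unsigned i = 0; i != 5 && i != 8; ++i)
    ++Iterations;
  const unsigned IterationsIfOnlyFirst = 5;   // count if "i != 5" were the only test
  const unsigned IterationsIfOnlySecond = 8;  // count if "i != 8" were the only test
  assert(Iterations == std::min(IterationsIfOnlyFirst, IterationsIfOnlySecond));
  return 0;
}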
@@ -4661,7 +5017,6 @@ ScalarEvolution::ComputeExitLimitFromCond(const Loop *L, MaxBECount = EL0.Max; else MaxBECount = getUMinFromMismatchedTypes(EL0.Max, EL1.Max); - MustExit = EL0.MustExit || EL1.MustExit; } else { // Both conditions must be false at the same time for the loop to exit. // For now, be conservative. @@ -4670,17 +5025,16 @@ ScalarEvolution::ComputeExitLimitFromCond(const Loop *L, MaxBECount = EL0.Max; if (EL0.Exact == EL1.Exact) BECount = EL0.Exact; - MustExit = EL0.MustExit && EL1.MustExit; } - return ExitLimit(BECount, MaxBECount, MustExit); + return ExitLimit(BECount, MaxBECount); } } // With an icmp, it may be feasible to compute an exact backedge-taken count. // Proceed to the next level to examine the icmp. if (ICmpInst *ExitCondICmp = dyn_cast(ExitCond)) - return ComputeExitLimitFromICmp(L, ExitCondICmp, TBB, FBB, IsSubExpr); + return ComputeExitLimitFromICmp(L, ExitCondICmp, TBB, FBB, ControlsExit); // Check for a constant condition. These are normally stripped out by // SimplifyCFG, but ScalarEvolution may be used by a pass which wishes to @@ -4707,7 +5061,7 @@ ScalarEvolution::ComputeExitLimitFromICmp(const Loop *L, ICmpInst *ExitCond, BasicBlock *TBB, BasicBlock *FBB, - bool IsSubExpr) { + bool ControlsExit) { // If the condition was exit on true, convert the condition to exit on false ICmpInst::Predicate Cond; @@ -4759,7 +5113,7 @@ ScalarEvolution::ComputeExitLimitFromICmp(const Loop *L, switch (Cond) { case ICmpInst::ICMP_NE: { // while (X != Y) // Convert to: while (X-Y != 0) - ExitLimit EL = HowFarToZero(getMinusSCEV(LHS, RHS), L, IsSubExpr); + ExitLimit EL = HowFarToZero(getMinusSCEV(LHS, RHS), L, ControlsExit); if (EL.hasAnyInfo()) return EL; break; } @@ -4772,14 +5126,14 @@ ScalarEvolution::ComputeExitLimitFromICmp(const Loop *L, case ICmpInst::ICMP_SLT: case ICmpInst::ICMP_ULT: { // while (X < Y) bool IsSigned = Cond == ICmpInst::ICMP_SLT; - ExitLimit EL = HowManyLessThans(LHS, RHS, L, IsSigned, IsSubExpr); + ExitLimit EL = HowManyLessThans(LHS, RHS, L, IsSigned, ControlsExit); if (EL.hasAnyInfo()) return EL; break; } case ICmpInst::ICMP_SGT: case ICmpInst::ICMP_UGT: { // while (X > Y) bool IsSigned = Cond == ICmpInst::ICMP_SGT; - ExitLimit EL = HowManyGreaterThans(LHS, RHS, L, IsSigned, IsSubExpr); + ExitLimit EL = HowManyGreaterThans(LHS, RHS, L, IsSigned, ControlsExit); if (EL.hasAnyInfo()) return EL; break; } @@ -4801,7 +5155,7 @@ ScalarEvolution::ExitLimit ScalarEvolution::ComputeExitLimitFromSingleExitSwitch(const Loop *L, SwitchInst *Switch, BasicBlock *ExitingBlock, - bool IsSubExpr) { + bool ControlsExit) { assert(!L->contains(ExitingBlock) && "Not an exiting block!"); // Give up if the exit is the default dest of a switch. @@ -4814,7 +5168,7 @@ ScalarEvolution::ComputeExitLimitFromSingleExitSwitch(const Loop *L, const SCEV *RHS = getConstant(Switch->findCaseDest(ExitingBlock)); // while (X != Y) --> while (X-Y != 0) - ExitLimit EL = HowFarToZero(getMinusSCEV(LHS, RHS), L, IsSubExpr); + ExitLimit EL = HowFarToZero(getMinusSCEV(LHS, RHS), L, ControlsExit); if (EL.hasAnyInfo()) return EL; @@ -5687,7 +6041,7 @@ SolveQuadraticEquation(const SCEVAddRecExpr *AddRec, ScalarEvolution &SE) { /// effectively V != 0. We know and take advantage of the fact that this /// expression only being used in a comparison by zero context. 
ScalarEvolution::ExitLimit -ScalarEvolution::HowFarToZero(const SCEV *V, const Loop *L, bool IsSubExpr) { +ScalarEvolution::HowFarToZero(const SCEV *V, const Loop *L, bool ControlsExit) { // If the value is a constant if (const SCEVConstant *C = dyn_cast(V)) { // If the value is already zero, the branch will execute zero times. @@ -5781,37 +6135,30 @@ ScalarEvolution::HowFarToZero(const SCEV *V, const Loop *L, bool IsSubExpr) { else MaxBECount = getConstant(CountDown ? CR.getUnsignedMax() : -CR.getUnsignedMin()); - return ExitLimit(Distance, MaxBECount, /*MustExit=*/true); + return ExitLimit(Distance, MaxBECount); } - // If the recurrence is known not to wraparound, unsigned divide computes the - // back edge count. (Ideally we would have an "isexact" bit for udiv). We know - // that the value will either become zero (and thus the loop terminates), that - // the loop will terminate through some other exit condition first, or that - // the loop has undefined behavior. This means we can't "miss" the exit - // value, even with nonunit stride, and exit later via the same branch. Note - // that we can skip this exit if loop later exits via a different - // branch. Hence MustExit=false. - // - // This is only valid for expressions that directly compute the loop exit. It - // is invalid for subexpressions in which the loop may exit through this - // branch even if this subexpression is false. In that case, the trip count - // computed by this udiv could be smaller than the number of well-defined - // iterations. - if (!IsSubExpr && AddRec->getNoWrapFlags(SCEV::FlagNW)) { + // If the step exactly divides the distance then unsigned divide computes the + // backedge count. + const SCEV *Q, *R; + ScalarEvolution &SE = *const_cast(this); + SCEVUDivision::divide(SE, Distance, Step, &Q, &R); + if (R->isZero()) { const SCEV *Exact = - getUDivExpr(Distance, CountDown ? getNegativeSCEV(Step) : Step); - return ExitLimit(Exact, Exact, /*MustExit=*/false); + getUDivExactExpr(Distance, CountDown ? getNegativeSCEV(Step) : Step); + return ExitLimit(Exact, Exact); } - // If Step is a power of two that evenly divides Start we know that the loop - // will always terminate. Start may not be a constant so we just have the - // number of trailing zeros available. This is safe even in presence of - // overflow as the recurrence will overflow to exactly 0. - const APInt &StepV = StepC->getValue()->getValue(); - if (StepV.isPowerOf2() && - GetMinTrailingZeros(getNegativeSCEV(Start)) >= StepV.countTrailingZeros()) - return getUDivExactExpr(Distance, CountDown ? getNegativeSCEV(Step) : Step); + // If the condition controls loop exit (the loop exits only if the expression + // is true) and the addition is no-wrap we can use unsigned divide to + // compute the backedge count. In this case, the step may not divide the + // distance, but we don't care because if the condition is "missed" the loop + // will have undefined behavior due to wrapping. + if (ControlsExit && AddRec->getNoWrapFlags(SCEV::FlagNW)) { + const SCEV *Exact = + getUDivExpr(Distance, CountDown ? getNegativeSCEV(Step) : Step); + return ExitLimit(Exact, Exact); + } // Then, try to solve the above equation provided that Start is constant. if (const SCEVConstant *StartC = dyn_cast(Start)) @@ -6309,19 +6656,30 @@ ScalarEvolution::isLoopBackedgeGuardedByCond(const Loop *L, // (interprocedural conditions notwithstanding). 
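A standalone arithmetic check (plain C++, deliberately 8-bit modular) of the divisibility argument in the HowFarToZero hunk above: when the step exactly divides the distance to zero, Distance / Step additions reach zero even if an intermediate addition wraps, so the exact unsigned division is a valid backedge-taken count without any no-wrap assumption; the no-wrap path is only needed when the step does not divide the distance.

#include <cassert>
#include <cstdint>

int main() {
  // 8-bit recurrence {250,+,2}: the distance to zero is (uint8_t)-250 == 6,
  // the step 2 divides it exactly, and 6/2 == 3 additions hit 0 even though
  // the final addition wraps modulo 256.
  uint8_t V = 250, Step = 2;
  uint8_t Distance = static_cast<uint8_t>(-V);  // 6
  assert(Distance % Step == 0);
  unsigned Iterations = 0;
  while (V != 0) {
    V = static_cast<uint8_t>(V + Step);
    ++Iterations;
  }
  assert(Iterations == Distance / Step);        // 3
  return 0;
}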
if (!L) return true; + if (isKnownPredicateWithRanges(Pred, LHS, RHS)) return true; + BasicBlock *Latch = L->getLoopLatch(); if (!Latch) return false; BranchInst *LoopContinuePredicate = dyn_cast(Latch->getTerminator()); - if (!LoopContinuePredicate || - LoopContinuePredicate->isUnconditional()) - return false; + if (LoopContinuePredicate && LoopContinuePredicate->isConditional() && + isImpliedCond(Pred, LHS, RHS, + LoopContinuePredicate->getCondition(), + LoopContinuePredicate->getSuccessor(0) != L->getHeader())) + return true; + + // Check conditions due to any @llvm.assume intrinsics. + for (auto &CI : AT->assumptions(F)) { + if (!DT->dominates(CI, Latch->getTerminator())) + continue; - return isImpliedCond(Pred, LHS, RHS, - LoopContinuePredicate->getCondition(), - LoopContinuePredicate->getSuccessor(0) != L->getHeader()); + if (isImpliedCond(Pred, LHS, RHS, CI->getArgOperand(0), false)) + return true; + } + + return false; } /// isLoopEntryGuardedByCond - Test whether entry to the loop is protected @@ -6335,6 +6693,8 @@ ScalarEvolution::isLoopEntryGuardedByCond(const Loop *L, // (interprocedural conditions notwithstanding). if (!L) return false; + if (isKnownPredicateWithRanges(Pred, LHS, RHS)) return true; + // Starting at the loop predecessor, climb up the predecessor chain, as long // as there are predecessors that can be found that have unique successors // leading to the original header. @@ -6355,6 +6715,15 @@ ScalarEvolution::isLoopEntryGuardedByCond(const Loop *L, return true; } + // Check conditions due to any @llvm.assume intrinsics. + for (auto &CI : AT->assumptions(F)) { + if (!DT->dominates(CI, L->getHeader())) + continue; + + if (isImpliedCond(Pred, LHS, RHS, CI->getArgOperand(0), false)) + return true; + } + return false; } @@ -6469,6 +6838,66 @@ bool ScalarEvolution::isImpliedCond(ICmpInst::Predicate Pred, RHS, LHS, FoundLHS, FoundRHS); } + // Check if we can make progress by sharpening ranges. + if (FoundPred == ICmpInst::ICMP_NE && + (isa(FoundLHS) || isa(FoundRHS))) { + + const SCEVConstant *C = nullptr; + const SCEV *V = nullptr; + + if (isa(FoundLHS)) { + C = cast(FoundLHS); + V = FoundRHS; + } else { + C = cast(FoundRHS); + V = FoundLHS; + } + + // The guarding predicate tells us that C != V. If the known range + // of V is [C, t), we can sharpen the range to [C + 1, t). The + // range we consider has to correspond to same signedness as the + // predicate we're interested in folding. + + APInt Min = ICmpInst::isSigned(Pred) ? + getSignedRange(V).getSignedMin() : getUnsignedRange(V).getUnsignedMin(); + + if (Min == C->getValue()->getValue()) { + // Given (V >= Min && V != Min) we conclude V >= (Min + 1). + // This is true even if (Min + 1) wraps around -- in case of + // wraparound, (Min + 1) < Min, so (V >= Min => V >= (Min + 1)). + + APInt SharperMin = Min + 1; + + switch (Pred) { + case ICmpInst::ICMP_SGE: + case ICmpInst::ICMP_UGE: + // We know V `Pred` SharperMin. If this implies LHS `Pred` + // RHS, we're done. + if (isImpliedCondOperands(Pred, LHS, RHS, V, + getConstant(SharperMin))) + return true; + + case ICmpInst::ICMP_SGT: + case ICmpInst::ICMP_UGT: + // We know from the range information that (V `Pred` Min || + // V == Min). We know from the guarding condition that !(V + // == Min). This gives us + // + // V `Pred` Min || V == Min && !(V == Min) + // => V `Pred` Min + // + // If V `Pred` Min implies LHS `Pred` RHS, we're done. 
+ + if (isImpliedCondOperands(Pred, LHS, RHS, V, getConstant(Min))) + return true; + + default: + // No change + break; + } + } + } + // Check whether the actual condition is beyond sufficient. if (FoundPred == ICmpInst::ICMP_EQ) if (ICmpInst::isTrueWhenEqual(Pred)) @@ -6614,13 +7043,13 @@ const SCEV *ScalarEvolution::computeBECount(const SCEV *Delta, const SCEV *Step, /// specified less-than comparison will execute. If not computable, return /// CouldNotCompute. /// -/// @param IsSubExpr is true when the LHS < RHS condition does not directly -/// control the branch. In this case, we can only compute an iteration count for -/// a subexpression that cannot overflow before evaluating true. +/// @param ControlsExit is true when the LHS < RHS condition directly controls +/// the branch (loops exits only if condition is true). In this case, we can use +/// NoWrapFlags to skip overflow checks. ScalarEvolution::ExitLimit ScalarEvolution::HowManyLessThans(const SCEV *LHS, const SCEV *RHS, const Loop *L, bool IsSigned, - bool IsSubExpr) { + bool ControlsExit) { // We handle only IV < Invariant if (!isLoopInvariant(RHS, L)) return getCouldNotCompute(); @@ -6631,7 +7060,7 @@ ScalarEvolution::HowManyLessThans(const SCEV *LHS, const SCEV *RHS, if (!IV || IV->getLoop() != L || !IV->isAffine()) return getCouldNotCompute(); - bool NoWrap = !IsSubExpr && + bool NoWrap = ControlsExit && IV->getNoWrapFlags(IsSigned ? SCEV::FlagNSW : SCEV::FlagNUW); const SCEV *Stride = IV->getStepRecurrence(*this); @@ -6651,9 +7080,19 @@ ScalarEvolution::HowManyLessThans(const SCEV *LHS, const SCEV *RHS, : ICmpInst::ICMP_ULT; const SCEV *Start = IV->getStart(); const SCEV *End = RHS; - if (!isLoopEntryGuardedByCond(L, Cond, getMinusSCEV(Start, Stride), RHS)) - End = IsSigned ? getSMaxExpr(RHS, Start) - : getUMaxExpr(RHS, Start); + if (!isLoopEntryGuardedByCond(L, Cond, getMinusSCEV(Start, Stride), RHS)) { + const SCEV *Diff = getMinusSCEV(RHS, Start); + // If we have NoWrap set, then we can assume that the increment won't + // overflow, in which case if RHS - Start is a constant, we don't need to + // do a max operation since we can just figure it out statically + if (NoWrap && isa(Diff)) { + APInt D = dyn_cast(Diff)->getValue()->getValue(); + if (D.isNegative()) + End = Start; + } else + End = IsSigned ? getSMaxExpr(RHS, Start) + : getUMaxExpr(RHS, Start); + } const SCEV *BECount = computeBECount(getMinusSCEV(End, Start), Stride, false); @@ -6684,13 +7123,13 @@ ScalarEvolution::HowManyLessThans(const SCEV *LHS, const SCEV *RHS, if (isa(MaxBECount)) MaxBECount = BECount; - return ExitLimit(BECount, MaxBECount, /*MustExit=*/true); + return ExitLimit(BECount, MaxBECount); } ScalarEvolution::ExitLimit ScalarEvolution::HowManyGreaterThans(const SCEV *LHS, const SCEV *RHS, const Loop *L, bool IsSigned, - bool IsSubExpr) { + bool ControlsExit) { // We handle only IV > Invariant if (!isLoopInvariant(RHS, L)) return getCouldNotCompute(); @@ -6701,7 +7140,7 @@ ScalarEvolution::HowManyGreaterThans(const SCEV *LHS, const SCEV *RHS, if (!IV || IV->getLoop() != L || !IV->isAffine()) return getCouldNotCompute(); - bool NoWrap = !IsSubExpr && + bool NoWrap = ControlsExit && IV->getNoWrapFlags(IsSigned ? 
SCEV::FlagNSW : SCEV::FlagNUW); const SCEV *Stride = getNegativeSCEV(IV->getStepRecurrence(*this)); @@ -6722,9 +7161,19 @@ ScalarEvolution::HowManyGreaterThans(const SCEV *LHS, const SCEV *RHS, const SCEV *Start = IV->getStart(); const SCEV *End = RHS; - if (!isLoopEntryGuardedByCond(L, Cond, getAddExpr(Start, Stride), RHS)) - End = IsSigned ? getSMinExpr(RHS, Start) - : getUMinExpr(RHS, Start); + if (!isLoopEntryGuardedByCond(L, Cond, getAddExpr(Start, Stride), RHS)) { + const SCEV *Diff = getMinusSCEV(RHS, Start); + // If we have NoWrap set, then we can assume that the increment won't + // overflow, in which case if RHS - Start is a constant, we don't need to + // do a max operation since we can just figure it out statically + if (NoWrap && isa(Diff)) { + APInt D = dyn_cast(Diff)->getValue()->getValue(); + if (!D.isNegative()) + End = Start; + } else + End = IsSigned ? getSMinExpr(RHS, Start) + : getUMinExpr(RHS, Start); + } const SCEV *BECount = computeBECount(getMinusSCEV(Start, End), Stride, false); @@ -6756,7 +7205,7 @@ ScalarEvolution::HowManyGreaterThans(const SCEV *LHS, const SCEV *RHS, if (isa(MaxBECount)) MaxBECount = BECount; - return ExitLimit(BECount, MaxBECount, /*MustExit=*/true); + return ExitLimit(BECount, MaxBECount); } /// getNumIterationsInRange - Return the number of iterations of this loop that @@ -6849,401 +7298,139 @@ const SCEV *SCEVAddRecExpr::getNumIterationsInRange(ConstantRange Range, // Pick the smallest positive root value. if (ConstantInt *CB = dyn_cast(ConstantExpr::getICmp(ICmpInst::ICMP_ULT, - R1->getValue(), R2->getValue()))) { - if (CB->getZExtValue() == false) - std::swap(R1, R2); // R1 is the minimum root now. - - // Make sure the root is not off by one. The returned iteration should - // not be in the range, but the previous one should be. When solving - // for "X*X < 5", for example, we should not return a root of 2. - ConstantInt *R1Val = EvaluateConstantChrecAtConstant(this, - R1->getValue(), - SE); - if (Range.contains(R1Val->getValue())) { - // The next iteration must be out of the range... - ConstantInt *NextVal = - ConstantInt::get(SE.getContext(), R1->getValue()->getValue()+1); - - R1Val = EvaluateConstantChrecAtConstant(this, NextVal, SE); - if (!Range.contains(R1Val->getValue())) - return SE.getConstant(NextVal); - return SE.getCouldNotCompute(); // Something strange happened - } - - // If R1 was not in the range, then it is a good return value. Make - // sure that R1-1 WAS in the range though, just in case. - ConstantInt *NextVal = - ConstantInt::get(SE.getContext(), R1->getValue()->getValue()-1); - R1Val = EvaluateConstantChrecAtConstant(this, NextVal, SE); - if (Range.contains(R1Val->getValue())) - return R1; - return SE.getCouldNotCompute(); // Something strange happened - } - } - } - - return SE.getCouldNotCompute(); -} - -namespace { -struct FindUndefs { - bool Found; - FindUndefs() : Found(false) {} - - bool follow(const SCEV *S) { - if (const SCEVUnknown *C = dyn_cast(S)) { - if (isa(C->getValue())) - Found = true; - } else if (const SCEVConstant *C = dyn_cast(S)) { - if (isa(C->getValue())) - Found = true; - } - - // Keep looking if we haven't found it yet. - return !Found; - } - bool isDone() const { - // Stop recursion if we have found an undef. - return Found; - } -}; -} - -// Return true when S contains at least an undef value. -static inline bool -containsUndefs(const SCEV *S) { - FindUndefs F; - SCEVTraversal ST(F); - ST.visitAll(S); - - return F.Found; -} - -namespace { -// Collect all steps of SCEV expressions. 
-struct SCEVCollectStrides { - ScalarEvolution &SE; - SmallVectorImpl &Strides; - - SCEVCollectStrides(ScalarEvolution &SE, SmallVectorImpl &S) - : SE(SE), Strides(S) {} - - bool follow(const SCEV *S) { - if (const SCEVAddRecExpr *AR = dyn_cast(S)) - Strides.push_back(AR->getStepRecurrence(SE)); - return true; - } - bool isDone() const { return false; } -}; - -// Collect all SCEVUnknown and SCEVMulExpr expressions. -struct SCEVCollectTerms { - SmallVectorImpl &Terms; - - SCEVCollectTerms(SmallVectorImpl &T) - : Terms(T) {} - - bool follow(const SCEV *S) { - if (isa(S) || isa(S)) { - if (!containsUndefs(S)) - Terms.push_back(S); - - // Stop recursion: once we collected a term, do not walk its operands. - return false; - } - - // Keep looking. - return true; - } - bool isDone() const { return false; } -}; -} - -/// Find parametric terms in this SCEVAddRecExpr. -void SCEVAddRecExpr::collectParametricTerms( - ScalarEvolution &SE, SmallVectorImpl &Terms) const { - SmallVector Strides; - SCEVCollectStrides StrideCollector(SE, Strides); - visitAll(this, StrideCollector); - - DEBUG({ - dbgs() << "Strides:\n"; - for (const SCEV *S : Strides) - dbgs() << *S << "\n"; - }); - - for (const SCEV *S : Strides) { - SCEVCollectTerms TermCollector(Terms); - visitAll(S, TermCollector); - } - - DEBUG({ - dbgs() << "Terms:\n"; - for (const SCEV *T : Terms) - dbgs() << *T << "\n"; - }); -} - -static const APInt srem(const SCEVConstant *C1, const SCEVConstant *C2) { - APInt A = C1->getValue()->getValue(); - APInt B = C2->getValue()->getValue(); - uint32_t ABW = A.getBitWidth(); - uint32_t BBW = B.getBitWidth(); - - if (ABW > BBW) - B = B.sext(ABW); - else if (ABW < BBW) - A = A.sext(BBW); - - return APIntOps::srem(A, B); -} - -static const APInt sdiv(const SCEVConstant *C1, const SCEVConstant *C2) { - APInt A = C1->getValue()->getValue(); - APInt B = C2->getValue()->getValue(); - uint32_t ABW = A.getBitWidth(); - uint32_t BBW = B.getBitWidth(); - - if (ABW > BBW) - B = B.sext(ABW); - else if (ABW < BBW) - A = A.sext(BBW); - - return APIntOps::sdiv(A, B); -} - -namespace { -struct FindSCEVSize { - int Size; - FindSCEVSize() : Size(0) {} - - bool follow(const SCEV *S) { - ++Size; - // Keep looking at all operands of S. - return true; - } - bool isDone() const { - return false; - } -}; -} - -// Returns the size of the SCEV S. -static inline int sizeOfSCEV(const SCEV *S) { - FindSCEVSize F; - SCEVTraversal ST(F); - ST.visitAll(S); - return F.Size; -} - -namespace { - -struct SCEVDivision : public SCEVVisitor { -public: - // Computes the Quotient and Remainder of the division of Numerator by - // Denominator. - static void divide(ScalarEvolution &SE, const SCEV *Numerator, - const SCEV *Denominator, const SCEV **Quotient, - const SCEV **Remainder) { - assert(Numerator && Denominator && "Uninitialized SCEV"); - - SCEVDivision D(SE, Numerator, Denominator); - - // Check for the trivial case here to avoid having to check for it in the - // rest of the code. - if (Numerator == Denominator) { - *Quotient = D.One; - *Remainder = D.Zero; - return; - } - - if (Numerator->isZero()) { - *Quotient = D.Zero; - *Remainder = D.Zero; - return; - } + R1->getValue(), R2->getValue()))) { + if (CB->getZExtValue() == false) + std::swap(R1, R2); // R1 is the minimum root now. - // Split the Denominator when it is a product. 
- if (const SCEVMulExpr *T = dyn_cast(Denominator)) { - const SCEV *Q, *R; - *Quotient = Numerator; - for (const SCEV *Op : T->operands()) { - divide(SE, *Quotient, Op, &Q, &R); - *Quotient = Q; + // Make sure the root is not off by one. The returned iteration should + // not be in the range, but the previous one should be. When solving + // for "X*X < 5", for example, we should not return a root of 2. + ConstantInt *R1Val = EvaluateConstantChrecAtConstant(this, + R1->getValue(), + SE); + if (Range.contains(R1Val->getValue())) { + // The next iteration must be out of the range... + ConstantInt *NextVal = + ConstantInt::get(SE.getContext(), R1->getValue()->getValue()+1); - // Bail out when the Numerator is not divisible by one of the terms of - // the Denominator. - if (!R->isZero()) { - *Quotient = D.Zero; - *Remainder = Numerator; - return; + R1Val = EvaluateConstantChrecAtConstant(this, NextVal, SE); + if (!Range.contains(R1Val->getValue())) + return SE.getConstant(NextVal); + return SE.getCouldNotCompute(); // Something strange happened } + + // If R1 was not in the range, then it is a good return value. Make + // sure that R1-1 WAS in the range though, just in case. + ConstantInt *NextVal = + ConstantInt::get(SE.getContext(), R1->getValue()->getValue()-1); + R1Val = EvaluateConstantChrecAtConstant(this, NextVal, SE); + if (Range.contains(R1Val->getValue())) + return R1; + return SE.getCouldNotCompute(); // Something strange happened } - *Remainder = D.Zero; - return; } - - D.visit(Numerator); - *Quotient = D.Quotient; - *Remainder = D.Remainder; } - SCEVDivision(ScalarEvolution &S, const SCEV *Numerator, const SCEV *Denominator) - : SE(S), Denominator(Denominator) { - Zero = SE.getConstant(Denominator->getType(), 0); - One = SE.getConstant(Denominator->getType(), 1); - - // By default, we don't know how to divide Expr by Denominator. - // Providing the default here simplifies the rest of the code. - Quotient = Zero; - Remainder = Numerator; - } + return SE.getCouldNotCompute(); +} - // Except in the trivial case described above, we do not know how to divide - // Expr by Denominator for the following functions with empty implementation. 
- void visitTruncateExpr(const SCEVTruncateExpr *Numerator) {} - void visitZeroExtendExpr(const SCEVZeroExtendExpr *Numerator) {} - void visitSignExtendExpr(const SCEVSignExtendExpr *Numerator) {} - void visitUDivExpr(const SCEVUDivExpr *Numerator) {} - void visitSMaxExpr(const SCEVSMaxExpr *Numerator) {} - void visitUMaxExpr(const SCEVUMaxExpr *Numerator) {} - void visitUnknown(const SCEVUnknown *Numerator) {} - void visitCouldNotCompute(const SCEVCouldNotCompute *Numerator) {} +namespace { +struct FindUndefs { + bool Found; + FindUndefs() : Found(false) {} - void visitConstant(const SCEVConstant *Numerator) { - if (const SCEVConstant *D = dyn_cast(Denominator)) { - Quotient = SE.getConstant(sdiv(Numerator, D)); - Remainder = SE.getConstant(srem(Numerator, D)); - return; + bool follow(const SCEV *S) { + if (const SCEVUnknown *C = dyn_cast(S)) { + if (isa(C->getValue())) + Found = true; + } else if (const SCEVConstant *C = dyn_cast(S)) { + if (isa(C->getValue())) + Found = true; } - } - void visitAddRecExpr(const SCEVAddRecExpr *Numerator) { - const SCEV *StartQ, *StartR, *StepQ, *StepR; - assert(Numerator->isAffine() && "Numerator should be affine"); - divide(SE, Numerator->getStart(), Denominator, &StartQ, &StartR); - divide(SE, Numerator->getStepRecurrence(SE), Denominator, &StepQ, &StepR); - Quotient = SE.getAddRecExpr(StartQ, StepQ, Numerator->getLoop(), - Numerator->getNoWrapFlags()); - Remainder = SE.getAddRecExpr(StartR, StepR, Numerator->getLoop(), - Numerator->getNoWrapFlags()); + // Keep looking if we haven't found it yet. + return !Found; } + bool isDone() const { + // Stop recursion if we have found an undef. + return Found; + } +}; +} - void visitAddExpr(const SCEVAddExpr *Numerator) { - SmallVector Qs, Rs; - Type *Ty = Denominator->getType(); - - for (const SCEV *Op : Numerator->operands()) { - const SCEV *Q, *R; - divide(SE, Op, Denominator, &Q, &R); +// Return true when S contains at least an undef value. +static inline bool +containsUndefs(const SCEV *S) { + FindUndefs F; + SCEVTraversal ST(F); + ST.visitAll(S); - // Bail out if types do not match. - if (Ty != Q->getType() || Ty != R->getType()) { - Quotient = Zero; - Remainder = Numerator; - return; - } + return F.Found; +} - Qs.push_back(Q); - Rs.push_back(R); - } +namespace { +// Collect all steps of SCEV expressions. +struct SCEVCollectStrides { + ScalarEvolution &SE; + SmallVectorImpl &Strides; - if (Qs.size() == 1) { - Quotient = Qs[0]; - Remainder = Rs[0]; - return; - } + SCEVCollectStrides(ScalarEvolution &SE, SmallVectorImpl &S) + : SE(SE), Strides(S) {} - Quotient = SE.getAddExpr(Qs); - Remainder = SE.getAddExpr(Rs); + bool follow(const SCEV *S) { + if (const SCEVAddRecExpr *AR = dyn_cast(S)) + Strides.push_back(AR->getStepRecurrence(SE)); + return true; } + bool isDone() const { return false; } +}; - void visitMulExpr(const SCEVMulExpr *Numerator) { - SmallVector Qs; - Type *Ty = Denominator->getType(); - - bool FoundDenominatorTerm = false; - for (const SCEV *Op : Numerator->operands()) { - // Bail out if types do not match. - if (Ty != Op->getType()) { - Quotient = Zero; - Remainder = Numerator; - return; - } - - if (FoundDenominatorTerm) { - Qs.push_back(Op); - continue; - } - - // Check whether Denominator divides one of the product operands. - const SCEV *Q, *R; - divide(SE, Op, Denominator, &Q, &R); - if (!R->isZero()) { - Qs.push_back(Op); - continue; - } +// Collect all SCEVUnknown and SCEVMulExpr expressions. +struct SCEVCollectTerms { + SmallVectorImpl &Terms; - // Bail out if types do not match. 
- if (Ty != Q->getType()) { - Quotient = Zero; - Remainder = Numerator; - return; - } + SCEVCollectTerms(SmallVectorImpl &T) + : Terms(T) {} - FoundDenominatorTerm = true; - Qs.push_back(Q); - } + bool follow(const SCEV *S) { + if (isa(S) || isa(S)) { + if (!containsUndefs(S)) + Terms.push_back(S); - if (FoundDenominatorTerm) { - Remainder = Zero; - if (Qs.size() == 1) - Quotient = Qs[0]; - else - Quotient = SE.getMulExpr(Qs); - return; + // Stop recursion: once we collected a term, do not walk its operands. + return false; } - if (!isa(Denominator)) { - Quotient = Zero; - Remainder = Numerator; - return; - } + // Keep looking. + return true; + } + bool isDone() const { return false; } +}; +} - // The Remainder is obtained by replacing Denominator by 0 in Numerator. - ValueToValueMap RewriteMap; - RewriteMap[cast(Denominator)->getValue()] = - cast(Zero)->getValue(); - Remainder = SCEVParameterRewriter::rewrite(Numerator, SE, RewriteMap, true); +/// Find parametric terms in this SCEVAddRecExpr. +void SCEVAddRecExpr::collectParametricTerms( + ScalarEvolution &SE, SmallVectorImpl &Terms) const { + SmallVector Strides; + SCEVCollectStrides StrideCollector(SE, Strides); + visitAll(this, StrideCollector); - if (Remainder->isZero()) { - // The Quotient is obtained by replacing Denominator by 1 in Numerator. - RewriteMap[cast(Denominator)->getValue()] = - cast(One)->getValue(); - Quotient = - SCEVParameterRewriter::rewrite(Numerator, SE, RewriteMap, true); - return; - } + DEBUG({ + dbgs() << "Strides:\n"; + for (const SCEV *S : Strides) + dbgs() << *S << "\n"; + }); - // Quotient is (Numerator - Remainder) divided by Denominator. - const SCEV *Q, *R; - const SCEV *Diff = SE.getMinusSCEV(Numerator, Remainder); - if (sizeOfSCEV(Diff) > sizeOfSCEV(Numerator)) { - // This SCEV does not seem to simplify: fail the division here. - Quotient = Zero; - Remainder = Numerator; - return; - } - divide(SE, Diff, Denominator, &Q, &R); - assert(R == Zero && - "(Numerator - Remainder) should evenly divide Denominator"); - Quotient = Q; + for (const SCEV *S : Strides) { + SCEVCollectTerms TermCollector(Terms); + visitAll(S, TermCollector); } -private: - ScalarEvolution &SE; - const SCEV *Denominator, *Quotient, *Remainder, *Zero, *One; -}; + DEBUG({ + dbgs() << "Terms:\n"; + for (const SCEV *T : Terms) + dbgs() << *T << "\n"; + }); } static bool findArrayDimensionsRec(ScalarEvolution &SE, @@ -7270,7 +7457,7 @@ static bool findArrayDimensionsRec(ScalarEvolution &SE, for (const SCEV *&Term : Terms) { // Normalize the terms before the next call to findArrayDimensionsRec. const SCEV *Q, *R; - SCEVDivision::divide(SE, Term, Step, &Q, &R); + SCEVSDivision::divide(SE, Term, Step, &Q, &R); // Bail out when GCD does not evenly divide one of the terms. if (!R->isZero()) @@ -7407,7 +7594,7 @@ void ScalarEvolution::findArrayDimensions(SmallVectorImpl &Terms, // Divide all terms by the element size. for (const SCEV *&Term : Terms) { const SCEV *Q, *R; - SCEVDivision::divide(SE, Term, ElementSize, &Q, &R); + SCEVSDivision::divide(SE, Term, ElementSize, &Q, &R); Term = Q; } @@ -7454,7 +7641,7 @@ void SCEVAddRecExpr::computeAccessFunctions( int Last = Sizes.size() - 1; for (int i = Last; i >= 0; i--) { const SCEV *Q, *R; - SCEVDivision::divide(SE, Res, Sizes[i], &Q, &R); + SCEVSDivision::divide(SE, Res, Sizes[i], &Q, &R); DEBUG({ dbgs() << "Res: " << *Res << "\n"; @@ -7609,7 +7796,7 @@ void ScalarEvolution::SCEVCallbackVH::allUsesReplacedWith(Value *V) { // that until everything else is done. 
if (U == Old) continue; - if (!Visited.insert(U)) + if (!Visited.insert(U).second) continue; if (PHINode *PN = dyn_cast(U)) SE->ConstantEvolutionLoopExitValue.erase(PN); @@ -7638,6 +7825,7 @@ ScalarEvolution::ScalarEvolution() bool ScalarEvolution::runOnFunction(Function &F) { this->F = &F; + AT = &getAnalysis(); LI = &getAnalysis(); DataLayoutPass *DLP = getAnalysisIfAvailable(); DL = DLP ? &DLP->getDataLayout() : nullptr; @@ -7678,6 +7866,7 @@ void ScalarEvolution::releaseMemory() { void ScalarEvolution::getAnalysisUsage(AnalysisUsage &AU) const { AU.setPreservesAll(); + AU.addRequired(); AU.addRequiredTransitive(); AU.addRequiredTransitive(); AU.addRequired(); diff --git a/lib/Analysis/ScalarEvolutionAliasAnalysis.cpp b/lib/Analysis/ScalarEvolutionAliasAnalysis.cpp index 6933f74..5c339ee 100644 --- a/lib/Analysis/ScalarEvolutionAliasAnalysis.cpp +++ b/lib/Analysis/ScalarEvolutionAliasAnalysis.cpp @@ -162,10 +162,10 @@ ScalarEvolutionAliasAnalysis::alias(const Location &LocA, if ((AO && AO != LocA.Ptr) || (BO && BO != LocB.Ptr)) if (alias(Location(AO ? AO : LocA.Ptr, AO ? +UnknownSize : LocA.Size, - AO ? nullptr : LocA.TBAATag), + AO ? AAMDNodes() : LocA.AATags), Location(BO ? BO : LocB.Ptr, BO ? +UnknownSize : LocB.Size, - BO ? nullptr : LocB.TBAATag)) == NoAlias) + BO ? AAMDNodes() : LocB.AATags)) == NoAlias) return NoAlias; // Forward the query to the next analysis. diff --git a/lib/Analysis/ScalarEvolutionExpander.cpp b/lib/Analysis/ScalarEvolutionExpander.cpp index 8c75b0d..bee3685 100644 --- a/lib/Analysis/ScalarEvolutionExpander.cpp +++ b/lib/Analysis/ScalarEvolutionExpander.cpp @@ -1443,8 +1443,12 @@ Value *SCEVExpander::visitAddRecExpr(const SCEVAddRecExpr *S) { Constant *One = ConstantInt::get(Ty, 1); for (pred_iterator HPI = HPB; HPI != HPE; ++HPI) { BasicBlock *HP = *HPI; - if (!PredSeen.insert(HP)) + if (!PredSeen.insert(HP).second) { + // There must be an incoming value for each predecessor, even the + // duplicates! + CanonicalIV->addIncoming(CanonicalIV->getIncomingValueForBlock(HP), HP); continue; + } if (L->contains(HP)) { // Insert a unit add instruction right before the terminator @@ -1707,7 +1711,7 @@ unsigned SCEVExpander::replaceCongruentIVs(Loop *L, const DominatorTree *DT, // Fold constant phis. They may be congruent to other constant phis and // would confuse the logic below that expects proper IVs. 
- if (Value *V = SimplifyInstruction(Phi, SE.DL, SE.TLI, SE.DT)) { + if (Value *V = SimplifyInstruction(Phi, SE.DL, SE.TLI, SE.DT, SE.AT)) { Phi->replaceAllUsesWith(V); DeadInsts.push_back(Phi); ++NumElim; diff --git a/lib/Analysis/ScalarEvolutionNormalization.cpp b/lib/Analysis/ScalarEvolutionNormalization.cpp index 3ccefb0..b238fe4 100644 --- a/lib/Analysis/ScalarEvolutionNormalization.cpp +++ b/lib/Analysis/ScalarEvolutionNormalization.cpp @@ -126,7 +126,7 @@ TransformImpl(const SCEV *S, Instruction *User, Value *OperandValToReplace) { // Normalized form: {-2,+,1,+,2} // Denormalized form: {1,+,3,+,2} // - // However, denormalization would use the a different step expression than + // However, denormalization would use a different step expression than // normalization (see getPostIncExpr), generating the wrong final // expression: {-2,+,1,+,2} + {1,+,2} => {-1,+,3,+,2} if (AR->isAffine() && diff --git a/lib/Analysis/ScopedNoAliasAA.cpp b/lib/Analysis/ScopedNoAliasAA.cpp new file mode 100644 index 0000000..f6c300a --- /dev/null +++ b/lib/Analysis/ScopedNoAliasAA.cpp @@ -0,0 +1,245 @@ +//===- ScopedNoAliasAA.cpp - Scoped No-Alias Alias Analysis ---------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines the ScopedNoAlias alias-analysis pass, which implements +// metadata-based scoped no-alias support. +// +// Alias-analysis scopes are defined by an id (which can be a string or some +// other metadata node), a domain node, and an optional descriptive string. +// A domain is defined by an id (which can be a string or some other metadata +// node), and an optional descriptive string. +// +// !dom0 = metadata !{ metadata !"domain of foo()" } +// !scope1 = metadata !{ metadata !scope1, metadata !dom0, metadata !"scope 1" } +// !scope2 = metadata !{ metadata !scope2, metadata !dom0, metadata !"scope 2" } +// +// Loads and stores can be tagged with an alias-analysis scope, and also, with +// a noalias tag for a specific scope: +// +// ... = load %ptr1, !alias.scope !{ !scope1 } +// ... = load %ptr2, !alias.scope !{ !scope1, !scope2 }, !noalias !{ !scope1 } +// +// When evaluating an aliasing query, if one of the instructions is associated +// has a set of noalias scopes in some domain that is superset of the alias +// scopes in that domain of some other instruction, then the two memory +// accesses are assumed not to alias. +// +//===----------------------------------------------------------------------===// + +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/Analysis/Passes.h" +#include "llvm/Analysis/AliasAnalysis.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/LLVMContext.h" +#include "llvm/IR/Metadata.h" +#include "llvm/IR/Module.h" +#include "llvm/Pass.h" +#include "llvm/Support/CommandLine.h" +using namespace llvm; + +// A handy option for disabling scoped no-alias functionality. The same effect +// can also be achieved by stripping the associated metadata tags from IR, but +// this option is sometimes more convenient. +static cl::opt +EnableScopedNoAlias("enable-scoped-noalias", cl::init(true)); + +namespace { +/// AliasScopeNode - This is a simple wrapper around an MDNode which provides +/// a higher-level interface by hiding the details of how alias analysis +/// information is encoded in its operands. 
+class AliasScopeNode { + const MDNode *Node; + +public: + AliasScopeNode() : Node(0) {} + explicit AliasScopeNode(const MDNode *N) : Node(N) {} + + /// getNode - Get the MDNode for this AliasScopeNode. + const MDNode *getNode() const { return Node; } + + /// getDomain - Get the MDNode for this AliasScopeNode's domain. + const MDNode *getDomain() const { + if (Node->getNumOperands() < 2) + return nullptr; + return dyn_cast_or_null(Node->getOperand(1)); + } +}; + +/// ScopedNoAliasAA - This is a simple alias analysis +/// implementation that uses scoped-noalias metadata to answer queries. +class ScopedNoAliasAA : public ImmutablePass, public AliasAnalysis { +public: + static char ID; // Class identification, replacement for typeinfo + ScopedNoAliasAA() : ImmutablePass(ID) { + initializeScopedNoAliasAAPass(*PassRegistry::getPassRegistry()); + } + + void initializePass() override { InitializeAliasAnalysis(this); } + + /// getAdjustedAnalysisPointer - This method is used when a pass implements + /// an analysis interface through multiple inheritance. If needed, it + /// should override this to adjust the this pointer as needed for the + /// specified pass info. + void *getAdjustedAnalysisPointer(const void *PI) override { + if (PI == &AliasAnalysis::ID) + return (AliasAnalysis*)this; + return this; + } + +protected: + bool mayAliasInScopes(const MDNode *Scopes, const MDNode *NoAlias) const; + void collectMDInDomain(const MDNode *List, const MDNode *Domain, + SmallPtrSetImpl &Nodes) const; + +private: + void getAnalysisUsage(AnalysisUsage &AU) const override; + AliasResult alias(const Location &LocA, const Location &LocB) override; + bool pointsToConstantMemory(const Location &Loc, bool OrLocal) override; + ModRefBehavior getModRefBehavior(ImmutableCallSite CS) override; + ModRefBehavior getModRefBehavior(const Function *F) override; + ModRefResult getModRefInfo(ImmutableCallSite CS, + const Location &Loc) override; + ModRefResult getModRefInfo(ImmutableCallSite CS1, + ImmutableCallSite CS2) override; +}; +} // End of anonymous namespace + +// Register this pass... +char ScopedNoAliasAA::ID = 0; +INITIALIZE_AG_PASS(ScopedNoAliasAA, AliasAnalysis, "scoped-noalias", + "Scoped NoAlias Alias Analysis", false, true, false) + +ImmutablePass *llvm::createScopedNoAliasAAPass() { + return new ScopedNoAliasAA(); +} + +void +ScopedNoAliasAA::getAnalysisUsage(AnalysisUsage &AU) const { + AU.setPreservesAll(); + AliasAnalysis::getAnalysisUsage(AU); +} + +void +ScopedNoAliasAA::collectMDInDomain(const MDNode *List, const MDNode *Domain, + SmallPtrSetImpl &Nodes) const { + for (unsigned i = 0, ie = List->getNumOperands(); i != ie; ++i) + if (const MDNode *MD = dyn_cast(List->getOperand(i))) + if (AliasScopeNode(MD).getDomain() == Domain) + Nodes.insert(MD); +} + +bool +ScopedNoAliasAA::mayAliasInScopes(const MDNode *Scopes, + const MDNode *NoAlias) const { + if (!Scopes || !NoAlias) + return true; + + // Collect the set of scope domains relevant to the noalias scopes. + SmallPtrSet Domains; + for (unsigned i = 0, ie = NoAlias->getNumOperands(); i != ie; ++i) + if (const MDNode *NAMD = dyn_cast(NoAlias->getOperand(i))) + if (const MDNode *Domain = AliasScopeNode(NAMD).getDomain()) + Domains.insert(Domain); + + // We alias unless, for some domain, the set of noalias scopes in that domain + // is a superset of the set of alias scopes in that domain. 
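A self-contained sketch of the query rule described in the comment above, with plain string sets standing in for the scope MDNodes and a single implicit domain (the pass repeats this check per domain): the two accesses are reported NoAlias only when every alias scope of one access is covered by the noalias list of the other, and there is at least one scope to cover.

#include <cassert>
#include <set>
#include <string>

// One-domain simplification: Scopes are the !alias.scope names of one access,
// NoAlias the !noalias names of the other.
static bool mayAliasInScopes(const std::set<std::string> &Scopes,
                             const std::set<std::string> &NoAlias) {
  if (Scopes.empty() || NoAlias.empty())
    return true;                 // no scope information, stay conservative
  for (const std::string &S : Scopes)
    if (!NoAlias.count(S))
      return true;               // some scope is not known to be noalias
  return false;                  // every scope is covered: the accesses do not alias
}

int main() {
  assert(!mayAliasInScopes({"scope1"}, {"scope1", "scope2"}));  // NoAlias
  assert(mayAliasInScopes({"scope1", "scope2"}, {"scope1"}));   // may alias
  assert(mayAliasInScopes({}, {"scope1"}));                     // no scope info
  return 0;
}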
+ for (const MDNode *Domain : Domains) { + SmallPtrSet NANodes, ScopeNodes; + collectMDInDomain(NoAlias, Domain, NANodes); + collectMDInDomain(Scopes, Domain, ScopeNodes); + if (!ScopeNodes.size()) + continue; + + // To not alias, all of the nodes in ScopeNodes must be in NANodes. + bool FoundAll = true; + for (const MDNode *SMD : ScopeNodes) + if (!NANodes.count(SMD)) { + FoundAll = false; + break; + } + + if (FoundAll) + return false; + } + + return true; +} + +AliasAnalysis::AliasResult +ScopedNoAliasAA::alias(const Location &LocA, const Location &LocB) { + if (!EnableScopedNoAlias) + return AliasAnalysis::alias(LocA, LocB); + + // Get the attached MDNodes. + const MDNode *AScopes = LocA.AATags.Scope, + *BScopes = LocB.AATags.Scope; + + const MDNode *ANoAlias = LocA.AATags.NoAlias, + *BNoAlias = LocB.AATags.NoAlias; + + if (!mayAliasInScopes(AScopes, BNoAlias)) + return NoAlias; + + if (!mayAliasInScopes(BScopes, ANoAlias)) + return NoAlias; + + // If they may alias, chain to the next AliasAnalysis. + return AliasAnalysis::alias(LocA, LocB); +} + +bool ScopedNoAliasAA::pointsToConstantMemory(const Location &Loc, + bool OrLocal) { + return AliasAnalysis::pointsToConstantMemory(Loc, OrLocal); +} + +AliasAnalysis::ModRefBehavior +ScopedNoAliasAA::getModRefBehavior(ImmutableCallSite CS) { + return AliasAnalysis::getModRefBehavior(CS); +} + +AliasAnalysis::ModRefBehavior +ScopedNoAliasAA::getModRefBehavior(const Function *F) { + return AliasAnalysis::getModRefBehavior(F); +} + +AliasAnalysis::ModRefResult +ScopedNoAliasAA::getModRefInfo(ImmutableCallSite CS, const Location &Loc) { + if (!EnableScopedNoAlias) + return AliasAnalysis::getModRefInfo(CS, Loc); + + if (!mayAliasInScopes(Loc.AATags.Scope, CS.getInstruction()->getMetadata( + LLVMContext::MD_noalias))) + return NoModRef; + + if (!mayAliasInScopes( + CS.getInstruction()->getMetadata(LLVMContext::MD_alias_scope), + Loc.AATags.NoAlias)) + return NoModRef; + + return AliasAnalysis::getModRefInfo(CS, Loc); +} + +AliasAnalysis::ModRefResult +ScopedNoAliasAA::getModRefInfo(ImmutableCallSite CS1, ImmutableCallSite CS2) { + if (!EnableScopedNoAlias) + return AliasAnalysis::getModRefInfo(CS1, CS2); + + if (!mayAliasInScopes( + CS1.getInstruction()->getMetadata(LLVMContext::MD_alias_scope), + CS2.getInstruction()->getMetadata(LLVMContext::MD_noalias))) + return NoModRef; + + if (!mayAliasInScopes( + CS2.getInstruction()->getMetadata(LLVMContext::MD_alias_scope), + CS1.getInstruction()->getMetadata(LLVMContext::MD_noalias))) + return NoModRef; + + return AliasAnalysis::getModRefInfo(CS1, CS2); +} + diff --git a/lib/Analysis/StratifiedSets.h b/lib/Analysis/StratifiedSets.h new file mode 100644 index 0000000..fd3fbc0 --- /dev/null +++ b/lib/Analysis/StratifiedSets.h @@ -0,0 +1,692 @@ +//===- StratifiedSets.h - Abstract stratified sets implementation. --------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_ADT_STRATIFIEDSETS_H +#define LLVM_ADT_STRATIFIEDSETS_H + +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/Optional.h" +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/SmallSet.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/Support/Compiler.h" +#include +#include +#include +#include +#include +#include +#include + +namespace llvm { +// \brief An index into Stratified Sets. 
+typedef unsigned StratifiedIndex; +// NOTE: ^ This can't be a short -- bootstrapping clang has a case where +// ~1M sets exist. + +// \brief Container of information related to a value in a StratifiedSet. +struct StratifiedInfo { + StratifiedIndex Index; + // For field sensitivity, etc. we can tack attributes on to this struct. +}; + +// The number of attributes that StratifiedAttrs should contain. Attributes are +// described below, and 32 was an arbitrary choice because it fits nicely in 32 +// bits (because we use a bitset for StratifiedAttrs). +static const unsigned NumStratifiedAttrs = 32; + +// These are attributes that the users of StratifiedSets/StratifiedSetBuilders +// may use for various purposes. These also have the special property of that +// they are merged down. So, if set A is above set B, and one decides to set an +// attribute in set A, then the attribute will automatically be set in set B. +typedef std::bitset StratifiedAttrs; + +// \brief A "link" between two StratifiedSets. +struct StratifiedLink { + // \brief This is a value used to signify "does not exist" where + // the StratifiedIndex type is used. This is used instead of + // Optional because Optional would + // eat up a considerable amount of extra memory, after struct + // padding/alignment is taken into account. + static const StratifiedIndex SetSentinel; + + // \brief The index for the set "above" current + StratifiedIndex Above; + + // \brief The link for the set "below" current + StratifiedIndex Below; + + // \brief Attributes for these StratifiedSets. + StratifiedAttrs Attrs; + + StratifiedLink() : Above(SetSentinel), Below(SetSentinel) {} + + bool hasBelow() const { return Below != SetSentinel; } + bool hasAbove() const { return Above != SetSentinel; } + + void clearBelow() { Below = SetSentinel; } + void clearAbove() { Above = SetSentinel; } +}; + +// \brief These are stratified sets, as described in "Fast algorithms for +// Dyck-CFL-reachability with applications to Alias Analysis" by Zhang Q, Lyu M +// R, Yuan H, and Su Z. -- in short, this is meant to represent different sets +// of Value*s. If two Value*s are in the same set, or if both sets have +// overlapping attributes, then the Value*s are said to alias. +// +// Sets may be related by position, meaning that one set may be considered as +// above or below another. In CFL Alias Analysis, this gives us an indication +// of how two variables are related; if the set of variable A is below a set +// containing variable B, then at some point, a variable that has interacted +// with B (or B itself) was either used in order to extract the variable A, or +// was used as storage of variable A. +// +// Sets may also have attributes (as noted above). These attributes are +// generally used for noting whether a variable in the set has interacted with +// a variable whose origins we don't quite know (i.e. globals/arguments), or if +// the variable may have had operations performed on it (modified in a function +// call). All attributes that exist in a set A must exist in all sets marked as +// below set A. 
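As a rough standalone sketch of the query model the comment above describes (simplified: no above/below links, just the final assignment of values to sets plus their merged-down attributes, and with made-up names): two values may alias when they landed in the same stratified set or when the attribute bitsets of their sets overlap.

#include <bitset>
#include <cassert>
#include <map>
#include <string>

struct SetInfo {
  unsigned Index;          // which stratified set the value belongs to
  std::bitset<32> Attrs;   // merged-down attributes (e.g. "came from a global")
};

static bool mayAlias(const SetInfo &A, const SetInfo &B) {
  return A.Index == B.Index || (A.Attrs & B.Attrs).any();
}

int main() {
  std::map<std::string, SetInfo> Sets;
  Sets["%ap"] = {1, std::bitset<32>()};
  Sets["%aw"] = {1, std::bitset<32>()};  // same set as %ap (a "with" edge)
  Sets["%a"]  = {0, std::bitset<32>()};  // the set below the set of %ap
  assert(mayAlias(Sets["%ap"], Sets["%aw"]));   // same set: may alias
  assert(!mayAlias(Sets["%a"], Sets["%ap"]));   // different sets, no shared attrs
  return 0;
}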
+template class StratifiedSets { +public: + StratifiedSets() {} + + StratifiedSets(DenseMap Map, + std::vector Links) + : Values(std::move(Map)), Links(std::move(Links)) {} + + StratifiedSets(StratifiedSets &&Other) { *this = std::move(Other); } + + StratifiedSets &operator=(StratifiedSets &&Other) { + Values = std::move(Other.Values); + Links = std::move(Other.Links); + return *this; + } + + Optional find(const T &Elem) const { + auto Iter = Values.find(Elem); + if (Iter == Values.end()) { + return NoneType(); + } + return Iter->second; + } + + const StratifiedLink &getLink(StratifiedIndex Index) const { + assert(inbounds(Index)); + return Links[Index]; + } + +private: + DenseMap Values; + std::vector Links; + + bool inbounds(StratifiedIndex Idx) const { return Idx < Links.size(); } +}; + +// \brief Generic Builder class that produces StratifiedSets instances. +// +// The goal of this builder is to efficiently produce correct StratifiedSets +// instances. To this end, we use a few tricks: +// > Set chains (A method for linking sets together) +// > Set remaps (A method for marking a set as an alias [irony?] of another) +// +// ==== Set chains ==== +// This builder has a notion of some value A being above, below, or with some +// other value B: +// > The `A above B` relationship implies that there is a reference edge going +// from A to B. Namely, it notes that A can store anything in B's set. +// > The `A below B` relationship is the opposite of `A above B`. It implies +// that there's a dereference edge going from A to B. +// > The `A with B` relationship states that there's an assignment edge going +// from A to B, and that A and B should be treated as equals. +// +// As an example, take the following code snippet: +// +// %a = alloca i32, align 4 +// %ap = alloca i32*, align 8 +// %app = alloca i32**, align 8 +// store %a, %ap +// store %ap, %app +// %aw = getelementptr %ap, 0 +// +// Given this, the follow relations exist: +// - %a below %ap & %ap above %a +// - %ap below %app & %app above %ap +// - %aw with %ap & %ap with %aw +// +// These relations produce the following sets: +// [{%a}, {%ap, %aw}, {%app}] +// +// ...Which states that the only MayAlias relationship in the above program is +// between %ap and %aw. +// +// Life gets more complicated when we actually have logic in our programs. So, +// we either must remove this logic from our programs, or make consessions for +// it in our AA algorithms. In this case, we have decided to select the latter +// option. +// +// First complication: Conditionals +// Motivation: +// %ad = alloca int, align 4 +// %a = alloca int*, align 8 +// %b = alloca int*, align 8 +// %bp = alloca int**, align 8 +// %c = call i1 @SomeFunc() +// %k = select %c, %ad, %bp +// store %ad, %a +// store %b, %bp +// +// %k has 'with' edges to both %a and %b, which ordinarily would not be linked +// together. So, we merge the set that contains %a with the set that contains +// %b. We then recursively merge the set above %a with the set above %b, and +// the set below %a with the set below %b, etc. Ultimately, the sets for this +// program would end up like: {%ad}, {%a, %b, %k}, {%bp}, where {%ad} is below +// {%a, %b, %c} is below {%ad}. 
+// +// Second complication: Arbitrary casts +// Motivation: +// %ip = alloca int*, align 8 +// %ipp = alloca int**, align 8 +// %i = bitcast ipp to int +// store %ip, %ipp +// store %i, %ip +// +// This is impossible to construct with any of the rules above, because a set +// containing both {%i, %ipp} is supposed to exist, the set with %i is supposed +// to be below the set with %ip, and the set with %ip is supposed to be below +// the set with %ipp. Because we don't allow circular relationships like this, +// we merge all concerned sets into one. So, the above code would generate a +// single StratifiedSet: {%ip, %ipp, %i}. +// +// ==== Set remaps ==== +// More of an implementation detail than anything -- when merging sets, we need +// to update the numbers of all of the elements mapped to those sets. Rather +// than doing this at each merge, we note in the BuilderLink structure that a +// remap has occurred, and use this information so we can defer renumbering set +// elements until build time. +template class StratifiedSetsBuilder { + // \brief Represents a Stratified Set, with information about the Stratified + // Set above it, the set below it, and whether the current set has been + // remapped to another. + struct BuilderLink { + const StratifiedIndex Number; + + BuilderLink(StratifiedIndex N) : Number(N) { + Remap = StratifiedLink::SetSentinel; + } + + bool hasAbove() const { + assert(!isRemapped()); + return Link.hasAbove(); + } + + bool hasBelow() const { + assert(!isRemapped()); + return Link.hasBelow(); + } + + void setBelow(StratifiedIndex I) { + assert(!isRemapped()); + Link.Below = I; + } + + void setAbove(StratifiedIndex I) { + assert(!isRemapped()); + Link.Above = I; + } + + void clearBelow() { + assert(!isRemapped()); + Link.clearBelow(); + } + + void clearAbove() { + assert(!isRemapped()); + Link.clearAbove(); + } + + StratifiedIndex getBelow() const { + assert(!isRemapped()); + assert(hasBelow()); + return Link.Below; + } + + StratifiedIndex getAbove() const { + assert(!isRemapped()); + assert(hasAbove()); + return Link.Above; + } + + StratifiedAttrs &getAttrs() { + assert(!isRemapped()); + return Link.Attrs; + } + + void setAttr(unsigned index) { + assert(!isRemapped()); + assert(index < NumStratifiedAttrs); + Link.Attrs.set(index); + } + + void setAttrs(const StratifiedAttrs &other) { + assert(!isRemapped()); + Link.Attrs |= other; + } + + bool isRemapped() const { return Remap != StratifiedLink::SetSentinel; } + + // \brief For initial remapping to another set + void remapTo(StratifiedIndex Other) { + assert(!isRemapped()); + Remap = Other; + } + + StratifiedIndex getRemapIndex() const { + assert(isRemapped()); + return Remap; + } + + // \brief Should only be called when we're already remapped. + void updateRemap(StratifiedIndex Other) { + assert(isRemapped()); + Remap = Other; + } + + // \brief Prefer the above functions to calling things directly on what's + // returned from this -- they guard against unexpected calls when the + // current BuilderLink is remapped. + const StratifiedLink &getLink() const { return Link; } + + private: + StratifiedLink Link; + StratifiedIndex Remap; + }; + + // \brief This function performs all of the set unioning/value renumbering + // that we've been putting off, and generates a vector that + // may be placed in a StratifiedSets instance. 
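// The remap chase performed by linksAt() further below behaves like the
// "find" step of a union-find structure, including path compression. A
// simplified model of the lookup (illustrative only):
//
//   StratifiedIndex findCanonical(std::vector<BuilderLink> &Links,
//                                 StratifiedIndex Idx) {
//     while (Links[Idx].isRemapped())     // follow remap pointers...
//       Idx = Links[Idx].getRemapIndex(); // ...until a live set is found
//     return Idx;
//   }
//
// finalizeSets() then renumbers only the live (non-remapped) sets and
// rewrites every stored index through that mapping.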
+ void finalizeSets(std::vector &StratLinks) { + DenseMap Remaps; + for (auto &Link : Links) { + if (Link.isRemapped()) { + continue; + } + + StratifiedIndex Number = StratLinks.size(); + Remaps.insert(std::make_pair(Link.Number, Number)); + StratLinks.push_back(Link.getLink()); + } + + for (auto &Link : StratLinks) { + if (Link.hasAbove()) { + auto &Above = linksAt(Link.Above); + auto Iter = Remaps.find(Above.Number); + assert(Iter != Remaps.end()); + Link.Above = Iter->second; + } + + if (Link.hasBelow()) { + auto &Below = linksAt(Link.Below); + auto Iter = Remaps.find(Below.Number); + assert(Iter != Remaps.end()); + Link.Below = Iter->second; + } + } + + for (auto &Pair : Values) { + auto &Info = Pair.second; + auto &Link = linksAt(Info.Index); + auto Iter = Remaps.find(Link.Number); + assert(Iter != Remaps.end()); + Info.Index = Iter->second; + } + } + + // \brief There's a guarantee in StratifiedLink where all bits set in a + // Link.externals will be set in all Link.externals "below" it. + static void propagateAttrs(std::vector &Links) { + const auto getHighestParentAbove = [&Links](StratifiedIndex Idx) { + const auto *Link = &Links[Idx]; + while (Link->hasAbove()) { + Idx = Link->Above; + Link = &Links[Idx]; + } + return Idx; + }; + + SmallSet Visited; + for (unsigned I = 0, E = Links.size(); I < E; ++I) { + auto CurrentIndex = getHighestParentAbove(I); + if (!Visited.insert(CurrentIndex).second) { + continue; + } + + while (Links[CurrentIndex].hasBelow()) { + auto &CurrentBits = Links[CurrentIndex].Attrs; + auto NextIndex = Links[CurrentIndex].Below; + auto &NextBits = Links[NextIndex].Attrs; + NextBits |= CurrentBits; + CurrentIndex = NextIndex; + } + } + } + +public: + // \brief Builds a StratifiedSet from the information we've been given since + // either construction or the prior build() call. + StratifiedSets build() { + std::vector StratLinks; + finalizeSets(StratLinks); + propagateAttrs(StratLinks); + Links.clear(); + return StratifiedSets(std::move(Values), std::move(StratLinks)); + } + + std::size_t size() const { return Values.size(); } + std::size_t numSets() const { return Links.size(); } + + bool has(const T &Elem) const { return get(Elem).hasValue(); } + + bool add(const T &Main) { + if (get(Main).hasValue()) + return false; + + auto NewIndex = getNewUnlinkedIndex(); + return addAtMerging(Main, NewIndex); + } + + // \brief Restructures the stratified sets as necessary to make "ToAdd" in a + // set above "Main". There are some cases where this is not possible (see + // above), so we merge them such that ToAdd and Main are in the same set. + bool addAbove(const T &Main, const T &ToAdd) { + assert(has(Main)); + auto Index = *indexOf(Main); + if (!linksAt(Index).hasAbove()) + addLinkAbove(Index); + + auto Above = linksAt(Index).getAbove(); + return addAtMerging(ToAdd, Above); + } + + // \brief Restructures the stratified sets as necessary to make "ToAdd" in a + // set below "Main". There are some cases where this is not possible (see + // above), so we merge them such that ToAdd and Main are in the same set. 
+ bool addBelow(const T &Main, const T &ToAdd) { + assert(has(Main)); + auto Index = *indexOf(Main); + if (!linksAt(Index).hasBelow()) + addLinkBelow(Index); + + auto Below = linksAt(Index).getBelow(); + return addAtMerging(ToAdd, Below); + } + + bool addWith(const T &Main, const T &ToAdd) { + assert(has(Main)); + auto MainIndex = *indexOf(Main); + return addAtMerging(ToAdd, MainIndex); + } + + void noteAttribute(const T &Main, unsigned AttrNum) { + assert(has(Main)); + assert(AttrNum < StratifiedLink::SetSentinel); + auto *Info = *get(Main); + auto &Link = linksAt(Info->Index); + Link.setAttr(AttrNum); + } + + void noteAttributes(const T &Main, const StratifiedAttrs &NewAttrs) { + assert(has(Main)); + auto *Info = *get(Main); + auto &Link = linksAt(Info->Index); + Link.setAttrs(NewAttrs); + } + + StratifiedAttrs getAttributes(const T &Main) { + assert(has(Main)); + auto *Info = *get(Main); + auto *Link = &linksAt(Info->Index); + auto Attrs = Link->getAttrs(); + while (Link->hasAbove()) { + Link = &linksAt(Link->getAbove()); + Attrs |= Link->getAttrs(); + } + + return Attrs; + } + + bool getAttribute(const T &Main, unsigned AttrNum) { + assert(AttrNum < StratifiedLink::SetSentinel); + auto Attrs = getAttributes(Main); + return Attrs[AttrNum]; + } + + // \brief Gets the attributes that have been applied to the set that Main + // belongs to. It ignores attributes in any sets above the one that Main + // resides in. + StratifiedAttrs getRawAttributes(const T &Main) { + assert(has(Main)); + auto *Info = *get(Main); + auto &Link = linksAt(Info->Index); + return Link.getAttrs(); + } + + // \brief Gets an attribute from the attributes that have been applied to the + // set that Main belongs to. It ignores attributes in any sets above the one + // that Main resides in. + bool getRawAttribute(const T &Main, unsigned AttrNum) { + assert(AttrNum < StratifiedLink::SetSentinel); + auto Attrs = getRawAttributes(Main); + return Attrs[AttrNum]; + } + +private: + DenseMap Values; + std::vector Links; + + // \brief Adds the given element at the given index, merging sets if + // necessary. + bool addAtMerging(const T &ToAdd, StratifiedIndex Index) { + StratifiedInfo Info = {Index}; + auto Pair = Values.insert(std::make_pair(ToAdd, Info)); + if (Pair.second) + return true; + + auto &Iter = Pair.first; + auto &IterSet = linksAt(Iter->second.Index); + auto &ReqSet = linksAt(Index); + + // Failed to add where we wanted to. Merge the sets. + if (&IterSet != &ReqSet) + merge(IterSet.Number, ReqSet.Number); + + return false; + } + + // \brief Gets the BuilderLink at the given index, taking set remapping into + // account. + BuilderLink &linksAt(StratifiedIndex Index) { + auto *Start = &Links[Index]; + if (!Start->isRemapped()) + return *Start; + + auto *Current = Start; + while (Current->isRemapped()) + Current = &Links[Current->getRemapIndex()]; + + auto NewRemap = Current->Number; + + // Run through everything that has yet to be updated, and update them to + // remap to NewRemap + Current = Start; + while (Current->isRemapped()) { + auto *Next = &Links[Current->getRemapIndex()]; + Current->updateRemap(NewRemap); + Current = Next; + } + + return *Current; + } + + // \brief Merges two sets into one another. 
Assumes that these sets are not + // already one in the same + void merge(StratifiedIndex Idx1, StratifiedIndex Idx2) { + assert(inbounds(Idx1) && inbounds(Idx2)); + assert(&linksAt(Idx1) != &linksAt(Idx2) && + "Merging a set into itself is not allowed"); + + // CASE 1: If the set at `Idx1` is above or below `Idx2`, we need to merge + // both the + // given sets, and all sets between them, into one. + if (tryMergeUpwards(Idx1, Idx2)) + return; + + if (tryMergeUpwards(Idx2, Idx1)) + return; + + // CASE 2: The set at `Idx1` is not in the same chain as the set at `Idx2`. + // We therefore need to merge the two chains together. + mergeDirect(Idx1, Idx2); + } + + // \brief Merges two sets assuming that the set at `Idx1` is unreachable from + // traversing above or below the set at `Idx2`. + void mergeDirect(StratifiedIndex Idx1, StratifiedIndex Idx2) { + assert(inbounds(Idx1) && inbounds(Idx2)); + + auto *LinksInto = &linksAt(Idx1); + auto *LinksFrom = &linksAt(Idx2); + // Merging everything above LinksInto then proceeding to merge everything + // below LinksInto becomes problematic, so we go as far "up" as possible! + while (LinksInto->hasAbove() && LinksFrom->hasAbove()) { + LinksInto = &linksAt(LinksInto->getAbove()); + LinksFrom = &linksAt(LinksFrom->getAbove()); + } + + if (LinksFrom->hasAbove()) { + LinksInto->setAbove(LinksFrom->getAbove()); + auto &NewAbove = linksAt(LinksInto->getAbove()); + NewAbove.setBelow(LinksInto->Number); + } + + // Merging strategy: + // > If neither has links below, stop. + // > If only `LinksInto` has links below, stop. + // > If only `LinksFrom` has links below, reset `LinksInto.Below` to + // match `LinksFrom.Below` + // > If both have links above, deal with those next. + while (LinksInto->hasBelow() && LinksFrom->hasBelow()) { + auto &FromAttrs = LinksFrom->getAttrs(); + LinksInto->setAttrs(FromAttrs); + + // Remap needs to happen after getBelow(), but before + // assignment of LinksFrom + auto *NewLinksFrom = &linksAt(LinksFrom->getBelow()); + LinksFrom->remapTo(LinksInto->Number); + LinksFrom = NewLinksFrom; + LinksInto = &linksAt(LinksInto->getBelow()); + } + + if (LinksFrom->hasBelow()) { + LinksInto->setBelow(LinksFrom->getBelow()); + auto &NewBelow = linksAt(LinksInto->getBelow()); + NewBelow.setAbove(LinksInto->Number); + } + + LinksFrom->remapTo(LinksInto->Number); + } + + // \brief Checks to see if lowerIndex is at a level lower than upperIndex. + // If so, it will merge lowerIndex with upperIndex (and all of the sets + // between) and return true. Otherwise, it will return false. 
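// For example, given the chain {W} above {X} above {Y} above {Z}, calling
// tryMergeUpwards(*indexOf(Z), *indexOf(X)) walks upward from {Z}, reaches
// {X}, and folds {Z} and {Y} into {X} (OR-ing their attributes into {X});
// {W} remains above the merged set. Conversely,
// tryMergeUpwards(*indexOf(X), *indexOf(Z)) returns false, because {Z}
// cannot be reached by walking "above" from {X} -- which is why merge()
// tries both orders before falling back to mergeDirect().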
+ bool tryMergeUpwards(StratifiedIndex LowerIndex, StratifiedIndex UpperIndex) { + assert(inbounds(LowerIndex) && inbounds(UpperIndex)); + auto *Lower = &linksAt(LowerIndex); + auto *Upper = &linksAt(UpperIndex); + if (Lower == Upper) + return true; + + SmallVector Found; + auto *Current = Lower; + auto Attrs = Current->getAttrs(); + while (Current->hasAbove() && Current != Upper) { + Found.push_back(Current); + Attrs |= Current->getAttrs(); + Current = &linksAt(Current->getAbove()); + } + + if (Current != Upper) + return false; + + Upper->setAttrs(Attrs); + + if (Lower->hasBelow()) { + auto NewBelowIndex = Lower->getBelow(); + Upper->setBelow(NewBelowIndex); + auto &NewBelow = linksAt(NewBelowIndex); + NewBelow.setAbove(UpperIndex); + } else { + Upper->clearBelow(); + } + + for (const auto &Ptr : Found) + Ptr->remapTo(Upper->Number); + + return true; + } + + Optional get(const T &Val) const { + auto Result = Values.find(Val); + if (Result == Values.end()) + return NoneType(); + return &Result->second; + } + + Optional get(const T &Val) { + auto Result = Values.find(Val); + if (Result == Values.end()) + return NoneType(); + return &Result->second; + } + + Optional indexOf(const T &Val) { + auto MaybeVal = get(Val); + if (!MaybeVal.hasValue()) + return NoneType(); + auto *Info = *MaybeVal; + auto &Link = linksAt(Info->Index); + return Link.Number; + } + + StratifiedIndex addLinkBelow(StratifiedIndex Set) { + auto At = addLinks(); + Links[Set].setBelow(At); + Links[At].setAbove(Set); + return At; + } + + StratifiedIndex addLinkAbove(StratifiedIndex Set) { + auto At = addLinks(); + Links[At].setBelow(Set); + Links[Set].setAbove(At); + return At; + } + + StratifiedIndex getNewUnlinkedIndex() { return addLinks(); } + + StratifiedIndex addLinks() { + auto Link = Links.size(); + Links.push_back(BuilderLink(Link)); + return Link; + } + + bool inbounds(StratifiedIndex N) const { return N < Links.size(); } +}; +} +#endif // LLVM_ADT_STRATIFIEDSETS_H diff --git a/lib/Analysis/TargetTransformInfo.cpp b/lib/Analysis/TargetTransformInfo.cpp index cdb0b79..c1ffb9d 100644 --- a/lib/Analysis/TargetTransformInfo.cpp +++ b/lib/Analysis/TargetTransformInfo.cpp @@ -87,9 +87,10 @@ bool TargetTransformInfo::isLoweredToCall(const Function *F) const { return PrevTTI->isLoweredToCall(F); } -void TargetTransformInfo::getUnrollingPreferences(Loop *L, - UnrollingPreferences &UP) const { - PrevTTI->getUnrollingPreferences(L, UP); +void +TargetTransformInfo::getUnrollingPreferences(const Function *F, Loop *L, + UnrollingPreferences &UP) const { + PrevTTI->getUnrollingPreferences(F, L, UP); } bool TargetTransformInfo::isLegalAddImmediate(int64_t Imm) const { @@ -167,15 +168,16 @@ unsigned TargetTransformInfo::getRegisterBitWidth(bool Vector) const { return PrevTTI->getRegisterBitWidth(Vector); } -unsigned TargetTransformInfo::getMaximumUnrollFactor() const { - return PrevTTI->getMaximumUnrollFactor(); +unsigned TargetTransformInfo::getMaxInterleaveFactor() const { + return PrevTTI->getMaxInterleaveFactor(); } -unsigned TargetTransformInfo::getArithmeticInstrCost(unsigned Opcode, - Type *Ty, - OperandValueKind Op1Info, - OperandValueKind Op2Info) const { - return PrevTTI->getArithmeticInstrCost(Opcode, Ty, Op1Info, Op2Info); +unsigned TargetTransformInfo::getArithmeticInstrCost( + unsigned Opcode, Type *Ty, OperandValueKind Op1Info, + OperandValueKind Op2Info, OperandValueProperties Opd1PropInfo, + OperandValueProperties Opd2PropInfo) const { + return PrevTTI->getArithmeticInstrCost(Opcode, Ty, Op1Info, Op2Info, + 
Opd1PropInfo, Opd2PropInfo); } unsigned TargetTransformInfo::getShuffleCost(ShuffleKind Kind, Type *Tp, @@ -230,6 +232,11 @@ unsigned TargetTransformInfo::getReductionCost(unsigned Opcode, Type *Ty, return PrevTTI->getReductionCost(Opcode, Ty, IsPairwise); } +unsigned TargetTransformInfo::getCostOfKeepingLiveOverCall(ArrayRef Tys) + const { + return PrevTTI->getCostOfKeepingLiveOverCall(Tys); +} + namespace { struct NoTTI final : ImmutablePass, TargetTransformInfo { @@ -239,7 +246,7 @@ struct NoTTI final : ImmutablePass, TargetTransformInfo { initializeNoTTIPass(*PassRegistry::getPassRegistry()); } - virtual void initializePass() override { + void initializePass() override { // Note that this subclass is special, and must *not* call initializeTTI as // it does not chain. TopTTI = this; @@ -248,7 +255,7 @@ struct NoTTI final : ImmutablePass, TargetTransformInfo { DL = DLP ? &DLP->getDataLayout() : nullptr; } - virtual void getAnalysisUsage(AnalysisUsage &AU) const override { + void getAnalysisUsage(AnalysisUsage &AU) const override { // Note that this subclass is special, and must *not* call // TTI::getAnalysisUsage as it breaks the recursion. } @@ -257,7 +264,7 @@ struct NoTTI final : ImmutablePass, TargetTransformInfo { static char ID; /// Provide necessary pointer adjustments for the two base classes. - virtual void *getAdjustedAnalysisPointer(const void *ID) override { + void *getAdjustedAnalysisPointer(const void *ID) override { if (ID == &TargetTransformInfo::ID) return (TargetTransformInfo*)this; return this; @@ -385,6 +392,8 @@ struct NoTTI final : ImmutablePass, TargetTransformInfo { // FIXME: This is wrong for libc intrinsics. return TCC_Basic; + case Intrinsic::annotation: + case Intrinsic::assume: case Intrinsic::dbg_declare: case Intrinsic::dbg_value: case Intrinsic::invariant_start: @@ -466,6 +475,8 @@ struct NoTTI final : ImmutablePass, TargetTransformInfo { // These will all likely lower to a single selection DAG node. 
if (Name == "copysign" || Name == "copysignf" || Name == "copysignl" || Name == "fabs" || Name == "fabsf" || Name == "fabsl" || Name == "sin" || + Name == "fmin" || Name == "fminf" || Name == "fminl" || + Name == "fmax" || Name == "fmaxf" || Name == "fmaxl" || Name == "sinf" || Name == "sinl" || Name == "cos" || Name == "cosf" || Name == "cosl" || Name == "sqrt" || Name == "sqrtf" || Name == "sqrtl") return false; @@ -480,8 +491,8 @@ struct NoTTI final : ImmutablePass, TargetTransformInfo { return true; } - void getUnrollingPreferences(Loop *, UnrollingPreferences &) const override { - } + void getUnrollingPreferences(const Function *, Loop *, + UnrollingPreferences &) const override {} bool isLegalAddImmediate(int64_t Imm) const override { return false; @@ -558,12 +569,13 @@ struct NoTTI final : ImmutablePass, TargetTransformInfo { return 32; } - unsigned getMaximumUnrollFactor() const override { + unsigned getMaxInterleaveFactor() const override { return 1; } unsigned getArithmeticInstrCost(unsigned Opcode, Type *Ty, OperandValueKind, - OperandValueKind) const override { + OperandValueKind, OperandValueProperties, + OperandValueProperties) const override { return 1; } @@ -612,6 +624,11 @@ struct NoTTI final : ImmutablePass, TargetTransformInfo { unsigned getReductionCost(unsigned, Type *, bool) const override { return 1; } + + unsigned getCostOfKeepingLiveOverCall(ArrayRef Tys) const override { + return 0; + } + }; } // end anonymous namespace diff --git a/lib/Analysis/TypeBasedAliasAnalysis.cpp b/lib/Analysis/TypeBasedAliasAnalysis.cpp index f36f6f8..f347eb5 100644 --- a/lib/Analysis/TypeBasedAliasAnalysis.cpp +++ b/lib/Analysis/TypeBasedAliasAnalysis.cpp @@ -454,9 +454,9 @@ TypeBasedAliasAnalysis::alias(const Location &LocA, // Get the attached MDNodes. If either value lacks a tbaa MDNode, we must // be conservative. - const MDNode *AM = LocA.TBAATag; + const MDNode *AM = LocA.AATags.TBAA; if (!AM) return AliasAnalysis::alias(LocA, LocB); - const MDNode *BM = LocB.TBAATag; + const MDNode *BM = LocB.AATags.TBAA; if (!BM) return AliasAnalysis::alias(LocA, LocB); // If they may alias, chain to the next AliasAnalysis. 
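The mechanical change in this file follows the broader switch from a single TBAATag on AliasAnalysis::Location to the AAMDNodes bundle (TBAA, Scope, NoAlias). A minimal sketch of the new access pattern, assuming an Instruction *I and a Location Loc are already in hand (illustrative, not code from this patch):

AAMDNodes AATags;
I->getAAMetadata(AATags, /*Merge=*/false); // fills TBAA, Scope, NoAlias (see below)

if (const MDNode *TBAA = Loc.AATags.TBAA) {
  // The !tbaa node is consumed exactly as the old Loc.TBAATag was.
  (void)TBAA;
}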
@@ -472,7 +472,7 @@ bool TypeBasedAliasAnalysis::pointsToConstantMemory(const Location &Loc, if (!EnableTBAA) return AliasAnalysis::pointsToConstantMemory(Loc, OrLocal); - const MDNode *M = Loc.TBAATag; + const MDNode *M = Loc.AATags.TBAA; if (!M) return AliasAnalysis::pointsToConstantMemory(Loc, OrLocal); // If this is an "immutable" type, we can assume the pointer is pointing @@ -513,9 +513,9 @@ TypeBasedAliasAnalysis::getModRefInfo(ImmutableCallSite CS, if (!EnableTBAA) return AliasAnalysis::getModRefInfo(CS, Loc); - if (const MDNode *L = Loc.TBAATag) + if (const MDNode *L = Loc.AATags.TBAA) if (const MDNode *M = - CS.getInstruction()->getMetadata(LLVMContext::MD_tbaa)) + CS.getInstruction()->getMetadata(LLVMContext::MD_tbaa)) if (!Aliases(L, M)) return NoModRef; @@ -529,9 +529,9 @@ TypeBasedAliasAnalysis::getModRefInfo(ImmutableCallSite CS1, return AliasAnalysis::getModRefInfo(CS1, CS2); if (const MDNode *M1 = - CS1.getInstruction()->getMetadata(LLVMContext::MD_tbaa)) + CS1.getInstruction()->getMetadata(LLVMContext::MD_tbaa)) if (const MDNode *M2 = - CS2.getInstruction()->getMetadata(LLVMContext::MD_tbaa)) + CS2.getInstruction()->getMetadata(LLVMContext::MD_tbaa)) if (!Aliases(M1, M2)) return NoModRef; @@ -611,3 +611,24 @@ MDNode *MDNode::getMostGenericTBAA(MDNode *A, MDNode *B) { Value *Ops[3] = { Ret, Ret, ConstantInt::get(Int64, 0) }; return MDNode::get(A->getContext(), Ops); } + +void Instruction::getAAMetadata(AAMDNodes &N, bool Merge) const { + if (Merge) + N.TBAA = + MDNode::getMostGenericTBAA(N.TBAA, getMetadata(LLVMContext::MD_tbaa)); + else + N.TBAA = getMetadata(LLVMContext::MD_tbaa); + + if (Merge) + N.Scope = + MDNode::intersect(N.Scope, getMetadata(LLVMContext::MD_alias_scope)); + else + N.Scope = getMetadata(LLVMContext::MD_alias_scope); + + if (Merge) + N.NoAlias = + MDNode::intersect(N.NoAlias, getMetadata(LLVMContext::MD_noalias)); + else + N.NoAlias = getMetadata(LLVMContext::MD_noalias); +} + diff --git a/lib/Analysis/ValueTracking.cpp b/lib/Analysis/ValueTracking.cpp index 5264745..e9bbf83 100644 --- a/lib/Analysis/ValueTracking.cpp +++ b/lib/Analysis/ValueTracking.cpp @@ -13,6 +13,7 @@ //===----------------------------------------------------------------------===// #include "llvm/Analysis/ValueTracking.h" +#include "llvm/Analysis/AssumptionTracker.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/Analysis/InstructionSimplify.h" #include "llvm/Analysis/MemoryBuiltins.h" @@ -20,6 +21,7 @@ #include "llvm/IR/ConstantRange.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DataLayout.h" +#include "llvm/IR/Dominators.h" #include "llvm/IR/GetElementPtrTypeIterator.h" #include "llvm/IR/GlobalAlias.h" #include "llvm/IR/GlobalVariable.h" @@ -29,6 +31,7 @@ #include "llvm/IR/Metadata.h" #include "llvm/IR/Operator.h" #include "llvm/IR/PatternMatch.h" +#include "llvm/Support/Debug.h" #include "llvm/Support/MathExtras.h" #include using namespace llvm; @@ -36,8 +39,8 @@ using namespace llvm::PatternMatch; const unsigned MaxDepth = 6; -/// getBitWidth - Returns the bitwidth of the given scalar or pointer type (if -/// unknown returns 0). For vector types, returns the element type's bitwidth. +/// Returns the bitwidth of the given scalar or pointer type (if unknown returns +/// 0). For vector types, returns the element type's bitwidth. static unsigned getBitWidth(Type *Ty, const DataLayout *TD) { if (unsigned BitWidth = Ty->getScalarSizeInBits()) return BitWidth; @@ -45,10 +48,125 @@ static unsigned getBitWidth(Type *Ty, const DataLayout *TD) { return TD ? 
TD->getPointerTypeSizeInBits(Ty) : 0; } +// Many of these functions have internal versions that take an assumption +// exclusion set. This is because of the potential for mutual recursion to +// cause computeKnownBits to repeatedly visit the same assume intrinsic. The +// classic case of this is assume(x = y), which will attempt to determine +// bits in x from bits in y, which will attempt to determine bits in y from +// bits in x, etc. Regarding the mutual recursion, computeKnownBits can call +// isKnownNonZero, which calls computeKnownBits and ComputeSignBit and +// isKnownToBeAPowerOfTwo (all of which can call computeKnownBits), and so on. +typedef SmallPtrSet ExclInvsSet; + +namespace { +// Simplifying using an assume can only be done in a particular control-flow +// context (the context instruction provides that context). If an assume and +// the context instruction are not in the same block then the DT helps in +// figuring out if we can use it. +struct Query { + ExclInvsSet ExclInvs; + AssumptionTracker *AT; + const Instruction *CxtI; + const DominatorTree *DT; + + Query(AssumptionTracker *AT = nullptr, const Instruction *CxtI = nullptr, + const DominatorTree *DT = nullptr) + : AT(AT), CxtI(CxtI), DT(DT) {} + + Query(const Query &Q, const Value *NewExcl) + : ExclInvs(Q.ExclInvs), AT(Q.AT), CxtI(Q.CxtI), DT(Q.DT) { + ExclInvs.insert(NewExcl); + } +}; +} // end anonymous namespace + +// Given the provided Value and, potentially, a context instruction, return +// the preferred context instruction (if any). +static const Instruction *safeCxtI(const Value *V, const Instruction *CxtI) { + // If we've been provided with a context instruction, then use that (provided + // it has been inserted). + if (CxtI && CxtI->getParent()) + return CxtI; + + // If the value is really an already-inserted instruction, then use that. 
+ CxtI = dyn_cast(V); + if (CxtI && CxtI->getParent()) + return CxtI; + + return nullptr; +} + +static void computeKnownBits(Value *V, APInt &KnownZero, APInt &KnownOne, + const DataLayout *TD, unsigned Depth, + const Query &Q); + +void llvm::computeKnownBits(Value *V, APInt &KnownZero, APInt &KnownOne, + const DataLayout *TD, unsigned Depth, + AssumptionTracker *AT, const Instruction *CxtI, + const DominatorTree *DT) { + ::computeKnownBits(V, KnownZero, KnownOne, TD, Depth, + Query(AT, safeCxtI(V, CxtI), DT)); +} + +static void ComputeSignBit(Value *V, bool &KnownZero, bool &KnownOne, + const DataLayout *TD, unsigned Depth, + const Query &Q); + +void llvm::ComputeSignBit(Value *V, bool &KnownZero, bool &KnownOne, + const DataLayout *TD, unsigned Depth, + AssumptionTracker *AT, const Instruction *CxtI, + const DominatorTree *DT) { + ::ComputeSignBit(V, KnownZero, KnownOne, TD, Depth, + Query(AT, safeCxtI(V, CxtI), DT)); +} + +static bool isKnownToBeAPowerOfTwo(Value *V, bool OrZero, unsigned Depth, + const Query &Q); + +bool llvm::isKnownToBeAPowerOfTwo(Value *V, bool OrZero, unsigned Depth, + AssumptionTracker *AT, + const Instruction *CxtI, + const DominatorTree *DT) { + return ::isKnownToBeAPowerOfTwo(V, OrZero, Depth, + Query(AT, safeCxtI(V, CxtI), DT)); +} + +static bool isKnownNonZero(Value *V, const DataLayout *TD, unsigned Depth, + const Query &Q); + +bool llvm::isKnownNonZero(Value *V, const DataLayout *TD, unsigned Depth, + AssumptionTracker *AT, const Instruction *CxtI, + const DominatorTree *DT) { + return ::isKnownNonZero(V, TD, Depth, Query(AT, safeCxtI(V, CxtI), DT)); +} + +static bool MaskedValueIsZero(Value *V, const APInt &Mask, + const DataLayout *TD, unsigned Depth, + const Query &Q); + +bool llvm::MaskedValueIsZero(Value *V, const APInt &Mask, + const DataLayout *TD, unsigned Depth, + AssumptionTracker *AT, const Instruction *CxtI, + const DominatorTree *DT) { + return ::MaskedValueIsZero(V, Mask, TD, Depth, + Query(AT, safeCxtI(V, CxtI), DT)); +} + +static unsigned ComputeNumSignBits(Value *V, const DataLayout *TD, + unsigned Depth, const Query &Q); + +unsigned llvm::ComputeNumSignBits(Value *V, const DataLayout *TD, + unsigned Depth, AssumptionTracker *AT, + const Instruction *CxtI, + const DominatorTree *DT) { + return ::ComputeNumSignBits(V, TD, Depth, Query(AT, safeCxtI(V, CxtI), DT)); +} + static void computeKnownBitsAddSub(bool Add, Value *Op0, Value *Op1, bool NSW, APInt &KnownZero, APInt &KnownOne, APInt &KnownZero2, APInt &KnownOne2, - const DataLayout *TD, unsigned Depth) { + const DataLayout *TD, unsigned Depth, + const Query &Q) { if (!Add) { if (ConstantInt *CLHS = dyn_cast(Op0)) { // We know that the top bits of C-X are clear if X contains less bits @@ -59,7 +177,7 @@ static void computeKnownBitsAddSub(bool Add, Value *Op0, Value *Op1, bool NSW, unsigned NLZ = (CLHS->getValue()+1).countLeadingZeros(); // NLZ can't be BitWidth with no sign bit APInt MaskV = APInt::getHighBitsSet(BitWidth, NLZ+1); - llvm::computeKnownBits(Op1, KnownZero2, KnownOne2, TD, Depth+1); + computeKnownBits(Op1, KnownZero2, KnownOne2, TD, Depth+1, Q); // If all of the MaskV bits are known to be zero, then we know the // output top bits are zero, because we now know that the output is @@ -75,55 +193,51 @@ static void computeKnownBitsAddSub(bool Add, Value *Op0, Value *Op1, bool NSW, unsigned BitWidth = KnownZero.getBitWidth(); - // If one of the operands has trailing zeros, then the bits that the - // other operand has in those bit positions will be preserved in the - // result. 
For an add, this works with either operand. For a subtract, - // this only works if the known zeros are in the right operand. + // If an initial sequence of bits in the result is not needed, the + // corresponding bits in the operands are not needed. APInt LHSKnownZero(BitWidth, 0), LHSKnownOne(BitWidth, 0); - llvm::computeKnownBits(Op0, LHSKnownZero, LHSKnownOne, TD, Depth+1); - unsigned LHSKnownZeroOut = LHSKnownZero.countTrailingOnes(); - - llvm::computeKnownBits(Op1, KnownZero2, KnownOne2, TD, Depth+1); - unsigned RHSKnownZeroOut = KnownZero2.countTrailingOnes(); - - // Determine which operand has more trailing zeros, and use that - // many bits from the other operand. - if (LHSKnownZeroOut > RHSKnownZeroOut) { - if (Add) { - APInt Mask = APInt::getLowBitsSet(BitWidth, LHSKnownZeroOut); - KnownZero |= KnownZero2 & Mask; - KnownOne |= KnownOne2 & Mask; - } else { - // If the known zeros are in the left operand for a subtract, - // fall back to the minimum known zeros in both operands. - KnownZero |= APInt::getLowBitsSet(BitWidth, - std::min(LHSKnownZeroOut, - RHSKnownZeroOut)); - } - } else if (RHSKnownZeroOut >= LHSKnownZeroOut) { - APInt Mask = APInt::getLowBitsSet(BitWidth, RHSKnownZeroOut); - KnownZero |= LHSKnownZero & Mask; - KnownOne |= LHSKnownOne & Mask; + computeKnownBits(Op0, LHSKnownZero, LHSKnownOne, TD, Depth+1, Q); + computeKnownBits(Op1, KnownZero2, KnownOne2, TD, Depth+1, Q); + + // Carry in a 1 for a subtract, rather than a 0. + APInt CarryIn(BitWidth, 0); + if (!Add) { + // Sum = LHS + ~RHS + 1 + std::swap(KnownZero2, KnownOne2); + CarryIn.setBit(0); } + APInt PossibleSumZero = ~LHSKnownZero + ~KnownZero2 + CarryIn; + APInt PossibleSumOne = LHSKnownOne + KnownOne2 + CarryIn; + + // Compute known bits of the carry. + APInt CarryKnownZero = ~(PossibleSumZero ^ LHSKnownZero ^ KnownZero2); + APInt CarryKnownOne = PossibleSumOne ^ LHSKnownOne ^ KnownOne2; + + // Compute set of known bits (where all three relevant bits are known). + APInt LHSKnown = LHSKnownZero | LHSKnownOne; + APInt RHSKnown = KnownZero2 | KnownOne2; + APInt CarryKnown = CarryKnownZero | CarryKnownOne; + APInt Known = LHSKnown & RHSKnown & CarryKnown; + + assert((PossibleSumZero & Known) == (PossibleSumOne & Known) && + "known bits of sum differ"); + + // Compute known bits of the result. + KnownZero = ~PossibleSumOne & Known; + KnownOne = PossibleSumOne & Known; + // Are we still trying to solve for the sign bit? - if (!KnownZero.isNegative() && !KnownOne.isNegative()) { + if (!Known.isNegative()) { if (NSW) { - if (Add) { - // Adding two positive numbers can't wrap into negative - if (LHSKnownZero.isNegative() && KnownZero2.isNegative()) - KnownZero |= APInt::getSignBit(BitWidth); - // and adding two negative numbers can't wrap into positive. - else if (LHSKnownOne.isNegative() && KnownOne2.isNegative()) - KnownOne |= APInt::getSignBit(BitWidth); - } else { - // Subtracting a negative number from a positive one can't wrap - if (LHSKnownZero.isNegative() && KnownOne2.isNegative()) - KnownZero |= APInt::getSignBit(BitWidth); - // neither can subtracting a positive number from a negative one. - else if (LHSKnownOne.isNegative() && KnownZero2.isNegative()) - KnownOne |= APInt::getSignBit(BitWidth); - } + // Adding two non-negative numbers, or subtracting a negative number from + // a non-negative one, can't wrap into negative. 
+ if (LHSKnownZero.isNegative() && KnownZero2.isNegative()) + KnownZero |= APInt::getSignBit(BitWidth); + // Adding two negative numbers, or subtracting a non-negative number from + // a negative one, can't wrap into non-negative. + else if (LHSKnownOne.isNegative() && KnownOne2.isNegative()) + KnownOne |= APInt::getSignBit(BitWidth); } } } @@ -131,10 +245,11 @@ static void computeKnownBitsAddSub(bool Add, Value *Op0, Value *Op1, bool NSW, static void computeKnownBitsMul(Value *Op0, Value *Op1, bool NSW, APInt &KnownZero, APInt &KnownOne, APInt &KnownZero2, APInt &KnownOne2, - const DataLayout *TD, unsigned Depth) { + const DataLayout *TD, unsigned Depth, + const Query &Q) { unsigned BitWidth = KnownZero.getBitWidth(); - computeKnownBits(Op1, KnownZero, KnownOne, TD, Depth+1); - computeKnownBits(Op0, KnownZero2, KnownOne2, TD, Depth+1); + computeKnownBits(Op1, KnownZero, KnownOne, TD, Depth+1, Q); + computeKnownBits(Op0, KnownZero2, KnownOne2, TD, Depth+1, Q); bool isKnownNegative = false; bool isKnownNonNegative = false; @@ -155,9 +270,9 @@ static void computeKnownBitsMul(Value *Op0, Value *Op1, bool NSW, // negative or zero. if (!isKnownNonNegative) isKnownNegative = (isKnownNegativeOp1 && isKnownNonNegativeOp0 && - isKnownNonZero(Op0, TD, Depth)) || + isKnownNonZero(Op0, TD, Depth, Q)) || (isKnownNegativeOp0 && isKnownNonNegativeOp1 && - isKnownNonZero(Op1, TD, Depth)); + isKnownNonZero(Op1, TD, Depth, Q)); } } @@ -209,6 +324,410 @@ void llvm::computeKnownBitsFromRangeMetadata(const MDNode &Ranges, KnownZero = APInt::getHighBitsSet(BitWidth, MinLeadingZeros); } +static bool isEphemeralValueOf(Instruction *I, const Value *E) { + SmallVector WorkSet(1, I); + SmallPtrSet Visited; + SmallPtrSet EphValues; + + while (!WorkSet.empty()) { + const Value *V = WorkSet.pop_back_val(); + if (!Visited.insert(V).second) + continue; + + // If all uses of this value are ephemeral, then so is this value. + bool FoundNEUse = false; + for (const User *I : V->users()) + if (!EphValues.count(I)) { + FoundNEUse = true; + break; + } + + if (!FoundNEUse) { + if (V == E) + return true; + + EphValues.insert(V); + if (const User *U = dyn_cast(V)) + for (User::const_op_iterator J = U->op_begin(), JE = U->op_end(); + J != JE; ++J) { + if (isSafeToSpeculativelyExecute(*J)) + WorkSet.push_back(*J); + } + } + } + + return false; +} + +// Is this an intrinsic that cannot be speculated but also cannot trap? +static bool isAssumeLikeIntrinsic(const Instruction *I) { + if (const CallInst *CI = dyn_cast(I)) + if (Function *F = CI->getCalledFunction()) + switch (F->getIntrinsicID()) { + default: break; + // FIXME: This list is repeated from NoTTI::getIntrinsicCost. + case Intrinsic::assume: + case Intrinsic::dbg_declare: + case Intrinsic::dbg_value: + case Intrinsic::invariant_start: + case Intrinsic::invariant_end: + case Intrinsic::lifetime_start: + case Intrinsic::lifetime_end: + case Intrinsic::objectsize: + case Intrinsic::ptr_annotation: + case Intrinsic::var_annotation: + return true; + } + + return false; +} + +static bool isValidAssumeForContext(Value *V, const Query &Q, + const DataLayout *DL) { + Instruction *Inv = cast(V); + + // There are two restrictions on the use of an assume: + // 1. The assume must dominate the context (or the control flow must + // reach the assume whenever it reaches the context). + // 2. 
The context must not be in the assume's set of ephemeral values + // (otherwise we will use the assume to prove that the condition + // feeding the assume is trivially true, thus causing the removal of + // the assume). + + if (Q.DT) { + if (Q.DT->dominates(Inv, Q.CxtI)) { + return true; + } else if (Inv->getParent() == Q.CxtI->getParent()) { + // The context comes first, but they're both in the same block. Make sure + // there is nothing in between that might interrupt the control flow. + for (BasicBlock::const_iterator I = + std::next(BasicBlock::const_iterator(Q.CxtI)), + IE(Inv); I != IE; ++I) + if (!isSafeToSpeculativelyExecute(I, DL) && + !isAssumeLikeIntrinsic(I)) + return false; + + return !isEphemeralValueOf(Inv, Q.CxtI); + } + + return false; + } + + // When we don't have a DT, we do a limited search... + if (Inv->getParent() == Q.CxtI->getParent()->getSinglePredecessor()) { + return true; + } else if (Inv->getParent() == Q.CxtI->getParent()) { + // Search forward from the assume until we reach the context (or the end + // of the block); the common case is that the assume will come first. + for (BasicBlock::iterator I = std::next(BasicBlock::iterator(Inv)), + IE = Inv->getParent()->end(); I != IE; ++I) + if (I == Q.CxtI) + return true; + + // The context must come first... + for (BasicBlock::const_iterator I = + std::next(BasicBlock::const_iterator(Q.CxtI)), + IE(Inv); I != IE; ++I) + if (!isSafeToSpeculativelyExecute(I, DL) && + !isAssumeLikeIntrinsic(I)) + return false; + + return !isEphemeralValueOf(Inv, Q.CxtI); + } + + return false; +} + +bool llvm::isValidAssumeForContext(const Instruction *I, + const Instruction *CxtI, + const DataLayout *DL, + const DominatorTree *DT) { + return ::isValidAssumeForContext(const_cast(I), + Query(nullptr, CxtI, DT), DL); +} + +template +inline match_combine_or, + CmpClass_match> +m_c_ICmp(ICmpInst::Predicate &Pred, const LHS &L, const RHS &R) { + return m_CombineOr(m_ICmp(Pred, L, R), m_ICmp(Pred, R, L)); +} + +template +inline match_combine_or, + BinaryOp_match> +m_c_And(const LHS &L, const RHS &R) { + return m_CombineOr(m_And(L, R), m_And(R, L)); +} + +template +inline match_combine_or, + BinaryOp_match> +m_c_Or(const LHS &L, const RHS &R) { + return m_CombineOr(m_Or(L, R), m_Or(R, L)); +} + +template +inline match_combine_or, + BinaryOp_match> +m_c_Xor(const LHS &L, const RHS &R) { + return m_CombineOr(m_Xor(L, R), m_Xor(R, L)); +} + +static void computeKnownBitsFromAssume(Value *V, APInt &KnownZero, + APInt &KnownOne, + const DataLayout *DL, + unsigned Depth, const Query &Q) { + // Use of assumptions is context-sensitive. If we don't have a context, we + // cannot use them! 
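// For example (illustrative), given
//   %m = and i32 %x, 255
//   %c = icmp eq i32 %m, 18
//   call void @llvm.assume(i1 %c)
// a computeKnownBits query on %x at a point dominated by the assume matches
// the "assume(v & b = a)" pattern handled below: the mask's known-one bits
// select which RHS bits transfer, so the low byte of %x becomes known, with
// KnownOne = 0b00010010 and KnownZero covering the other low-byte bits.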
+ if (!Q.AT || !Q.CxtI) + return; + + unsigned BitWidth = KnownZero.getBitWidth(); + + Function *F = const_cast(Q.CxtI->getParent()->getParent()); + for (auto &CI : Q.AT->assumptions(F)) { + CallInst *I = CI; + if (Q.ExclInvs.count(I)) + continue; + + if (match(I, m_Intrinsic(m_Specific(V))) && + isValidAssumeForContext(I, Q, DL)) { + assert(BitWidth == 1 && "assume operand is not i1?"); + KnownZero.clearAllBits(); + KnownOne.setAllBits(); + return; + } + + Value *A, *B; + auto m_V = m_CombineOr(m_Specific(V), + m_CombineOr(m_PtrToInt(m_Specific(V)), + m_BitCast(m_Specific(V)))); + + CmpInst::Predicate Pred; + ConstantInt *C; + // assume(v = a) + if (match(I, m_Intrinsic( + m_c_ICmp(Pred, m_V, m_Value(A)))) && + Pred == ICmpInst::ICMP_EQ && isValidAssumeForContext(I, Q, DL)) { + APInt RHSKnownZero(BitWidth, 0), RHSKnownOne(BitWidth, 0); + computeKnownBits(A, RHSKnownZero, RHSKnownOne, DL, Depth+1, Query(Q, I)); + KnownZero |= RHSKnownZero; + KnownOne |= RHSKnownOne; + // assume(v & b = a) + } else if (match(I, m_Intrinsic( + m_c_ICmp(Pred, m_c_And(m_V, m_Value(B)), m_Value(A)))) && + Pred == ICmpInst::ICMP_EQ && isValidAssumeForContext(I, Q, DL)) { + APInt RHSKnownZero(BitWidth, 0), RHSKnownOne(BitWidth, 0); + computeKnownBits(A, RHSKnownZero, RHSKnownOne, DL, Depth+1, Query(Q, I)); + APInt MaskKnownZero(BitWidth, 0), MaskKnownOne(BitWidth, 0); + computeKnownBits(B, MaskKnownZero, MaskKnownOne, DL, Depth+1, Query(Q, I)); + + // For those bits in the mask that are known to be one, we can propagate + // known bits from the RHS to V. + KnownZero |= RHSKnownZero & MaskKnownOne; + KnownOne |= RHSKnownOne & MaskKnownOne; + // assume(~(v & b) = a) + } else if (match(I, m_Intrinsic( + m_c_ICmp(Pred, m_Not(m_c_And(m_V, m_Value(B))), + m_Value(A)))) && + Pred == ICmpInst::ICMP_EQ && isValidAssumeForContext(I, Q, DL)) { + APInt RHSKnownZero(BitWidth, 0), RHSKnownOne(BitWidth, 0); + computeKnownBits(A, RHSKnownZero, RHSKnownOne, DL, Depth+1, Query(Q, I)); + APInt MaskKnownZero(BitWidth, 0), MaskKnownOne(BitWidth, 0); + computeKnownBits(B, MaskKnownZero, MaskKnownOne, DL, Depth+1, Query(Q, I)); + + // For those bits in the mask that are known to be one, we can propagate + // inverted known bits from the RHS to V. + KnownZero |= RHSKnownOne & MaskKnownOne; + KnownOne |= RHSKnownZero & MaskKnownOne; + // assume(v | b = a) + } else if (match(I, m_Intrinsic( + m_c_ICmp(Pred, m_c_Or(m_V, m_Value(B)), m_Value(A)))) && + Pred == ICmpInst::ICMP_EQ && isValidAssumeForContext(I, Q, DL)) { + APInt RHSKnownZero(BitWidth, 0), RHSKnownOne(BitWidth, 0); + computeKnownBits(A, RHSKnownZero, RHSKnownOne, DL, Depth+1, Query(Q, I)); + APInt BKnownZero(BitWidth, 0), BKnownOne(BitWidth, 0); + computeKnownBits(B, BKnownZero, BKnownOne, DL, Depth+1, Query(Q, I)); + + // For those bits in B that are known to be zero, we can propagate known + // bits from the RHS to V. + KnownZero |= RHSKnownZero & BKnownZero; + KnownOne |= RHSKnownOne & BKnownZero; + // assume(~(v | b) = a) + } else if (match(I, m_Intrinsic( + m_c_ICmp(Pred, m_Not(m_c_Or(m_V, m_Value(B))), + m_Value(A)))) && + Pred == ICmpInst::ICMP_EQ && isValidAssumeForContext(I, Q, DL)) { + APInt RHSKnownZero(BitWidth, 0), RHSKnownOne(BitWidth, 0); + computeKnownBits(A, RHSKnownZero, RHSKnownOne, DL, Depth+1, Query(Q, I)); + APInt BKnownZero(BitWidth, 0), BKnownOne(BitWidth, 0); + computeKnownBits(B, BKnownZero, BKnownOne, DL, Depth+1, Query(Q, I)); + + // For those bits in B that are known to be zero, we can propagate + // inverted known bits from the RHS to V. 
+ KnownZero |= RHSKnownOne & BKnownZero; + KnownOne |= RHSKnownZero & BKnownZero; + // assume(v ^ b = a) + } else if (match(I, m_Intrinsic( + m_c_ICmp(Pred, m_c_Xor(m_V, m_Value(B)), m_Value(A)))) && + Pred == ICmpInst::ICMP_EQ && isValidAssumeForContext(I, Q, DL)) { + APInt RHSKnownZero(BitWidth, 0), RHSKnownOne(BitWidth, 0); + computeKnownBits(A, RHSKnownZero, RHSKnownOne, DL, Depth+1, Query(Q, I)); + APInt BKnownZero(BitWidth, 0), BKnownOne(BitWidth, 0); + computeKnownBits(B, BKnownZero, BKnownOne, DL, Depth+1, Query(Q, I)); + + // For those bits in B that are known to be zero, we can propagate known + // bits from the RHS to V. For those bits in B that are known to be one, + // we can propagate inverted known bits from the RHS to V. + KnownZero |= RHSKnownZero & BKnownZero; + KnownOne |= RHSKnownOne & BKnownZero; + KnownZero |= RHSKnownOne & BKnownOne; + KnownOne |= RHSKnownZero & BKnownOne; + // assume(~(v ^ b) = a) + } else if (match(I, m_Intrinsic( + m_c_ICmp(Pred, m_Not(m_c_Xor(m_V, m_Value(B))), + m_Value(A)))) && + Pred == ICmpInst::ICMP_EQ && isValidAssumeForContext(I, Q, DL)) { + APInt RHSKnownZero(BitWidth, 0), RHSKnownOne(BitWidth, 0); + computeKnownBits(A, RHSKnownZero, RHSKnownOne, DL, Depth+1, Query(Q, I)); + APInt BKnownZero(BitWidth, 0), BKnownOne(BitWidth, 0); + computeKnownBits(B, BKnownZero, BKnownOne, DL, Depth+1, Query(Q, I)); + + // For those bits in B that are known to be zero, we can propagate + // inverted known bits from the RHS to V. For those bits in B that are + // known to be one, we can propagate known bits from the RHS to V. + KnownZero |= RHSKnownOne & BKnownZero; + KnownOne |= RHSKnownZero & BKnownZero; + KnownZero |= RHSKnownZero & BKnownOne; + KnownOne |= RHSKnownOne & BKnownOne; + // assume(v << c = a) + } else if (match(I, m_Intrinsic( + m_c_ICmp(Pred, m_Shl(m_V, m_ConstantInt(C)), + m_Value(A)))) && + Pred == ICmpInst::ICMP_EQ && isValidAssumeForContext(I, Q, DL)) { + APInt RHSKnownZero(BitWidth, 0), RHSKnownOne(BitWidth, 0); + computeKnownBits(A, RHSKnownZero, RHSKnownOne, DL, Depth+1, Query(Q, I)); + // For those bits in RHS that are known, we can propagate them to known + // bits in V shifted to the right by C. + KnownZero |= RHSKnownZero.lshr(C->getZExtValue()); + KnownOne |= RHSKnownOne.lshr(C->getZExtValue()); + // assume(~(v << c) = a) + } else if (match(I, m_Intrinsic( + m_c_ICmp(Pred, m_Not(m_Shl(m_V, m_ConstantInt(C))), + m_Value(A)))) && + Pred == ICmpInst::ICMP_EQ && isValidAssumeForContext(I, Q, DL)) { + APInt RHSKnownZero(BitWidth, 0), RHSKnownOne(BitWidth, 0); + computeKnownBits(A, RHSKnownZero, RHSKnownOne, DL, Depth+1, Query(Q, I)); + // For those bits in RHS that are known, we can propagate them inverted + // to known bits in V shifted to the right by C. + KnownZero |= RHSKnownOne.lshr(C->getZExtValue()); + KnownOne |= RHSKnownZero.lshr(C->getZExtValue()); + // assume(v >> c = a) + } else if (match(I, m_Intrinsic( + m_c_ICmp(Pred, m_CombineOr(m_LShr(m_V, m_ConstantInt(C)), + m_AShr(m_V, + m_ConstantInt(C))), + m_Value(A)))) && + Pred == ICmpInst::ICMP_EQ && isValidAssumeForContext(I, Q, DL)) { + APInt RHSKnownZero(BitWidth, 0), RHSKnownOne(BitWidth, 0); + computeKnownBits(A, RHSKnownZero, RHSKnownOne, DL, Depth+1, Query(Q, I)); + // For those bits in RHS that are known, we can propagate them to known + // bits in V shifted to the right by C. 
+ KnownZero |= RHSKnownZero << C->getZExtValue(); + KnownOne |= RHSKnownOne << C->getZExtValue(); + // assume(~(v >> c) = a) + } else if (match(I, m_Intrinsic( + m_c_ICmp(Pred, m_Not(m_CombineOr( + m_LShr(m_V, m_ConstantInt(C)), + m_AShr(m_V, m_ConstantInt(C)))), + m_Value(A)))) && + Pred == ICmpInst::ICMP_EQ && isValidAssumeForContext(I, Q, DL)) { + APInt RHSKnownZero(BitWidth, 0), RHSKnownOne(BitWidth, 0); + computeKnownBits(A, RHSKnownZero, RHSKnownOne, DL, Depth+1, Query(Q, I)); + // For those bits in RHS that are known, we can propagate them inverted + // to known bits in V shifted to the right by C. + KnownZero |= RHSKnownOne << C->getZExtValue(); + KnownOne |= RHSKnownZero << C->getZExtValue(); + // assume(v >=_s c) where c is non-negative + } else if (match(I, m_Intrinsic( + m_ICmp(Pred, m_V, m_Value(A)))) && + Pred == ICmpInst::ICMP_SGE && + isValidAssumeForContext(I, Q, DL)) { + APInt RHSKnownZero(BitWidth, 0), RHSKnownOne(BitWidth, 0); + computeKnownBits(A, RHSKnownZero, RHSKnownOne, DL, Depth+1, Query(Q, I)); + + if (RHSKnownZero.isNegative()) { + // We know that the sign bit is zero. + KnownZero |= APInt::getSignBit(BitWidth); + } + // assume(v >_s c) where c is at least -1. + } else if (match(I, m_Intrinsic( + m_ICmp(Pred, m_V, m_Value(A)))) && + Pred == ICmpInst::ICMP_SGT && + isValidAssumeForContext(I, Q, DL)) { + APInt RHSKnownZero(BitWidth, 0), RHSKnownOne(BitWidth, 0); + computeKnownBits(A, RHSKnownZero, RHSKnownOne, DL, Depth+1, Query(Q, I)); + + if (RHSKnownOne.isAllOnesValue() || RHSKnownZero.isNegative()) { + // We know that the sign bit is zero. + KnownZero |= APInt::getSignBit(BitWidth); + } + // assume(v <=_s c) where c is negative + } else if (match(I, m_Intrinsic( + m_ICmp(Pred, m_V, m_Value(A)))) && + Pred == ICmpInst::ICMP_SLE && + isValidAssumeForContext(I, Q, DL)) { + APInt RHSKnownZero(BitWidth, 0), RHSKnownOne(BitWidth, 0); + computeKnownBits(A, RHSKnownZero, RHSKnownOne, DL, Depth+1, Query(Q, I)); + + if (RHSKnownOne.isNegative()) { + // We know that the sign bit is one. + KnownOne |= APInt::getSignBit(BitWidth); + } + // assume(v <_s c) where c is non-positive + } else if (match(I, m_Intrinsic( + m_ICmp(Pred, m_V, m_Value(A)))) && + Pred == ICmpInst::ICMP_SLT && + isValidAssumeForContext(I, Q, DL)) { + APInt RHSKnownZero(BitWidth, 0), RHSKnownOne(BitWidth, 0); + computeKnownBits(A, RHSKnownZero, RHSKnownOne, DL, Depth+1, Query(Q, I)); + + if (RHSKnownZero.isAllOnesValue() || RHSKnownOne.isNegative()) { + // We know that the sign bit is one. + KnownOne |= APInt::getSignBit(BitWidth); + } + // assume(v <=_u c) + } else if (match(I, m_Intrinsic( + m_ICmp(Pred, m_V, m_Value(A)))) && + Pred == ICmpInst::ICMP_ULE && + isValidAssumeForContext(I, Q, DL)) { + APInt RHSKnownZero(BitWidth, 0), RHSKnownOne(BitWidth, 0); + computeKnownBits(A, RHSKnownZero, RHSKnownOne, DL, Depth+1, Query(Q, I)); + + // Whatever high bits in c are zero are known to be zero. + KnownZero |= + APInt::getHighBitsSet(BitWidth, RHSKnownZero.countLeadingOnes()); + // assume(v <_u c) + } else if (match(I, m_Intrinsic( + m_ICmp(Pred, m_V, m_Value(A)))) && + Pred == ICmpInst::ICMP_ULT && + isValidAssumeForContext(I, Q, DL)) { + APInt RHSKnownZero(BitWidth, 0), RHSKnownOne(BitWidth, 0); + computeKnownBits(A, RHSKnownZero, RHSKnownOne, DL, Depth+1, Query(Q, I)); + + // Whatever high bits in c are zero are known to be zero (if c is a power + // of 2, then one more). 
+ if (isKnownToBeAPowerOfTwo(A, false, Depth+1, Query(Q, I))) + KnownZero |= + APInt::getHighBitsSet(BitWidth, RHSKnownZero.countLeadingOnes()+1); + else + KnownZero |= + APInt::getHighBitsSet(BitWidth, RHSKnownZero.countLeadingOnes()); + } + } +} + /// Determine which bits of V are known to be either zero or one and return /// them in the KnownZero/KnownOne bit sets. /// @@ -224,8 +743,9 @@ void llvm::computeKnownBitsFromRangeMetadata(const MDNode &Ranges, /// where V is a vector, known zero, and known one values are the /// same width as the vector element, and the bit is set only if it is true /// for all of the elements in the vector. -void llvm::computeKnownBits(Value *V, APInt &KnownZero, APInt &KnownOne, - const DataLayout *TD, unsigned Depth) { +void computeKnownBits(Value *V, APInt &KnownZero, APInt &KnownOne, + const DataLayout *TD, unsigned Depth, + const Query &Q) { assert(V && "No Value?"); assert(Depth <= MaxDepth && "Limit Search Depth"); unsigned BitWidth = KnownZero.getBitWidth(); @@ -270,6 +790,17 @@ void llvm::computeKnownBits(Value *V, APInt &KnownZero, APInt &KnownOne, return; } + // A weak GlobalAlias is totally unknown. A non-weak GlobalAlias has + // the bits of its aliasee. + if (GlobalAlias *GA = dyn_cast(V)) { + if (GA->mayBeOverridden()) { + KnownZero.clearAllBits(); KnownOne.clearAllBits(); + } else { + computeKnownBits(GA->getAliasee(), KnownZero, KnownOne, TD, Depth+1, Q); + } + return; + } + // The address of an aligned GlobalValue has trailing zeros. if (GlobalValue *GV = dyn_cast(V)) { unsigned Align = GV->getAlignment(); @@ -295,25 +826,11 @@ void llvm::computeKnownBits(Value *V, APInt &KnownZero, APInt &KnownOne, KnownOne.clearAllBits(); return; } - // A weak GlobalAlias is totally unknown. A non-weak GlobalAlias has - // the bits of its aliasee. - if (GlobalAlias *GA = dyn_cast(V)) { - if (GA->mayBeOverridden()) { - KnownZero.clearAllBits(); KnownOne.clearAllBits(); - } else { - computeKnownBits(GA->getAliasee(), KnownZero, KnownOne, TD, Depth+1); - } - return; - } if (Argument *A = dyn_cast(V)) { - unsigned Align = 0; + unsigned Align = A->getType()->isPointerTy() ? A->getParamAlignment() : 0; - if (A->hasByValOrInAllocaAttr()) { - // Get alignment information off byval/inalloca arguments if specified in - // the IR. - Align = A->getParamAlignment(); - } else if (TD && A->hasStructRetAttr()) { + if (!Align && TD && A->hasStructRetAttr()) { // An sret parameter has at least the ABI alignment of the return type. Type *EltTy = cast(A->getType())->getElementType(); if (EltTy->isSized()) @@ -322,6 +839,10 @@ void llvm::computeKnownBits(Value *V, APInt &KnownZero, APInt &KnownOne, if (Align) KnownZero = APInt::getLowBitsSet(BitWidth, countTrailingZeros(Align)); + + // Don't give up yet... there might be an assumption that provides more + // information... + computeKnownBitsFromAssume(V, KnownZero, KnownOne, TD, Depth, Q); return; } @@ -331,6 +852,9 @@ void llvm::computeKnownBits(Value *V, APInt &KnownZero, APInt &KnownOne, if (Depth == MaxDepth) return; // Limit search depth. + // Check whether a nearby assume intrinsic can determine some known bits. + computeKnownBitsFromAssume(V, KnownZero, KnownOne, TD, Depth, Q); + Operator *I = dyn_cast(V); if (!I) return; @@ -343,8 +867,8 @@ void llvm::computeKnownBits(Value *V, APInt &KnownZero, APInt &KnownOne, break; case Instruction::And: { // If either the LHS or the RHS are Zero, the result is zero. 
- computeKnownBits(I->getOperand(1), KnownZero, KnownOne, TD, Depth+1); - computeKnownBits(I->getOperand(0), KnownZero2, KnownOne2, TD, Depth+1); + computeKnownBits(I->getOperand(1), KnownZero, KnownOne, TD, Depth+1, Q); + computeKnownBits(I->getOperand(0), KnownZero2, KnownOne2, TD, Depth+1, Q); // Output known-1 bits are only known if set in both the LHS & RHS. KnownOne &= KnownOne2; @@ -353,8 +877,8 @@ void llvm::computeKnownBits(Value *V, APInt &KnownZero, APInt &KnownOne, break; } case Instruction::Or: { - computeKnownBits(I->getOperand(1), KnownZero, KnownOne, TD, Depth+1); - computeKnownBits(I->getOperand(0), KnownZero2, KnownOne2, TD, Depth+1); + computeKnownBits(I->getOperand(1), KnownZero, KnownOne, TD, Depth+1, Q); + computeKnownBits(I->getOperand(0), KnownZero2, KnownOne2, TD, Depth+1, Q); // Output known-0 bits are only known if clear in both the LHS & RHS. KnownZero &= KnownZero2; @@ -363,8 +887,8 @@ void llvm::computeKnownBits(Value *V, APInt &KnownZero, APInt &KnownOne, break; } case Instruction::Xor: { - computeKnownBits(I->getOperand(1), KnownZero, KnownOne, TD, Depth+1); - computeKnownBits(I->getOperand(0), KnownZero2, KnownOne2, TD, Depth+1); + computeKnownBits(I->getOperand(1), KnownZero, KnownOne, TD, Depth+1, Q); + computeKnownBits(I->getOperand(0), KnownZero2, KnownOne2, TD, Depth+1, Q); // Output known-0 bits are known if clear or set in both the LHS & RHS. APInt KnownZeroOut = (KnownZero & KnownZero2) | (KnownOne & KnownOne2); @@ -376,19 +900,20 @@ void llvm::computeKnownBits(Value *V, APInt &KnownZero, APInt &KnownOne, case Instruction::Mul: { bool NSW = cast(I)->hasNoSignedWrap(); computeKnownBitsMul(I->getOperand(0), I->getOperand(1), NSW, - KnownZero, KnownOne, KnownZero2, KnownOne2, TD, Depth); + KnownZero, KnownOne, KnownZero2, KnownOne2, TD, + Depth, Q); break; } case Instruction::UDiv: { // For the purposes of computing leading zeros we can conservatively // treat a udiv as a logical right shift by the power of 2 known to // be less than the denominator. - computeKnownBits(I->getOperand(0), KnownZero2, KnownOne2, TD, Depth+1); + computeKnownBits(I->getOperand(0), KnownZero2, KnownOne2, TD, Depth+1, Q); unsigned LeadZ = KnownZero2.countLeadingOnes(); KnownOne2.clearAllBits(); KnownZero2.clearAllBits(); - computeKnownBits(I->getOperand(1), KnownZero2, KnownOne2, TD, Depth+1); + computeKnownBits(I->getOperand(1), KnownZero2, KnownOne2, TD, Depth+1, Q); unsigned RHSUnknownLeadingOnes = KnownOne2.countLeadingZeros(); if (RHSUnknownLeadingOnes != BitWidth) LeadZ = std::min(BitWidth, @@ -398,9 +923,8 @@ void llvm::computeKnownBits(Value *V, APInt &KnownZero, APInt &KnownOne, break; } case Instruction::Select: - computeKnownBits(I->getOperand(2), KnownZero, KnownOne, TD, Depth+1); - computeKnownBits(I->getOperand(1), KnownZero2, KnownOne2, TD, - Depth+1); + computeKnownBits(I->getOperand(2), KnownZero, KnownOne, TD, Depth+1, Q); + computeKnownBits(I->getOperand(1), KnownZero2, KnownOne2, TD, Depth+1, Q); // Only known if known in both the LHS and RHS. KnownOne &= KnownOne2; @@ -415,6 +939,7 @@ void llvm::computeKnownBits(Value *V, APInt &KnownZero, APInt &KnownOne, break; // Can't work with floating point. case Instruction::PtrToInt: case Instruction::IntToPtr: + case Instruction::AddrSpaceCast: // Pointers could be different sizes. // We can't handle these if we don't know the pointer size. if (!TD) break; // FALL THROUGH and handle them the same as zext/trunc. 
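As a standalone sanity check of the And rule above (a sketch for illustration, not code from this patch), the known-bit combination can be exercised directly with APInt:

#include "llvm/ADT/APInt.h"
using namespace llvm;

void knownBitsAndExample() {
  // 8-bit operands with partially known bits (KnownZero and KnownOne are
  // disjoint for each operand, as the analysis guarantees).
  APInt KnownZeroLHS(8, 0xF0), KnownOneLHS(8, 0x0C);
  APInt KnownZeroRHS(8, 0x33), KnownOneRHS(8, 0x44);

  // And: a result bit is known one only if it is known one on both sides,
  // and known zero if it is known zero on either side.
  APInt KnownOne  = KnownOneLHS & KnownOneRHS;    // 0x04
  APInt KnownZero = KnownZeroLHS | KnownZeroRHS;  // 0xF3
  (void)KnownOne;
  (void)KnownZero;
}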
@@ -435,7 +960,7 @@ void llvm::computeKnownBits(Value *V, APInt &KnownZero, APInt &KnownOne, assert(SrcBitWidth && "SrcBitWidth can't be zero"); KnownZero = KnownZero.zextOrTrunc(SrcBitWidth); KnownOne = KnownOne.zextOrTrunc(SrcBitWidth); - computeKnownBits(I->getOperand(0), KnownZero, KnownOne, TD, Depth+1); + computeKnownBits(I->getOperand(0), KnownZero, KnownOne, TD, Depth+1, Q); KnownZero = KnownZero.zextOrTrunc(BitWidth); KnownOne = KnownOne.zextOrTrunc(BitWidth); // Any top bits are known to be zero. @@ -449,7 +974,7 @@ void llvm::computeKnownBits(Value *V, APInt &KnownZero, APInt &KnownOne, // TODO: For now, not handling conversions like: // (bitcast i64 %x to <2 x i32>) !I->getType()->isVectorTy()) { - computeKnownBits(I->getOperand(0), KnownZero, KnownOne, TD, Depth+1); + computeKnownBits(I->getOperand(0), KnownZero, KnownOne, TD, Depth+1, Q); break; } break; @@ -460,7 +985,7 @@ void llvm::computeKnownBits(Value *V, APInt &KnownZero, APInt &KnownOne, KnownZero = KnownZero.trunc(SrcBitWidth); KnownOne = KnownOne.trunc(SrcBitWidth); - computeKnownBits(I->getOperand(0), KnownZero, KnownOne, TD, Depth+1); + computeKnownBits(I->getOperand(0), KnownZero, KnownOne, TD, Depth+1, Q); KnownZero = KnownZero.zext(BitWidth); KnownOne = KnownOne.zext(BitWidth); @@ -476,11 +1001,10 @@ void llvm::computeKnownBits(Value *V, APInt &KnownZero, APInt &KnownOne, // (shl X, C1) & C2 == 0 iff (X & C2 >>u C1) == 0 if (ConstantInt *SA = dyn_cast(I->getOperand(1))) { uint64_t ShiftAmt = SA->getLimitedValue(BitWidth); - computeKnownBits(I->getOperand(0), KnownZero, KnownOne, TD, Depth+1); + computeKnownBits(I->getOperand(0), KnownZero, KnownOne, TD, Depth+1, Q); KnownZero <<= ShiftAmt; KnownOne <<= ShiftAmt; KnownZero |= APInt::getLowBitsSet(BitWidth, ShiftAmt); // low bits known 0 - break; } break; case Instruction::LShr: @@ -490,12 +1014,11 @@ void llvm::computeKnownBits(Value *V, APInt &KnownZero, APInt &KnownOne, uint64_t ShiftAmt = SA->getLimitedValue(BitWidth); // Unsigned shift right. - computeKnownBits(I->getOperand(0), KnownZero,KnownOne, TD, Depth+1); + computeKnownBits(I->getOperand(0), KnownZero, KnownOne, TD, Depth+1, Q); KnownZero = APIntOps::lshr(KnownZero, ShiftAmt); KnownOne = APIntOps::lshr(KnownOne, ShiftAmt); // high bits known zero. KnownZero |= APInt::getHighBitsSet(BitWidth, ShiftAmt); - break; } break; case Instruction::AShr: @@ -505,7 +1028,7 @@ void llvm::computeKnownBits(Value *V, APInt &KnownZero, APInt &KnownOne, uint64_t ShiftAmt = SA->getLimitedValue(BitWidth-1); // Signed shift right. - computeKnownBits(I->getOperand(0), KnownZero, KnownOne, TD, Depth+1); + computeKnownBits(I->getOperand(0), KnownZero, KnownOne, TD, Depth+1, Q); KnownZero = APIntOps::lshr(KnownZero, ShiftAmt); KnownOne = APIntOps::lshr(KnownOne, ShiftAmt); @@ -514,21 +1037,20 @@ void llvm::computeKnownBits(Value *V, APInt &KnownZero, APInt &KnownOne, KnownZero |= HighBits; else if (KnownOne[BitWidth-ShiftAmt-1]) // New bits are known one. 
KnownOne |= HighBits; - break; } break; case Instruction::Sub: { bool NSW = cast(I)->hasNoSignedWrap(); computeKnownBitsAddSub(false, I->getOperand(0), I->getOperand(1), NSW, KnownZero, KnownOne, KnownZero2, KnownOne2, TD, - Depth); + Depth, Q); break; } case Instruction::Add: { bool NSW = cast(I)->hasNoSignedWrap(); computeKnownBitsAddSub(true, I->getOperand(0), I->getOperand(1), NSW, KnownZero, KnownOne, KnownZero2, KnownOne2, TD, - Depth); + Depth, Q); break; } case Instruction::SRem: @@ -536,7 +1058,8 @@ void llvm::computeKnownBits(Value *V, APInt &KnownZero, APInt &KnownOne, APInt RA = Rem->getValue().abs(); if (RA.isPowerOf2()) { APInt LowBits = RA - 1; - computeKnownBits(I->getOperand(0), KnownZero2, KnownOne2, TD, Depth+1); + computeKnownBits(I->getOperand(0), KnownZero2, KnownOne2, TD, + Depth+1, Q); // The low bits of the first operand are unchanged by the srem. KnownZero = KnownZero2 & LowBits; @@ -561,7 +1084,7 @@ void llvm::computeKnownBits(Value *V, APInt &KnownZero, APInt &KnownOne, if (KnownZero.isNonNegative()) { APInt LHSKnownZero(BitWidth, 0), LHSKnownOne(BitWidth, 0); computeKnownBits(I->getOperand(0), LHSKnownZero, LHSKnownOne, TD, - Depth+1); + Depth+1, Q); // If it's known zero, our sign bit is also zero. if (LHSKnownZero.isNegative()) KnownZero.setBit(BitWidth - 1); @@ -574,7 +1097,7 @@ void llvm::computeKnownBits(Value *V, APInt &KnownZero, APInt &KnownOne, if (RA.isPowerOf2()) { APInt LowBits = (RA - 1); computeKnownBits(I->getOperand(0), KnownZero, KnownOne, TD, - Depth+1); + Depth+1, Q); KnownZero |= ~LowBits; KnownOne &= LowBits; break; @@ -583,8 +1106,8 @@ void llvm::computeKnownBits(Value *V, APInt &KnownZero, APInt &KnownOne, // Since the result is less than or equal to either operand, any leading // zero bits in either operand must also exist in the result. - computeKnownBits(I->getOperand(0), KnownZero, KnownOne, TD, Depth+1); - computeKnownBits(I->getOperand(1), KnownZero2, KnownOne2, TD, Depth+1); + computeKnownBits(I->getOperand(0), KnownZero, KnownOne, TD, Depth+1, Q); + computeKnownBits(I->getOperand(1), KnownZero2, KnownOne2, TD, Depth+1, Q); unsigned Leaders = std::max(KnownZero.countLeadingOnes(), KnownZero2.countLeadingOnes()); @@ -608,7 +1131,7 @@ void llvm::computeKnownBits(Value *V, APInt &KnownZero, APInt &KnownOne, // to determine if we can prove known low zero bits. APInt LocalKnownZero(BitWidth, 0), LocalKnownOne(BitWidth, 0); computeKnownBits(I->getOperand(0), LocalKnownZero, LocalKnownOne, TD, - Depth+1); + Depth+1, Q); unsigned TrailZ = LocalKnownZero.countTrailingOnes(); gep_type_iterator GTI = gep_type_begin(I); @@ -644,7 +1167,7 @@ void llvm::computeKnownBits(Value *V, APInt &KnownZero, APInt &KnownOne, unsigned GEPOpiBits = Index->getType()->getScalarSizeInBits(); uint64_t TypeSize = TD ? TD->getTypeAllocSize(IndexedTy) : 1; LocalKnownZero = LocalKnownOne = APInt(GEPOpiBits, 0); - computeKnownBits(Index, LocalKnownZero, LocalKnownOne, TD, Depth+1); + computeKnownBits(Index, LocalKnownZero, LocalKnownOne, TD, Depth+1, Q); TrailZ = std::min(TrailZ, unsigned(countTrailingZeros(TypeSize) + LocalKnownZero.countTrailingOnes())); @@ -686,11 +1209,11 @@ void llvm::computeKnownBits(Value *V, APInt &KnownZero, APInt &KnownOne, break; // Ok, we have a PHI of the form L op= R. Check for low // zero bits. 
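Illustrative aside (not part of the patch): the urem case above records that a remainder by a power-of-two RA can only set the low log2(RA) bits. A minimal standalone check of that fact follows (unsigned arithmetic only; the srem case above additionally tracks the sign of the first operand).

#include <cassert>
#include <cstdint>

// For a power-of-two modulus RA, X % RA == X & (RA - 1), so every bit at or
// above log2(RA) is known zero; this is the mask the urem case ORs into
// KnownZero.
uint64_t knownZeroMaskOfURem(uint64_t RA) {
  assert(RA != 0 && (RA & (RA - 1)) == 0 && "RA must be a power of two");
  return ~(RA - 1);
}

int main() {
  for (uint64_t X : {0ull, 7ull, 8ull, 123456789ull})
    for (uint64_t RA : {1ull, 8ull, 1024ull}) {
      assert(X % RA == (X & (RA - 1)));
      assert(((X % RA) & knownZeroMaskOfURem(RA)) == 0);
    }
  return 0;
}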
- computeKnownBits(R, KnownZero2, KnownOne2, TD, Depth+1); + computeKnownBits(R, KnownZero2, KnownOne2, TD, Depth+1, Q); // We need to take the minimum number of known bits APInt KnownZero3(KnownZero), KnownOne3(KnownOne); - computeKnownBits(L, KnownZero3, KnownOne3, TD, Depth+1); + computeKnownBits(L, KnownZero3, KnownOne3, TD, Depth+1, Q); KnownZero = APInt::getLowBitsSet(BitWidth, std::min(KnownZero2.countTrailingOnes(), @@ -722,7 +1245,7 @@ void llvm::computeKnownBits(Value *V, APInt &KnownZero, APInt &KnownOne, // Recurse, but cap the recursion to one level, because we don't // want to waste time spinning around in loops. computeKnownBits(P->getIncomingValue(i), KnownZero2, KnownOne2, TD, - MaxDepth-1); + MaxDepth-1, Q); KnownZero &= KnownZero2; KnownOne &= KnownOne2; // If all bits have been ruled out, there's no need to check @@ -774,19 +1297,19 @@ void llvm::computeKnownBits(Value *V, APInt &KnownZero, APInt &KnownOne, case Intrinsic::sadd_with_overflow: computeKnownBitsAddSub(true, II->getArgOperand(0), II->getArgOperand(1), false, KnownZero, - KnownOne, KnownZero2, KnownOne2, TD, Depth); + KnownOne, KnownZero2, KnownOne2, TD, Depth, Q); break; case Intrinsic::usub_with_overflow: case Intrinsic::ssub_with_overflow: computeKnownBitsAddSub(false, II->getArgOperand(0), II->getArgOperand(1), false, KnownZero, - KnownOne, KnownZero2, KnownOne2, TD, Depth); + KnownOne, KnownZero2, KnownOne2, TD, Depth, Q); break; case Intrinsic::umul_with_overflow: case Intrinsic::smul_with_overflow: computeKnownBitsMul(II->getArgOperand(0), II->getArgOperand(1), false, KnownZero, KnownOne, - KnownZero2, KnownOne2, TD, Depth); + KnownZero2, KnownOne2, TD, Depth, Q); break; } } @@ -796,10 +1319,11 @@ void llvm::computeKnownBits(Value *V, APInt &KnownZero, APInt &KnownOne, assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); } -/// ComputeSignBit - Determine whether the sign bit is known to be zero or -/// one. Convenience wrapper around computeKnownBits. -void llvm::ComputeSignBit(Value *V, bool &KnownZero, bool &KnownOne, - const DataLayout *TD, unsigned Depth) { +/// Determine whether the sign bit is known to be zero or one. +/// Convenience wrapper around computeKnownBits. +void ComputeSignBit(Value *V, bool &KnownZero, bool &KnownOne, + const DataLayout *TD, unsigned Depth, + const Query &Q) { unsigned BitWidth = getBitWidth(V->getType(), TD); if (!BitWidth) { KnownZero = false; @@ -808,16 +1332,17 @@ void llvm::ComputeSignBit(Value *V, bool &KnownZero, bool &KnownOne, } APInt ZeroBits(BitWidth, 0); APInt OneBits(BitWidth, 0); - computeKnownBits(V, ZeroBits, OneBits, TD, Depth); + computeKnownBits(V, ZeroBits, OneBits, TD, Depth, Q); KnownOne = OneBits[BitWidth - 1]; KnownZero = ZeroBits[BitWidth - 1]; } -/// isKnownToBeAPowerOfTwo - Return true if the given value is known to have exactly one +/// Return true if the given value is known to have exactly one /// bit set when defined. For vectors return true if every element is known to -/// be a power of two when defined. Supports values with integer or pointer +/// be a power of two when defined. Supports values with integer or pointer /// types and vectors of integers. 
-bool llvm::isKnownToBeAPowerOfTwo(Value *V, bool OrZero, unsigned Depth) { +bool isKnownToBeAPowerOfTwo(Value *V, bool OrZero, unsigned Depth, + const Query &Q) { if (Constant *C = dyn_cast(V)) { if (C->isNullValue()) return OrZero; @@ -844,19 +1369,20 @@ bool llvm::isKnownToBeAPowerOfTwo(Value *V, bool OrZero, unsigned Depth) { // A shift of a power of two is a power of two or zero. if (OrZero && (match(V, m_Shl(m_Value(X), m_Value())) || match(V, m_Shr(m_Value(X), m_Value())))) - return isKnownToBeAPowerOfTwo(X, /*OrZero*/true, Depth); + return isKnownToBeAPowerOfTwo(X, /*OrZero*/true, Depth, Q); if (ZExtInst *ZI = dyn_cast(V)) - return isKnownToBeAPowerOfTwo(ZI->getOperand(0), OrZero, Depth); + return isKnownToBeAPowerOfTwo(ZI->getOperand(0), OrZero, Depth, Q); if (SelectInst *SI = dyn_cast(V)) - return isKnownToBeAPowerOfTwo(SI->getTrueValue(), OrZero, Depth) && - isKnownToBeAPowerOfTwo(SI->getFalseValue(), OrZero, Depth); + return + isKnownToBeAPowerOfTwo(SI->getTrueValue(), OrZero, Depth, Q) && + isKnownToBeAPowerOfTwo(SI->getFalseValue(), OrZero, Depth, Q); if (OrZero && match(V, m_And(m_Value(X), m_Value(Y)))) { // A power of two and'd with anything is a power of two or zero. - if (isKnownToBeAPowerOfTwo(X, /*OrZero*/true, Depth) || - isKnownToBeAPowerOfTwo(Y, /*OrZero*/true, Depth)) + if (isKnownToBeAPowerOfTwo(X, /*OrZero*/true, Depth, Q) || + isKnownToBeAPowerOfTwo(Y, /*OrZero*/true, Depth, Q)) return true; // X & (-X) is always a power of two or zero. if (match(X, m_Neg(m_Specific(Y))) || match(Y, m_Neg(m_Specific(X)))) @@ -871,19 +1397,19 @@ bool llvm::isKnownToBeAPowerOfTwo(Value *V, bool OrZero, unsigned Depth) { if (OrZero || VOBO->hasNoUnsignedWrap() || VOBO->hasNoSignedWrap()) { if (match(X, m_And(m_Specific(Y), m_Value())) || match(X, m_And(m_Value(), m_Specific(Y)))) - if (isKnownToBeAPowerOfTwo(Y, OrZero, Depth)) + if (isKnownToBeAPowerOfTwo(Y, OrZero, Depth, Q)) return true; if (match(Y, m_And(m_Specific(X), m_Value())) || match(Y, m_And(m_Value(), m_Specific(X)))) - if (isKnownToBeAPowerOfTwo(X, OrZero, Depth)) + if (isKnownToBeAPowerOfTwo(X, OrZero, Depth, Q)) return true; unsigned BitWidth = V->getType()->getScalarSizeInBits(); APInt LHSZeroBits(BitWidth, 0), LHSOneBits(BitWidth, 0); - computeKnownBits(X, LHSZeroBits, LHSOneBits, nullptr, Depth); + computeKnownBits(X, LHSZeroBits, LHSOneBits, nullptr, Depth, Q); APInt RHSZeroBits(BitWidth, 0), RHSOneBits(BitWidth, 0); - computeKnownBits(Y, RHSZeroBits, RHSOneBits, nullptr, Depth); + computeKnownBits(Y, RHSZeroBits, RHSOneBits, nullptr, Depth, Q); // If i8 V is a power of two or zero: // ZeroBits: 1 1 1 0 1 1 1 1 // ~ZeroBits: 0 0 0 1 0 0 0 0 @@ -900,7 +1426,8 @@ bool llvm::isKnownToBeAPowerOfTwo(Value *V, bool OrZero, unsigned Depth) { // copying a sign bit (sdiv int_min, 2). if (match(V, m_Exact(m_LShr(m_Value(), m_Value()))) || match(V, m_Exact(m_UDiv(m_Value(), m_Value())))) { - return isKnownToBeAPowerOfTwo(cast(V)->getOperand(0), OrZero, Depth); + return isKnownToBeAPowerOfTwo(cast(V)->getOperand(0), OrZero, + Depth, Q); } return false; @@ -913,7 +1440,7 @@ bool llvm::isKnownToBeAPowerOfTwo(Value *V, bool OrZero, unsigned Depth) { /// /// Currently this routine does not support vector GEPs. 
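Illustrative aside (not part of the patch): the power-of-two reasoning above leans on two bit identities, sketched standalone here with plain 64-bit integers.

#include <cassert>
#include <cstdint>

// Two identities used by isKnownToBeAPowerOfTwo above:
//   (X & (X - 1)) == 0   exactly when X is a power of two or zero, and
//   X & -X               is always a power of two or zero.
bool isPowerOfTwoOrZero(uint64_t X) { return (X & (X - 1)) == 0; }

int main() {
  assert(isPowerOfTwoOrZero(0) && isPowerOfTwoOrZero(1) && isPowerOfTwoOrZero(64));
  assert(!isPowerOfTwoOrZero(12) && !isPowerOfTwoOrZero(0x30));
  for (uint64_t X : {1ull, 12ull, 0x30ull, 0x8000000000000000ull})
    assert(isPowerOfTwoOrZero(X & (0 - X))); // X & -X
  return 0;
}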
static bool isGEPKnownNonNull(GEPOperator *GEP, const DataLayout *DL, - unsigned Depth) { + unsigned Depth, const Query &Q) { if (!GEP->isInBounds() || GEP->getPointerAddressSpace() != 0) return false; @@ -922,7 +1449,7 @@ static bool isGEPKnownNonNull(GEPOperator *GEP, const DataLayout *DL, // If the base pointer is non-null, we cannot walk to a null address with an // inbounds GEP in address space zero. - if (isKnownNonZero(GEP->getPointerOperand(), DL, Depth)) + if (isKnownNonZero(GEP->getPointerOperand(), DL, Depth, Q)) return true; // Past this, if we don't have DataLayout, we can't do much. @@ -965,18 +1492,36 @@ static bool isGEPKnownNonNull(GEPOperator *GEP, const DataLayout *DL, if (Depth++ >= MaxDepth) continue; - if (isKnownNonZero(GTI.getOperand(), DL, Depth)) + if (isKnownNonZero(GTI.getOperand(), DL, Depth, Q)) return true; } return false; } -/// isKnownNonZero - Return true if the given value is known to be non-zero -/// when defined. For vectors return true if every element is known to be -/// non-zero when defined. Supports values with integer or pointer type and -/// vectors of integers. -bool llvm::isKnownNonZero(Value *V, const DataLayout *TD, unsigned Depth) { +/// Does the 'Range' metadata (which must be a valid MD_range operand list) +/// ensure that the value it's attached to is never Value? 'RangeType' is +/// is the type of the value described by the range. +static bool rangeMetadataExcludesValue(MDNode* Ranges, + const APInt& Value) { + const unsigned NumRanges = Ranges->getNumOperands() / 2; + assert(NumRanges >= 1); + for (unsigned i = 0; i < NumRanges; ++i) { + ConstantInt *Lower = cast(Ranges->getOperand(2*i + 0)); + ConstantInt *Upper = cast(Ranges->getOperand(2*i + 1)); + ConstantRange Range(Lower->getValue(), Upper->getValue()); + if (Range.contains(Value)) + return false; + } + return true; +} + +/// Return true if the given value is known to be non-zero when defined. +/// For vectors return true if every element is known to be non-zero when +/// defined. Supports values with integer or pointer type and vectors of +/// integers. +bool isKnownNonZero(Value *V, const DataLayout *TD, unsigned Depth, + const Query &Q) { if (Constant *C = dyn_cast(V)) { if (C->isNullValue()) return false; @@ -987,6 +1532,18 @@ bool llvm::isKnownNonZero(Value *V, const DataLayout *TD, unsigned Depth) { return false; } + if (Instruction* I = dyn_cast(V)) { + if (MDNode *Ranges = I->getMetadata(LLVMContext::MD_range)) { + // If the possible ranges don't contain zero, then the value is + // definitely non-zero. + if (IntegerType* Ty = dyn_cast(V->getType())) { + const APInt ZeroValue(Ty->getBitWidth(), 0); + if (rangeMetadataExcludesValue(Ranges, ZeroValue)) + return true; + } + } + } + // The remaining tests are all recursive, so bail out if we hit the limit. if (Depth++ >= MaxDepth) return false; @@ -996,7 +1553,7 @@ bool llvm::isKnownNonZero(Value *V, const DataLayout *TD, unsigned Depth) { if (isKnownNonNull(V)) return true; if (GEPOperator *GEP = dyn_cast(V)) - if (isGEPKnownNonNull(GEP, TD, Depth)) + if (isGEPKnownNonNull(GEP, TD, Depth, Q)) return true; } @@ -1005,11 +1562,12 @@ bool llvm::isKnownNonZero(Value *V, const DataLayout *TD, unsigned Depth) { // X | Y != 0 if X != 0 or Y != 0. Value *X = nullptr, *Y = nullptr; if (match(V, m_Or(m_Value(X), m_Value(Y)))) - return isKnownNonZero(X, TD, Depth) || isKnownNonZero(Y, TD, Depth); + return isKnownNonZero(X, TD, Depth, Q) || + isKnownNonZero(Y, TD, Depth, Q); // ext X != 0 if X != 0. 
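Illustrative aside (not part of the patch): a standalone model of the newly added rangeMetadataExcludesValue. !range metadata lists half-open [Lo, Hi) pairs, and a value is excluded only if no pair contains it; if 0 is excluded, the value the metadata is attached to is known non-zero. The toy below assumes Lo < Hi; the real ConstantRange also represents wrapped ranges.

#include <cassert>
#include <cstdint>
#include <vector>

struct Range { uint64_t Lo, Hi; }; // half-open [Lo, Hi)

// Excluded means no range in the list contains V.
bool rangesExclude(const std::vector<Range> &Ranges, uint64_t V) {
  for (const Range &R : Ranges)
    if (R.Lo <= V && V < R.Hi)
      return false;
  return true;
}

int main() {
  std::vector<Range> R = {{1, 10}, {32, 64}}; // e.g. a load with !range {1,10,32,64}
  assert(rangesExclude(R, 0));  // 0 is not covered, so the value is non-zero
  assert(!rangesExclude(R, 5));
  assert(rangesExclude(R, 10)); // upper bounds are exclusive
  return 0;
}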
if (isa(V) || isa(V)) - return isKnownNonZero(cast(V)->getOperand(0), TD, Depth); + return isKnownNonZero(cast(V)->getOperand(0), TD, Depth, Q); // shl X, Y != 0 if X is odd. Note that the value of the shift is undefined // if the lowest bit is shifted off the end. @@ -1017,11 +1575,11 @@ bool llvm::isKnownNonZero(Value *V, const DataLayout *TD, unsigned Depth) { // shl nuw can't remove any non-zero bits. OverflowingBinaryOperator *BO = cast(V); if (BO->hasNoUnsignedWrap()) - return isKnownNonZero(X, TD, Depth); + return isKnownNonZero(X, TD, Depth, Q); APInt KnownZero(BitWidth, 0); APInt KnownOne(BitWidth, 0); - computeKnownBits(X, KnownZero, KnownOne, TD, Depth); + computeKnownBits(X, KnownZero, KnownOne, TD, Depth, Q); if (KnownOne[0]) return true; } @@ -1031,28 +1589,29 @@ bool llvm::isKnownNonZero(Value *V, const DataLayout *TD, unsigned Depth) { // shr exact can only shift out zero bits. PossiblyExactOperator *BO = cast(V); if (BO->isExact()) - return isKnownNonZero(X, TD, Depth); + return isKnownNonZero(X, TD, Depth, Q); bool XKnownNonNegative, XKnownNegative; - ComputeSignBit(X, XKnownNonNegative, XKnownNegative, TD, Depth); + ComputeSignBit(X, XKnownNonNegative, XKnownNegative, TD, Depth, Q); if (XKnownNegative) return true; } // div exact can only produce a zero if the dividend is zero. else if (match(V, m_Exact(m_IDiv(m_Value(X), m_Value())))) { - return isKnownNonZero(X, TD, Depth); + return isKnownNonZero(X, TD, Depth, Q); } // X + Y. else if (match(V, m_Add(m_Value(X), m_Value(Y)))) { bool XKnownNonNegative, XKnownNegative; bool YKnownNonNegative, YKnownNegative; - ComputeSignBit(X, XKnownNonNegative, XKnownNegative, TD, Depth); - ComputeSignBit(Y, YKnownNonNegative, YKnownNegative, TD, Depth); + ComputeSignBit(X, XKnownNonNegative, XKnownNegative, TD, Depth, Q); + ComputeSignBit(Y, YKnownNonNegative, YKnownNegative, TD, Depth, Q); // If X and Y are both non-negative (as signed values) then their sum is not // zero unless both X and Y are zero. if (XKnownNonNegative && YKnownNonNegative) - if (isKnownNonZero(X, TD, Depth) || isKnownNonZero(Y, TD, Depth)) + if (isKnownNonZero(X, TD, Depth, Q) || + isKnownNonZero(Y, TD, Depth, Q)) return true; // If X and Y are both negative (as signed values) then their sum is not @@ -1063,20 +1622,22 @@ bool llvm::isKnownNonZero(Value *V, const DataLayout *TD, unsigned Depth) { APInt Mask = APInt::getSignedMaxValue(BitWidth); // The sign bit of X is set. If some other bit is set then X is not equal // to INT_MIN. - computeKnownBits(X, KnownZero, KnownOne, TD, Depth); + computeKnownBits(X, KnownZero, KnownOne, TD, Depth, Q); if ((KnownOne & Mask) != 0) return true; // The sign bit of Y is set. If some other bit is set then Y is not equal // to INT_MIN. - computeKnownBits(Y, KnownZero, KnownOne, TD, Depth); + computeKnownBits(Y, KnownZero, KnownOne, TD, Depth, Q); if ((KnownOne & Mask) != 0) return true; } // The sum of a non-negative number and a power of two is not zero. - if (XKnownNonNegative && isKnownToBeAPowerOfTwo(Y, /*OrZero*/false, Depth)) + if (XKnownNonNegative && + isKnownToBeAPowerOfTwo(Y, /*OrZero*/false, Depth, Q)) return true; - if (YKnownNonNegative && isKnownToBeAPowerOfTwo(X, /*OrZero*/false, Depth)) + if (YKnownNonNegative && + isKnownToBeAPowerOfTwo(X, /*OrZero*/false, Depth, Q)) return true; } // X * Y. @@ -1085,51 +1646,53 @@ bool llvm::isKnownNonZero(Value *V, const DataLayout *TD, unsigned Depth) { // If X and Y are non-zero then so is X * Y as long as the multiplication // does not overflow. 
if ((BO->hasNoSignedWrap() || BO->hasNoUnsignedWrap()) && - isKnownNonZero(X, TD, Depth) && isKnownNonZero(Y, TD, Depth)) + isKnownNonZero(X, TD, Depth, Q) && + isKnownNonZero(Y, TD, Depth, Q)) return true; } // (C ? X : Y) != 0 if X != 0 and Y != 0. else if (SelectInst *SI = dyn_cast(V)) { - if (isKnownNonZero(SI->getTrueValue(), TD, Depth) && - isKnownNonZero(SI->getFalseValue(), TD, Depth)) + if (isKnownNonZero(SI->getTrueValue(), TD, Depth, Q) && + isKnownNonZero(SI->getFalseValue(), TD, Depth, Q)) return true; } if (!BitWidth) return false; APInt KnownZero(BitWidth, 0); APInt KnownOne(BitWidth, 0); - computeKnownBits(V, KnownZero, KnownOne, TD, Depth); + computeKnownBits(V, KnownZero, KnownOne, TD, Depth, Q); return KnownOne != 0; } -/// MaskedValueIsZero - Return true if 'V & Mask' is known to be zero. We use -/// this predicate to simplify operations downstream. Mask is known to be zero -/// for bits that V cannot have. +/// Return true if 'V & Mask' is known to be zero. We use this predicate to +/// simplify operations downstream. Mask is known to be zero for bits that V +/// cannot have. /// /// This function is defined on values with integer type, values with pointer /// type (but only if TD is non-null), and vectors of integers. In the case /// where V is a vector, the mask, known zero, and known one values are the /// same width as the vector element, and the bit is set only if it is true /// for all of the elements in the vector. -bool llvm::MaskedValueIsZero(Value *V, const APInt &Mask, - const DataLayout *TD, unsigned Depth) { +bool MaskedValueIsZero(Value *V, const APInt &Mask, + const DataLayout *TD, unsigned Depth, + const Query &Q) { APInt KnownZero(Mask.getBitWidth(), 0), KnownOne(Mask.getBitWidth(), 0); - computeKnownBits(V, KnownZero, KnownOne, TD, Depth); + computeKnownBits(V, KnownZero, KnownOne, TD, Depth, Q); return (KnownZero & Mask) == Mask; } -/// ComputeNumSignBits - Return the number of times the sign bit of the -/// register is replicated into the other bits. We know that at least 1 bit -/// is always equal to the sign bit (itself), but other cases can give us -/// information. For example, immediately after an "ashr X, 2", we know that -/// the top 3 bits are all equal to each other, so we return 3. +/// Return the number of times the sign bit of the register is replicated into +/// the other bits. We know that at least 1 bit is always equal to the sign bit +/// (itself), but other cases can give us information. For example, immediately +/// after an "ashr X, 2", we know that the top 3 bits are all equal to each +/// other, so we return 3. /// /// 'Op' must have a scalar integer type. /// -unsigned llvm::ComputeNumSignBits(Value *V, const DataLayout *TD, - unsigned Depth) { +unsigned ComputeNumSignBits(Value *V, const DataLayout *TD, + unsigned Depth, const Query &Q) { assert((TD || V->getType()->isIntOrIntVectorTy()) && "ComputeNumSignBits requires a DataLayout object to operate " "on non-integer values!"); @@ -1150,10 +1713,10 @@ unsigned llvm::ComputeNumSignBits(Value *V, const DataLayout *TD, default: break; case Instruction::SExt: Tmp = TyBits - U->getOperand(0)->getType()->getScalarSizeInBits(); - return ComputeNumSignBits(U->getOperand(0), TD, Depth+1) + Tmp; + return ComputeNumSignBits(U->getOperand(0), TD, Depth+1, Q) + Tmp; case Instruction::AShr: { - Tmp = ComputeNumSignBits(U->getOperand(0), TD, Depth+1); + Tmp = ComputeNumSignBits(U->getOperand(0), TD, Depth+1, Q); // ashr X, C -> adds C sign bits. Vectors too. 
const APInt *ShAmt; if (match(U->getOperand(1), m_APInt(ShAmt))) { @@ -1166,7 +1729,7 @@ unsigned llvm::ComputeNumSignBits(Value *V, const DataLayout *TD, const APInt *ShAmt; if (match(U->getOperand(1), m_APInt(ShAmt))) { // shl destroys sign bits. - Tmp = ComputeNumSignBits(U->getOperand(0), TD, Depth+1); + Tmp = ComputeNumSignBits(U->getOperand(0), TD, Depth+1, Q); Tmp2 = ShAmt->getZExtValue(); if (Tmp2 >= TyBits || // Bad shift. Tmp2 >= Tmp) break; // Shifted all sign bits out. @@ -1178,9 +1741,9 @@ unsigned llvm::ComputeNumSignBits(Value *V, const DataLayout *TD, case Instruction::Or: case Instruction::Xor: // NOT is handled here. // Logical binary ops preserve the number of sign bits at the worst. - Tmp = ComputeNumSignBits(U->getOperand(0), TD, Depth+1); + Tmp = ComputeNumSignBits(U->getOperand(0), TD, Depth+1, Q); if (Tmp != 1) { - Tmp2 = ComputeNumSignBits(U->getOperand(1), TD, Depth+1); + Tmp2 = ComputeNumSignBits(U->getOperand(1), TD, Depth+1, Q); FirstAnswer = std::min(Tmp, Tmp2); // We computed what we know about the sign bits as our first // answer. Now proceed to the generic code that uses @@ -1189,22 +1752,22 @@ unsigned llvm::ComputeNumSignBits(Value *V, const DataLayout *TD, break; case Instruction::Select: - Tmp = ComputeNumSignBits(U->getOperand(1), TD, Depth+1); + Tmp = ComputeNumSignBits(U->getOperand(1), TD, Depth+1, Q); if (Tmp == 1) return 1; // Early out. - Tmp2 = ComputeNumSignBits(U->getOperand(2), TD, Depth+1); + Tmp2 = ComputeNumSignBits(U->getOperand(2), TD, Depth+1, Q); return std::min(Tmp, Tmp2); case Instruction::Add: // Add can have at most one carry bit. Thus we know that the output // is, at worst, one more bit than the inputs. - Tmp = ComputeNumSignBits(U->getOperand(0), TD, Depth+1); + Tmp = ComputeNumSignBits(U->getOperand(0), TD, Depth+1, Q); if (Tmp == 1) return 1; // Early out. // Special case decrementing a value (ADD X, -1): if (ConstantInt *CRHS = dyn_cast(U->getOperand(1))) if (CRHS->isAllOnesValue()) { APInt KnownZero(TyBits, 0), KnownOne(TyBits, 0); - computeKnownBits(U->getOperand(0), KnownZero, KnownOne, TD, Depth+1); + computeKnownBits(U->getOperand(0), KnownZero, KnownOne, TD, Depth+1, Q); // If the input is known to be 0 or 1, the output is 0/-1, which is all // sign bits set. @@ -1217,19 +1780,19 @@ unsigned llvm::ComputeNumSignBits(Value *V, const DataLayout *TD, return Tmp; } - Tmp2 = ComputeNumSignBits(U->getOperand(1), TD, Depth+1); + Tmp2 = ComputeNumSignBits(U->getOperand(1), TD, Depth+1, Q); if (Tmp2 == 1) return 1; return std::min(Tmp, Tmp2)-1; case Instruction::Sub: - Tmp2 = ComputeNumSignBits(U->getOperand(1), TD, Depth+1); + Tmp2 = ComputeNumSignBits(U->getOperand(1), TD, Depth+1, Q); if (Tmp2 == 1) return 1; // Handle NEG. if (ConstantInt *CLHS = dyn_cast(U->getOperand(0))) if (CLHS->isNullValue()) { APInt KnownZero(TyBits, 0), KnownOne(TyBits, 0); - computeKnownBits(U->getOperand(1), KnownZero, KnownOne, TD, Depth+1); + computeKnownBits(U->getOperand(1), KnownZero, KnownOne, TD, Depth+1, Q); // If the input is known to be 0 or 1, the output is 0/-1, which is all // sign bits set. if ((KnownZero | APInt(TyBits, 1)).isAllOnesValue()) @@ -1245,7 +1808,7 @@ unsigned llvm::ComputeNumSignBits(Value *V, const DataLayout *TD, // Sub can have at most one carry bit. Thus we know that the output // is, at worst, one more bit than the inputs. - Tmp = ComputeNumSignBits(U->getOperand(0), TD, Depth+1); + Tmp = ComputeNumSignBits(U->getOperand(0), TD, Depth+1, Q); if (Tmp == 1) return 1; // Early out. 
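Illustrative aside (not part of the patch): a standalone version of the quantity ComputeNumSignBits above tracks, the number of top bits equal to the sign bit, together with a check of the SExt rule (extending from i8 to i64 adds 64 - 8 sign bits). The helper name and the fixed 64-bit width are mine.

#include <cassert>
#include <cstdint>

// Count how many of the top bits of V equal the sign bit; the sign bit itself
// always counts, so the result is at least 1.
int countSignBits(int64_t V) {
  uint64_t U = (uint64_t)V;
  unsigned Sign = unsigned(U >> 63);
  int N = 1;
  for (int I = 62; I >= 0 && ((U >> I) & 1) == Sign; --I)
    ++N;
  return N;
}

int main() {
  assert(countSignBits(0) == 64 && countSignBits(-1) == 64);
  assert(countSignBits(1) == 63 && countSignBits(-2) == 63);
  // The SExt case above: sign-extending from i8 to i64 adds 64 - 8 sign bits.
  for (int X = -128; X <= 127; ++X)
    assert(countSignBits((int64_t)(int8_t)X) >= 64 - 8 + 1);
  return 0;
}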
return std::min(Tmp, Tmp2)-1; @@ -1256,11 +1819,12 @@ unsigned llvm::ComputeNumSignBits(Value *V, const DataLayout *TD, // Take the minimum of all incoming values. This can't infinitely loop // because of our depth threshold. - Tmp = ComputeNumSignBits(PN->getIncomingValue(0), TD, Depth+1); + Tmp = ComputeNumSignBits(PN->getIncomingValue(0), TD, Depth+1, Q); for (unsigned i = 1, e = PN->getNumIncomingValues(); i != e; ++i) { if (Tmp == 1) return Tmp; Tmp = std::min(Tmp, - ComputeNumSignBits(PN->getIncomingValue(i), TD, Depth+1)); + ComputeNumSignBits(PN->getIncomingValue(i), TD, + Depth+1, Q)); } return Tmp; } @@ -1275,7 +1839,7 @@ unsigned llvm::ComputeNumSignBits(Value *V, const DataLayout *TD, // use this information. APInt KnownZero(TyBits, 0), KnownOne(TyBits, 0); APInt Mask; - computeKnownBits(V, KnownZero, KnownOne, TD, Depth); + computeKnownBits(V, KnownZero, KnownOne, TD, Depth, Q); if (KnownZero.isNegative()) { // sign bit is 0 Mask = KnownZero; @@ -1295,9 +1859,9 @@ unsigned llvm::ComputeNumSignBits(Value *V, const DataLayout *TD, return std::max(FirstAnswer, std::min(TyBits, Mask.countLeadingZeros())); } -/// ComputeMultiple - This function computes the integer multiple of Base that -/// equals V. If successful, it returns true and returns the multiple in -/// Multiple. If unsuccessful, it returns false. It looks +/// This function computes the integer multiple of Base that equals V. +/// If successful, it returns true and returns the multiple in +/// Multiple. If unsuccessful, it returns false. It looks /// through SExt instructions only if LookThroughSExt is true. bool llvm::ComputeMultiple(Value *V, unsigned Base, Value *&Multiple, bool LookThroughSExt, unsigned Depth) { @@ -1415,8 +1979,8 @@ bool llvm::ComputeMultiple(Value *V, unsigned Base, Value *&Multiple, return false; } -/// CannotBeNegativeZero - Return true if we can prove that the specified FP -/// value is never equal to -0.0. +/// Return true if we can prove that the specified FP value is never equal to +/// -0.0. /// /// NOTE: this function will need to be revisited when we support non-default /// rounding modes! @@ -1469,8 +2033,8 @@ bool llvm::CannotBeNegativeZero(const Value *V, unsigned Depth) { return false; } -/// isBytewiseValue - If the specified value can be set by repeating the same -/// byte in memory, return the i8 value that it is represented with. This is +/// If the specified value can be set by repeating the same byte in memory, +/// return the i8 value that it is represented with. This is /// true for all i8 values obviously, but is also true for i32 0, i32 -1, /// i16 0xF0F0, double 0.0 etc. If the value can't be handled with a repeated /// byte store (e.g. i16 0x1234), return null. @@ -1618,7 +2182,7 @@ static Value *BuildSubAggregate(Value *From, ArrayRef idx_range, return BuildSubAggregate(From, To, IndexedType, Idxs, IdxSkip, InsertBefore); } -/// FindInsertedValue - Given an aggregrate and an sequence of indices, see if +/// Given an aggregrate and an sequence of indices, see if /// the scalar value indexed is already around as a register, for example if it /// were inserted directly into the aggregrate. /// @@ -1708,9 +2272,8 @@ Value *llvm::FindInsertedValue(Value *V, ArrayRef idx_range, return nullptr; } -/// GetPointerBaseWithConstantOffset - Analyze the specified pointer to see if -/// it can be expressed as a base pointer plus a constant offset. Return the -/// base and offset to the caller. 
+/// Analyze the specified pointer to see if it can be expressed as a base +/// pointer plus a constant offset. Return the base and offset to the caller. Value *llvm::GetPointerBaseWithConstantOffset(Value *Ptr, int64_t &Offset, const DataLayout *DL) { // Without DataLayout, conservatively assume 64-bit offsets, which is @@ -1731,7 +2294,8 @@ Value *llvm::GetPointerBaseWithConstantOffset(Value *Ptr, int64_t &Offset, } Ptr = GEP->getPointerOperand(); - } else if (Operator::getOpcode(Ptr) == Instruction::BitCast) { + } else if (Operator::getOpcode(Ptr) == Instruction::BitCast || + Operator::getOpcode(Ptr) == Instruction::AddrSpaceCast) { Ptr = cast(Ptr)->getOperand(0); } else if (GlobalAlias *GA = dyn_cast(Ptr)) { if (GA->mayBeOverridden()) @@ -1746,9 +2310,9 @@ Value *llvm::GetPointerBaseWithConstantOffset(Value *Ptr, int64_t &Offset, } -/// getConstantStringInfo - This function computes the length of a -/// null-terminated C string pointed to by V. If successful, it returns true -/// and returns the string in Str. If unsuccessful, it returns false. +/// This function computes the length of a null-terminated C string pointed to +/// by V. If successful, it returns true and returns the string in Str. +/// If unsuccessful, it returns false. bool llvm::getConstantStringInfo(const Value *V, StringRef &Str, uint64_t Offset, bool TrimAtNul) { assert(V); @@ -1832,16 +2396,16 @@ bool llvm::getConstantStringInfo(const Value *V, StringRef &Str, // nodes. // TODO: See if we can integrate these two together. -/// GetStringLengthH - If we can compute the length of the string pointed to by +/// If we can compute the length of the string pointed to by /// the specified pointer, return 'len+1'. If we can't, return 0. -static uint64_t GetStringLengthH(Value *V, SmallPtrSet &PHIs) { +static uint64_t GetStringLengthH(Value *V, SmallPtrSetImpl &PHIs) { // Look through noop bitcast instructions. V = V->stripPointerCasts(); // If this is a PHI node, there are two cases: either we have already seen it // or we haven't. if (PHINode *PN = dyn_cast(V)) { - if (!PHIs.insert(PN)) + if (!PHIs.insert(PN).second) return ~0ULL; // already in the set. // If it was new, see if all the input strings are the same length. @@ -1881,7 +2445,7 @@ static uint64_t GetStringLengthH(Value *V, SmallPtrSet &PHIs) { return StrData.size()+1; } -/// GetStringLength - If we can compute the length of the string pointed to by +/// If we can compute the length of the string pointed to by /// the specified pointer, return 'len+1'. If we can't, return 0. uint64_t llvm::GetStringLength(Value *V) { if (!V->getType()->isPointerTy()) return 0; @@ -1900,7 +2464,8 @@ llvm::GetUnderlyingObject(Value *V, const DataLayout *TD, unsigned MaxLookup) { for (unsigned Count = 0; MaxLookup == 0 || Count < MaxLookup; ++Count) { if (GEPOperator *GEP = dyn_cast(V)) { V = GEP->getPointerOperand(); - } else if (Operator::getOpcode(V) == Instruction::BitCast) { + } else if (Operator::getOpcode(V) == Instruction::BitCast || + Operator::getOpcode(V) == Instruction::AddrSpaceCast) { V = cast(V)->getOperand(0); } else if (GlobalAlias *GA = dyn_cast(V)) { if (GA->mayBeOverridden()) @@ -1909,7 +2474,7 @@ llvm::GetUnderlyingObject(Value *V, const DataLayout *TD, unsigned MaxLookup) { } else { // See if InstructionSimplify knows any relevant tricks. if (Instruction *I = dyn_cast(V)) - // TODO: Acquire a DominatorTree and use it. + // TODO: Acquire a DominatorTree and AssumptionTracker and use them. 
if (Value *Simplified = SimplifyInstruction(I, TD, nullptr)) { V = Simplified; continue; @@ -1934,7 +2499,7 @@ llvm::GetUnderlyingObjects(Value *V, Value *P = Worklist.pop_back_val(); P = GetUnderlyingObject(P, TD, MaxLookup); - if (!Visited.insert(P)) + if (!Visited.insert(P).second) continue; if (SelectInst *SI = dyn_cast(P)) { @@ -1953,9 +2518,7 @@ llvm::GetUnderlyingObjects(Value *V, } while (!Worklist.empty()); } -/// onlyUsedByLifetimeMarkers - Return true if the only users of this pointer -/// are lifetime markers. -/// +/// Return true if the only users of this pointer are lifetime markers. bool llvm::onlyUsedByLifetimeMarkers(const Value *V) { for (const User *U : V->users()) { const IntrinsicInst *II = dyn_cast(U); @@ -1983,23 +2546,31 @@ bool llvm::isSafeToSpeculativelyExecute(const Value *V, default: return true; case Instruction::UDiv: - case Instruction::URem: - // x / y is undefined if y == 0, but calculations like x / 3 are safe. - return isKnownNonZero(Inst->getOperand(1), TD); + case Instruction::URem: { + // x / y is undefined if y == 0. + const APInt *V; + if (match(Inst->getOperand(1), m_APInt(V))) + return *V != 0; + return false; + } case Instruction::SDiv: case Instruction::SRem: { - Value *Op = Inst->getOperand(1); - // x / y is undefined if y == 0 - if (!isKnownNonZero(Op, TD)) - return false; - // x / y might be undefined if y == -1 - unsigned BitWidth = getBitWidth(Op->getType(), TD); - if (BitWidth == 0) - return false; - APInt KnownZero(BitWidth, 0); - APInt KnownOne(BitWidth, 0); - computeKnownBits(Op, KnownZero, KnownOne, TD); - return !!KnownZero; + // x / y is undefined if y == 0 or x == INT_MIN and y == -1 + const APInt *X, *Y; + if (match(Inst->getOperand(1), m_APInt(Y))) { + if (*Y != 0) { + if (*Y == -1) { + // The numerator can't be MinSignedValue if the denominator is -1. + if (match(Inst->getOperand(0), m_APInt(X))) + return !Y->isMinSignedValue(); + // The numerator *might* be MinSignedValue. + return false; + } + // The denominator is not 0 or -1, it's safe to proceed. + return true; + } + } + return false; } case Instruction::Load: { const LoadInst *LI = cast(Inst); @@ -2010,41 +2581,44 @@ bool llvm::isSafeToSpeculativelyExecute(const Value *V, return LI->getPointerOperand()->isDereferenceablePointer(TD); } case Instruction::Call: { - if (const IntrinsicInst *II = dyn_cast(Inst)) { - switch (II->getIntrinsicID()) { - // These synthetic intrinsics have no side-effects and just mark - // information about their operands. - // FIXME: There are other no-op synthetic instructions that potentially - // should be considered at least *safe* to speculate... - case Intrinsic::dbg_declare: - case Intrinsic::dbg_value: - return true; - - case Intrinsic::bswap: - case Intrinsic::ctlz: - case Intrinsic::ctpop: - case Intrinsic::cttz: - case Intrinsic::objectsize: - case Intrinsic::sadd_with_overflow: - case Intrinsic::smul_with_overflow: - case Intrinsic::ssub_with_overflow: - case Intrinsic::uadd_with_overflow: - case Intrinsic::umul_with_overflow: - case Intrinsic::usub_with_overflow: - return true; - // Sqrt should be OK, since the llvm sqrt intrinsic isn't defined to set - // errno like libm sqrt would. - case Intrinsic::sqrt: - case Intrinsic::fma: - case Intrinsic::fmuladd: - return true; - // TODO: some fp intrinsics are marked as having the same error handling - // as libm. They're safe to speculate when they won't error. - // TODO: are convert_{from,to}_fp16 safe? - // TODO: can we list target-specific intrinsics here? 
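Illustrative aside (not part of the patch): a standalone model of the rule the sdiv/srem comments above describe. Speculating a signed division is safe only when the denominator is a known constant that is neither 0 nor -1, or is -1 with a numerator known not to be the minimum signed value. Plain int64_t stands in for APInt, and null pointers stand in for operands that are not constants.

#include <cassert>
#include <cstdint>

bool isSignedDivSpeculatable(const int64_t *Num, const int64_t *Den) {
  if (!Den || *Den == 0)
    return false;                  // possibly dividing by zero
  if (*Den != -1)
    return true;                   // no overflow case remains
  return Num && *Num != INT64_MIN; // INT_MIN / -1 overflows
}

int main() {
  int64_t Zero = 0, MinusOne = -1, Three = 3, Min = INT64_MIN, Seven = 7;
  assert(!isSignedDivSpeculatable(nullptr, nullptr));   // unknown denominator
  assert(!isSignedDivSpeculatable(&Seven, &Zero));
  assert(isSignedDivSpeculatable(nullptr, &Three));
  assert(!isSignedDivSpeculatable(nullptr, &MinusOne)); // numerator might be INT_MIN
  assert(!isSignedDivSpeculatable(&Min, &MinusOne));
  assert(isSignedDivSpeculatable(&Seven, &MinusOne));
  return 0;
}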
- default: break; - } - } + if (const IntrinsicInst *II = dyn_cast(Inst)) { + switch (II->getIntrinsicID()) { + // These synthetic intrinsics have no side-effects and just mark + // information about their operands. + // FIXME: There are other no-op synthetic instructions that potentially + // should be considered at least *safe* to speculate... + case Intrinsic::dbg_declare: + case Intrinsic::dbg_value: + return true; + + case Intrinsic::bswap: + case Intrinsic::ctlz: + case Intrinsic::ctpop: + case Intrinsic::cttz: + case Intrinsic::objectsize: + case Intrinsic::sadd_with_overflow: + case Intrinsic::smul_with_overflow: + case Intrinsic::ssub_with_overflow: + case Intrinsic::uadd_with_overflow: + case Intrinsic::umul_with_overflow: + case Intrinsic::usub_with_overflow: + return true; + // Sqrt should be OK, since the llvm sqrt intrinsic isn't defined to set + // errno like libm sqrt would. + case Intrinsic::sqrt: + case Intrinsic::fma: + case Intrinsic::fmuladd: + case Intrinsic::fabs: + case Intrinsic::minnum: + case Intrinsic::maxnum: + return true; + // TODO: some fp intrinsics are marked as having the same error handling + // as libm. They're safe to speculate when they won't error. + // TODO: are convert_{from,to}_fp16 safe? + // TODO: can we list target-specific intrinsics here? + default: break; + } + } return false; // The called function could have undefined behavior or // side-effects, even if marked readnone nounwind. } @@ -2067,8 +2641,7 @@ bool llvm::isSafeToSpeculativelyExecute(const Value *V, } } -/// isKnownNonNull - Return true if we know that the specified value is never -/// null. +/// Return true if we know that the specified value is never null. bool llvm::isKnownNonNull(const Value *V, const TargetLibraryInfo *TLI) { // Alloca never returns null, malloc might. if (isa(V)) return true; @@ -2081,8 +2654,12 @@ bool llvm::isKnownNonNull(const Value *V, const TargetLibraryInfo *TLI) { if (const GlobalValue *GV = dyn_cast(V)) return !GV->hasExternalWeakLinkage(); + // A Load tagged w/nonnull metadata is never null. + if (const LoadInst *LI = dyn_cast(V)) + return LI->getMetadata(LLVMContext::MD_nonnull); + if (ImmutableCallSite CS = V) - if (CS.paramHasAttr(0, Attribute::NonNull)) + if (CS.isReturnNonNull()) return true; // operator new never returns null. diff --git a/lib/AsmParser/LLLexer.cpp b/lib/AsmParser/LLLexer.cpp index 1e5bcdd..6523bce 100644 --- a/lib/AsmParser/LLLexer.cpp +++ b/lib/AsmParser/LLLexer.cpp @@ -161,14 +161,10 @@ static const char *isLabelTail(const char *CurPtr) { // Lexer definition. //===----------------------------------------------------------------------===// -LLLexer::LLLexer(MemoryBuffer *StartBuf, SourceMgr &sm, SMDiagnostic &Err, +LLLexer::LLLexer(StringRef StartBuf, SourceMgr &sm, SMDiagnostic &Err, LLVMContext &C) : CurBuf(StartBuf), ErrorInfo(Err), SM(sm), Context(C), APFloatVal(0.0) { - CurPtr = CurBuf->getBufferStart(); -} - -std::string LLLexer::getFilename() const { - return CurBuf->getBufferIdentifier(); + CurPtr = CurBuf.begin(); } int LLLexer::getNextChar() { @@ -178,7 +174,7 @@ int LLLexer::getNextChar() { case 0: // A nul character in the stream is either the end of the current buffer or // a random nul in the file. Disambiguate that here. - if (CurPtr-1 != CurBuf->getBufferEnd()) + if (CurPtr-1 != CurBuf.end()) return 0; // Just whitespace. // Otherwise, return end of file. 
@@ -516,8 +512,6 @@ lltok::Kind LLLexer::LexIdentifier() { KEYWORD(private); KEYWORD(internal); - KEYWORD(linker_private); // NOTE: deprecated, for parser compatibility - KEYWORD(linker_private_weak); // NOTE: deprecated, for parser compatibility KEYWORD(available_externally); KEYWORD(linkonce); KEYWORD(linkonce_odr); @@ -586,6 +580,7 @@ lltok::Kind LLLexer::LexIdentifier() { KEYWORD(x86_stdcallcc); KEYWORD(x86_fastcallcc); KEYWORD(x86_thiscallcc); + KEYWORD(x86_vectorcallcc); KEYWORD(arm_apcscc); KEYWORD(arm_aapcscc); KEYWORD(arm_aapcs_vfpcc); @@ -612,6 +607,7 @@ lltok::Kind LLLexer::LexIdentifier() { KEYWORD(byval); KEYWORD(inalloca); KEYWORD(cold); + KEYWORD(dereferenceable); KEYWORD(inlinehint); KEYWORD(inreg); KEYWORD(jumptable); @@ -669,6 +665,10 @@ lltok::Kind LLLexer::LexIdentifier() { KEYWORD(x); KEYWORD(blockaddress); + // Use-list order directives. + KEYWORD(uselistorder); + KEYWORD(uselistorder_bb); + KEYWORD(personality); KEYWORD(cleanup); KEYWORD(catch); diff --git a/lib/AsmParser/LLLexer.h b/lib/AsmParser/LLLexer.h index d42de57..219827f 100644 --- a/lib/AsmParser/LLLexer.h +++ b/lib/AsmParser/LLLexer.h @@ -11,8 +11,8 @@ // //===----------------------------------------------------------------------===// -#ifndef LIB_ASMPARSER_LLLEXER_H -#define LIB_ASMPARSER_LLLEXER_H +#ifndef LLVM_LIB_ASMPARSER_LLLEXER_H +#define LLVM_LIB_ASMPARSER_LLLEXER_H #include "LLToken.h" #include "llvm/ADT/APFloat.h" @@ -28,7 +28,7 @@ namespace llvm { class LLLexer { const char *CurPtr; - MemoryBuffer *CurBuf; + StringRef CurBuf; SMDiagnostic &ErrorInfo; SourceMgr &SM; LLVMContext &Context; @@ -43,7 +43,7 @@ namespace llvm { APSInt APSIntVal; public: - explicit LLLexer(MemoryBuffer *StartBuf, SourceMgr &SM, SMDiagnostic &, + explicit LLLexer(StringRef StartBuf, SourceMgr &SM, SMDiagnostic &, LLVMContext &C); ~LLLexer() {} @@ -67,8 +67,6 @@ namespace llvm { void Warning(LocTy WarningLoc, const Twine &Msg) const; void Warning(const Twine &Msg) const { return Warning(getLoc(), Msg); } - std::string getFilename() const; - private: lltok::Kind LexToken(); diff --git a/lib/AsmParser/LLParser.cpp b/lib/AsmParser/LLParser.cpp index be55ac6..2c835f9 100644 --- a/lib/AsmParser/LLParser.cpp +++ b/lib/AsmParser/LLParser.cpp @@ -24,6 +24,7 @@ #include "llvm/IR/Operator.h" #include "llvm/IR/ValueSymbolTable.h" #include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/SaveAndRestore.h" #include "llvm/Support/raw_ostream.h" using namespace llvm; @@ -129,28 +130,11 @@ bool LLParser::ValidateEndOfModule() { } } - // If there are entries in ForwardRefBlockAddresses at this point, they are - // references after the function was defined. Resolve those now. - while (!ForwardRefBlockAddresses.empty()) { - // Okay, we are referencing an already-parsed function, resolve them now. - Function *TheFn = nullptr; - const ValID &Fn = ForwardRefBlockAddresses.begin()->first; - if (Fn.Kind == ValID::t_GlobalName) - TheFn = M->getFunction(Fn.StrVal); - else if (Fn.UIntVal < NumberedVals.size()) - TheFn = dyn_cast(NumberedVals[Fn.UIntVal]); - - if (!TheFn) - return Error(Fn.Loc, "unknown function referenced by blockaddress"); - - // Resolve all these references. - if (ResolveForwardRefBlockAddresses(TheFn, - ForwardRefBlockAddresses.begin()->second, - nullptr)) - return true; - - ForwardRefBlockAddresses.erase(ForwardRefBlockAddresses.begin()); - } + // If there are entries in ForwardRefBlockAddresses at this point, the + // function was never defined. 
+ if (!ForwardRefBlockAddresses.empty()) + return Error(ForwardRefBlockAddresses.begin()->first.Loc, + "expected function name in blockaddress"); for (unsigned i = 0, e = NumberedTypes.size(); i != e; ++i) if (NumberedTypes[i].second.isValid()) @@ -193,38 +177,6 @@ bool LLParser::ValidateEndOfModule() { return false; } -bool LLParser::ResolveForwardRefBlockAddresses(Function *TheFn, - std::vector > &Refs, - PerFunctionState *PFS) { - // Loop over all the references, resolving them. - for (unsigned i = 0, e = Refs.size(); i != e; ++i) { - BasicBlock *Res; - if (PFS) { - if (Refs[i].first.Kind == ValID::t_LocalName) - Res = PFS->GetBB(Refs[i].first.StrVal, Refs[i].first.Loc); - else - Res = PFS->GetBB(Refs[i].first.UIntVal, Refs[i].first.Loc); - } else if (Refs[i].first.Kind == ValID::t_LocalID) { - return Error(Refs[i].first.Loc, - "cannot take address of numeric label after the function is defined"); - } else { - Res = dyn_cast_or_null( - TheFn->getValueSymbolTable().lookup(Refs[i].first.StrVal)); - } - - if (!Res) - return Error(Refs[i].first.Loc, - "referenced value is not a basic block"); - - // Get the BlockAddress for this and update references to use it. - BlockAddress *BA = BlockAddress::get(TheFn, Res); - Refs[i].second->replaceAllUsesWith(BA); - Refs[i].second->eraseFromParent(); - } - return false; -} - - //===----------------------------------------------------------------------===// // Top-Level Entities //===----------------------------------------------------------------------===// @@ -254,8 +206,6 @@ bool LLParser::ParseTopLevelEntities() { // ('constant'|'global') ... case lltok::kw_private: // OptionalLinkage case lltok::kw_internal: // OptionalLinkage - case lltok::kw_linker_private: // Obsolete OptionalLinkage - case lltok::kw_linker_private_weak: // Obsolete OptionalLinkage case lltok::kw_weak: // OptionalLinkage case lltok::kw_weak_odr: // OptionalLinkage case lltok::kw_linkonce: // OptionalLinkage @@ -289,6 +239,9 @@ bool LLParser::ParseTopLevelEntities() { } case lltok::kw_attributes: if (ParseUnnamedAttrGrp()) return true; break; + case lltok::kw_uselistorder: if (ParseUseListOrder()) return true; break; + case lltok::kw_uselistorder_bb: + if (ParseUseListOrderBB()) return true; break; } } } @@ -483,10 +436,10 @@ bool LLParser::ParseUnnamedGlobal() { parseOptionalUnnamedAddr(UnnamedAddr)) return true; - if (HasLinkage || Lex.getKind() != lltok::kw_alias) + if (Lex.getKind() != lltok::kw_alias) return ParseGlobal(Name, NameLoc, Linkage, HasLinkage, Visibility, DLLStorageClass, TLM, UnnamedAddr); - return ParseAlias(Name, NameLoc, Visibility, DLLStorageClass, TLM, + return ParseAlias(Name, NameLoc, Linkage, Visibility, DLLStorageClass, TLM, UnnamedAddr); } @@ -512,10 +465,11 @@ bool LLParser::ParseNamedGlobal() { parseOptionalUnnamedAddr(UnnamedAddr)) return true; - if (HasLinkage || Lex.getKind() != lltok::kw_alias) + if (Lex.getKind() != lltok::kw_alias) return ParseGlobal(Name, NameLoc, Linkage, HasLinkage, Visibility, DLLStorageClass, TLM, UnnamedAddr); - return ParseAlias(Name, NameLoc, Visibility, DLLStorageClass, TLM, + + return ParseAlias(Name, NameLoc, Linkage, Visibility, DLLStorageClass, TLM, UnnamedAddr); } @@ -693,33 +647,29 @@ static bool isValidVisibilityForLinkage(unsigned V, unsigned L) { } /// ParseAlias: -/// ::= GlobalVar '=' OptionalVisibility OptionalDLLStorageClass -/// OptionalThreadLocal OptionalUnNammedAddr 'alias' -/// OptionalLinkage Aliasee +/// ::= GlobalVar '=' OptionalLinkage OptionalVisibility +/// OptionalDLLStorageClass 
OptionalThreadLocal +/// OptionalUnNammedAddr 'alias' Aliasee /// /// Aliasee /// ::= TypeAndValue /// /// Everything through OptionalUnNammedAddr has already been parsed. /// -bool LLParser::ParseAlias(const std::string &Name, LocTy NameLoc, +bool LLParser::ParseAlias(const std::string &Name, LocTy NameLoc, unsigned L, unsigned Visibility, unsigned DLLStorageClass, GlobalVariable::ThreadLocalMode TLM, bool UnnamedAddr) { assert(Lex.getKind() == lltok::kw_alias); Lex.Lex(); - LocTy LinkageLoc = Lex.getLoc(); - unsigned L; - if (ParseOptionalLinkage(L)) - return true; GlobalValue::LinkageTypes Linkage = (GlobalValue::LinkageTypes) L; if(!GlobalAlias::isValidLinkage(Linkage)) - return Error(LinkageLoc, "invalid linkage type for alias"); + return Error(NameLoc, "invalid linkage type for alias"); if (!isValidVisibilityForLinkage(Visibility, L)) - return Error(LinkageLoc, + return Error(NameLoc, "symbol with local linkage must have default visibility"); Constant *Aliasee; @@ -1052,6 +1002,7 @@ bool LLParser::ParseFnAttributeValuePairs(AttrBuilder &B, "invalid use of attribute on a function"); break; case lltok::kw_byval: + case lltok::kw_dereferenceable: case lltok::kw_inalloca: case lltok::kw_nest: case lltok::kw_noalias: @@ -1212,6 +1163,16 @@ bool LLParser::ParseUInt32(unsigned &Val) { return false; } +/// ParseUInt64 +/// ::= uint64 +bool LLParser::ParseUInt64(uint64_t &Val) { + if (Lex.getKind() != lltok::APSInt || Lex.getAPSIntVal().isSigned()) + return TokError("expected integer"); + Val = Lex.getAPSIntVal().getLimitedValue(); + Lex.Lex(); + return false; +} + /// ParseTLSModel /// := 'localdynamic' /// := 'initialexec' @@ -1284,6 +1245,13 @@ bool LLParser::ParseOptionalParamAttrs(AttrBuilder &B) { continue; } case lltok::kw_byval: B.addAttribute(Attribute::ByVal); break; + case lltok::kw_dereferenceable: { + uint64_t Bytes; + if (ParseOptionalDereferenceableBytes(Bytes)) + return true; + B.addDereferenceableAttr(Bytes); + continue; + } case lltok::kw_inalloca: B.addAttribute(Attribute::InAlloca); break; case lltok::kw_inreg: B.addAttribute(Attribute::InReg); break; case lltok::kw_nest: B.addAttribute(Attribute::Nest); break; @@ -1341,6 +1309,13 @@ bool LLParser::ParseOptionalReturnAttrs(AttrBuilder &B) { switch (Token) { default: // End of attributes. 
return HaveError; + case lltok::kw_dereferenceable: { + uint64_t Bytes; + if (ParseOptionalDereferenceableBytes(Bytes)) + return true; + B.addDereferenceableAttr(Bytes); + continue; + } case lltok::kw_inreg: B.addAttribute(Attribute::InReg); break; case lltok::kw_noalias: B.addAttribute(Attribute::NoAlias); break; case lltok::kw_nonnull: B.addAttribute(Attribute::NonNull); break; @@ -1409,10 +1384,6 @@ bool LLParser::ParseOptionalReturnAttrs(AttrBuilder &B) { /// ::= 'common' /// ::= 'extern_weak' /// ::= 'external' -/// -/// Deprecated Values: -/// ::= 'linker_private' -/// ::= 'linker_private_weak' bool LLParser::ParseOptionalLinkage(unsigned &Res, bool &HasLinkage) { HasLinkage = false; switch (Lex.getKind()) { @@ -1430,15 +1401,6 @@ bool LLParser::ParseOptionalLinkage(unsigned &Res, bool &HasLinkage) { case lltok::kw_common: Res = GlobalValue::CommonLinkage; break; case lltok::kw_extern_weak: Res = GlobalValue::ExternalWeakLinkage; break; case lltok::kw_external: Res = GlobalValue::ExternalLinkage; break; - - case lltok::kw_linker_private: - case lltok::kw_linker_private_weak: - Lex.Warning("'" + Lex.getStrVal() + "' is deprecated, treating as" - " PrivateLinkage"); - Lex.Lex(); - // treat linker_private and linker_private_weak as PrivateLinkage - Res = GlobalValue::PrivateLinkage; - return false; } Lex.Lex(); HasLinkage = true; @@ -1486,6 +1448,7 @@ bool LLParser::ParseOptionalDLLStorageClass(unsigned &Res) { /// ::= 'x86_stdcallcc' /// ::= 'x86_fastcallcc' /// ::= 'x86_thiscallcc' +/// ::= 'x86_vectorcallcc' /// ::= 'arm_apcscc' /// ::= 'arm_aapcscc' /// ::= 'arm_aapcs_vfpcc' @@ -1502,7 +1465,7 @@ bool LLParser::ParseOptionalDLLStorageClass(unsigned &Res) { /// ::= 'preserve_allcc' /// ::= 'cc' UINT /// -bool LLParser::ParseOptionalCallingConv(CallingConv::ID &CC) { +bool LLParser::ParseOptionalCallingConv(unsigned &CC) { switch (Lex.getKind()) { default: CC = CallingConv::C; return false; case lltok::kw_ccc: CC = CallingConv::C; break; @@ -1511,6 +1474,7 @@ bool LLParser::ParseOptionalCallingConv(CallingConv::ID &CC) { case lltok::kw_x86_stdcallcc: CC = CallingConv::X86_StdCall; break; case lltok::kw_x86_fastcallcc: CC = CallingConv::X86_FastCall; break; case lltok::kw_x86_thiscallcc: CC = CallingConv::X86_ThisCall; break; + case lltok::kw_x86_vectorcallcc:CC = CallingConv::X86_VectorCall; break; case lltok::kw_arm_apcscc: CC = CallingConv::ARM_APCS; break; case lltok::kw_arm_aapcscc: CC = CallingConv::ARM_AAPCS; break; case lltok::kw_arm_aapcs_vfpcc:CC = CallingConv::ARM_AAPCS_VFP; break; @@ -1527,12 +1491,8 @@ bool LLParser::ParseOptionalCallingConv(CallingConv::ID &CC) { case lltok::kw_preserve_mostcc:CC = CallingConv::PreserveMost; break; case lltok::kw_preserve_allcc: CC = CallingConv::PreserveAll; break; case lltok::kw_cc: { - unsigned ArbitraryCC; Lex.Lex(); - if (ParseUInt32(ArbitraryCC)) - return true; - CC = static_cast(ArbitraryCC); - return false; + return ParseUInt32(CC); } } @@ -1606,6 +1566,26 @@ bool LLParser::ParseOptionalAlignment(unsigned &Alignment) { return false; } +/// ParseOptionalDereferenceableBytes +/// ::= /* empty */ +/// ::= 'dereferenceable' '(' 4 ')' +bool LLParser::ParseOptionalDereferenceableBytes(uint64_t &Bytes) { + Bytes = 0; + if (!EatIfPresent(lltok::kw_dereferenceable)) + return false; + LocTy ParenLoc = Lex.getLoc(); + if (!EatIfPresent(lltok::lparen)) + return Error(ParenLoc, "expected '('"); + LocTy DerefLoc = Lex.getLoc(); + if (ParseUInt64(Bytes)) return true; + ParenLoc = Lex.getLoc(); + if (!EatIfPresent(lltok::rparen)) + return 
Error(ParenLoc, "expected ')'"); + if (!Bytes) + return Error(DerefLoc, "dereferenceable bytes must be non-zero"); + return false; +} + /// ParseOptionalCommaAlign /// ::= /// ::= ',' align 4 @@ -1837,7 +1817,8 @@ bool LLParser::ParseType(Type *&Result, bool AllowVoid) { /// Arg /// ::= Type OptionalAttributes Value OptionalAttributes bool LLParser::ParseParameterList(SmallVectorImpl &ArgList, - PerFunctionState &PFS) { + PerFunctionState &PFS, bool IsMustTailCall, + bool InVarArgsFunc) { if (ParseToken(lltok::lparen, "expected '(' in call")) return true; @@ -1848,6 +1829,17 @@ bool LLParser::ParseParameterList(SmallVectorImpl &ArgList, ParseToken(lltok::comma, "expected ',' in argument list")) return true; + // Parse an ellipsis if this is a musttail call in a variadic function. + if (Lex.getKind() == lltok::dotdotdot) { + const char *Msg = "unexpected ellipsis in argument list for "; + if (!IsMustTailCall) + return TokError(Twine(Msg) + "non-musttail call"); + if (!InVarArgsFunc) + return TokError(Twine(Msg) + "musttail call in non-varargs function"); + Lex.Lex(); // Lex the '...', it is purely for readability. + return ParseToken(lltok::rparen, "expected ')' at end of argument list"); + } + // Parse the argument. LocTy ArgLoc; Type *ArgTy = nullptr; @@ -1864,6 +1856,10 @@ bool LLParser::ParseParameterList(SmallVectorImpl &ArgList, ArgAttrs))); } + if (IsMustTailCall && InVarArgsFunc) + return TokError("expected '...' at end of argument list for musttail call " + "in varargs function"); + Lex.Lex(); // Lex the ')'. return false; } @@ -2159,28 +2155,6 @@ LLParser::PerFunctionState::~PerFunctionState() { } bool LLParser::PerFunctionState::FinishFunction() { - // Check to see if someone took the address of labels in this block. - if (!P.ForwardRefBlockAddresses.empty()) { - ValID FunctionID; - if (!F.getName().empty()) { - FunctionID.Kind = ValID::t_GlobalName; - FunctionID.StrVal = F.getName(); - } else { - FunctionID.Kind = ValID::t_GlobalID; - FunctionID.UIntVal = FunctionNumber; - } - - std::map > >::iterator - FRBAI = P.ForwardRefBlockAddresses.find(FunctionID); - if (FRBAI != P.ForwardRefBlockAddresses.end()) { - // Resolve all these references. - if (P.ResolveForwardRefBlockAddresses(&F, FRBAI->second, this)) - return true; - - P.ForwardRefBlockAddresses.erase(FRBAI); - } - } - if (!ForwardRefVals.empty()) return P.Error(ForwardRefVals.begin()->second.second, "use of undefined value '%" + ForwardRefVals.begin()->first + @@ -2222,7 +2196,7 @@ Value *LLParser::PerFunctionState::GetVal(const std::string &Name, } // Don't make placeholders with invalid type. - if (!Ty->isFirstClassType() && !Ty->isLabelTy()) { + if (!Ty->isFirstClassType()) { P.Error(Loc, "invalid use of a non-first-class type"); return nullptr; } @@ -2263,7 +2237,7 @@ Value *LLParser::PerFunctionState::GetVal(unsigned ID, Type *Ty, return nullptr; } - if (!Ty->isFirstClassType() && !Ty->isLabelTy()) { + if (!Ty->isFirstClassType()) { P.Error(Loc, "invalid use of a non-first-class type"); return nullptr; } @@ -2566,12 +2540,56 @@ bool LLParser::ParseValID(ValID &ID, PerFunctionState *PFS) { if (Label.Kind != ValID::t_LocalID && Label.Kind != ValID::t_LocalName) return Error(Label.Loc, "expected basic block name in blockaddress"); - // Make a global variable as a placeholder for this reference. 
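Illustrative aside (not part of the patch): the grammar added above for the new attribute is 'dereferenceable' '(' uint64 ')', with the parentheses required and a zero byte count rejected. A toy string parser with the same checks follows; the real ParseOptionalDereferenceableBytes works on the lexer's token stream, not on a string.

#include <cassert>
#include <cctype>
#include <cstdint>
#include <string>

// Accepts exactly "dereferenceable(<non-zero uint64>)".
bool parseDereferenceable(const std::string &S, uint64_t &Bytes) {
  const std::string Kw = "dereferenceable(";
  if (S.size() <= Kw.size() || S.compare(0, Kw.size(), Kw) != 0)
    return false;
  size_t I = Kw.size();
  if (!isdigit((unsigned char)S[I]))
    return false;
  Bytes = 0;
  while (I < S.size() && isdigit((unsigned char)S[I]))
    Bytes = Bytes * 10 + (S[I++] - '0');
  if (I + 1 != S.size() || S[I] != ')')
    return false;
  return Bytes != 0; // "dereferenceable bytes must be non-zero"
}

int main() {
  uint64_t N = 0;
  assert(parseDereferenceable("dereferenceable(8)", N) && N == 8);
  assert(!parseDereferenceable("dereferenceable(0)", N)); // zero rejected
  assert(!parseDereferenceable("dereferenceable 8", N));  // '(' required
  return 0;
}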
- GlobalVariable *FwdRef = new GlobalVariable(*M, Type::getInt8Ty(Context), - false, GlobalValue::InternalLinkage, - nullptr, ""); - ForwardRefBlockAddresses[Fn].push_back(std::make_pair(Label, FwdRef)); - ID.ConstantVal = FwdRef; + // Try to find the function (but skip it if it's forward-referenced). + GlobalValue *GV = nullptr; + if (Fn.Kind == ValID::t_GlobalID) { + if (Fn.UIntVal < NumberedVals.size()) + GV = NumberedVals[Fn.UIntVal]; + } else if (!ForwardRefVals.count(Fn.StrVal)) { + GV = M->getNamedValue(Fn.StrVal); + } + Function *F = nullptr; + if (GV) { + // Confirm that it's actually a function with a definition. + if (!isa(GV)) + return Error(Fn.Loc, "expected function name in blockaddress"); + F = cast(GV); + if (F->isDeclaration()) + return Error(Fn.Loc, "cannot take blockaddress inside a declaration"); + } + + if (!F) { + // Make a global variable as a placeholder for this reference. + GlobalValue *&FwdRef = ForwardRefBlockAddresses[Fn][Label]; + if (!FwdRef) + FwdRef = new GlobalVariable(*M, Type::getInt8Ty(Context), false, + GlobalValue::InternalLinkage, nullptr, ""); + ID.ConstantVal = FwdRef; + ID.Kind = ValID::t_Constant; + return false; + } + + // We found the function; now find the basic block. Don't use PFS, since we + // might be inside a constant expression. + BasicBlock *BB; + if (BlockAddressPFS && F == &BlockAddressPFS->getFunction()) { + if (Label.Kind == ValID::t_LocalID) + BB = BlockAddressPFS->GetBB(Label.UIntVal, Label.Loc); + else + BB = BlockAddressPFS->GetBB(Label.StrVal, Label.Loc); + if (!BB) + return Error(Label.Loc, "referenced value is not a basic block"); + } else { + if (Label.Kind == ValID::t_LocalID) + return Error(Label.Loc, "cannot take address of numeric label after " + "the function is defined"); + BB = dyn_cast_or_null( + F->getValueSymbolTable().lookup(Label.StrVal)); + if (!BB) + return Error(Label.Loc, "referenced value is not a basic block"); + } + + ID.ConstantVal = BlockAddress::get(F, BB); ID.Kind = ValID::t_Constant; return false; } @@ -2886,7 +2904,7 @@ bool LLParser::parseOptionalComdat(Comdat *&C) { /// ParseGlobalValueVector /// ::= /*empty*/ /// ::= TypeAndValue (',' TypeAndValue)* -bool LLParser::ParseGlobalValueVector(SmallVectorImpl &Elts) { +bool LLParser::ParseGlobalValueVector(SmallVectorImpl &Elts) { // Empty list. if (Lex.getKind() == lltok::rbrace || Lex.getKind() == lltok::rsquare || @@ -3111,7 +3129,7 @@ bool LLParser::ParseFunctionHeader(Function *&Fn, bool isDefine) { unsigned Visibility; unsigned DLLStorageClass; AttrBuilder RetAttrs; - CallingConv::ID CC; + unsigned CC; Type *RetType = nullptr; LocTy RetTypeLoc = Lex.getLoc(); if (ParseOptionalLinkage(Linkage) || @@ -3314,13 +3332,63 @@ bool LLParser::ParseFunctionHeader(Function *&Fn, bool isDefine) { ArgList[i].Name + "'"); } + if (isDefine) + return false; + + // Check the declaration has no block address forward references. 
+ ValID ID; + if (FunctionName.empty()) { + ID.Kind = ValID::t_GlobalID; + ID.UIntVal = NumberedVals.size() - 1; + } else { + ID.Kind = ValID::t_GlobalName; + ID.StrVal = FunctionName; + } + auto Blocks = ForwardRefBlockAddresses.find(ID); + if (Blocks != ForwardRefBlockAddresses.end()) + return Error(Blocks->first.Loc, + "cannot take blockaddress inside a declaration"); return false; } +bool LLParser::PerFunctionState::resolveForwardRefBlockAddresses() { + ValID ID; + if (FunctionNumber == -1) { + ID.Kind = ValID::t_GlobalName; + ID.StrVal = F.getName(); + } else { + ID.Kind = ValID::t_GlobalID; + ID.UIntVal = FunctionNumber; + } + + auto Blocks = P.ForwardRefBlockAddresses.find(ID); + if (Blocks == P.ForwardRefBlockAddresses.end()) + return false; + + for (const auto &I : Blocks->second) { + const ValID &BBID = I.first; + GlobalValue *GV = I.second; + + assert((BBID.Kind == ValID::t_LocalID || BBID.Kind == ValID::t_LocalName) && + "Expected local id or name"); + BasicBlock *BB; + if (BBID.Kind == ValID::t_LocalName) + BB = GetBB(BBID.StrVal, BBID.Loc); + else + BB = GetBB(BBID.UIntVal, BBID.Loc); + if (!BB) + return P.Error(BBID.Loc, "referenced value is not a basic block"); + + GV->replaceAllUsesWith(BlockAddress::get(&F, BB)); + GV->eraseFromParent(); + } + + P.ForwardRefBlockAddresses.erase(Blocks); + return false; +} /// ParseFunctionBody -/// ::= '{' BasicBlock+ '}' -/// +/// ::= '{' BasicBlock+ UseListOrderDirective* '}' bool LLParser::ParseFunctionBody(Function &Fn) { if (Lex.getKind() != lltok::lbrace) return TokError("expected '{' in function body"); @@ -3331,13 +3399,24 @@ bool LLParser::ParseFunctionBody(Function &Fn) { PerFunctionState PFS(*this, Fn, FunctionNumber); + // Resolve block addresses and allow basic blocks to be forward-declared + // within this function. + if (PFS.resolveForwardRefBlockAddresses()) + return true; + SaveAndRestore ScopeExit(BlockAddressPFS, &PFS); + // We need at least one basic block. - if (Lex.getKind() == lltok::rbrace) + if (Lex.getKind() == lltok::rbrace || Lex.getKind() == lltok::kw_uselistorder) return TokError("function body requires at least one basic block"); - while (Lex.getKind() != lltok::rbrace) + while (Lex.getKind() != lltok::rbrace && + Lex.getKind() != lltok::kw_uselistorder) if (ParseBasicBlock(PFS)) return true; + while (Lex.getKind() != lltok::rbrace) + if (ParseUseListOrder(&PFS)) + return true; + // Eat the }. 
Lex.Lex(); @@ -3656,7 +3735,7 @@ bool LLParser::ParseSwitch(Instruction *&Inst, PerFunctionState &PFS) { ParseTypeAndBasicBlock(DestBB, PFS)) return true; - if (!SeenCases.insert(Constant)) + if (!SeenCases.insert(Constant).second) return Error(CondLoc, "duplicate case value in switch"); if (!isa(Constant)) return Error(CondLoc, "case value is not a constant integer"); @@ -3722,7 +3801,7 @@ bool LLParser::ParseInvoke(Instruction *&Inst, PerFunctionState &PFS) { AttrBuilder RetAttrs, FnAttrs; std::vector FwdRefAttrGrps; LocTy NoBuiltinLoc; - CallingConv::ID CC; + unsigned CC; Type *RetType = nullptr; LocTy RetTypeLoc; ValID CalleeID; @@ -4136,7 +4215,7 @@ bool LLParser::ParseCall(Instruction *&Inst, PerFunctionState &PFS, AttrBuilder RetAttrs, FnAttrs; std::vector FwdRefAttrGrps; LocTy BuiltinLoc; - CallingConv::ID CC; + unsigned CC; Type *RetType = nullptr; LocTy RetTypeLoc; ValID CalleeID; @@ -4149,7 +4228,8 @@ bool LLParser::ParseCall(Instruction *&Inst, PerFunctionState &PFS, ParseOptionalReturnAttrs(RetAttrs) || ParseType(RetType, RetTypeLoc, true /*void allowed*/) || ParseValID(CalleeID) || - ParseParameterList(ArgList, PFS) || + ParseParameterList(ArgList, PFS, TCK == CallInst::TCK_MustTail, + PFS.getFunction().isVarArg()) || ParseFnAttributeValuePairs(FnAttrs, FwdRefAttrGrps, false, BuiltinLoc)) return true; @@ -4596,3 +4676,135 @@ bool LLParser::ParseMDNodeVector(SmallVectorImpl &Elts, return false; } + +//===----------------------------------------------------------------------===// +// Use-list order directives. +//===----------------------------------------------------------------------===// +bool LLParser::sortUseListOrder(Value *V, ArrayRef Indexes, + SMLoc Loc) { + if (V->use_empty()) + return Error(Loc, "value has no uses"); + + unsigned NumUses = 0; + SmallDenseMap Order; + for (const Use &U : V->uses()) { + if (++NumUses > Indexes.size()) + break; + Order[&U] = Indexes[NumUses - 1]; + } + if (NumUses < 2) + return Error(Loc, "value only has one use"); + if (Order.size() != Indexes.size() || NumUses > Indexes.size()) + return Error(Loc, "wrong number of indexes, expected " + + Twine(std::distance(V->use_begin(), V->use_end()))); + + V->sortUseList([&](const Use &L, const Use &R) { + return Order.lookup(&L) < Order.lookup(&R); + }); + return false; +} + +/// ParseUseListOrderIndexes +/// ::= '{' uint32 (',' uint32)+ '}' +bool LLParser::ParseUseListOrderIndexes(SmallVectorImpl &Indexes) { + SMLoc Loc = Lex.getLoc(); + if (ParseToken(lltok::lbrace, "expected '{' here")) + return true; + if (Lex.getKind() == lltok::rbrace) + return Lex.Error("expected non-empty list of uselistorder indexes"); + + // Use Offset, Max, and IsOrdered to check consistency of indexes. The + // indexes should be distinct numbers in the range [0, size-1], and should + // not be in order. + unsigned Offset = 0; + unsigned Max = 0; + bool IsOrdered = true; + assert(Indexes.empty() && "Expected empty order vector"); + do { + unsigned Index; + if (ParseUInt32(Index)) + return true; + + // Update consistency checks. 
+ Offset += Index - Indexes.size(); + Max = std::max(Max, Index); + IsOrdered &= Index == Indexes.size(); + + Indexes.push_back(Index); + } while (EatIfPresent(lltok::comma)); + + if (ParseToken(lltok::rbrace, "expected '}' here")) + return true; + + if (Indexes.size() < 2) + return Error(Loc, "expected >= 2 uselistorder indexes"); + if (Offset != 0 || Max >= Indexes.size()) + return Error(Loc, "expected distinct uselistorder indexes in range [0, size)"); + if (IsOrdered) + return Error(Loc, "expected uselistorder indexes to change the order"); + + return false; +} + +/// ParseUseListOrder +/// ::= 'uselistorder' Type Value ',' UseListOrderIndexes +bool LLParser::ParseUseListOrder(PerFunctionState *PFS) { + SMLoc Loc = Lex.getLoc(); + if (ParseToken(lltok::kw_uselistorder, "expected uselistorder directive")) + return true; + + Value *V; + SmallVector Indexes; + if (ParseTypeAndValue(V, PFS) || + ParseToken(lltok::comma, "expected comma in uselistorder directive") || + ParseUseListOrderIndexes(Indexes)) + return true; + + return sortUseListOrder(V, Indexes, Loc); +} + +/// ParseUseListOrderBB +/// ::= 'uselistorder_bb' @foo ',' %bar ',' UseListOrderIndexes +bool LLParser::ParseUseListOrderBB() { + assert(Lex.getKind() == lltok::kw_uselistorder_bb); + SMLoc Loc = Lex.getLoc(); + Lex.Lex(); + + ValID Fn, Label; + SmallVector Indexes; + if (ParseValID(Fn) || + ParseToken(lltok::comma, "expected comma in uselistorder_bb directive") || + ParseValID(Label) || + ParseToken(lltok::comma, "expected comma in uselistorder_bb directive") || + ParseUseListOrderIndexes(Indexes)) + return true; + + // Check the function. + GlobalValue *GV; + if (Fn.Kind == ValID::t_GlobalName) + GV = M->getNamedValue(Fn.StrVal); + else if (Fn.Kind == ValID::t_GlobalID) + GV = Fn.UIntVal < NumberedVals.size() ? NumberedVals[Fn.UIntVal] : nullptr; + else + return Error(Fn.Loc, "expected function name in uselistorder_bb"); + if (!GV) + return Error(Fn.Loc, "invalid function forward reference in uselistorder_bb"); + auto *F = dyn_cast(GV); + if (!F) + return Error(Fn.Loc, "expected function name in uselistorder_bb"); + if (F->isDeclaration()) + return Error(Fn.Loc, "invalid declaration in uselistorder_bb"); + + // Check the basic block. + if (Label.Kind == ValID::t_LocalID) + return Error(Label.Loc, "invalid numeric label in uselistorder_bb"); + if (Label.Kind != ValID::t_LocalName) + return Error(Label.Loc, "expected basic block name in uselistorder_bb"); + Value *V = F->getValueSymbolTable().lookup(Label.StrVal); + if (!V) + return Error(Label.Loc, "invalid basic block in uselistorder_bb"); + if (!isa(V)) + return Error(Label.Loc, "expected basic block in uselistorder_bb"); + + return sortUseListOrder(V, Indexes, Loc); +} diff --git a/lib/AsmParser/LLParser.h b/lib/AsmParser/LLParser.h index 2efb260..aa62bcc 100644 --- a/lib/AsmParser/LLParser.h +++ b/lib/AsmParser/LLParser.h @@ -11,8 +11,8 @@ // //===----------------------------------------------------------------------===// -#ifndef LLVM_ASMPARSER_LLPARSER_H -#define LLVM_ASMPARSER_LLPARSER_H +#ifndef LLVM_LIB_ASMPARSER_LLPARSER_H +#define LLVM_LIB_ASMPARSER_LLPARSER_H #include "LLLexer.h" #include "llvm/ADT/DenseMap.h" @@ -128,17 +128,21 @@ namespace llvm { // References to blockaddress. The key is the function ValID, the value is // a list of references to blocks in that function. 
- std::map > > - ForwardRefBlockAddresses; + std::map> ForwardRefBlockAddresses; + class PerFunctionState; + /// Reference to per-function state to allow basic blocks to be + /// forward-referenced by blockaddress instructions within the same + /// function. + PerFunctionState *BlockAddressPFS; // Attribute builder reference information. std::map > ForwardRefAttrGroups; std::map NumberedAttrBuilders; public: - LLParser(MemoryBuffer *F, SourceMgr &SM, SMDiagnostic &Err, Module *m) : - Context(m->getContext()), Lex(F, SM, Err, m->getContext()), - M(m) {} + LLParser(StringRef F, SourceMgr &SM, SMDiagnostic &Err, Module *m) + : Context(m->getContext()), Lex(F, SM, Err, m->getContext()), M(m), + BlockAddressPFS(nullptr) {} bool Run(); LLVMContext &getContext() { return Context; } @@ -202,6 +206,11 @@ namespace llvm { Loc = Lex.getLoc(); return ParseUInt32(Val); } + bool ParseUInt64(uint64_t &Val); + bool ParseUInt64(uint64_t &Val, LocTy &Loc) { + Loc = Lex.getLoc(); + return ParseUInt64(Val); + } bool ParseTLSModel(GlobalVariable::ThreadLocalMode &TLM); bool ParseOptionalThreadLocal(GlobalVariable::ThreadLocalMode &TLM); @@ -217,8 +226,9 @@ namespace llvm { } bool ParseOptionalVisibility(unsigned &Visibility); bool ParseOptionalDLLStorageClass(unsigned &DLLStorageClass); - bool ParseOptionalCallingConv(CallingConv::ID &CC); + bool ParseOptionalCallingConv(unsigned &CC); bool ParseOptionalAlignment(unsigned &Alignment); + bool ParseOptionalDereferenceableBytes(uint64_t &Bytes); bool ParseScopeAndOrdering(bool isAtomic, SynchronizationScope &Scope, AtomicOrdering &Ordering); bool ParseOrdering(AtomicOrdering &Ordering); @@ -252,8 +262,8 @@ namespace llvm { bool HasLinkage, unsigned Visibility, unsigned DLLStorageClass, GlobalVariable::ThreadLocalMode TLM, bool UnnamedAddr); - bool ParseAlias(const std::string &Name, LocTy Loc, unsigned Visibility, - unsigned DLLStorageClass, + bool ParseAlias(const std::string &Name, LocTy Loc, unsigned Linkage, + unsigned Visibility, unsigned DLLStorageClass, GlobalVariable::ThreadLocalMode TLM, bool UnnamedAddr); bool parseComdat(); bool ParseStandaloneMetadata(); @@ -321,6 +331,8 @@ namespace llvm { /// unnamed. If there is an error, this returns null otherwise it returns /// the block being defined. BasicBlock *DefineBB(const std::string &Name, LocTy Loc); + + bool resolveForwardRefBlockAddresses(); }; bool ConvertValIDToValue(Type *Ty, ValID &ID, Value *&V, @@ -360,13 +372,15 @@ namespace llvm { : Loc(loc), V(v), Attrs(attrs) {} }; bool ParseParameterList(SmallVectorImpl &ArgList, - PerFunctionState &PFS); + PerFunctionState &PFS, + bool IsMustTailCall = false, + bool InVarArgsFunc = false); // Constant Parsing. bool ParseValID(ValID &ID, PerFunctionState *PFS = nullptr); bool ParseGlobalValue(Type *Ty, Constant *&V); bool ParseGlobalTypeAndValue(Constant *&V); - bool ParseGlobalValueVector(SmallVectorImpl &Elts); + bool ParseGlobalValueVector(SmallVectorImpl &Elts); bool parseOptionalComdat(Comdat *&C); bool ParseMetadataListValue(ValID &ID, PerFunctionState *PFS); bool ParseMetadataValue(ValID &ID, PerFunctionState *PFS); @@ -427,9 +441,11 @@ namespace llvm { int ParseExtractValue(Instruction *&I, PerFunctionState &PFS); int ParseInsertValue(Instruction *&I, PerFunctionState &PFS); - bool ResolveForwardRefBlockAddresses(Function *TheFn, - std::vector > &Refs, - PerFunctionState *PFS); + // Use-list order directives. 
+ bool ParseUseListOrder(PerFunctionState *PFS = nullptr); + bool ParseUseListOrderBB(); + bool ParseUseListOrderIndexes(SmallVectorImpl &Indexes); + bool sortUseListOrder(Value *V, ArrayRef Indexes, SMLoc Loc); }; } // End llvm namespace diff --git a/lib/AsmParser/LLToken.h b/lib/AsmParser/LLToken.h index 534d824..f9821f7 100644 --- a/lib/AsmParser/LLToken.h +++ b/lib/AsmParser/LLToken.h @@ -11,8 +11,8 @@ // //===----------------------------------------------------------------------===// -#ifndef LIBS_ASMPARSER_LLTOKEN_H -#define LIBS_ASMPARSER_LLTOKEN_H +#ifndef LLVM_LIB_ASMPARSER_LLTOKEN_H +#define LLVM_LIB_ASMPARSER_LLTOKEN_H namespace llvm { namespace lltok { @@ -39,8 +39,6 @@ namespace lltok { kw_private, kw_internal, - kw_linker_private, // NOTE: deprecated, for parser compatibility - kw_linker_private_weak, // NOTE: deprecated, for parser compatibility kw_linkonce, kw_linkonce_odr, kw_weak, // Used as a linkage, and a modifier for "cmpxchg". kw_weak_odr, kw_appending, @@ -89,7 +87,7 @@ namespace lltok { kw_cc, kw_ccc, kw_fastcc, kw_coldcc, kw_intel_ocl_bicc, - kw_x86_stdcallcc, kw_x86_fastcallcc, kw_x86_thiscallcc, + kw_x86_stdcallcc, kw_x86_fastcallcc, kw_x86_thiscallcc, kw_x86_vectorcallcc, kw_arm_apcscc, kw_arm_aapcscc, kw_arm_aapcs_vfpcc, kw_msp430_intrcc, kw_ptx_kernel, kw_ptx_device, @@ -106,6 +104,7 @@ namespace lltok { kw_byval, kw_inalloca, kw_cold, + kw_dereferenceable, kw_inlinehint, kw_inreg, kw_jumptable, @@ -181,6 +180,9 @@ namespace lltok { kw_extractelement, kw_insertelement, kw_shufflevector, kw_extractvalue, kw_insertvalue, kw_blockaddress, + // Use-list order directives. + kw_uselistorder, kw_uselistorder_bb, + // Unsigned Valued tokens (UIntVal). GlobalID, // @42 LocalVarID, // %42 diff --git a/lib/AsmParser/Parser.cpp b/lib/AsmParser/Parser.cpp index 91bb51c..0815907 100644 --- a/lib/AsmParser/Parser.cpp +++ b/lib/AsmParser/Parser.cpp @@ -21,26 +21,29 @@ #include using namespace llvm; -Module *llvm::ParseAssembly(MemoryBuffer *F, - Module *M, - SMDiagnostic &Err, - LLVMContext &Context) { +bool llvm::parseAssemblyInto(MemoryBufferRef F, Module &M, SMDiagnostic &Err) { SourceMgr SM; - SM.AddNewSourceBuffer(F, SMLoc()); + std::unique_ptr Buf = MemoryBuffer::getMemBuffer(F, false); + SM.AddNewSourceBuffer(std::move(Buf), SMLoc()); - // If we are parsing into an existing module, do it. - if (M) - return LLParser(F, SM, Err, M).Run() ? nullptr : M; + return LLParser(F.getBuffer(), SM, Err, &M).Run(); +} + +std::unique_ptr llvm::parseAssembly(MemoryBufferRef F, + SMDiagnostic &Err, + LLVMContext &Context) { + std::unique_ptr M = + make_unique(F.getBufferIdentifier(), Context); - // Otherwise create a new module. 
- std::unique_ptr M2(new Module(F->getBufferIdentifier(), Context)); - if (LLParser(F, SM, Err, M2.get()).Run()) + if (parseAssemblyInto(F, *M, Err)) return nullptr; - return M2.release(); + + return std::move(M); } -Module *llvm::ParseAssemblyFile(const std::string &Filename, SMDiagnostic &Err, - LLVMContext &Context) { +std::unique_ptr llvm::parseAssemblyFile(StringRef Filename, + SMDiagnostic &Err, + LLVMContext &Context) { ErrorOr> FileOrErr = MemoryBuffer::getFileOrSTDIN(Filename); if (std::error_code EC = FileOrErr.getError()) { @@ -49,13 +52,12 @@ Module *llvm::ParseAssemblyFile(const std::string &Filename, SMDiagnostic &Err, return nullptr; } - return ParseAssembly(FileOrErr.get().release(), nullptr, Err, Context); + return parseAssembly(FileOrErr.get()->getMemBufferRef(), Err, Context); } -Module *llvm::ParseAssemblyString(const char *AsmString, Module *M, - SMDiagnostic &Err, LLVMContext &Context) { - MemoryBuffer *F = - MemoryBuffer::getMemBuffer(StringRef(AsmString), ""); - - return ParseAssembly(F, M, Err, Context); +std::unique_ptr llvm::parseAssemblyString(StringRef AsmString, + SMDiagnostic &Err, + LLVMContext &Context) { + MemoryBufferRef F(AsmString, ""); + return parseAssembly(F, Err, Context); } diff --git a/lib/Bitcode/Reader/BitReader.cpp b/lib/Bitcode/Reader/BitReader.cpp index b5886c1..9b3acb5 100644 --- a/lib/Bitcode/Reader/BitReader.cpp +++ b/lib/Bitcode/Reader/BitReader.cpp @@ -31,7 +31,7 @@ LLVMBool LLVMParseBitcodeInContext(LLVMContextRef ContextRef, LLVMModuleRef *OutModule, char **OutMessage) { ErrorOr ModuleOrErr = - parseBitcodeFile(unwrap(MemBuf), *unwrap(ContextRef)); + parseBitcodeFile(unwrap(MemBuf)->getMemBufferRef(), *unwrap(ContextRef)); if (std::error_code EC = ModuleOrErr.getError()) { if (OutMessage) *OutMessage = strdup(EC.message().c_str()); @@ -51,8 +51,11 @@ LLVMBool LLVMGetBitcodeModuleInContext(LLVMContextRef ContextRef, LLVMModuleRef *OutM, char **OutMessage) { std::string Message; + std::unique_ptr Owner(unwrap(MemBuf)); + ErrorOr ModuleOrErr = - getLazyBitcodeModule(unwrap(MemBuf), *unwrap(ContextRef)); + getLazyBitcodeModule(std::move(Owner), *unwrap(ContextRef)); + Owner.release(); if (std::error_code EC = ModuleOrErr.getError()) { *OutM = wrap((Module *)nullptr); diff --git a/lib/Bitcode/Reader/BitcodeReader.cpp b/lib/Bitcode/Reader/BitcodeReader.cpp index 192f753..b2ca22c 100644 --- a/lib/Bitcode/Reader/BitcodeReader.cpp +++ b/lib/Bitcode/Reader/BitcodeReader.cpp @@ -25,17 +25,45 @@ #include "llvm/Support/MathExtras.h" #include "llvm/Support/MemoryBuffer.h" #include "llvm/Support/raw_ostream.h" +#include "llvm/Support/ManagedStatic.h" + using namespace llvm; enum { SWITCH_INST_MAGIC = 0x4B5 // May 2012 => 1205 => Hex }; -void BitcodeReader::materializeForwardReferencedFunctions() { - while (!BlockAddrFwdRefs.empty()) { - Function *F = BlockAddrFwdRefs.begin()->first; - F->Materialize(); +std::error_code BitcodeReader::materializeForwardReferencedFunctions() { + if (WillMaterializeAllForwardRefs) + return std::error_code(); + + // Prevent recursion. + WillMaterializeAllForwardRefs = true; + + while (!BasicBlockFwdRefQueue.empty()) { + Function *F = BasicBlockFwdRefQueue.front(); + BasicBlockFwdRefQueue.pop_front(); + assert(F && "Expected valid function"); + if (!BasicBlockFwdRefs.count(F)) + // Already materialized. + continue; + + // Check for a function that isn't materializable to prevent an infinite + // loop. 
When parsing a blockaddress stored in a global variable, there + // isn't a trivial way to check if a function will have a body without a + // linear search through FunctionsWithBodies, so just check it here. + if (!F->isMaterializable()) + return Error(BitcodeError::NeverResolvedFunctionFromBlockAddress); + + // Try to materialize F. + if (std::error_code EC = materialize(F)) + return EC; } + assert(BasicBlockFwdRefs.empty() && "Function missing from queue"); + + // Reset state. + WillMaterializeAllForwardRefs = false; + return std::error_code(); } void BitcodeReader::FreeState() { @@ -51,7 +79,8 @@ void BitcodeReader::FreeState() { DeferredFunctionInfo.clear(); MDKindMap.clear(); - assert(BlockAddrFwdRefs.empty() && "Unresolved blockaddress fwd references"); + assert(BasicBlockFwdRefs.empty() && "Unresolved blockaddress fwd references"); + BasicBlockFwdRefQueue.clear(); } //===----------------------------------------------------------------------===// @@ -487,10 +516,10 @@ static void decodeLLVMAttributesForBitcode(AttrBuilder &B, std::error_code BitcodeReader::ParseAttributeBlock() { if (Stream.EnterSubBlock(bitc::PARAMATTR_BLOCK_ID)) - return Error(InvalidRecord); + return Error(BitcodeError::InvalidRecord); if (!MAttributes.empty()) - return Error(InvalidMultipleBlocks); + return Error(BitcodeError::InvalidMultipleBlocks); SmallVector Record; @@ -503,7 +532,7 @@ std::error_code BitcodeReader::ParseAttributeBlock() { switch (Entry.Kind) { case BitstreamEntry::SubBlock: // Handled for us already. case BitstreamEntry::Error: - return Error(MalformedBlock); + return Error(BitcodeError::MalformedBlock); case BitstreamEntry::EndBlock: return std::error_code(); case BitstreamEntry::Record: @@ -519,7 +548,7 @@ std::error_code BitcodeReader::ParseAttributeBlock() { case bitc::PARAMATTR_CODE_ENTRY_OLD: { // ENTRY: [paramidx0, attr0, ...] // FIXME: Remove in 4.0. if (Record.size() & 1) - return Error(InvalidRecord); + return Error(BitcodeError::InvalidRecord); for (unsigned i = 0, e = Record.size(); i != e; i += 2) { AttrBuilder B; @@ -588,6 +617,8 @@ static Attribute::AttrKind GetAttrFromCode(uint64_t Code) { return Attribute::NonLazyBind; case bitc::ATTR_KIND_NON_NULL: return Attribute::NonNull; + case bitc::ATTR_KIND_DEREFERENCEABLE: + return Attribute::Dereferenceable; case bitc::ATTR_KIND_NO_RED_ZONE: return Attribute::NoRedZone; case bitc::ATTR_KIND_NO_RETURN: @@ -635,16 +666,16 @@ std::error_code BitcodeReader::ParseAttrKind(uint64_t Code, Attribute::AttrKind *Kind) { *Kind = GetAttrFromCode(Code); if (*Kind == Attribute::None) - return Error(InvalidValue); + return Error(BitcodeError::InvalidValue); return std::error_code(); } std::error_code BitcodeReader::ParseAttributeGroupBlock() { if (Stream.EnterSubBlock(bitc::PARAMATTR_GROUP_BLOCK_ID)) - return Error(InvalidRecord); + return Error(BitcodeError::InvalidRecord); if (!MAttributeGroups.empty()) - return Error(InvalidMultipleBlocks); + return Error(BitcodeError::InvalidMultipleBlocks); SmallVector Record; @@ -655,7 +686,7 @@ std::error_code BitcodeReader::ParseAttributeGroupBlock() { switch (Entry.Kind) { case BitstreamEntry::SubBlock: // Handled for us already. case BitstreamEntry::Error: - return Error(MalformedBlock); + return Error(BitcodeError::MalformedBlock); case BitstreamEntry::EndBlock: return std::error_code(); case BitstreamEntry::Record: @@ -670,7 +701,7 @@ std::error_code BitcodeReader::ParseAttributeGroupBlock() { break; case bitc::PARAMATTR_GRP_CODE_ENTRY: { // ENTRY: [grpid, idx, a0, a1, ...] 
if (Record.size() < 3) - return Error(InvalidRecord); + return Error(BitcodeError::InvalidRecord); uint64_t GrpID = Record[0]; uint64_t Idx = Record[1]; // Index of the object this attribute refers to. @@ -683,14 +714,16 @@ std::error_code BitcodeReader::ParseAttributeGroupBlock() { return EC; B.addAttribute(Kind); - } else if (Record[i] == 1) { // Align attribute + } else if (Record[i] == 1) { // Integer attribute Attribute::AttrKind Kind; if (std::error_code EC = ParseAttrKind(Record[++i], &Kind)) return EC; if (Kind == Attribute::Alignment) B.addAlignmentAttr(Record[++i]); - else + else if (Kind == Attribute::StackAlignment) B.addStackAlignmentAttr(Record[++i]); + else if (Kind == Attribute::Dereferenceable) + B.addDereferenceableAttr(Record[++i]); } else { // String attribute assert((Record[i] == 3 || Record[i] == 4) && "Invalid attribute group entry"); @@ -723,14 +756,14 @@ std::error_code BitcodeReader::ParseAttributeGroupBlock() { std::error_code BitcodeReader::ParseTypeTable() { if (Stream.EnterSubBlock(bitc::TYPE_BLOCK_ID_NEW)) - return Error(InvalidRecord); + return Error(BitcodeError::InvalidRecord); return ParseTypeTableBody(); } std::error_code BitcodeReader::ParseTypeTableBody() { if (!TypeList.empty()) - return Error(InvalidMultipleBlocks); + return Error(BitcodeError::InvalidMultipleBlocks); SmallVector Record; unsigned NumRecords = 0; @@ -744,10 +777,10 @@ std::error_code BitcodeReader::ParseTypeTableBody() { switch (Entry.Kind) { case BitstreamEntry::SubBlock: // Handled for us already. case BitstreamEntry::Error: - return Error(MalformedBlock); + return Error(BitcodeError::MalformedBlock); case BitstreamEntry::EndBlock: if (NumRecords != TypeList.size()) - return Error(MalformedBlock); + return Error(BitcodeError::MalformedBlock); return std::error_code(); case BitstreamEntry::Record: // The interesting case. @@ -759,12 +792,12 @@ std::error_code BitcodeReader::ParseTypeTableBody() { Type *ResultTy = nullptr; switch (Stream.readRecord(Entry.ID, Record)) { default: - return Error(InvalidValue); + return Error(BitcodeError::InvalidValue); case bitc::TYPE_CODE_NUMENTRY: // TYPE_CODE_NUMENTRY: [numentries] // TYPE_CODE_NUMENTRY contains a count of the number of types in the // type list. This allows us to reserve space. 
if (Record.size() < 1) - return Error(InvalidRecord); + return Error(BitcodeError::InvalidRecord); TypeList.resize(Record[0]); continue; case bitc::TYPE_CODE_VOID: // VOID @@ -799,20 +832,20 @@ std::error_code BitcodeReader::ParseTypeTableBody() { break; case bitc::TYPE_CODE_INTEGER: // INTEGER: [width] if (Record.size() < 1) - return Error(InvalidRecord); + return Error(BitcodeError::InvalidRecord); ResultTy = IntegerType::get(Context, Record[0]); break; case bitc::TYPE_CODE_POINTER: { // POINTER: [pointee type] or // [pointee type, address space] if (Record.size() < 1) - return Error(InvalidRecord); + return Error(BitcodeError::InvalidRecord); unsigned AddressSpace = 0; if (Record.size() == 2) AddressSpace = Record[1]; ResultTy = getTypeByID(Record[0]); if (!ResultTy) - return Error(InvalidType); + return Error(BitcodeError::InvalidType); ResultTy = PointerType::get(ResultTy, AddressSpace); break; } @@ -820,7 +853,7 @@ std::error_code BitcodeReader::ParseTypeTableBody() { // FIXME: attrid is dead, remove it in LLVM 4.0 // FUNCTION: [vararg, attrid, retty, paramty x N] if (Record.size() < 3) - return Error(InvalidRecord); + return Error(BitcodeError::InvalidRecord); SmallVector ArgTys; for (unsigned i = 3, e = Record.size(); i != e; ++i) { if (Type *T = getTypeByID(Record[i])) @@ -831,7 +864,7 @@ std::error_code BitcodeReader::ParseTypeTableBody() { ResultTy = getTypeByID(Record[2]); if (!ResultTy || ArgTys.size() < Record.size()-3) - return Error(InvalidType); + return Error(BitcodeError::InvalidType); ResultTy = FunctionType::get(ResultTy, ArgTys, Record[0]); break; @@ -839,7 +872,7 @@ std::error_code BitcodeReader::ParseTypeTableBody() { case bitc::TYPE_CODE_FUNCTION: { // FUNCTION: [vararg, retty, paramty x N] if (Record.size() < 2) - return Error(InvalidRecord); + return Error(BitcodeError::InvalidRecord); SmallVector ArgTys; for (unsigned i = 2, e = Record.size(); i != e; ++i) { if (Type *T = getTypeByID(Record[i])) @@ -850,14 +883,14 @@ std::error_code BitcodeReader::ParseTypeTableBody() { ResultTy = getTypeByID(Record[1]); if (!ResultTy || ArgTys.size() < Record.size()-2) - return Error(InvalidType); + return Error(BitcodeError::InvalidType); ResultTy = FunctionType::get(ResultTy, ArgTys, Record[0]); break; } case bitc::TYPE_CODE_STRUCT_ANON: { // STRUCT: [ispacked, eltty x N] if (Record.size() < 1) - return Error(InvalidRecord); + return Error(BitcodeError::InvalidRecord); SmallVector EltTys; for (unsigned i = 1, e = Record.size(); i != e; ++i) { if (Type *T = getTypeByID(Record[i])) @@ -866,21 +899,21 @@ std::error_code BitcodeReader::ParseTypeTableBody() { break; } if (EltTys.size() != Record.size()-1) - return Error(InvalidType); + return Error(BitcodeError::InvalidType); ResultTy = StructType::get(Context, EltTys, Record[0]); break; } case bitc::TYPE_CODE_STRUCT_NAME: // STRUCT_NAME: [strchr x N] if (ConvertToString(Record, 0, TypeName)) - return Error(InvalidRecord); + return Error(BitcodeError::InvalidRecord); continue; case bitc::TYPE_CODE_STRUCT_NAMED: { // STRUCT: [ispacked, eltty x N] if (Record.size() < 1) - return Error(InvalidRecord); + return Error(BitcodeError::InvalidRecord); if (NumRecords >= TypeList.size()) - return Error(InvalidTYPETable); + return Error(BitcodeError::InvalidTYPETable); // Check to see if this was forward referenced, if so fill in the temp. 
StructType *Res = cast_or_null(TypeList[NumRecords]); @@ -899,17 +932,17 @@ std::error_code BitcodeReader::ParseTypeTableBody() { break; } if (EltTys.size() != Record.size()-1) - return Error(InvalidRecord); + return Error(BitcodeError::InvalidRecord); Res->setBody(EltTys, Record[0]); ResultTy = Res; break; } case bitc::TYPE_CODE_OPAQUE: { // OPAQUE: [] if (Record.size() != 1) - return Error(InvalidRecord); + return Error(BitcodeError::InvalidRecord); if (NumRecords >= TypeList.size()) - return Error(InvalidTYPETable); + return Error(BitcodeError::InvalidTYPETable); // Check to see if this was forward referenced, if so fill in the temp. StructType *Res = cast_or_null(TypeList[NumRecords]); @@ -924,24 +957,24 @@ std::error_code BitcodeReader::ParseTypeTableBody() { } case bitc::TYPE_CODE_ARRAY: // ARRAY: [numelts, eltty] if (Record.size() < 2) - return Error(InvalidRecord); + return Error(BitcodeError::InvalidRecord); if ((ResultTy = getTypeByID(Record[1]))) ResultTy = ArrayType::get(ResultTy, Record[0]); else - return Error(InvalidType); + return Error(BitcodeError::InvalidType); break; case bitc::TYPE_CODE_VECTOR: // VECTOR: [numelts, eltty] if (Record.size() < 2) - return Error(InvalidRecord); + return Error(BitcodeError::InvalidRecord); if ((ResultTy = getTypeByID(Record[1]))) ResultTy = VectorType::get(ResultTy, Record[0]); else - return Error(InvalidType); + return Error(BitcodeError::InvalidType); break; } if (NumRecords >= TypeList.size()) - return Error(InvalidTYPETable); + return Error(BitcodeError::InvalidTYPETable); assert(ResultTy && "Didn't read a type?"); assert(!TypeList[NumRecords] && "Already read type?"); TypeList[NumRecords++] = ResultTy; @@ -950,7 +983,7 @@ std::error_code BitcodeReader::ParseTypeTableBody() { std::error_code BitcodeReader::ParseValueSymbolTable() { if (Stream.EnterSubBlock(bitc::VALUE_SYMTAB_BLOCK_ID)) - return Error(InvalidRecord); + return Error(BitcodeError::InvalidRecord); SmallVector Record; @@ -962,7 +995,7 @@ std::error_code BitcodeReader::ParseValueSymbolTable() { switch (Entry.Kind) { case BitstreamEntry::SubBlock: // Handled for us already. 
case BitstreamEntry::Error: - return Error(MalformedBlock); + return Error(BitcodeError::MalformedBlock); case BitstreamEntry::EndBlock: return std::error_code(); case BitstreamEntry::Record: @@ -977,10 +1010,10 @@ std::error_code BitcodeReader::ParseValueSymbolTable() { break; case bitc::VST_CODE_ENTRY: { // VST_ENTRY: [valueid, namechar x N] if (ConvertToString(Record, 1, ValueName)) - return Error(InvalidRecord); + return Error(BitcodeError::InvalidRecord); unsigned ValueID = Record[0]; if (ValueID >= ValueList.size() || !ValueList[ValueID]) - return Error(InvalidRecord); + return Error(BitcodeError::InvalidRecord); Value *V = ValueList[ValueID]; V->setName(StringRef(ValueName.data(), ValueName.size())); @@ -989,10 +1022,10 @@ std::error_code BitcodeReader::ParseValueSymbolTable() { } case bitc::VST_CODE_BBENTRY: { if (ConvertToString(Record, 1, ValueName)) - return Error(InvalidRecord); + return Error(BitcodeError::InvalidRecord); BasicBlock *BB = getBasicBlock(Record[0]); if (!BB) - return Error(InvalidRecord); + return Error(BitcodeError::InvalidRecord); BB->setName(StringRef(ValueName.data(), ValueName.size())); ValueName.clear(); @@ -1006,7 +1039,7 @@ std::error_code BitcodeReader::ParseMetadata() { unsigned NextMDValueNo = MDValueList.size(); if (Stream.EnterSubBlock(bitc::METADATA_BLOCK_ID)) - return Error(InvalidRecord); + return Error(BitcodeError::InvalidRecord); SmallVector Record; @@ -1017,7 +1050,7 @@ std::error_code BitcodeReader::ParseMetadata() { switch (Entry.Kind) { case BitstreamEntry::SubBlock: // Handled for us already. case BitstreamEntry::Error: - return Error(MalformedBlock); + return Error(BitcodeError::MalformedBlock); case BitstreamEntry::EndBlock: return std::error_code(); case BitstreamEntry::Record: @@ -1048,7 +1081,7 @@ std::error_code BitcodeReader::ParseMetadata() { for (unsigned i = 0; i != Size; ++i) { MDNode *MD = dyn_cast_or_null(MDValueList.getValueFwdRef(Record[i])); if (!MD) - return Error(InvalidRecord); + return Error(BitcodeError::InvalidRecord); NMD->addOperand(MD); } break; @@ -1058,14 +1091,14 @@ std::error_code BitcodeReader::ParseMetadata() { // fall-through case bitc::METADATA_NODE: { if (Record.size() % 2 == 1) - return Error(InvalidRecord); + return Error(BitcodeError::InvalidRecord); unsigned Size = Record.size(); SmallVector Elts; for (unsigned i = 0; i != Size; i += 2) { Type *Ty = getTypeByID(Record[i]); if (!Ty) - return Error(InvalidRecord); + return Error(BitcodeError::InvalidRecord); if (Ty->isMetadataTy()) Elts.push_back(MDValueList.getValueFwdRef(Record[i+1])); else if (!Ty->isVoidTy()) @@ -1087,14 +1120,14 @@ std::error_code BitcodeReader::ParseMetadata() { } case bitc::METADATA_KIND: { if (Record.size() < 2) - return Error(InvalidRecord); + return Error(BitcodeError::InvalidRecord); unsigned Kind = Record[0]; SmallString<8> Name(Record.begin()+1, Record.end()); unsigned NewKind = TheModule->getMDKindID(Name.str()); if (!MDKindMap.insert(std::make_pair(Kind, NewKind)).second) - return Error(ConflictingMETADATA_KINDRecords); + return Error(BitcodeError::ConflictingMETADATA_KINDRecords); break; } } @@ -1132,7 +1165,7 @@ std::error_code BitcodeReader::ResolveGlobalAndAliasInits() { if (Constant *C = dyn_cast_or_null(ValueList[ValID])) GlobalInitWorklist.back().first->setInitializer(C); else - return Error(ExpectedConstant); + return Error(BitcodeError::ExpectedConstant); } GlobalInitWorklist.pop_back(); } @@ -1145,7 +1178,7 @@ std::error_code BitcodeReader::ResolveGlobalAndAliasInits() { if (Constant *C = 
dyn_cast_or_null(ValueList[ValID])) AliasInitWorklist.back().first->setAliasee(C); else - return Error(ExpectedConstant); + return Error(BitcodeError::ExpectedConstant); } AliasInitWorklist.pop_back(); } @@ -1158,7 +1191,7 @@ std::error_code BitcodeReader::ResolveGlobalAndAliasInits() { if (Constant *C = dyn_cast_or_null(ValueList[ValID])) FunctionPrefixWorklist.back().first->setPrefixData(C); else - return Error(ExpectedConstant); + return Error(BitcodeError::ExpectedConstant); } FunctionPrefixWorklist.pop_back(); } @@ -1176,7 +1209,7 @@ static APInt ReadWideAPInt(ArrayRef Vals, unsigned TypeBits) { std::error_code BitcodeReader::ParseConstants() { if (Stream.EnterSubBlock(bitc::CONSTANTS_BLOCK_ID)) - return Error(InvalidRecord); + return Error(BitcodeError::InvalidRecord); SmallVector Record; @@ -1189,10 +1222,10 @@ std::error_code BitcodeReader::ParseConstants() { switch (Entry.Kind) { case BitstreamEntry::SubBlock: // Handled for us already. case BitstreamEntry::Error: - return Error(MalformedBlock); + return Error(BitcodeError::MalformedBlock); case BitstreamEntry::EndBlock: if (NextCstNo != ValueList.size()) - return Error(InvalidConstantReference); + return Error(BitcodeError::InvalidConstantReference); // Once all the constants have been read, go through and resolve forward // references. @@ -1214,9 +1247,9 @@ std::error_code BitcodeReader::ParseConstants() { break; case bitc::CST_CODE_SETTYPE: // SETTYPE: [typeid] if (Record.empty()) - return Error(InvalidRecord); + return Error(BitcodeError::InvalidRecord); if (Record[0] >= TypeList.size() || !TypeList[Record[0]]) - return Error(InvalidRecord); + return Error(BitcodeError::InvalidRecord); CurTy = TypeList[Record[0]]; continue; // Skip the ValueList manipulation. case bitc::CST_CODE_NULL: // NULL @@ -1224,12 +1257,12 @@ std::error_code BitcodeReader::ParseConstants() { break; case bitc::CST_CODE_INTEGER: // INTEGER: [intval] if (!CurTy->isIntegerTy() || Record.empty()) - return Error(InvalidRecord); + return Error(BitcodeError::InvalidRecord); V = ConstantInt::get(CurTy, decodeSignRotatedValue(Record[0])); break; case bitc::CST_CODE_WIDE_INTEGER: {// WIDE_INTEGER: [n x intval] if (!CurTy->isIntegerTy() || Record.empty()) - return Error(InvalidRecord); + return Error(BitcodeError::InvalidRecord); APInt VInt = ReadWideAPInt(Record, cast(CurTy)->getBitWidth()); @@ -1239,7 +1272,7 @@ std::error_code BitcodeReader::ParseConstants() { } case bitc::CST_CODE_FLOAT: { // FLOAT: [fpval] if (Record.empty()) - return Error(InvalidRecord); + return Error(BitcodeError::InvalidRecord); if (CurTy->isHalfTy()) V = ConstantFP::get(Context, APFloat(APFloat::IEEEhalf, APInt(16, (uint16_t)Record[0]))); @@ -1269,7 +1302,7 @@ std::error_code BitcodeReader::ParseConstants() { case bitc::CST_CODE_AGGREGATE: {// AGGREGATE: [n x value number] if (Record.empty()) - return Error(InvalidRecord); + return Error(BitcodeError::InvalidRecord); unsigned Size = Record.size(); SmallVector Elts; @@ -1297,7 +1330,7 @@ std::error_code BitcodeReader::ParseConstants() { case bitc::CST_CODE_STRING: // STRING: [values] case bitc::CST_CODE_CSTRING: { // CSTRING: [values] if (Record.empty()) - return Error(InvalidRecord); + return Error(BitcodeError::InvalidRecord); SmallString<16> Elts(Record.begin(), Record.end()); V = ConstantDataArray::getString(Context, Elts, @@ -1306,7 +1339,7 @@ std::error_code BitcodeReader::ParseConstants() { } case bitc::CST_CODE_DATA: {// DATA: [n x value] if (Record.empty()) - return Error(InvalidRecord); + return 
Error(BitcodeError::InvalidRecord); Type *EltTy = cast(CurTy)->getElementType(); unsigned Size = Record.size(); @@ -1351,14 +1384,14 @@ std::error_code BitcodeReader::ParseConstants() { else V = ConstantDataArray::get(Context, Elts); } else { - return Error(InvalidTypeForValue); + return Error(BitcodeError::InvalidTypeForValue); } break; } case bitc::CST_CODE_CE_BINOP: { // CE_BINOP: [opcode, opval, opval] if (Record.size() < 3) - return Error(InvalidRecord); + return Error(BitcodeError::InvalidRecord); int Opc = GetDecodedBinaryOpcode(Record[0], CurTy); if (Opc < 0) { V = UndefValue::get(CurTy); // Unknown binop. @@ -1389,14 +1422,14 @@ std::error_code BitcodeReader::ParseConstants() { } case bitc::CST_CODE_CE_CAST: { // CE_CAST: [opcode, opty, opval] if (Record.size() < 3) - return Error(InvalidRecord); + return Error(BitcodeError::InvalidRecord); int Opc = GetDecodedCastOpcode(Record[0]); if (Opc < 0) { V = UndefValue::get(CurTy); // Unknown cast. } else { Type *OpTy = getTypeByID(Record[1]); if (!OpTy) - return Error(InvalidRecord); + return Error(BitcodeError::InvalidRecord); Constant *Op = ValueList.getConstantFwdRef(Record[2], OpTy); V = UpgradeBitCastExpr(Opc, Op, CurTy); if (!V) V = ConstantExpr::getCast(Opc, Op, CurTy); @@ -1406,12 +1439,12 @@ std::error_code BitcodeReader::ParseConstants() { case bitc::CST_CODE_CE_INBOUNDS_GEP: case bitc::CST_CODE_CE_GEP: { // CE_GEP: [n x operands] if (Record.size() & 1) - return Error(InvalidRecord); + return Error(BitcodeError::InvalidRecord); SmallVector Elts; for (unsigned i = 0, e = Record.size(); i != e; i += 2) { Type *ElTy = getTypeByID(Record[i]); if (!ElTy) - return Error(InvalidRecord); + return Error(BitcodeError::InvalidRecord); Elts.push_back(ValueList.getConstantFwdRef(Record[i+1], ElTy)); } ArrayRef Indices(Elts.begin() + 1, Elts.end()); @@ -1422,7 +1455,7 @@ std::error_code BitcodeReader::ParseConstants() { } case bitc::CST_CODE_CE_SELECT: { // CE_SELECT: [opval#, opval#, opval#] if (Record.size() < 3) - return Error(InvalidRecord); + return Error(BitcodeError::InvalidRecord); Type *SelectorTy = Type::getInt1Ty(Context); @@ -1441,22 +1474,22 @@ std::error_code BitcodeReader::ParseConstants() { case bitc::CST_CODE_CE_EXTRACTELT : { // CE_EXTRACTELT: [opty, opval, opty, opval] if (Record.size() < 3) - return Error(InvalidRecord); + return Error(BitcodeError::InvalidRecord); VectorType *OpTy = dyn_cast_or_null(getTypeByID(Record[0])); if (!OpTy) - return Error(InvalidRecord); + return Error(BitcodeError::InvalidRecord); Constant *Op0 = ValueList.getConstantFwdRef(Record[1], OpTy); Constant *Op1 = nullptr; if (Record.size() == 4) { Type *IdxTy = getTypeByID(Record[2]); if (!IdxTy) - return Error(InvalidRecord); + return Error(BitcodeError::InvalidRecord); Op1 = ValueList.getConstantFwdRef(Record[3], IdxTy); } else // TODO: Remove with llvm 4.0 Op1 = ValueList.getConstantFwdRef(Record[2], Type::getInt32Ty(Context)); if (!Op1) - return Error(InvalidRecord); + return Error(BitcodeError::InvalidRecord); V = ConstantExpr::getExtractElement(Op0, Op1); break; } @@ -1464,7 +1497,7 @@ std::error_code BitcodeReader::ParseConstants() { : { // CE_INSERTELT: [opval, opval, opty, opval] VectorType *OpTy = dyn_cast(CurTy); if (Record.size() < 3 || !OpTy) - return Error(InvalidRecord); + return Error(BitcodeError::InvalidRecord); Constant *Op0 = ValueList.getConstantFwdRef(Record[0], OpTy); Constant *Op1 = ValueList.getConstantFwdRef(Record[1], OpTy->getElementType()); @@ -1472,19 +1505,19 @@ std::error_code BitcodeReader::ParseConstants() { if 
(Record.size() == 4) { Type *IdxTy = getTypeByID(Record[2]); if (!IdxTy) - return Error(InvalidRecord); + return Error(BitcodeError::InvalidRecord); Op2 = ValueList.getConstantFwdRef(Record[3], IdxTy); } else // TODO: Remove with llvm 4.0 Op2 = ValueList.getConstantFwdRef(Record[2], Type::getInt32Ty(Context)); if (!Op2) - return Error(InvalidRecord); + return Error(BitcodeError::InvalidRecord); V = ConstantExpr::getInsertElement(Op0, Op1, Op2); break; } case bitc::CST_CODE_CE_SHUFFLEVEC: { // CE_SHUFFLEVEC: [opval, opval, opval] VectorType *OpTy = dyn_cast(CurTy); if (Record.size() < 3 || !OpTy) - return Error(InvalidRecord); + return Error(BitcodeError::InvalidRecord); Constant *Op0 = ValueList.getConstantFwdRef(Record[0], OpTy); Constant *Op1 = ValueList.getConstantFwdRef(Record[1], OpTy); Type *ShufTy = VectorType::get(Type::getInt32Ty(Context), @@ -1498,7 +1531,7 @@ std::error_code BitcodeReader::ParseConstants() { VectorType *OpTy = dyn_cast_or_null(getTypeByID(Record[0])); if (Record.size() < 4 || !RTy || !OpTy) - return Error(InvalidRecord); + return Error(BitcodeError::InvalidRecord); Constant *Op0 = ValueList.getConstantFwdRef(Record[1], OpTy); Constant *Op1 = ValueList.getConstantFwdRef(Record[2], OpTy); Type *ShufTy = VectorType::get(Type::getInt32Ty(Context), @@ -1509,10 +1542,10 @@ std::error_code BitcodeReader::ParseConstants() { } case bitc::CST_CODE_CE_CMP: { // CE_CMP: [opty, opval, opval, pred] if (Record.size() < 4) - return Error(InvalidRecord); + return Error(BitcodeError::InvalidRecord); Type *OpTy = getTypeByID(Record[0]); if (!OpTy) - return Error(InvalidRecord); + return Error(BitcodeError::InvalidRecord); Constant *Op0 = ValueList.getConstantFwdRef(Record[1], OpTy); Constant *Op1 = ValueList.getConstantFwdRef(Record[2], OpTy); @@ -1526,16 +1559,16 @@ std::error_code BitcodeReader::ParseConstants() { // FIXME: Remove with the 4.0 release. case bitc::CST_CODE_INLINEASM_OLD: { if (Record.size() < 2) - return Error(InvalidRecord); + return Error(BitcodeError::InvalidRecord); std::string AsmStr, ConstrStr; bool HasSideEffects = Record[0] & 1; bool IsAlignStack = Record[0] >> 1; unsigned AsmStrSize = Record[1]; if (2+AsmStrSize >= Record.size()) - return Error(InvalidRecord); + return Error(BitcodeError::InvalidRecord); unsigned ConstStrSize = Record[2+AsmStrSize]; if (3+AsmStrSize+ConstStrSize > Record.size()) - return Error(InvalidRecord); + return Error(BitcodeError::InvalidRecord); for (unsigned i = 0; i != AsmStrSize; ++i) AsmStr += (char)Record[2+i]; @@ -1550,17 +1583,17 @@ std::error_code BitcodeReader::ParseConstants() { // inteldialect). 
case bitc::CST_CODE_INLINEASM: { if (Record.size() < 2) - return Error(InvalidRecord); + return Error(BitcodeError::InvalidRecord); std::string AsmStr, ConstrStr; bool HasSideEffects = Record[0] & 1; bool IsAlignStack = (Record[0] >> 1) & 1; unsigned AsmDialect = Record[0] >> 2; unsigned AsmStrSize = Record[1]; if (2+AsmStrSize >= Record.size()) - return Error(InvalidRecord); + return Error(BitcodeError::InvalidRecord); unsigned ConstStrSize = Record[2+AsmStrSize]; if (3+AsmStrSize+ConstStrSize > Record.size()) - return Error(InvalidRecord); + return Error(BitcodeError::InvalidRecord); for (unsigned i = 0; i != AsmStrSize; ++i) AsmStr += (char)Record[2+i]; @@ -1574,35 +1607,46 @@ std::error_code BitcodeReader::ParseConstants() { } case bitc::CST_CODE_BLOCKADDRESS:{ if (Record.size() < 3) - return Error(InvalidRecord); + return Error(BitcodeError::InvalidRecord); Type *FnTy = getTypeByID(Record[0]); if (!FnTy) - return Error(InvalidRecord); + return Error(BitcodeError::InvalidRecord); Function *Fn = dyn_cast_or_null(ValueList.getConstantFwdRef(Record[1],FnTy)); if (!Fn) - return Error(InvalidRecord); + return Error(BitcodeError::InvalidRecord); + + // Don't let Fn get dematerialized. + BlockAddressesTaken.insert(Fn); // If the function is already parsed we can insert the block address right // away. + BasicBlock *BB; + unsigned BBID = Record[2]; + if (!BBID) + // Invalid reference to entry block. + return Error(BitcodeError::InvalidID); if (!Fn->empty()) { Function::iterator BBI = Fn->begin(), BBE = Fn->end(); - for (size_t I = 0, E = Record[2]; I != E; ++I) { + for (size_t I = 0, E = BBID; I != E; ++I) { if (BBI == BBE) - return Error(InvalidID); + return Error(BitcodeError::InvalidID); ++BBI; } - V = BlockAddress::get(Fn, BBI); + BB = BBI; } else { // Otherwise insert a placeholder and remember it so it can be inserted // when the function is parsed. - GlobalVariable *FwdRef = new GlobalVariable(*Fn->getParent(), - Type::getInt8Ty(Context), - false, GlobalValue::InternalLinkage, - nullptr, ""); - BlockAddrFwdRefs[Fn].push_back(std::make_pair(Record[2], FwdRef)); - V = FwdRef; + auto &FwdBBs = BasicBlockFwdRefs[Fn]; + if (FwdBBs.empty()) + BasicBlockFwdRefQueue.push_back(Fn); + if (FwdBBs.size() < BBID + 1) + FwdBBs.resize(BBID + 1); + if (!FwdBBs[BBID]) + FwdBBs[BBID] = BasicBlock::Create(Context); + BB = FwdBBs[BBID]; } + V = BlockAddress::get(Fn, BB); break; } } @@ -1614,18 +1658,17 @@ std::error_code BitcodeReader::ParseConstants() { std::error_code BitcodeReader::ParseUseLists() { if (Stream.EnterSubBlock(bitc::USELIST_BLOCK_ID)) - return Error(InvalidRecord); - - SmallVector Record; + return Error(BitcodeError::InvalidRecord); // Read all the records. + SmallVector Record; while (1) { BitstreamEntry Entry = Stream.advanceSkippingSubblocks(); switch (Entry.Kind) { case BitstreamEntry::SubBlock: // Handled for us already. case BitstreamEntry::Error: - return Error(MalformedBlock); + return Error(BitcodeError::MalformedBlock); case BitstreamEntry::EndBlock: return std::error_code(); case BitstreamEntry::Record: @@ -1635,14 +1678,42 @@ std::error_code BitcodeReader::ParseUseLists() { // Read a use list record. Record.clear(); + bool IsBB = false; switch (Stream.readRecord(Entry.ID, Record)) { default: // Default behavior: unknown type. break; - case bitc::USELIST_CODE_ENTRY: { // USELIST_CODE_ENTRY: TBD. 
+ case bitc::USELIST_CODE_BB: + IsBB = true; + // fallthrough + case bitc::USELIST_CODE_DEFAULT: { unsigned RecordLength = Record.size(); - if (RecordLength < 1) - return Error(InvalidRecord); - UseListRecords.push_back(Record); + if (RecordLength < 3) + // Records should have at least an ID and two indexes. + return Error(BitcodeError::InvalidRecord); + unsigned ID = Record.back(); + Record.pop_back(); + + Value *V; + if (IsBB) { + assert(ID < FunctionBBs.size() && "Basic block not found"); + V = FunctionBBs[ID]; + } else + V = ValueList[ID]; + unsigned NumUses = 0; + SmallDenseMap Order; + for (const Use &U : V->uses()) { + if (++NumUses > Record.size()) + break; + Order[&U] = Record[NumUses - 1]; + } + if (Order.size() != Record.size() || NumUses > Record.size()) + // Mismatches can happen if the functions are being materialized lazily + // (out-of-order), or a value has been upgraded. + break; + + V->sortUseList([&](const Use &L, const Use &R) { + return Order.lookup(&L) < Order.lookup(&R); + }); break; } } @@ -1655,7 +1726,7 @@ std::error_code BitcodeReader::ParseUseLists() { std::error_code BitcodeReader::RememberAndSkipFunctionBody() { // Get the function we are talking about. if (FunctionsWithBodies.empty()) - return Error(InsufficientFunctionProtos); + return Error(BitcodeError::InsufficientFunctionProtos); Function *Fn = FunctionsWithBodies.back(); FunctionsWithBodies.pop_back(); @@ -1666,7 +1737,7 @@ std::error_code BitcodeReader::RememberAndSkipFunctionBody() { // Skip over the function block for now. if (Stream.SkipBlock()) - return Error(InvalidRecord); + return Error(BitcodeError::InvalidRecord); return std::error_code(); } @@ -1674,7 +1745,7 @@ std::error_code BitcodeReader::GlobalCleanup() { // Patch the initializers for globals and aliases up. ResolveGlobalAndAliasInits(); if (!GlobalInits.empty() || !AliasInits.empty()) - return Error(MalformedGlobalInitializerSet); + return Error(BitcodeError::MalformedGlobalInitializerSet); // Look for intrinsic functions which need to be upgraded at some point for (Module::iterator FI = TheModule->begin(), FE = TheModule->end(); @@ -1703,7 +1774,7 @@ std::error_code BitcodeReader::ParseModule(bool Resume) { if (Resume) Stream.JumpToBit(NextUnreadBit); else if (Stream.EnterSubBlock(bitc::MODULE_BLOCK_ID)) - return Error(InvalidRecord); + return Error(BitcodeError::InvalidRecord); SmallVector Record; std::vector SectionTable; @@ -1715,7 +1786,7 @@ std::error_code BitcodeReader::ParseModule(bool Resume) { switch (Entry.Kind) { case BitstreamEntry::Error: - return Error(MalformedBlock); + return Error(BitcodeError::MalformedBlock); case BitstreamEntry::EndBlock: return GlobalCleanup(); @@ -1723,11 +1794,11 @@ std::error_code BitcodeReader::ParseModule(bool Resume) { switch (Entry.ID) { default: // Skip unknown content. if (Stream.SkipBlock()) - return Error(InvalidRecord); + return Error(BitcodeError::InvalidRecord); break; case bitc::BLOCKINFO_BLOCK_ID: if (Stream.ReadBlockInfoBlock()) - return Error(MalformedBlock); + return Error(BitcodeError::MalformedBlock); break; case bitc::PARAMATTR_BLOCK_ID: if (std::error_code EC = ParseAttributeBlock()) @@ -1797,12 +1868,12 @@ std::error_code BitcodeReader::ParseModule(bool Resume) { default: break; // Default behavior, ignore unknown content. case bitc::MODULE_CODE_VERSION: { // VERSION: [version#] if (Record.size() < 1) - return Error(InvalidRecord); + return Error(BitcodeError::InvalidRecord); // Only version #0 and #1 are supported so far. 
unsigned module_version = Record[0]; switch (module_version) { default: - return Error(InvalidValue); + return Error(BitcodeError::InvalidValue); case 0: UseRelativeIDs = false; break; @@ -1815,21 +1886,21 @@ std::error_code BitcodeReader::ParseModule(bool Resume) { case bitc::MODULE_CODE_TRIPLE: { // TRIPLE: [strchr x N] std::string S; if (ConvertToString(Record, 0, S)) - return Error(InvalidRecord); + return Error(BitcodeError::InvalidRecord); TheModule->setTargetTriple(S); break; } case bitc::MODULE_CODE_DATALAYOUT: { // DATALAYOUT: [strchr x N] std::string S; if (ConvertToString(Record, 0, S)) - return Error(InvalidRecord); + return Error(BitcodeError::InvalidRecord); TheModule->setDataLayout(S); break; } case bitc::MODULE_CODE_ASM: { // ASM: [strchr x N] std::string S; if (ConvertToString(Record, 0, S)) - return Error(InvalidRecord); + return Error(BitcodeError::InvalidRecord); TheModule->setModuleInlineAsm(S); break; } @@ -1837,27 +1908,27 @@ std::error_code BitcodeReader::ParseModule(bool Resume) { // FIXME: Remove in 4.0. std::string S; if (ConvertToString(Record, 0, S)) - return Error(InvalidRecord); + return Error(BitcodeError::InvalidRecord); // Ignore value. break; } case bitc::MODULE_CODE_SECTIONNAME: { // SECTIONNAME: [strchr x N] std::string S; if (ConvertToString(Record, 0, S)) - return Error(InvalidRecord); + return Error(BitcodeError::InvalidRecord); SectionTable.push_back(S); break; } case bitc::MODULE_CODE_GCNAME: { // SECTIONNAME: [strchr x N] std::string S; if (ConvertToString(Record, 0, S)) - return Error(InvalidRecord); + return Error(BitcodeError::InvalidRecord); GCTable.push_back(S); break; } case bitc::MODULE_CODE_COMDAT: { // COMDAT: [selection_kind, name] if (Record.size() < 2) - return Error(InvalidRecord); + return Error(BitcodeError::InvalidRecord); Comdat::SelectionKind SK = getDecodedComdatSelectionKind(Record[0]); unsigned ComdatNameSize = Record[1]; std::string ComdatName; @@ -1874,12 +1945,12 @@ std::error_code BitcodeReader::ParseModule(bool Resume) { // unnamed_addr, dllstorageclass] case bitc::MODULE_CODE_GLOBALVAR: { if (Record.size() < 6) - return Error(InvalidRecord); + return Error(BitcodeError::InvalidRecord); Type *Ty = getTypeByID(Record[0]); if (!Ty) - return Error(InvalidRecord); + return Error(BitcodeError::InvalidRecord); if (!Ty->isPointerTy()) - return Error(InvalidTypeForValue); + return Error(BitcodeError::InvalidTypeForValue); unsigned AddressSpace = cast(Ty)->getAddressSpace(); Ty = cast(Ty)->getElementType(); @@ -1889,7 +1960,7 @@ std::error_code BitcodeReader::ParseModule(bool Resume) { std::string Section; if (Record[5]) { if (Record[5]-1 >= SectionTable.size()) - return Error(InvalidID); + return Error(BitcodeError::InvalidID); Section = SectionTable[Record[5]-1]; } GlobalValue::VisibilityTypes Visibility = GlobalValue::DefaultVisibility; @@ -1942,16 +2013,16 @@ std::error_code BitcodeReader::ParseModule(bool Resume) { // dllstorageclass] case bitc::MODULE_CODE_FUNCTION: { if (Record.size() < 8) - return Error(InvalidRecord); + return Error(BitcodeError::InvalidRecord); Type *Ty = getTypeByID(Record[0]); if (!Ty) - return Error(InvalidRecord); + return Error(BitcodeError::InvalidRecord); if (!Ty->isPointerTy()) - return Error(InvalidTypeForValue); + return Error(BitcodeError::InvalidTypeForValue); FunctionType *FTy = dyn_cast(cast(Ty)->getElementType()); if (!FTy) - return Error(InvalidTypeForValue); + return Error(BitcodeError::InvalidTypeForValue); Function *Func = Function::Create(FTy, GlobalValue::ExternalLinkage, "", 
TheModule); @@ -1964,7 +2035,7 @@ std::error_code BitcodeReader::ParseModule(bool Resume) { Func->setAlignment((1 << Record[5]) >> 1); if (Record[6]) { if (Record[6]-1 >= SectionTable.size()) - return Error(InvalidID); + return Error(BitcodeError::InvalidID); Func->setSection(SectionTable[Record[6]-1]); } // Local linkage must have default visibility. @@ -1973,7 +2044,7 @@ std::error_code BitcodeReader::ParseModule(bool Resume) { Func->setVisibility(GetDecodedVisibility(Record[7])); if (Record.size() > 8 && Record[8]) { if (Record[8]-1 > GCTable.size()) - return Error(InvalidID); + return Error(BitcodeError::InvalidID); Func->setGC(GCTable[Record[8]-1].c_str()); } bool UnnamedAddr = false; @@ -1999,8 +2070,10 @@ std::error_code BitcodeReader::ParseModule(bool Resume) { // If this is a function with a body, remember the prototype we are // creating now, so that we can match up the body with them later. if (!isProto) { + Func->setIsMaterializable(true); FunctionsWithBodies.push_back(Func); - if (LazyStreamer) DeferredFunctionInfo[Func] = 0; + if (LazyStreamer) + DeferredFunctionInfo[Func] = 0; } break; } @@ -2008,13 +2081,13 @@ std::error_code BitcodeReader::ParseModule(bool Resume) { // ALIAS: [alias type, aliasee val#, linkage, visibility, dllstorageclass] case bitc::MODULE_CODE_ALIAS: { if (Record.size() < 3) - return Error(InvalidRecord); + return Error(BitcodeError::InvalidRecord); Type *Ty = getTypeByID(Record[0]); if (!Ty) - return Error(InvalidRecord); + return Error(BitcodeError::InvalidRecord); auto *PTy = dyn_cast(Ty); if (!PTy) - return Error(InvalidTypeForValue); + return Error(BitcodeError::InvalidTypeForValue); auto *NewGA = GlobalAlias::create(PTy->getElementType(), PTy->getAddressSpace(), @@ -2029,9 +2102,9 @@ std::error_code BitcodeReader::ParseModule(bool Resume) { else UpgradeDLLImportExportLinkage(NewGA, Record[2]); if (Record.size() > 5) - NewGA->setThreadLocalMode(GetDecodedThreadLocalMode(Record[5])); + NewGA->setThreadLocalMode(GetDecodedThreadLocalMode(Record[5])); if (Record.size() > 6) - NewGA->setUnnamedAddr(Record[6]); + NewGA->setUnnamedAddr(Record[6]); ValueList.push_back(NewGA); AliasInits.push_back(std::make_pair(NewGA, Record[1])); break; @@ -2040,7 +2113,7 @@ std::error_code BitcodeReader::ParseModule(bool Resume) { case bitc::MODULE_CODE_PURGEVALS: // Trim down the value list to the specified size. if (Record.size() < 1 || Record[0] > ValueList.size()) - return Error(InvalidRecord); + return Error(BitcodeError::InvalidRecord); ValueList.shrinkTo(Record[0]); break; } @@ -2061,7 +2134,7 @@ std::error_code BitcodeReader::ParseBitcodeInto(Module *M) { Stream.Read(4) != 0xC || Stream.Read(4) != 0xE || Stream.Read(4) != 0xD) - return Error(InvalidBitcodeSignature); + return Error(BitcodeError::InvalidBitcodeSignature); // We expect a number of well-defined blocks, though we don't necessarily // need to understand them all. @@ -2074,7 +2147,7 @@ std::error_code BitcodeReader::ParseBitcodeInto(Module *M) { switch (Entry.Kind) { case BitstreamEntry::Error: - return Error(MalformedBlock); + return Error(BitcodeError::MalformedBlock); case BitstreamEntry::EndBlock: return std::error_code(); @@ -2082,12 +2155,12 @@ std::error_code BitcodeReader::ParseBitcodeInto(Module *M) { switch (Entry.ID) { case bitc::BLOCKINFO_BLOCK_ID: if (Stream.ReadBlockInfoBlock()) - return Error(MalformedBlock); + return Error(BitcodeError::MalformedBlock); break; case bitc::MODULE_BLOCK_ID: // Reject multiple MODULE_BLOCK's in a single bitstream. 
if (TheModule) - return Error(InvalidMultipleBlocks); + return Error(BitcodeError::InvalidMultipleBlocks); TheModule = M; if (std::error_code EC = ParseModule(false)) return EC; @@ -2096,7 +2169,7 @@ std::error_code BitcodeReader::ParseBitcodeInto(Module *M) { break; default: if (Stream.SkipBlock()) - return Error(InvalidRecord); + return Error(BitcodeError::InvalidRecord); break; } continue; @@ -2111,14 +2184,14 @@ std::error_code BitcodeReader::ParseBitcodeInto(Module *M) { Stream.AtEndOfStream()) return std::error_code(); - return Error(InvalidRecord); + return Error(BitcodeError::InvalidRecord); } } } ErrorOr BitcodeReader::parseModuleTriple() { if (Stream.EnterSubBlock(bitc::MODULE_BLOCK_ID)) - return Error(InvalidRecord); + return Error(BitcodeError::InvalidRecord); SmallVector Record; @@ -2130,7 +2203,7 @@ ErrorOr BitcodeReader::parseModuleTriple() { switch (Entry.Kind) { case BitstreamEntry::SubBlock: // Handled for us already. case BitstreamEntry::Error: - return Error(MalformedBlock); + return Error(BitcodeError::MalformedBlock); case BitstreamEntry::EndBlock: return Triple; case BitstreamEntry::Record: @@ -2144,7 +2217,7 @@ ErrorOr BitcodeReader::parseModuleTriple() { case bitc::MODULE_CODE_TRIPLE: { // TRIPLE: [strchr x N] std::string S; if (ConvertToString(Record, 0, S)) - return Error(InvalidRecord); + return Error(BitcodeError::InvalidRecord); Triple = S; break; } @@ -2165,7 +2238,7 @@ ErrorOr BitcodeReader::parseTriple() { Stream.Read(4) != 0xC || Stream.Read(4) != 0xE || Stream.Read(4) != 0xD) - return Error(InvalidBitcodeSignature); + return Error(BitcodeError::InvalidBitcodeSignature); // We expect a number of well-defined blocks, though we don't necessarily // need to understand them all. @@ -2174,7 +2247,7 @@ ErrorOr BitcodeReader::parseTriple() { switch (Entry.Kind) { case BitstreamEntry::Error: - return Error(MalformedBlock); + return Error(BitcodeError::MalformedBlock); case BitstreamEntry::EndBlock: return std::error_code(); @@ -2184,7 +2257,7 @@ ErrorOr BitcodeReader::parseTriple() { // Ignore other sub-blocks. if (Stream.SkipBlock()) - return Error(MalformedBlock); + return Error(BitcodeError::MalformedBlock); continue; case BitstreamEntry::Record: @@ -2197,7 +2270,7 @@ ErrorOr BitcodeReader::parseTriple() { /// ParseMetadataAttachment - Parse metadata attachments. std::error_code BitcodeReader::ParseMetadataAttachment() { if (Stream.EnterSubBlock(bitc::METADATA_ATTACHMENT_ID)) - return Error(InvalidRecord); + return Error(BitcodeError::InvalidRecord); SmallVector Record; while (1) { @@ -2206,7 +2279,7 @@ std::error_code BitcodeReader::ParseMetadataAttachment() { switch (Entry.Kind) { case BitstreamEntry::SubBlock: // Handled for us already. 
case BitstreamEntry::Error: - return Error(MalformedBlock); + return Error(BitcodeError::MalformedBlock); case BitstreamEntry::EndBlock: return std::error_code(); case BitstreamEntry::Record: @@ -2222,14 +2295,14 @@ std::error_code BitcodeReader::ParseMetadataAttachment() { case bitc::METADATA_ATTACHMENT: { unsigned RecordLength = Record.size(); if (Record.empty() || (RecordLength - 1) % 2 == 1) - return Error(InvalidRecord); + return Error(BitcodeError::InvalidRecord); Instruction *Inst = InstructionList[Record[0]]; for (unsigned i = 1; i != RecordLength; i = i+2) { unsigned Kind = Record[i]; DenseMap::iterator I = MDKindMap.find(Kind); if (I == MDKindMap.end()) - return Error(InvalidID); + return Error(BitcodeError::InvalidID); Value *Node = MDValueList.getValueFwdRef(Record[i+1]); Inst->setMetadata(I->second, cast(Node)); if (I->second == LLVMContext::MD_tbaa) @@ -2244,7 +2317,7 @@ std::error_code BitcodeReader::ParseMetadataAttachment() { /// ParseFunctionBody - Lazily parse the specified function body block. std::error_code BitcodeReader::ParseFunctionBody(Function *F) { if (Stream.EnterSubBlock(bitc::FUNCTION_BLOCK_ID)) - return Error(InvalidRecord); + return Error(BitcodeError::InvalidRecord); InstructionList.clear(); unsigned ModuleValueListSize = ValueList.size(); @@ -2267,7 +2340,7 @@ std::error_code BitcodeReader::ParseFunctionBody(Function *F) { switch (Entry.Kind) { case BitstreamEntry::Error: - return Error(MalformedBlock); + return Error(BitcodeError::MalformedBlock); case BitstreamEntry::EndBlock: goto OutOfRecordLoop; @@ -2275,7 +2348,7 @@ std::error_code BitcodeReader::ParseFunctionBody(Function *F) { switch (Entry.ID) { default: // Skip unknown content. if (Stream.SkipBlock()) - return Error(InvalidRecord); + return Error(BitcodeError::InvalidRecord); break; case bitc::CONSTANTS_BLOCK_ID: if (std::error_code EC = ParseConstants()) @@ -2294,6 +2367,10 @@ std::error_code BitcodeReader::ParseFunctionBody(Function *F) { if (std::error_code EC = ParseMetadata()) return EC; break; + case bitc::USELIST_BLOCK_ID: + if (std::error_code EC = ParseUseLists()) + return EC; + break; } continue; @@ -2308,16 +2385,41 @@ std::error_code BitcodeReader::ParseFunctionBody(Function *F) { unsigned BitCode = Stream.readRecord(Entry.ID, Record); switch (BitCode) { default: // Default behavior: reject - return Error(InvalidValue); - case bitc::FUNC_CODE_DECLAREBLOCKS: // DECLAREBLOCKS: [nblocks] + return Error(BitcodeError::InvalidValue); + case bitc::FUNC_CODE_DECLAREBLOCKS: { // DECLAREBLOCKS: [nblocks] if (Record.size() < 1 || Record[0] == 0) - return Error(InvalidRecord); + return Error(BitcodeError::InvalidRecord); // Create all the basic blocks for the function. FunctionBBs.resize(Record[0]); - for (unsigned i = 0, e = FunctionBBs.size(); i != e; ++i) - FunctionBBs[i] = BasicBlock::Create(Context, "", F); + + // See if anything took the address of blocks in this function. + auto BBFRI = BasicBlockFwdRefs.find(F); + if (BBFRI == BasicBlockFwdRefs.end()) { + for (unsigned i = 0, e = FunctionBBs.size(); i != e; ++i) + FunctionBBs[i] = BasicBlock::Create(Context, "", F); + } else { + auto &BBRefs = BBFRI->second; + // Check for invalid basic block references. 
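// (Descriptive note, inferred from the surrounding hunk: BBRefs holds the
// forward-declared, not-yet-parented BasicBlocks that were created when a
// blockaddress referenced this function before its body was parsed. It is
// indexed by basic block number, so it may not name more blocks than the
// function declares, and slot 0 must be empty because the entry block can
// never be the target of a blockaddress.)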
+ if (BBRefs.size() > FunctionBBs.size()) + return Error(BitcodeError::InvalidID); + assert(!BBRefs.empty() && "Unexpected empty array"); + assert(!BBRefs.front() && "Invalid reference to entry block"); + for (unsigned I = 0, E = FunctionBBs.size(), RE = BBRefs.size(); I != E; + ++I) + if (I < RE && BBRefs[I]) { + BBRefs[I]->insertInto(F); + FunctionBBs[I] = BBRefs[I]; + } else { + FunctionBBs[I] = BasicBlock::Create(Context, "", F); + } + + // Erase from the table. + BasicBlockFwdRefs.erase(BBFRI); + } + CurBB = FunctionBBs[0]; continue; + } case bitc::FUNC_CODE_DEBUG_LOC_AGAIN: // DEBUG_LOC_AGAIN // This record indicates that the last instruction is at the same @@ -2332,7 +2434,7 @@ std::error_code BitcodeReader::ParseFunctionBody(Function *F) { I = &FunctionBBs[CurBBNo-1]->back(); if (!I) - return Error(InvalidRecord); + return Error(BitcodeError::InvalidRecord); I->setDebugLoc(LastLoc); I = nullptr; continue; @@ -2345,7 +2447,7 @@ std::error_code BitcodeReader::ParseFunctionBody(Function *F) { !FunctionBBs[CurBBNo-1]->empty()) I = &FunctionBBs[CurBBNo-1]->back(); if (!I || Record.size() < 4) - return Error(InvalidRecord); + return Error(BitcodeError::InvalidRecord); unsigned Line = Record[0], Col = Record[1]; unsigned ScopeID = Record[2], IAID = Record[3]; @@ -2365,11 +2467,11 @@ std::error_code BitcodeReader::ParseFunctionBody(Function *F) { if (getValueTypePair(Record, OpNum, NextValueNo, LHS) || popValue(Record, OpNum, NextValueNo, LHS->getType(), RHS) || OpNum+1 > Record.size()) - return Error(InvalidRecord); + return Error(BitcodeError::InvalidRecord); int Opc = GetDecodedBinaryOpcode(Record[OpNum++], LHS->getType()); if (Opc == -1) - return Error(InvalidRecord); + return Error(BitcodeError::InvalidRecord); I = BinaryOperator::Create((Instruction::BinaryOps)Opc, LHS, RHS); InstructionList.push_back(I); if (OpNum < Record.size()) { @@ -2411,12 +2513,12 @@ std::error_code BitcodeReader::ParseFunctionBody(Function *F) { Value *Op; if (getValueTypePair(Record, OpNum, NextValueNo, Op) || OpNum+2 != Record.size()) - return Error(InvalidRecord); + return Error(BitcodeError::InvalidRecord); Type *ResTy = getTypeByID(Record[OpNum]); int Opc = GetDecodedCastOpcode(Record[OpNum+1]); if (Opc == -1 || !ResTy) - return Error(InvalidRecord); + return Error(BitcodeError::InvalidRecord); Instruction *Temp = nullptr; if ((I = UpgradeBitCastInst(Opc, Op, ResTy, Temp))) { if (Temp) { @@ -2434,13 +2536,13 @@ std::error_code BitcodeReader::ParseFunctionBody(Function *F) { unsigned OpNum = 0; Value *BasePtr; if (getValueTypePair(Record, OpNum, NextValueNo, BasePtr)) - return Error(InvalidRecord); + return Error(BitcodeError::InvalidRecord); SmallVector GEPIdx; while (OpNum != Record.size()) { Value *Op; if (getValueTypePair(Record, OpNum, NextValueNo, Op)) - return Error(InvalidRecord); + return Error(BitcodeError::InvalidRecord); GEPIdx.push_back(Op); } @@ -2456,14 +2558,14 @@ std::error_code BitcodeReader::ParseFunctionBody(Function *F) { unsigned OpNum = 0; Value *Agg; if (getValueTypePair(Record, OpNum, NextValueNo, Agg)) - return Error(InvalidRecord); + return Error(BitcodeError::InvalidRecord); SmallVector EXTRACTVALIdx; for (unsigned RecSize = Record.size(); OpNum != RecSize; ++OpNum) { uint64_t Index = Record[OpNum]; if ((unsigned)Index != Index) - return Error(InvalidValue); + return Error(BitcodeError::InvalidValue); EXTRACTVALIdx.push_back((unsigned)Index); } @@ -2477,17 +2579,17 @@ std::error_code BitcodeReader::ParseFunctionBody(Function *F) { unsigned OpNum = 0; Value *Agg; if 
(getValueTypePair(Record, OpNum, NextValueNo, Agg)) - return Error(InvalidRecord); + return Error(BitcodeError::InvalidRecord); Value *Val; if (getValueTypePair(Record, OpNum, NextValueNo, Val)) - return Error(InvalidRecord); + return Error(BitcodeError::InvalidRecord); SmallVector INSERTVALIdx; for (unsigned RecSize = Record.size(); OpNum != RecSize; ++OpNum) { uint64_t Index = Record[OpNum]; if ((unsigned)Index != Index) - return Error(InvalidValue); + return Error(BitcodeError::InvalidValue); INSERTVALIdx.push_back((unsigned)Index); } @@ -2504,7 +2606,7 @@ std::error_code BitcodeReader::ParseFunctionBody(Function *F) { if (getValueTypePair(Record, OpNum, NextValueNo, TrueVal) || popValue(Record, OpNum, NextValueNo, TrueVal->getType(), FalseVal) || popValue(Record, OpNum, NextValueNo, Type::getInt1Ty(Context), Cond)) - return Error(InvalidRecord); + return Error(BitcodeError::InvalidRecord); I = SelectInst::Create(Cond, TrueVal, FalseVal); InstructionList.push_back(I); @@ -2519,18 +2621,18 @@ std::error_code BitcodeReader::ParseFunctionBody(Function *F) { if (getValueTypePair(Record, OpNum, NextValueNo, TrueVal) || popValue(Record, OpNum, NextValueNo, TrueVal->getType(), FalseVal) || getValueTypePair(Record, OpNum, NextValueNo, Cond)) - return Error(InvalidRecord); + return Error(BitcodeError::InvalidRecord); // select condition can be either i1 or [N x i1] if (VectorType* vector_type = dyn_cast(Cond->getType())) { // expect if (vector_type->getElementType() != Type::getInt1Ty(Context)) - return Error(InvalidTypeForValue); + return Error(BitcodeError::InvalidTypeForValue); } else { // expect i1 if (Cond->getType() != Type::getInt1Ty(Context)) - return Error(InvalidTypeForValue); + return Error(BitcodeError::InvalidTypeForValue); } I = SelectInst::Create(Cond, TrueVal, FalseVal); @@ -2543,7 +2645,7 @@ std::error_code BitcodeReader::ParseFunctionBody(Function *F) { Value *Vec, *Idx; if (getValueTypePair(Record, OpNum, NextValueNo, Vec) || getValueTypePair(Record, OpNum, NextValueNo, Idx)) - return Error(InvalidRecord); + return Error(BitcodeError::InvalidRecord); I = ExtractElementInst::Create(Vec, Idx); InstructionList.push_back(I); break; @@ -2556,7 +2658,7 @@ std::error_code BitcodeReader::ParseFunctionBody(Function *F) { popValue(Record, OpNum, NextValueNo, cast(Vec->getType())->getElementType(), Elt) || getValueTypePair(Record, OpNum, NextValueNo, Idx)) - return Error(InvalidRecord); + return Error(BitcodeError::InvalidRecord); I = InsertElementInst::Create(Vec, Elt, Idx); InstructionList.push_back(I); break; @@ -2567,10 +2669,10 @@ std::error_code BitcodeReader::ParseFunctionBody(Function *F) { Value *Vec1, *Vec2, *Mask; if (getValueTypePair(Record, OpNum, NextValueNo, Vec1) || popValue(Record, OpNum, NextValueNo, Vec1->getType(), Vec2)) - return Error(InvalidRecord); + return Error(BitcodeError::InvalidRecord); if (getValueTypePair(Record, OpNum, NextValueNo, Mask)) - return Error(InvalidRecord); + return Error(BitcodeError::InvalidRecord); I = new ShuffleVectorInst(Vec1, Vec2, Mask); InstructionList.push_back(I); break; @@ -2588,7 +2690,7 @@ std::error_code BitcodeReader::ParseFunctionBody(Function *F) { if (getValueTypePair(Record, OpNum, NextValueNo, LHS) || popValue(Record, OpNum, NextValueNo, LHS->getType(), RHS) || OpNum+1 != Record.size()) - return Error(InvalidRecord); + return Error(BitcodeError::InvalidRecord); if (LHS->getType()->isFPOrFPVectorTy()) I = new FCmpInst((FCmpInst::Predicate)Record[OpNum], LHS, RHS); @@ -2610,9 +2712,9 @@ std::error_code 
BitcodeReader::ParseFunctionBody(Function *F) { unsigned OpNum = 0; Value *Op = nullptr; if (getValueTypePair(Record, OpNum, NextValueNo, Op)) - return Error(InvalidRecord); + return Error(BitcodeError::InvalidRecord); if (OpNum != Record.size()) - return Error(InvalidRecord); + return Error(BitcodeError::InvalidRecord); I = ReturnInst::Create(Context, Op); InstructionList.push_back(I); @@ -2620,10 +2722,10 @@ std::error_code BitcodeReader::ParseFunctionBody(Function *F) { } case bitc::FUNC_CODE_INST_BR: { // BR: [bb#, bb#, opval] or [bb#] if (Record.size() != 1 && Record.size() != 3) - return Error(InvalidRecord); + return Error(BitcodeError::InvalidRecord); BasicBlock *TrueDest = getBasicBlock(Record[0]); if (!TrueDest) - return Error(InvalidRecord); + return Error(BitcodeError::InvalidRecord); if (Record.size() == 1) { I = BranchInst::Create(TrueDest); @@ -2634,7 +2736,7 @@ std::error_code BitcodeReader::ParseFunctionBody(Function *F) { Value *Cond = getValue(Record, 2, NextValueNo, Type::getInt1Ty(Context)); if (!FalseDest || !Cond) - return Error(InvalidRecord); + return Error(BitcodeError::InvalidRecord); I = BranchInst::Create(TrueDest, FalseDest, Cond); InstructionList.push_back(I); } @@ -2654,7 +2756,7 @@ std::error_code BitcodeReader::ParseFunctionBody(Function *F) { Value *Cond = getValue(Record, 2, NextValueNo, OpTy); BasicBlock *Default = getBasicBlock(Record[3]); if (!OpTy || !Cond || !Default) - return Error(InvalidRecord); + return Error(BitcodeError::InvalidRecord); unsigned NumCases = Record[4]; @@ -2706,12 +2808,12 @@ std::error_code BitcodeReader::ParseFunctionBody(Function *F) { // Old SwitchInst format without case ranges. if (Record.size() < 3 || (Record.size() & 1) == 0) - return Error(InvalidRecord); + return Error(BitcodeError::InvalidRecord); Type *OpTy = getTypeByID(Record[0]); Value *Cond = getValue(Record, 1, NextValueNo, OpTy); BasicBlock *Default = getBasicBlock(Record[2]); if (!OpTy || !Cond || !Default) - return Error(InvalidRecord); + return Error(BitcodeError::InvalidRecord); unsigned NumCases = (Record.size()-3)/2; SwitchInst *SI = SwitchInst::Create(Cond, Default, NumCases); InstructionList.push_back(SI); @@ -2721,7 +2823,7 @@ std::error_code BitcodeReader::ParseFunctionBody(Function *F) { BasicBlock *DestBB = getBasicBlock(Record[1+3+i*2]); if (!CaseVal || !DestBB) { delete SI; - return Error(InvalidRecord); + return Error(BitcodeError::InvalidRecord); } SI->addCase(CaseVal, DestBB); } @@ -2730,11 +2832,11 @@ std::error_code BitcodeReader::ParseFunctionBody(Function *F) { } case bitc::FUNC_CODE_INST_INDIRECTBR: { // INDIRECTBR: [opty, op0, op1, ...] if (Record.size() < 2) - return Error(InvalidRecord); + return Error(BitcodeError::InvalidRecord); Type *OpTy = getTypeByID(Record[0]); Value *Address = getValue(Record, 1, NextValueNo, OpTy); if (!OpTy || !Address) - return Error(InvalidRecord); + return Error(BitcodeError::InvalidRecord); unsigned NumDests = Record.size()-2; IndirectBrInst *IBI = IndirectBrInst::Create(Address, NumDests); InstructionList.push_back(IBI); @@ -2743,7 +2845,7 @@ std::error_code BitcodeReader::ParseFunctionBody(Function *F) { IBI->addDestination(DestBB); } else { delete IBI; - return Error(InvalidRecord); + return Error(BitcodeError::InvalidRecord); } } I = IBI; @@ -2753,7 +2855,7 @@ std::error_code BitcodeReader::ParseFunctionBody(Function *F) { case bitc::FUNC_CODE_INST_INVOKE: { // INVOKE: [attrs, cc, normBB, unwindBB, fnty, op0,op1,op2, ...] 
if (Record.size() < 4) - return Error(InvalidRecord); + return Error(BitcodeError::InvalidRecord); AttributeSet PAL = getAttributes(Record[0]); unsigned CCInfo = Record[1]; BasicBlock *NormalBB = getBasicBlock(Record[2]); @@ -2762,7 +2864,7 @@ std::error_code BitcodeReader::ParseFunctionBody(Function *F) { unsigned OpNum = 4; Value *Callee; if (getValueTypePair(Record, OpNum, NextValueNo, Callee)) - return Error(InvalidRecord); + return Error(BitcodeError::InvalidRecord); PointerType *CalleeTy = dyn_cast(Callee->getType()); FunctionType *FTy = !CalleeTy ? nullptr : @@ -2771,25 +2873,25 @@ std::error_code BitcodeReader::ParseFunctionBody(Function *F) { // Check that the right number of fixed parameters are here. if (!FTy || !NormalBB || !UnwindBB || Record.size() < OpNum+FTy->getNumParams()) - return Error(InvalidRecord); + return Error(BitcodeError::InvalidRecord); SmallVector Ops; for (unsigned i = 0, e = FTy->getNumParams(); i != e; ++i, ++OpNum) { Ops.push_back(getValue(Record, OpNum, NextValueNo, FTy->getParamType(i))); if (!Ops.back()) - return Error(InvalidRecord); + return Error(BitcodeError::InvalidRecord); } if (!FTy->isVarArg()) { if (Record.size() != OpNum) - return Error(InvalidRecord); + return Error(BitcodeError::InvalidRecord); } else { // Read type/value pairs for varargs params. while (OpNum != Record.size()) { Value *Op; if (getValueTypePair(Record, OpNum, NextValueNo, Op)) - return Error(InvalidRecord); + return Error(BitcodeError::InvalidRecord); Ops.push_back(Op); } } @@ -2805,7 +2907,7 @@ std::error_code BitcodeReader::ParseFunctionBody(Function *F) { unsigned Idx = 0; Value *Val = nullptr; if (getValueTypePair(Record, Idx, NextValueNo, Val)) - return Error(InvalidRecord); + return Error(BitcodeError::InvalidRecord); I = ResumeInst::Create(Val); InstructionList.push_back(I); break; @@ -2816,10 +2918,10 @@ std::error_code BitcodeReader::ParseFunctionBody(Function *F) { break; case bitc::FUNC_CODE_INST_PHI: { // PHI: [ty, val0,bb0, ...] if (Record.size() < 1 || ((Record.size()-1)&1)) - return Error(InvalidRecord); + return Error(BitcodeError::InvalidRecord); Type *Ty = getTypeByID(Record[0]); if (!Ty) - return Error(InvalidRecord); + return Error(BitcodeError::InvalidRecord); PHINode *PN = PHINode::Create(Ty, (Record.size()-1)/2); InstructionList.push_back(PN); @@ -2835,7 +2937,7 @@ std::error_code BitcodeReader::ParseFunctionBody(Function *F) { V = getValue(Record, 1+i, NextValueNo, Ty); BasicBlock *BB = getBasicBlock(Record[2+i]); if (!V || !BB) - return Error(InvalidRecord); + return Error(BitcodeError::InvalidRecord); PN->addIncoming(V, BB); } I = PN; @@ -2846,13 +2948,13 @@ std::error_code BitcodeReader::ParseFunctionBody(Function *F) { // LANDINGPAD: [ty, val, val, num, (id0,val0 ...)?] 
unsigned Idx = 0; if (Record.size() < 4) - return Error(InvalidRecord); + return Error(BitcodeError::InvalidRecord); Type *Ty = getTypeByID(Record[Idx++]); if (!Ty) - return Error(InvalidRecord); + return Error(BitcodeError::InvalidRecord); Value *PersFn = nullptr; if (getValueTypePair(Record, Idx, NextValueNo, PersFn)) - return Error(InvalidRecord); + return Error(BitcodeError::InvalidRecord); bool IsCleanup = !!Record[Idx++]; unsigned NumClauses = Record[Idx++]; @@ -2865,7 +2967,7 @@ std::error_code BitcodeReader::ParseFunctionBody(Function *F) { if (getValueTypePair(Record, Idx, NextValueNo, Val)) { delete LP; - return Error(InvalidRecord); + return Error(BitcodeError::InvalidRecord); } assert((CT != LandingPadInst::Catch || @@ -2884,15 +2986,19 @@ std::error_code BitcodeReader::ParseFunctionBody(Function *F) { case bitc::FUNC_CODE_INST_ALLOCA: { // ALLOCA: [instty, opty, op, align] if (Record.size() != 4) - return Error(InvalidRecord); + return Error(BitcodeError::InvalidRecord); PointerType *Ty = dyn_cast_or_null(getTypeByID(Record[0])); Type *OpTy = getTypeByID(Record[1]); Value *Size = getFnValueByID(Record[2], OpTy); - unsigned Align = Record[3]; + unsigned AlignRecord = Record[3]; + bool InAlloca = AlignRecord & (1 << 5); + unsigned Align = AlignRecord & ((1 << 5) - 1); if (!Ty || !Size) - return Error(InvalidRecord); - I = new AllocaInst(Ty->getElementType(), Size, (1 << Align) >> 1); + return Error(BitcodeError::InvalidRecord); + AllocaInst *AI = new AllocaInst(Ty->getElementType(), Size, (1 << Align) >> 1); + AI->setUsedWithInAlloca(InAlloca); + I = AI; InstructionList.push_back(I); break; } @@ -2901,7 +3007,7 @@ std::error_code BitcodeReader::ParseFunctionBody(Function *F) { Value *Op; if (getValueTypePair(Record, OpNum, NextValueNo, Op) || OpNum+2 != Record.size()) - return Error(InvalidRecord); + return Error(BitcodeError::InvalidRecord); I = new LoadInst(Op, "", Record[OpNum+1], (1 << Record[OpNum]) >> 1); InstructionList.push_back(I); @@ -2913,15 +3019,14 @@ std::error_code BitcodeReader::ParseFunctionBody(Function *F) { Value *Op; if (getValueTypePair(Record, OpNum, NextValueNo, Op) || OpNum+4 != Record.size()) - return Error(InvalidRecord); - + return Error(BitcodeError::InvalidRecord); AtomicOrdering Ordering = GetDecodedOrdering(Record[OpNum+2]); if (Ordering == NotAtomic || Ordering == Release || Ordering == AcquireRelease) - return Error(InvalidRecord); + return Error(BitcodeError::InvalidRecord); if (Ordering != NotAtomic && Record[OpNum] == 0) - return Error(InvalidRecord); + return Error(BitcodeError::InvalidRecord); SynchronizationScope SynchScope = GetDecodedSynchScope(Record[OpNum+3]); I = new LoadInst(Op, "", Record[OpNum+1], (1 << Record[OpNum]) >> 1, @@ -2936,7 +3041,7 @@ std::error_code BitcodeReader::ParseFunctionBody(Function *F) { popValue(Record, OpNum, NextValueNo, cast(Ptr->getType())->getElementType(), Val) || OpNum+2 != Record.size()) - return Error(InvalidRecord); + return Error(BitcodeError::InvalidRecord); I = new StoreInst(Val, Ptr, Record[OpNum+1], (1 << Record[OpNum]) >> 1); InstructionList.push_back(I); @@ -2950,15 +3055,15 @@ std::error_code BitcodeReader::ParseFunctionBody(Function *F) { popValue(Record, OpNum, NextValueNo, cast(Ptr->getType())->getElementType(), Val) || OpNum+4 != Record.size()) - return Error(InvalidRecord); + return Error(BitcodeError::InvalidRecord); AtomicOrdering Ordering = GetDecodedOrdering(Record[OpNum+2]); if (Ordering == NotAtomic || Ordering == Acquire || Ordering == AcquireRelease) - return 
Error(InvalidRecord); + return Error(BitcodeError::InvalidRecord); SynchronizationScope SynchScope = GetDecodedSynchScope(Record[OpNum+3]); if (Ordering != NotAtomic && Record[OpNum] == 0) - return Error(InvalidRecord); + return Error(BitcodeError::InvalidRecord); I = new StoreInst(Val, Ptr, Record[OpNum+1], (1 << Record[OpNum]) >> 1, Ordering, SynchScope); @@ -2976,10 +3081,10 @@ std::error_code BitcodeReader::ParseFunctionBody(Function *F) { popValue(Record, OpNum, NextValueNo, cast(Ptr->getType())->getElementType(), New) || (Record.size() < OpNum + 3 || Record.size() > OpNum + 5)) - return Error(InvalidRecord); + return Error(BitcodeError::InvalidRecord); AtomicOrdering SuccessOrdering = GetDecodedOrdering(Record[OpNum+1]); if (SuccessOrdering == NotAtomic || SuccessOrdering == Unordered) - return Error(InvalidRecord); + return Error(BitcodeError::InvalidRecord); SynchronizationScope SynchScope = GetDecodedSynchScope(Record[OpNum+2]); AtomicOrdering FailureOrdering; @@ -3014,14 +3119,14 @@ std::error_code BitcodeReader::ParseFunctionBody(Function *F) { popValue(Record, OpNum, NextValueNo, cast(Ptr->getType())->getElementType(), Val) || OpNum+4 != Record.size()) - return Error(InvalidRecord); + return Error(BitcodeError::InvalidRecord); AtomicRMWInst::BinOp Operation = GetDecodedRMWOperation(Record[OpNum]); if (Operation < AtomicRMWInst::FIRST_BINOP || Operation > AtomicRMWInst::LAST_BINOP) - return Error(InvalidRecord); + return Error(BitcodeError::InvalidRecord); AtomicOrdering Ordering = GetDecodedOrdering(Record[OpNum+2]); if (Ordering == NotAtomic || Ordering == Unordered) - return Error(InvalidRecord); + return Error(BitcodeError::InvalidRecord); SynchronizationScope SynchScope = GetDecodedSynchScope(Record[OpNum+3]); I = new AtomicRMWInst(Operation, Ptr, Val, Ordering, SynchScope); cast(I)->setVolatile(Record[OpNum+1]); @@ -3030,11 +3135,11 @@ std::error_code BitcodeReader::ParseFunctionBody(Function *F) { } case bitc::FUNC_CODE_INST_FENCE: { // FENCE:[ordering, synchscope] if (2 != Record.size()) - return Error(InvalidRecord); + return Error(BitcodeError::InvalidRecord); AtomicOrdering Ordering = GetDecodedOrdering(Record[0]); if (Ordering == NotAtomic || Ordering == Unordered || Ordering == Monotonic) - return Error(InvalidRecord); + return Error(BitcodeError::InvalidRecord); SynchronizationScope SynchScope = GetDecodedSynchScope(Record[1]); I = new FenceInst(Context, Ordering, SynchScope); InstructionList.push_back(I); @@ -3043,7 +3148,7 @@ std::error_code BitcodeReader::ParseFunctionBody(Function *F) { case bitc::FUNC_CODE_INST_CALL: { // CALL: [paramattrs, cc, fnty, fnid, arg0, arg1...] if (Record.size() < 3) - return Error(InvalidRecord); + return Error(BitcodeError::InvalidRecord); AttributeSet PAL = getAttributes(Record[0]); unsigned CCInfo = Record[1]; @@ -3051,13 +3156,13 @@ std::error_code BitcodeReader::ParseFunctionBody(Function *F) { unsigned OpNum = 2; Value *Callee; if (getValueTypePair(Record, OpNum, NextValueNo, Callee)) - return Error(InvalidRecord); + return Error(BitcodeError::InvalidRecord); PointerType *OpTy = dyn_cast(Callee->getType()); FunctionType *FTy = nullptr; if (OpTy) FTy = dyn_cast(OpTy->getElementType()); if (!FTy || Record.size() < FTy->getNumParams()+OpNum) - return Error(InvalidRecord); + return Error(BitcodeError::InvalidRecord); SmallVector Args; // Read the fixed params. 
@@ -3068,18 +3173,18 @@ std::error_code BitcodeReader::ParseFunctionBody(Function *F) { Args.push_back(getValue(Record, OpNum, NextValueNo, FTy->getParamType(i))); if (!Args.back()) - return Error(InvalidRecord); + return Error(BitcodeError::InvalidRecord); } // Read type/value pairs for varargs params. if (!FTy->isVarArg()) { if (OpNum != Record.size()) - return Error(InvalidRecord); + return Error(BitcodeError::InvalidRecord); } else { while (OpNum != Record.size()) { Value *Op; if (getValueTypePair(Record, OpNum, NextValueNo, Op)) - return Error(InvalidRecord); + return Error(BitcodeError::InvalidRecord); Args.push_back(Op); } } @@ -3099,12 +3204,12 @@ std::error_code BitcodeReader::ParseFunctionBody(Function *F) { } case bitc::FUNC_CODE_INST_VAARG: { // VAARG: [valistty, valist, instty] if (Record.size() < 3) - return Error(InvalidRecord); + return Error(BitcodeError::InvalidRecord); Type *OpTy = getTypeByID(Record[0]); Value *Op = getValue(Record, 1, NextValueNo, OpTy); Type *ResTy = getTypeByID(Record[2]); if (!OpTy || !Op || !ResTy) - return Error(InvalidRecord); + return Error(BitcodeError::InvalidRecord); I = new VAArgInst(Op, ResTy); InstructionList.push_back(I); break; @@ -3115,7 +3220,7 @@ std::error_code BitcodeReader::ParseFunctionBody(Function *F) { // this file. if (!CurBB) { delete I; - return Error(InvalidInstructionWithNoBB); + return Error(BitcodeError::InvalidInstructionWithNoBB); } CurBB->getInstList().push_back(I); @@ -3142,32 +3247,13 @@ OutOfRecordLoop: delete A; } } - return Error(NeverResolvedValueFoundInFunction); + return Error(BitcodeError::NeverResolvedValueFoundInFunction); } } // FIXME: Check for unresolved forward-declared metadata references // and clean up leaks. - // See if anything took the address of blocks in this function. If so, - // resolve them now. - DenseMap >::iterator BAFRI = - BlockAddrFwdRefs.find(F); - if (BAFRI != BlockAddrFwdRefs.end()) { - std::vector &RefList = BAFRI->second; - for (unsigned i = 0, e = RefList.size(); i != e; ++i) { - unsigned BlockIdx = RefList[i].first; - if (BlockIdx >= FunctionBBs.size()) - return Error(InvalidID); - - GlobalVariable *FwdRef = RefList[i].second; - FwdRef->replaceAllUsesWith(BlockAddress::get(F, FunctionBBs[BlockIdx])); - FwdRef->eraseFromParent(); - } - - BlockAddrFwdRefs.erase(BAFRI); - } - // Trim the value list down to the size it was before we parsed this function. ValueList.shrinkTo(ModuleValueListSize); MDValueList.shrinkTo(ModuleMDValueListSize); @@ -3181,7 +3267,7 @@ std::error_code BitcodeReader::FindFunctionInStream( DenseMap::iterator DeferredFunctionInfoIterator) { while (DeferredFunctionInfoIterator->second == 0) { if (Stream.AtEndOfStream()) - return Error(CouldNotFindFunctionInStream); + return Error(BitcodeError::CouldNotFindFunctionInStream); // ParseModule will parse the next body in the stream and set its // position in the DeferredFunctionInfo map. if (std::error_code EC = ParseModule(true)) @@ -3196,15 +3282,7 @@ std::error_code BitcodeReader::FindFunctionInStream( void BitcodeReader::releaseBuffer() { Buffer.release(); } -bool BitcodeReader::isMaterializable(const GlobalValue *GV) const { - if (const Function *F = dyn_cast(GV)) { - return F->isDeclaration() && - DeferredFunctionInfo.count(const_cast(F)); - } - return false; -} - -std::error_code BitcodeReader::Materialize(GlobalValue *GV) { +std::error_code BitcodeReader::materialize(GlobalValue *GV) { Function *F = dyn_cast(GV); // If it's not a function or is already material, ignore the request. 
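// (Descriptive note on the flag protocol used across this hunk and the
// MODULE_CODE_FUNCTION hunk above: a prototype whose body is still in the
// stream is marked with setIsMaterializable(true) when it is created; once
// ParseFunctionBody() succeeds the flag is flipped to false, and
// Dematerialize() sets it back to true after dropping the body, so
// materialize() can return early for anything that is not a lazily deferred
// function.)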
if (!F || !F->isMaterializable()) @@ -3223,6 +3301,7 @@ std::error_code BitcodeReader::Materialize(GlobalValue *GV) { if (std::error_code EC = ParseFunctionBody(F)) return EC; + F->setIsMaterializable(false); // Upgrade any old intrinsic calls in the function. for (UpgradedIntrinsicMap::iterator I = UpgradedIntrinsics.begin(), @@ -3236,13 +3315,21 @@ std::error_code BitcodeReader::Materialize(GlobalValue *GV) { } } - return std::error_code(); + // Bring in any functions that this function forward-referenced via + // blockaddresses. + return materializeForwardReferencedFunctions(); } bool BitcodeReader::isDematerializable(const GlobalValue *GV) const { const Function *F = dyn_cast(GV); if (!F || F->isDeclaration()) return false; + + // Dematerializing F would leave dangling references that wouldn't be + // reconnected on re-materialization. + if (BlockAddressesTaken.count(F)) + return false; + return DeferredFunctionInfo.count(const_cast(F)); } @@ -3255,20 +3342,23 @@ void BitcodeReader::Dematerialize(GlobalValue *GV) { assert(DeferredFunctionInfo.count(F) && "No info to read function later?"); // Just forget the function body, we can remat it later. - F->deleteBody(); + F->dropAllReferences(); + F->setIsMaterializable(true); } std::error_code BitcodeReader::MaterializeModule(Module *M) { assert(M == TheModule && "Can only Materialize the Module this BitcodeReader is attached to."); + + // Promise to materialize all forward references. + WillMaterializeAllForwardRefs = true; + // Iterate over the module, deserializing any functions that are still on // disk. for (Module::iterator F = TheModule->begin(), E = TheModule->end(); F != E; ++F) { - if (F->isMaterializable()) { - if (std::error_code EC = Materialize(F)) - return EC; - } + if (std::error_code EC = materialize(F)) + return EC; } // At this point, if there are any function bodies, the current bit is // pointing to the END_BLOCK record after them. Now make sure the rest @@ -3276,6 +3366,11 @@ std::error_code BitcodeReader::MaterializeModule(Module *M) { if (NextUnreadBit) ParseModule(true); + // Check that all block address forward references got resolved (as we + // promised above). + if (!BasicBlockFwdRefs.empty()) + return Error(BitcodeError::NeverResolvedFunctionFromBlockAddress); + // Upgrade any intrinsic calls that slipped through (should not happen!) and // delete the old functions to clean up. We can't do this unless the entire // module is materialized because there could always be another function body @@ -3312,21 +3407,17 @@ std::error_code BitcodeReader::InitStreamFromBuffer() { const unsigned char *BufPtr = (const unsigned char*)Buffer->getBufferStart(); const unsigned char *BufEnd = BufPtr+Buffer->getBufferSize(); - if (Buffer->getBufferSize() & 3) { - if (!isRawBitcode(BufPtr, BufEnd) && !isBitcodeWrapper(BufPtr, BufEnd)) - return Error(InvalidBitcodeSignature); - else - return Error(BitcodeStreamInvalidSize); - } + if (Buffer->getBufferSize() & 3) + return Error(BitcodeError::InvalidBitcodeSignature); // If we have a wrapper header, parse it and ignore the non-bc file contents. // The magic number is 0x0B17C0DE stored in little endian. 
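// A minimal, self-contained sketch of the two magic numbers being checked
// here (function names are illustrative only; the real checks are
// isBitcodeWrapper() and the nibble reads above). Raw bitcode begins with
// 'B' 'C' 0xC0 0xDE, while a wrapper file begins with the word 0x0B17C0DE
// stored little-endian, followed by offset/size fields locating the stream.
#include <cstddef>

static bool sketchIsWrapperMagic(const unsigned char *Buf, size_t Size) {
  return Size >= 4 && Buf[0] == 0xDE && Buf[1] == 0xC0 && Buf[2] == 0x17 &&
         Buf[3] == 0x0B; // bytes of 0x0B17C0DE in little-endian order
}

static bool sketchIsRawBitcodeMagic(const unsigned char *Buf, size_t Size) {
  return Size >= 4 && Buf[0] == 'B' && Buf[1] == 'C' && Buf[2] == 0xC0 &&
         Buf[3] == 0xDE;
}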
if (isBitcodeWrapper(BufPtr, BufEnd)) if (SkipBitcodeWrapperHeader(BufPtr, BufEnd, true)) - return Error(InvalidBitcodeWrapperHeader); + return Error(BitcodeError::InvalidBitcodeWrapperHeader); StreamFile.reset(new BitstreamReader(BufPtr, BufEnd)); - Stream.init(*StreamFile); + Stream.init(&*StreamFile); return std::error_code(); } @@ -3336,14 +3427,14 @@ std::error_code BitcodeReader::InitLazyStream() { // see it. StreamingMemoryObject *Bytes = new StreamingMemoryObject(LazyStreamer); StreamFile.reset(new BitstreamReader(Bytes)); - Stream.init(*StreamFile); + Stream.init(&*StreamFile); unsigned char buf[16]; - if (Bytes->readBytes(0, 16, buf) == -1) - return Error(BitcodeStreamInvalidSize); + if (Bytes->readBytes(buf, 16, 0) != 16) + return Error(BitcodeError::InvalidBitcodeSignature); if (!isBitcode(buf, buf + 16)) - return Error(InvalidBitcodeSignature); + return Error(BitcodeError::InvalidBitcodeSignature); if (isBitcodeWrapper(buf, buf + 4)) { const unsigned char *bitcodeStart = buf; @@ -3361,45 +3452,45 @@ class BitcodeErrorCategoryType : public std::error_category { return "llvm.bitcode"; } std::string message(int IE) const override { - BitcodeReader::ErrorType E = static_cast(IE); + BitcodeError E = static_cast(IE); switch (E) { - case BitcodeReader::BitcodeStreamInvalidSize: - return "Bitcode stream length should be >= 16 bytes and a multiple of 4"; - case BitcodeReader::ConflictingMETADATA_KINDRecords: + case BitcodeError::ConflictingMETADATA_KINDRecords: return "Conflicting METADATA_KIND records"; - case BitcodeReader::CouldNotFindFunctionInStream: + case BitcodeError::CouldNotFindFunctionInStream: return "Could not find function in stream"; - case BitcodeReader::ExpectedConstant: + case BitcodeError::ExpectedConstant: return "Expected a constant"; - case BitcodeReader::InsufficientFunctionProtos: + case BitcodeError::InsufficientFunctionProtos: return "Insufficient function protos"; - case BitcodeReader::InvalidBitcodeSignature: + case BitcodeError::InvalidBitcodeSignature: return "Invalid bitcode signature"; - case BitcodeReader::InvalidBitcodeWrapperHeader: + case BitcodeError::InvalidBitcodeWrapperHeader: return "Invalid bitcode wrapper header"; - case BitcodeReader::InvalidConstantReference: + case BitcodeError::InvalidConstantReference: return "Invalid ronstant reference"; - case BitcodeReader::InvalidID: + case BitcodeError::InvalidID: return "Invalid ID"; - case BitcodeReader::InvalidInstructionWithNoBB: + case BitcodeError::InvalidInstructionWithNoBB: return "Invalid instruction with no BB"; - case BitcodeReader::InvalidRecord: + case BitcodeError::InvalidRecord: return "Invalid record"; - case BitcodeReader::InvalidTypeForValue: + case BitcodeError::InvalidTypeForValue: return "Invalid type for value"; - case BitcodeReader::InvalidTYPETable: + case BitcodeError::InvalidTYPETable: return "Invalid TYPE table"; - case BitcodeReader::InvalidType: + case BitcodeError::InvalidType: return "Invalid type"; - case BitcodeReader::MalformedBlock: + case BitcodeError::MalformedBlock: return "Malformed block"; - case BitcodeReader::MalformedGlobalInitializerSet: + case BitcodeError::MalformedGlobalInitializerSet: return "Malformed global initializer set"; - case BitcodeReader::InvalidMultipleBlocks: + case BitcodeError::InvalidMultipleBlocks: return "Invalid multiple blocks"; - case BitcodeReader::NeverResolvedValueFoundInFunction: + case BitcodeError::NeverResolvedValueFoundInFunction: return "Never resolved value found in function"; - case BitcodeReader::InvalidValue: + case 
BitcodeError::NeverResolvedFunctionFromBlockAddress: + return "Never resolved function from blockaddress"; + case BitcodeError::InvalidValue: return "Invalid value"; } llvm_unreachable("Unknown error type!"); @@ -3407,33 +3498,54 @@ class BitcodeErrorCategoryType : public std::error_category { }; } -const std::error_category &BitcodeReader::BitcodeErrorCategory() { - static BitcodeErrorCategoryType O; - return O; +static ManagedStatic ErrorCategory; + +const std::error_category &llvm::BitcodeErrorCategory() { + return *ErrorCategory; } //===----------------------------------------------------------------------===// // External interface //===----------------------------------------------------------------------===// -/// getLazyBitcodeModule - lazy function-at-a-time loading from a file. +/// \brief Get a lazy one-at-time loading module from bitcode. /// -ErrorOr llvm::getLazyBitcodeModule(MemoryBuffer *Buffer, - LLVMContext &Context) { +/// This isn't always used in a lazy context. In particular, it's also used by +/// \a parseBitcodeFile(). If this is truly lazy, then we need to eagerly pull +/// in forward-referenced functions from block address references. +/// +/// \param[in] WillMaterializeAll Set to \c true if the caller promises to +/// materialize everything -- in particular, if this isn't truly lazy. +static ErrorOr +getLazyBitcodeModuleImpl(std::unique_ptr &&Buffer, + LLVMContext &Context, bool WillMaterializeAll) { Module *M = new Module(Buffer->getBufferIdentifier(), Context); - BitcodeReader *R = new BitcodeReader(Buffer, Context); + BitcodeReader *R = new BitcodeReader(Buffer.get(), Context); M->setMaterializer(R); - if (std::error_code EC = R->ParseBitcodeInto(M)) { + + auto cleanupOnError = [&](std::error_code EC) { R->releaseBuffer(); // Never take ownership on error. delete M; // Also deletes R. return EC; - } + }; + + if (std::error_code EC = R->ParseBitcodeInto(M)) + return cleanupOnError(EC); - R->materializeForwardReferencedFunctions(); + if (!WillMaterializeAll) + // Resolve forward references from blockaddresses. + if (std::error_code EC = R->materializeForwardReferencedFunctions()) + return cleanupOnError(EC); + Buffer.release(); // The BitcodeReader owns it now. return M; } +ErrorOr +llvm::getLazyBitcodeModule(std::unique_ptr &&Buffer, + LLVMContext &Context) { + return getLazyBitcodeModuleImpl(std::move(Buffer), Context, false); +} Module *llvm::getStreamedBitcodeModule(const std::string &name, DataStreamer *streamer, @@ -3451,14 +3563,16 @@ Module *llvm::getStreamedBitcodeModule(const std::string &name, return M; } -ErrorOr llvm::parseBitcodeFile(MemoryBuffer *Buffer, +ErrorOr llvm::parseBitcodeFile(MemoryBufferRef Buffer, LLVMContext &Context) { - ErrorOr ModuleOrErr = getLazyBitcodeModule(Buffer, Context); + std::unique_ptr Buf = MemoryBuffer::getMemBuffer(Buffer, false); + ErrorOr ModuleOrErr = + getLazyBitcodeModuleImpl(std::move(Buf), Context, true); if (!ModuleOrErr) return ModuleOrErr; Module *M = ModuleOrErr.get(); // Read in the entire module, and destroy the BitcodeReader. 
- if (std::error_code EC = M->materializeAllPermanently(true)) { + if (std::error_code EC = M->materializeAllPermanently()) { delete M; return EC; } @@ -3469,12 +3583,11 @@ ErrorOr llvm::parseBitcodeFile(MemoryBuffer *Buffer, return M; } -std::string llvm::getBitcodeTargetTriple(MemoryBuffer *Buffer, +std::string llvm::getBitcodeTargetTriple(MemoryBufferRef Buffer, LLVMContext &Context) { - BitcodeReader *R = new BitcodeReader(Buffer, Context); + std::unique_ptr Buf = MemoryBuffer::getMemBuffer(Buffer, false); + auto R = llvm::make_unique(Buf.release(), Context); ErrorOr Triple = R->parseTriple(); - R->releaseBuffer(); - delete R; if (Triple.getError()) return ""; return Triple.get(); diff --git a/lib/Bitcode/Reader/BitcodeReader.h b/lib/Bitcode/Reader/BitcodeReader.h index 1d4869a..047fef8 100644 --- a/lib/Bitcode/Reader/BitcodeReader.h +++ b/lib/Bitcode/Reader/BitcodeReader.h @@ -11,8 +11,8 @@ // //===----------------------------------------------------------------------===// -#ifndef BITCODE_READER_H -#define BITCODE_READER_H +#ifndef LLVM_LIB_BITCODE_READER_BITCODEREADER_H +#define LLVM_LIB_BITCODE_READER_BITCODEREADER_H #include "llvm/ADT/DenseMap.h" #include "llvm/Bitcode/BitstreamReader.h" @@ -22,6 +22,7 @@ #include "llvm/IR/OperandTraits.h" #include "llvm/IR/Type.h" #include "llvm/IR/ValueHandle.h" +#include #include #include @@ -138,7 +139,6 @@ class BitcodeReader : public GVMaterializer { BitcodeReaderMDValueList MDValueList; std::vector ComdatList; SmallVector InstructionList; - SmallVector, 64> UseListRecords; std::vector > GlobalInits; std::vector > AliasInits; @@ -180,10 +180,11 @@ class BitcodeReader : public GVMaterializer { /// stream. DenseMap DeferredFunctionInfo; - /// BlockAddrFwdRefs - These are blockaddr references to basic blocks. These - /// are resolved lazily when functions are loaded. - typedef std::pair BlockAddrRefTy; - DenseMap > BlockAddrFwdRefs; + /// These are basic blocks forward-referenced by block addresses. They are + /// inserted lazily into functions when they're loaded. The basic block ID is + /// its index into the vector. + DenseMap> BasicBlockFwdRefs; + std::deque BasicBlockFwdRefQueue; /// UseRelativeIDs - Indicates that we are using a new encoding for /// instruction operands where most operands in the current @@ -194,55 +195,36 @@ class BitcodeReader : public GVMaterializer { /// not need this flag. bool UseRelativeIDs; - static const std::error_category &BitcodeErrorCategory(); + /// True if all functions will be materialized, negating the need to process + /// (e.g.) blockaddress forward references. + bool WillMaterializeAllForwardRefs; + + /// Functions that have block addresses taken. This is usually empty. + SmallPtrSet BlockAddressesTaken; public: - enum ErrorType { - BitcodeStreamInvalidSize, - ConflictingMETADATA_KINDRecords, - CouldNotFindFunctionInStream, - ExpectedConstant, - InsufficientFunctionProtos, - InvalidBitcodeSignature, - InvalidBitcodeWrapperHeader, - InvalidConstantReference, - InvalidID, // A read identifier is not found in the table it should be in. - InvalidInstructionWithNoBB, - InvalidRecord, // A read record doesn't have the expected size or structure - InvalidTypeForValue, // Type read OK, but is invalid for its use - InvalidTYPETable, - InvalidType, // We were unable to read a type - MalformedBlock, // We are unable to advance in the stream. 
- MalformedGlobalInitializerSet, - InvalidMultipleBlocks, // We found multiple blocks of a kind that should - // have only one - NeverResolvedValueFoundInFunction, - InvalidValue // Invalid version, inst number, attr number, etc - }; - - std::error_code Error(ErrorType E) { - return std::error_code(E, BitcodeErrorCategory()); - } + std::error_code Error(BitcodeError E) { return make_error_code(E); } explicit BitcodeReader(MemoryBuffer *buffer, LLVMContext &C) : Context(C), TheModule(nullptr), Buffer(buffer), LazyStreamer(nullptr), NextUnreadBit(0), SeenValueSymbolTable(false), ValueList(C), - MDValueList(C), SeenFirstFunctionBody(false), UseRelativeIDs(false) {} + MDValueList(C), SeenFirstFunctionBody(false), UseRelativeIDs(false), + WillMaterializeAllForwardRefs(false) {} explicit BitcodeReader(DataStreamer *streamer, LLVMContext &C) : Context(C), TheModule(nullptr), Buffer(nullptr), LazyStreamer(streamer), NextUnreadBit(0), SeenValueSymbolTable(false), ValueList(C), - MDValueList(C), SeenFirstFunctionBody(false), UseRelativeIDs(false) {} + MDValueList(C), SeenFirstFunctionBody(false), UseRelativeIDs(false), + WillMaterializeAllForwardRefs(false) {} ~BitcodeReader() { FreeState(); } - void materializeForwardReferencedFunctions(); + std::error_code materializeForwardReferencedFunctions(); void FreeState(); - void releaseBuffer() override; + void releaseBuffer(); - bool isMaterializable(const GlobalValue *GV) const override; bool isDematerializable(const GlobalValue *GV) const override; - std::error_code Materialize(GlobalValue *GV) override; + std::error_code materialize(GlobalValue *GV) override; std::error_code MaterializeModule(Module *M) override; void Dematerialize(GlobalValue *GV) override; diff --git a/lib/Bitcode/Reader/BitstreamReader.cpp b/lib/Bitcode/Reader/BitstreamReader.cpp index 72451ec..5e3232e 100644 --- a/lib/Bitcode/Reader/BitstreamReader.cpp +++ b/lib/Bitcode/Reader/BitstreamReader.cpp @@ -15,41 +15,11 @@ using namespace llvm; // BitstreamCursor implementation //===----------------------------------------------------------------------===// -void BitstreamCursor::operator=(const BitstreamCursor &RHS) { - freeState(); - - BitStream = RHS.BitStream; - NextChar = RHS.NextChar; - CurWord = RHS.CurWord; - BitsInCurWord = RHS.BitsInCurWord; - CurCodeSize = RHS.CurCodeSize; - - // Copy abbreviations, and bump ref counts. - CurAbbrevs = RHS.CurAbbrevs; - for (size_t i = 0, e = CurAbbrevs.size(); i != e; ++i) - CurAbbrevs[i]->addRef(); - - // Copy block scope and bump ref counts. - BlockScope = RHS.BlockScope; - for (size_t S = 0, e = BlockScope.size(); S != e; ++S) { - std::vector &Abbrevs = BlockScope[S].PrevAbbrevs; - for (size_t i = 0, e = Abbrevs.size(); i != e; ++i) - Abbrevs[i]->addRef(); - } -} - void BitstreamCursor::freeState() { // Free all the Abbrevs. - for (size_t i = 0, e = CurAbbrevs.size(); i != e; ++i) - CurAbbrevs[i]->dropRef(); CurAbbrevs.clear(); // Free all the Abbrevs in the block scope. - for (size_t S = 0, e = BlockScope.size(); S != e; ++S) { - std::vector &Abbrevs = BlockScope[S].PrevAbbrevs; - for (size_t i = 0, e = Abbrevs.size(); i != e; ++i) - Abbrevs[i]->dropRef(); - } BlockScope.clear(); } @@ -63,10 +33,8 @@ bool BitstreamCursor::EnterSubBlock(unsigned BlockID, unsigned *NumWordsP) { // Add the abbrevs specific to this block to the CurAbbrevs list. 
if (const BitstreamReader::BlockInfo *Info = BitStream->getBlockInfo(BlockID)) { - for (size_t i = 0, e = Info->Abbrevs.size(); i != e; ++i) { - CurAbbrevs.push_back(Info->Abbrevs[i]); - CurAbbrevs.back()->addRef(); - } + CurAbbrevs.insert(CurAbbrevs.end(), Info->Abbrevs.begin(), + Info->Abbrevs.end()); } // Get the codesize of this block. @@ -82,16 +50,9 @@ bool BitstreamCursor::EnterSubBlock(unsigned BlockID, unsigned *NumWordsP) { return false; } -void BitstreamCursor::readAbbreviatedLiteral(const BitCodeAbbrevOp &Op, - SmallVectorImpl &Vals) { - assert(Op.isLiteral() && "Not a literal"); - // If the abbrev specifies the literal value to use, use it. - Vals.push_back(Op.getLiteralValue()); -} - -void BitstreamCursor::readAbbreviatedField(const BitCodeAbbrevOp &Op, - SmallVectorImpl &Vals) { - assert(!Op.isLiteral() && "Use ReadAbbreviatedLiteral for literals!"); +static uint64_t readAbbreviatedField(BitstreamCursor &Cursor, + const BitCodeAbbrevOp &Op) { + assert(!Op.isLiteral() && "Not to be used with literals!"); // Decode the value as we are commanded. switch (Op.getEncoding()) { @@ -99,19 +60,18 @@ void BitstreamCursor::readAbbreviatedField(const BitCodeAbbrevOp &Op, case BitCodeAbbrevOp::Blob: llvm_unreachable("Should not reach here"); case BitCodeAbbrevOp::Fixed: - Vals.push_back(Read((unsigned)Op.getEncodingData())); - break; + return Cursor.Read((unsigned)Op.getEncodingData()); case BitCodeAbbrevOp::VBR: - Vals.push_back(ReadVBR64((unsigned)Op.getEncodingData())); - break; + return Cursor.ReadVBR64((unsigned)Op.getEncodingData()); case BitCodeAbbrevOp::Char6: - Vals.push_back(BitCodeAbbrevOp::DecodeChar6(Read(6))); - break; + return BitCodeAbbrevOp::DecodeChar6(Cursor.Read(6)); } + llvm_unreachable("invalid abbreviation encoding"); } -void BitstreamCursor::skipAbbreviatedField(const BitCodeAbbrevOp &Op) { - assert(!Op.isLiteral() && "Use ReadAbbreviatedLiteral for literals!"); +static void skipAbbreviatedField(BitstreamCursor &Cursor, + const BitCodeAbbrevOp &Op) { + assert(!Op.isLiteral() && "Not to be used with literals!"); // Decode the value as we are commanded. switch (Op.getEncoding()) { @@ -119,13 +79,13 @@ void BitstreamCursor::skipAbbreviatedField(const BitCodeAbbrevOp &Op) { case BitCodeAbbrevOp::Blob: llvm_unreachable("Should not reach here"); case BitCodeAbbrevOp::Fixed: - (void)Read((unsigned)Op.getEncodingData()); + Cursor.Read((unsigned)Op.getEncodingData()); break; case BitCodeAbbrevOp::VBR: - (void)ReadVBR64((unsigned)Op.getEncodingData()); + Cursor.ReadVBR64((unsigned)Op.getEncodingData()); break; case BitCodeAbbrevOp::Char6: - (void)Read(6); + Cursor.Read(6); break; } } @@ -153,7 +113,7 @@ void BitstreamCursor::skipRecord(unsigned AbbrevID) { if (Op.getEncoding() != BitCodeAbbrevOp::Array && Op.getEncoding() != BitCodeAbbrevOp::Blob) { - skipAbbreviatedField(Op); + skipAbbreviatedField(*this, Op); continue; } @@ -167,7 +127,7 @@ void BitstreamCursor::skipRecord(unsigned AbbrevID) { // Read all the elements. for (; NumElts; --NumElts) - skipAbbreviatedField(EltEnc); + skipAbbreviatedField(*this, EltEnc); continue; } @@ -207,22 +167,22 @@ unsigned BitstreamCursor::readRecord(unsigned AbbrevID, // Read the record code first. 
assert(Abbv->getNumOperandInfos() != 0 && "no record code in abbreviation?"); const BitCodeAbbrevOp &CodeOp = Abbv->getOperandInfo(0); + unsigned Code; if (CodeOp.isLiteral()) - readAbbreviatedLiteral(CodeOp, Vals); + Code = CodeOp.getLiteralValue(); else - readAbbreviatedField(CodeOp, Vals); - unsigned Code = (unsigned)Vals.pop_back_val(); + Code = readAbbreviatedField(*this, CodeOp); for (unsigned i = 1, e = Abbv->getNumOperandInfos(); i != e; ++i) { const BitCodeAbbrevOp &Op = Abbv->getOperandInfo(i); if (Op.isLiteral()) { - readAbbreviatedLiteral(Op, Vals); + Vals.push_back(Op.getLiteralValue()); continue; } if (Op.getEncoding() != BitCodeAbbrevOp::Array && Op.getEncoding() != BitCodeAbbrevOp::Blob) { - readAbbreviatedField(Op, Vals); + Vals.push_back(readAbbreviatedField(*this, Op)); continue; } @@ -236,7 +196,7 @@ unsigned BitstreamCursor::readRecord(unsigned AbbrevID, // Read all the elements. for (; NumElts; --NumElts) - readAbbreviatedField(EltEnc, Vals); + Vals.push_back(readAbbreviatedField(*this, EltEnc)); continue; } @@ -339,9 +299,8 @@ bool BitstreamCursor::ReadBlockInfoBlock() { // ReadAbbrevRecord installs the abbrev in CurAbbrevs. Move it to the // appropriate BlockInfo. - BitCodeAbbrev *Abbv = CurAbbrevs.back(); + CurBlockInfo->Abbrevs.push_back(std::move(CurAbbrevs.back())); CurAbbrevs.pop_back(); - CurBlockInfo->Abbrevs.push_back(Abbv); continue; } diff --git a/lib/Bitcode/Writer/BitWriter.cpp b/lib/Bitcode/Writer/BitWriter.cpp index 3747122..7218ea0 100644 --- a/lib/Bitcode/Writer/BitWriter.cpp +++ b/lib/Bitcode/Writer/BitWriter.cpp @@ -18,10 +18,10 @@ using namespace llvm; /*===-- Operations on modules ---------------------------------------------===*/ int LLVMWriteBitcodeToFile(LLVMModuleRef M, const char *Path) { - std::string ErrorInfo; - raw_fd_ostream OS(Path, ErrorInfo, sys::fs::F_None); + std::error_code EC; + raw_fd_ostream OS(Path, EC, sys::fs::F_None); - if (!ErrorInfo.empty()) + if (EC) return -1; WriteBitcodeToFile(unwrap(M), OS); @@ -39,3 +39,11 @@ int LLVMWriteBitcodeToFD(LLVMModuleRef M, int FD, int ShouldClose, int LLVMWriteBitcodeToFileHandle(LLVMModuleRef M, int FileHandle) { return LLVMWriteBitcodeToFD(M, FileHandle, true, false); } + +LLVMMemoryBufferRef LLVMWriteBitcodeToMemoryBuffer(LLVMModuleRef M) { + std::string Data; + raw_string_ostream OS(Data); + + WriteBitcodeToFile(unwrap(M), OS); + return wrap(MemoryBuffer::getMemBufferCopy(OS.str()).release()); +} diff --git a/lib/Bitcode/Writer/BitcodeWriter.cpp b/lib/Bitcode/Writer/BitcodeWriter.cpp index dd9282a..6cfc357 100644 --- a/lib/Bitcode/Writer/BitcodeWriter.cpp +++ b/lib/Bitcode/Writer/BitcodeWriter.cpp @@ -22,6 +22,7 @@ #include "llvm/IR/Instructions.h" #include "llvm/IR/Module.h" #include "llvm/IR/Operator.h" +#include "llvm/IR/UseListOrder.h" #include "llvm/IR/ValueSymbolTable.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/ErrorHandling.h" @@ -32,12 +33,6 @@ #include using namespace llvm; -static cl::opt -EnablePreserveUseListOrdering("enable-bc-uselist-preserve", - cl::desc("Turn on experimental support for " - "use-list order preservation."), - cl::init(false), cl::Hidden); - /// These are manifest constants used by the bitcode writer. They do not need to /// be kept in sync with the reader, but need to be consistent within this file. 
enum { @@ -201,6 +196,8 @@ static uint64_t getAttrKindEncoding(Attribute::AttrKind Kind) { return bitc::ATTR_KIND_NON_LAZY_BIND; case Attribute::NonNull: return bitc::ATTR_KIND_NON_NULL; + case Attribute::Dereferenceable: + return bitc::ATTR_KIND_DEREFERENCEABLE; case Attribute::NoRedZone: return bitc::ATTR_KIND_NO_RED_ZONE; case Attribute::NoReturn: @@ -272,7 +269,7 @@ static void WriteAttributeGroupTable(const ValueEnumerator &VE, if (Attr.isEnumAttribute()) { Record.push_back(0); Record.push_back(getAttrKindEncoding(Attr.getKindAsEnum())); - } else if (Attr.isAlignAttribute()) { + } else if (Attr.isIntAttribute()) { Record.push_back(1); Record.push_back(getAttrKindEncoding(Attr.getKindAsEnum())); Record.push_back(Attr.getValueAsInt()); @@ -713,18 +710,15 @@ static void WriteModuleInfo(const Module *M, const ValueEnumerator &VE, static uint64_t GetOptimizationFlags(const Value *V) { uint64_t Flags = 0; - if (const OverflowingBinaryOperator *OBO = - dyn_cast(V)) { + if (const auto *OBO = dyn_cast(V)) { if (OBO->hasNoSignedWrap()) Flags |= 1 << bitc::OBO_NO_SIGNED_WRAP; if (OBO->hasNoUnsignedWrap()) Flags |= 1 << bitc::OBO_NO_UNSIGNED_WRAP; - } else if (const PossiblyExactOperator *PEO = - dyn_cast(V)) { + } else if (const auto *PEO = dyn_cast(V)) { if (PEO->isExact()) Flags |= 1 << bitc::PEO_EXACT; - } else if (const FPMathOperator *FPMO = - dyn_cast(V)) { + } else if (const auto *FPMO = dyn_cast(V)) { if (FPMO->hasUnsafeAlgebra()) Flags |= FastMathFlags::UnsafeAlgebra; if (FPMO->hasNoNaNs()) @@ -762,13 +756,13 @@ static void WriteMDNode(const MDNode *N, static void WriteModuleMetadata(const Module *M, const ValueEnumerator &VE, BitstreamWriter &Stream) { - const ValueEnumerator::ValueList &Vals = VE.getMDValues(); + const auto &Vals = VE.getMDValues(); bool StartedMetadataBlock = false; unsigned MDSAbbrev = 0; SmallVector Record; for (unsigned i = 0, e = Vals.size(); i != e; ++i) { - if (const MDNode *N = dyn_cast(Vals[i].first)) { + if (const MDNode *N = dyn_cast(Vals[i])) { if (!N->isFunctionLocal() || !N->getFunction()) { if (!StartedMetadataBlock) { Stream.EnterSubblock(bitc::METADATA_BLOCK_ID, 3); @@ -776,7 +770,7 @@ static void WriteModuleMetadata(const Module *M, } WriteMDNode(N, VE, Stream, Record); } - } else if (const MDString *MDS = dyn_cast(Vals[i].first)) { + } else if (const MDString *MDS = dyn_cast(Vals[i])) { if (!StartedMetadataBlock) { Stream.EnterSubblock(bitc::METADATA_BLOCK_ID, 3); @@ -854,7 +848,7 @@ static void WriteMetadataAttachment(const Function &F, // Write metadata attachments // METADATA_ATTACHMENT - [m x [value, [n x [id, mdnode]]] - SmallVector, 4> MDs; + SmallVector, 4> MDs; for (Function::const_iterator BB = F.begin(), E = F.end(); BB != E; ++BB) for (BasicBlock::const_iterator I = BB->begin(), E = BB->end(); @@ -1431,13 +1425,20 @@ static void WriteInstruction(const Instruction &I, unsigned InstID, break; } - case Instruction::Alloca: + case Instruction::Alloca: { Code = bitc::FUNC_CODE_INST_ALLOCA; Vals.push_back(VE.getTypeID(I.getType())); Vals.push_back(VE.getTypeID(I.getOperand(0)->getType())); Vals.push_back(VE.getValueID(I.getOperand(0))); // size. 
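// A small, self-contained sketch (function names are illustrative, not LLVM
// API) of the packed ALLOCA operand introduced by this patch: the low 5 bits
// hold Log2(alignment) + 1, with 0 meaning "no alignment given", and bit 5
// holds the used-with-inalloca flag. The decode side matches the reader hunk
// earlier in this patch.
#include <cassert>
#include <cstdint>

static uint64_t sketchEncodeAllocaRecord(unsigned Alignment, bool InAlloca) {
  unsigned Field = 0;
  if (Alignment != 0) {
    unsigned Log2 = 0;
    while ((1u << Log2) < Alignment)
      ++Log2; // alignment is a power of two, so this is Log2_32(Alignment)
    Field = Log2 + 1;
  }
  assert(Field < (1u << 5) && "not enough bits for this alignment");
  return Field | (uint64_t(InAlloca) << 5);
}

static void sketchDecodeAllocaRecord(uint64_t Record, unsigned &Alignment,
                                     bool &InAlloca) {
  InAlloca = (Record & (1 << 5)) != 0;
  unsigned Field = Record & ((1 << 5) - 1);
  Alignment = (1u << Field) >> 1; // Field == 0 decodes back to alignment 0
}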
- Vals.push_back(Log2_32(cast(I).getAlignment())+1); + const AllocaInst &AI = cast(I); + unsigned AlignRecord = Log2_32(AI.getAlignment()) + 1; + assert(Log2_32(Value::MaximumAlignment) + 1 < 1 << 5 && + "not enough bits for maximum alignment"); + assert(AlignRecord < 1 << 5 && "alignment greater than 1 << 64"); + AlignRecord |= AI.isUsedWithInAlloca() << 5; + Vals.push_back(AlignRecord); break; + } case Instruction::Load: if (cast(I).isAtomic()) { @@ -1598,6 +1599,39 @@ static void WriteValueSymbolTable(const ValueSymbolTable &VST, Stream.ExitBlock(); } +static void WriteUseList(ValueEnumerator &VE, UseListOrder &&Order, + BitstreamWriter &Stream) { + assert(Order.Shuffle.size() >= 2 && "Shuffle too small"); + unsigned Code; + if (isa(Order.V)) + Code = bitc::USELIST_CODE_BB; + else + Code = bitc::USELIST_CODE_DEFAULT; + + SmallVector Record; + for (unsigned I : Order.Shuffle) + Record.push_back(I); + Record.push_back(VE.getValueID(Order.V)); + Stream.EmitRecord(Code, Record); +} + +static void WriteUseListBlock(const Function *F, ValueEnumerator &VE, + BitstreamWriter &Stream) { + auto hasMore = [&]() { + return !VE.UseListOrders.empty() && VE.UseListOrders.back().F == F; + }; + if (!hasMore()) + // Nothing to do. + return; + + Stream.EnterSubblock(bitc::USELIST_BLOCK_ID, 3); + while (hasMore()) { + WriteUseList(VE, std::move(VE.UseListOrders.back()), Stream); + VE.UseListOrders.pop_back(); + } + Stream.ExitBlock(); +} + /// WriteFunction - Emit a function body to the module stream. static void WriteFunction(const Function &F, ValueEnumerator &VE, BitstreamWriter &Stream) { @@ -1666,6 +1700,8 @@ static void WriteFunction(const Function &F, ValueEnumerator &VE, if (NeedsMetadataAttachment) WriteMetadataAttachment(F, VE, Stream); + if (shouldPreserveBitcodeUseListOrder()) + WriteUseListBlock(&F, VE, Stream); VE.purgeFunction(); Stream.ExitBlock(); } @@ -1831,98 +1867,6 @@ static void WriteBlockInfo(const ValueEnumerator &VE, BitstreamWriter &Stream) { Stream.ExitBlock(); } -// Sort the Users based on the order in which the reader parses the bitcode -// file. -static bool bitcodereader_order(const User *lhs, const User *rhs) { - // TODO: Implement. - return true; -} - -static void WriteUseList(const Value *V, const ValueEnumerator &VE, - BitstreamWriter &Stream) { - - // One or zero uses can't get out of order. - if (V->use_empty() || V->hasNUses(1)) - return; - - // Make a copy of the in-memory use-list for sorting. - SmallVector UserList(V->user_begin(), V->user_end()); - - // Sort the copy based on the order read by the BitcodeReader. - std::sort(UserList.begin(), UserList.end(), bitcodereader_order); - - // TODO: Generate a diff between the BitcodeWriter in-memory use-list and the - // sorted list (i.e., the expected BitcodeReader in-memory use-list). - - // TODO: Emit the USELIST_CODE_ENTRYs. 
-} - -static void WriteFunctionUseList(const Function *F, ValueEnumerator &VE, - BitstreamWriter &Stream) { - VE.incorporateFunction(*F); - - for (Function::const_arg_iterator AI = F->arg_begin(), AE = F->arg_end(); - AI != AE; ++AI) - WriteUseList(AI, VE, Stream); - for (Function::const_iterator BB = F->begin(), FE = F->end(); BB != FE; - ++BB) { - WriteUseList(BB, VE, Stream); - for (BasicBlock::const_iterator II = BB->begin(), IE = BB->end(); II != IE; - ++II) { - WriteUseList(II, VE, Stream); - for (User::const_op_iterator OI = II->op_begin(), E = II->op_end(); - OI != E; ++OI) { - if ((isa(*OI) && !isa(*OI)) || - isa(*OI)) - WriteUseList(*OI, VE, Stream); - } - } - } - VE.purgeFunction(); -} - -// Emit use-lists. -static void WriteModuleUseLists(const Module *M, ValueEnumerator &VE, - BitstreamWriter &Stream) { - Stream.EnterSubblock(bitc::USELIST_BLOCK_ID, 3); - - // XXX: this modifies the module, but in a way that should never change the - // behavior of any pass or codegen in LLVM. The problem is that GVs may - // contain entries in the use_list that do not exist in the Module and are - // not stored in the .bc file. - for (Module::const_global_iterator I = M->global_begin(), E = M->global_end(); - I != E; ++I) - I->removeDeadConstantUsers(); - - // Write the global variables. - for (Module::const_global_iterator GI = M->global_begin(), - GE = M->global_end(); GI != GE; ++GI) { - WriteUseList(GI, VE, Stream); - - // Write the global variable initializers. - if (GI->hasInitializer()) - WriteUseList(GI->getInitializer(), VE, Stream); - } - - // Write the functions. - for (Module::const_iterator FI = M->begin(), FE = M->end(); FI != FE; ++FI) { - WriteUseList(FI, VE, Stream); - if (!FI->isDeclaration()) - WriteFunctionUseList(FI, VE, Stream); - if (FI->hasPrefixData()) - WriteUseList(FI->getPrefixData(), VE, Stream); - } - - // Write the aliases. - for (Module::const_alias_iterator AI = M->alias_begin(), AE = M->alias_end(); - AI != AE; ++AI) { - WriteUseList(AI, VE, Stream); - WriteUseList(AI->getAliasee(), VE, Stream); - } - - Stream.ExitBlock(); -} - /// WriteModule - Emit the specified module to the bitstream. static void WriteModule(const Module *M, BitstreamWriter &Stream) { Stream.EnterSubblock(bitc::MODULE_BLOCK_ID, 3); @@ -1933,7 +1877,7 @@ static void WriteModule(const Module *M, BitstreamWriter &Stream) { Stream.EmitRecord(bitc::MODULE_CODE_VERSION, Vals); // Analyze the module, enumerating globals, functions, etc. - ValueEnumerator VE(M); + ValueEnumerator VE(*M); // Emit blockinfo, which defines the standard abbreviations etc. WriteBlockInfo(VE, Stream); @@ -1965,9 +1909,9 @@ static void WriteModule(const Module *M, BitstreamWriter &Stream) { // Emit names for globals/functions etc. WriteValueSymbolTable(M->getValueSymbolTable(), VE, Stream); - // Emit use-lists. - if (EnablePreserveUseListOrdering) - WriteModuleUseLists(M, VE, Stream); + // Emit module-level use-lists. + if (shouldPreserveBitcodeUseListOrder()) + WriteUseListBlock(nullptr, VE, Stream); // Emit function bodies. 
for (Module::const_iterator F = M->begin(), E = M->end(); F != E; ++F) diff --git a/lib/Bitcode/Writer/ValueEnumerator.cpp b/lib/Bitcode/Writer/ValueEnumerator.cpp index 15f8034..f065c83 100644 --- a/lib/Bitcode/Writer/ValueEnumerator.cpp +++ b/lib/Bitcode/Writer/ValueEnumerator.cpp @@ -18,31 +18,280 @@ #include "llvm/IR/DerivedTypes.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/Module.h" +#include "llvm/IR/UseListOrder.h" #include "llvm/IR/ValueSymbolTable.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" #include using namespace llvm; +namespace { +struct OrderMap { + DenseMap> IDs; + unsigned LastGlobalConstantID; + unsigned LastGlobalValueID; + + OrderMap() : LastGlobalConstantID(0), LastGlobalValueID(0) {} + + bool isGlobalConstant(unsigned ID) const { + return ID <= LastGlobalConstantID; + } + bool isGlobalValue(unsigned ID) const { + return ID <= LastGlobalValueID && !isGlobalConstant(ID); + } + + unsigned size() const { return IDs.size(); } + std::pair &operator[](const Value *V) { return IDs[V]; } + std::pair lookup(const Value *V) const { + return IDs.lookup(V); + } + void index(const Value *V) { + // Explicitly sequence get-size and insert-value operations to avoid UB. + unsigned ID = IDs.size() + 1; + IDs[V].first = ID; + } +}; +} + +static void orderValue(const Value *V, OrderMap &OM) { + if (OM.lookup(V).first) + return; + + if (const Constant *C = dyn_cast(V)) + if (C->getNumOperands() && !isa(C)) + for (const Value *Op : C->operands()) + if (!isa(Op) && !isa(Op)) + orderValue(Op, OM); + + // Note: we cannot cache this lookup above, since inserting into the map + // changes the map's size, and thus affects the other IDs. + OM.index(V); +} + +static OrderMap orderModule(const Module &M) { + // This needs to match the order used by ValueEnumerator::ValueEnumerator() + // and ValueEnumerator::incorporateFunction(). + OrderMap OM; + + // In the reader, initializers of GlobalValues are set *after* all the + // globals have been read. Rather than awkwardly modeling this behaviour + // directly in predictValueUseListOrderImpl(), just assign IDs to + // initializers of GlobalValues before GlobalValues themselves to model this + // implicitly. + for (const GlobalVariable &G : M.globals()) + if (G.hasInitializer()) + if (!isa(G.getInitializer())) + orderValue(G.getInitializer(), OM); + for (const GlobalAlias &A : M.aliases()) + if (!isa(A.getAliasee())) + orderValue(A.getAliasee(), OM); + for (const Function &F : M) + if (F.hasPrefixData()) + if (!isa(F.getPrefixData())) + orderValue(F.getPrefixData(), OM); + OM.LastGlobalConstantID = OM.size(); + + // Initializers of GlobalValues are processed in + // BitcodeReader::ResolveGlobalAndAliasInits(). Match the order there rather + // than ValueEnumerator, and match the code in predictValueUseListOrderImpl() + // by giving IDs in reverse order. + // + // Since GlobalValues never reference each other directly (just through + // initializers), their relative IDs only matter for determining order of + // uses in their initializers. + for (const Function &F : M) + orderValue(&F, OM); + for (const GlobalAlias &A : M.aliases()) + orderValue(&A, OM); + for (const GlobalVariable &G : M.globals()) + orderValue(&G, OM); + OM.LastGlobalValueID = OM.size(); + + for (const Function &F : M) { + if (F.isDeclaration()) + continue; + // Here we need to match the union of ValueEnumerator::incorporateFunction() + // and WriteFunction(). Basic blocks are implicitly declared before + // anything else (by declaring their size). 
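The comment in OrderMap::index() above ("explicitly sequence get-size and insert-value operations") refers to the unspecified evaluation order when a map is both measured and grown in one statement. A small standalone illustration, using std::map instead of DenseMap and hypothetical helper names:

#include <map>

std::map<const void *, unsigned> IDs;

void indexUnsafe(const void *V) {
  // operator[] may insert V before or after size() is read; pre-C++17 the two
  // calls are indeterminately sequenced, so V's ID could come out one too
  // large relative to the number of values indexed so far.
  IDs[V] = IDs.size() + 1;
}

void indexSafe(const void *V) {
  // Read the size first, then insert: V reliably gets ID == N + 1, where N is
  // the number of values already indexed.
  unsigned ID = IDs.size() + 1;
  IDs[V] = ID;
}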
+ for (const BasicBlock &BB : F) + orderValue(&BB, OM); + for (const Argument &A : F.args()) + orderValue(&A, OM); + for (const BasicBlock &BB : F) + for (const Instruction &I : BB) + for (const Value *Op : I.operands()) + if ((isa(*Op) && !isa(*Op)) || + isa(*Op)) + orderValue(Op, OM); + for (const BasicBlock &BB : F) + for (const Instruction &I : BB) + orderValue(&I, OM); + } + return OM; +} + +static void predictValueUseListOrderImpl(const Value *V, const Function *F, + unsigned ID, const OrderMap &OM, + UseListOrderStack &Stack) { + // Predict use-list order for this one. + typedef std::pair Entry; + SmallVector List; + for (const Use &U : V->uses()) + // Check if this user will be serialized. + if (OM.lookup(U.getUser()).first) + List.push_back(std::make_pair(&U, List.size())); + + if (List.size() < 2) + // We may have lost some users. + return; + + bool IsGlobalValue = OM.isGlobalValue(ID); + std::sort(List.begin(), List.end(), [&](const Entry &L, const Entry &R) { + const Use *LU = L.first; + const Use *RU = R.first; + if (LU == RU) + return false; + + auto LID = OM.lookup(LU->getUser()).first; + auto RID = OM.lookup(RU->getUser()).first; + + // Global values are processed in reverse order. + // + // Moreover, initializers of GlobalValues are set *after* all the globals + // have been read (despite having earlier IDs). Rather than awkwardly + // modeling this behaviour here, orderModule() has assigned IDs to + // initializers of GlobalValues before GlobalValues themselves. + if (OM.isGlobalValue(LID) && OM.isGlobalValue(RID)) + return LID < RID; + + // If ID is 4, then expect: 7 6 5 1 2 3. + if (LID < RID) { + if (RID <= ID) + if (!IsGlobalValue) // GlobalValue uses don't get reversed. + return true; + return false; + } + if (RID < LID) { + if (LID <= ID) + if (!IsGlobalValue) // GlobalValue uses don't get reversed. + return false; + return true; + } + + // LID and RID are equal, so we have different operands of the same user. + // Assume operands are added in order for all instructions. + if (LID <= ID) + if (!IsGlobalValue) // GlobalValue uses don't get reversed. + return LU->getOperandNo() < RU->getOperandNo(); + return LU->getOperandNo() > RU->getOperandNo(); + }); + + if (std::is_sorted( + List.begin(), List.end(), + [](const Entry &L, const Entry &R) { return L.second < R.second; })) + // Order is already correct. + return; + + // Store the shuffle. + Stack.emplace_back(V, F, List.size()); + assert(List.size() == Stack.back().Shuffle.size() && "Wrong size"); + for (size_t I = 0, E = List.size(); I != E; ++I) + Stack.back().Shuffle[I] = List[I].second; +} + +static void predictValueUseListOrder(const Value *V, const Function *F, + OrderMap &OM, UseListOrderStack &Stack) { + auto &IDPair = OM[V]; + assert(IDPair.first && "Unmapped value"); + if (IDPair.second) + // Already predicted. + return; + + // Do the actual prediction. + IDPair.second = true; + if (!V->use_empty() && std::next(V->use_begin()) != V->use_end()) + predictValueUseListOrderImpl(V, F, IDPair.first, OM, Stack); + + // Recursive descent into constants. + if (const Constant *C = dyn_cast(V)) + if (C->getNumOperands()) // Visit GlobalValues. + for (const Value *Op : C->operands()) + if (isa(Op)) // Visit GlobalValues. + predictValueUseListOrder(Op, F, OM, Stack); +} + +static UseListOrderStack predictUseListOrder(const Module &M) { + OrderMap OM = orderModule(M); + + // Use-list orders need to be serialized after all the users have been added + // to a value, or else the shuffles will be incomplete. 
Store them per + // function in a stack. + // + // Aside from function order, the order of values doesn't matter much here. + UseListOrderStack Stack; + + // We want to visit the functions backward now so we can list function-local + // constants in the last Function they're used in. Module-level constants + // have already been visited above. + for (auto I = M.rbegin(), E = M.rend(); I != E; ++I) { + const Function &F = *I; + if (F.isDeclaration()) + continue; + for (const BasicBlock &BB : F) + predictValueUseListOrder(&BB, &F, OM, Stack); + for (const Argument &A : F.args()) + predictValueUseListOrder(&A, &F, OM, Stack); + for (const BasicBlock &BB : F) + for (const Instruction &I : BB) + for (const Value *Op : I.operands()) + if (isa(*Op) || isa(*Op)) // Visit GlobalValues. + predictValueUseListOrder(Op, &F, OM, Stack); + for (const BasicBlock &BB : F) + for (const Instruction &I : BB) + predictValueUseListOrder(&I, &F, OM, Stack); + } + + // Visit globals last, since the module-level use-list block will be seen + // before the function bodies are processed. + for (const GlobalVariable &G : M.globals()) + predictValueUseListOrder(&G, nullptr, OM, Stack); + for (const Function &F : M) + predictValueUseListOrder(&F, nullptr, OM, Stack); + for (const GlobalAlias &A : M.aliases()) + predictValueUseListOrder(&A, nullptr, OM, Stack); + for (const GlobalVariable &G : M.globals()) + if (G.hasInitializer()) + predictValueUseListOrder(G.getInitializer(), nullptr, OM, Stack); + for (const GlobalAlias &A : M.aliases()) + predictValueUseListOrder(A.getAliasee(), nullptr, OM, Stack); + for (const Function &F : M) + if (F.hasPrefixData()) + predictValueUseListOrder(F.getPrefixData(), nullptr, OM, Stack); + + return Stack; +} + static bool isIntOrIntVectorValue(const std::pair &V) { return V.first->getType()->isIntOrIntVectorTy(); } -/// ValueEnumerator - Enumerate module-level information. -ValueEnumerator::ValueEnumerator(const Module *M) { +ValueEnumerator::ValueEnumerator(const Module &M) { + if (shouldPreserveBitcodeUseListOrder()) + UseListOrders = predictUseListOrder(M); + // Enumerate the global variables. - for (Module::const_global_iterator I = M->global_begin(), - E = M->global_end(); I != E; ++I) + for (Module::const_global_iterator I = M.global_begin(), E = M.global_end(); + I != E; ++I) EnumerateValue(I); // Enumerate the functions. - for (Module::const_iterator I = M->begin(), E = M->end(); I != E; ++I) { + for (Module::const_iterator I = M.begin(), E = M.end(); I != E; ++I) { EnumerateValue(I); EnumerateAttributes(cast(I)->getAttributes()); } // Enumerate the aliases. - for (Module::const_alias_iterator I = M->alias_begin(), E = M->alias_end(); + for (Module::const_alias_iterator I = M.alias_begin(), E = M.alias_end(); I != E; ++I) EnumerateValue(I); @@ -50,30 +299,30 @@ ValueEnumerator::ValueEnumerator(const Module *M) { unsigned FirstConstant = Values.size(); // Enumerate the global variable initializers. - for (Module::const_global_iterator I = M->global_begin(), - E = M->global_end(); I != E; ++I) + for (Module::const_global_iterator I = M.global_begin(), E = M.global_end(); + I != E; ++I) if (I->hasInitializer()) EnumerateValue(I->getInitializer()); // Enumerate the aliasees. - for (Module::const_alias_iterator I = M->alias_begin(), E = M->alias_end(); + for (Module::const_alias_iterator I = M.alias_begin(), E = M.alias_end(); I != E; ++I) EnumerateValue(I->getAliasee()); // Enumerate the prefix data constants. 
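To make the comparator in predictValueUseListOrderImpl() above concrete, here is a worked example matching its own comment ("If ID is 4, then expect: 7 6 5 1 2 3"), reduced to the non-GlobalValue case with distinct user IDs; the variable names are illustrative only:

#include <algorithm>
#include <cstdio>
#include <vector>

int main() {
  const unsigned ID = 4;                          // the value's own ID
  std::vector<unsigned> UserIDs = {1, 2, 3, 5, 6, 7};
  std::sort(UserIDs.begin(), UserIDs.end(), [&](unsigned L, unsigned R) {
    if (L < R)
      return R <= ID; // user IDs at or below the value's keep ascending order
    if (R < L)
      return L > ID;  // user IDs above the value's come first, descending
    return false;
  });
  for (unsigned U : UserIDs)
    std::printf("%u ", U);                        // prints "7 6 5 1 2 3"
  std::printf("\n");
  return 0;
}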
- for (Module::const_iterator I = M->begin(), E = M->end(); I != E; ++I) + for (Module::const_iterator I = M.begin(), E = M.end(); I != E; ++I) if (I->hasPrefixData()) EnumerateValue(I->getPrefixData()); // Insert constants and metadata that are named at module level into the slot // pool so that the module symbol table can refer to them... - EnumerateValueSymbolTable(M->getValueSymbolTable()); + EnumerateValueSymbolTable(M.getValueSymbolTable()); EnumerateNamedMetadata(M); - SmallVector, 8> MDs; + SmallVector, 8> MDs; // Enumerate types used by function bodies and argument lists. - for (const Function &F : *M) { + for (const Function &F : M) { for (const Argument &A : F.args()) EnumerateType(A.getType()); @@ -179,6 +428,11 @@ void ValueEnumerator::print(raw_ostream &OS, const ValueMapType &Map, void ValueEnumerator::OptimizeConstants(unsigned CstStart, unsigned CstEnd) { if (CstStart == CstEnd || CstStart+1 == CstEnd) return; + if (shouldPreserveBitcodeUseListOrder()) + // Optimizing constants makes the use-list order difficult to predict. + // Disable it for now when trying to preserve the order. + return; + std::stable_sort(Values.begin() + CstStart, Values.begin() + CstEnd, [this](const std::pair &LHS, const std::pair &RHS) { @@ -209,11 +463,12 @@ void ValueEnumerator::EnumerateValueSymbolTable(const ValueSymbolTable &VST) { EnumerateValue(VI->getValue()); } -/// EnumerateNamedMetadata - Insert all of the values referenced by -/// named metadata in the specified module. -void ValueEnumerator::EnumerateNamedMetadata(const Module *M) { - for (Module::const_named_metadata_iterator I = M->named_metadata_begin(), - E = M->named_metadata_end(); I != E; ++I) +/// Insert all of the values referenced by named metadata in the specified +/// module. +void ValueEnumerator::EnumerateNamedMetadata(const Module &M) { + for (Module::const_named_metadata_iterator I = M.named_metadata_begin(), + E = M.named_metadata_end(); + I != E; ++I) EnumerateNamedMDNode(I); } @@ -239,31 +494,31 @@ void ValueEnumerator::EnumerateMDNodeOperands(const MDNode *N) { void ValueEnumerator::EnumerateMetadata(const Value *MD) { assert((isa(MD) || isa(MD)) && "Invalid metadata kind"); - // Enumerate the type of this value. - EnumerateType(MD->getType()); - + // Skip function-local nodes themselves, but walk their operands. const MDNode *N = dyn_cast(MD); - - // In the module-level pass, skip function-local nodes themselves, but - // do walk their operands. if (N && N->isFunctionLocal() && N->getFunction()) { EnumerateMDNodeOperands(N); return; } - // Check to see if it's already in! - unsigned &MDValueID = MDValueMap[MD]; - if (MDValueID) { - // Increment use count. - MDValues[MDValueID-1].second++; + // Insert a dummy ID to block the co-recursive call to + // EnumerateMDNodeOperands() from re-visiting MD in a cyclic graph. + // + // Return early if there's already an ID. + if (!MDValueMap.insert(std::make_pair(MD, 0)).second) return; - } - MDValues.push_back(std::make_pair(MD, 1U)); - MDValueID = MDValues.size(); - // Enumerate all non-function-local operands. + // Enumerate the type of this value. + EnumerateType(MD->getType()); + + // Visit operands first to minimize RAUW. if (N) EnumerateMDNodeOperands(N); + + // Replace the dummy ID inserted above with the correct one. MDValueMap may + // have changed by inserting operands, so we need a fresh lookup here. 
+ MDValues.push_back(MD); + MDValueMap[MD] = MDValues.size(); } /// EnumerateFunctionLocalMetadataa - Incorporate function-local metadata @@ -277,12 +532,10 @@ void ValueEnumerator::EnumerateFunctionLocalMetadata(const MDNode *N) { // Check to see if it's already in! unsigned &MDValueID = MDValueMap[N]; - if (MDValueID) { - // Increment use count. - MDValues[MDValueID-1].second++; + if (MDValueID) return; - } - MDValues.push_back(std::make_pair(N, 1U)); + + MDValues.push_back(N); MDValueID = MDValues.size(); // To incoroporate function-local information visit all function-local @@ -487,7 +740,7 @@ void ValueEnumerator::incorporateFunction(const Function &F) { FnLocalMDVector.push_back(MD); } - SmallVector, 8> MDs; + SmallVector, 8> MDs; I->getAllMetadataOtherThanDebugLoc(MDs); for (unsigned i = 0, e = MDs.size(); i != e; ++i) { MDNode *N = MDs[i].second; @@ -510,7 +763,7 @@ void ValueEnumerator::purgeFunction() { for (unsigned i = NumModuleValues, e = Values.size(); i != e; ++i) ValueMap.erase(Values[i].first); for (unsigned i = NumModuleMDValues, e = MDValues.size(); i != e; ++i) - MDValueMap.erase(MDValues[i].first); + MDValueMap.erase(MDValues[i]); for (unsigned i = 0, e = BasicBlocks.size(); i != e; ++i) ValueMap.erase(BasicBlocks[i]); diff --git a/lib/Bitcode/Writer/ValueEnumerator.h b/lib/Bitcode/Writer/ValueEnumerator.h index 1c9f38e..563c214 100644 --- a/lib/Bitcode/Writer/ValueEnumerator.h +++ b/lib/Bitcode/Writer/ValueEnumerator.h @@ -11,13 +11,14 @@ // //===----------------------------------------------------------------------===// -#ifndef VALUE_ENUMERATOR_H -#define VALUE_ENUMERATOR_H +#ifndef LLVM_LIB_BITCODE_WRITER_VALUEENUMERATOR_H +#define LLVM_LIB_BITCODE_WRITER_VALUEENUMERATOR_H #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/UniqueVector.h" #include "llvm/IR/Attributes.h" +#include "llvm/IR/UseListOrder.h" #include namespace llvm { @@ -42,6 +43,9 @@ public: // For each value, we remember its Value* and occurrence frequency. 
typedef std::vector > ValueList; + + UseListOrderStack UseListOrders; + private: typedef DenseMap TypeMapType; TypeMapType TypeMap; @@ -54,7 +58,7 @@ private: typedef UniqueVector ComdatSetType; ComdatSetType Comdats; - ValueList MDValues; + std::vector MDValues; SmallVector FunctionLocalMDs; ValueMapType MDValueMap; @@ -92,7 +96,7 @@ private: ValueEnumerator(const ValueEnumerator &) LLVM_DELETED_FUNCTION; void operator=(const ValueEnumerator &) LLVM_DELETED_FUNCTION; public: - ValueEnumerator(const Module *M); + ValueEnumerator(const Module &M); void dump() const; void print(raw_ostream &OS, const ValueMapType &Map, const char *Name) const; @@ -130,7 +134,7 @@ public: } const ValueList &getValues() const { return Values; } - const ValueList &getMDValues() const { return MDValues; } + const std::vector &getMDValues() const { return MDValues; } const SmallVectorImpl &getFunctionLocalMDValues() const { return FunctionLocalMDs; } @@ -172,7 +176,7 @@ private: void EnumerateAttributes(AttributeSet PAL); void EnumerateValueSymbolTable(const ValueSymbolTable &ST); - void EnumerateNamedMetadata(const Module *M); + void EnumerateNamedMetadata(const Module &M); }; } // End llvm namespace diff --git a/lib/CodeGen/AggressiveAntiDepBreaker.cpp b/lib/CodeGen/AggressiveAntiDepBreaker.cpp index 0f38c64..91c1314 100644 --- a/lib/CodeGen/AggressiveAntiDepBreaker.cpp +++ b/lib/CodeGen/AggressiveAntiDepBreaker.cpp @@ -24,7 +24,6 @@ #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetInstrInfo.h" -#include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetRegisterInfo.h" using namespace llvm; @@ -111,18 +110,13 @@ bool AggressiveAntiDepState::IsLive(unsigned Reg) return((KillIndices[Reg] != ~0u) && (DefIndices[Reg] == ~0u)); } - - -AggressiveAntiDepBreaker:: -AggressiveAntiDepBreaker(MachineFunction& MFi, - const RegisterClassInfo &RCI, - TargetSubtargetInfo::RegClassVector& CriticalPathRCs) : - AntiDepBreaker(), MF(MFi), - MRI(MF.getRegInfo()), - TII(MF.getTarget().getInstrInfo()), - TRI(MF.getTarget().getRegisterInfo()), - RegClassInfo(RCI), - State(nullptr) { +AggressiveAntiDepBreaker::AggressiveAntiDepBreaker( + MachineFunction &MFi, const RegisterClassInfo &RCI, + TargetSubtargetInfo::RegClassVector &CriticalPathRCs) + : AntiDepBreaker(), MF(MFi), MRI(MF.getRegInfo()), + TII(MF.getSubtarget().getInstrInfo()), + TRI(MF.getSubtarget().getRegisterInfo()), RegClassInfo(RCI), + State(nullptr) { /* Collect a bitset of all registers that are only broken if they are on the critical path. */ for (unsigned i = 0, e = CriticalPathRCs.size(); i < e; ++i) { @@ -262,11 +256,8 @@ static void AntiDepEdges(const SUnit *SU, std::vector& Edges) { for (SUnit::const_pred_iterator P = SU->Preds.begin(), PE = SU->Preds.end(); P != PE; ++P) { if ((P->getKind() == SDep::Anti) || (P->getKind() == SDep::Output)) { - unsigned Reg = P->getReg(); - if (RegSet.count(Reg) == 0) { + if (RegSet.insert(P->getReg()).second) Edges.push_back(&*P); - RegSet.insert(Reg); - } } } } @@ -527,7 +518,7 @@ BitVector AggressiveAntiDepBreaker::GetRenameRegisters(unsigned Reg) { BV &= RCBV; } - DEBUG(dbgs() << " " << RC->getName()); + DEBUG(dbgs() << " " << TRI->getRegClassName(RC)); } return BV; @@ -582,7 +573,9 @@ bool AggressiveAntiDepBreaker::FindSuitableFreeRegisters( unsigned Reg = Regs[i]; if (Reg == SuperReg) continue; bool IsSub = TRI->isSubRegister(SuperReg, Reg); - assert(IsSub && "Expecting group subregister"); + // FIXME: remove this once PR18663 has been properly fixed. 
For now, + // return a conservative answer: + // assert(IsSub && "Expecting group subregister"); if (!IsSub) return false; } @@ -618,8 +611,7 @@ bool AggressiveAntiDepBreaker::FindSuitableFreeRegisters( DEBUG(dbgs() << "\tFind Registers:"); - if (RenameOrder.count(SuperRC) == 0) - RenameOrder.insert(RenameOrderType::value_type(SuperRC, Order.size())); + RenameOrder.insert(RenameOrderType::value_type(SuperRC, Order.size())); unsigned OrigR = RenameOrder[SuperRC]; unsigned EndR = ((OrigR == Order.size()) ? 0 : OrigR); diff --git a/lib/CodeGen/AggressiveAntiDepBreaker.h b/lib/CodeGen/AggressiveAntiDepBreaker.h index 2ab9d89..12cf95b 100644 --- a/lib/CodeGen/AggressiveAntiDepBreaker.h +++ b/lib/CodeGen/AggressiveAntiDepBreaker.h @@ -14,8 +14,8 @@ // //===----------------------------------------------------------------------===// -#ifndef LLVM_CODEGEN_AGGRESSIVEANTIDEPBREAKER_H -#define LLVM_CODEGEN_AGGRESSIVEANTIDEPBREAKER_H +#ifndef LLVM_LIB_CODEGEN_AGGRESSIVEANTIDEPBREAKER_H +#define LLVM_LIB_CODEGEN_AGGRESSIVEANTIDEPBREAKER_H #include "AntiDepBreaker.h" #include "llvm/ADT/BitVector.h" @@ -32,88 +32,83 @@ namespace llvm { class RegisterClassInfo; - /// Class AggressiveAntiDepState /// Contains all the state necessary for anti-dep breaking. class AggressiveAntiDepState { public: - /// RegisterReference - Information about a register reference - /// within a liverange + /// Information about a register reference within a liverange typedef struct { - /// Operand - The registers operand + /// The registers operand MachineOperand *Operand; - /// RC - The register class + /// The register class const TargetRegisterClass *RC; } RegisterReference; private: - /// NumTargetRegs - Number of non-virtual target registers - /// (i.e. TRI->getNumRegs()). + /// Number of non-virtual target registers (i.e. TRI->getNumRegs()). const unsigned NumTargetRegs; - /// GroupNodes - Implements a disjoint-union data structure to + /// Implements a disjoint-union data structure to /// form register groups. A node is represented by an index into /// the vector. A node can "point to" itself to indicate that it /// is the parent of a group, or point to another node to indicate /// that it is a member of the same group as that node. std::vector GroupNodes; - /// GroupNodeIndices - For each register, the index of the GroupNode + /// For each register, the index of the GroupNode /// currently representing the group that the register belongs to. /// Register 0 is always represented by the 0 group, a group /// composed of registers that are not eligible for anti-aliasing. std::vector GroupNodeIndices; - /// RegRefs - Map registers to all their references within a live range. + /// Map registers to all their references within a live range. std::multimap RegRefs; - /// KillIndices - The index of the most recent kill (proceding bottom-up), + /// The index of the most recent kill (proceding bottom-up), /// or ~0u if the register is not live. std::vector KillIndices; - /// DefIndices - The index of the most recent complete def (proceding bottom + /// The index of the most recent complete def (proceding bottom /// up), or ~0u if the register is live. std::vector DefIndices; public: AggressiveAntiDepState(const unsigned TargetRegs, MachineBasicBlock *BB); - /// GetKillIndices - Return the kill indices. + /// Return the kill indices. std::vector &GetKillIndices() { return KillIndices; } - /// GetDefIndices - Return the define indices. + /// Return the define indices. 
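As a side note on the FindSuitableFreeRegisters() hunk above: dropping the explicit count() guard around RenameOrder.insert() is behavior-preserving because std::map::insert never overwrites an existing key. A minimal illustration, with placeholder key and value constants:

#include <cassert>
#include <map>

int main() {
  std::map<int, unsigned> RenameOrder;
  RenameOrder.insert(std::make_pair(1, 10u));            // inserts {1, 10}
  auto Result = RenameOrder.insert(std::make_pair(1, 99u));
  assert(!Result.second);        // second insert reports "not inserted"
  assert(RenameOrder[1] == 10u); // and the original mapping is preserved
  return 0;
}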
std::vector &GetDefIndices() { return DefIndices; } - /// GetRegRefs - Return the RegRefs map. + /// Return the RegRefs map. std::multimap& GetRegRefs() { return RegRefs; } - // GetGroup - Get the group for a register. The returned value is + // Get the group for a register. The returned value is // the index of the GroupNode representing the group. unsigned GetGroup(unsigned Reg); - // GetGroupRegs - Return a vector of the registers belonging to a - // group. If RegRefs is non-NULL then only included referenced registers. + // Return a vector of the registers belonging to a group. + // If RegRefs is non-NULL then only included referenced registers. void GetGroupRegs( unsigned Group, std::vector &Regs, std::multimap *RegRefs); - // UnionGroups - Union Reg1's and Reg2's groups to form a new - // group. Return the index of the GroupNode representing the - // group. + // Union Reg1's and Reg2's groups to form a new group. + // Return the index of the GroupNode representing the group. unsigned UnionGroups(unsigned Reg1, unsigned Reg2); - // LeaveGroup - Remove a register from its current group and place + // Remove a register from its current group and place // it alone in its own group. Return the index of the GroupNode // representing the registers new group. unsigned LeaveGroup(unsigned Reg); - /// IsLive - Return true if Reg is live + /// Return true if Reg is live. bool IsLive(unsigned Reg); }; - /// Class AggressiveAntiDepBreaker class AggressiveAntiDepBreaker : public AntiDepBreaker { MachineFunction& MF; MachineRegisterInfo &MRI; @@ -121,12 +116,11 @@ class RegisterClassInfo; const TargetRegisterInfo *TRI; const RegisterClassInfo &RegClassInfo; - /// CriticalPathSet - The set of registers that should only be + /// The set of registers that should only be /// renamed if they are on the critical path. BitVector CriticalPathSet; - /// State - The state used to identify and rename anti-dependence - /// registers. + /// The state used to identify and rename anti-dependence registers. AggressiveAntiDepState *State; public: @@ -135,11 +129,10 @@ class RegisterClassInfo; TargetSubtargetInfo::RegClassVector& CriticalPathRCs); ~AggressiveAntiDepBreaker(); - /// Start - Initialize anti-dep breaking for a new basic block. + /// Initialize anti-dep breaking for a new basic block. void StartBlock(MachineBasicBlock *BB) override; - /// BreakAntiDependencies - Identifiy anti-dependencies along the critical - /// path + /// Identifiy anti-dependencies along the critical path /// of the ScheduleDAG and break them by renaming registers. /// unsigned BreakAntiDependencies(const std::vector& SUnits, @@ -148,24 +141,24 @@ class RegisterClassInfo; unsigned InsertPosIndex, DbgValueVector &DbgValues) override; - /// Observe - Update liveness information to account for the current + /// Update liveness information to account for the current /// instruction, which will not be scheduled. /// void Observe(MachineInstr *MI, unsigned Count, unsigned InsertPosIndex) override; - /// Finish - Finish anti-dep breaking for a basic block. + /// Finish anti-dep breaking for a basic block. void FinishBlock() override; private: /// Keep track of a position in the allocation order for each regclass. 
typedef std::map RenameOrderType; - /// IsImplicitDefUse - Return true if MO represents a register + /// Return true if MO represents a register /// that is both implicitly used and defined in MI bool IsImplicitDefUse(MachineInstr *MI, MachineOperand& MO); - /// GetPassthruRegs - If MI implicitly def/uses a register, then + /// If MI implicitly def/uses a register, then /// return that register and all subregisters. void GetPassthruRegs(MachineInstr *MI, std::set& PassthruRegs); diff --git a/lib/CodeGen/AllocationOrder.h b/lib/CodeGen/AllocationOrder.h index 64ff2a7..1e4eaa7 100644 --- a/lib/CodeGen/AllocationOrder.h +++ b/lib/CodeGen/AllocationOrder.h @@ -14,8 +14,8 @@ // //===----------------------------------------------------------------------===// -#ifndef LLVM_CODEGEN_ALLOCATIONORDER_H -#define LLVM_CODEGEN_ALLOCATIONORDER_H +#ifndef LLVM_LIB_CODEGEN_ALLOCATIONORDER_H +#define LLVM_LIB_CODEGEN_ALLOCATIONORDER_H #include "llvm/ADT/ArrayRef.h" #include "llvm/MC/MCRegisterInfo.h" diff --git a/lib/CodeGen/Analysis.cpp b/lib/CodeGen/Analysis.cpp index 1bdf312..9a3b790 100644 --- a/lib/CodeGen/Analysis.cpp +++ b/lib/CodeGen/Analysis.cpp @@ -25,6 +25,9 @@ #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/MathExtras.h" #include "llvm/Target/TargetLowering.h" +#include "llvm/Target/TargetSubtargetInfo.h" +#include "llvm/Transforms/Utils/GlobalStatus.h" + using namespace llvm; /// ComputeLinearIndex - Given an LLVM IR aggregate type and a sequence @@ -106,15 +109,16 @@ void llvm::ComputeValueVTs(const TargetLowering &TLI, Type *Ty, } /// ExtractTypeInfo - Returns the type info, possibly bitcast, encoded in V. -GlobalVariable *llvm::ExtractTypeInfo(Value *V) { +GlobalValue *llvm::ExtractTypeInfo(Value *V) { V = V->stripPointerCasts(); - GlobalVariable *GV = dyn_cast(V); + GlobalValue *GV = dyn_cast(V); + GlobalVariable *Var = dyn_cast(V); - if (GV && GV->getName() == "llvm.eh.catch.all.value") { - assert(GV->hasInitializer() && + if (Var && Var->getName() == "llvm.eh.catch.all.value") { + assert(Var->hasInitializer() && "The EH catch-all value must have an initializer"); - Value *Init = GV->getInitializer(); - GV = dyn_cast(Init); + Value *Init = Var->getInitializer(); + GV = dyn_cast(Init); if (!GV) V = cast(Init); } @@ -475,7 +479,7 @@ static bool nextRealType(SmallVectorImpl &SubTypes, /// between it and the return. /// /// This function only tests target-independent requirements. -bool llvm::isInTailCallPosition(ImmutableCallSite CS, const SelectionDAG &DAG) { +bool llvm::isInTailCallPosition(ImmutableCallSite CS, const TargetMachine &TM) { const Instruction *I = CS.getInstruction(); const BasicBlock *ExitBB = I->getParent(); const TerminatorInst *Term = ExitBB->getTerminator(); @@ -490,8 +494,7 @@ bool llvm::isInTailCallPosition(ImmutableCallSite CS, const SelectionDAG &DAG) { // longjmp on x86), it can end up causing miscompilation that has not // been fully understood. 
if (!Ret && - (!DAG.getTarget().Options.GuaranteedTailCallOpt || - !isa(Term))) + (!TM.Options.GuaranteedTailCallOpt || !isa(Term))) return false; // If I will have a chain, make sure no other instruction that will have a @@ -509,8 +512,8 @@ bool llvm::isInTailCallPosition(ImmutableCallSite CS, const SelectionDAG &DAG) { return false; } - return returnTypeIsEligibleForTailCall(ExitBB->getParent(), I, Ret, - *DAG.getTarget().getTargetLowering()); + return returnTypeIsEligibleForTailCall( + ExitBB->getParent(), I, Ret, *TM.getSubtargetImpl()->getTargetLowering()); } bool llvm::returnTypeIsEligibleForTailCall(const Function *F, @@ -607,3 +610,29 @@ bool llvm::returnTypeIsEligibleForTailCall(const Function *F, return true; } + +bool llvm::canBeOmittedFromSymbolTable(const GlobalValue *GV) { + if (!GV->hasLinkOnceODRLinkage()) + return false; + + if (GV->hasUnnamedAddr()) + return true; + + // If it is a non constant variable, it needs to be uniqued across shared + // objects. + if (const GlobalVariable *Var = dyn_cast(GV)) { + if (!Var->isConstant()) + return false; + } + + // An alias can point to a variable. We could try to resolve the alias to + // decide, but for now just don't hide them. + if (isa(GV)) + return false; + + GlobalStatus GS; + if (GlobalStatus::analyzeGlobal(GV, GS)) + return false; + + return !GS.IsCompared; +} diff --git a/lib/CodeGen/Android.mk b/lib/CodeGen/Android.mk index 05e5c45..5cb351d 100644 --- a/lib/CodeGen/Android.mk +++ b/lib/CodeGen/Android.mk @@ -4,7 +4,7 @@ codegen_SRC_FILES := \ AggressiveAntiDepBreaker.cpp \ AllocationOrder.cpp \ Analysis.cpp \ - AtomicExpandLoadLinkedPass.cpp \ + AtomicExpandPass.cpp \ BasicTargetTransformInfo.cpp \ BranchFolding.cpp \ CalcSpillWeights.cpp \ @@ -21,6 +21,7 @@ codegen_SRC_FILES := \ ExecutionDepsFix.cpp \ ExpandISelPseudos.cpp \ ExpandPostRAPseudos.cpp \ + ForwardControlFlowIntegrity.cpp \ GCMetadata.cpp \ GCMetadataPrinter.cpp \ GCStrategy.cpp \ @@ -29,7 +30,6 @@ codegen_SRC_FILES := \ InlineSpiller.cpp \ InterferenceCache.cpp \ IntrinsicLowering.cpp \ - JITCodeEmitter.cpp \ JumpInstrTables.cpp \ LatencyPriorityQueue.cpp \ LexicalScopes.cpp \ @@ -49,7 +49,7 @@ codegen_SRC_FILES := \ MachineBlockFrequencyInfo.cpp \ MachineBlockPlacement.cpp \ MachineBranchProbabilityInfo.cpp \ - MachineCodeEmitter.cpp \ + MachineCombiner.cpp \ MachineCopyPropagation.cpp \ MachineCSE.cpp \ MachineDominators.cpp \ @@ -97,7 +97,6 @@ codegen_SRC_FILES := \ ShadowStackGC.cpp \ SjLjEHPrepare.cpp \ SlotIndexes.cpp \ - Spiller.cpp \ SpillPlacement.cpp \ SplitKit.cpp \ StackColoring.cpp \ diff --git a/lib/CodeGen/AntiDepBreaker.h b/lib/CodeGen/AntiDepBreaker.h index df47f98..a61a8ef 100644 --- a/lib/CodeGen/AntiDepBreaker.h +++ b/lib/CodeGen/AntiDepBreaker.h @@ -12,8 +12,8 @@ // //===----------------------------------------------------------------------===// -#ifndef LLVM_CODEGEN_ANTIDEPBREAKER_H -#define LLVM_CODEGEN_ANTIDEPBREAKER_H +#ifndef LLVM_LIB_CODEGEN_ANTIDEPBREAKER_H +#define LLVM_LIB_CODEGEN_ANTIDEPBREAKER_H #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineFrameInfo.h" @@ -25,9 +25,8 @@ namespace llvm { -/// AntiDepBreaker - This class works into conjunction with the -/// post-RA scheduler to rename registers to break register -/// anti-dependencies. +/// This class works in conjunction with the post-RA scheduler to rename +/// registers to break register anti-dependencies (WAR hazards). 
class AntiDepBreaker { public: typedef std::vector > @@ -35,29 +34,26 @@ public: virtual ~AntiDepBreaker(); - /// Start - Initialize anti-dep breaking for a new basic block. + /// Initialize anti-dep breaking for a new basic block. virtual void StartBlock(MachineBasicBlock *BB) =0; - /// BreakAntiDependencies - Identifiy anti-dependencies within a - /// basic-block region and break them by renaming registers. Return - /// the number of anti-dependencies broken. - /// + /// Identifiy anti-dependencies within a basic-block region and break them by + /// renaming registers. Return the number of anti-dependencies broken. virtual unsigned BreakAntiDependencies(const std::vector& SUnits, MachineBasicBlock::iterator Begin, MachineBasicBlock::iterator End, unsigned InsertPosIndex, DbgValueVector &DbgValues) = 0; - /// Observe - Update liveness information to account for the current + /// Update liveness information to account for the current /// instruction, which will not be scheduled. - /// virtual void Observe(MachineInstr *MI, unsigned Count, unsigned InsertPosIndex) =0; - /// Finish - Finish anti-dep breaking for a basic block. + /// Finish anti-dep breaking for a basic block. virtual void FinishBlock() =0; - /// UpdateDbgValue - Update DBG_VALUE if dependency breaker is updating + /// Update DBG_VALUE if dependency breaker is updating /// other machine instruction to use NewReg. void UpdateDbgValue(MachineInstr *MI, unsigned OldReg, unsigned NewReg) { assert (MI->isDebugValue() && "MI is not DBG_VALUE!"); diff --git a/lib/CodeGen/AsmPrinter/ARMException.cpp b/lib/CodeGen/AsmPrinter/ARMException.cpp index 251f5ef..66c6c63 100644 --- a/lib/CodeGen/AsmPrinter/ARMException.cpp +++ b/lib/CodeGen/AsmPrinter/ARMException.cpp @@ -108,7 +108,7 @@ void ARMException::endFunction(const MachineFunction *) { } void ARMException::emitTypeInfos(unsigned TTypeEncoding) { - const std::vector &TypeInfos = MMI->getTypeInfos(); + const std::vector &TypeInfos = MMI->getTypeInfos(); const std::vector &FilterIds = MMI->getFilterIds(); bool VerboseAsm = Asm->OutStreamer.isVerboseAsm(); @@ -121,9 +121,9 @@ void ARMException::emitTypeInfos(unsigned TTypeEncoding) { Entry = TypeInfos.size(); } - for (std::vector::const_reverse_iterator + for (std::vector::const_reverse_iterator I = TypeInfos.rbegin(), E = TypeInfos.rend(); I != E; ++I) { - const GlobalVariable *GV = *I; + const GlobalValue *GV = *I; if (VerboseAsm) Asm->OutStreamer.AddComment("TypeInfo " + Twine(Entry--)); Asm->EmitTTypeReference(GV, TTypeEncoding); diff --git a/lib/CodeGen/AsmPrinter/AddressPool.h b/lib/CodeGen/AsmPrinter/AddressPool.h index 42757d7..802e050 100644 --- a/lib/CodeGen/AsmPrinter/AddressPool.h +++ b/lib/CodeGen/AsmPrinter/AddressPool.h @@ -7,8 +7,8 @@ // //===----------------------------------------------------------------------===// -#ifndef CODEGEN_ASMPRINTER_ADDRESSPOOL_H__ -#define CODEGEN_ASMPRINTER_ADDRESSPOOL_H__ +#ifndef LLVM_LIB_CODEGEN_ASMPRINTER_ADDRESSPOOL_H +#define LLVM_LIB_CODEGEN_ASMPRINTER_ADDRESSPOOL_H #include "llvm/ADT/DenseMap.h" diff --git a/lib/CodeGen/AsmPrinter/Android.mk b/lib/CodeGen/AsmPrinter/Android.mk index 083cc0d..cb8e96a 100644 --- a/lib/CodeGen/AsmPrinter/Android.mk +++ b/lib/CodeGen/AsmPrinter/Android.mk @@ -11,6 +11,7 @@ codegen_asmprinter_SRC_FILES := \ DIEHash.cpp \ DwarfAccelTable.cpp \ DwarfCFIException.cpp \ + DwarfCompileUnit.cpp \ DwarfDebug.cpp \ DwarfFile.cpp \ DwarfStringPool.cpp \ diff --git a/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/lib/CodeGen/AsmPrinter/AsmPrinter.cpp index 
f80fdea..8a32713 100644 --- a/lib/CodeGen/AsmPrinter/AsmPrinter.cpp +++ b/lib/CodeGen/AsmPrinter/AsmPrinter.cpp @@ -14,11 +14,13 @@ #include "llvm/CodeGen/AsmPrinter.h" #include "DwarfDebug.h" #include "DwarfException.h" +#include "Win64Exception.h" #include "WinCodeViewLineTables.h" #include "llvm/ADT/SmallString.h" #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/ConstantFolding.h" #include "llvm/Analysis/JumpInstrTableInfo.h" +#include "llvm/CodeGen/Analysis.h" #include "llvm/CodeGen/GCMetadataPrinter.h" #include "llvm/CodeGen/MachineConstantPool.h" #include "llvm/CodeGen/MachineFrameInfo.h" @@ -49,7 +51,6 @@ #include "llvm/Target/TargetLoweringObjectFile.h" #include "llvm/Target/TargetRegisterInfo.h" #include "llvm/Target/TargetSubtargetInfo.h" -#include "llvm/Transforms/Utils/GlobalStatus.h" using namespace llvm; #define DEBUG_TYPE "asm-printer" @@ -98,11 +99,10 @@ static unsigned getGVAlignmentLog2(const GlobalValue *GV, const DataLayout &TD, } AsmPrinter::AsmPrinter(TargetMachine &tm, MCStreamer &Streamer) - : MachineFunctionPass(ID), - TM(tm), MAI(tm.getMCAsmInfo()), MII(tm.getInstrInfo()), - OutContext(Streamer.getContext()), - OutStreamer(Streamer), - LastMI(nullptr), LastFn(0), Counter(~0U), SetCounter(0) { + : MachineFunctionPass(ID), TM(tm), MAI(tm.getMCAsmInfo()), + MII(tm.getSubtargetImpl()->getInstrInfo()), + OutContext(Streamer.getContext()), OutStreamer(Streamer), LastMI(nullptr), + LastFn(0), Counter(~0U), SetCounter(0) { DD = nullptr; MMI = nullptr; LI = nullptr; MF = nullptr; CurrentFnSym = CurrentFnSymForSize = nullptr; GCMetadataPrinters = nullptr; @@ -129,12 +129,12 @@ unsigned AsmPrinter::getFunctionNumber() const { } const TargetLoweringObjectFile &AsmPrinter::getObjFileLowering() const { - return TM.getTargetLowering()->getObjFileLowering(); + return TM.getSubtargetImpl()->getTargetLowering()->getObjFileLowering(); } /// getDataLayout - Return information about data layout. const DataLayout &AsmPrinter::getDataLayout() const { - return *TM.getDataLayout(); + return *TM.getSubtargetImpl()->getDataLayout(); } const MCSubtargetInfo &AsmPrinter::getSubtargetInfo() const { @@ -173,9 +173,9 @@ bool AsmPrinter::doInitialization(Module &M) { const_cast(getObjFileLowering()) .Initialize(OutContext, TM); - OutStreamer.InitSections(); + OutStreamer.InitSections(false); - Mang = new Mangler(TM.getDataLayout()); + Mang = new Mangler(TM.getSubtargetImpl()->getDataLayout()); // Emit the version-min deplyment target directive if needed. 
// @@ -222,14 +222,12 @@ bool AsmPrinter::doInitialization(Module &M) { } if (MAI->doesSupportDebugInformation()) { - if (Triple(TM.getTargetTriple()).isKnownWindowsMSVCEnvironment()) { + if (Triple(TM.getTargetTriple()).isKnownWindowsMSVCEnvironment()) Handlers.push_back(HandlerInfo(new WinCodeViewLineTables(this), DbgTimerName, CodeViewLineTablesGroupName)); - } else { - DD = new DwarfDebug(this, &M); - Handlers.push_back(HandlerInfo(DD, DbgTimerName, DWARFGroupName)); - } + DD = new DwarfDebug(this, &M); + Handlers.push_back(HandlerInfo(DD, DbgTimerName, DWARFGroupName)); } EHStreamer *ES = nullptr; @@ -243,8 +241,13 @@ bool AsmPrinter::doInitialization(Module &M) { case ExceptionHandling::ARM: ES = new ARMException(this); break; - case ExceptionHandling::WinEH: - ES = new Win64Exception(this); + case ExceptionHandling::ItaniumWinEH: + switch (MAI->getWinEHEncodingType()) { + default: llvm_unreachable("unsupported unwinding information encoding"); + case WinEH::EncodingType::Itanium: + ES = new Win64Exception(this); + break; + } break; } if (ES) @@ -253,33 +256,10 @@ bool AsmPrinter::doInitialization(Module &M) { } static bool canBeHidden(const GlobalValue *GV, const MCAsmInfo &MAI) { - GlobalValue::LinkageTypes Linkage = GV->getLinkage(); - if (Linkage != GlobalValue::LinkOnceODRLinkage) - return false; - if (!MAI.hasWeakDefCanBeHiddenDirective()) return false; - if (GV->hasUnnamedAddr()) - return true; - - // This is only used for MachO, so right now it doesn't really matter how - // we handle alias. Revisit this once the MachO linker implements aliases. - if (isa(GV)) - return false; - - // If it is a non constant variable, it needs to be uniqued across shared - // objects. - if (const GlobalVariable *Var = dyn_cast(GV)) { - if (!Var->isConstant()) - return false; - } - - GlobalStatus GS; - if (!GlobalStatus::analyzeGlobal(GV, GS) && !GS.IsCompared) - return true; - - return false; + return canBeOmittedFromSymbolTable(GV); } void AsmPrinter::EmitLinkage(const GlobalValue *GV, MCSymbol *GVSym) const { @@ -361,7 +341,7 @@ void AsmPrinter::EmitGlobalVariable(const GlobalVariable *GV) { SectionKind GVKind = TargetLoweringObjectFile::getKindForGlobal(GV, TM); - const DataLayout *DL = TM.getDataLayout(); + const DataLayout *DL = TM.getSubtargetImpl()->getDataLayout(); uint64_t Size = DL->getTypeAllocSize(GV->getType()->getElementType()); // If the alignment is specified, we *must* obey it. Overaligning a global @@ -578,20 +558,24 @@ static void emitComments(const MachineInstr &MI, raw_ostream &CommentOS) { // We assume a single instruction only has a spill or reload, not // both. 
const MachineMemOperand *MMO; - if (TM.getInstrInfo()->isLoadFromStackSlotPostFE(&MI, FI)) { + if (TM.getSubtargetImpl()->getInstrInfo()->isLoadFromStackSlotPostFE(&MI, + FI)) { if (FrameInfo->isSpillSlotObjectIndex(FI)) { MMO = *MI.memoperands_begin(); CommentOS << MMO->getSize() << "-byte Reload\n"; } - } else if (TM.getInstrInfo()->hasLoadFromStackSlot(&MI, MMO, FI)) { + } else if (TM.getSubtargetImpl()->getInstrInfo()->hasLoadFromStackSlot( + &MI, MMO, FI)) { if (FrameInfo->isSpillSlotObjectIndex(FI)) CommentOS << MMO->getSize() << "-byte Folded Reload\n"; - } else if (TM.getInstrInfo()->isStoreToStackSlotPostFE(&MI, FI)) { + } else if (TM.getSubtargetImpl()->getInstrInfo()->isStoreToStackSlotPostFE( + &MI, FI)) { if (FrameInfo->isSpillSlotObjectIndex(FI)) { MMO = *MI.memoperands_begin(); CommentOS << MMO->getSize() << "-byte Spill\n"; } - } else if (TM.getInstrInfo()->hasStoreToStackSlot(&MI, MMO, FI)) { + } else if (TM.getSubtargetImpl()->getInstrInfo()->hasStoreToStackSlot( + &MI, MMO, FI)) { if (FrameInfo->isSpillSlotObjectIndex(FI)) CommentOS << MMO->getSize() << "-byte Folded Spill\n"; } @@ -605,8 +589,9 @@ static void emitComments(const MachineInstr &MI, raw_ostream &CommentOS) { /// that is an implicit def. void AsmPrinter::emitImplicitDef(const MachineInstr *MI) const { unsigned RegNo = MI->getOperand(0).getReg(); - OutStreamer.AddComment(Twine("implicit-def: ") + - TM.getRegisterInfo()->getName(RegNo)); + OutStreamer.AddComment( + Twine("implicit-def: ") + + TM.getSubtargetImpl()->getRegisterInfo()->getName(RegNo)); OutStreamer.AddBlankLine(); } @@ -616,7 +601,7 @@ static void emitKill(const MachineInstr *MI, AsmPrinter &AP) { const MachineOperand &Op = MI->getOperand(i); assert(Op.isReg() && "KILL instruction must have only register operands"); Str += ' '; - Str += AP.TM.getRegisterInfo()->getName(Op.getReg()); + Str += AP.TM.getSubtargetImpl()->getRegisterInfo()->getName(Op.getReg()); Str += (Op.isDef() ? "" : ""); } AP.OutStreamer.AddComment(Str); @@ -627,21 +612,27 @@ static void emitKill(const MachineInstr *MI, AsmPrinter &AP) { /// of DBG_VALUE, returning true if it was able to do so. A false return /// means the target will need to handle MI in EmitInstruction. static bool emitDebugValueComment(const MachineInstr *MI, AsmPrinter &AP) { - // This code handles only the 3-operand target-independent form. - if (MI->getNumOperands() != 3) + // This code handles only the 4-operand target-independent form. + if (MI->getNumOperands() != 4) return false; SmallString<128> Str; raw_svector_ostream OS(Str); OS << "DEBUG_VALUE: "; - DIVariable V(MI->getOperand(2).getMetadata()); + DIVariable V = MI->getDebugVariable(); if (V.getContext().isSubprogram()) { StringRef Name = DISubprogram(V.getContext()).getDisplayName(); if (!Name.empty()) OS << Name << ":"; } - OS << V.getName() << " <- "; + OS << V.getName(); + + DIExpression Expr = MI->getDebugExpression(); + if (Expr.isVariablePiece()) + OS << " [piece offset=" << Expr.getPieceOffset() + << " size=" << Expr.getPieceSize() << "]"; + OS << " <- "; // The second operand is only an offset if it's an immediate. 
bool Deref = MI->getOperand(0).isReg() && MI->getOperand(1).isImm(); @@ -672,7 +663,8 @@ static bool emitDebugValueComment(const MachineInstr *MI, AsmPrinter &AP) { Reg = MI->getOperand(0).getReg(); } else { assert(MI->getOperand(0).isFI() && "Unknown operand type"); - const TargetFrameLowering *TFI = AP.TM.getFrameLowering(); + const TargetFrameLowering *TFI = + AP.TM.getSubtargetImpl()->getFrameLowering(); Offset += TFI->getFrameIndexReference(*AP.MF, MI->getOperand(0).getIndex(), Reg); Deref = true; @@ -686,7 +678,7 @@ static bool emitDebugValueComment(const MachineInstr *MI, AsmPrinter &AP) { } if (Deref) OS << '['; - OS << AP.TM.getRegisterInfo()->getName(Reg); + OS << AP.TM.getSubtargetImpl()->getRegisterInfo()->getName(Reg); } if (Deref) @@ -709,8 +701,8 @@ AsmPrinter::CFIMoveType AsmPrinter::needsCFIMoves() { } bool AsmPrinter::needsSEHMoves() { - return MAI->getExceptionHandlingType() == ExceptionHandling::WinEH && - MF->getFunction()->needsUnwindTableEntry(); + return MAI->getExceptionHandlingType() == ExceptionHandling::ItaniumWinEH && + MF->getFunction()->needsUnwindTableEntry(); } void AsmPrinter::emitCFIInstruction(const MachineInstr &MI) { @@ -722,9 +714,6 @@ void AsmPrinter::emitCFIInstruction(const MachineInstr &MI) { if (needsCFIMoves() == CFI_M_None) return; - if (MMI->getCompactUnwindEncoding() != 0) - OutStreamer.EmitCompactUnwindEncoding(MMI->getCompactUnwindEncoding()); - const MachineModuleInfo &MMI = MF->getMMI(); const std::vector &Instrs = MMI.getFrameInstructions(); unsigned CFIIndex = MI.getOperand(0).getCFIIndex(); @@ -742,12 +731,10 @@ void AsmPrinter::EmitFunctionBody() { // Print out code for the function. bool HasAnyRealCode = false; - const MachineInstr *LastMI = nullptr; for (auto &MBB : *MF) { // Print a label for the basic block. EmitBasicBlockStart(MBB); for (auto &MI : MBB) { - LastMI = &MI; // Print the assembly for the instruction. if (!MI.isPosition() && !MI.isImplicitDef() && !MI.isKill() && @@ -804,26 +791,22 @@ void AsmPrinter::EmitFunctionBody() { } } } - } - // If the last instruction was a prolog label, then we have a situation where - // we emitted a prolog but no function body. This results in the ending prolog - // label equaling the end of function label and an invalid "row" in the - // FDE. We need to emit a noop in this situation so that the FDE's rows are - // valid. - bool RequiresNoop = LastMI && LastMI->isCFIInstruction(); + EmitBasicBlockEnd(MBB); + } // If the function is empty and the object file uses .subsections_via_symbols, // then we need to emit *something* to the function body to prevent the // labels from collapsing together. Just emit a noop. - if ((MAI->hasSubsectionsViaSymbols() && !HasAnyRealCode) || RequiresNoop) { + if ((MAI->hasSubsectionsViaSymbols() && !HasAnyRealCode)) { MCInst Noop; - TM.getInstrInfo()->getNoopForMachoTarget(Noop); - if (Noop.getOpcode()) { - OutStreamer.AddComment("avoids zero-length function"); + TM.getSubtargetImpl()->getInstrInfo()->getNoopForMachoTarget(Noop); + OutStreamer.AddComment("avoids zero-length function"); + + // Targets can opt-out of emitting the noop here by leaving the opcode + // unspecified. + if (Noop.getOpcode()) OutStreamer.EmitInstruction(Noop, getSubtargetInfo()); - } else // Target not mc-ized yet. 
- OutStreamer.EmitRawText(StringRef("\tnop\n")); } const Function *F = MF->getFunction(); @@ -895,17 +878,18 @@ bool AsmPrinter::doFinalization(Module &M) { unsigned Arch = Triple(getTargetTriple()).getArch(); bool IsThumb = (Arch == Triple::thumb || Arch == Triple::thumbeb); MCInst TrapInst; - TM.getInstrInfo()->getTrap(TrapInst); + TM.getSubtargetImpl()->getInstrInfo()->getTrap(TrapInst); + unsigned LogAlignment = llvm::Log2_64(JITI->entryByteAlignment()); + + // Emit the right section for these functions. + OutStreamer.SwitchSection(OutContext.getObjectFileInfo()->getTextSection()); for (const auto &KV : JITI->getTables()) { uint64_t Count = 0; for (const auto &FunPair : KV.second) { // Emit the function labels to make this be a function entry point. MCSymbol *FunSym = OutContext.GetOrCreateSymbol(FunPair.second->getName()); - OutStreamer.EmitSymbolAttribute(FunSym, MCSA_Global); - // FIXME: JumpTableInstrInfo should store information about the required - // alignment of table entries and the size of the padding instruction. - EmitAlignment(3); + EmitAlignment(LogAlignment); if (IsThumb) OutStreamer.EmitThumbFunc(FunSym); if (MAI->hasDotTypeDotSizeDirective()) @@ -920,16 +904,16 @@ bool AsmPrinter::doFinalization(Module &M) { const MCSymbolRefExpr *TargetSymRef = MCSymbolRefExpr::Create(TargetSymbol, MCSymbolRefExpr::VK_PLT, OutContext); - TM.getInstrInfo()->getUnconditionalBranch(JumpToFun, TargetSymRef); + TM.getSubtargetImpl()->getInstrInfo()->getUnconditionalBranch( + JumpToFun, TargetSymRef); OutStreamer.EmitInstruction(JumpToFun, getSubtargetInfo()); ++Count; } // Emit enough padding instructions to fill up to the next power of two. - // This assumes that the trap instruction takes 8 bytes or fewer. uint64_t Remaining = NextPowerOf2(Count) - Count; for (uint64_t C = 0; C < Remaining; ++C) { - EmitAlignment(3); + EmitAlignment(LogAlignment); OutStreamer.EmitInstruction(TrapInst, getSubtargetInfo()); } @@ -976,24 +960,21 @@ bool AsmPrinter::doFinalization(Module &M) { } } - if (MAI->hasSetDirective()) { - OutStreamer.AddBlankLine(); - for (const auto &Alias : M.aliases()) { - MCSymbol *Name = getSymbol(&Alias); + OutStreamer.AddBlankLine(); + for (const auto &Alias : M.aliases()) { + MCSymbol *Name = getSymbol(&Alias); - if (Alias.hasExternalLinkage() || !MAI->getWeakRefDirective()) - OutStreamer.EmitSymbolAttribute(Name, MCSA_Global); - else if (Alias.hasWeakLinkage() || Alias.hasLinkOnceLinkage()) - OutStreamer.EmitSymbolAttribute(Name, MCSA_WeakReference); - else - assert(Alias.hasLocalLinkage() && "Invalid alias linkage"); + if (Alias.hasExternalLinkage() || !MAI->getWeakRefDirective()) + OutStreamer.EmitSymbolAttribute(Name, MCSA_Global); + else if (Alias.hasWeakLinkage() || Alias.hasLinkOnceLinkage()) + OutStreamer.EmitSymbolAttribute(Name, MCSA_WeakReference); + else + assert(Alias.hasLocalLinkage() && "Invalid alias linkage"); - EmitVisibility(Name, Alias.getVisibility()); + EmitVisibility(Name, Alias.getVisibility()); - // Emit the directives as assignments aka .set: - OutStreamer.EmitAssignment(Name, - lowerConstant(Alias.getAliasee(), *this)); - } + // Emit the directives as assignments aka .set: + OutStreamer.EmitAssignment(Name, lowerConstant(Alias.getAliasee(), *this)); } GCModuleInfo *MI = getAnalysisIfAvailable(); @@ -1062,23 +1043,14 @@ void AsmPrinter::EmitConstantPool() { const MachineConstantPoolEntry &CPE = CP[i]; unsigned Align = CPE.getAlignment(); - SectionKind Kind; - switch (CPE.getRelocationInfo()) { - default: llvm_unreachable("Unknown section kind"); - 
case 2: Kind = SectionKind::getReadOnlyWithRel(); break; - case 1: - Kind = SectionKind::getReadOnlyWithRelLocal(); - break; - case 0: - switch (TM.getDataLayout()->getTypeAllocSize(CPE.getType())) { - case 4: Kind = SectionKind::getMergeableConst4(); break; - case 8: Kind = SectionKind::getMergeableConst8(); break; - case 16: Kind = SectionKind::getMergeableConst16();break; - default: Kind = SectionKind::getMergeableConst(); break; - } - } + SectionKind Kind = + CPE.getSectionKind(TM.getSubtargetImpl()->getDataLayout()); + + const Constant *C = nullptr; + if (!CPE.isMachineConstantPoolEntry()) + C = CPE.Val.ConstVal; - const MCSection *S = getObjFileLowering().getSectionForConstant(Kind); + const MCSection *S = getObjFileLowering().getSectionForConstant(Kind, C); // The number of sections are small, just do a linear search from the // last section to the first. @@ -1101,13 +1073,22 @@ void AsmPrinter::EmitConstantPool() { } // Now print stuff into the calculated sections. + const MCSection *CurSection = nullptr; + unsigned Offset = 0; for (unsigned i = 0, e = CPSections.size(); i != e; ++i) { - OutStreamer.SwitchSection(CPSections[i].S); - EmitAlignment(Log2_32(CPSections[i].Alignment)); - - unsigned Offset = 0; for (unsigned j = 0, ee = CPSections[i].CPEs.size(); j != ee; ++j) { unsigned CPI = CPSections[i].CPEs[j]; + MCSymbol *Sym = GetCPISymbol(CPI); + if (!Sym->isUndefined()) + continue; + + if (CurSection != CPSections[i].S) { + OutStreamer.SwitchSection(CPSections[i].S); + EmitAlignment(Log2_32(CPSections[i].Alignment)); + CurSection = CPSections[i].S; + Offset = 0; + } + MachineConstantPoolEntry CPE = CP[CPI]; // Emit inter-object padding for alignment. @@ -1116,9 +1097,10 @@ void AsmPrinter::EmitConstantPool() { OutStreamer.EmitZeros(NewOffset - Offset); Type *Ty = CPE.getType(); - Offset = NewOffset + TM.getDataLayout()->getTypeAllocSize(Ty); - OutStreamer.EmitLabel(GetCPISymbol(CPI)); + Offset = NewOffset + + TM.getSubtargetImpl()->getDataLayout()->getTypeAllocSize(Ty); + OutStreamer.EmitLabel(Sym); if (CPE.isMachineConstantPoolEntry()) EmitMachineConstantPoolValue(CPE.Val.MachineCPVal); else @@ -1131,7 +1113,7 @@ void AsmPrinter::EmitConstantPool() { /// by the current function to the current output stream. /// void AsmPrinter::EmitJumpTableInfo() { - const DataLayout *DL = MF->getTarget().getDataLayout(); + const DataLayout *DL = MF->getSubtarget().getDataLayout(); const MachineJumpTableInfo *MJTI = MF->getJumpTableInfo(); if (!MJTI) return; if (MJTI->getEntryKind() == MachineJumpTableInfo::EK_Inline) return; @@ -1156,12 +1138,14 @@ void AsmPrinter::EmitJumpTableInfo() { } else { // Otherwise, drop it in the readonly section. const MCSection *ReadOnlySection = - getObjFileLowering().getSectionForConstant(SectionKind::getReadOnly()); + getObjFileLowering().getSectionForConstant(SectionKind::getReadOnly(), + /*C=*/nullptr); OutStreamer.SwitchSection(ReadOnlySection); JTInDiffSection = true; } - EmitAlignment(Log2_32(MJTI->getEntryAlignment(*TM.getDataLayout()))); + EmitAlignment(Log2_32( + MJTI->getEntryAlignment(*TM.getSubtargetImpl()->getDataLayout()))); // Jump tables in code sections are marked with a data_region directive // where that's supported. @@ -1174,17 +1158,17 @@ void AsmPrinter::EmitJumpTableInfo() { // If this jump table was deleted, ignore it. if (JTBBs.empty()) continue; - // For the EK_LabelDifference32 entry, if the target supports .set, emit a - // .set directive for each unique entry. 
This reduces the number of - // relocations the assembler will generate for the jump table. + // For the EK_LabelDifference32 entry, if using .set avoids a relocation, + /// emit a .set directive for each unique entry. if (MJTI->getEntryKind() == MachineJumpTableInfo::EK_LabelDifference32 && - MAI->hasSetDirective()) { + MAI->doesSetDirectiveSuppressesReloc()) { SmallPtrSet EmittedSets; - const TargetLowering *TLI = TM.getTargetLowering(); + const TargetLowering *TLI = TM.getSubtargetImpl()->getTargetLowering(); const MCExpr *Base = TLI->getPICJumpTableRelocBaseExpr(MF,JTI,OutContext); for (unsigned ii = 0, ee = JTBBs.size(); ii != ee; ++ii) { const MachineBasicBlock *MBB = JTBBs[ii]; - if (!EmittedSets.insert(MBB)) continue; + if (!EmittedSets.insert(MBB).second) + continue; // .set LJTSet, LBB32-base const MCExpr *LHS = @@ -1223,8 +1207,9 @@ void AsmPrinter::EmitJumpTableEntry(const MachineJumpTableInfo *MJTI, case MachineJumpTableInfo::EK_Inline: llvm_unreachable("Cannot emit EK_Inline jump table entry"); case MachineJumpTableInfo::EK_Custom32: - Value = TM.getTargetLowering()->LowerCustomJumpTableEntry(MJTI, MBB, UID, - OutContext); + Value = + TM.getSubtargetImpl()->getTargetLowering()->LowerCustomJumpTableEntry( + MJTI, MBB, UID, OutContext); break; case MachineJumpTableInfo::EK_BlockAddress: // EK_BlockAddress - Each entry is a plain address of block, e.g.: @@ -1250,34 +1235,30 @@ void AsmPrinter::EmitJumpTableEntry(const MachineJumpTableInfo *MJTI, } case MachineJumpTableInfo::EK_LabelDifference32: { - // EK_LabelDifference32 - Each entry is the address of the block minus - // the address of the jump table. This is used for PIC jump tables where - // gprel32 is not supported. e.g.: + // Each entry is the address of the block minus the address of the jump + // table. This is used for PIC jump tables where gprel32 is not supported. + // e.g.: // .word LBB123 - LJTI1_2 - // If the .set directive is supported, this is emitted as: + // If the .set directive avoids relocations, this is emitted as: // .set L4_5_set_123, LBB123 - LJTI1_2 // .word L4_5_set_123 - - // If we have emitted set directives for the jump table entries, print - // them rather than the entries themselves. If we're emitting PIC, then - // emit the table entries as differences between two text section labels. - if (MAI->hasSetDirective()) { - // If we used .set, reference the .set's symbol. + if (MAI->doesSetDirectiveSuppressesReloc()) { Value = MCSymbolRefExpr::Create(GetJTSetSymbol(UID, MBB->getNumber()), OutContext); break; } - // Otherwise, use the difference as the jump table entry. 
Value = MCSymbolRefExpr::Create(MBB->getSymbol(), OutContext); - const MCExpr *JTI = MCSymbolRefExpr::Create(GetJTISymbol(UID), OutContext); - Value = MCBinaryExpr::CreateSub(Value, JTI, OutContext); + const TargetLowering *TLI = TM.getSubtargetImpl()->getTargetLowering(); + const MCExpr *Base = TLI->getPICJumpTableRelocBaseExpr(MF, UID, OutContext); + Value = MCBinaryExpr::CreateSub(Value, Base, OutContext); break; } } assert(Value && "Unknown entry kind!"); - unsigned EntrySize = MJTI->getEntrySize(*TM.getDataLayout()); + unsigned EntrySize = + MJTI->getEntrySize(*TM.getSubtargetImpl()->getDataLayout()); OutStreamer.EmitValue(Value, EntrySize); } @@ -1387,7 +1368,7 @@ void AsmPrinter::EmitXXStructorList(const Constant *List, bool isCtor) { } // Emit the function pointers in the target-specific order - const DataLayout *DL = TM.getDataLayout(); + const DataLayout *DL = TM.getSubtargetImpl()->getDataLayout(); unsigned Align = Log2_32(DL->getPointerPrefAlignment()); std::stable_sort(Structors.begin(), Structors.end(), [](const Structor &L, @@ -1450,9 +1431,9 @@ void AsmPrinter::EmitInt32(int Value) const { OutStreamer.EmitIntValue(Value, 4); } -/// EmitLabelDifference - Emit something like ".long Hi-Lo" where the size -/// in bytes of the directive is specified by Size and Hi/Lo specify the -/// labels. This implicitly uses .set if it is available. +/// Emit something like ".long Hi-Lo" where the size in bytes of the directive +/// is specified by Size and Hi/Lo specify the labels. This implicitly uses +/// .set if it avoids relocations. void AsmPrinter::EmitLabelDifference(const MCSymbol *Hi, const MCSymbol *Lo, unsigned Size) const { // Get the Hi-Lo expression. @@ -1461,7 +1442,7 @@ void AsmPrinter::EmitLabelDifference(const MCSymbol *Hi, const MCSymbol *Lo, MCSymbolRefExpr::Create(Lo, OutContext), OutContext); - if (!MAI->hasSetDirective()) { + if (!MAI->doesSetDirectiveSuppressesReloc()) { OutStreamer.EmitValue(Diff, Size); return; } @@ -1472,36 +1453,6 @@ void AsmPrinter::EmitLabelDifference(const MCSymbol *Hi, const MCSymbol *Lo, OutStreamer.EmitSymbolValue(SetLabel, Size); } -/// EmitLabelOffsetDifference - Emit something like ".long Hi+Offset-Lo" -/// where the size in bytes of the directive is specified by Size and Hi/Lo -/// specify the labels. This implicitly uses .set if it is available. -void AsmPrinter::EmitLabelOffsetDifference(const MCSymbol *Hi, uint64_t Offset, - const MCSymbol *Lo, - unsigned Size) const { - - // Emit Hi+Offset - Lo - // Get the Hi+Offset expression. - const MCExpr *Plus = - MCBinaryExpr::CreateAdd(MCSymbolRefExpr::Create(Hi, OutContext), - MCConstantExpr::Create(Offset, OutContext), - OutContext); - - // Get the Hi+Offset-Lo expression. - const MCExpr *Diff = - MCBinaryExpr::CreateSub(Plus, - MCSymbolRefExpr::Create(Lo, OutContext), - OutContext); - - if (!MAI->hasSetDirective()) - OutStreamer.EmitValue(Diff, Size); - else { - // Otherwise, emit with .set (aka assignment). - MCSymbol *SetLabel = GetTempSymbol("set", SetCounter++); - OutStreamer.EmitAssignment(SetLabel, Diff); - OutStreamer.EmitSymbolValue(SetLabel, Size); - } -} - /// EmitLabelPlusOffset - Emit something like ".long Label+Offset" /// where the size in bytes of the directive is specified by Size and Label /// specifies the label. This implicitly uses .set if it is available. @@ -1531,7 +1482,9 @@ void AsmPrinter::EmitLabelPlusOffset(const MCSymbol *Label, uint64_t Offset, // if required for correctness. 
// void AsmPrinter::EmitAlignment(unsigned NumBits, const GlobalObject *GV) const { - if (GV) NumBits = getGVAlignmentLog2(GV, *TM.getDataLayout(), NumBits); + if (GV) + NumBits = getGVAlignmentLog2(GV, *TM.getSubtargetImpl()->getDataLayout(), + NumBits); if (NumBits == 0) return; // 1-byte aligned: no need to emit alignment. @@ -1577,8 +1530,8 @@ static const MCExpr *lowerConstant(const Constant *CV, AsmPrinter &AP) { // If the code isn't optimized, there may be outstanding folding // opportunities. Attempt to fold the expression using DataLayout as a // last resort before giving up. - if (Constant *C = - ConstantFoldConstantExpression(CE, AP.TM.getDataLayout())) + if (Constant *C = ConstantFoldConstantExpression( + CE, AP.TM.getSubtargetImpl()->getDataLayout())) if (C != CE) return lowerConstant(C, AP); @@ -1592,7 +1545,7 @@ static const MCExpr *lowerConstant(const Constant *CV, AsmPrinter &AP) { report_fatal_error(OS.str()); } case Instruction::GetElementPtr: { - const DataLayout &DL = *AP.TM.getDataLayout(); + const DataLayout &DL = *AP.TM.getSubtargetImpl()->getDataLayout(); // Generate a symbolic expression for the byte address APInt OffsetAI(DL.getPointerTypeSizeInBits(CE->getType()), 0); cast(CE)->accumulateConstantOffset(DL, OffsetAI); @@ -1616,7 +1569,7 @@ static const MCExpr *lowerConstant(const Constant *CV, AsmPrinter &AP) { return lowerConstant(CE->getOperand(0), AP); case Instruction::IntToPtr: { - const DataLayout &DL = *AP.TM.getDataLayout(); + const DataLayout &DL = *AP.TM.getSubtargetImpl()->getDataLayout(); // Handle casts to pointers by changing them into casts to the appropriate // integer type. This promotes constant folding and simplifies this code. Constant *Op = CE->getOperand(0); @@ -1626,7 +1579,7 @@ static const MCExpr *lowerConstant(const Constant *CV, AsmPrinter &AP) { } case Instruction::PtrToInt: { - const DataLayout &DL = *AP.TM.getDataLayout(); + const DataLayout &DL = *AP.TM.getSubtargetImpl()->getDataLayout(); // Support only foldable casts to/from pointers that can be eliminated by // changing the pointer to the appropriately sized integer type. Constant *Op = CE->getOperand(0); @@ -1699,7 +1652,8 @@ static int isRepeatedByteSequence(const Value *V, TargetMachine &TM) { if (const ConstantInt *CI = dyn_cast(V)) { if (CI->getBitWidth() > 64) return -1; - uint64_t Size = TM.getDataLayout()->getTypeAllocSize(V->getType()); + uint64_t Size = + TM.getSubtargetImpl()->getDataLayout()->getTypeAllocSize(V->getType()); uint64_t Value = CI->getZExtValue(); // Make sure the constant is at least 8 bits long and has a power @@ -1743,7 +1697,9 @@ static void emitGlobalConstantDataSequential(const ConstantDataSequential *CDS, // See if we can aggregate this into a .fill, if so, emit it as such. int Value = isRepeatedByteSequence(CDS, AP.TM); if (Value != -1) { - uint64_t Bytes = AP.TM.getDataLayout()->getTypeAllocSize(CDS->getType()); + uint64_t Bytes = + AP.TM.getSubtargetImpl()->getDataLayout()->getTypeAllocSize( + CDS->getType()); // Don't emit a 1-byte object as a .fill. 
if (Bytes > 1) return AP.OutStreamer.EmitFill(Bytes, Value); @@ -1793,7 +1749,7 @@ static void emitGlobalConstantDataSequential(const ConstantDataSequential *CDS, } } - const DataLayout &DL = *AP.TM.getDataLayout(); + const DataLayout &DL = *AP.TM.getSubtargetImpl()->getDataLayout(); unsigned Size = DL.getTypeAllocSize(CDS->getType()); unsigned EmittedSize = DL.getTypeAllocSize(CDS->getType()->getElementType()) * CDS->getNumElements(); @@ -1808,7 +1764,9 @@ static void emitGlobalConstantArray(const ConstantArray *CA, AsmPrinter &AP) { int Value = isRepeatedByteSequence(CA, AP.TM); if (Value != -1) { - uint64_t Bytes = AP.TM.getDataLayout()->getTypeAllocSize(CA->getType()); + uint64_t Bytes = + AP.TM.getSubtargetImpl()->getDataLayout()->getTypeAllocSize( + CA->getType()); AP.OutStreamer.EmitFill(Bytes, Value); } else { @@ -1821,7 +1779,7 @@ static void emitGlobalConstantVector(const ConstantVector *CV, AsmPrinter &AP) { for (unsigned i = 0, e = CV->getType()->getNumElements(); i != e; ++i) emitGlobalConstantImpl(CV->getOperand(i), AP); - const DataLayout &DL = *AP.TM.getDataLayout(); + const DataLayout &DL = *AP.TM.getSubtargetImpl()->getDataLayout(); unsigned Size = DL.getTypeAllocSize(CV->getType()); unsigned EmittedSize = DL.getTypeAllocSize(CV->getType()->getElementType()) * CV->getType()->getNumElements(); @@ -1831,7 +1789,7 @@ static void emitGlobalConstantVector(const ConstantVector *CV, AsmPrinter &AP) { static void emitGlobalConstantStruct(const ConstantStruct *CS, AsmPrinter &AP) { // Print the fields in successive locations. Pad to align if needed! - const DataLayout *DL = AP.TM.getDataLayout(); + const DataLayout *DL = AP.TM.getSubtargetImpl()->getDataLayout(); unsigned Size = DL->getTypeAllocSize(CS->getType()); const StructLayout *Layout = DL->getStructLayout(CS->getType()); uint64_t SizeSoFar = 0; @@ -1881,7 +1839,7 @@ static void emitGlobalConstantFP(const ConstantFP *CFP, AsmPrinter &AP) { // PPC's long double has odd notions of endianness compared to how LLVM // handles it: p[0] goes first for *big* endian on PPC. - if (AP.TM.getDataLayout()->isBigEndian() && + if (AP.TM.getSubtargetImpl()->getDataLayout()->isBigEndian() && !CFP->getType()->isPPC_FP128Ty()) { int Chunk = API.getNumWords() - 1; @@ -1900,13 +1858,13 @@ static void emitGlobalConstantFP(const ConstantFP *CFP, AsmPrinter &AP) { } // Emit the tail padding for the long double. - const DataLayout &DL = *AP.TM.getDataLayout(); + const DataLayout &DL = *AP.TM.getSubtargetImpl()->getDataLayout(); AP.OutStreamer.EmitZeros(DL.getTypeAllocSize(CFP->getType()) - DL.getTypeStoreSize(CFP->getType())); } static void emitGlobalConstantLargeInt(const ConstantInt *CI, AsmPrinter &AP) { - const DataLayout *DL = AP.TM.getDataLayout(); + const DataLayout *DL = AP.TM.getSubtargetImpl()->getDataLayout(); unsigned BitWidth = CI->getBitWidth(); // Copy the value as we may massage the layout for constants whose bit width @@ -1952,7 +1910,8 @@ static void emitGlobalConstantLargeInt(const ConstantInt *CI, AsmPrinter &AP) { // Emit the extra bits after the 64-bits chunks. // Emit a directive that fills the expected size. 
- uint64_t Size = AP.TM.getDataLayout()->getTypeAllocSize(CI->getType()); + uint64_t Size = AP.TM.getSubtargetImpl()->getDataLayout()->getTypeAllocSize( + CI->getType()); Size -= (BitWidth / 64) * 8; assert(Size && Size * 8 >= ExtraBitsSize && (ExtraBits & (((uint64_t)-1) >> (64 - ExtraBitsSize))) @@ -1962,7 +1921,7 @@ static void emitGlobalConstantLargeInt(const ConstantInt *CI, AsmPrinter &AP) { } static void emitGlobalConstantImpl(const Constant *CV, AsmPrinter &AP) { - const DataLayout *DL = AP.TM.getDataLayout(); + const DataLayout *DL = AP.TM.getSubtargetImpl()->getDataLayout(); uint64_t Size = DL->getTypeAllocSize(CV->getType()); if (isa(CV) || isa(CV)) return AP.OutStreamer.EmitZeros(Size); @@ -2027,7 +1986,8 @@ static void emitGlobalConstantImpl(const Constant *CV, AsmPrinter &AP) { /// EmitGlobalConstant - Print a general LLVM constant to the .s file. void AsmPrinter::EmitGlobalConstant(const Constant *CV) { - uint64_t Size = TM.getDataLayout()->getTypeAllocSize(CV->getType()); + uint64_t Size = + TM.getSubtargetImpl()->getDataLayout()->getTypeAllocSize(CV->getType()); if (Size) emitGlobalConstantImpl(CV, *this); else if (MAI->hasSubsectionsViaSymbols()) { @@ -2056,7 +2016,7 @@ void AsmPrinter::printOffset(int64_t Offset, raw_ostream &OS) const { /// GetTempSymbol - Return the MCSymbol corresponding to the assembler /// temporary label with the specified stem and unique ID. MCSymbol *AsmPrinter::GetTempSymbol(Twine Name, unsigned ID) const { - const DataLayout *DL = TM.getDataLayout(); + const DataLayout *DL = TM.getSubtargetImpl()->getDataLayout(); return OutContext.GetOrCreateSymbol(Twine(DL->getPrivateGlobalPrefix()) + Name + Twine(ID)); } @@ -2064,7 +2024,7 @@ MCSymbol *AsmPrinter::GetTempSymbol(Twine Name, unsigned ID) const { /// GetTempSymbol - Return an assembler temporary label with the specified /// stem. MCSymbol *AsmPrinter::GetTempSymbol(Twine Name) const { - const DataLayout *DL = TM.getDataLayout(); + const DataLayout *DL = TM.getSubtargetImpl()->getDataLayout(); return OutContext.GetOrCreateSymbol(Twine(DL->getPrivateGlobalPrefix())+ Name); } @@ -2080,7 +2040,7 @@ MCSymbol *AsmPrinter::GetBlockAddressSymbol(const BasicBlock *BB) const { /// GetCPISymbol - Return the symbol for the specified constant pool entry. MCSymbol *AsmPrinter::GetCPISymbol(unsigned CPID) const { - const DataLayout *DL = TM.getDataLayout(); + const DataLayout *DL = TM.getSubtargetImpl()->getDataLayout(); return OutContext.GetOrCreateSymbol (Twine(DL->getPrivateGlobalPrefix()) + "CPI" + Twine(getFunctionNumber()) + "_" + Twine(CPID)); @@ -2094,7 +2054,7 @@ MCSymbol *AsmPrinter::GetJTISymbol(unsigned JTID, bool isLinkerPrivate) const { /// GetJTSetSymbol - Return the symbol for the specified jump table .set /// FIXME: privatize to AsmPrinter. 
MCSymbol *AsmPrinter::GetJTSetSymbol(unsigned UID, unsigned MBBID) const { - const DataLayout *DL = TM.getDataLayout(); + const DataLayout *DL = TM.getSubtargetImpl()->getDataLayout(); return OutContext.GetOrCreateSymbol (Twine(DL->getPrivateGlobalPrefix()) + Twine(getFunctionNumber()) + "_" + Twine(UID) + "_set_" + Twine(MBBID)); diff --git a/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp b/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp index 02cd12b..05f6a68 100644 --- a/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp +++ b/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp @@ -27,6 +27,7 @@ #include "llvm/Target/TargetLoweringObjectFile.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/Target/TargetSubtargetInfo.h" using namespace llvm; #define DEBUG_TYPE "asm-printer" @@ -130,7 +131,7 @@ unsigned AsmPrinter::GetSizeOfEncodedValue(unsigned Encoding) const { default: llvm_unreachable("Invalid encoded value."); case dwarf::DW_EH_PE_absptr: - return TM.getDataLayout()->getPointerSize(); + return TM.getSubtargetImpl()->getDataLayout()->getPointerSize(); case dwarf::DW_EH_PE_udata2: return 2; case dwarf::DW_EH_PE_udata4: @@ -214,13 +215,10 @@ static void emitDwarfRegOpIndirect(ByteStreamer &Streamer, int Reg, int Offset, Streamer.EmitInt8(dwarf::DW_OP_deref, "DW_OP_deref"); } -/// Emit a dwarf register operation for describing -/// - a small value occupying only part of a register or -/// - a small register representing only part of a value. -static void emitDwarfOpPiece(ByteStreamer &Streamer, unsigned SizeInBits, - unsigned OffsetInBits) { - assert(SizeInBits > 0 && "zero-sized piece"); - unsigned SizeOfByte = 8; +void AsmPrinter::EmitDwarfOpPiece(ByteStreamer &Streamer, unsigned SizeInBits, + unsigned OffsetInBits) const { + assert(SizeInBits > 0 && "piece has size zero"); + const unsigned SizeOfByte = 8; if (OffsetInBits > 0 || SizeInBits % SizeOfByte) { Streamer.EmitInt8(dwarf::DW_OP_bit_piece, "DW_OP_bit_piece"); Streamer.EmitULEB128(SizeInBits, Twine(SizeInBits)); @@ -249,13 +247,13 @@ void AsmPrinter::EmitDwarfRegOpPiece(ByteStreamer &Streamer, unsigned PieceSizeInBits, unsigned PieceOffsetInBits) const { assert(MLoc.isReg() && "MLoc must be a register"); - const TargetRegisterInfo *TRI = TM.getRegisterInfo(); + const TargetRegisterInfo *TRI = TM.getSubtargetImpl()->getRegisterInfo(); int Reg = TRI->getDwarfRegNum(MLoc.getReg(), false); // If this is a valid register number, emit it. if (Reg >= 0) { emitDwarfRegOp(Streamer, Reg); - emitDwarfOpPiece(Streamer, PieceSizeInBits, PieceOffsetInBits); + EmitDwarfOpPiece(Streamer, PieceSizeInBits, PieceOffsetInBits); return; } @@ -266,19 +264,19 @@ void AsmPrinter::EmitDwarfRegOpPiece(ByteStreamer &Streamer, if (Reg >= 0) { unsigned Idx = TRI->getSubRegIndex(*SR, MLoc.getReg()); unsigned Size = TRI->getSubRegIdxSize(Idx); - unsigned Offset = TRI->getSubRegIdxOffset(Idx); + unsigned RegOffset = TRI->getSubRegIdxOffset(Idx); OutStreamer.AddComment("super-register"); emitDwarfRegOp(Streamer, Reg); - if (PieceOffsetInBits == Offset) { - emitDwarfOpPiece(Streamer, Size, Offset); + if (PieceOffsetInBits == RegOffset) { + EmitDwarfOpPiece(Streamer, Size, RegOffset); } else { // If this is part of a variable in a sub-register at a // non-zero offset, we need to manually shift the value into // place, since the DW_OP_piece describes the part of the // variable, not the position of the subregister. 
- emitDwarfOpPiece(Streamer, Size, PieceOffsetInBits); - if (Offset) - emitDwarfOpShr(Streamer, Offset); + if (RegOffset) + emitDwarfOpShr(Streamer, RegOffset); + EmitDwarfOpPiece(Streamer, Size, PieceOffsetInBits); } return; } @@ -312,7 +310,7 @@ void AsmPrinter::EmitDwarfRegOpPiece(ByteStreamer &Streamer, if (Reg >= 0 && Intersection.any()) { OutStreamer.AddComment("sub-register"); emitDwarfRegOp(Streamer, Reg); - emitDwarfOpPiece(Streamer, Size, Offset == CurPos ? 0 : Offset); + EmitDwarfOpPiece(Streamer, Size, Offset == CurPos ? 0 : Offset); CurPos = Offset + Size; // Mark it as emitted. @@ -331,7 +329,7 @@ void AsmPrinter::EmitDwarfRegOpPiece(ByteStreamer &Streamer, void AsmPrinter::EmitDwarfRegOp(ByteStreamer &Streamer, const MachineLocation &MLoc, bool Indirect) const { - const TargetRegisterInfo *TRI = TM.getRegisterInfo(); + const TargetRegisterInfo *TRI = TM.getSubtargetImpl()->getRegisterInfo(); int Reg = TRI->getDwarfRegNum(MLoc.getReg(), false); if (Reg < 0) { // We assume that pointers are always in an addressable register. diff --git a/lib/CodeGen/AsmPrinter/AsmPrinterHandler.h b/lib/CodeGen/AsmPrinter/AsmPrinterHandler.h index 2825367..31867dd 100644 --- a/lib/CodeGen/AsmPrinter/AsmPrinterHandler.h +++ b/lib/CodeGen/AsmPrinter/AsmPrinterHandler.h @@ -12,8 +12,8 @@ // //===----------------------------------------------------------------------===// -#ifndef CODEGEN_ASMPRINTER_ASMPRINTERHANDLER_H__ -#define CODEGEN_ASMPRINTER_ASMPRINTERHANDLER_H__ +#ifndef LLVM_LIB_CODEGEN_ASMPRINTER_ASMPRINTERHANDLER_H +#define LLVM_LIB_CODEGEN_ASMPRINTER_ASMPRINTERHANDLER_H #include "llvm/Support/DataTypes.h" diff --git a/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp b/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp index 46ee0c8..cca5f22 100644 --- a/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp +++ b/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp @@ -33,6 +33,7 @@ #include "llvm/Support/TargetRegistry.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetRegisterInfo.h" #include "llvm/Target/TargetSubtargetInfo.h" using namespace llvm; @@ -110,14 +111,14 @@ void AsmPrinter::EmitInlineAsm(StringRef Str, const MDNode *LocMDNode, HasDiagHandler = true; } - MemoryBuffer *Buffer; + std::unique_ptr Buffer; if (isNullTerminated) Buffer = MemoryBuffer::getMemBuffer(Str, ""); else Buffer = MemoryBuffer::getMemBufferCopy(Str, ""); // Tell SrcMgr about this buffer, it takes ownership of the buffer. - SrcMgr.AddNewSourceBuffer(Buffer, SMLoc()); + SrcMgr.AddNewSourceBuffer(std::move(Buffer), SMLoc()); std::unique_ptr Parser( createMCAsmParser(SrcMgr, OutContext, OutStreamer, *MAI)); @@ -146,6 +147,10 @@ void AsmPrinter::EmitInlineAsm(StringRef Str, const MDNode *LocMDNode, " we don't have an asm parser for this target\n"); Parser->setAssemblerDialect(Dialect); Parser->setTargetParser(*TAP.get()); + if (MF) { + const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo(); + TAP->SetFrameRegister(TRI->getFrameRegister(*MF)); + } // Don't implicitly switch to the text section before the asm. int Res = Parser->Run(/*NoInitialTextSection*/ true, @@ -500,7 +505,7 @@ void AsmPrinter::EmitInlineAsm(const MachineInstr *MI) const { /// for their own strange codes. 
void AsmPrinter::PrintSpecial(const MachineInstr *MI, raw_ostream &OS, const char *Code) const { - const DataLayout *DL = TM.getDataLayout(); + const DataLayout *DL = TM.getSubtargetImpl()->getDataLayout(); if (!strcmp(Code, "private")) { OS << DL->getPrivateGlobalPrefix(); } else if (!strcmp(Code, "comment")) { diff --git a/lib/CodeGen/AsmPrinter/ByteStreamer.h b/lib/CodeGen/AsmPrinter/ByteStreamer.h index 6c01d65..0cc8353 100644 --- a/lib/CodeGen/AsmPrinter/ByteStreamer.h +++ b/lib/CodeGen/AsmPrinter/ByteStreamer.h @@ -12,8 +12,8 @@ // //===----------------------------------------------------------------------===// -#ifndef LLVM_CODEGEN_BYTESTREAMER_H -#define LLVM_CODEGEN_BYTESTREAMER_H +#ifndef LLVM_LIB_CODEGEN_ASMPRINTER_BYTESTREAMER_H +#define LLVM_LIB_CODEGEN_ASMPRINTER_BYTESTREAMER_H #include "llvm/ADT/ArrayRef.h" #include "llvm/CodeGen/AsmPrinter.h" diff --git a/lib/CodeGen/AsmPrinter/CMakeLists.txt b/lib/CodeGen/AsmPrinter/CMakeLists.txt index f555f21..e6b7d64 100644 --- a/lib/CodeGen/AsmPrinter/CMakeLists.txt +++ b/lib/CodeGen/AsmPrinter/CMakeLists.txt @@ -9,6 +9,7 @@ add_llvm_library(LLVMAsmPrinter DIEHash.cpp DwarfAccelTable.cpp DwarfCFIException.cpp + DwarfCompileUnit.cpp DwarfDebug.cpp DwarfFile.cpp DwarfStringPool.cpp diff --git a/lib/CodeGen/AsmPrinter/DIE.cpp b/lib/CodeGen/AsmPrinter/DIE.cpp index c3dcd9c..50ea369 100644 --- a/lib/CodeGen/AsmPrinter/DIE.cpp +++ b/lib/CodeGen/AsmPrinter/DIE.cpp @@ -12,12 +12,15 @@ //===----------------------------------------------------------------------===// #include "DIE.h" + +#include "DwarfCompileUnit.h" #include "DwarfDebug.h" #include "DwarfUnit.h" #include "llvm/ADT/Twine.h" #include "llvm/CodeGen/AsmPrinter.h" #include "llvm/IR/DataLayout.h" #include "llvm/MC/MCAsmInfo.h" +#include "llvm/MC/MCContext.h" #include "llvm/MC/MCStreamer.h" #include "llvm/MC/MCSymbol.h" #include "llvm/Support/Debug.h" @@ -370,6 +373,29 @@ void DIEString::print(raw_ostream &O) const { // DIEEntry Implementation //===----------------------------------------------------------------------===// +/// Emit something like ".long Hi+Offset-Lo" where the size in bytes of the +/// directive is specified by Size and Hi/Lo specify the labels. +static void emitLabelOffsetDifference(MCStreamer &Streamer, const MCSymbol *Hi, + uint64_t Offset, const MCSymbol *Lo, + unsigned Size) { + MCContext &Context = Streamer.getContext(); + + // Emit Hi+Offset - Lo + // Get the Hi+Offset expression. + const MCExpr *Plus = + MCBinaryExpr::CreateAdd(MCSymbolRefExpr::Create(Hi, Context), + MCConstantExpr::Create(Offset, Context), Context); + + // Get the Hi+Offset-Lo expression. + const MCExpr *Diff = MCBinaryExpr::CreateSub( + Plus, MCSymbolRefExpr::Create(Lo, Context), Context); + + // Otherwise, emit with .set (aka assignment). + MCSymbol *SetLabel = Context.CreateTempSymbol(); + Streamer.EmitAssignment(SetLabel, Diff); + Streamer.EmitSymbolValue(SetLabel, Size); +} + /// EmitValue - Emit debug information entry offset. 
/// void DIEEntry::EmitValue(AsmPrinter *AP, dwarf::Form Form) const { @@ -388,9 +414,9 @@ void DIEEntry::EmitValue(AsmPrinter *AP, dwarf::Form Form) const { AP->EmitLabelPlusOffset(CU->getSectionSym(), Addr, DIEEntry::getRefAddrSize(AP)); else - AP->EmitLabelOffsetDifference(CU->getSectionSym(), Addr, - CU->getSectionSym(), - DIEEntry::getRefAddrSize(AP)); + emitLabelOffsetDifference(AP->OutStreamer, CU->getSectionSym(), Addr, + CU->getSectionSym(), + DIEEntry::getRefAddrSize(AP)); } else AP->EmitInt32(Entry.getOffset()); } diff --git a/lib/CodeGen/AsmPrinter/DIE.h b/lib/CodeGen/AsmPrinter/DIE.h index ef05f17..e310aef 100644 --- a/lib/CodeGen/AsmPrinter/DIE.h +++ b/lib/CodeGen/AsmPrinter/DIE.h @@ -11,8 +11,8 @@ // //===----------------------------------------------------------------------===// -#ifndef CODEGEN_ASMPRINTER_DIE_H__ -#define CODEGEN_ASMPRINTER_DIE_H__ +#ifndef LLVM_LIB_CODEGEN_ASMPRINTER_DIE_H +#define LLVM_LIB_CODEGEN_ASMPRINTER_DIE_H #include "llvm/ADT/FoldingSet.h" #include "llvm/ADT/SmallVector.h" @@ -381,10 +381,10 @@ public: /// class DIEString : public DIEValue { const DIEValue *Access; - const StringRef Str; + StringRef Str; public: - DIEString(const DIEValue *Acc, const StringRef S) + DIEString(const DIEValue *Acc, StringRef S) : DIEValue(isString), Access(Acc), Str(S) {} /// getString - Grab the string out of the object. diff --git a/lib/CodeGen/AsmPrinter/DIEHash.cpp b/lib/CodeGen/AsmPrinter/DIEHash.cpp index c2fad59..b2a3ba8 100644 --- a/lib/CodeGen/AsmPrinter/DIEHash.cpp +++ b/lib/CodeGen/AsmPrinter/DIEHash.cpp @@ -261,7 +261,7 @@ void DIEHash::hashDIEEntry(dwarf::Attribute Attribute, dwarf::Tag Tag, return; } - // otherwise, b) use the letter 'T' as a the marker, ... + // otherwise, b) use the letter 'T' as the marker, ... addULEB128('T'); addULEB128(Attribute); diff --git a/lib/CodeGen/AsmPrinter/DIEHash.h b/lib/CodeGen/AsmPrinter/DIEHash.h index 175d660..872aa0e 100644 --- a/lib/CodeGen/AsmPrinter/DIEHash.h +++ b/lib/CodeGen/AsmPrinter/DIEHash.h @@ -11,8 +11,8 @@ // //===----------------------------------------------------------------------===// -#ifndef CODEGEN_ASMPRINTER_DIEHASH_H__ -#define CODEGEN_ASMPRINTER_DIEHASH_H__ +#ifndef LLVM_LIB_CODEGEN_ASMPRINTER_DIEHASH_H +#define LLVM_LIB_CODEGEN_ASMPRINTER_DIEHASH_H #include "DIE.h" #include "llvm/ADT/DenseMap.h" diff --git a/lib/CodeGen/AsmPrinter/DbgValueHistoryCalculator.cpp b/lib/CodeGen/AsmPrinter/DbgValueHistoryCalculator.cpp index a66d08e..0c2a5e5 100644 --- a/lib/CodeGen/AsmPrinter/DbgValueHistoryCalculator.cpp +++ b/lib/CodeGen/AsmPrinter/DbgValueHistoryCalculator.cpp @@ -8,25 +8,25 @@ //===----------------------------------------------------------------------===// #include "DbgValueHistoryCalculator.h" +#include "llvm/ADT/BitVector.h" #include "llvm/ADT/SmallVector.h" #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineFunction.h" +#include "llvm/IR/DebugInfo.h" #include "llvm/Support/Debug.h" #include "llvm/Target/TargetRegisterInfo.h" #include #include -#include +using namespace llvm; #define DEBUG_TYPE "dwarfdebug" -namespace llvm { - // \brief If @MI is a DBG_VALUE with debug value described by a // defined register, returns the number of this register. // In the other case, returns 0. static unsigned isDescribedByReg(const MachineInstr &MI) { assert(MI.isDebugValue()); - assert(MI.getNumOperands() == 3); + assert(MI.getNumOperands() == 4); // If location of variable is described using a register (directly or // indirecltly), this register is always a first operand. 
return MI.getOperand(0).isReg() ? MI.getOperand(0).getReg() : 0; @@ -36,7 +36,7 @@ void DbgValueHistoryMap::startInstrRange(const MDNode *Var, const MachineInstr &MI) { // Instruction range should start with a DBG_VALUE instruction for the // variable. - assert(MI.isDebugValue() && MI.getDebugVariable() == Var); + assert(MI.isDebugValue() && "not a DBG_VALUE"); auto &Ranges = VarInstrRanges[Var]; if (!Ranges.empty() && Ranges.back().second == nullptr && Ranges.back().first->isIdenticalTo(&MI)) { @@ -96,6 +96,19 @@ static void addRegDescribedVar(RegDescribedVarsMap &RegVars, VarSet.push_back(Var); } +// \brief Terminate the location range for variables described by register at +// @I by inserting @ClobberingInstr to their history. +static void clobberRegisterUses(RegDescribedVarsMap &RegVars, + RegDescribedVarsMap::iterator I, + DbgValueHistoryMap &HistMap, + const MachineInstr &ClobberingInstr) { + // Iterate over all variables described by this register and add this + // instruction to their history, clobbering it. + for (const auto &Var : I->second) + HistMap.endInstrRange(Var, ClobberingInstr); + RegVars.erase(I); +} + // \brief Terminate the location range for variables described by register // @RegNo by inserting @ClobberingInstr to their history. static void clobberRegisterUses(RegDescribedVarsMap &RegVars, unsigned RegNo, @@ -104,22 +117,26 @@ static void clobberRegisterUses(RegDescribedVarsMap &RegVars, unsigned RegNo, const auto &I = RegVars.find(RegNo); if (I == RegVars.end()) return; - // Iterate over all variables described by this register and add this - // instruction to their history, clobbering it. - for (const auto &Var : I->second) - HistMap.endInstrRange(Var, ClobberingInstr); - RegVars.erase(I); + clobberRegisterUses(RegVars, I, HistMap, ClobberingInstr); } -// \brief Collect all registers clobbered by @MI and insert them to @Regs. -static void collectClobberedRegisters(const MachineInstr &MI, +// \brief Collect all registers clobbered by @MI and apply the functor +// @Func to their RegNo. +// @Func should be a functor with a void(unsigned) signature. We're +// not using std::function here for performance reasons. It has a +// small but measurable impact. By using a functor instead of a +// std::set& here, we can avoid the overhead of constructing +// temporaries in calculateDbgValueHistory, which has a significant +// performance impact. +template +static void applyToClobberedRegisters(const MachineInstr &MI, const TargetRegisterInfo *TRI, - std::set &Regs) { + Callable Func) { for (const MachineOperand &MO : MI.operands()) { if (!MO.isReg() || !MO.isDef() || !MO.getReg()) continue; for (MCRegAliasIterator AI(MO.getReg(), TRI, true); AI.isValid(); ++AI) - Regs.insert(*AI); + Func(*AI); } } @@ -133,11 +150,12 @@ static const MachineInstr *getFirstEpilogueInst(const MachineBasicBlock &MBB) { // as the return instruction. DebugLoc LastLoc = LastMI->getDebugLoc(); auto Res = LastMI; - for (MachineBasicBlock::const_reverse_iterator I(std::next(LastMI)); I != MBB.rend(); - ++I) { + for (MachineBasicBlock::const_reverse_iterator I(std::next(LastMI)), + E = MBB.rend(); + I != E; ++I) { if (I->getDebugLoc() != LastLoc) return Res; - Res = std::prev(I.base()); + Res = &*I; } // If all instructions have the same debug location, assume whole MBB is // an epilogue. 
@@ -145,25 +163,26 @@ static const MachineInstr *getFirstEpilogueInst(const MachineBasicBlock &MBB) { } // \brief Collect registers that are modified in the function body (their -// contents is changed only in the prologue and epilogue). +// contents is changed outside of the prologue and epilogue). static void collectChangingRegs(const MachineFunction *MF, const TargetRegisterInfo *TRI, - std::set &Regs) { + BitVector &Regs) { for (const auto &MBB : *MF) { auto FirstEpilogueInst = getFirstEpilogueInst(MBB); - bool IsInEpilogue = false; + for (const auto &MI : MBB) { - IsInEpilogue |= &MI == FirstEpilogueInst; - if (!MI.getFlag(MachineInstr::FrameSetup) && !IsInEpilogue) - collectClobberedRegisters(MI, TRI, Regs); + if (&MI == FirstEpilogueInst) + break; + if (!MI.getFlag(MachineInstr::FrameSetup)) + applyToClobberedRegisters(MI, TRI, [&](unsigned r) { Regs.set(r); }); } } } -void calculateDbgValueHistory(const MachineFunction *MF, - const TargetRegisterInfo *TRI, - DbgValueHistoryMap &Result) { - std::set ChangingRegs; +void llvm::calculateDbgValueHistory(const MachineFunction *MF, + const TargetRegisterInfo *TRI, + DbgValueHistoryMap &Result) { + BitVector ChangingRegs(TRI->getNumRegs()); collectChangingRegs(MF, TRI, ChangingRegs); RegDescribedVarsMap RegVars; @@ -172,17 +191,18 @@ void calculateDbgValueHistory(const MachineFunction *MF, if (!MI.isDebugValue()) { // Not a DBG_VALUE instruction. It may clobber registers which describe // some variables. - std::set MIClobberedRegs; - collectClobberedRegisters(MI, TRI, MIClobberedRegs); - for (unsigned RegNo : MIClobberedRegs) { - if (ChangingRegs.count(RegNo)) + applyToClobberedRegisters(MI, TRI, [&](unsigned RegNo) { + if (ChangingRegs.test(RegNo)) clobberRegisterUses(RegVars, RegNo, Result, MI); - } + }); continue; } assert(MI.getNumOperands() > 1 && "Invalid DBG_VALUE instruction!"); - const MDNode *Var = MI.getDebugVariable(); + // Use the base variable (without any DW_OP_piece expressions) + // as index into History. The full variables including the + // piece expressions are attached to the MI. + DIVariable Var = MI.getDebugVariable(); if (unsigned PrevReg = Result.getRegisterForVar(Var)) dropRegDescribedVar(RegVars, PrevReg, Var); @@ -196,11 +216,12 @@ void calculateDbgValueHistory(const MachineFunction *MF, // Make sure locations for register-described variables are valid only // until the end of the basic block (unless it's the last basic block, in // which case let their liveness run off to the end of the function). - if (!MBB.empty() && &MBB != &MF->back()) { - for (unsigned RegNo : ChangingRegs) - clobberRegisterUses(RegVars, RegNo, Result, MBB.back()); + if (!MBB.empty() && &MBB != &MF->back()) { + for (auto I = RegVars.begin(), E = RegVars.end(); I != E;) { + auto CurElem = I++; // CurElem can be erased below. 
+ if (ChangingRegs.test(CurElem->first)) + clobberRegisterUses(RegVars, CurElem, Result, MBB.back()); + } } } } - -} diff --git a/lib/CodeGen/AsmPrinter/DbgValueHistoryCalculator.h b/lib/CodeGen/AsmPrinter/DbgValueHistoryCalculator.h index b9177f0..4b62007 100644 --- a/lib/CodeGen/AsmPrinter/DbgValueHistoryCalculator.h +++ b/lib/CodeGen/AsmPrinter/DbgValueHistoryCalculator.h @@ -7,8 +7,8 @@ // //===----------------------------------------------------------------------===// -#ifndef CODEGEN_ASMPRINTER_DBGVALUEHISTORYCALCULATOR_H_ -#define CODEGEN_ASMPRINTER_DBGVALUEHISTORYCALCULATOR_H_ +#ifndef LLVM_LIB_CODEGEN_ASMPRINTER_DBGVALUEHISTORYCALCULATOR_H +#define LLVM_LIB_CODEGEN_ASMPRINTER_DBGVALUEHISTORYCALCULATOR_H #include "llvm/ADT/MapVector.h" #include "llvm/ADT/SmallVector.h" @@ -28,9 +28,11 @@ class DbgValueHistoryMap { // range. If end is not specified, location is valid until the start // instruction of the next instruction range, or until the end of the // function. +public: typedef std::pair InstrRange; typedef SmallVector InstrRanges; typedef MapVector InstrRangesMap; +private: InstrRangesMap VarInstrRanges; public: diff --git a/lib/CodeGen/AsmPrinter/DebugLocEntry.h b/lib/CodeGen/AsmPrinter/DebugLocEntry.h index 3beb799..6cca985 100644 --- a/lib/CodeGen/AsmPrinter/DebugLocEntry.h +++ b/lib/CodeGen/AsmPrinter/DebugLocEntry.h @@ -7,14 +7,14 @@ // //===----------------------------------------------------------------------===// -#ifndef CODEGEN_ASMPRINTER_DEBUGLOCENTRY_H__ -#define CODEGEN_ASMPRINTER_DEBUGLOCENTRY_H__ +#ifndef LLVM_LIB_CODEGEN_ASMPRINTER_DEBUGLOCENTRY_H +#define LLVM_LIB_CODEGEN_ASMPRINTER_DEBUGLOCENTRY_H #include "llvm/IR/Constants.h" +#include "llvm/IR/DebugInfo.h" #include "llvm/MC/MachineLocation.h" #include "llvm/MC/MCSymbol.h" namespace llvm { -class DwarfCompileUnit; class MDNode; /// \brief This struct describes location entries emitted in the .debug_loc /// section. @@ -26,25 +26,30 @@ class DebugLocEntry { public: /// A single location or constant. struct Value { - Value(const MDNode *Var, int64_t i) - : Variable(Var), EntryKind(E_Integer) { + Value(const MDNode *Var, const MDNode *Expr, int64_t i) + : Variable(Var), Expression(Expr), EntryKind(E_Integer) { Constant.Int = i; } - Value(const MDNode *Var, const ConstantFP *CFP) - : Variable(Var), EntryKind(E_ConstantFP) { + Value(const MDNode *Var, const MDNode *Expr, const ConstantFP *CFP) + : Variable(Var), Expression(Expr), EntryKind(E_ConstantFP) { Constant.CFP = CFP; } - Value(const MDNode *Var, const ConstantInt *CIP) - : Variable(Var), EntryKind(E_ConstantInt) { + Value(const MDNode *Var, const MDNode *Expr, const ConstantInt *CIP) + : Variable(Var), Expression(Expr), EntryKind(E_ConstantInt) { Constant.CIP = CIP; } - Value(const MDNode *Var, MachineLocation Loc) - : Variable(Var), EntryKind(E_Location), Loc(Loc) { + Value(const MDNode *Var, const MDNode *Expr, MachineLocation Loc) + : Variable(Var), Expression(Expr), EntryKind(E_Location), Loc(Loc) { + assert(DIVariable(Var).Verify()); + assert(DIExpression(Expr).Verify()); } // The variable to which this location entry corresponds. const MDNode *Variable; + // Any complex address location expression for this Value. + const MDNode *Expression; + // Type of entry that this represents. enum EntryType { E_Location, E_Integer, E_ConstantFP, E_ConstantInt }; enum EntryType EntryKind; @@ -59,23 +64,6 @@ public: // Or a location in the machine frame. 
MachineLocation Loc; - bool operator==(const Value &other) const { - if (EntryKind != other.EntryKind) - return false; - - switch (EntryKind) { - case E_Location: - return Loc == other.Loc; - case E_Integer: - return Constant.Int == other.Constant.Int; - case E_ConstantFP: - return Constant.CFP == other.Constant.CFP; - case E_ConstantInt: - return Constant.CIP == other.Constant.CIP; - } - llvm_unreachable("unhandled EntryKind"); - } - bool isLocation() const { return EntryKind == E_Location; } bool isInt() const { return EntryKind == E_Integer; } bool isConstantFP() const { return EntryKind == E_ConstantFP; } @@ -84,40 +72,114 @@ public: const ConstantFP *getConstantFP() const { return Constant.CFP; } const ConstantInt *getConstantInt() const { return Constant.CIP; } MachineLocation getLoc() const { return Loc; } - const MDNode *getVariable() const { return Variable; } + const MDNode *getVariableNode() const { return Variable; } + DIVariable getVariable() const { return DIVariable(Variable); } + bool isVariablePiece() const { return getExpression().isVariablePiece(); } + DIExpression getExpression() const { return DIExpression(Expression); } + friend bool operator==(const Value &, const Value &); + friend bool operator<(const Value &, const Value &); }; + private: - /// A list of locations/constants belonging to this entry. + /// A nonempty list of locations/constants belonging to this entry, + /// sorted by offset. SmallVector Values; - /// The compile unit that this location entry is referenced by. - const DwarfCompileUnit *Unit; - public: - DebugLocEntry() : Begin(nullptr), End(nullptr), Unit(nullptr) {} - DebugLocEntry(const MCSymbol *B, const MCSymbol *E, - Value Val, const DwarfCompileUnit *U) - : Begin(B), End(E), Unit(U) { + DebugLocEntry(const MCSymbol *B, const MCSymbol *E, Value Val) + : Begin(B), End(E) { Values.push_back(std::move(Val)); } + /// \brief If this and Next are describing different pieces of the same + // variable, merge them by appending Next's values to the current + // list of values. + // Return true if the merge was successful. + bool MergeValues(const DebugLocEntry &Next) { + if (Begin == Next.Begin) { + DIExpression Expr(Values[0].Expression); + DIVariable Var(Values[0].Variable); + DIExpression NextExpr(Next.Values[0].Expression); + DIVariable NextVar(Next.Values[0].Variable); + if (Var == NextVar && Expr.isVariablePiece() && + NextExpr.isVariablePiece()) { + addValues(Next.Values); + End = Next.End; + return true; + } + } + return false; + } + /// \brief Attempt to merge this DebugLocEntry with Next and return /// true if the merge was successful. Entries can be merged if they /// share the same Loc/Constant and if Next immediately follows this /// Entry. - bool Merge(const DebugLocEntry &Next) { + bool MergeRanges(const DebugLocEntry &Next) { + // If this and Next are describing the same variable, merge them. 
if ((End == Next.Begin && Values == Next.Values)) { End = Next.End; return true; } return false; } + const MCSymbol *getBeginSym() const { return Begin; } const MCSymbol *getEndSym() const { return End; } - const DwarfCompileUnit *getCU() const { return Unit; } - const ArrayRef getValues() const { return Values; } - void addValue(Value Val) { Values.push_back(Val); } + ArrayRef getValues() const { return Values; } + void addValues(ArrayRef Vals) { + Values.append(Vals.begin(), Vals.end()); + sortUniqueValues(); + assert(std::all_of(Values.begin(), Values.end(), [](DebugLocEntry::Value V){ + return V.isVariablePiece(); + }) && "value must be a piece"); + } + + // Sort the pieces by offset. + // Remove any duplicate entries by dropping all but the first. + void sortUniqueValues() { + std::sort(Values.begin(), Values.end()); + Values.erase(std::unique(Values.begin(), Values.end(), + [](const Value &A, const Value &B) { + return A.getVariable() == B.getVariable() && + A.getExpression() == B.getExpression(); + }), + Values.end()); + } }; +/// Compare two Values for equality. +inline bool operator==(const DebugLocEntry::Value &A, + const DebugLocEntry::Value &B) { + if (A.EntryKind != B.EntryKind) + return false; + + if (A.Expression != B.Expression) + return false; + + if (A.Variable != B.Variable) + return false; + + switch (A.EntryKind) { + case DebugLocEntry::Value::E_Location: + return A.Loc == B.Loc; + case DebugLocEntry::Value::E_Integer: + return A.Constant.Int == B.Constant.Int; + case DebugLocEntry::Value::E_ConstantFP: + return A.Constant.CFP == B.Constant.CFP; + case DebugLocEntry::Value::E_ConstantInt: + return A.Constant.CIP == B.Constant.CIP; + } + llvm_unreachable("unhandled EntryKind"); +} + +/// Compare two pieces based on their offset. +inline bool operator<(const DebugLocEntry::Value &A, + const DebugLocEntry::Value &B) { + return A.getExpression().getPieceOffset() < + B.getExpression().getPieceOffset(); } + +} + #endif diff --git a/lib/CodeGen/AsmPrinter/DebugLocList.h b/lib/CodeGen/AsmPrinter/DebugLocList.h index 7a51c7b..2a4f58f 100644 --- a/lib/CodeGen/AsmPrinter/DebugLocList.h +++ b/lib/CodeGen/AsmPrinter/DebugLocList.h @@ -7,16 +7,18 @@ // //===----------------------------------------------------------------------===// -#ifndef CODEGEN_ASMPRINTER_DEBUGLOCLIST_H__ -#define CODEGEN_ASMPRINTER_DEBUGLOCLIST_H__ +#ifndef LLVM_LIB_CODEGEN_ASMPRINTER_DEBUGLOCLIST_H +#define LLVM_LIB_CODEGEN_ASMPRINTER_DEBUGLOCLIST_H -#include "llvm/MC/MCSymbol.h" #include "llvm/ADT/SmallVector.h" #include "DebugLocEntry.h" namespace llvm { +class DwarfCompileUnit; +class MCSymbol; struct DebugLocList { MCSymbol *Label; + DwarfCompileUnit *CU; SmallVector List; }; } diff --git a/lib/CodeGen/AsmPrinter/DwarfAccelTable.cpp b/lib/CodeGen/AsmPrinter/DwarfAccelTable.cpp index e9527c4..7e87566 100644 --- a/lib/CodeGen/AsmPrinter/DwarfAccelTable.cpp +++ b/lib/CodeGen/AsmPrinter/DwarfAccelTable.cpp @@ -13,6 +13,7 @@ #include "DwarfAccelTable.h" #include "DIE.h" +#include "DwarfCompileUnit.h" #include "DwarfDebug.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/Twine.h" @@ -174,7 +175,8 @@ void DwarfAccelTable::EmitOffsets(AsmPrinter *Asm, MCSymbol *SecBegin) { // Walk through the buckets and emit the full data for each element in // the bucket. For the string case emit the dies and the various offsets. // Terminate each HashData bucket with 0. 
-void DwarfAccelTable::EmitData(AsmPrinter *Asm, DwarfFile *D) { +void DwarfAccelTable::EmitData(AsmPrinter *Asm, DwarfDebug *D, + MCSymbol *StrSym) { uint64_t PrevHash = UINT64_MAX; for (size_t i = 0, e = Buckets.size(); i < e; ++i) { for (HashList::const_iterator HI = Buckets[i].begin(), @@ -183,13 +185,14 @@ void DwarfAccelTable::EmitData(AsmPrinter *Asm, DwarfFile *D) { // Remember to emit the label for our offset. Asm->OutStreamer.EmitLabel((*HI)->Sym); Asm->OutStreamer.AddComment((*HI)->Str); - Asm->EmitSectionOffset((*HI)->Data.StrSym, - D->getStringPool().getSectionSymbol()); + Asm->EmitSectionOffset((*HI)->Data.StrSym, StrSym); Asm->OutStreamer.AddComment("Num DIEs"); Asm->EmitInt32((*HI)->Data.Values.size()); for (HashDataContents *HD : (*HI)->Data.Values) { // Emit the DIE offset - Asm->EmitInt32(HD->Die->getOffset()); + DwarfCompileUnit *CU = D->lookupUnit(HD->Die->getUnit()); + assert(CU && "Accelerated DIE should belong to a CU."); + Asm->EmitInt32(HD->Die->getOffset() + CU->getDebugInfoOffset()); // If we have multiple Atoms emit that info too. // FIXME: A bit of a hack, we either emit only one atom or all info. if (HeaderData.Atoms.size() > 1) { @@ -206,7 +209,8 @@ void DwarfAccelTable::EmitData(AsmPrinter *Asm, DwarfFile *D) { } // Emit the entire data structure to the output file. -void DwarfAccelTable::Emit(AsmPrinter *Asm, MCSymbol *SecBegin, DwarfFile *D) { +void DwarfAccelTable::Emit(AsmPrinter *Asm, MCSymbol *SecBegin, DwarfDebug *D, + MCSymbol *StrSym) { // Emit the header. EmitHeader(Asm); @@ -220,7 +224,7 @@ void DwarfAccelTable::Emit(AsmPrinter *Asm, MCSymbol *SecBegin, DwarfFile *D) { EmitOffsets(Asm, SecBegin); // Emit the hash data. - EmitData(Asm, D); + EmitData(Asm, D, StrSym); } #ifndef NDEBUG diff --git a/lib/CodeGen/AsmPrinter/DwarfAccelTable.h b/lib/CodeGen/AsmPrinter/DwarfAccelTable.h index a3cc95f..3cdf678 100644 --- a/lib/CodeGen/AsmPrinter/DwarfAccelTable.h +++ b/lib/CodeGen/AsmPrinter/DwarfAccelTable.h @@ -11,8 +11,8 @@ // //===----------------------------------------------------------------------===// -#ifndef CODEGEN_ASMPRINTER_DWARFACCELTABLE_H__ -#define CODEGEN_ASMPRINTER_DWARFACCELTABLE_H__ +#ifndef LLVM_LIB_CODEGEN_ASMPRINTER_DWARFACCELTABLE_H +#define LLVM_LIB_CODEGEN_ASMPRINTER_DWARFACCELTABLE_H #include "DIE.h" #include "llvm/ADT/ArrayRef.h" @@ -62,7 +62,7 @@ namespace llvm { class AsmPrinter; -class DwarfFile; +class DwarfDebug; class DwarfAccelTable { @@ -140,7 +140,7 @@ public: private: struct TableHeaderData { uint32_t die_offset_base; - SmallVector Atoms; + SmallVector Atoms; TableHeaderData(ArrayRef AtomList, uint32_t offset = 0) : die_offset_base(offset), Atoms(AtomList.begin(), AtomList.end()) {} @@ -223,7 +223,7 @@ private: void EmitBuckets(AsmPrinter *); void EmitHashes(AsmPrinter *); void EmitOffsets(AsmPrinter *, MCSymbol *); - void EmitData(AsmPrinter *, DwarfFile *D); + void EmitData(AsmPrinter *, DwarfDebug *D, MCSymbol *StrSym); // Allocator for HashData and HashDataContents. 
BumpPtrAllocator Allocator; @@ -248,7 +248,7 @@ public: void AddName(StringRef Name, MCSymbol *StrSym, const DIE *Die, char Flags = 0); void FinalizeTable(AsmPrinter *, StringRef); - void Emit(AsmPrinter *, MCSymbol *, DwarfFile *); + void Emit(AsmPrinter *, MCSymbol *, DwarfDebug *, MCSymbol *StrSym); #ifndef NDEBUG void print(raw_ostream &O); void dump() { print(dbgs()); } diff --git a/lib/CodeGen/AsmPrinter/DwarfCFIException.cpp b/lib/CodeGen/AsmPrinter/DwarfCFIException.cpp index 74215aa..0dc52da 100644 --- a/lib/CodeGen/AsmPrinter/DwarfCFIException.cpp +++ b/lib/CodeGen/AsmPrinter/DwarfCFIException.cpp @@ -51,7 +51,7 @@ void DwarfCFIException::endModule() { if (moveTypeModule == AsmPrinter::CFI_M_Debug) Asm->OutStreamer.EmitCFISections(false, true); - if (!Asm->MAI->isExceptionHandlingDwarf()) + if (!Asm->MAI->usesItaniumLSDAForExceptions()) return; const TargetLoweringObjectFile &TLOF = Asm->getObjFileLowering(); diff --git a/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp b/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp new file mode 100644 index 0000000..2f1b0e5 --- /dev/null +++ b/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp @@ -0,0 +1,862 @@ +#include "DwarfCompileUnit.h" + +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/IR/GlobalValue.h" +#include "llvm/IR/GlobalVariable.h" +#include "llvm/IR/Instruction.h" +#include "llvm/MC/MCAsmInfo.h" +#include "llvm/MC/MCStreamer.h" +#include "llvm/Target/TargetFrameLowering.h" +#include "llvm/Target/TargetLoweringObjectFile.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetSubtargetInfo.h" +#include "llvm/Target/TargetRegisterInfo.h" + +namespace llvm { + +DwarfCompileUnit::DwarfCompileUnit(unsigned UID, DICompileUnit Node, + AsmPrinter *A, DwarfDebug *DW, + DwarfFile *DWU) + : DwarfUnit(UID, dwarf::DW_TAG_compile_unit, Node, A, DW, DWU), + Skeleton(nullptr), LabelBegin(nullptr), BaseAddress(nullptr) { + insertDIE(Node, &getUnitDie()); +} + +/// addLabelAddress - Add a dwarf label attribute data and value using +/// DW_FORM_addr or DW_FORM_GNU_addr_index. +/// +void DwarfCompileUnit::addLabelAddress(DIE &Die, dwarf::Attribute Attribute, + const MCSymbol *Label) { + + // Don't use the address pool in non-fission or in the skeleton unit itself. + // FIXME: Once GDB supports this, it's probably worthwhile using the address + // pool from the skeleton - maybe even in non-fission (possibly fewer + // relocations by sharing them in the pool, but we have other ideas about how + // to reduce the number of relocations as well/instead). + if (!DD->useSplitDwarf() || !Skeleton) + return addLocalLabelAddress(Die, Attribute, Label); + + if (Label) + DD->addArangeLabel(SymbolCU(this, Label)); + + unsigned idx = DD->getAddressPool().getIndex(Label); + DIEValue *Value = new (DIEValueAllocator) DIEInteger(idx); + Die.addValue(Attribute, dwarf::DW_FORM_GNU_addr_index, Value); +} + +void DwarfCompileUnit::addLocalLabelAddress(DIE &Die, + dwarf::Attribute Attribute, + const MCSymbol *Label) { + if (Label) + DD->addArangeLabel(SymbolCU(this, Label)); + + Die.addValue(Attribute, dwarf::DW_FORM_addr, + Label ? (DIEValue *)new (DIEValueAllocator) DIELabel(Label) + : new (DIEValueAllocator) DIEInteger(0)); +} + +unsigned DwarfCompileUnit::getOrCreateSourceID(StringRef FileName, + StringRef DirName) { + // If we print assembly, we can't separate .file entries according to + // compile units. Thus all files will belong to the default compile unit. 
+ + // FIXME: add a better feature test than hasRawTextSupport. Even better, + // extend .file to support this. + return Asm->OutStreamer.EmitDwarfFileDirective( + 0, DirName, FileName, + Asm->OutStreamer.hasRawTextSupport() ? 0 : getUniqueID()); +} + +// Return const expression if value is a GEP to access merged global +// constant. e.g. +// i8* getelementptr ({ i8, i8, i8, i8 }* @_MergedGlobals, i32 0, i32 0) +static const ConstantExpr *getMergedGlobalExpr(const Value *V) { + const ConstantExpr *CE = dyn_cast_or_null(V); + if (!CE || CE->getNumOperands() != 3 || + CE->getOpcode() != Instruction::GetElementPtr) + return nullptr; + + // First operand points to a global struct. + Value *Ptr = CE->getOperand(0); + if (!isa(Ptr) || + !isa(cast(Ptr->getType())->getElementType())) + return nullptr; + + // Second operand is zero. + const ConstantInt *CI = dyn_cast_or_null(CE->getOperand(1)); + if (!CI || !CI->isZero()) + return nullptr; + + // Third operand is offset. + if (!isa(CE->getOperand(2))) + return nullptr; + + return CE; +} + +/// getOrCreateGlobalVariableDIE - get or create global variable DIE. +DIE *DwarfCompileUnit::getOrCreateGlobalVariableDIE(DIGlobalVariable GV) { + // Check for pre-existence. + if (DIE *Die = getDIE(GV)) + return Die; + + assert(GV.isGlobalVariable()); + + DIScope GVContext = DD->resolve(GV.getContext()); + DIType GTy = DD->resolve(GV.getType()); + + // Construct the context before querying for the existence of the DIE in + // case such construction creates the DIE. + DIE *ContextDIE = getOrCreateContextDIE(GVContext); + + // Add to map. + DIE *VariableDIE = &createAndAddDIE(GV.getTag(), *ContextDIE, GV); + DIScope DeclContext; + + if (DIDerivedType SDMDecl = GV.getStaticDataMemberDeclaration()) { + DeclContext = resolve(SDMDecl.getContext()); + assert(SDMDecl.isStaticMember() && "Expected static member decl"); + assert(GV.isDefinition()); + // We need the declaration DIE that is in the static member's class. + DIE *VariableSpecDIE = getOrCreateStaticMemberDIE(SDMDecl); + addDIEEntry(*VariableDIE, dwarf::DW_AT_specification, *VariableSpecDIE); + } else { + DeclContext = resolve(GV.getContext()); + // Add name and type. + addString(*VariableDIE, dwarf::DW_AT_name, GV.getDisplayName()); + addType(*VariableDIE, GTy); + + // Add scoping info. + if (!GV.isLocalToUnit()) + addFlag(*VariableDIE, dwarf::DW_AT_external); + + // Add line number info. + addSourceLine(*VariableDIE, GV); + } + + if (!GV.isDefinition()) + addFlag(*VariableDIE, dwarf::DW_AT_declaration); + + // Add location. + bool addToAccelTable = false; + bool isGlobalVariable = GV.getGlobal() != nullptr; + if (isGlobalVariable) { + addToAccelTable = true; + DIELoc *Loc = new (DIEValueAllocator) DIELoc(); + const MCSymbol *Sym = Asm->getSymbol(GV.getGlobal()); + if (GV.getGlobal()->isThreadLocal()) { + // FIXME: Make this work with -gsplit-dwarf. + unsigned PointerSize = Asm->getDataLayout().getPointerSize(); + assert((PointerSize == 4 || PointerSize == 8) && + "Add support for other sizes if necessary"); + // Based on GCC's support for TLS: + if (!DD->useSplitDwarf()) { + // 1) Start with a constNu of the appropriate pointer size + addUInt(*Loc, dwarf::DW_FORM_data1, + PointerSize == 4 ? dwarf::DW_OP_const4u : dwarf::DW_OP_const8u); + // 2) containing the (relocated) offset of the TLS variable + // within the module's TLS block. 
+ addExpr(*Loc, dwarf::DW_FORM_udata, + Asm->getObjFileLowering().getDebugThreadLocalSymbol(Sym)); + } else { + addUInt(*Loc, dwarf::DW_FORM_data1, dwarf::DW_OP_GNU_const_index); + addUInt(*Loc, dwarf::DW_FORM_udata, + DD->getAddressPool().getIndex(Sym, /* TLS */ true)); + } + // 3) followed by a custom OP to make the debugger do a TLS lookup. + addUInt(*Loc, dwarf::DW_FORM_data1, dwarf::DW_OP_GNU_push_tls_address); + } else { + DD->addArangeLabel(SymbolCU(this, Sym)); + addOpAddress(*Loc, Sym); + } + + addBlock(*VariableDIE, dwarf::DW_AT_location, Loc); + // Add the linkage name. + StringRef LinkageName = GV.getLinkageName(); + if (!LinkageName.empty()) + // From DWARF4: DIEs to which DW_AT_linkage_name may apply include: + // TAG_common_block, TAG_constant, TAG_entry_point, TAG_subprogram and + // TAG_variable. + addString(*VariableDIE, + DD->getDwarfVersion() >= 4 ? dwarf::DW_AT_linkage_name + : dwarf::DW_AT_MIPS_linkage_name, + GlobalValue::getRealLinkageName(LinkageName)); + } else if (const ConstantInt *CI = + dyn_cast_or_null(GV.getConstant())) { + addConstantValue(*VariableDIE, CI, GTy); + } else if (const ConstantExpr *CE = getMergedGlobalExpr(GV.getConstant())) { + addToAccelTable = true; + // GV is a merged global. + DIELoc *Loc = new (DIEValueAllocator) DIELoc(); + Value *Ptr = CE->getOperand(0); + MCSymbol *Sym = Asm->getSymbol(cast(Ptr)); + DD->addArangeLabel(SymbolCU(this, Sym)); + addOpAddress(*Loc, Sym); + addUInt(*Loc, dwarf::DW_FORM_data1, dwarf::DW_OP_constu); + SmallVector Idx(CE->op_begin() + 1, CE->op_end()); + addUInt(*Loc, dwarf::DW_FORM_udata, + Asm->getDataLayout().getIndexedOffset(Ptr->getType(), Idx)); + addUInt(*Loc, dwarf::DW_FORM_data1, dwarf::DW_OP_plus); + addBlock(*VariableDIE, dwarf::DW_AT_location, Loc); + } + + if (addToAccelTable) { + DD->addAccelName(GV.getName(), *VariableDIE); + + // If the linkage name is different than the name, go ahead and output + // that as well into the name table. + if (GV.getLinkageName() != "" && GV.getName() != GV.getLinkageName()) + DD->addAccelName(GV.getLinkageName(), *VariableDIE); + } + + addGlobalName(GV.getName(), *VariableDIE, DeclContext); + return VariableDIE; +} + +void DwarfCompileUnit::addRange(RangeSpan Range) { + bool SameAsPrevCU = this == DD->getPrevCU(); + DD->setPrevCU(this); + // If we have no current ranges just add the range and return, otherwise, + // check the current section and CU against the previous section and CU we + // emitted into and the subprogram was contained within. If these are the + // same then extend our current range, otherwise add this as a new range. + if (CURanges.empty() || !SameAsPrevCU || + (&CURanges.back().getEnd()->getSection() != + &Range.getEnd()->getSection())) { + CURanges.push_back(Range); + return; + } + + CURanges.back().setEnd(Range.getEnd()); +} + +void DwarfCompileUnit::addSectionLabel(DIE &Die, dwarf::Attribute Attribute, + const MCSymbol *Label, + const MCSymbol *Sec) { + if (Asm->MAI->doesDwarfUseRelocationsAcrossSections()) + addLabel(Die, Attribute, + DD->getDwarfVersion() >= 4 ? dwarf::DW_FORM_sec_offset + : dwarf::DW_FORM_data4, + Label); + else + addSectionDelta(Die, Attribute, Label, Sec); +} + +void DwarfCompileUnit::initStmtList(MCSymbol *DwarfLineSectionSym) { + // Define start line table label for each Compile Unit. 
+ MCSymbol *LineTableStartSym = + Asm->OutStreamer.getDwarfLineTableSymbol(getUniqueID()); + + stmtListIndex = UnitDie.getValues().size(); + + // DW_AT_stmt_list is a offset of line number information for this + // compile unit in debug_line section. For split dwarf this is + // left in the skeleton CU and so not included. + // The line table entries are not always emitted in assembly, so it + // is not okay to use line_table_start here. + addSectionLabel(UnitDie, dwarf::DW_AT_stmt_list, LineTableStartSym, + DwarfLineSectionSym); +} + +void DwarfCompileUnit::applyStmtList(DIE &D) { + D.addValue(dwarf::DW_AT_stmt_list, + UnitDie.getAbbrev().getData()[stmtListIndex].getForm(), + UnitDie.getValues()[stmtListIndex]); +} + +void DwarfCompileUnit::attachLowHighPC(DIE &D, const MCSymbol *Begin, + const MCSymbol *End) { + assert(Begin && "Begin label should not be null!"); + assert(End && "End label should not be null!"); + assert(Begin->isDefined() && "Invalid starting label"); + assert(End->isDefined() && "Invalid end label"); + + addLabelAddress(D, dwarf::DW_AT_low_pc, Begin); + if (DD->getDwarfVersion() < 4) + addLabelAddress(D, dwarf::DW_AT_high_pc, End); + else + addLabelDelta(D, dwarf::DW_AT_high_pc, End, Begin); +} + +// Find DIE for the given subprogram and attach appropriate DW_AT_low_pc +// and DW_AT_high_pc attributes. If there are global variables in this +// scope then create and insert DIEs for these variables. +DIE &DwarfCompileUnit::updateSubprogramScopeDIE(DISubprogram SP) { + DIE *SPDie = getOrCreateSubprogramDIE(SP, includeMinimalInlineScopes()); + + attachLowHighPC(*SPDie, DD->getFunctionBeginSym(), DD->getFunctionEndSym()); + if (!DD->getCurrentFunction()->getTarget().Options.DisableFramePointerElim( + *DD->getCurrentFunction())) + addFlag(*SPDie, dwarf::DW_AT_APPLE_omit_frame_ptr); + + // Only include DW_AT_frame_base in full debug info + if (!includeMinimalInlineScopes()) { + const TargetRegisterInfo *RI = + Asm->TM.getSubtargetImpl()->getRegisterInfo(); + MachineLocation Location(RI->getFrameRegister(*Asm->MF)); + addAddress(*SPDie, dwarf::DW_AT_frame_base, Location); + } + + // Add name to the name table, we do this here because we're guaranteed + // to have concrete versions of our DW_TAG_subprogram nodes. + DD->addSubprogramNames(SP, *SPDie); + + return *SPDie; +} + +// Construct a DIE for this scope. +void DwarfCompileUnit::constructScopeDIE( + LexicalScope *Scope, SmallVectorImpl> &FinalChildren) { + if (!Scope || !Scope->getScopeNode()) + return; + + DIScope DS(Scope->getScopeNode()); + + assert((Scope->getInlinedAt() || !DS.isSubprogram()) && + "Only handle inlined subprograms here, use " + "constructSubprogramScopeDIE for non-inlined " + "subprograms"); + + SmallVector, 8> Children; + + // We try to create the scope DIE first, then the children DIEs. This will + // avoid creating un-used children then removing them later when we find out + // the scope DIE is null. + std::unique_ptr ScopeDIE; + if (Scope->getParent() && DS.isSubprogram()) { + ScopeDIE = constructInlinedScopeDIE(Scope); + if (!ScopeDIE) + return; + // We create children when the scope DIE is not null. + createScopeChildrenDIE(Scope, Children); + } else { + // Early exit when we know the scope DIE is going to be null. + if (DD->isLexicalScopeDIENull(Scope)) + return; + + unsigned ChildScopeCount; + + // We create children here when we know the scope DIE is not going to be + // null and the children will be added to the scope DIE. 
+ createScopeChildrenDIE(Scope, Children, &ChildScopeCount); + + // Skip imported directives in gmlt-like data. + if (!includeMinimalInlineScopes()) { + // There is no need to emit empty lexical block DIE. + for (const auto &E : DD->findImportedEntitiesForScope(DS)) + Children.push_back( + constructImportedEntityDIE(DIImportedEntity(E.second))); + } + + // If there are only other scopes as children, put them directly in the + // parent instead, as this scope would serve no purpose. + if (Children.size() == ChildScopeCount) { + FinalChildren.insert(FinalChildren.end(), + std::make_move_iterator(Children.begin()), + std::make_move_iterator(Children.end())); + return; + } + ScopeDIE = constructLexicalScopeDIE(Scope); + assert(ScopeDIE && "Scope DIE should not be null."); + } + + // Add children + for (auto &I : Children) + ScopeDIE->addChild(std::move(I)); + + FinalChildren.push_back(std::move(ScopeDIE)); +} + +void DwarfCompileUnit::addSectionDelta(DIE &Die, dwarf::Attribute Attribute, + const MCSymbol *Hi, const MCSymbol *Lo) { + DIEValue *Value = new (DIEValueAllocator) DIEDelta(Hi, Lo); + Die.addValue(Attribute, DD->getDwarfVersion() >= 4 ? dwarf::DW_FORM_sec_offset + : dwarf::DW_FORM_data4, + Value); +} + +void DwarfCompileUnit::addScopeRangeList(DIE &ScopeDIE, + SmallVector Range) { + // Emit offset in .debug_range as a relocatable label. emitDIE will handle + // emitting it appropriately. + auto *RangeSectionSym = DD->getRangeSectionSym(); + + RangeSpanList List( + Asm->GetTempSymbol("debug_ranges", DD->getNextRangeNumber()), + std::move(Range)); + + // Under fission, ranges are specified by constant offsets relative to the + // CU's DW_AT_GNU_ranges_base. + if (isDwoUnit()) + addSectionDelta(ScopeDIE, dwarf::DW_AT_ranges, List.getSym(), + RangeSectionSym); + else + addSectionLabel(ScopeDIE, dwarf::DW_AT_ranges, List.getSym(), + RangeSectionSym); + + // Add the range list to the set of ranges to be emitted. + (Skeleton ? Skeleton : this)->CURangeLists.push_back(std::move(List)); +} + +void DwarfCompileUnit::attachRangesOrLowHighPC( + DIE &Die, SmallVector Ranges) { + if (Ranges.size() == 1) { + const auto &single = Ranges.front(); + attachLowHighPC(Die, single.getStart(), single.getEnd()); + } else + addScopeRangeList(Die, std::move(Ranges)); +} + +void DwarfCompileUnit::attachRangesOrLowHighPC( + DIE &Die, const SmallVectorImpl &Ranges) { + SmallVector List; + List.reserve(Ranges.size()); + for (const InsnRange &R : Ranges) + List.push_back(RangeSpan(DD->getLabelBeforeInsn(R.first), + DD->getLabelAfterInsn(R.second))); + attachRangesOrLowHighPC(Die, std::move(List)); +} + +// This scope represents inlined body of a function. Construct DIE to +// represent this concrete inlined copy of the function. +std::unique_ptr +DwarfCompileUnit::constructInlinedScopeDIE(LexicalScope *Scope) { + assert(Scope->getScopeNode()); + DIScope DS(Scope->getScopeNode()); + DISubprogram InlinedSP = getDISubprogram(DS); + // Find the subprogram's DwarfCompileUnit in the SPMap in case the subprogram + // was inlined from another compile unit. + DIE *OriginDIE = DU->getAbstractSPDies()[InlinedSP]; + assert(OriginDIE && "Unable to find original DIE for an inlined subprogram."); + + auto ScopeDIE = make_unique(dwarf::DW_TAG_inlined_subroutine); + addDIEEntry(*ScopeDIE, dwarf::DW_AT_abstract_origin, *OriginDIE); + + attachRangesOrLowHighPC(*ScopeDIE, Scope->getRanges()); + + // Add the call site information to the DIE. 
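// A compact sketch of the choice made by attachRangesOrLowHighPC above: a
// scope covering a single contiguous range gets DW_AT_low_pc/DW_AT_high_pc,
// anything discontiguous gets a DW_AT_ranges list instead. Pairs of integers
// stand in for the label-delimited ranges used by the patch.
#include <cassert>
#include <cstdint>
#include <utility>
#include <vector>

using Span = std::pair<uint64_t, uint64_t>; // [start, end)

struct ScopeEncoding {
  bool UsesRangeList = false;
  Span LowHigh{0, 0};
  std::vector<Span> RangeList;
};

ScopeEncoding encodeScopeRanges(std::vector<Span> Ranges) {
  ScopeEncoding E;
  if (Ranges.size() == 1) {
    E.LowHigh = Ranges.front(); // cheap low_pc/high_pc encoding
  } else {
    E.UsesRangeList = true;     // needs a .debug_ranges entry
    E.RangeList = std::move(Ranges);
  }
  return E;
}

int main() {
  assert(!encodeScopeRanges({{0x10, 0x20}}).UsesRangeList);
  assert(encodeScopeRanges({{0x10, 0x20}, {0x40, 0x50}}).UsesRangeList);
  return 0;
}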
+ DILocation DL(Scope->getInlinedAt()); + addUInt(*ScopeDIE, dwarf::DW_AT_call_file, None, + getOrCreateSourceID(DL.getFilename(), DL.getDirectory())); + addUInt(*ScopeDIE, dwarf::DW_AT_call_line, None, DL.getLineNumber()); + + // Add name to the name table, we do this here because we're guaranteed + // to have concrete versions of our DW_TAG_inlined_subprogram nodes. + DD->addSubprogramNames(InlinedSP, *ScopeDIE); + + return ScopeDIE; +} + +// Construct new DW_TAG_lexical_block for this scope and attach +// DW_AT_low_pc/DW_AT_high_pc labels. +std::unique_ptr +DwarfCompileUnit::constructLexicalScopeDIE(LexicalScope *Scope) { + if (DD->isLexicalScopeDIENull(Scope)) + return nullptr; + + auto ScopeDIE = make_unique(dwarf::DW_TAG_lexical_block); + if (Scope->isAbstractScope()) + return ScopeDIE; + + attachRangesOrLowHighPC(*ScopeDIE, Scope->getRanges()); + + return ScopeDIE; +} + +/// constructVariableDIE - Construct a DIE for the given DbgVariable. +std::unique_ptr DwarfCompileUnit::constructVariableDIE(DbgVariable &DV, + bool Abstract) { + auto D = constructVariableDIEImpl(DV, Abstract); + DV.setDIE(*D); + return D; +} + +std::unique_ptr +DwarfCompileUnit::constructVariableDIEImpl(const DbgVariable &DV, + bool Abstract) { + // Define variable debug information entry. + auto VariableDie = make_unique(DV.getTag()); + + if (Abstract) { + applyVariableAttributes(DV, *VariableDie); + return VariableDie; + } + + // Add variable address. + + unsigned Offset = DV.getDotDebugLocOffset(); + if (Offset != ~0U) { + addLocationList(*VariableDie, dwarf::DW_AT_location, Offset); + return VariableDie; + } + + // Check if variable is described by a DBG_VALUE instruction. + if (const MachineInstr *DVInsn = DV.getMInsn()) { + assert(DVInsn->getNumOperands() == 4); + if (DVInsn->getOperand(0).isReg()) { + const MachineOperand RegOp = DVInsn->getOperand(0); + // If the second operand is an immediate, this is an indirect value. + if (DVInsn->getOperand(1).isImm()) { + MachineLocation Location(RegOp.getReg(), + DVInsn->getOperand(1).getImm()); + addVariableAddress(DV, *VariableDie, Location); + } else if (RegOp.getReg()) + addVariableAddress(DV, *VariableDie, MachineLocation(RegOp.getReg())); + } else if (DVInsn->getOperand(0).isImm()) + addConstantValue(*VariableDie, DVInsn->getOperand(0), DV.getType()); + else if (DVInsn->getOperand(0).isFPImm()) + addConstantFPValue(*VariableDie, DVInsn->getOperand(0)); + else if (DVInsn->getOperand(0).isCImm()) + addConstantValue(*VariableDie, DVInsn->getOperand(0).getCImm(), + DV.getType()); + + return VariableDie; + } + + // .. else use frame index. 
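// A sketch of the fall-through order in constructVariableDIEImpl above,
// completed by the frame-index case that follows: abstract variables get no
// address, then a .debug_loc list wins, then a single DBG_VALUE, and finally
// a stack slot. The VarInfo summary below is a hypothetical stand-in for the
// queries made on DbgVariable.
#include <cassert>
#include <optional>

struct VarInfo {
  bool Abstract = false;
  std::optional<unsigned> DebugLocOffset; // index into .debug_loc, if any
  bool HasDbgValueInsn = false;           // a single DBG_VALUE describes it
  std::optional<int> FrameIndex;          // stack slot, if any
};

enum class Location { None, LocList, FromDbgValue, FrameIndex };

Location pickLocation(const VarInfo &V) {
  if (V.Abstract)
    return Location::None;         // abstract DIEs carry no address
  if (V.DebugLocOffset)
    return Location::LocList;      // DW_AT_location -> loclistptr
  if (V.HasDbgValueInsn)
    return Location::FromDbgValue; // register / reg+offset / constant
  if (V.FrameIndex)
    return Location::FrameIndex;   // frame register + offset
  return Location::None;
}

int main() {
  // A location list takes precedence over everything else.
  assert(pickLocation({false, 7u, true, 3}) == Location::LocList);
  // With no loc list and no DBG_VALUE, fall back to the frame index.
  assert(pickLocation({false, std::nullopt, false, 3}) == Location::FrameIndex);
  return 0;
}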
+ int FI = DV.getFrameIndex(); + if (FI != ~0) { + unsigned FrameReg = 0; + const TargetFrameLowering *TFI = + Asm->TM.getSubtargetImpl()->getFrameLowering(); + int Offset = TFI->getFrameIndexReference(*Asm->MF, FI, FrameReg); + MachineLocation Location(FrameReg, Offset); + addVariableAddress(DV, *VariableDie, Location); + } + + return VariableDie; +} + +std::unique_ptr DwarfCompileUnit::constructVariableDIE( + DbgVariable &DV, const LexicalScope &Scope, DIE *&ObjectPointer) { + auto Var = constructVariableDIE(DV, Scope.isAbstractScope()); + if (DV.isObjectPointer()) + ObjectPointer = Var.get(); + return Var; +} + +DIE *DwarfCompileUnit::createScopeChildrenDIE( + LexicalScope *Scope, SmallVectorImpl> &Children, + unsigned *ChildScopeCount) { + DIE *ObjectPointer = nullptr; + + for (DbgVariable *DV : DU->getScopeVariables().lookup(Scope)) + Children.push_back(constructVariableDIE(*DV, *Scope, ObjectPointer)); + + unsigned ChildCountWithoutScopes = Children.size(); + + for (LexicalScope *LS : Scope->getChildren()) + constructScopeDIE(LS, Children); + + if (ChildScopeCount) + *ChildScopeCount = Children.size() - ChildCountWithoutScopes; + + return ObjectPointer; +} + +void DwarfCompileUnit::constructSubprogramScopeDIE(LexicalScope *Scope) { + assert(Scope && Scope->getScopeNode()); + assert(!Scope->getInlinedAt()); + assert(!Scope->isAbstractScope()); + DISubprogram Sub(Scope->getScopeNode()); + + assert(Sub.isSubprogram()); + + DD->getProcessedSPNodes().insert(Sub); + + DIE &ScopeDIE = updateSubprogramScopeDIE(Sub); + + // If this is a variadic function, add an unspecified parameter. + DITypeArray FnArgs = Sub.getType().getTypeArray(); + + // Collect lexical scope children first. + // ObjectPointer might be a local (non-argument) local variable if it's a + // block's synthetic this pointer. + if (DIE *ObjectPointer = createAndAddScopeChildren(Scope, ScopeDIE)) + addDIEEntry(ScopeDIE, dwarf::DW_AT_object_pointer, *ObjectPointer); + + // If we have a single element of null, it is a function that returns void. + // If we have more than one elements and the last one is null, it is a + // variadic function. + if (FnArgs.getNumElements() > 1 && + !FnArgs.getElement(FnArgs.getNumElements() - 1) && + !includeMinimalInlineScopes()) + ScopeDIE.addChild(make_unique(dwarf::DW_TAG_unspecified_parameters)); +} + +DIE *DwarfCompileUnit::createAndAddScopeChildren(LexicalScope *Scope, + DIE &ScopeDIE) { + // We create children when the scope DIE is not null. + SmallVector, 8> Children; + DIE *ObjectPointer = createScopeChildrenDIE(Scope, Children); + + // Add children + for (auto &I : Children) + ScopeDIE.addChild(std::move(I)); + + return ObjectPointer; +} + +void +DwarfCompileUnit::constructAbstractSubprogramScopeDIE(LexicalScope *Scope) { + DIE *&AbsDef = DU->getAbstractSPDies()[Scope->getScopeNode()]; + if (AbsDef) + return; + + DISubprogram SP(Scope->getScopeNode()); + + DIE *ContextDIE; + + if (includeMinimalInlineScopes()) + ContextDIE = &getUnitDie(); + // Some of this is duplicated from DwarfUnit::getOrCreateSubprogramDIE, with + // the important distinction that the DIDescriptor is not associated with the + // DIE (since the DIDescriptor will be associated with the concrete DIE, if + // any). It could be refactored to some common utility function. 
+ else if (DISubprogram SPDecl = SP.getFunctionDeclaration()) { + ContextDIE = &getUnitDie(); + getOrCreateSubprogramDIE(SPDecl); + } else + ContextDIE = getOrCreateContextDIE(resolve(SP.getContext())); + + // Passing null as the associated DIDescriptor because the abstract definition + // shouldn't be found by lookup. + AbsDef = + &createAndAddDIE(dwarf::DW_TAG_subprogram, *ContextDIE, DIDescriptor()); + applySubprogramAttributesToDefinition(SP, *AbsDef); + + if (!includeMinimalInlineScopes()) + addUInt(*AbsDef, dwarf::DW_AT_inline, None, dwarf::DW_INL_inlined); + if (DIE *ObjectPointer = createAndAddScopeChildren(Scope, *AbsDef)) + addDIEEntry(*AbsDef, dwarf::DW_AT_object_pointer, *ObjectPointer); +} + +std::unique_ptr +DwarfCompileUnit::constructImportedEntityDIE(const DIImportedEntity &Module) { + assert(Module.Verify() && + "Use one of the MDNode * overloads to handle invalid metadata"); + std::unique_ptr IMDie = make_unique((dwarf::Tag)Module.getTag()); + insertDIE(Module, IMDie.get()); + DIE *EntityDie; + DIDescriptor Entity = resolve(Module.getEntity()); + if (Entity.isNameSpace()) + EntityDie = getOrCreateNameSpace(DINameSpace(Entity)); + else if (Entity.isSubprogram()) + EntityDie = getOrCreateSubprogramDIE(DISubprogram(Entity)); + else if (Entity.isType()) + EntityDie = getOrCreateTypeDIE(DIType(Entity)); + else if (Entity.isGlobalVariable()) + EntityDie = getOrCreateGlobalVariableDIE(DIGlobalVariable(Entity)); + else + EntityDie = getDIE(Entity); + assert(EntityDie); + addSourceLine(*IMDie, Module.getLineNumber(), + Module.getContext().getFilename(), + Module.getContext().getDirectory()); + addDIEEntry(*IMDie, dwarf::DW_AT_import, *EntityDie); + StringRef Name = Module.getName(); + if (!Name.empty()) + addString(*IMDie, dwarf::DW_AT_name, Name); + + return IMDie; +} + +void DwarfCompileUnit::finishSubprogramDefinition(DISubprogram SP) { + DIE *D = getDIE(SP); + if (DIE *AbsSPDIE = DU->getAbstractSPDies().lookup(SP)) { + if (D) + // If this subprogram has an abstract definition, reference that + addDIEEntry(*D, dwarf::DW_AT_abstract_origin, *AbsSPDIE); + } else { + if (!D && !includeMinimalInlineScopes()) + // Lazily construct the subprogram if we didn't see either concrete or + // inlined versions during codegen. (except in -gmlt ^ where we want + // to omit these entirely) + D = getOrCreateSubprogramDIE(SP); + if (D) + // And attach the attributes + applySubprogramAttributesToDefinition(SP, *D); + } +} +void DwarfCompileUnit::collectDeadVariables(DISubprogram SP) { + assert(SP.isSubprogram() && "CU's subprogram list contains a non-subprogram"); + assert(SP.isDefinition() && + "CU's subprogram list contains a subprogram declaration"); + DIArray Variables = SP.getVariables(); + if (Variables.getNumElements() == 0) + return; + + DIE *SPDIE = DU->getAbstractSPDies().lookup(SP); + if (!SPDIE) + SPDIE = getDIE(SP); + assert(SPDIE); + for (unsigned vi = 0, ve = Variables.getNumElements(); vi != ve; ++vi) { + DIVariable DV(Variables.getElement(vi)); + assert(DV.isVariable()); + DbgVariable NewVar(DV, DIExpression(nullptr), DD); + auto VariableDie = constructVariableDIE(NewVar); + applyVariableAttributes(NewVar, *VariableDie); + SPDIE->addChild(std::move(VariableDie)); + } +} + +void DwarfCompileUnit::emitHeader(const MCSymbol *ASectionSym) const { + // Don't bother labeling the .dwo unit, as its offset isn't used. + if (!Skeleton) + Asm->OutStreamer.EmitLabel(LabelBegin); + + DwarfUnit::emitHeader(ASectionSym); +} + +/// addGlobalName - Add a new global name to the compile unit. 
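// A sketch of the decision made in finishSubprogramDefinition above: a
// concrete subprogram DIE that also has an abstract definition only needs a
// DW_AT_abstract_origin reference; otherwise the DIE is created lazily
// (unless only minimal, gmlt-like info is wanted) and then gets the full
// subprogram attributes. The booleans are stand-ins for the DIE lookups.
#include <cassert>

enum class Action { AddAbstractOrigin, ApplyFullAttributes, Skip };

Action finishSubprogram(bool HasAbstractDIE, bool HasConcreteDIE,
                        bool MinimalScopes) {
  if (HasAbstractDIE)
    return HasConcreteDIE ? Action::AddAbstractOrigin : Action::Skip;
  if (!HasConcreteDIE && MinimalScopes)
    return Action::Skip; // in -gmlt-style output, omit it entirely
  return Action::ApplyFullAttributes; // lazily created if needed
}

int main() {
  assert(finishSubprogram(true, true, false) == Action::AddAbstractOrigin);
  assert(finishSubprogram(false, false, true) == Action::Skip);
  assert(finishSubprogram(false, false, false) == Action::ApplyFullAttributes);
  return 0;
}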
+void DwarfCompileUnit::addGlobalName(StringRef Name, DIE &Die, + DIScope Context) { + if (includeMinimalInlineScopes()) + return; + std::string FullName = getParentContextString(Context) + Name.str(); + GlobalNames[FullName] = &Die; +} + +/// Add a new global type to the unit. +void DwarfCompileUnit::addGlobalType(DIType Ty, const DIE &Die, + DIScope Context) { + if (includeMinimalInlineScopes()) + return; + std::string FullName = getParentContextString(Context) + Ty.getName().str(); + GlobalTypes[FullName] = &Die; +} + +/// addVariableAddress - Add DW_AT_location attribute for a +/// DbgVariable based on provided MachineLocation. +void DwarfCompileUnit::addVariableAddress(const DbgVariable &DV, DIE &Die, + MachineLocation Location) { + if (DV.variableHasComplexAddress()) + addComplexAddress(DV, Die, dwarf::DW_AT_location, Location); + else if (DV.isBlockByrefVariable()) + addBlockByrefAddress(DV, Die, dwarf::DW_AT_location, Location); + else + addAddress(Die, dwarf::DW_AT_location, Location, + DV.getVariable().isIndirect()); +} + +/// Add an address attribute to a die based on the location provided. +void DwarfCompileUnit::addAddress(DIE &Die, dwarf::Attribute Attribute, + const MachineLocation &Location, + bool Indirect) { + DIELoc *Loc = new (DIEValueAllocator) DIELoc(); + + if (Location.isReg() && !Indirect) + addRegisterOpPiece(*Loc, Location.getReg()); + else { + addRegisterOffset(*Loc, Location.getReg(), Location.getOffset()); + if (Indirect && !Location.isReg()) { + addUInt(*Loc, dwarf::DW_FORM_data1, dwarf::DW_OP_deref); + } + } + + // Now attach the location information to the DIE. + addBlock(Die, Attribute, Loc); +} + +/// Start with the address based on the location provided, and generate the +/// DWARF information necessary to find the actual variable given the extra +/// address information encoded in the DbgVariable, starting from the starting +/// location. Add the DWARF information to the die. +void DwarfCompileUnit::addComplexAddress(const DbgVariable &DV, DIE &Die, + dwarf::Attribute Attribute, + const MachineLocation &Location) { + DIELoc *Loc = new (DIEValueAllocator) DIELoc(); + unsigned N = DV.getNumAddrElements(); + unsigned i = 0; + if (Location.isReg()) { + if (N >= 2 && DV.getAddrElement(0) == dwarf::DW_OP_plus) { + assert(!DV.getVariable().isIndirect() && + "double indirection not handled"); + // If first address element is OpPlus then emit + // DW_OP_breg + Offset instead of DW_OP_reg + Offset. 
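// A sketch of the opcode selection in addAddress above. Strings stand in for
// the opcode stream that addRegisterOpPiece/addRegisterOffset actually
// produce: a plain register location becomes DW_OP_regN, a memory location
// becomes DW_OP_bregN + offset, and an extra indirection on a non-register
// location appends DW_OP_deref.
#include <cassert>
#include <string>
#include <vector>

std::vector<std::string> locationOps(bool IsReg, int Reg, int Offset,
                                     bool Indirect) {
  std::vector<std::string> Ops;
  if (IsReg && !Indirect) {
    Ops.push_back("DW_OP_reg" + std::to_string(Reg)); // value lives in Reg
  } else {
    // The value lives in memory at [Reg + Offset].
    Ops.push_back("DW_OP_breg" + std::to_string(Reg));
    Ops.push_back(std::to_string(Offset));
    if (Indirect && !IsReg)
      Ops.push_back("DW_OP_deref"); // one more level of indirection
  }
  return Ops;
}

int main() {
  assert(locationOps(true, 6, 0, false).front() == "DW_OP_reg6");
  assert(locationOps(false, 7, -8, true).back() == "DW_OP_deref");
  return 0;
}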
+ addRegisterOffset(*Loc, Location.getReg(), DV.getAddrElement(1)); + i = 2; + } else if (N >= 2 && DV.getAddrElement(0) == dwarf::DW_OP_deref) { + assert(!DV.getVariable().isIndirect() && + "double indirection not handled"); + addRegisterOpPiece(*Loc, Location.getReg(), + DV.getExpression().getPieceSize(), + DV.getExpression().getPieceOffset()); + i = 3; + } else + addRegisterOpPiece(*Loc, Location.getReg()); + } else + addRegisterOffset(*Loc, Location.getReg(), Location.getOffset()); + + for (; i < N; ++i) { + uint64_t Element = DV.getAddrElement(i); + if (Element == dwarf::DW_OP_plus) { + addUInt(*Loc, dwarf::DW_FORM_data1, dwarf::DW_OP_plus_uconst); + addUInt(*Loc, dwarf::DW_FORM_udata, DV.getAddrElement(++i)); + + } else if (Element == dwarf::DW_OP_deref) { + if (!Location.isReg()) + addUInt(*Loc, dwarf::DW_FORM_data1, dwarf::DW_OP_deref); + + } else if (Element == dwarf::DW_OP_piece) { + const unsigned SizeOfByte = 8; + unsigned PieceOffsetInBits = DV.getAddrElement(++i) * SizeOfByte; + unsigned PieceSizeInBits = DV.getAddrElement(++i) * SizeOfByte; + // Emit DW_OP_bit_piece Size Offset. + assert(PieceSizeInBits > 0 && "piece has zero size"); + addUInt(*Loc, dwarf::DW_FORM_data1, dwarf::DW_OP_bit_piece); + addUInt(*Loc, dwarf::DW_FORM_udata, PieceSizeInBits); + addUInt(*Loc, dwarf::DW_FORM_udata, PieceOffsetInBits); + } else + llvm_unreachable("unknown DIBuilder Opcode"); + } + + // Now attach the location information to the DIE. + addBlock(Die, Attribute, Loc); +} + +/// Add a Dwarf loclistptr attribute data and value. +void DwarfCompileUnit::addLocationList(DIE &Die, dwarf::Attribute Attribute, + unsigned Index) { + DIEValue *Value = new (DIEValueAllocator) DIELocList(Index); + dwarf::Form Form = DD->getDwarfVersion() >= 4 ? dwarf::DW_FORM_sec_offset + : dwarf::DW_FORM_data4; + Die.addValue(Attribute, Form, Value); +} + +void DwarfCompileUnit::applyVariableAttributes(const DbgVariable &Var, + DIE &VariableDie) { + StringRef Name = Var.getName(); + if (!Name.empty()) + addString(VariableDie, dwarf::DW_AT_name, Name); + addSourceLine(VariableDie, Var.getVariable()); + addType(VariableDie, Var.getType()); + if (Var.isArtificial()) + addFlag(VariableDie, dwarf::DW_AT_artificial); +} + +/// Add a Dwarf expression attribute data and value. +void DwarfCompileUnit::addExpr(DIELoc &Die, dwarf::Form Form, + const MCExpr *Expr) { + DIEValue *Value = new (DIEValueAllocator) DIEExpr(Expr); + Die.addValue((dwarf::Attribute)0, Form, Value); +} + +void DwarfCompileUnit::applySubprogramAttributesToDefinition(DISubprogram SP, + DIE &SPDie) { + DISubprogram SPDecl = SP.getFunctionDeclaration(); + DIScope Context = resolve(SPDecl ? 
SPDecl.getContext() : SP.getContext()); + applySubprogramAttributes(SP, SPDie, includeMinimalInlineScopes()); + addGlobalName(SP.getName(), SPDie, Context); +} + +bool DwarfCompileUnit::isDwoUnit() const { + return DD->useSplitDwarf() && Skeleton; +} + +bool DwarfCompileUnit::includeMinimalInlineScopes() const { + return getCUNode().getEmissionKind() == DIBuilder::LineTablesOnly || + (DD->useSplitDwarf() && !Skeleton); +} +} // end llvm namespace diff --git a/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h b/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h new file mode 100644 index 0000000..e521f39 --- /dev/null +++ b/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h @@ -0,0 +1,250 @@ +//===-- llvm/CodeGen/DwarfCompileUnit.h - Dwarf Compile Unit ---*- C++ -*--===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains support for writing dwarf compile unit. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_CODEGEN_ASMPRINTER_DWARFCOMPILEUNIT_H +#define LLVM_LIB_CODEGEN_ASMPRINTER_DWARFCOMPILEUNIT_H + +#include "DwarfUnit.h" +#include "llvm/Support/Dwarf.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/IR/DebugInfo.h" + +namespace llvm { + +class AsmPrinter; +class DIE; +class DwarfDebug; +class DwarfFile; +class MCSymbol; +class LexicalScope; + +class DwarfCompileUnit : public DwarfUnit { + /// The attribute index of DW_AT_stmt_list in the compile unit DIE, avoiding + /// the need to search for it in applyStmtList. + unsigned stmtListIndex; + + /// Skeleton unit associated with this unit. + DwarfCompileUnit *Skeleton; + + /// A label at the start of the non-dwo section related to this unit. + MCSymbol *SectionSym; + + /// The start of the unit within its section. + MCSymbol *LabelBegin; + + /// GlobalNames - A map of globally visible named entities for this unit. + StringMap GlobalNames; + + /// GlobalTypes - A map of globally visible types for this unit. + StringMap GlobalTypes; + + // List of range lists for a given compile unit, separate from the ranges for + // the CU itself. + SmallVector CURangeLists; + + // List of ranges for a given compile unit. + SmallVector CURanges; + + // The base address of this unit, if any. Used for relative references in + // ranges/locs. + const MCSymbol *BaseAddress; + + /// \brief Construct a DIE for the given DbgVariable without initializing the + /// DbgVariable's DIE reference. + std::unique_ptr constructVariableDIEImpl(const DbgVariable &DV, + bool Abstract); + + bool isDwoUnit() const override; + + bool includeMinimalInlineScopes() const; + +public: + DwarfCompileUnit(unsigned UID, DICompileUnit Node, AsmPrinter *A, + DwarfDebug *DW, DwarfFile *DWU); + + DwarfCompileUnit *getSkeleton() const { + return Skeleton; + } + + void initStmtList(MCSymbol *DwarfLineSectionSym); + + /// Apply the DW_AT_stmt_list from this compile unit to the specified DIE. + void applyStmtList(DIE &D); + + /// getOrCreateGlobalVariableDIE - get or create global variable DIE. + DIE *getOrCreateGlobalVariableDIE(DIGlobalVariable GV); + + /// addLabelAddress - Add a dwarf label attribute data and value using + /// either DW_FORM_addr or DW_FORM_GNU_addr_index. 
+ void addLabelAddress(DIE &Die, dwarf::Attribute Attribute, + const MCSymbol *Label); + + /// addLocalLabelAddress - Add a dwarf label attribute data and value using + /// DW_FORM_addr only. + void addLocalLabelAddress(DIE &Die, dwarf::Attribute Attribute, + const MCSymbol *Label); + + /// addSectionDelta - Add a label delta attribute data and value. + void addSectionDelta(DIE &Die, dwarf::Attribute Attribute, const MCSymbol *Hi, + const MCSymbol *Lo); + + DwarfCompileUnit &getCU() override { return *this; } + + unsigned getOrCreateSourceID(StringRef FileName, StringRef DirName) override; + + /// addRange - Add an address range to the list of ranges for this unit. + void addRange(RangeSpan Range); + + void attachLowHighPC(DIE &D, const MCSymbol *Begin, const MCSymbol *End); + + /// addSectionLabel - Add a Dwarf section label attribute data and value. + /// + void addSectionLabel(DIE &Die, dwarf::Attribute Attribute, + const MCSymbol *Label, const MCSymbol *Sec); + + /// \brief Find DIE for the given subprogram and attach appropriate + /// DW_AT_low_pc and DW_AT_high_pc attributes. If there are global + /// variables in this scope then create and insert DIEs for these + /// variables. + DIE &updateSubprogramScopeDIE(DISubprogram SP); + + void constructScopeDIE(LexicalScope *Scope, + SmallVectorImpl> &FinalChildren); + + /// \brief A helper function to construct a RangeSpanList for a given + /// lexical scope. + void addScopeRangeList(DIE &ScopeDIE, SmallVector Range); + + void attachRangesOrLowHighPC(DIE &D, SmallVector Ranges); + + void attachRangesOrLowHighPC(DIE &D, + const SmallVectorImpl &Ranges); + /// \brief This scope represents inlined body of a function. Construct + /// DIE to represent this concrete inlined copy of the function. + std::unique_ptr constructInlinedScopeDIE(LexicalScope *Scope); + + /// \brief Construct new DW_TAG_lexical_block for this scope and + /// attach DW_AT_low_pc/DW_AT_high_pc labels. + std::unique_ptr constructLexicalScopeDIE(LexicalScope *Scope); + + /// constructVariableDIE - Construct a DIE for the given DbgVariable. + std::unique_ptr constructVariableDIE(DbgVariable &DV, + bool Abstract = false); + + std::unique_ptr constructVariableDIE(DbgVariable &DV, + const LexicalScope &Scope, + DIE *&ObjectPointer); + + /// A helper function to create children of a Scope DIE. + DIE *createScopeChildrenDIE(LexicalScope *Scope, + SmallVectorImpl> &Children, + unsigned *ChildScopeCount = nullptr); + + /// \brief Construct a DIE for this subprogram scope. + void constructSubprogramScopeDIE(LexicalScope *Scope); + + DIE *createAndAddScopeChildren(LexicalScope *Scope, DIE &ScopeDIE); + + void constructAbstractSubprogramScopeDIE(LexicalScope *Scope); + + /// \brief Construct import_module DIE. + std::unique_ptr + constructImportedEntityDIE(const DIImportedEntity &Module); + + void finishSubprogramDefinition(DISubprogram SP); + + void collectDeadVariables(DISubprogram SP); + + /// Set the skeleton unit associated with this unit. + void setSkeleton(DwarfCompileUnit &Skel) { Skeleton = &Skel; } + + MCSymbol *getSectionSym() const { + assert(Section); + return SectionSym; + } + + /// Pass in the SectionSym even though we could recreate it in every compile + /// unit (type units will have actually distinct symbols once they're in + /// comdat sections). + void initSection(const MCSection *Section, MCSymbol *SectionSym) { + DwarfUnit::initSection(Section); + this->SectionSym = SectionSym; + + // Don't bother labeling the .dwo unit, as its offset isn't used. 
+ if (!Skeleton) + LabelBegin = + Asm->GetTempSymbol(Section->getLabelBeginName(), getUniqueID()); + } + + unsigned getLength() { + return sizeof(uint32_t) + // Length field + getHeaderSize() + UnitDie.getSize(); + } + + void emitHeader(const MCSymbol *ASectionSym) const override; + + MCSymbol *getLabelBegin() const { + assert(Section); + return LabelBegin; + } + + /// Add a new global name to the compile unit. + void addGlobalName(StringRef Name, DIE &Die, DIScope Context) override; + + /// Add a new global type to the compile unit. + void addGlobalType(DIType Ty, const DIE &Die, DIScope Context) override; + + const StringMap &getGlobalNames() const { return GlobalNames; } + const StringMap &getGlobalTypes() const { return GlobalTypes; } + + /// Add DW_AT_location attribute for a DbgVariable based on provided + /// MachineLocation. + void addVariableAddress(const DbgVariable &DV, DIE &Die, + MachineLocation Location); + /// Add an address attribute to a die based on the location provided. + void addAddress(DIE &Die, dwarf::Attribute Attribute, + const MachineLocation &Location, bool Indirect = false); + + /// Start with the address based on the location provided, and generate the + /// DWARF information necessary to find the actual variable (navigating the + /// extra location information encoded in the type) based on the starting + /// location. Add the DWARF information to the die. + void addComplexAddress(const DbgVariable &DV, DIE &Die, + dwarf::Attribute Attribute, + const MachineLocation &Location); + + /// Add a Dwarf loclistptr attribute data and value. + void addLocationList(DIE &Die, dwarf::Attribute Attribute, unsigned Index); + void applyVariableAttributes(const DbgVariable &Var, DIE &VariableDie); + + /// Add a Dwarf expression attribute data and value. + void addExpr(DIELoc &Die, dwarf::Form Form, const MCExpr *Expr); + + void applySubprogramAttributesToDefinition(DISubprogram SP, DIE &SPDie); + + /// getRangeLists - Get the vector of range lists. + const SmallVectorImpl &getRangeLists() const { + return (Skeleton ? Skeleton : this)->CURangeLists; + } + + /// getRanges - Get the list of ranges for this unit. 
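// A sketch of the getLength() computation declared above, assuming the usual
// DWARF32 compile-unit header layout (after the 4-byte initial-length field:
// 2-byte version, 4-byte debug_abbrev offset, 1-byte address size). The total
// space a unit occupies in .debug_info is the length field plus the header
// plus its DIE tree; the value stored in the length field excludes the field
// itself.
#include <cassert>
#include <cstdint>

uint32_t unitLength(uint32_t HeaderSize, uint32_t DIETreeSize) {
  return sizeof(uint32_t) + HeaderSize + DIETreeSize; // bytes occupied
}

uint32_t storedInitialLength(uint32_t HeaderSize, uint32_t DIETreeSize) {
  return HeaderSize + DIETreeSize; // value written into the length field
}

int main() {
  // Header after the length field: 2 + 4 + 1 = 7 bytes.
  assert(unitLength(7, 100) == 111);
  assert(storedInitialLength(7, 100) == 107);
  return 0;
}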
+ const SmallVectorImpl &getRanges() const { return CURanges; } + SmallVector takeRanges() { return std::move(CURanges); } + + void setBaseAddress(const MCSymbol *Base) { BaseAddress = Base; } + const MCSymbol *getBaseAddress() const { return BaseAddress; } +}; + +} // end llvm namespace + +#endif diff --git a/lib/CodeGen/AsmPrinter/DwarfDebug.cpp b/lib/CodeGen/AsmPrinter/DwarfDebug.cpp index 77860c0..230ea46 100644 --- a/lib/CodeGen/AsmPrinter/DwarfDebug.cpp +++ b/lib/CodeGen/AsmPrinter/DwarfDebug.cpp @@ -11,8 +11,10 @@ // //===----------------------------------------------------------------------===// -#include "ByteStreamer.h" #include "DwarfDebug.h" + +#include "ByteStreamer.h" +#include "DwarfCompileUnit.h" #include "DIE.h" #include "DIEHash.h" #include "DwarfUnit.h" @@ -36,6 +38,7 @@ #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/Dwarf.h" +#include "llvm/Support/Endian.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/FormattedStream.h" #include "llvm/Support/LEB128.h" @@ -47,6 +50,7 @@ #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetOptions.h" #include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/Target/TargetSubtargetInfo.h" using namespace llvm; #define DEBUG_TYPE "dwarfdebug" @@ -149,7 +153,7 @@ DIType DbgVariable::getType() const { if (tag == dwarf::DW_TAG_pointer_type) subType = resolve(DIDerivedType(Ty).getTypeDerivedFrom()); - DIArray Elements = DICompositeType(subType).getTypeArray(); + DIArray Elements = DICompositeType(subType).getElements(); for (unsigned i = 0, N = Elements.getNumElements(); i < N; ++i) { DIDerivedType DT(Elements.getElement(i)); if (getName() == DT.getName()) @@ -165,10 +169,11 @@ static LLVM_CONSTEXPR DwarfAccelTable::Atom TypeAtoms[] = { DwarfAccelTable::Atom(dwarf::DW_ATOM_type_flags, dwarf::DW_FORM_data1)}; DwarfDebug::DwarfDebug(AsmPrinter *A, Module *M) - : Asm(A), MMI(Asm->MMI), FirstCU(nullptr), PrevLabel(nullptr), - GlobalRangeCount(0), InfoHolder(A, "info_string", DIEValueAllocator), + : Asm(A), MMI(Asm->MMI), PrevLabel(nullptr), GlobalRangeCount(0), + InfoHolder(A, *this, "info_string", DIEValueAllocator), UsedNonDefaultText(false), - SkeletonHolder(A, "skel_string", DIEValueAllocator), + SkeletonHolder(A, *this, "skel_string", DIEValueAllocator), + IsDarwin(Triple(A->getTargetTriple()).isOSDarwin()), AccelNames(DwarfAccelTable::Atom(dwarf::DW_ATOM_die_offset, dwarf::DW_FORM_data4)), AccelObjC(DwarfAccelTable::Atom(dwarf::DW_ATOM_die_offset, @@ -188,8 +193,6 @@ DwarfDebug::DwarfDebug(AsmPrinter *A, Module *M) // Turn on accelerator tables for Darwin by default, pubnames by // default for non-Darwin, and handle split dwarf. - bool IsDarwin = Triple(A->getTargetTriple()).isOSDarwin(); - if (DwarfAccelTables == Default) HasDwarfAccelTables = IsDarwin; else @@ -308,26 +311,6 @@ bool DwarfDebug::isSubprogramContext(const MDNode *Context) { return false; } -// Find DIE for the given subprogram and attach appropriate DW_AT_low_pc -// and DW_AT_high_pc attributes. If there are global variables in this -// scope then create and insert DIEs for these variables. 
-DIE &DwarfDebug::updateSubprogramScopeDIE(DwarfCompileUnit &SPCU, - DISubprogram SP) { - DIE *SPDie = SPCU.getOrCreateSubprogramDIE(SP); - - attachLowHighPC(SPCU, *SPDie, FunctionBeginSym, FunctionEndSym); - - const TargetRegisterInfo *RI = Asm->TM.getRegisterInfo(); - MachineLocation Location(RI->getFrameRegister(*Asm->MF)); - SPCU.addAddress(*SPDie, dwarf::DW_AT_frame_base, Location); - - // Add name to the name table, we do this here because we're guaranteed - // to have concrete versions of our DW_TAG_subprogram nodes. - addSubprogramNames(SP, *SPDie); - - return *SPDie; -} - /// Check whether we should create a DIE for the given Scope, return true /// if we don't create a DIE (the corresponding DIE is null). bool DwarfDebug::isLexicalScopeDIENull(LexicalScope *Scope) { @@ -344,271 +327,30 @@ bool DwarfDebug::isLexicalScopeDIENull(LexicalScope *Scope) { // We don't create a DIE if we have a single Range and the end label // is null. - SmallVectorImpl::const_iterator RI = Ranges.begin(); - MCSymbol *End = getLabelAfterInsn(RI->second); - return !End; -} - -static void addSectionLabel(AsmPrinter &Asm, DwarfUnit &U, DIE &D, - dwarf::Attribute A, const MCSymbol *L, - const MCSymbol *Sec) { - if (Asm.MAI->doesDwarfUseRelocationsAcrossSections()) - U.addSectionLabel(D, A, L); - else - U.addSectionDelta(D, A, L, Sec); -} - -void DwarfDebug::addScopeRangeList(DwarfCompileUnit &TheCU, DIE &ScopeDIE, - const SmallVectorImpl &Range) { - // Emit offset in .debug_range as a relocatable label. emitDIE will handle - // emitting it appropriately. - MCSymbol *RangeSym = Asm->GetTempSymbol("debug_ranges", GlobalRangeCount++); - - // Under fission, ranges are specified by constant offsets relative to the - // CU's DW_AT_GNU_ranges_base. - if (useSplitDwarf()) - TheCU.addSectionDelta(ScopeDIE, dwarf::DW_AT_ranges, RangeSym, - DwarfDebugRangeSectionSym); - else - addSectionLabel(*Asm, TheCU, ScopeDIE, dwarf::DW_AT_ranges, RangeSym, - DwarfDebugRangeSectionSym); - - RangeSpanList List(RangeSym); - for (const InsnRange &R : Range) { - RangeSpan Span(getLabelBeforeInsn(R.first), getLabelAfterInsn(R.second)); - List.addRange(std::move(Span)); - } - - // Add the range list to the set of ranges to be emitted. - TheCU.addRangeList(std::move(List)); -} - -void DwarfDebug::attachRangesOrLowHighPC(DwarfCompileUnit &TheCU, DIE &Die, - const SmallVectorImpl &Ranges) { - assert(!Ranges.empty()); - if (Ranges.size() == 1) - attachLowHighPC(TheCU, Die, getLabelBeforeInsn(Ranges.front().first), - getLabelAfterInsn(Ranges.front().second)); - else - addScopeRangeList(TheCU, Die, Ranges); -} - -// Construct new DW_TAG_lexical_block for this scope and attach -// DW_AT_low_pc/DW_AT_high_pc labels. -std::unique_ptr -DwarfDebug::constructLexicalScopeDIE(DwarfCompileUnit &TheCU, - LexicalScope *Scope) { - if (isLexicalScopeDIENull(Scope)) - return nullptr; - - auto ScopeDIE = make_unique(dwarf::DW_TAG_lexical_block); - if (Scope->isAbstractScope()) - return ScopeDIE; - - attachRangesOrLowHighPC(TheCU, *ScopeDIE, Scope->getRanges()); - - return ScopeDIE; + return !getLabelAfterInsn(Ranges.front().second); } -// This scope represents inlined body of a function. Construct DIE to -// represent this concrete inlined copy of the function. 
-std::unique_ptr -DwarfDebug::constructInlinedScopeDIE(DwarfCompileUnit &TheCU, - LexicalScope *Scope) { - assert(Scope->getScopeNode()); - DIScope DS(Scope->getScopeNode()); - DISubprogram InlinedSP = getDISubprogram(DS); - // Find the subprogram's DwarfCompileUnit in the SPMap in case the subprogram - // was inlined from another compile unit. - DIE *OriginDIE = AbstractSPDies[InlinedSP]; - assert(OriginDIE && "Unable to find original DIE for an inlined subprogram."); - - auto ScopeDIE = make_unique(dwarf::DW_TAG_inlined_subroutine); - TheCU.addDIEEntry(*ScopeDIE, dwarf::DW_AT_abstract_origin, *OriginDIE); - - attachRangesOrLowHighPC(TheCU, *ScopeDIE, Scope->getRanges()); - - InlinedSubprogramDIEs.insert(OriginDIE); - - // Add the call site information to the DIE. - DILocation DL(Scope->getInlinedAt()); - TheCU.addUInt(*ScopeDIE, dwarf::DW_AT_call_file, None, - TheCU.getOrCreateSourceID(DL.getFilename(), DL.getDirectory())); - TheCU.addUInt(*ScopeDIE, dwarf::DW_AT_call_line, None, DL.getLineNumber()); - - // Add name to the name table, we do this here because we're guaranteed - // to have concrete versions of our DW_TAG_inlined_subprogram nodes. - addSubprogramNames(InlinedSP, *ScopeDIE); - - return ScopeDIE; -} - -static std::unique_ptr constructVariableDIE(DwarfCompileUnit &TheCU, - DbgVariable &DV, - const LexicalScope &Scope, - DIE *&ObjectPointer) { - auto Var = TheCU.constructVariableDIE(DV, Scope.isAbstractScope()); - if (DV.isObjectPointer()) - ObjectPointer = Var.get(); - return Var; -} - -DIE *DwarfDebug::createScopeChildrenDIE( - DwarfCompileUnit &TheCU, LexicalScope *Scope, - SmallVectorImpl> &Children) { - DIE *ObjectPointer = nullptr; - - // Collect arguments for current function. - if (LScopes.isCurrentFunctionScope(Scope)) { - for (DbgVariable *ArgDV : CurrentFnArguments) - if (ArgDV) - Children.push_back( - constructVariableDIE(TheCU, *ArgDV, *Scope, ObjectPointer)); - - // If this is a variadic function, add an unspecified parameter. - DISubprogram SP(Scope->getScopeNode()); - DIArray FnArgs = SP.getType().getTypeArray(); - if (FnArgs.getElement(FnArgs.getNumElements() - 1) - .isUnspecifiedParameter()) { - Children.push_back( - make_unique(dwarf::DW_TAG_unspecified_parameters)); - } - } - - // Collect lexical scope children first. - for (DbgVariable *DV : ScopeVariables.lookup(Scope)) - Children.push_back(constructVariableDIE(TheCU, *DV, *Scope, ObjectPointer)); - - for (LexicalScope *LS : Scope->getChildren()) - if (std::unique_ptr Nested = constructScopeDIE(TheCU, LS)) - Children.push_back(std::move(Nested)); - return ObjectPointer; +template void forBothCUs(DwarfCompileUnit &CU, Func F) { + F(CU); + if (auto *SkelCU = CU.getSkeleton()) + F(*SkelCU); } -void DwarfDebug::createAndAddScopeChildren(DwarfCompileUnit &TheCU, - LexicalScope *Scope, DIE &ScopeDIE) { - // We create children when the scope DIE is not null. 
- SmallVector, 8> Children; - if (DIE *ObjectPointer = createScopeChildrenDIE(TheCU, Scope, Children)) - TheCU.addDIEEntry(ScopeDIE, dwarf::DW_AT_object_pointer, *ObjectPointer); - - // Add children - for (auto &I : Children) - ScopeDIE.addChild(std::move(I)); -} - -void DwarfDebug::constructAbstractSubprogramScopeDIE(DwarfCompileUnit &TheCU, - LexicalScope *Scope) { +void DwarfDebug::constructAbstractSubprogramScopeDIE(LexicalScope *Scope) { assert(Scope && Scope->getScopeNode()); assert(Scope->isAbstractScope()); assert(!Scope->getInlinedAt()); - DISubprogram SP(Scope->getScopeNode()); + const MDNode *SP = Scope->getScopeNode(); ProcessedSPNodes.insert(SP); - DIE *&AbsDef = AbstractSPDies[SP]; - if (AbsDef) - return; - // Find the subprogram's DwarfCompileUnit in the SPMap in case the subprogram // was inlined from another compile unit. - DwarfCompileUnit &SPCU = *SPMap[SP]; - DIE *ContextDIE; - - // Some of this is duplicated from DwarfUnit::getOrCreateSubprogramDIE, with - // the important distinction that the DIDescriptor is not associated with the - // DIE (since the DIDescriptor will be associated with the concrete DIE, if - // any). It could be refactored to some common utility function. - if (DISubprogram SPDecl = SP.getFunctionDeclaration()) { - ContextDIE = &SPCU.getUnitDie(); - SPCU.getOrCreateSubprogramDIE(SPDecl); - } else - ContextDIE = SPCU.getOrCreateContextDIE(resolve(SP.getContext())); - - // Passing null as the associated DIDescriptor because the abstract definition - // shouldn't be found by lookup. - AbsDef = &SPCU.createAndAddDIE(dwarf::DW_TAG_subprogram, *ContextDIE, - DIDescriptor()); - SPCU.applySubprogramAttributesToDefinition(SP, *AbsDef); - - SPCU.addUInt(*AbsDef, dwarf::DW_AT_inline, None, dwarf::DW_INL_inlined); - createAndAddScopeChildren(SPCU, Scope, *AbsDef); -} - -DIE &DwarfDebug::constructSubprogramScopeDIE(DwarfCompileUnit &TheCU, - LexicalScope *Scope) { - assert(Scope && Scope->getScopeNode()); - assert(!Scope->getInlinedAt()); - assert(!Scope->isAbstractScope()); - DISubprogram Sub(Scope->getScopeNode()); - - assert(Sub.isSubprogram()); - - ProcessedSPNodes.insert(Sub); - - DIE &ScopeDIE = updateSubprogramScopeDIE(TheCU, Sub); - - createAndAddScopeChildren(TheCU, Scope, ScopeDIE); - - return ScopeDIE; -} - -// Construct a DIE for this scope. -std::unique_ptr DwarfDebug::constructScopeDIE(DwarfCompileUnit &TheCU, - LexicalScope *Scope) { - if (!Scope || !Scope->getScopeNode()) - return nullptr; - - DIScope DS(Scope->getScopeNode()); - - assert((Scope->getInlinedAt() || !DS.isSubprogram()) && - "Only handle inlined subprograms here, use " - "constructSubprogramScopeDIE for non-inlined " - "subprograms"); - - SmallVector, 8> Children; - - // We try to create the scope DIE first, then the children DIEs. This will - // avoid creating un-used children then removing them later when we find out - // the scope DIE is null. - std::unique_ptr ScopeDIE; - if (Scope->getParent() && DS.isSubprogram()) { - ScopeDIE = constructInlinedScopeDIE(TheCU, Scope); - if (!ScopeDIE) - return nullptr; - // We create children when the scope DIE is not null. - createScopeChildrenDIE(TheCU, Scope, Children); - } else { - // Early exit when we know the scope DIE is going to be null. - if (isLexicalScopeDIENull(Scope)) - return nullptr; - - // We create children here when we know the scope DIE is not going to be - // null and the children will be added to the scope DIE. - createScopeChildrenDIE(TheCU, Scope, Children); - - // There is no need to emit empty lexical block DIE. 
- std::pair Range = - std::equal_range(ScopesWithImportedEntities.begin(), - ScopesWithImportedEntities.end(), - std::pair(DS, nullptr), - less_first()); - if (Children.empty() && Range.first == Range.second) - return nullptr; - ScopeDIE = constructLexicalScopeDIE(TheCU, Scope); - assert(ScopeDIE && "Scope DIE should not be null."); - for (ImportedEntityMap::const_iterator i = Range.first; i != Range.second; - ++i) - constructImportedEntityDIE(TheCU, i->second, *ScopeDIE); - } - - // Add children - for (auto &I : Children) - ScopeDIE->addChild(std::move(I)); - - return ScopeDIE; + auto &CU = SPMap[SP]; + forBothCUs(*CU, [&](DwarfCompileUnit &CU) { + CU.constructAbstractSubprogramScopeDIE(Scope); + }); } void DwarfDebug::addGnuPubAttributes(DwarfUnit &U, DIE &D) const { @@ -629,6 +371,8 @@ DwarfCompileUnit &DwarfDebug::constructDwarfCompileUnit(DICompileUnit DIUnit) { DwarfCompileUnit &NewCU = *OwnedUnit; DIE &Die = NewCU.getUnitDie(); InfoHolder.addUnit(std::move(OwnedUnit)); + if (useSplitDwarf()) + NewCU.setSkeleton(constructSkeletonCU(NewCU)); // LTO with assembly output shares a single line table amongst multiple CUs. // To avoid the compilation directory being ambiguous, let the line table @@ -665,14 +409,10 @@ DwarfCompileUnit &DwarfDebug::constructDwarfCompileUnit(DICompileUnit DIUnit) { NewCU.addUInt(Die, dwarf::DW_AT_APPLE_major_runtime_vers, dwarf::DW_FORM_data1, RVer); - if (!FirstCU) - FirstCU = &NewCU; - - if (useSplitDwarf()) { + if (useSplitDwarf()) NewCU.initSection(Asm->getObjFileLowering().getDwarfInfoDWOSection(), DwarfInfoDWOSectionSym); - NewCU.setSkeleton(constructSkeletonCU(NewCU)); - } else + else NewCU.initSection(Asm->getObjFileLowering().getDwarfInfoSection(), DwarfInfoSectionSym); @@ -681,44 +421,12 @@ DwarfCompileUnit &DwarfDebug::constructDwarfCompileUnit(DICompileUnit DIUnit) { return NewCU; } -void DwarfDebug::constructImportedEntityDIE(DwarfCompileUnit &TheCU, - const MDNode *N) { +void DwarfDebug::constructAndAddImportedEntityDIE(DwarfCompileUnit &TheCU, + const MDNode *N) { DIImportedEntity Module(N); assert(Module.Verify()); if (DIE *D = TheCU.getOrCreateContextDIE(Module.getContext())) - constructImportedEntityDIE(TheCU, Module, *D); -} - -void DwarfDebug::constructImportedEntityDIE(DwarfCompileUnit &TheCU, - const MDNode *N, DIE &Context) { - DIImportedEntity Module(N); - assert(Module.Verify()); - return constructImportedEntityDIE(TheCU, Module, Context); -} - -void DwarfDebug::constructImportedEntityDIE(DwarfCompileUnit &TheCU, - const DIImportedEntity &Module, - DIE &Context) { - assert(Module.Verify() && - "Use one of the MDNode * overloads to handle invalid metadata"); - DIE &IMDie = TheCU.createAndAddDIE(Module.getTag(), Context, Module); - DIE *EntityDie; - DIDescriptor Entity = resolve(Module.getEntity()); - if (Entity.isNameSpace()) - EntityDie = TheCU.getOrCreateNameSpace(DINameSpace(Entity)); - else if (Entity.isSubprogram()) - EntityDie = TheCU.getOrCreateSubprogramDIE(DISubprogram(Entity)); - else if (Entity.isType()) - EntityDie = TheCU.getOrCreateTypeDIE(DIType(Entity)); - else - EntityDie = TheCU.getDIE(Entity); - TheCU.addSourceLine(IMDie, Module.getLineNumber(), - Module.getContext().getFilename(), - Module.getContext().getDirectory()); - TheCU.addDIEEntry(IMDie, dwarf::DW_AT_import, *EntityDie); - StringRef Name = Module.getName(); - if (!Name.empty()) - TheCU.addString(IMDie, dwarf::DW_AT_name, Name); + D->addChild(TheCU.constructImportedEntityDIE(Module)); } // Emit all Dwarf sections that should come prior to the content. 
Create @@ -732,8 +440,6 @@ void DwarfDebug::beginModule() { FunctionDIs = makeSubprogramMap(*M); - // If module has named metadata anchors then use them, otherwise scan the - // module using debug info finder to collect debug info. NamedMDNode *CU_Nodes = M->getNamedMetadata("llvm.dbg.cu"); if (!CU_Nodes) return; @@ -756,13 +462,18 @@ void DwarfDebug::beginModule() { ScopesWithImportedEntities.end(), less_first()); DIArray GVs = CUNode.getGlobalVariables(); for (unsigned i = 0, e = GVs.getNumElements(); i != e; ++i) - CU.createGlobalVariableDIE(DIGlobalVariable(GVs.getElement(i))); + CU.getOrCreateGlobalVariableDIE(DIGlobalVariable(GVs.getElement(i))); DIArray SPs = CUNode.getSubprograms(); for (unsigned i = 0, e = SPs.getNumElements(); i != e; ++i) SPMap.insert(std::make_pair(SPs.getElement(i), &CU)); DIArray EnumTypes = CUNode.getEnumTypes(); - for (unsigned i = 0, e = EnumTypes.getNumElements(); i != e; ++i) - CU.getOrCreateTypeDIE(EnumTypes.getElement(i)); + for (unsigned i = 0, e = EnumTypes.getNumElements(); i != e; ++i) { + DIType Ty(EnumTypes.getElement(i)); + // The enum types array by design contains pointers to + // MDNodes rather than DIRefs. Unique them here. + DIType UniqueTy(resolve(Ty.getRef())); + CU.getOrCreateTypeDIE(UniqueTy); + } DIArray RetainedTypes = CUNode.getRetainedTypes(); for (unsigned i = 0, e = RetainedTypes.getNumElements(); i != e; ++i) { DIType Ty(RetainedTypes.getElement(i)); @@ -774,7 +485,7 @@ void DwarfDebug::beginModule() { // Emit imported_modules last so that the relevant context is already // available. for (unsigned i = 0, e = ImportedEntities.getNumElements(); i != e; ++i) - constructImportedEntityDIE(CU, ImportedEntities.getElement(i)); + constructAndAddImportedEntityDIE(CU, ImportedEntities.getElement(i)); } // Tell MMI that we have debug info. @@ -787,9 +498,7 @@ void DwarfDebug::beginModule() { void DwarfDebug::finishVariableDefinitions() { for (const auto &Var : ConcreteVariables) { DIE *VariableDie = Var->getDIE(); - // FIXME: There shouldn't be any variables without DIEs. - if (!VariableDie) - continue; + assert(VariableDie); // FIXME: Consider the time-space tradeoff of just storing the unit pointer // in the ConcreteVariables list, rather than looking it up again here. // DIE::getUnit isn't simple - it walks parent pointers, etc. @@ -805,36 +514,10 @@ void DwarfDebug::finishVariableDefinitions() { } void DwarfDebug::finishSubprogramDefinitions() { - const Module *M = MMI->getModule(); - - NamedMDNode *CU_Nodes = M->getNamedMetadata("llvm.dbg.cu"); - for (MDNode *N : CU_Nodes->operands()) { - DICompileUnit TheCU(N); - // Construct subprogram DIE and add variables DIEs. - DwarfCompileUnit *SPCU = - static_cast(CUMap.lookup(TheCU)); - DIArray Subprograms = TheCU.getSubprograms(); - for (unsigned i = 0, e = Subprograms.getNumElements(); i != e; ++i) { - DISubprogram SP(Subprograms.getElement(i)); - // Perhaps the subprogram is in another CU (such as due to comdat - // folding, etc), in which case ignore it here. - if (SPMap[SP] != SPCU) - continue; - DIE *D = SPCU->getDIE(SP); - if (DIE *AbsSPDIE = AbstractSPDies.lookup(SP)) { - if (D) - // If this subprogram has an abstract definition, reference that - SPCU->addDIEEntry(*D, dwarf::DW_AT_abstract_origin, *AbsSPDIE); - } else { - if (!D) - // Lazily construct the subprogram if we didn't see either concrete or - // inlined versions during codegen. 
- D = SPCU->getOrCreateSubprogramDIE(SP); - // And attach the attributes - SPCU->applySubprogramAttributesToDefinition(SP, *D); - } - } - } + for (const auto &P : SPMap) + forBothCUs(*P.second, [&](DwarfCompileUnit &CU) { + CU.finishSubprogramDefinition(DISubprogram(P.first)); + }); } @@ -854,26 +537,7 @@ void DwarfDebug::collectDeadVariables() { DISubprogram SP(Subprograms.getElement(i)); if (ProcessedSPNodes.count(SP) != 0) continue; - assert(SP.isSubprogram() && - "CU's subprogram list contains a non-subprogram"); - assert(SP.isDefinition() && - "CU's subprogram list contains a subprogram declaration"); - DIArray Variables = SP.getVariables(); - if (Variables.getNumElements() == 0) - continue; - - DIE *SPDIE = AbstractSPDies.lookup(SP); - if (!SPDIE) - SPDIE = SPCU->getDIE(SP); - assert(SPDIE); - for (unsigned vi = 0, ve = Variables.getNumElements(); vi != ve; ++vi) { - DIVariable DV(Variables.getElement(vi)); - assert(DV.isVariable()); - DbgVariable NewVar(DV, this); - auto VariableDie = SPCU->constructVariableDIE(NewVar); - SPCU->applyVariableAttributes(NewVar, *VariableDie); - SPDIE->addChild(std::move(VariableDie)); - } + SPCU->collectDeadVariables(SP); } } } @@ -889,66 +553,52 @@ void DwarfDebug::finalizeModuleInfo() { // Handle anything that needs to be done on a per-unit basis after // all other generation. - for (const auto &TheU : getUnits()) { + for (const auto &P : CUMap) { + auto &TheCU = *P.second; // Emit DW_AT_containing_type attribute to connect types with their // vtable holding type. - TheU->constructContainingTypeDIEs(); + TheCU.constructContainingTypeDIEs(); // Add CU specific attributes if we need to add any. - if (TheU->getUnitDie().getTag() == dwarf::DW_TAG_compile_unit) { - // If we're splitting the dwarf out now that we've got the entire - // CU then add the dwo id to it. - DwarfCompileUnit *SkCU = - static_cast(TheU->getSkeleton()); - if (useSplitDwarf()) { - // Emit a unique identifier for this CU. - uint64_t ID = DIEHash(Asm).computeCUSignature(TheU->getUnitDie()); - TheU->addUInt(TheU->getUnitDie(), dwarf::DW_AT_GNU_dwo_id, - dwarf::DW_FORM_data8, ID); - SkCU->addUInt(SkCU->getUnitDie(), dwarf::DW_AT_GNU_dwo_id, - dwarf::DW_FORM_data8, ID); - - // We don't keep track of which addresses are used in which CU so this - // is a bit pessimistic under LTO. - if (!AddrPool.isEmpty()) - addSectionLabel(*Asm, *SkCU, SkCU->getUnitDie(), - dwarf::DW_AT_GNU_addr_base, DwarfAddrSectionSym, - DwarfAddrSectionSym); - if (!TheU->getRangeLists().empty()) - addSectionLabel(*Asm, *SkCU, SkCU->getUnitDie(), - dwarf::DW_AT_GNU_ranges_base, - DwarfDebugRangeSectionSym, DwarfDebugRangeSectionSym); - } + // If we're splitting the dwarf out now that we've got the entire + // CU then add the dwo id to it. + auto *SkCU = TheCU.getSkeleton(); + if (useSplitDwarf()) { + // Emit a unique identifier for this CU. + uint64_t ID = DIEHash(Asm).computeCUSignature(TheCU.getUnitDie()); + TheCU.addUInt(TheCU.getUnitDie(), dwarf::DW_AT_GNU_dwo_id, + dwarf::DW_FORM_data8, ID); + SkCU->addUInt(SkCU->getUnitDie(), dwarf::DW_AT_GNU_dwo_id, + dwarf::DW_FORM_data8, ID); + + // We don't keep track of which addresses are used in which CU so this + // is a bit pessimistic under LTO. 
+ if (!AddrPool.isEmpty()) + SkCU->addSectionLabel(SkCU->getUnitDie(), dwarf::DW_AT_GNU_addr_base, + DwarfAddrSectionSym, DwarfAddrSectionSym); + if (!SkCU->getRangeLists().empty()) + SkCU->addSectionLabel(SkCU->getUnitDie(), dwarf::DW_AT_GNU_ranges_base, + DwarfDebugRangeSectionSym, + DwarfDebugRangeSectionSym); + } - // If we have code split among multiple sections or non-contiguous - // ranges of code then emit a DW_AT_ranges attribute on the unit that will - // remain in the .o file, otherwise add a DW_AT_low_pc. - // FIXME: We should use ranges allow reordering of code ala - // .subsections_via_symbols in mach-o. This would mean turning on - // ranges for all subprogram DIEs for mach-o. - DwarfCompileUnit &U = - SkCU ? *SkCU : static_cast(*TheU); - unsigned NumRanges = TheU->getRanges().size(); - if (NumRanges) { - if (NumRanges > 1) { - addSectionLabel(*Asm, U, U.getUnitDie(), dwarf::DW_AT_ranges, - Asm->GetTempSymbol("cu_ranges", U.getUniqueID()), - DwarfDebugRangeSectionSym); - - // A DW_AT_low_pc attribute may also be specified in combination with - // DW_AT_ranges to specify the default base address for use in - // location lists (see Section 2.6.2) and range lists (see Section - // 2.17.3). - U.addUInt(U.getUnitDie(), dwarf::DW_AT_low_pc, dwarf::DW_FORM_addr, - 0); - } else { - RangeSpan &Range = TheU->getRanges().back(); - U.addLocalLabelAddress(U.getUnitDie(), dwarf::DW_AT_low_pc, - Range.getStart()); - U.addLabelDelta(U.getUnitDie(), dwarf::DW_AT_high_pc, Range.getEnd(), - Range.getStart()); - } - } + // If we have code split among multiple sections or non-contiguous + // ranges of code then emit a DW_AT_ranges attribute on the unit that will + // remain in the .o file, otherwise add a DW_AT_low_pc. + // FIXME: We should use ranges allow reordering of code ala + // .subsections_via_symbols in mach-o. This would mean turning on + // ranges for all subprogram DIEs for mach-o. + DwarfCompileUnit &U = SkCU ? *SkCU : TheCU; + if (unsigned NumRanges = TheCU.getRanges().size()) { + if (NumRanges > 1) + // A DW_AT_low_pc attribute may also be specified in combination with + // DW_AT_ranges to specify the default base address for use in + // location lists (see Section 2.6.2) and range lists (see Section + // 2.17.3). + U.addUInt(U.getUnitDie(), dwarf::DW_AT_low_pc, dwarf::DW_FORM_addr, 0); + else + TheCU.setBaseAddress(TheCU.getRanges().front().getStart()); + U.attachRangesOrLowHighPC(U.getUnitDie(), TheCU.takeRanges()); } } @@ -1010,7 +660,10 @@ void DwarfDebug::endModule() { assert(CurFn == nullptr); assert(CurMI == nullptr); - if (!FirstCU) + // If we aren't actually generating debug info (check beginModule - + // conditionalized on !DisableDebugInfoPrinting and the presence of the + // llvm.dbg.cu metadata node) + if (!DwarfInfoSectionSym) return; // End any existing sections. @@ -1064,9 +717,6 @@ void DwarfDebug::endModule() { // clean up. SPMap.clear(); AbstractVariables.clear(); - - // Reset these for the next Module if we have one. - FirstCU = nullptr; } // Find abstract variable, if any, associated with Var. 
@@ -1092,8 +742,8 @@ DbgVariable *DwarfDebug::getExistingAbstractVariable(const DIVariable &DV) { void DwarfDebug::createAbstractVariable(const DIVariable &Var, LexicalScope *Scope) { - auto AbsDbgVariable = make_unique(Var, this); - addScopeVariable(Scope, AbsDbgVariable.get()); + auto AbsDbgVariable = make_unique(Var, DIExpression(), this); + InfoHolder.addScopeVariable(Scope, AbsDbgVariable.get()); AbstractVariables[Var] = std::move(AbsDbgVariable); } @@ -1117,55 +767,35 @@ DwarfDebug::ensureAbstractVariableIsCreatedIfScoped(const DIVariable &DV, createAbstractVariable(Cleansed, Scope); } -// If Var is a current function argument then add it to CurrentFnArguments list. -bool DwarfDebug::addCurrentFnArgument(DbgVariable *Var, LexicalScope *Scope) { - if (!LScopes.isCurrentFunctionScope(Scope)) - return false; - DIVariable DV = Var->getVariable(); - if (DV.getTag() != dwarf::DW_TAG_arg_variable) - return false; - unsigned ArgNo = DV.getArgNumber(); - if (ArgNo == 0) - return false; - - size_t Size = CurrentFnArguments.size(); - if (Size == 0) - CurrentFnArguments.resize(CurFn->getFunction()->arg_size()); - // llvm::Function argument size is not good indicator of how many - // arguments does the function have at source level. - if (ArgNo > Size) - CurrentFnArguments.resize(ArgNo * 2); - CurrentFnArguments[ArgNo - 1] = Var; - return true; -} - // Collect variable information from side table maintained by MMI. void DwarfDebug::collectVariableInfoFromMMITable( - SmallPtrSet &Processed) { + SmallPtrSetImpl &Processed) { for (const auto &VI : MMI->getVariableDbgInfo()) { if (!VI.Var) continue; Processed.insert(VI.Var); - DIVariable DV(VI.Var); LexicalScope *Scope = LScopes.findLexicalScope(VI.Loc); // If variable scope is not found then skip this variable. if (!Scope) continue; + DIVariable DV(VI.Var); + DIExpression Expr(VI.Expr); ensureAbstractVariableIsCreatedIfScoped(DV, Scope->getScopeNode()); - ConcreteVariables.push_back(make_unique(DV, this)); + ConcreteVariables.push_back(make_unique(DV, Expr, this)); DbgVariable *RegVar = ConcreteVariables.back().get(); RegVar->setFrameIndex(VI.Slot); - addScopeVariable(Scope, RegVar); + InfoHolder.addScopeVariable(Scope, RegVar); } } // Get .debug_loc entry for the instruction range starting at MI. 
static DebugLocEntry::Value getDebugLocValue(const MachineInstr *MI) { + const MDNode *Expr = MI->getDebugExpression(); const MDNode *Var = MI->getDebugVariable(); - assert(MI->getNumOperands() == 3); + assert(MI->getNumOperands() == 4); if (MI->getOperand(0).isReg()) { MachineLocation MLoc; // If the second operand is an immediate, this is a @@ -1174,24 +804,138 @@ static DebugLocEntry::Value getDebugLocValue(const MachineInstr *MI) { MLoc.set(MI->getOperand(0).getReg()); else MLoc.set(MI->getOperand(0).getReg(), MI->getOperand(1).getImm()); - return DebugLocEntry::Value(Var, MLoc); + return DebugLocEntry::Value(Var, Expr, MLoc); } if (MI->getOperand(0).isImm()) - return DebugLocEntry::Value(Var, MI->getOperand(0).getImm()); + return DebugLocEntry::Value(Var, Expr, MI->getOperand(0).getImm()); if (MI->getOperand(0).isFPImm()) - return DebugLocEntry::Value(Var, MI->getOperand(0).getFPImm()); + return DebugLocEntry::Value(Var, Expr, MI->getOperand(0).getFPImm()); if (MI->getOperand(0).isCImm()) - return DebugLocEntry::Value(Var, MI->getOperand(0).getCImm()); + return DebugLocEntry::Value(Var, Expr, MI->getOperand(0).getCImm()); - llvm_unreachable("Unexpected 3 operand DBG_VALUE instruction!"); + llvm_unreachable("Unexpected 4-operand DBG_VALUE instruction!"); } -// Find variables for each lexical scope. +/// Determine whether two variable pieces overlap. +static bool piecesOverlap(DIExpression P1, DIExpression P2) { + if (!P1.isVariablePiece() || !P2.isVariablePiece()) + return true; + unsigned l1 = P1.getPieceOffset(); + unsigned l2 = P2.getPieceOffset(); + unsigned r1 = l1 + P1.getPieceSize(); + unsigned r2 = l2 + P2.getPieceSize(); + // True where [l1,r1[ and [r1,r2[ overlap. + return (l1 < r2) && (l2 < r1); +} + +/// Build the location list for all DBG_VALUEs in the function that +/// describe the same variable. If the ranges of several independent +/// pieces of the same variable overlap partially, split them up and +/// combine the ranges. The resulting DebugLocEntries are will have +/// strict monotonically increasing begin addresses and will never +/// overlap. +// +// Input: +// +// Ranges History [var, loc, piece ofs size] +// 0 | [x, (reg0, piece 0, 32)] +// 1 | | [x, (reg1, piece 32, 32)] <- IsPieceOfPrevEntry +// 2 | | ... +// 3 | [clobber reg0] +// 4 [x, (mem, piece 0, 64)] <- overlapping with both previous pieces of x. +// +// Output: +// +// [0-1] [x, (reg0, piece 0, 32)] +// [1-3] [x, (reg0, piece 0, 32), (reg1, piece 32, 32)] +// [3-4] [x, (reg1, piece 32, 32)] +// [4- ] [x, (mem, piece 0, 64)] void -DwarfDebug::collectVariableInfo(SmallPtrSet &Processed) { - LexicalScope *FnScope = LScopes.getCurrentFunctionScope(); - DwarfCompileUnit *TheCU = SPMap.lookup(FnScope->getScopeNode()); +DwarfDebug::buildLocationList(SmallVectorImpl &DebugLoc, + const DbgValueHistoryMap::InstrRanges &Ranges) { + SmallVector OpenRanges; + + for (auto I = Ranges.begin(), E = Ranges.end(); I != E; ++I) { + const MachineInstr *Begin = I->first; + const MachineInstr *End = I->second; + assert(Begin->isDebugValue() && "Invalid History entry"); + + // Check if a variable is inaccessible in this range. + if (Begin->getNumOperands() > 1 && + Begin->getOperand(0).isReg() && !Begin->getOperand(0).getReg()) { + OpenRanges.clear(); + continue; + } + + // If this piece overlaps with any open ranges, truncate them. 
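// A standalone version of the half-open interval test used by piecesOverlap
// above: two pieces [l1, r1) and [l2, r2) overlap iff each one starts before
// the other ends. (Entries that are not variable pieces conservatively count
// as overlapping in the patch; this sketch shows only the interval
// arithmetic.)
#include <cassert>

bool intervalsOverlap(unsigned Off1, unsigned Size1, unsigned Off2,
                      unsigned Size2) {
  unsigned L1 = Off1, R1 = Off1 + Size1;
  unsigned L2 = Off2, R2 = Off2 + Size2;
  return L1 < R2 && L2 < R1;
}

int main() {
  assert(intervalsOverlap(0, 32, 16, 32));  // [0,32) vs [16,48): overlap
  assert(!intervalsOverlap(0, 32, 32, 32)); // [0,32) vs [32,64): touch only
  return 0;
}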
+ DIExpression DIExpr = Begin->getDebugExpression(); + auto Last = std::remove_if(OpenRanges.begin(), OpenRanges.end(), + [&](DebugLocEntry::Value R) { + return piecesOverlap(DIExpr, R.getExpression()); + }); + OpenRanges.erase(Last, OpenRanges.end()); + + const MCSymbol *StartLabel = getLabelBeforeInsn(Begin); + assert(StartLabel && "Forgot label before DBG_VALUE starting a range!"); + + const MCSymbol *EndLabel; + if (End != nullptr) + EndLabel = getLabelAfterInsn(End); + else if (std::next(I) == Ranges.end()) + EndLabel = FunctionEndSym; + else + EndLabel = getLabelBeforeInsn(std::next(I)->first); + assert(EndLabel && "Forgot label after instruction ending a range!"); + DEBUG(dbgs() << "DotDebugLoc: " << *Begin << "\n"); + + auto Value = getDebugLocValue(Begin); + DebugLocEntry Loc(StartLabel, EndLabel, Value); + bool couldMerge = false; + + // If this is a piece, it may belong to the current DebugLocEntry. + if (DIExpr.isVariablePiece()) { + // Add this value to the list of open ranges. + OpenRanges.push_back(Value); + + // Attempt to add the piece to the last entry. + if (!DebugLoc.empty()) + if (DebugLoc.back().MergeValues(Loc)) + couldMerge = true; + } + + if (!couldMerge) { + // Need to add a new DebugLocEntry. Add all values from still + // valid non-overlapping pieces. + if (OpenRanges.size()) + Loc.addValues(OpenRanges); + + DebugLoc.push_back(std::move(Loc)); + } + + // Attempt to coalesce the ranges of two otherwise identical + // DebugLocEntries. + auto CurEntry = DebugLoc.rbegin(); + auto PrevEntry = std::next(CurEntry); + if (PrevEntry != DebugLoc.rend() && PrevEntry->MergeRanges(*CurEntry)) + DebugLoc.pop_back(); + + DEBUG({ + dbgs() << CurEntry->getValues().size() << " Values:\n"; + for (auto Value : CurEntry->getValues()) { + Value.getVariable()->dump(); + Value.getExpression()->dump(); + } + dbgs() << "-----\n"; + }); + } +} + + +// Find variables for each lexical scope. +void +DwarfDebug::collectVariableInfo(DwarfCompileUnit &TheCU, DISubprogram SP, + SmallPtrSetImpl &Processed) { // Grab the variable info that was squirreled away in the MMI side-table. collectVariableInfoFromMMITable(Processed); @@ -1206,10 +950,7 @@ DwarfDebug::collectVariableInfo(SmallPtrSet &Processed) { continue; LexicalScope *Scope = nullptr; - if (DV.getTag() == dwarf::DW_TAG_arg_variable && - DISubprogram(DV.getContext()).describes(CurFn->getFunction())) - Scope = LScopes.getCurrentFunctionScope(); - else if (MDNode *IA = DV.getInlinedAt()) { + if (MDNode *IA = DV.getInlinedAt()) { DebugLoc DL = DebugLoc::getFromDILocation(IA); Scope = LScopes.findInlinedScope(DebugLoc::get( DL.getLine(), DL.getCol(), DV.getContext(), IA)); @@ -1225,7 +966,7 @@ DwarfDebug::collectVariableInfo(SmallPtrSet &Processed) { ensureAbstractVariableIsCreatedIfScoped(DV, Scope->getScopeNode()); ConcreteVariables.push_back(make_unique(MInsn, this)); DbgVariable *RegVar = ConcreteVariables.back().get(); - addScopeVariable(Scope, RegVar); + InfoHolder.addScopeVariable(Scope, RegVar); // Check if the first DBG_VALUE is valid for the rest of the function. 
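The merging done by buildLocationList() above happens in two steps: values for pieces that share a range are folded into one entry, and a new entry is dropped when it exactly continues the previous one. A rough sketch of the range-coalescing step, using hypothetical stand-in types (integer label indices instead of MCSymbols, strings instead of DebugLocEntry::Value):

#include <string>
#include <vector>

struct LocEntry {
  unsigned Begin, End;             // stand-ins for the begin/end MCSymbols
  std::vector<std::string> Values; // stand-in for the list of location values

  // Coalesce Next into this entry if it starts where this entry ends and
  // describes exactly the same values.
  bool mergeRanges(const LocEntry &Next) {
    if (End != Next.Begin || Values != Next.Values)
      return false;
    End = Next.End;
    return true;
  }
};

static void appendEntry(std::vector<LocEntry> &List, const LocEntry &E) {
  if (!List.empty() && List.back().mergeRanges(E))
    return;          // ranges coalesced into the previous entry
  List.push_back(E); // otherwise open a new entry
}

The patch's MergeValues step plays the analogous role for pieces that share the same range, appending their values to the existing entry instead of extending it.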
if (Ranges.size() == 1 && Ranges.front().second == nullptr) @@ -1236,53 +977,26 @@ DwarfDebug::collectVariableInfo(SmallPtrSet &Processed) { DotDebugLocEntries.resize(DotDebugLocEntries.size() + 1); DebugLocList &LocList = DotDebugLocEntries.back(); + LocList.CU = &TheCU; LocList.Label = Asm->GetTempSymbol("debug_loc", DotDebugLocEntries.size() - 1); - SmallVector &DebugLoc = LocList.List; - for (auto I = Ranges.begin(), E = Ranges.end(); I != E; ++I) { - const MachineInstr *Begin = I->first; - const MachineInstr *End = I->second; - assert(Begin->isDebugValue() && "Invalid History entry"); - - // Check if a variable is unaccessible in this range. - if (Begin->getNumOperands() > 1 && Begin->getOperand(0).isReg() && - !Begin->getOperand(0).getReg()) - continue; - DEBUG(dbgs() << "DotDebugLoc Pair:\n" << "\t" << *Begin); - if (End != nullptr) - DEBUG(dbgs() << "\t" << *End); - else - DEBUG(dbgs() << "\tNULL\n"); - - const MCSymbol *StartLabel = getLabelBeforeInsn(Begin); - assert(StartLabel && "Forgot label before DBG_VALUE starting a range!"); - - const MCSymbol *EndLabel; - if (End != nullptr) - EndLabel = getLabelAfterInsn(End); - else if (std::next(I) == Ranges.end()) - EndLabel = FunctionEndSym; - else - EndLabel = getLabelBeforeInsn(std::next(I)->first); - assert(EndLabel && "Forgot label after instruction ending a range!"); - DebugLocEntry Loc(StartLabel, EndLabel, getDebugLocValue(Begin), TheCU); - if (DebugLoc.empty() || !DebugLoc.back().Merge(Loc)) - DebugLoc.push_back(std::move(Loc)); - } + // Build the location list for this variable. + buildLocationList(LocList.List, Ranges); } // Collect info for variables that were optimized out. - DIArray Variables = DISubprogram(FnScope->getScopeNode()).getVariables(); + DIArray Variables = SP.getVariables(); for (unsigned i = 0, e = Variables.getNumElements(); i != e; ++i) { DIVariable DV(Variables.getElement(i)); assert(DV.isVariable()); - if (!Processed.insert(DV)) + if (!Processed.insert(DV).second) continue; if (LexicalScope *Scope = LScopes.findLexicalScope(DV.getContext())) { ensureAbstractVariableIsCreatedIfScoped(DV, Scope->getScopeNode()); - ConcreteVariables.push_back(make_unique(DV, this)); - addScopeVariable(Scope, ConcreteVariables.back().get()); + DIExpression NoExpr; + ConcreteVariables.push_back(make_unique(DV, NoExpr, this)); + InfoHolder.addScopeVariable(Scope, ConcreteVariables.back().get()); } } } @@ -1458,7 +1172,8 @@ void DwarfDebug::beginFunction(const MachineFunction *MF) { Asm->OutStreamer.EmitLabel(FunctionBeginSym); // Calculate history for local variables. - calculateDbgValueHistory(MF, Asm->TM.getRegisterInfo(), DbgValues); + calculateDbgValueHistory(MF, Asm->TM.getSubtargetImpl()->getRegisterInfo(), + DbgValues); // Request labels for the full history. for (const auto &I : DbgValues) { @@ -1468,10 +1183,24 @@ void DwarfDebug::beginFunction(const MachineFunction *MF) { // The first mention of a function argument gets the FunctionBeginSym // label, so arguments are visible when breaking at function entry. 
- DIVariable DV(I.first); - if (DV.isVariable() && DV.getTag() == dwarf::DW_TAG_arg_variable && - getDISubprogram(DV.getContext()).describes(MF->getFunction())) + DIVariable DIVar(Ranges.front().first->getDebugVariable()); + if (DIVar.isVariable() && DIVar.getTag() == dwarf::DW_TAG_arg_variable && + getDISubprogram(DIVar.getContext()).describes(MF->getFunction())) { LabelsBeforeInsn[Ranges.front().first] = FunctionBeginSym; + if (Ranges.front().first->getDebugExpression().isVariablePiece()) { + // Mark all non-overlapping initial pieces. + for (auto I = Ranges.begin(); I != Ranges.end(); ++I) { + DIExpression Piece = I->first->getDebugExpression(); + if (std::all_of(Ranges.begin(), I, + [&](DbgValueHistoryMap::InstrRange Pred) { + return !piecesOverlap(Piece, Pred.first->getDebugExpression()); + })) + LabelsBeforeInsn[I->first] = FunctionBeginSym; + else + break; + } + } + } for (const auto &Range : Ranges) { requestLabelBeforeInsn(Range.first); @@ -1497,56 +1226,16 @@ void DwarfDebug::beginFunction(const MachineFunction *MF) { } } -void DwarfDebug::addScopeVariable(LexicalScope *LS, DbgVariable *Var) { - if (addCurrentFnArgument(Var, LS)) - return; - SmallVectorImpl &Vars = ScopeVariables[LS]; - DIVariable DV = Var->getVariable(); - // Variables with positive arg numbers are parameters. - if (unsigned ArgNum = DV.getArgNumber()) { - // Keep all parameters in order at the start of the variable list to ensure - // function types are correct (no out-of-order parameters) - // - // This could be improved by only doing it for optimized builds (unoptimized - // builds have the right order to begin with), searching from the back (this - // would catch the unoptimized case quickly), or doing a binary search - // rather than linear search. - SmallVectorImpl::iterator I = Vars.begin(); - while (I != Vars.end()) { - unsigned CurNum = (*I)->getVariable().getArgNumber(); - // A local (non-parameter) variable has been found, insert immediately - // before it. - if (CurNum == 0) - break; - // A later indexed parameter has been found, insert immediately before it. - if (CurNum > ArgNum) - break; - ++I; - } - Vars.insert(I, Var); - return; - } - - Vars.push_back(Var); -} - // Gather and emit post-function debug information. void DwarfDebug::endFunction(const MachineFunction *MF) { - // Every beginFunction(MF) call should be followed by an endFunction(MF) call, - // though the beginFunction may not be called at all. - // We should handle both cases. - if (!CurFn) - CurFn = MF; - else - assert(CurFn == MF); - assert(CurFn != nullptr); + assert(CurFn == MF && + "endFunction should be called with the same function as beginFunction"); if (!MMI->hasDebugInfo() || LScopes.empty() || !FunctionDIs.count(MF->getFunction())) { // If we don't have a lexical scope for this function then there will // be a hole in the range information. Keep note of this by setting the // previously used section to nullptr. - PrevSection = nullptr; PrevCU = nullptr; CurFn = nullptr; return; @@ -1560,45 +1249,63 @@ void DwarfDebug::endFunction(const MachineFunction *MF) { // Set DwarfDwarfCompileUnitID in MCContext to default value. 
Asm->OutStreamer.getContext().setDwarfCompileUnitID(0); + LexicalScope *FnScope = LScopes.getCurrentFunctionScope(); + DISubprogram SP(FnScope->getScopeNode()); + DwarfCompileUnit &TheCU = *SPMap.lookup(SP); + SmallPtrSet ProcessedVars; - collectVariableInfo(ProcessedVars); + collectVariableInfo(TheCU, SP, ProcessedVars); - LexicalScope *FnScope = LScopes.getCurrentFunctionScope(); - DwarfCompileUnit &TheCU = *SPMap.lookup(FnScope->getScopeNode()); + // Add the range of this function to the list of ranges for the CU. + TheCU.addRange(RangeSpan(FunctionBeginSym, FunctionEndSym)); + + // Under -gmlt, skip building the subprogram if there are no inlined + // subroutines inside it. + if (TheCU.getCUNode().getEmissionKind() == DIBuilder::LineTablesOnly && + LScopes.getAbstractScopesList().empty() && !IsDarwin) { + assert(InfoHolder.getScopeVariables().empty()); + assert(DbgValues.empty()); + // FIXME: This wouldn't be true in LTO with a -g (with inlining) CU followed + // by a -gmlt CU. Add a test and remove this assertion. + assert(AbstractVariables.empty()); + LabelsBeforeInsn.clear(); + LabelsAfterInsn.clear(); + PrevLabel = nullptr; + CurFn = nullptr; + return; + } +#ifndef NDEBUG + size_t NumAbstractScopes = LScopes.getAbstractScopesList().size(); +#endif // Construct abstract scopes. for (LexicalScope *AScope : LScopes.getAbstractScopesList()) { DISubprogram SP(AScope->getScopeNode()); - if (!SP.isSubprogram()) - continue; + assert(SP.isSubprogram()); // Collect info for variables that were optimized out. DIArray Variables = SP.getVariables(); for (unsigned i = 0, e = Variables.getNumElements(); i != e; ++i) { DIVariable DV(Variables.getElement(i)); assert(DV && DV.isVariable()); - if (!ProcessedVars.insert(DV)) + if (!ProcessedVars.insert(DV).second) continue; ensureAbstractVariableIsCreated(DV, DV.getContext()); + assert(LScopes.getAbstractScopesList().size() == NumAbstractScopes + && "ensureAbstractVariableIsCreated inserted abstract scopes"); } - constructAbstractSubprogramScopeDIE(TheCU, AScope); + constructAbstractSubprogramScopeDIE(AScope); } - DIE &CurFnDIE = constructSubprogramScopeDIE(TheCU, FnScope); - if (!CurFn->getTarget().Options.DisableFramePointerElim(*CurFn)) - TheCU.addFlag(CurFnDIE, dwarf::DW_AT_APPLE_omit_frame_ptr); - - // Add the range of this function to the list of ranges for the CU. 
- RangeSpan Span(FunctionBeginSym, FunctionEndSym); - TheCU.addRange(std::move(Span)); - PrevSection = Asm->getCurrentSection(); - PrevCU = &TheCU; + TheCU.constructSubprogramScopeDIE(FnScope); + if (auto *SkelCU = TheCU.getSkeleton()) + if (!LScopes.getAbstractScopesList().empty()) + SkelCU->constructSubprogramScopeDIE(FnScope); // Clear debug info // Ownership of DbgVariables is a bit subtle - ScopeVariables owns all the // DbgVariables except those that are also in AbstractVariables (since they // can be used cross-function) - ScopeVariables.clear(); - CurrentFnArguments.clear(); + InfoHolder.getScopeVariables().clear(); DbgValues.clear(); LabelsBeforeInsn.clear(); LabelsAfterInsn.clear(); @@ -1618,8 +1325,8 @@ void DwarfDebug::recordSourceLine(unsigned Line, unsigned Col, const MDNode *S, assert(Scope.isScope()); Fn = Scope.getFilename(); Dir = Scope.getDirectory(); - if (Scope.isLexicalBlock()) - Discriminator = DILexicalBlock(S).getDiscriminator(); + if (Scope.isLexicalBlockFile()) + Discriminator = DILexicalBlockFile(S).getDiscriminator(); unsigned CUID = Asm->OutStreamer.getContext().getDwarfCompileUnitID(); Src = static_cast(*InfoHolder.getUnits()[CUID]) @@ -1640,9 +1347,12 @@ void DwarfDebug::emitSectionLabels() { // Dwarf sections base addresses. DwarfInfoSectionSym = emitSectionSym(Asm, TLOF.getDwarfInfoSection(), "section_info"); - if (useSplitDwarf()) + if (useSplitDwarf()) { DwarfInfoDWOSectionSym = emitSectionSym(Asm, TLOF.getDwarfInfoDWOSection(), "section_info_dwo"); + DwarfTypesDWOSectionSym = + emitSectionSym(Asm, TLOF.getDwarfTypesDWOSection(), "section_types_dwo"); + } DwarfAbbrevSectionSym = emitSectionSym(Asm, TLOF.getDwarfAbbrevSection(), "section_abbrev"); if (useSplitDwarf()) @@ -1726,7 +1436,7 @@ void DwarfDebug::emitDIE(DIE &Die) { void DwarfDebug::emitDebugInfo() { DwarfFile &Holder = useSplitDwarf() ? SkeletonHolder : InfoHolder; - Holder.emitUnits(this, DwarfAbbrevSectionSym); + Holder.emitUnits(DwarfAbbrevSectionSym); } // Emit the abbreviation section. @@ -1760,54 +1470,41 @@ void DwarfDebug::emitEndOfLineMatrix(unsigned SectionEnd) { Asm->EmitInt8(1); } -// Emit visible names into a hashed accelerator table section. -void DwarfDebug::emitAccelNames() { - AccelNames.FinalizeTable(Asm, "Names"); - Asm->OutStreamer.SwitchSection( - Asm->getObjFileLowering().getDwarfAccelNamesSection()); - MCSymbol *SectionBegin = Asm->GetTempSymbol("names_begin"); +void DwarfDebug::emitAccel(DwarfAccelTable &Accel, const MCSection *Section, + StringRef TableName, StringRef SymName) { + Accel.FinalizeTable(Asm, TableName); + Asm->OutStreamer.SwitchSection(Section); + auto *SectionBegin = Asm->GetTempSymbol(SymName); Asm->OutStreamer.EmitLabel(SectionBegin); // Emit the full data. - AccelNames.Emit(Asm, SectionBegin, &InfoHolder); + Accel.Emit(Asm, SectionBegin, this, DwarfStrSectionSym); +} + +// Emit visible names into a hashed accelerator table section. +void DwarfDebug::emitAccelNames() { + emitAccel(AccelNames, Asm->getObjFileLowering().getDwarfAccelNamesSection(), + "Names", "names_begin"); } // Emit objective C classes and categories into a hashed accelerator table // section. void DwarfDebug::emitAccelObjC() { - AccelObjC.FinalizeTable(Asm, "ObjC"); - Asm->OutStreamer.SwitchSection( - Asm->getObjFileLowering().getDwarfAccelObjCSection()); - MCSymbol *SectionBegin = Asm->GetTempSymbol("objc_begin"); - Asm->OutStreamer.EmitLabel(SectionBegin); - - // Emit the full data. 
- AccelObjC.Emit(Asm, SectionBegin, &InfoHolder); + emitAccel(AccelObjC, Asm->getObjFileLowering().getDwarfAccelObjCSection(), + "ObjC", "objc_begin"); } // Emit namespace dies into a hashed accelerator table. void DwarfDebug::emitAccelNamespaces() { - AccelNamespace.FinalizeTable(Asm, "namespac"); - Asm->OutStreamer.SwitchSection( - Asm->getObjFileLowering().getDwarfAccelNamespaceSection()); - MCSymbol *SectionBegin = Asm->GetTempSymbol("namespac_begin"); - Asm->OutStreamer.EmitLabel(SectionBegin); - - // Emit the full data. - AccelNamespace.Emit(Asm, SectionBegin, &InfoHolder); + emitAccel(AccelNamespace, + Asm->getObjFileLowering().getDwarfAccelNamespaceSection(), + "namespac", "namespac_begin"); } // Emit type dies into a hashed accelerator table. void DwarfDebug::emitAccelTypes() { - - AccelTypes.FinalizeTable(Asm, "types"); - Asm->OutStreamer.SwitchSection( - Asm->getObjFileLowering().getDwarfAccelTypesSection()); - MCSymbol *SectionBegin = Asm->GetTempSymbol("types_begin"); - Asm->OutStreamer.EmitLabel(SectionBegin); - - // Emit the full data. - AccelTypes.Emit(Asm, SectionBegin, &InfoHolder); + emitAccel(AccelTypes, Asm->getObjFileLowering().getDwarfAccelTypesSection(), + "types", "types_begin"); } // Public name handling. @@ -1874,12 +1571,13 @@ void DwarfDebug::emitDebugPubNames(bool GnuStyle) { GnuStyle ? Asm->getObjFileLowering().getDwarfGnuPubNamesSection() : Asm->getObjFileLowering().getDwarfPubNamesSection(); - emitDebugPubSection(GnuStyle, PSec, "Names", &DwarfUnit::getGlobalNames); + emitDebugPubSection(GnuStyle, PSec, "Names", + &DwarfCompileUnit::getGlobalNames); } void DwarfDebug::emitDebugPubSection( bool GnuStyle, const MCSection *PSec, StringRef Name, - const StringMap &(DwarfUnit::*Accessor)() const) { + const StringMap &(DwarfCompileUnit::*Accessor)() const) { for (const auto &NU : CUMap) { DwarfCompileUnit *TheU = NU.second; @@ -1888,7 +1586,7 @@ void DwarfDebug::emitDebugPubSection( if (Globals.empty()) continue; - if (auto Skeleton = static_cast(TheU->getSkeleton())) + if (auto *Skeleton = TheU->getSkeleton()) TheU = Skeleton; unsigned ID = TheU->getUniqueID(); @@ -1910,7 +1608,7 @@ void DwarfDebug::emitDebugPubSection( Asm->EmitSectionOffset(TheU->getLabelBegin(), TheU->getSectionSym()); Asm->OutStreamer.AddComment("Compilation Unit Length"); - Asm->EmitLabelDifference(TheU->getLabelEnd(), TheU->getLabelBegin(), 4); + Asm->EmitInt32(TheU->getLength()); // Emit the pubnames for this compilation unit. for (const auto &GI : Globals) { @@ -1943,7 +1641,8 @@ void DwarfDebug::emitDebugPubTypes(bool GnuStyle) { GnuStyle ? Asm->getObjFileLowering().getDwarfGnuPubTypesSection() : Asm->getObjFileLowering().getDwarfPubTypesSection(); - emitDebugPubSection(GnuStyle, PSec, "Types", &DwarfUnit::getGlobalTypes); + emitDebugPubSection(GnuStyle, PSec, "Types", + &DwarfCompileUnit::getGlobalTypes); } // Emit visible names into a debug str section. @@ -1952,12 +1651,67 @@ void DwarfDebug::emitDebugStr() { Holder.emitStrings(Asm->getObjFileLowering().getDwarfStrSection()); } +/// Emits an optimal (=sorted) sequence of DW_OP_pieces. 
+void DwarfDebug::emitLocPieces(ByteStreamer &Streamer, + const DITypeIdentifierMap &Map, + ArrayRef Values) { + assert(std::all_of(Values.begin(), Values.end(), [](DebugLocEntry::Value P) { + return P.isVariablePiece(); + }) && "all values are expected to be pieces"); + assert(std::is_sorted(Values.begin(), Values.end()) && + "pieces are expected to be sorted"); + + unsigned Offset = 0; + for (auto Piece : Values) { + DIExpression Expr = Piece.getExpression(); + unsigned PieceOffset = Expr.getPieceOffset(); + unsigned PieceSize = Expr.getPieceSize(); + assert(Offset <= PieceOffset && "overlapping or duplicate pieces"); + if (Offset < PieceOffset) { + // The DWARF spec seriously mandates pieces with no locations for gaps. + Asm->EmitDwarfOpPiece(Streamer, (PieceOffset-Offset)*8); + Offset += PieceOffset-Offset; + } + + Offset += PieceSize; + + const unsigned SizeOfByte = 8; +#ifndef NDEBUG + DIVariable Var = Piece.getVariable(); + assert(!Var.isIndirect() && "indirect address for piece"); + unsigned VarSize = Var.getSizeInBits(Map); + assert(PieceSize+PieceOffset <= VarSize/SizeOfByte + && "piece is larger than or outside of variable"); + assert(PieceSize*SizeOfByte != VarSize + && "piece covers entire variable"); +#endif + if (Piece.isLocation() && Piece.getLoc().isReg()) + Asm->EmitDwarfRegOpPiece(Streamer, + Piece.getLoc(), + PieceSize*SizeOfByte); + else { + emitDebugLocValue(Streamer, Piece); + Asm->EmitDwarfOpPiece(Streamer, PieceSize*SizeOfByte); + } + } +} + + void DwarfDebug::emitDebugLocEntry(ByteStreamer &Streamer, const DebugLocEntry &Entry) { - assert(Entry.getValues().size() == 1 && - "multi-value entries are not supported yet."); const DebugLocEntry::Value Value = Entry.getValues()[0]; - DIVariable DV(Value.getVariable()); + if (Value.isVariablePiece()) + // Emit all pieces that belong to the same variable and range. + return emitLocPieces(Streamer, TypeIdentifierMap, Entry.getValues()); + + assert(Entry.getValues().size() == 1 && "only pieces may have >1 value"); + emitDebugLocValue(Streamer, Value); +} + +void DwarfDebug::emitDebugLocValue(ByteStreamer &Streamer, + const DebugLocEntry::Value &Value) { + DIVariable DV = Value.getVariable(); + // Regular entry. if (Value.isInt()) { DIBasicType BTy(resolve(DV.getType())); if (BTy.Verify() && (BTy.getEncoding() == dwarf::DW_ATE_signed || @@ -1970,24 +1724,25 @@ void DwarfDebug::emitDebugLocEntry(ByteStreamer &Streamer, } } else if (Value.isLocation()) { MachineLocation Loc = Value.getLoc(); - if (!DV.hasComplexAddress()) + DIExpression Expr = Value.getExpression(); + if (!Expr) // Regular entry. Asm->EmitDwarfRegOp(Streamer, Loc, DV.isIndirect()); else { // Complex address entry. - unsigned N = DV.getNumAddrElements(); + unsigned N = Expr.getNumElements(); unsigned i = 0; - if (N >= 2 && DV.getAddrElement(0) == DIBuilder::OpPlus) { + if (N >= 2 && Expr.getElement(0) == dwarf::DW_OP_plus) { if (Loc.getOffset()) { i = 2; Asm->EmitDwarfRegOp(Streamer, Loc, DV.isIndirect()); Streamer.EmitInt8(dwarf::DW_OP_deref, "DW_OP_deref"); Streamer.EmitInt8(dwarf::DW_OP_plus_uconst, "DW_OP_plus_uconst"); - Streamer.EmitSLEB128(DV.getAddrElement(1)); + Streamer.EmitSLEB128(Expr.getElement(1)); } else { // If first address element is OpPlus then emit // DW_OP_breg + Offset instead of DW_OP_reg + Offset. 
- MachineLocation TLoc(Loc.getReg(), DV.getAddrElement(1)); + MachineLocation TLoc(Loc.getReg(), Expr.getElement(1)); Asm->EmitDwarfRegOp(Streamer, TLoc, DV.isIndirect()); i = 2; } @@ -1997,13 +1752,16 @@ void DwarfDebug::emitDebugLocEntry(ByteStreamer &Streamer, // Emit remaining complex address elements. for (; i < N; ++i) { - uint64_t Element = DV.getAddrElement(i); - if (Element == DIBuilder::OpPlus) { + uint64_t Element = Expr.getElement(i); + if (Element == dwarf::DW_OP_plus) { Streamer.EmitInt8(dwarf::DW_OP_plus_uconst, "DW_OP_plus_uconst"); - Streamer.EmitULEB128(DV.getAddrElement(++i)); - } else if (Element == DIBuilder::OpDeref) { + Streamer.EmitULEB128(Expr.getElement(++i)); + } else if (Element == dwarf::DW_OP_deref) { if (!Loc.isReg()) Streamer.EmitInt8(dwarf::DW_OP_deref, "DW_OP_deref"); + } else if (Element == dwarf::DW_OP_piece) { + i += 3; + // handled in emitDebugLocEntry. } else llvm_unreachable("unknown Opcode found in complex address"); } @@ -2035,14 +1793,12 @@ void DwarfDebug::emitDebugLoc() { unsigned char Size = Asm->getDataLayout().getPointerSize(); for (const auto &DebugLoc : DotDebugLocEntries) { Asm->OutStreamer.EmitLabel(DebugLoc.Label); + const DwarfCompileUnit *CU = DebugLoc.CU; for (const auto &Entry : DebugLoc.List) { // Set up the range. This range is relative to the entry point of the // compile unit. This is a hard coded 0 for low_pc when we're emitting // ranges, or the DW_AT_low_pc on the compile unit otherwise. - const DwarfCompileUnit *CU = Entry.getCU(); - if (CU->getRanges().size() == 1) { - // Grab the begin symbol from the first range as our base. - const MCSymbol *Base = CU->getRanges()[0].getStart(); + if (auto *Base = CU->getBaseAddress()) { Asm->EmitLabelDifference(Entry.getBeginSym(), Base, Size); Asm->EmitLabelDifference(Entry.getEndSym(), Base, Size); } else { @@ -2172,6 +1928,10 @@ void DwarfDebug::emitDebugARanges() { for (DwarfCompileUnit *CU : CUs) { std::vector &List = Spans[CU]; + // Describe the skeleton CU's offset and length, not the dwo file's. + if (auto *Skel = CU->getSkeleton()) + CU = Skel; + // Emit size of content not including length itself. unsigned ContentSize = sizeof(int16_t) + // DWARF ARange version number @@ -2194,7 +1954,7 @@ void DwarfDebug::emitDebugARanges() { Asm->OutStreamer.AddComment("DWARF Arange version number"); Asm->EmitInt16(dwarf::DW_ARANGES_VERSION); Asm->OutStreamer.AddComment("Offset Into Debug Info Section"); - Asm->EmitSectionOffset(CU->getLocalLabelBegin(), CU->getLocalSectionSym()); + Asm->EmitSectionOffset(CU->getLabelBegin(), CU->getSectionSym()); Asm->OutStreamer.AddComment("Address Size (in bytes)"); Asm->EmitInt8(PtrSize); Asm->OutStreamer.AddComment("Segment Size (in bytes)"); @@ -2238,6 +1998,9 @@ void DwarfDebug::emitDebugRanges() { for (const auto &I : CUMap) { DwarfCompileUnit *TheCU = I.second; + if (auto *Skel = TheCU->getSkeleton()) + TheCU = Skel; + // Iterate over the misc ranges for the compile units in the module. for (const RangeSpanList &List : TheCU->getRangeLists()) { // Emit our symbol so we can find the beginning of the range. @@ -2248,9 +2011,7 @@ void DwarfDebug::emitDebugRanges() { const MCSymbol *End = Range.getEnd(); assert(Begin && "Range without a begin symbol?"); assert(End && "Range without an end symbol?"); - if (TheCU->getRanges().size() == 1) { - // Grab the begin symbol from the first range as our base. 
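The .debug_loc emission above (and the .debug_ranges emission that follows) now share one pattern: when the CU exposes a base address, begin/end are emitted as deltas from it, otherwise absolute addresses are used. A compact sketch with hypothetical stand-ins for the symbols and streamer calls:

#include <cstdio>

struct Range { unsigned Begin, End; }; // stand-in for label addresses

static void emitRangeEntry(const Range &R, const unsigned *BaseAddress) {
  if (BaseAddress) {
    // CU has a base address: emit begin/end as deltas from it.
    std::printf(".long %u\n.long %u\n", R.Begin - *BaseAddress,
                R.End - *BaseAddress);
  } else {
    // Otherwise fall back to absolute begin/end values.
    std::printf(".quad %u\n.quad %u\n", R.Begin, R.End);
  }
}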
- const MCSymbol *Base = TheCU->getRanges()[0].getStart(); + if (auto *Base = TheCU->getBaseAddress()) { Asm->EmitLabelDifference(Begin, Base, Size); Asm->EmitLabelDifference(End, Base, Size); } else { @@ -2263,23 +2024,6 @@ void DwarfDebug::emitDebugRanges() { Asm->OutStreamer.EmitIntValue(0, Size); Asm->OutStreamer.EmitIntValue(0, Size); } - - // Now emit a range for the CU itself. - if (TheCU->getRanges().size() > 1) { - Asm->OutStreamer.EmitLabel( - Asm->GetTempSymbol("cu_ranges", TheCU->getUniqueID())); - for (const RangeSpan &Range : TheCU->getRanges()) { - const MCSymbol *Begin = Range.getStart(); - const MCSymbol *End = Range.getEnd(); - assert(Begin && "Range without a begin symbol?"); - assert(End && "Range without an end symbol?"); - Asm->OutStreamer.EmitSymbolValue(Begin, Size); - Asm->OutStreamer.EmitSymbolValue(End, Size); - } - // And terminate the list with two 0 values. - Asm->OutStreamer.EmitIntValue(0, Size); - Asm->OutStreamer.EmitIntValue(0, Size); - } } } @@ -2287,11 +2031,11 @@ void DwarfDebug::emitDebugRanges() { void DwarfDebug::initSkeletonUnit(const DwarfUnit &U, DIE &Die, std::unique_ptr NewU) { - NewU->addLocalString(Die, dwarf::DW_AT_GNU_dwo_name, - U.getCUNode().getSplitDebugFilename()); + NewU->addString(Die, dwarf::DW_AT_GNU_dwo_name, + U.getCUNode().getSplitDebugFilename()); if (!CompilationDir.empty()) - NewU->addLocalString(Die, dwarf::DW_AT_comp_dir, CompilationDir); + NewU->addString(Die, dwarf::DW_AT_comp_dir, CompilationDir); addGnuPubAttributes(*NewU, Die); @@ -2316,31 +2060,13 @@ DwarfCompileUnit &DwarfDebug::constructSkeletonCU(const DwarfCompileUnit &CU) { return NewCU; } -// This DIE has the following attributes: DW_AT_comp_dir, DW_AT_dwo_name, -// DW_AT_addr_base. -DwarfTypeUnit &DwarfDebug::constructSkeletonTU(DwarfTypeUnit &TU) { - DwarfCompileUnit &CU = static_cast( - *SkeletonHolder.getUnits()[TU.getCU().getUniqueID()]); - - auto OwnedUnit = make_unique(TU.getUniqueID(), CU, Asm, this, - &SkeletonHolder); - DwarfTypeUnit &NewTU = *OwnedUnit; - NewTU.setTypeSignature(TU.getTypeSignature()); - NewTU.setType(nullptr); - NewTU.initSection( - Asm->getObjFileLowering().getDwarfTypesSection(TU.getTypeSignature())); - - initSkeletonUnit(TU, NewTU.getUnitDie(), std::move(OwnedUnit)); - return NewTU; -} - // Emit the .debug_info.dwo section for separated dwarf. This contains the // compile units that would normally be in debug_info. void DwarfDebug::emitDebugInfoDWO() { assert(useSplitDwarf() && "No split dwarf debug info?"); // Don't pass an abbrev symbol, using a constant zero instead so as not to // emit relocations into the dwo file. - InfoHolder.emitUnits(this, /* AbbrevSymbol */ nullptr); + InfoHolder.emitUnits(/* AbbrevSymbol */ nullptr); } // Emit the .debug_abbrev.dwo section for separated dwarf. 
This contains the @@ -2364,9 +2090,8 @@ void DwarfDebug::emitDebugStrDWO() { assert(useSplitDwarf() && "No split dwarf?"); const MCSection *OffSec = Asm->getObjFileLowering().getDwarfStrOffDWOSection(); - const MCSymbol *StrSym = DwarfStrSectionSym; InfoHolder.emitStrings(Asm->getObjFileLowering().getDwarfStrDWOSection(), - OffSec, StrSym); + OffSec); } MCDwarfDwoLineTable *DwarfDebug::getDwoLineTable(const DwarfCompileUnit &CU) { @@ -2406,9 +2131,9 @@ void DwarfDebug::addDwarfTypeUnitType(DwarfCompileUnit &CU, bool TopLevelType = TypeUnitsUnderConstruction.empty(); AddrPool.resetUsedFlag(); - auto OwnedUnit = - make_unique(InfoHolder.getUnits().size(), CU, Asm, this, - &InfoHolder, getDwoLineTable(CU)); + auto OwnedUnit = make_unique( + InfoHolder.getUnits().size() + TypeUnitsUnderConstruction.size(), CU, Asm, + this, &InfoHolder, getDwoLineTable(CU)); DwarfTypeUnit &NewTU = *OwnedUnit; DIE &UnitDie = NewTU.getUnitDie(); TU = &NewTU; @@ -2421,15 +2146,13 @@ void DwarfDebug::addDwarfTypeUnitType(DwarfCompileUnit &CU, uint64_t Signature = makeTypeSignature(Identifier); NewTU.setTypeSignature(Signature); - if (!useSplitDwarf()) + if (useSplitDwarf()) + NewTU.initSection(Asm->getObjFileLowering().getDwarfTypesDWOSection()); + else { CU.applyStmtList(UnitDie); - - // FIXME: Skip using COMDAT groups for type units in the .dwo file once tools - // such as DWP ( http://gcc.gnu.org/wiki/DebugFissionDWP ) can cope with it. - NewTU.initSection( - useSplitDwarf() - ? Asm->getObjFileLowering().getDwarfTypesDWOSection(Signature) - : Asm->getObjFileLowering().getDwarfTypesSection(Signature)); + NewTU.initSection( + Asm->getObjFileLowering().getDwarfTypesSection(Signature)); + } NewTU.setType(NewTU.createTypeDIE(CTy)); @@ -2457,29 +2180,12 @@ void DwarfDebug::addDwarfTypeUnitType(DwarfCompileUnit &CU, // If the type wasn't dependent on fission addresses, finish adding the type // and all its dependent types. - for (auto &TU : TypeUnitsToAdd) { - if (useSplitDwarf()) - TU.first->setSkeleton(constructSkeletonTU(*TU.first)); + for (auto &TU : TypeUnitsToAdd) InfoHolder.addUnit(std::move(TU.first)); - } } CU.addDIETypeSignature(RefDie, NewTU); } -void DwarfDebug::attachLowHighPC(DwarfCompileUnit &Unit, DIE &D, - MCSymbol *Begin, MCSymbol *End) { - assert(Begin && "Begin label should not be null!"); - assert(End && "End label should not be null!"); - assert(Begin->isDefined() && "Invalid starting label"); - assert(End->isDefined() && "Invalid end label"); - - Unit.addLabelAddress(D, dwarf::DW_AT_low_pc, Begin); - if (DwarfVersion < 4) - Unit.addLabelAddress(D, dwarf::DW_AT_high_pc, End); - else - Unit.addLabelDelta(D, dwarf::DW_AT_high_pc, End, Begin); -} - // Accelerator table mutators - add each name along with its companion // DIE to the proper table while ensuring that the name that we're going // to reference is in the string table. We do this since the names we diff --git a/lib/CodeGen/AsmPrinter/DwarfDebug.h b/lib/CodeGen/AsmPrinter/DwarfDebug.h index ffe4843..48c2809 100644 --- a/lib/CodeGen/AsmPrinter/DwarfDebug.h +++ b/lib/CodeGen/AsmPrinter/DwarfDebug.h @@ -11,8 +11,8 @@ // //===----------------------------------------------------------------------===// -#ifndef CODEGEN_ASMPRINTER_DWARFDEBUG_H__ -#define CODEGEN_ASMPRINTER_DWARFDEBUG_H__ +#ifndef LLVM_LIB_CODEGEN_ASMPRINTER_DWARFDEBUG_H +#define LLVM_LIB_CODEGEN_ASMPRINTER_DWARFDEBUG_H #include "DwarfFile.h" #include "AsmPrinterHandler.h" @@ -70,6 +70,7 @@ public: /// \brief This class is used to track local variable information. 
class DbgVariable { DIVariable Var; // Variable Descriptor. + DIExpression Expr; // Complex address location expression. DIE *TheDIE; // Variable DIE. unsigned DotDebugLocOffset; // Offset in DotDebugLocEntries. const MachineInstr *MInsn; // DBG_VALUE instruction of the variable. @@ -78,18 +79,22 @@ class DbgVariable { public: /// Construct a DbgVariable from a DIVariable. - DbgVariable(DIVariable V, DwarfDebug *DD) - : Var(V), TheDIE(nullptr), DotDebugLocOffset(~0U), MInsn(nullptr), - FrameIndex(~0), DD(DD) {} + DbgVariable(DIVariable V, DIExpression E, DwarfDebug *DD) + : Var(V), Expr(E), TheDIE(nullptr), DotDebugLocOffset(~0U), + MInsn(nullptr), FrameIndex(~0), DD(DD) { + assert(Var.Verify() && Expr.Verify()); + } /// Construct a DbgVariable from a DEBUG_VALUE. /// AbstractVar may be NULL. DbgVariable(const MachineInstr *DbgValue, DwarfDebug *DD) - : Var(DbgValue->getDebugVariable()), TheDIE(nullptr), - DotDebugLocOffset(~0U), MInsn(DbgValue), FrameIndex(~0), DD(DD) {} + : Var(DbgValue->getDebugVariable()), Expr(DbgValue->getDebugExpression()), + TheDIE(nullptr), DotDebugLocOffset(~0U), MInsn(DbgValue), + FrameIndex(~0), DD(DD) {} // Accessors. DIVariable getVariable() const { return Var; } + DIExpression getExpression() const { return Expr; } void setDIE(DIE &D) { TheDIE = &D; } DIE *getDIE() const { return TheDIE; } void setDotDebugLocOffset(unsigned O) { DotDebugLocOffset = O; } @@ -124,14 +129,14 @@ public: bool variableHasComplexAddress() const { assert(Var.isVariable() && "Invalid complex DbgVariable!"); - return Var.hasComplexAddress(); + return Expr.getNumElements() > 0; } bool isBlockByrefVariable() const; unsigned getNumAddrElements() const { assert(Var.isVariable() && "Invalid complex DbgVariable!"); - return Var.getNumAddrElements(); + return Expr.getNumElements(); } - uint64_t getAddrElement(unsigned i) const { return Var.getAddrElement(i); } + uint64_t getAddrElement(unsigned i) const { return Expr.getElement(i); } DIType getType() const; private: @@ -159,26 +164,15 @@ class DwarfDebug : public AsmPrinterHandler { // All DIEValues are allocated through this allocator. BumpPtrAllocator DIEValueAllocator; - // Handle to the compile unit used for the inline extension handling, - // this is just so that the DIEValue allocator has a place to store - // the particular elements. - // FIXME: Store these off of DwarfDebug instead? - DwarfCompileUnit *FirstCU; - // Maps MDNode with its corresponding DwarfCompileUnit. MapVector CUMap; // Maps subprogram MDNode with its corresponding DwarfCompileUnit. - DenseMap SPMap; + MapVector SPMap; // Maps a CU DIE with its corresponding DwarfCompileUnit. DenseMap CUDieMap; - /// Maps MDNodes for type system with the corresponding DIEs. These DIEs can - /// be shared across CUs, that is why we keep the map here instead - /// of in DwarfCompileUnit. - DenseMap MDTypeNodeToDieMap; - // List of all labels used in aranges generation. std::vector ArangeLabels; @@ -189,19 +183,8 @@ class DwarfDebug : public AsmPrinterHandler { typedef DenseMap > SectionMapType; SectionMapType SectionMap; - // List of arguments for current function. - SmallVector CurrentFnArguments; - LexicalScopes LScopes; - // Collection of abstract subprogram DIEs. - DenseMap AbstractSPDies; - - // Collection of dbg variables of a scope. - typedef DenseMap > - ScopeVariablesMap; - ScopeVariablesMap ScopeVariables; - // Collection of abstract variables. 
DenseMap> AbstractVariables; SmallVector, 64> ConcreteVariables; @@ -210,10 +193,6 @@ class DwarfDebug : public AsmPrinterHandler { // can refer to them in spite of insertions into this list. SmallVector DotDebugLocEntries; - // Collection of subprogram DIEs that are marked (at the end of the module) - // as DW_AT_inline. - SmallPtrSet InlinedSubprogramDIEs; - // This is a collection of subprogram MDNodes that are processed to // create DIEs. SmallPtrSet ProcessedSPNodes; @@ -243,10 +222,6 @@ class DwarfDebug : public AsmPrinterHandler { // If nonnull, stores the current machine instruction we're processing. const MachineInstr *CurMI; - // If nonnull, stores the section that the previous function was allocated to - // emitting. - const MCSection *PrevSection; - // If nonnull, stores the CU in which the previous subprogram was contained. const DwarfCompileUnit *PrevCU; @@ -258,6 +233,7 @@ class DwarfDebug : public AsmPrinterHandler { MCSymbol *DwarfDebugLocSectionSym, *DwarfLineSectionSym, *DwarfAddrSectionSym; MCSymbol *FunctionBeginSym, *FunctionEndSym; MCSymbol *DwarfInfoDWOSectionSym, *DwarfAbbrevDWOSectionSym; + MCSymbol *DwarfTypesDWOSectionSym; MCSymbol *DwarfStrDWOSectionSym; MCSymbol *DwarfGnuPubNamesSectionSym, *DwarfGnuPubTypesSectionSym; @@ -322,6 +298,7 @@ class DwarfDebug : public AsmPrinterHandler { // True iff there are multiple CUs in this module. bool SingleCU; + bool IsDarwin; AddressPool AddrPool; @@ -334,8 +311,6 @@ class DwarfDebug : public AsmPrinterHandler { MCDwarfDwoLineTable *getDwoLineTable(const DwarfCompileUnit &); - void addScopeVariable(LexicalScope *LS, DbgVariable *Var); - const SmallVectorImpl> &getUnits() { return InfoHolder.getUnits(); } @@ -350,45 +325,8 @@ class DwarfDebug : public AsmPrinterHandler { void ensureAbstractVariableIsCreatedIfScoped(const DIVariable &Var, const MDNode *Scope); - /// \brief Find DIE for the given subprogram and attach appropriate - /// DW_AT_low_pc and DW_AT_high_pc attributes. If there are global - /// variables in this scope then create and insert DIEs for these - /// variables. - DIE &updateSubprogramScopeDIE(DwarfCompileUnit &SPCU, DISubprogram SP); - - /// \brief A helper function to check whether the DIE for a given Scope is - /// going to be null. - bool isLexicalScopeDIENull(LexicalScope *Scope); - - /// \brief A helper function to construct a RangeSpanList for a given - /// lexical scope. - void addScopeRangeList(DwarfCompileUnit &TheCU, DIE &ScopeDIE, - const SmallVectorImpl &Range); - - /// \brief Construct new DW_TAG_lexical_block for this scope and - /// attach DW_AT_low_pc/DW_AT_high_pc labels. - std::unique_ptr constructLexicalScopeDIE(DwarfCompileUnit &TheCU, - LexicalScope *Scope); - - /// \brief This scope represents inlined body of a function. Construct - /// DIE to represent this concrete inlined copy of the function. - std::unique_ptr constructInlinedScopeDIE(DwarfCompileUnit &TheCU, - LexicalScope *Scope); - - /// \brief Construct a DIE for this scope. - std::unique_ptr constructScopeDIE(DwarfCompileUnit &TheCU, - LexicalScope *Scope); - void createAndAddScopeChildren(DwarfCompileUnit &TheCU, LexicalScope *Scope, - DIE &ScopeDIE); /// \brief Construct a DIE for this abstract scope. - void constructAbstractSubprogramScopeDIE(DwarfCompileUnit &TheCU, - LexicalScope *Scope); - /// \brief Construct a DIE for this subprogram scope. - DIE &constructSubprogramScopeDIE(DwarfCompileUnit &TheCU, - LexicalScope *Scope); - /// A helper function to create children of a Scope DIE. 
- DIE *createScopeChildrenDIE(DwarfCompileUnit &TheCU, LexicalScope *Scope, - SmallVectorImpl> &Children); + void constructAbstractSubprogramScopeDIE(LexicalScope *Scope); /// \brief Emit initial Dwarf sections with a label at the start of each one. void emitSectionLabels(); @@ -424,6 +362,10 @@ class DwarfDebug : public AsmPrinterHandler { /// the line matrix. void emitEndOfLineMatrix(unsigned SectionEnd); + /// \brief Emit a specified accelerator table. + void emitAccel(DwarfAccelTable &Accel, const MCSection *Section, + StringRef TableName, StringRef SymName); + /// \brief Emit visible names into a hashed accelerator table section. void emitAccelNames(); @@ -449,10 +391,9 @@ class DwarfDebug : public AsmPrinterHandler { /// index. void emitDebugPubTypes(bool GnuStyle = false); - void - emitDebugPubSection(bool GnuStyle, const MCSection *PSec, StringRef Name, - const StringMap &(DwarfUnit::*Accessor)() - const); + void emitDebugPubSection( + bool GnuStyle, const MCSection *PSec, StringRef Name, + const StringMap &(DwarfCompileUnit::*Accessor)() const); /// \brief Emit visible names into a debug str section. void emitDebugStr(); @@ -507,15 +448,8 @@ class DwarfDebug : public AsmPrinterHandler { DwarfCompileUnit &constructDwarfCompileUnit(DICompileUnit DIUnit); /// \brief Construct imported_module or imported_declaration DIE. - void constructImportedEntityDIE(DwarfCompileUnit &TheCU, const MDNode *N); - - /// \brief Construct import_module DIE. - void constructImportedEntityDIE(DwarfCompileUnit &TheCU, const MDNode *N, - DIE &Context); - - /// \brief Construct import_module DIE. - void constructImportedEntityDIE(DwarfCompileUnit &TheCU, - const DIImportedEntity &Module, DIE &Context); + void constructAndAddImportedEntityDIE(DwarfCompileUnit &TheCU, + const MDNode *N); /// \brief Register a source line with debug info. Returns the unique /// label that was emitted and which provides correspondence to the @@ -527,38 +461,29 @@ class DwarfDebug : public AsmPrinterHandler { /// ending of a scope. void identifyScopeMarkers(); - /// \brief If Var is an current function argument that add it in - /// CurrentFnArguments list. - bool addCurrentFnArgument(DbgVariable *Var, LexicalScope *Scope); - /// \brief Populate LexicalScope entries with variables' info. - void collectVariableInfo(SmallPtrSet &ProcessedVars); + void collectVariableInfo(DwarfCompileUnit &TheCU, DISubprogram SP, + SmallPtrSetImpl &ProcessedVars); + + /// \brief Build the location list for all DBG_VALUEs in the + /// function that describe the same variable. + void buildLocationList(SmallVectorImpl &DebugLoc, + const DbgValueHistoryMap::InstrRanges &Ranges); /// \brief Collect variable information from the side table maintained /// by MMI. - void collectVariableInfoFromMMITable(SmallPtrSet &P); + void collectVariableInfoFromMMITable(SmallPtrSetImpl &P); /// \brief Ensure that a label will be emitted before MI. void requestLabelBeforeInsn(const MachineInstr *MI) { LabelsBeforeInsn.insert(std::make_pair(MI, nullptr)); } - /// \brief Return Label preceding the instruction. - MCSymbol *getLabelBeforeInsn(const MachineInstr *MI); - /// \brief Ensure that a label will be emitted after MI. void requestLabelAfterInsn(const MachineInstr *MI) { LabelsAfterInsn.insert(std::make_pair(MI, nullptr)); } - /// \brief Return Label immediately following the instruction. 
- MCSymbol *getLabelAfterInsn(const MachineInstr *MI); - - void attachRangesOrLowHighPC(DwarfCompileUnit &Unit, DIE &D, - const SmallVectorImpl &Ranges); - void attachLowHighPC(DwarfCompileUnit &Unit, DIE &D, MCSymbol *Begin, - MCSymbol *End); - public: //===--------------------------------------------------------------------===// // Main entry points. @@ -567,13 +492,6 @@ public: ~DwarfDebug() override; - void insertDIE(const MDNode *TypeMD, DIE *Die) { - MDTypeNodeToDieMap.insert(std::make_pair(TypeMD, Die)); - } - DIE *getDIE(const MDNode *TypeMD) { - return MDTypeNodeToDieMap.lookup(TypeMD); - } - /// \brief Emit all Dwarf sections that should come prior to the /// content. void beginModule(); @@ -626,11 +544,15 @@ public: /// Returns the section symbol for the .debug_loc section. MCSymbol *getDebugLocSym() const { return DwarfDebugLocSectionSym; } - /// Returns the previous section that was emitted into. - const MCSection *getPrevSection() const { return PrevSection; } + /// Returns the section symbol for the .debug_str section. + MCSymbol *getDebugStrSym() const { return DwarfStrSectionSym; } + + /// Returns the section symbol for the .debug_ranges section. + MCSymbol *getRangeSectionSym() const { return DwarfDebugRangeSectionSym; } /// Returns the previous CU that was being updated const DwarfCompileUnit *getPrevCU() const { return PrevCU; } + void setPrevCU(const DwarfCompileUnit *PrevCU) { this->PrevCU = PrevCU; } /// Returns the entries for the .debug_loc section. const SmallVectorImpl & @@ -641,6 +563,13 @@ public: /// \brief Emit an entry for the debug loc section. This can be used to /// handle an entry that's going to be emitted into the debug loc section. void emitDebugLocEntry(ByteStreamer &Streamer, const DebugLocEntry &Entry); + /// \brief emit a single value for the debug loc section. + void emitDebugLocValue(ByteStreamer &Streamer, + const DebugLocEntry::Value &Value); + /// Emits an optimal (=sorted) sequence of DW_OP_pieces. + void emitLocPieces(ByteStreamer &Streamer, + const DITypeIdentifierMap &Map, + ArrayRef Values); /// Emit the location for a debug loc entry, including the size header. void emitDebugLocEntryLocation(const DebugLocEntry &Entry); @@ -674,6 +603,40 @@ public: void addAccelNamespace(StringRef Name, const DIE &Die); void addAccelType(StringRef Name, const DIE &Die, char Flags); + + const MachineFunction *getCurrentFunction() const { return CurFn; } + const MCSymbol *getFunctionBeginSym() const { return FunctionBeginSym; } + const MCSymbol *getFunctionEndSym() const { return FunctionEndSym; } + + iterator_range + findImportedEntitiesForScope(const MDNode *Scope) const { + return make_range(std::equal_range( + ScopesWithImportedEntities.begin(), ScopesWithImportedEntities.end(), + std::pair(Scope, nullptr), + less_first())); + } + + /// \brief A helper function to check whether the DIE for a given Scope is + /// going to be null. + bool isLexicalScopeDIENull(LexicalScope *Scope); + + /// \brief Return Label preceding the instruction. + MCSymbol *getLabelBeforeInsn(const MachineInstr *MI); + + /// \brief Return Label immediately following the instruction. + MCSymbol *getLabelAfterInsn(const MachineInstr *MI); + + // FIXME: Consider rolling ranges up into DwarfDebug since we use a single + // range_base anyway, so there's no need to keep them as separate per-CU range + // lists. 
(though one day we might end up with a range.dwo section, in which + // case it'd go to DwarfFile) + unsigned getNextRangeNumber() { return GlobalRangeCount++; } + + // FIXME: Sink these functions down into DwarfFile/Dwarf*Unit. + + SmallPtrSet &getProcessedSPNodes() { + return ProcessedSPNodes; + } }; } // End of namespace llvm diff --git a/lib/CodeGen/AsmPrinter/DwarfException.h b/lib/CodeGen/AsmPrinter/DwarfException.h index 0440fce..e8867c0 100644 --- a/lib/CodeGen/AsmPrinter/DwarfException.h +++ b/lib/CodeGen/AsmPrinter/DwarfException.h @@ -11,8 +11,8 @@ // //===----------------------------------------------------------------------===// -#ifndef LLVM_CODEGEN_ASMPRINTER_DWARFEXCEPTION_H -#define LLVM_CODEGEN_ASMPRINTER_DWARFEXCEPTION_H +#ifndef LLVM_LIB_CODEGEN_ASMPRINTER_DWARFEXCEPTION_H +#define LLVM_LIB_CODEGEN_ASMPRINTER_DWARFEXCEPTION_H #include "EHStreamer.h" #include "llvm/CodeGen/AsmPrinter.h" @@ -81,39 +81,6 @@ public: /// endFunction - Gather and emit post-function exception information. void endFunction(const MachineFunction *) override; }; - -class Win64Exception : public EHStreamer { - /// shouldEmitPersonality - Per-function flag to indicate if personality - /// info should be emitted. - bool shouldEmitPersonality; - - /// shouldEmitLSDA - Per-function flag to indicate if the LSDA - /// should be emitted. - bool shouldEmitLSDA; - - /// shouldEmitMoves - Per-function flag to indicate if frame moves info - /// should be emitted. - bool shouldEmitMoves; - -public: - //===--------------------------------------------------------------------===// - // Main entry points. - // - Win64Exception(AsmPrinter *A); - virtual ~Win64Exception(); - - /// endModule - Emit all exception information that should come after the - /// content. - void endModule() override; - - /// beginFunction - Gather pre-function exception information. Assumes being - /// emitted immediately after the function entry point. - void beginFunction(const MachineFunction *MF) override; - - /// endFunction - Gather and emit post-function exception information. - void endFunction(const MachineFunction *) override; -}; - } // End of namespace llvm #endif diff --git a/lib/CodeGen/AsmPrinter/DwarfFile.cpp b/lib/CodeGen/AsmPrinter/DwarfFile.cpp index 737ee54..50180ea 100644 --- a/lib/CodeGen/AsmPrinter/DwarfFile.cpp +++ b/lib/CodeGen/AsmPrinter/DwarfFile.cpp @@ -18,8 +18,9 @@ #include "llvm/Target/TargetLoweringObjectFile.h" namespace llvm { -DwarfFile::DwarfFile(AsmPrinter *AP, StringRef Pref, BumpPtrAllocator &DA) - : Asm(AP), StrPool(DA, *Asm, Pref) {} +DwarfFile::DwarfFile(AsmPrinter *AP, DwarfDebug &DD, StringRef Pref, + BumpPtrAllocator &DA) + : Asm(AP), DD(DD), StrPool(DA, *Asm, Pref) {} DwarfFile::~DwarfFile() {} @@ -48,25 +49,18 @@ void DwarfFile::addUnit(std::unique_ptr U) { // Emit the various dwarf units to the unit section USection with // the abbreviations going into ASection. -void DwarfFile::emitUnits(DwarfDebug *DD, const MCSymbol *ASectionSym) { +void DwarfFile::emitUnits(const MCSymbol *ASectionSym) { for (const auto &TheU : CUs) { DIE &Die = TheU->getUnitDie(); const MCSection *USection = TheU->getSection(); Asm->OutStreamer.SwitchSection(USection); - // Emit the compile units header. 
- Asm->OutStreamer.EmitLabel(TheU->getLabelBegin()); - - // Emit size of content not including length itself - Asm->OutStreamer.AddComment("Length of Unit"); - Asm->EmitInt32(TheU->getHeaderSize() + Die.getSize()); - TheU->emitHeader(ASectionSym); - DD->emitDIE(Die); - Asm->OutStreamer.EmitLabel(TheU->getLabelEnd()); + DD.emitDIE(Die); } } + // Compute the size and offset for each DIE. void DwarfFile::computeSizeAndOffsets() { // Offset from the first CU in the debug info section is 0 initially. @@ -149,8 +143,44 @@ void DwarfFile::emitAbbrevs(const MCSection *Section) { // Emit strings into a string section. void DwarfFile::emitStrings(const MCSection *StrSection, - const MCSection *OffsetSection, - const MCSymbol *StrSecSym) { - StrPool.emit(*Asm, StrSection, OffsetSection, StrSecSym); + const MCSection *OffsetSection) { + StrPool.emit(*Asm, StrSection, OffsetSection); +} + +void DwarfFile::addScopeVariable(LexicalScope *LS, DbgVariable *Var) { + SmallVectorImpl &Vars = ScopeVariables[LS]; + DIVariable DV = Var->getVariable(); + // Variables with positive arg numbers are parameters. + if (unsigned ArgNum = DV.getArgNumber()) { + // Keep all parameters in order at the start of the variable list to ensure + // function types are correct (no out-of-order parameters) + // + // This could be improved by only doing it for optimized builds (unoptimized + // builds have the right order to begin with), searching from the back (this + // would catch the unoptimized case quickly), or doing a binary search + // rather than linear search. + auto I = Vars.begin(); + while (I != Vars.end()) { + unsigned CurNum = (*I)->getVariable().getArgNumber(); + // A local (non-parameter) variable has been found, insert immediately + // before it. + if (CurNum == 0) + break; + // A later indexed parameter has been found, insert immediately before it. + if (CurNum > ArgNum) + break; + // FIXME: There are still some cases where two inlined functions are + // conflated together (two calls to the same function at the same + // location (eg: via a macro, or without column info, etc)) and then + // their arguments are conflated as well. + assert((LS->getParent() || CurNum != ArgNum) && + "Duplicate argument for top level (non-inlined) function"); + ++I; + } + Vars.insert(I, Var); + return; + } + + Vars.push_back(Var); } } diff --git a/lib/CodeGen/AsmPrinter/DwarfFile.h b/lib/CodeGen/AsmPrinter/DwarfFile.h index 3985eb2..9d64bfc 100644 --- a/lib/CodeGen/AsmPrinter/DwarfFile.h +++ b/lib/CodeGen/AsmPrinter/DwarfFile.h @@ -7,8 +7,8 @@ // //===----------------------------------------------------------------------===// -#ifndef CODEGEN_ASMPRINTER_DWARFFILE_H__ -#define CODEGEN_ASMPRINTER_DWARFFILE_H__ +#ifndef LLVM_LIB_CODEGEN_ASMPRINTER_DWARFFILE_H +#define LLVM_LIB_CODEGEN_ASMPRINTER_DWARFFILE_H #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/FoldingSet.h" @@ -24,10 +24,13 @@ namespace llvm { class AsmPrinter; +class DbgVariable; class DwarfUnit; class DIEAbbrev; class MCSymbol; class DIE; +class DISubprogram; +class LexicalScope; class StringRef; class DwarfDebug; class MCSection; @@ -35,6 +38,8 @@ class DwarfFile { // Target of Dwarf emission, used for sizing of abbreviations. AsmPrinter *Asm; + DwarfDebug ⅅ + // Used to uniquely define abbreviations. FoldingSet AbbreviationsSet; @@ -46,8 +51,20 @@ class DwarfFile { DwarfStringPool StrPool; + // Collection of dbg variables of a scope. + DenseMap> ScopeVariables; + + // Collection of abstract subprogram DIEs. 
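A small standalone sketch of the insertion policy used by DwarfFile::addScopeVariable above: parameters stay at the front of the list, ordered by argument number, and locals (argument number 0) are appended at the back. Var and addScopeVariableSketch are hypothetical stand-ins for DbgVariable and the real method:

#include <vector>

struct Var {
  unsigned ArgNum; // 0 for locals, 1-based index for parameters
};

static void addScopeVariableSketch(std::vector<Var> &Vars, const Var &V) {
  if (V.ArgNum == 0) {
    Vars.push_back(V); // locals simply go to the back
    return;
  }
  // Insert before the first local or the first parameter with a larger index,
  // skipping past any parameters with smaller or equal argument numbers.
  auto I = Vars.begin();
  while (I != Vars.end() && I->ArgNum != 0 && I->ArgNum <= V.ArgNum)
    ++I;
  Vars.insert(I, V);
}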
+ DenseMap AbstractSPDies; + + /// Maps MDNodes for type system with the corresponding DIEs. These DIEs can + /// be shared across CUs, that is why we keep the map here instead + /// of in DwarfCompileUnit. + DenseMap MDTypeNodeToDieMap; + public: - DwarfFile(AsmPrinter *AP, StringRef Pref, BumpPtrAllocator &DA); + DwarfFile(AsmPrinter *AP, DwarfDebug &DD, StringRef Pref, + BumpPtrAllocator &DA); ~DwarfFile(); @@ -67,18 +84,34 @@ public: /// \brief Emit all of the units to the section listed with the given /// abbreviation section. - void emitUnits(DwarfDebug *DD, const MCSymbol *ASectionSym); + void emitUnits(const MCSymbol *ASectionSym); /// \brief Emit a set of abbreviations to the specific section. void emitAbbrevs(const MCSection *); /// \brief Emit all of the strings to the section given. void emitStrings(const MCSection *StrSection, - const MCSection *OffsetSection = nullptr, - const MCSymbol *StrSecSym = nullptr); + const MCSection *OffsetSection = nullptr); /// \brief Returns the string pool. DwarfStringPool &getStringPool() { return StrPool; } + + void addScopeVariable(LexicalScope *LS, DbgVariable *Var); + + DenseMap> &getScopeVariables() { + return ScopeVariables; + } + + DenseMap &getAbstractSPDies() { + return AbstractSPDies; + } + + void insertDIE(const MDNode *TypeMD, DIE *Die) { + MDTypeNodeToDieMap.insert(std::make_pair(TypeMD, Die)); + } + DIE *getDIE(const MDNode *TypeMD) { + return MDTypeNodeToDieMap.lookup(TypeMD); + } }; } #endif diff --git a/lib/CodeGen/AsmPrinter/DwarfStringPool.cpp b/lib/CodeGen/AsmPrinter/DwarfStringPool.cpp index 72cab60..d76b66c 100644 --- a/lib/CodeGen/AsmPrinter/DwarfStringPool.cpp +++ b/lib/CodeGen/AsmPrinter/DwarfStringPool.cpp @@ -12,14 +12,11 @@ using namespace llvm; -MCSymbol *DwarfStringPool::getSectionSymbol() { return SectionSymbol; } - static std::pair & getEntry(AsmPrinter &Asm, StringMap, BumpPtrAllocator &> &Pool, StringRef Prefix, StringRef Str) { - std::pair &Entry = - Pool.GetOrCreateValue(Str).getValue(); + std::pair &Entry = Pool[Str]; if (!Entry.first) { Entry.second = Pool.size() - 1; Entry.first = Asm.GetTempSymbol(Prefix, Entry.second); @@ -36,8 +33,7 @@ unsigned DwarfStringPool::getIndex(AsmPrinter &Asm, StringRef Str) { } void DwarfStringPool::emit(AsmPrinter &Asm, const MCSection *StrSection, - const MCSection *OffsetSection, - const MCSymbol *StrSecSym) { + const MCSection *OffsetSection) { if (Pool.empty()) return; diff --git a/lib/CodeGen/AsmPrinter/DwarfStringPool.h b/lib/CodeGen/AsmPrinter/DwarfStringPool.h index c1615fb..ab32c1b 100644 --- a/lib/CodeGen/AsmPrinter/DwarfStringPool.h +++ b/lib/CodeGen/AsmPrinter/DwarfStringPool.h @@ -7,8 +7,8 @@ // //===----------------------------------------------------------------------===// -#ifndef CODEGEN_ASMPRINTER_STRINGPOOL_H__ -#define CODEGEN_ASMPRINTER_STRINGPOOL_H__ +#ifndef LLVM_LIB_CODEGEN_ASMPRINTER_DWARFSTRINGPOOL_H +#define LLVM_LIB_CODEGEN_ASMPRINTER_DWARFSTRINGPOOL_H #include "llvm/ADT/StringMap.h" #include "llvm/CodeGen/AsmPrinter.h" @@ -28,18 +28,13 @@ class StringRef; class DwarfStringPool { StringMap, BumpPtrAllocator &> Pool; StringRef Prefix; - MCSymbol *SectionSymbol; public: DwarfStringPool(BumpPtrAllocator &A, AsmPrinter &Asm, StringRef Prefix) - : Pool(A), Prefix(Prefix), SectionSymbol(Asm.GetTempSymbol(Prefix)) {} + : Pool(A), Prefix(Prefix) {} void emit(AsmPrinter &Asm, const MCSection *StrSection, - const MCSection *OffsetSection = nullptr, - const MCSymbol *StrSecSym = nullptr); - - /// \brief Returns the entry into the start of the pool. 
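The getEntry change above leans on the fact that indexing a map with a new key default-constructs its value, so the first lookup of a string can assign it the next free pool index on the spot. A rough sketch of the same idiom with std::map (the real code uses an LLVM StringMap keyed to MCSymbols; the ".Lstr" label naming here is purely illustrative):

#include <map>
#include <string>
#include <utility>

// Maps each interned string to (symbol-name stand-in, index into the pool).
using Pool = std::map<std::string, std::pair<std::string, unsigned>>;

static std::pair<std::string, unsigned> &getEntry(Pool &P,
                                                  const std::string &Str) {
  auto &Entry = P[Str];      // default-constructs the entry on first use
  if (Entry.first.empty()) { // first time this string is seen
    Entry.second = P.size() - 1;                           // next free index
    Entry.first = ".Lstr" + std::to_string(Entry.second);  // label stand-in
  }
  return Entry;
}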
- MCSymbol *getSectionSymbol(); + const MCSection *OffsetSection = nullptr); /// \brief Returns an entry into the string pool with the given /// string text. diff --git a/lib/CodeGen/AsmPrinter/DwarfUnit.cpp b/lib/CodeGen/AsmPrinter/DwarfUnit.cpp index 9538bee..919d9d2 100644 --- a/lib/CodeGen/AsmPrinter/DwarfUnit.cpp +++ b/lib/CodeGen/AsmPrinter/DwarfUnit.cpp @@ -12,7 +12,9 @@ //===----------------------------------------------------------------------===// #include "DwarfUnit.h" + #include "DwarfAccelTable.h" +#include "DwarfCompileUnit.h" #include "DwarfDebug.h" #include "llvm/ADT/APFloat.h" #include "llvm/IR/Constants.h" @@ -30,6 +32,7 @@ #include "llvm/Target/TargetLoweringObjectFile.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/Target/TargetSubtargetInfo.h" using namespace llvm; @@ -44,20 +47,12 @@ GenerateDwarfTypeUnits("generate-type-units", cl::Hidden, DwarfUnit::DwarfUnit(unsigned UID, dwarf::Tag UnitTag, DICompileUnit Node, AsmPrinter *A, DwarfDebug *DW, DwarfFile *DWU) : UniqueID(UID), CUNode(Node), UnitDie(UnitTag), DebugInfoOffset(0), Asm(A), - DD(DW), DU(DWU), IndexTyDie(nullptr), Section(nullptr), - Skeleton(nullptr) { + DD(DW), DU(DWU), IndexTyDie(nullptr), Section(nullptr) { assert(UnitTag == dwarf::DW_TAG_compile_unit || UnitTag == dwarf::DW_TAG_type_unit); DIEIntegerOne = new (DIEValueAllocator) DIEInteger(1); } -DwarfCompileUnit::DwarfCompileUnit(unsigned UID, DICompileUnit Node, - AsmPrinter *A, DwarfDebug *DW, - DwarfFile *DWU) - : DwarfUnit(UID, dwarf::DW_TAG_compile_unit, Node, A, DW, DWU) { - insertDIE(Node, &getUnitDie()); -} - DwarfTypeUnit::DwarfTypeUnit(unsigned UID, DwarfCompileUnit &CU, AsmPrinter *A, DwarfDebug *DW, DwarfFile *DWU, MCDwarfDwoLineTable *SplitLineTable) @@ -146,7 +141,7 @@ static bool isShareableAcrossCUs(DIDescriptor D) { /// will be kept in DwarfDebug for shareable DIEs. DIE *DwarfUnit::getDIE(DIDescriptor D) const { if (isShareableAcrossCUs(D)) - return DD->getDIE(D); + return DU->getDIE(D); return MDNodeToDieMap.lookup(D); } @@ -155,7 +150,7 @@ DIE *DwarfUnit::getDIE(DIDescriptor D) const { /// will be kept in DwarfDebug for shareable DIEs. void DwarfUnit::insertDIE(DIDescriptor Desc, DIE *D) { if (isShareableAcrossCUs(Desc)) { - DD->insertDIE(Desc, D); + DU->insertDIE(Desc, D); return; } MDNodeToDieMap.insert(std::make_pair(Desc, D)); @@ -206,10 +201,14 @@ void DwarfUnit::addSInt(DIELoc &Die, Optional Form, /// table. 
void DwarfUnit::addString(DIE &Die, dwarf::Attribute Attribute, StringRef String) { - - if (!DD->useSplitDwarf()) + if (!isDwoUnit()) return addLocalString(Die, Attribute, String); + addIndexedString(Die, Attribute, String); +} + +void DwarfUnit::addIndexedString(DIE &Die, dwarf::Attribute Attribute, + StringRef String) { unsigned idx = DU->getStringPool().getIndex(*Asm, String); DIEValue *Value = new (DIEValueAllocator) DIEInteger(idx); DIEValue *Str = new (DIEValueAllocator) DIEString(Value, String); @@ -224,31 +223,12 @@ void DwarfUnit::addLocalString(DIE &Die, dwarf::Attribute Attribute, DIEValue *Value; if (Asm->MAI->doesDwarfUseRelocationsAcrossSections()) Value = new (DIEValueAllocator) DIELabel(Symb); - else { - MCSymbol *StringPool = DU->getStringPool().getSectionSymbol(); - Value = new (DIEValueAllocator) DIEDelta(Symb, StringPool); - } + else + Value = new (DIEValueAllocator) DIEDelta(Symb, DD->getDebugStrSym()); DIEValue *Str = new (DIEValueAllocator) DIEString(Value, String); Die.addValue(Attribute, dwarf::DW_FORM_strp, Str); } -/// addExpr - Add a Dwarf expression attribute data and value. -/// -void DwarfUnit::addExpr(DIELoc &Die, dwarf::Form Form, const MCExpr *Expr) { - DIEValue *Value = new (DIEValueAllocator) DIEExpr(Expr); - Die.addValue((dwarf::Attribute)0, Form, Value); -} - -/// addLocationList - Add a Dwarf loclistptr attribute data and value. -/// -void DwarfUnit::addLocationList(DIE &Die, dwarf::Attribute Attribute, - unsigned Index) { - DIEValue *Value = new (DIEValueAllocator) DIELocList(Index); - dwarf::Form Form = DD->getDwarfVersion() >= 4 ? dwarf::DW_FORM_sec_offset - : dwarf::DW_FORM_data4; - Die.addValue(Attribute, Form, Value); -} - /// addLabel - Add a Dwarf label attribute data and value. /// void DwarfUnit::addLabel(DIE &Die, dwarf::Attribute Attribute, dwarf::Form Form, @@ -261,16 +241,6 @@ void DwarfUnit::addLabel(DIELoc &Die, dwarf::Form Form, const MCSymbol *Label) { addLabel(Die, (dwarf::Attribute)0, Form, Label); } -/// addSectionLabel - Add a Dwarf section label attribute data and value. -/// -void DwarfUnit::addSectionLabel(DIE &Die, dwarf::Attribute Attribute, - const MCSymbol *Label) { - if (DD->getDwarfVersion() >= 4) - addLabel(Die, Attribute, dwarf::DW_FORM_sec_offset, Label); - else - addLabel(Die, Attribute, dwarf::DW_FORM_data4, Label); -} - /// addSectionOffset - Add an offset into a section attribute data and value. /// void DwarfUnit::addSectionOffset(DIE &Die, dwarf::Attribute Attribute, @@ -281,45 +251,6 @@ void DwarfUnit::addSectionOffset(DIE &Die, dwarf::Attribute Attribute, addUInt(Die, Attribute, dwarf::DW_FORM_data4, Integer); } -/// addLabelAddress - Add a dwarf label attribute data and value using -/// DW_FORM_addr or DW_FORM_GNU_addr_index. -/// -void DwarfCompileUnit::addLabelAddress(DIE &Die, dwarf::Attribute Attribute, - const MCSymbol *Label) { - - if (!DD->useSplitDwarf()) - return addLocalLabelAddress(Die, Attribute, Label); - - if (Label) - DD->addArangeLabel(SymbolCU(this, Label)); - - unsigned idx = DD->getAddressPool().getIndex(Label); - DIEValue *Value = new (DIEValueAllocator) DIEInteger(idx); - Die.addValue(Attribute, dwarf::DW_FORM_GNU_addr_index, Value); -} - -void DwarfCompileUnit::addLocalLabelAddress(DIE &Die, - dwarf::Attribute Attribute, - const MCSymbol *Label) { - if (Label) - DD->addArangeLabel(SymbolCU(this, Label)); - - Die.addValue(Attribute, dwarf::DW_FORM_addr, - Label ? 
(DIEValue *)new (DIEValueAllocator) DIELabel(Label) - : new (DIEValueAllocator) DIEInteger(0)); -} - -unsigned DwarfCompileUnit::getOrCreateSourceID(StringRef FileName, StringRef DirName) { - // If we print assembly, we can't separate .file entries according to - // compile units. Thus all files will belong to the default compile unit. - - // FIXME: add a better feature test than hasRawTextSupport. Even better, - // extend .file to support this. - return Asm->OutStreamer.EmitDwarfFileDirective( - 0, DirName, FileName, - Asm->OutStreamer.hasRawTextSupport() ? 0 : getUniqueID()); -} - unsigned DwarfTypeUnit::getOrCreateSourceID(StringRef FileName, StringRef DirName) { return SplitLineTable ? SplitLineTable->getFile(DirName, FileName) : getCU().getOrCreateSourceID(FileName, DirName); @@ -339,16 +270,6 @@ void DwarfUnit::addOpAddress(DIELoc &Die, const MCSymbol *Sym) { } } -/// addSectionDelta - Add a section label delta attribute data and value. -/// -void DwarfUnit::addSectionDelta(DIE &Die, dwarf::Attribute Attribute, - const MCSymbol *Hi, const MCSymbol *Lo) { - DIEValue *Value = new (DIEValueAllocator) DIEDelta(Hi, Lo); - Die.addValue(Attribute, DD->getDwarfVersion() >= 4 ? dwarf::DW_FORM_sec_offset - : dwarf::DW_FORM_data4, - Value); -} - void DwarfUnit::addLabelDelta(DIE &Die, dwarf::Attribute Attribute, const MCSymbol *Hi, const MCSymbol *Lo) { DIEValue *Value = new (DIEValueAllocator) DIEDelta(Hi, Lo); @@ -477,22 +398,12 @@ void DwarfUnit::addSourceLine(DIE &Die, DINameSpace NS) { addSourceLine(Die, NS.getLineNumber(), NS.getFilename(), NS.getDirectory()); } -/// addVariableAddress - Add DW_AT_location attribute for a -/// DbgVariable based on provided MachineLocation. -void DwarfUnit::addVariableAddress(const DbgVariable &DV, DIE &Die, - MachineLocation Location) { - if (DV.variableHasComplexAddress()) - addComplexAddress(DV, Die, dwarf::DW_AT_location, Location); - else if (DV.isBlockByrefVariable()) - addBlockByrefAddress(DV, Die, dwarf::DW_AT_location, Location); - else - addAddress(Die, dwarf::DW_AT_location, Location, - DV.getVariable().isIndirect()); -} - /// addRegisterOp - Add register operand. -void DwarfUnit::addRegisterOp(DIELoc &TheDie, unsigned Reg) { - const TargetRegisterInfo *RI = Asm->TM.getRegisterInfo(); +// FIXME: Ideally, this would share the implementation with +// AsmPrinter::EmitDwarfRegOpPiece. +void DwarfUnit::addRegisterOpPiece(DIELoc &TheDie, unsigned Reg, + unsigned SizeInBits, unsigned OffsetInBits) { + const TargetRegisterInfo *RI = Asm->TM.getSubtargetImpl()->getRegisterInfo(); int DWReg = RI->getDwarfRegNum(Reg, false); bool isSubRegister = DWReg < 0; @@ -511,7 +422,7 @@ void DwarfUnit::addRegisterOp(DIELoc &TheDie, unsigned Reg) { return; } - // Emit register + // Emit register. if (DWReg < 32) addUInt(TheDie, dwarf::DW_FORM_data1, dwarf::DW_OP_reg0 + DWReg); else { @@ -519,18 +430,34 @@ void DwarfUnit::addRegisterOp(DIELoc &TheDie, unsigned Reg) { addUInt(TheDie, dwarf::DW_FORM_udata, DWReg); } - // Emit Mask - if (isSubRegister) { - unsigned Size = RI->getSubRegIdxSize(Idx); - unsigned Offset = RI->getSubRegIdxOffset(Idx); - if (Offset > 0) { + // Emit mask. + bool isPiece = SizeInBits > 0; + if (isSubRegister || isPiece) { + const unsigned SizeOfByte = 8; + unsigned RegSizeInBits = RI->getSubRegIdxSize(Idx); + unsigned RegOffsetInBits = RI->getSubRegIdxOffset(Idx); + unsigned PieceSizeInBits = std::max(SizeInBits, RegSizeInBits); + unsigned PieceOffsetInBits = OffsetInBits ? 
OffsetInBits : RegOffsetInBits; + assert(RegSizeInBits >= SizeInBits && "register smaller than value"); + + if (RegOffsetInBits != PieceOffsetInBits) { + // Manually shift the value into place, since the DW_OP_piece + // describes the part of the variable, not the position of the + // subregister. + addUInt(TheDie, dwarf::DW_FORM_data1, dwarf::DW_OP_constu); + addUInt(TheDie, dwarf::DW_FORM_data1, RegOffsetInBits); + addUInt(TheDie, dwarf::DW_FORM_data1, dwarf::DW_OP_shr); + } + + if (PieceOffsetInBits > 0 || PieceSizeInBits % SizeOfByte) { + assert(PieceSizeInBits > 0 && "piece has zero size"); addUInt(TheDie, dwarf::DW_FORM_data1, dwarf::DW_OP_bit_piece); - addUInt(TheDie, dwarf::DW_FORM_data1, Size); - addUInt(TheDie, dwarf::DW_FORM_data1, Offset); - } else { - unsigned ByteSize = Size / 8; // Assuming 8 bits per byte. + addUInt(TheDie, dwarf::DW_FORM_data1, PieceSizeInBits); + addUInt(TheDie, dwarf::DW_FORM_data1, PieceOffsetInBits); + } else { + assert(PieceSizeInBits > 0 && "piece has zero size"); addUInt(TheDie, dwarf::DW_FORM_data1, dwarf::DW_OP_piece); - addUInt(TheDie, dwarf::DW_FORM_data1, ByteSize); + addUInt(TheDie, dwarf::DW_FORM_data1, PieceSizeInBits/SizeOfByte); } } } @@ -538,9 +465,9 @@ void DwarfUnit::addRegisterOp(DIELoc &TheDie, unsigned Reg) { /// addRegisterOffset - Add register offset. void DwarfUnit::addRegisterOffset(DIELoc &TheDie, unsigned Reg, int64_t Offset) { - const TargetRegisterInfo *RI = Asm->TM.getRegisterInfo(); + const TargetRegisterInfo *RI = Asm->TM.getSubtargetImpl()->getRegisterInfo(); unsigned DWReg = RI->getDwarfRegNum(Reg, false); - const TargetRegisterInfo *TRI = Asm->TM.getRegisterInfo(); + const TargetRegisterInfo *TRI = Asm->TM.getSubtargetImpl()->getRegisterInfo(); if (Reg == TRI->getFrameRegister(*Asm->MF)) // If variable offset is based in frame register then use fbreg. addUInt(TheDie, dwarf::DW_FORM_data1, dwarf::DW_OP_fbreg); @@ -553,63 +480,6 @@ void DwarfUnit::addRegisterOffset(DIELoc &TheDie, unsigned Reg, addSInt(TheDie, dwarf::DW_FORM_sdata, Offset); } -/// addAddress - Add an address attribute to a die based on the location -/// provided. -void DwarfUnit::addAddress(DIE &Die, dwarf::Attribute Attribute, - const MachineLocation &Location, bool Indirect) { - DIELoc *Loc = new (DIEValueAllocator) DIELoc(); - - if (Location.isReg() && !Indirect) - addRegisterOp(*Loc, Location.getReg()); - else { - addRegisterOffset(*Loc, Location.getReg(), Location.getOffset()); - if (Indirect && !Location.isReg()) { - addUInt(*Loc, dwarf::DW_FORM_data1, dwarf::DW_OP_deref); - } - } - - // Now attach the location information to the DIE. - addBlock(Die, Attribute, Loc); -} - -/// addComplexAddress - Start with the address based on the location provided, -/// and generate the DWARF information necessary to find the actual variable -/// given the extra address information encoded in the DbgVariable, starting -/// from the starting location. Add the DWARF information to the die. -/// -void DwarfUnit::addComplexAddress(const DbgVariable &DV, DIE &Die, - dwarf::Attribute Attribute, - const MachineLocation &Location) { - DIELoc *Loc = new (DIEValueAllocator) DIELoc(); - unsigned N = DV.getNumAddrElements(); - unsigned i = 0; - if (Location.isReg()) { - if (N >= 2 && DV.getAddrElement(0) == DIBuilder::OpPlus) { - // If first address element is OpPlus then emit - // DW_OP_breg + Offset instead of DW_OP_reg + Offset. 
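Aside: the piece logic above reduces to a simple choice — use DW_OP_bit_piece (size and offset in bits) whenever the piece starts at a non-zero bit offset or is not a whole number of bytes, and DW_OP_piece (size in bytes) otherwise. A rough self-contained sketch of just that decision, with opcode values taken from the DWARF spec and the sub-register/shift handling omitted (this is not the DwarfUnit API):

#include <cassert>
#include <cstdint>
#include <cstdio>
#include <vector>

// DWARF expression opcodes (values per the DWARF 4 specification).
enum : uint8_t { DW_OP_piece = 0x93, DW_OP_bit_piece = 0x9d };

// Append a piece description for a value of SizeInBits starting at
// OffsetInBits within the containing location.
static void addPiece(std::vector<uint64_t> &Expr, unsigned SizeInBits,
                     unsigned OffsetInBits) {
  assert(SizeInBits > 0 && "piece has zero size");
  const unsigned SizeOfByte = 8;
  if (OffsetInBits > 0 || SizeInBits % SizeOfByte) {
    // Not byte-aligned: DW_OP_bit_piece takes size and offset in bits.
    Expr.push_back(DW_OP_bit_piece);
    Expr.push_back(SizeInBits);
    Expr.push_back(OffsetInBits);
  } else {
    // Byte-aligned at offset zero: DW_OP_piece takes a size in bytes.
    Expr.push_back(DW_OP_piece);
    Expr.push_back(SizeInBits / SizeOfByte);
  }
}

int main() {
  std::vector<uint64_t> Expr;
  addPiece(Expr, 32, 0);  // -> DW_OP_piece 4
  addPiece(Expr, 12, 20); // -> DW_OP_bit_piece 12 20
  for (uint64_t V : Expr)
    std::printf("0x%llx ", (unsigned long long)V);
  std::printf("\n");
  return 0;
}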
- addRegisterOffset(*Loc, Location.getReg(), DV.getAddrElement(1)); - i = 2; - } else - addRegisterOp(*Loc, Location.getReg()); - } else - addRegisterOffset(*Loc, Location.getReg(), Location.getOffset()); - - for (; i < N; ++i) { - uint64_t Element = DV.getAddrElement(i); - if (Element == DIBuilder::OpPlus) { - addUInt(*Loc, dwarf::DW_FORM_data1, dwarf::DW_OP_plus_uconst); - addUInt(*Loc, dwarf::DW_FORM_udata, DV.getAddrElement(++i)); - } else if (Element == DIBuilder::OpDeref) { - if (!Location.isReg()) - addUInt(*Loc, dwarf::DW_FORM_data1, dwarf::DW_OP_deref); - } else - llvm_unreachable("unknown DIBuilder Opcode"); - } - - // Now attach the location information to the DIE. - addBlock(Die, Attribute, Loc); -} - /* Byref variables, in Blocks, are declared by the programmer as "SomeType VarName;", but the compiler creates a __Block_byref_x_VarName struct, and gives the variable VarName either the struct, or a pointer to the struct, as @@ -690,7 +560,7 @@ void DwarfUnit::addBlockByrefAddress(const DbgVariable &DV, DIE &Die, // Find the __forwarding field and the variable field in the __Block_byref // struct. - DIArray Fields = blockStruct.getTypeArray(); + DIArray Fields = blockStruct.getElements(); DIDerivedType varField; DIDerivedType forwardingField; @@ -712,7 +582,7 @@ void DwarfUnit::addBlockByrefAddress(const DbgVariable &DV, DIE &Die, DIELoc *Loc = new (DIEValueAllocator) DIELoc(); if (Location.isReg()) - addRegisterOp(*Loc, Location.getReg()); + addRegisterOpPiece(*Loc, Location.getReg()); else addRegisterOffset(*Loc, Location.getReg(), Location.getOffset()); @@ -1002,11 +872,9 @@ void DwarfUnit::updateAcceleratorTables(DIScope Context, DIType Ty, unsigned Flags = IsImplementation ? dwarf::DW_FLAG_type_implementation : 0; DD->addAccelType(Ty.getName(), TyDIE, Flags); - if ((!Context || Context.isCompileUnit() || Context.isFile() || - Context.isNameSpace()) && - getCUNode().getEmissionKind() != DIBuilder::LineTablesOnly) - GlobalTypes[getParentContextString(Context) + Ty.getName().str()] = - &TyDIE; + if (!Context || Context.isCompileUnit() || Context.isFile() || + Context.isNameSpace()) + addGlobalType(Ty, TyDIE, Context); } } @@ -1031,14 +899,6 @@ void DwarfUnit::addType(DIE &Entity, DIType Ty, dwarf::Attribute Attribute) { addDIEEntry(Entity, Attribute, Entry); } -/// addGlobalName - Add a new global name to the compile unit. -void DwarfUnit::addGlobalName(StringRef Name, DIE &Die, DIScope Context) { - if (getCUNode().getEmissionKind() == DIBuilder::LineTablesOnly) - return; - std::string FullName = getParentContextString(Context) + Name.str(); - GlobalNames[FullName] = &Die; -} - /// getParentContextString - Walks the metadata parent chain in a language /// specific manner (using the compile unit language) and returns /// it as a string. This is done at the metadata level because DIEs may @@ -1129,16 +989,16 @@ void DwarfUnit::constructTypeDIE(DIE &Buffer, DIDerivedType DTy) { } /// constructSubprogramArguments - Construct function argument DIEs. 
-void DwarfUnit::constructSubprogramArguments(DIE &Buffer, DIArray Args) { +void DwarfUnit::constructSubprogramArguments(DIE &Buffer, DITypeArray Args) { for (unsigned i = 1, N = Args.getNumElements(); i < N; ++i) { - DIDescriptor Ty = Args.getElement(i); - if (Ty.isUnspecifiedParameter()) { + DIType Ty = resolve(Args.getElement(i)); + if (!Ty) { assert(i == N-1 && "Unspecified parameter must be the last argument"); createAndAddDIE(dwarf::DW_TAG_unspecified_parameters, Buffer); } else { DIE &Arg = createAndAddDIE(dwarf::DW_TAG_formal_parameter, Buffer); - addType(Arg, DIType(Ty)); - if (DIType(Ty).isArtificial()) + addType(Arg, Ty); + if (Ty.isArtificial()) addFlag(Arg, dwarf::DW_AT_artificial); } } @@ -1161,14 +1021,14 @@ void DwarfUnit::constructTypeDIE(DIE &Buffer, DICompositeType CTy) { break; case dwarf::DW_TAG_subroutine_type: { // Add return type. A void return won't have a type. - DIArray Elements = CTy.getTypeArray(); - DIType RTy(Elements.getElement(0)); + DITypeArray Elements = DISubroutineType(CTy).getTypeArray(); + DIType RTy(resolve(Elements.getElement(0))); if (RTy) addType(Buffer, RTy); bool isPrototyped = true; if (Elements.getNumElements() == 2 && - Elements.getElement(1).isUnspecifiedParameter()) + !Elements.getElement(1)) isPrototyped = false; constructSubprogramArguments(Buffer, Elements); @@ -1191,7 +1051,7 @@ void DwarfUnit::constructTypeDIE(DIE &Buffer, DICompositeType CTy) { case dwarf::DW_TAG_union_type: case dwarf::DW_TAG_class_type: { // Add elements to structure type. - DIArray Elements = CTy.getTypeArray(); + DIArray Elements = CTy.getElements(); for (unsigned i = 0, N = Elements.getNumElements(); i < N; ++i) { DIDescriptor Element = Elements.getElement(i); if (Element.isSubprogram()) @@ -1373,20 +1233,23 @@ DIE *DwarfUnit::getOrCreateNameSpace(DINameSpace NS) { } /// getOrCreateSubprogramDIE - Create new DIE using SP. -DIE *DwarfUnit::getOrCreateSubprogramDIE(DISubprogram SP) { +DIE *DwarfUnit::getOrCreateSubprogramDIE(DISubprogram SP, bool Minimal) { // Construct the context before querying for the existence of the DIE in case // such construction creates the DIE (as is the case for member function // declarations). - DIE *ContextDIE = getOrCreateContextDIE(resolve(SP.getContext())); + DIE *ContextDIE = + Minimal ? &getUnitDie() : getOrCreateContextDIE(resolve(SP.getContext())); if (DIE *SPDie = getDIE(SP)) return SPDie; if (DISubprogram SPDecl = SP.getFunctionDeclaration()) { - // Add subprogram definitions to the CU die directly. - ContextDIE = &getUnitDie(); - // Build the decl now to ensure it precedes the definition. - getOrCreateSubprogramDIE(SPDecl); + if (!Minimal) { + // Add subprogram definitions to the CU die directly. + ContextDIE = &getUnitDie(); + // Build the decl now to ensure it precedes the definition. + getOrCreateSubprogramDIE(SPDecl); + } } // DW_TAG_inlined_subroutine may refer to this DIE. @@ -1401,14 +1264,8 @@ DIE *DwarfUnit::getOrCreateSubprogramDIE(DISubprogram SP) { return &SPDie; } -void DwarfUnit::applySubprogramAttributesToDefinition(DISubprogram SP, DIE &SPDie) { - DISubprogram SPDecl = SP.getFunctionDeclaration(); - DIScope Context = resolve(SPDecl ? 
SPDecl.getContext() : SP.getContext()); - applySubprogramAttributes(SP, SPDie); - addGlobalName(SP.getName(), SPDie, Context); -} - -void DwarfUnit::applySubprogramAttributes(DISubprogram SP, DIE &SPDie) { +bool DwarfUnit::applySubprogramDefinitionAttributes(DISubprogram SP, + DIE &SPDie) { DIE *DeclDie = nullptr; StringRef DeclLinkageName; if (DISubprogram SPDecl = SP.getFunctionDeclaration()) { @@ -1431,17 +1288,29 @@ void DwarfUnit::applySubprogramAttributes(DISubprogram SP, DIE &SPDie) { addString(SPDie, dwarf::DW_AT_MIPS_linkage_name, GlobalValue::getRealLinkageName(LinkageName)); - if (DeclDie) { - // Refer to the function declaration where all the other attributes will be - // found. - addDIEEntry(SPDie, dwarf::DW_AT_specification, *DeclDie); - return; - } + if (!DeclDie) + return false; + + // Refer to the function declaration where all the other attributes will be + // found. + addDIEEntry(SPDie, dwarf::DW_AT_specification, *DeclDie); + return true; +} + +void DwarfUnit::applySubprogramAttributes(DISubprogram SP, DIE &SPDie, + bool Minimal) { + if (!Minimal) + if (applySubprogramDefinitionAttributes(SP, SPDie)) + return; // Constructors and operators for anonymous aggregates do not have names. if (!SP.getName().empty()) addString(SPDie, dwarf::DW_AT_name, SP.getName()); + // Skip the rest of the attributes under -gmlt to save space. + if (Minimal) + return; + addSourceLine(SPDie, SP); // Add the prototype if we have a prototype and we have a C like @@ -1452,15 +1321,15 @@ void DwarfUnit::applySubprogramAttributes(DISubprogram SP, DIE &SPDie) { Language == dwarf::DW_LANG_ObjC)) addFlag(SPDie, dwarf::DW_AT_prototyped); - DICompositeType SPTy = SP.getType(); + DISubroutineType SPTy = SP.getType(); assert(SPTy.getTag() == dwarf::DW_TAG_subroutine_type && "the type of a subprogram should be a subroutine"); - DIArray Args = SPTy.getTypeArray(); + DITypeArray Args = SPTy.getTypeArray(); // Add a return type. If this is a type like a C/C++ void type we don't add a // return type. - if (Args.getElement(0)) - addType(SPDie, DIType(Args.getElement(0))); + if (resolve(Args.getElement(0))) + addType(SPDie, DIType(resolve(Args.getElement(0)))); unsigned VK = SP.getVirtuality(); if (VK) { @@ -1506,7 +1375,7 @@ void DwarfUnit::applySubprogramAttributes(DISubprogram SP, DIE &SPDie) { else if (SP.isPrivate()) addUInt(SPDie, dwarf::DW_AT_accessibility, dwarf::DW_FORM_data1, dwarf::DW_ACCESS_private); - else + else if (SP.isPublic()) addUInt(SPDie, dwarf::DW_AT_accessibility, dwarf::DW_FORM_data1, dwarf::DW_ACCESS_public); @@ -1514,184 +1383,6 @@ void DwarfUnit::applySubprogramAttributes(DISubprogram SP, DIE &SPDie) { addFlag(SPDie, dwarf::DW_AT_explicit); } -void DwarfUnit::applyVariableAttributes(const DbgVariable &Var, - DIE &VariableDie) { - StringRef Name = Var.getName(); - if (!Name.empty()) - addString(VariableDie, dwarf::DW_AT_name, Name); - addSourceLine(VariableDie, Var.getVariable()); - addType(VariableDie, Var.getType()); - if (Var.isArtificial()) - addFlag(VariableDie, dwarf::DW_AT_artificial); -} - -// Return const expression if value is a GEP to access merged global -// constant. e.g. -// i8* getelementptr ({ i8, i8, i8, i8 }* @_MergedGlobals, i32 0, i32 0) -static const ConstantExpr *getMergedGlobalExpr(const Value *V) { - const ConstantExpr *CE = dyn_cast_or_null(V); - if (!CE || CE->getNumOperands() != 3 || - CE->getOpcode() != Instruction::GetElementPtr) - return nullptr; - - // First operand points to a global struct. 
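Aside: the Minimal flag above implements -gmlt; when it is set the definition attributes are skipped, only the subprogram name is emitted, and everything after the name (source line, prototype, virtuality, accessibility) is bypassed. A toy sketch of that early-return shape, with attribute emission reduced to prints and all names hypothetical (not the DwarfUnit API):

#include <cstdio>
#include <string>

struct Subprogram {
  std::string Name;
  unsigned Line;
  bool IsPrototyped;
};

// Emit subprogram attributes; under -gmlt (Minimal == true) only the name is
// kept so .debug_info stays small enough for symbolized backtraces.
static void applySubprogramAttributes(const Subprogram &SP, bool Minimal) {
  if (!SP.Name.empty())
    std::printf("  DW_AT_name \"%s\"\n", SP.Name.c_str());

  if (Minimal) // -gmlt: skip the remaining attributes to save space.
    return;

  std::printf("  DW_AT_decl_line %u\n", SP.Line);
  if (SP.IsPrototyped)
    std::printf("  DW_AT_prototyped true\n");
}

int main() {
  Subprogram SP{"foo", 42, true};
  std::puts("full:");
  applySubprogramAttributes(SP, /*Minimal=*/false);
  std::puts("gmlt:");
  applySubprogramAttributes(SP, /*Minimal=*/true);
  return 0;
}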
- Value *Ptr = CE->getOperand(0); - if (!isa(Ptr) || - !isa(cast(Ptr->getType())->getElementType())) - return nullptr; - - // Second operand is zero. - const ConstantInt *CI = dyn_cast_or_null(CE->getOperand(1)); - if (!CI || !CI->isZero()) - return nullptr; - - // Third operand is offset. - if (!isa(CE->getOperand(2))) - return nullptr; - - return CE; -} - -/// createGlobalVariableDIE - create global variable DIE. -void DwarfCompileUnit::createGlobalVariableDIE(DIGlobalVariable GV) { - // Check for pre-existence. - if (getDIE(GV)) - return; - - assert(GV.isGlobalVariable()); - - DIScope GVContext = GV.getContext(); - DIType GTy = DD->resolve(GV.getType()); - - // If this is a static data member definition, some attributes belong - // to the declaration DIE. - DIE *VariableDIE = nullptr; - bool IsStaticMember = false; - DIDerivedType SDMDecl = GV.getStaticDataMemberDeclaration(); - if (SDMDecl.Verify()) { - assert(SDMDecl.isStaticMember() && "Expected static member decl"); - // We need the declaration DIE that is in the static member's class. - VariableDIE = getOrCreateStaticMemberDIE(SDMDecl); - IsStaticMember = true; - } - - // If this is not a static data member definition, create the variable - // DIE and add the initial set of attributes to it. - if (!VariableDIE) { - // Construct the context before querying for the existence of the DIE in - // case such construction creates the DIE. - DIE *ContextDIE = getOrCreateContextDIE(GVContext); - - // Add to map. - VariableDIE = &createAndAddDIE(GV.getTag(), *ContextDIE, GV); - - // Add name and type. - addString(*VariableDIE, dwarf::DW_AT_name, GV.getDisplayName()); - addType(*VariableDIE, GTy); - - // Add scoping info. - if (!GV.isLocalToUnit()) - addFlag(*VariableDIE, dwarf::DW_AT_external); - - // Add line number info. - addSourceLine(*VariableDIE, GV); - } - - // Add location. - bool addToAccelTable = false; - DIE *VariableSpecDIE = nullptr; - bool isGlobalVariable = GV.getGlobal() != nullptr; - if (isGlobalVariable) { - addToAccelTable = true; - DIELoc *Loc = new (DIEValueAllocator) DIELoc(); - const MCSymbol *Sym = Asm->getSymbol(GV.getGlobal()); - if (GV.getGlobal()->isThreadLocal()) { - // FIXME: Make this work with -gsplit-dwarf. - unsigned PointerSize = Asm->getDataLayout().getPointerSize(); - assert((PointerSize == 4 || PointerSize == 8) && - "Add support for other sizes if necessary"); - // Based on GCC's support for TLS: - if (!DD->useSplitDwarf()) { - // 1) Start with a constNu of the appropriate pointer size - addUInt(*Loc, dwarf::DW_FORM_data1, - PointerSize == 4 ? dwarf::DW_OP_const4u : dwarf::DW_OP_const8u); - // 2) containing the (relocated) offset of the TLS variable - // within the module's TLS block. - addExpr(*Loc, dwarf::DW_FORM_udata, - Asm->getObjFileLowering().getDebugThreadLocalSymbol(Sym)); - } else { - addUInt(*Loc, dwarf::DW_FORM_data1, dwarf::DW_OP_GNU_const_index); - addUInt(*Loc, dwarf::DW_FORM_udata, - DD->getAddressPool().getIndex(Sym, /* TLS */ true)); - } - // 3) followed by a custom OP to make the debugger do a TLS lookup. - addUInt(*Loc, dwarf::DW_FORM_data1, dwarf::DW_OP_GNU_push_tls_address); - } else { - DD->addArangeLabel(SymbolCU(this, Sym)); - addOpAddress(*Loc, Sym); - } - // Do not create specification DIE if context is either compile unit - // or a subprogram. - if (GVContext && GV.isDefinition() && !GVContext.isCompileUnit() && - !GVContext.isFile() && !DD->isSubprogramContext(GVContext)) { - // Create specification DIE. 
- VariableSpecDIE = &createAndAddDIE(dwarf::DW_TAG_variable, UnitDie); - addDIEEntry(*VariableSpecDIE, dwarf::DW_AT_specification, *VariableDIE); - addBlock(*VariableSpecDIE, dwarf::DW_AT_location, Loc); - // A static member's declaration is already flagged as such. - if (!SDMDecl.Verify()) - addFlag(*VariableDIE, dwarf::DW_AT_declaration); - } else { - addBlock(*VariableDIE, dwarf::DW_AT_location, Loc); - } - // Add the linkage name. - StringRef LinkageName = GV.getLinkageName(); - if (!LinkageName.empty()) - // From DWARF4: DIEs to which DW_AT_linkage_name may apply include: - // TAG_common_block, TAG_constant, TAG_entry_point, TAG_subprogram and - // TAG_variable. - addString(IsStaticMember && VariableSpecDIE ? *VariableSpecDIE - : *VariableDIE, - DD->getDwarfVersion() >= 4 ? dwarf::DW_AT_linkage_name - : dwarf::DW_AT_MIPS_linkage_name, - GlobalValue::getRealLinkageName(LinkageName)); - } else if (const ConstantInt *CI = - dyn_cast_or_null(GV.getConstant())) { - // AT_const_value was added when the static member was created. To avoid - // emitting AT_const_value multiple times, we only add AT_const_value when - // it is not a static member. - if (!IsStaticMember) - addConstantValue(*VariableDIE, CI, GTy); - } else if (const ConstantExpr *CE = getMergedGlobalExpr(GV->getOperand(11))) { - addToAccelTable = true; - // GV is a merged global. - DIELoc *Loc = new (DIEValueAllocator) DIELoc(); - Value *Ptr = CE->getOperand(0); - MCSymbol *Sym = Asm->getSymbol(cast(Ptr)); - DD->addArangeLabel(SymbolCU(this, Sym)); - addOpAddress(*Loc, Sym); - addUInt(*Loc, dwarf::DW_FORM_data1, dwarf::DW_OP_constu); - SmallVector Idx(CE->op_begin() + 1, CE->op_end()); - addUInt(*Loc, dwarf::DW_FORM_udata, - Asm->getDataLayout().getIndexedOffset(Ptr->getType(), Idx)); - addUInt(*Loc, dwarf::DW_FORM_data1, dwarf::DW_OP_plus); - addBlock(*VariableDIE, dwarf::DW_AT_location, Loc); - } - - if (addToAccelTable) { - DIE &AddrDIE = VariableSpecDIE ? *VariableSpecDIE : *VariableDIE; - DD->addAccelName(GV.getName(), AddrDIE); - - // If the linkage name is different than the name, go ahead and output - // that as well into the name table. - if (GV.getLinkageName() != "" && GV.getName() != GV.getLinkageName()) - DD->addAccelName(GV.getLinkageName(), AddrDIE); - } - - addGlobalName(GV.getName(), VariableSpecDIE ? *VariableSpecDIE : *VariableDIE, - GV.getContext()); -} - /// constructSubrangeDIE - Construct subrange DIE from DISubrange. void DwarfUnit::constructSubrangeDIE(DIE &Buffer, DISubrange SR, DIE *IndexTy) { DIE &DW_Subrange = createAndAddDIE(dwarf::DW_TAG_subrange_type, Buffer); @@ -1700,9 +1391,7 @@ void DwarfUnit::constructSubrangeDIE(DIE &Buffer, DISubrange SR, DIE *IndexTy) { // The LowerBound value defines the lower bounds which is typically zero for // C/C++. The Count value is the number of elements. Values are 64 bit. If // Count == -1 then the array is unbounded and we do not emit - // DW_AT_lower_bound and DW_AT_upper_bound attributes. If LowerBound == 0 and - // Count == 0, then the array has zero elements in which case we do not emit - // an upper bound. + // DW_AT_lower_bound and DW_AT_count attributes. 
int64_t LowerBound = SR.getLo(); int64_t DefaultLowerBound = getDefaultLowerBound(); int64_t Count = SR.getCount(); @@ -1710,11 +1399,22 @@ void DwarfUnit::constructSubrangeDIE(DIE &Buffer, DISubrange SR, DIE *IndexTy) { if (DefaultLowerBound == -1 || LowerBound != DefaultLowerBound) addUInt(DW_Subrange, dwarf::DW_AT_lower_bound, None, LowerBound); - if (Count != -1 && Count != 0) + if (Count != -1) // FIXME: An unbounded array should reference the expression that defines // the array. - addUInt(DW_Subrange, dwarf::DW_AT_upper_bound, None, - LowerBound + Count - 1); + addUInt(DW_Subrange, dwarf::DW_AT_count, None, Count); +} + +DIE *DwarfUnit::getIndexTyDie() { + if (IndexTyDie) + return IndexTyDie; + // Construct an integer type to use for indexes. + IndexTyDie = &createAndAddDIE(dwarf::DW_TAG_base_type, UnitDie); + addString(*IndexTyDie, dwarf::DW_AT_name, "sizetype"); + addUInt(*IndexTyDie, dwarf::DW_AT_byte_size, None, sizeof(int64_t)); + addUInt(*IndexTyDie, dwarf::DW_AT_encoding, dwarf::DW_FORM_data1, + dwarf::DW_ATE_unsigned); + return IndexTyDie; } /// constructArrayTypeDIE - Construct array type DIE from DICompositeType. @@ -1729,18 +1429,9 @@ void DwarfUnit::constructArrayTypeDIE(DIE &Buffer, DICompositeType CTy) { // FIXME: This type should be passed down from the front end // as different languages may have different sizes for indexes. DIE *IdxTy = getIndexTyDie(); - if (!IdxTy) { - // Construct an integer type to use for indexes. - IdxTy = &createAndAddDIE(dwarf::DW_TAG_base_type, UnitDie); - addString(*IdxTy, dwarf::DW_AT_name, "sizetype"); - addUInt(*IdxTy, dwarf::DW_AT_byte_size, None, sizeof(int64_t)); - addUInt(*IdxTy, dwarf::DW_AT_encoding, dwarf::DW_FORM_data1, - dwarf::DW_ATE_unsigned); - setIndexTyDie(IdxTy); - } // Add subranges to array type. - DIArray Elements = CTy.getTypeArray(); + DIArray Elements = CTy.getElements(); for (unsigned i = 0, N = Elements.getNumElements(); i < N; ++i) { DIDescriptor Element = Elements.getElement(i); if (Element.getTag() == dwarf::DW_TAG_subrange_type) @@ -1750,7 +1441,7 @@ void DwarfUnit::constructArrayTypeDIE(DIE &Buffer, DICompositeType CTy) { /// constructEnumTypeDIE - Construct an enum type DIE from DICompositeType. void DwarfUnit::constructEnumTypeDIE(DIE &Buffer, DICompositeType CTy) { - DIArray Elements = CTy.getTypeArray(); + DIArray Elements = CTy.getElements(); // Add enumerators to enumeration type. for (unsigned i = 0, N = Elements.getNumElements(); i < N; ++i) { @@ -1788,68 +1479,6 @@ void DwarfUnit::constructContainingTypeDIEs() { } } -/// constructVariableDIE - Construct a DIE for the given DbgVariable. -std::unique_ptr DwarfUnit::constructVariableDIE(DbgVariable &DV, - bool Abstract) { - auto D = constructVariableDIEImpl(DV, Abstract); - DV.setDIE(*D); - return D; -} - -std::unique_ptr DwarfUnit::constructVariableDIEImpl(const DbgVariable &DV, - bool Abstract) { - // Define variable debug information entry. - auto VariableDie = make_unique(DV.getTag()); - - if (Abstract) { - applyVariableAttributes(DV, *VariableDie); - return VariableDie; - } - - // Add variable address. - - unsigned Offset = DV.getDotDebugLocOffset(); - if (Offset != ~0U) { - addLocationList(*VariableDie, dwarf::DW_AT_location, Offset); - return VariableDie; - } - - // Check if variable is described by a DBG_VALUE instruction. 
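Aside: the subrange hunk above switches from DW_AT_upper_bound to DW_AT_count, with two rules — the lower bound is emitted only when it differs from the language default (0 for C/C++, 1 for Fortran), and Count == -1 means the array is unbounded, so no count attribute is emitted at all. A standalone sketch of those rules, with DIE emission reduced to prints and the per-language defaults simplified (not the real API):

#include <cstdint>
#include <cstdio>

// Default array lower bound by language: C-family arrays start at 0,
// Fortran at 1; -1 would mean "no known default, always emit the bound".
static int64_t defaultLowerBound(bool IsFortran) { return IsFortran ? 1 : 0; }

// Emit DW_AT_lower_bound / DW_AT_count for one subrange.
static void emitSubrange(int64_t LowerBound, int64_t Count, bool IsFortran) {
  int64_t Default = defaultLowerBound(IsFortran);
  if (Default == -1 || LowerBound != Default)
    std::printf("  DW_AT_lower_bound %lld\n", (long long)LowerBound);

  if (Count != -1) // -1 marks an unbounded array: no count attribute.
    std::printf("  DW_AT_count %lld\n", (long long)Count);
}

int main() {
  emitSubrange(0, 10, /*IsFortran=*/false); // only DW_AT_count 10
  emitSubrange(1, 10, /*IsFortran=*/false); // non-default bound plus count
  emitSubrange(0, -1, /*IsFortran=*/false); // unbounded: nothing emitted
  return 0;
}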
- if (const MachineInstr *DVInsn = DV.getMInsn()) { - assert(DVInsn->getNumOperands() == 3); - if (DVInsn->getOperand(0).isReg()) { - const MachineOperand RegOp = DVInsn->getOperand(0); - // If the second operand is an immediate, this is an indirect value. - if (DVInsn->getOperand(1).isImm()) { - MachineLocation Location(RegOp.getReg(), - DVInsn->getOperand(1).getImm()); - addVariableAddress(DV, *VariableDie, Location); - } else if (RegOp.getReg()) - addVariableAddress(DV, *VariableDie, MachineLocation(RegOp.getReg())); - } else if (DVInsn->getOperand(0).isImm()) - addConstantValue(*VariableDie, DVInsn->getOperand(0), DV.getType()); - else if (DVInsn->getOperand(0).isFPImm()) - addConstantFPValue(*VariableDie, DVInsn->getOperand(0)); - else if (DVInsn->getOperand(0).isCImm()) - addConstantValue(*VariableDie, DVInsn->getOperand(0).getCImm(), - DV.getType()); - - return VariableDie; - } - - // .. else use frame index. - int FI = DV.getFrameIndex(); - if (FI != ~0) { - unsigned FrameReg = 0; - const TargetFrameLowering *TFI = Asm->TM.getFrameLowering(); - int Offset = TFI->getFrameIndexReference(*Asm->MF, FI, FrameReg); - MachineLocation Location(FrameReg, Offset); - addVariableAddress(DV, *VariableDie, Location); - } - - return VariableDie; -} - /// constructMemberDIE - Construct member DIE from DIDerivedType. void DwarfUnit::constructMemberDIE(DIE &Buffer, DIDerivedType DT) { DIE &MemberDie = createAndAddDIE(DT.getTag(), Buffer); @@ -1922,7 +1551,7 @@ void DwarfUnit::constructMemberDIE(DIE &Buffer, DIDerivedType DT) { addUInt(MemberDie, dwarf::DW_AT_accessibility, dwarf::DW_FORM_data1, dwarf::DW_ACCESS_private); // Otherwise C++ member and base classes are considered public. - else + else if (DT.isPublic()) addUInt(MemberDie, dwarf::DW_AT_accessibility, dwarf::DW_FORM_data1, dwarf::DW_ACCESS_public); if (DT.isVirtual()) @@ -1971,7 +1600,7 @@ DIE *DwarfUnit::getOrCreateStaticMemberDIE(DIDerivedType DT) { else if (DT.isPrivate()) addUInt(StaticMemberDIE, dwarf::DW_AT_accessibility, dwarf::DW_FORM_data1, dwarf::DW_ACCESS_private); - else + else if (DT.isPublic()) addUInt(StaticMemberDIE, dwarf::DW_AT_accessibility, dwarf::DW_FORM_data1, dwarf::DW_ACCESS_public); @@ -1984,6 +1613,10 @@ DIE *DwarfUnit::getOrCreateStaticMemberDIE(DIDerivedType DT) { } void DwarfUnit::emitHeader(const MCSymbol *ASectionSym) const { + // Emit size of content not including length itself + Asm->OutStreamer.AddComment("Length of Unit"); + Asm->EmitInt32(getHeaderSize() + UnitDie.getSize()); + Asm->OutStreamer.AddComment("DWARF version number"); Asm->EmitInt16(DD->getDwarfVersion()); Asm->OutStreamer.AddComment("Offset Into Abbrev. Section"); @@ -1999,50 +1632,9 @@ void DwarfUnit::emitHeader(const MCSymbol *ASectionSym) const { Asm->EmitInt8(Asm->getDataLayout().getPointerSize()); } -void DwarfUnit::addRange(RangeSpan Range) { - // Only add a range for this unit if we're emitting full debug. - if (getCUNode().getEmissionKind() == DIBuilder::FullDebug) { - // If we have no current ranges just add the range and return, otherwise, - // check the current section and CU against the previous section and CU we - // emitted into and the subprogram was contained within. If these are the - // same then extend our current range, otherwise add this as a new range. 
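Aside: the new "Length of Unit" word emitted above is the DWARF32 unit_length — the number of bytes in the unit that follow the 4-byte length field. Assuming getHeaderSize() counts version (2) + abbrev offset (4) + address size (1) and UnitDie.getSize() is the laid-out DIE tree, the arithmetic works out as in this small sketch (hypothetical numbers, not the DwarfUnit API):

#include <cstdio>

// Bytes of a DWARF32 compile unit header that follow the length word:
// 2 (version) + 4 (debug_abbrev offset) + 1 (address size).
static unsigned cuHeaderSize() { return 2 + 4 + 1; }

// A type unit additionally carries an 8-byte type signature and a 4-byte
// offset to the type DIE (see the DwarfTypeUnit::getHeaderSize change below).
static unsigned tuHeaderSize() { return cuHeaderSize() + 8 + 4; }

int main() {
  unsigned DieTreeSize = 100; // hypothetical size of the unit's DIE tree
  std::printf("CU unit_length = %u\n", cuHeaderSize() + DieTreeSize); // 107
  std::printf("TU unit_length = %u\n", tuHeaderSize() + DieTreeSize); // 119
  return 0;
}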
- if (CURanges.size() == 0 || - this != DD->getPrevCU() || - Asm->getCurrentSection() != DD->getPrevSection()) { - CURanges.push_back(Range); - return; - } - - assert(&(CURanges.back().getEnd()->getSection()) == - &(Range.getEnd()->getSection()) && - "We can only append to a range in the same section!"); - CURanges.back().setEnd(Range.getEnd()); - } -} - -void DwarfCompileUnit::initStmtList(MCSymbol *DwarfLineSectionSym) { - // Define start line table label for each Compile Unit. - MCSymbol *LineTableStartSym = - Asm->OutStreamer.getDwarfLineTableSymbol(getUniqueID()); - - stmtListIndex = UnitDie.getValues().size(); - - // DW_AT_stmt_list is a offset of line number information for this - // compile unit in debug_line section. For split dwarf this is - // left in the skeleton CU and so not included. - // The line table entries are not always emitted in assembly, so it - // is not okay to use line_table_start here. - if (Asm->MAI->doesDwarfUseRelocationsAcrossSections()) - addSectionLabel(UnitDie, dwarf::DW_AT_stmt_list, LineTableStartSym); - else - addSectionDelta(UnitDie, dwarf::DW_AT_stmt_list, LineTableStartSym, - DwarfLineSectionSym); -} - -void DwarfCompileUnit::applyStmtList(DIE &D) { - D.addValue(dwarf::DW_AT_stmt_list, - UnitDie.getAbbrev().getData()[stmtListIndex].getForm(), - UnitDie.getValues()[stmtListIndex]); +void DwarfUnit::initSection(const MCSection *Section) { + assert(!this->Section); + this->Section = Section; } void DwarfTypeUnit::emitHeader(const MCSymbol *ASectionSym) const { @@ -2055,16 +1647,8 @@ void DwarfTypeUnit::emitHeader(const MCSymbol *ASectionSym) const { sizeof(Ty->getOffset())); } -void DwarfTypeUnit::initSection(const MCSection *Section) { - assert(!this->Section); - this->Section = Section; - // Since each type unit is contained in its own COMDAT section, the begin - // label and the section label are the same. Using the begin label emission in - // DwarfDebug to emit the section label as well is slightly subtle/sneaky, but - // the only other alternative of lazily constructing start-of-section labels - // and storing a mapping in DwarfDebug (or AsmPrinter). - this->SectionSym = this->LabelBegin = - Asm->GetTempSymbol(Section->getLabelBeginName(), getUniqueID()); - this->LabelEnd = - Asm->GetTempSymbol(Section->getLabelEndName(), getUniqueID()); +bool DwarfTypeUnit::isDwoUnit() const { + // Since there are no skeleton type units, all type units are dwo type units + // when split DWARF is being used. + return DD->useSplitDwarf(); } diff --git a/lib/CodeGen/AsmPrinter/DwarfUnit.h b/lib/CodeGen/AsmPrinter/DwarfUnit.h index b7b83b2..f40c937 100644 --- a/lib/CodeGen/AsmPrinter/DwarfUnit.h +++ b/lib/CodeGen/AsmPrinter/DwarfUnit.h @@ -11,8 +11,8 @@ // //===----------------------------------------------------------------------===// -#ifndef CODEGEN_ASMPRINTER_DWARFCOMPILEUNIT_H -#define CODEGEN_ASMPRINTER_DWARFCOMPILEUNIT_H +#ifndef LLVM_LIB_CODEGEN_ASMPRINTER_DWARFUNIT_H +#define LLVM_LIB_CODEGEN_ASMPRINTER_DWARFUNIT_H #include "DIE.h" #include "DwarfDebug.h" @@ -55,7 +55,8 @@ private: SmallVector Ranges; public: - RangeSpanList(MCSymbol *Sym) : RangeSym(Sym) {} + RangeSpanList(MCSymbol *Sym, SmallVector Ranges) + : RangeSym(Sym), Ranges(std::move(Ranges)) {} MCSymbol *getSym() const { return RangeSym; } const SmallVectorImpl &getRanges() const { return Ranges; } void addRange(RangeSpan Range) { Ranges.push_back(Range); } @@ -96,12 +97,6 @@ protected: /// descriptors to debug information entries using a DIEEntry proxy. 
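Aside: the RangeSpanList constructor above now takes its range vector by value and moves it into the member, so callers can std::move an existing vector (one move, no copy) or hand over a temporary. A generic sketch of the pass-by-value-and-move idiom, with std::vector standing in for SmallVector (illustrative only):

#include <string>
#include <utility>
#include <vector>

struct Span { unsigned Begin, End; };

class SpanList {
  std::string Label;
  std::vector<Span> Spans;

public:
  // Take the vector by value: an rvalue argument costs one move, an lvalue
  // argument costs one copy, and the member initialization is always a move.
  SpanList(std::string Label, std::vector<Span> Spans)
      : Label(std::move(Label)), Spans(std::move(Spans)) {}

  const std::vector<Span> &spans() const { return Spans; }
};

int main() {
  std::vector<Span> Ranges = {{0, 16}, {32, 64}};
  SpanList List(".Ldebug_ranges0", std::move(Ranges)); // moved, not copied
  return static_cast<int>(List.spans().size()) - 2;    // 0 on success
}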
DenseMap MDNodeToDIEEntryMap; - /// GlobalNames - A map of globally visible named entities for this unit. - StringMap GlobalNames; - - /// GlobalTypes - A map of globally visible types for this unit. - StringMap GlobalTypes; - /// DIEBlocks - A list of all the DIEBlocks in use. std::vector DIEBlocks; @@ -113,13 +108,6 @@ protected: /// corresponds to the MDNode mapped with the subprogram DIE. DenseMap ContainingTypeMap; - // List of ranges for a given compile unit. - SmallVector CURanges; - - // List of range lists for a given compile unit, separate from the ranges for - // the CU itself. - SmallVector CURangeLists; - // DIEValueAllocator - All DIEValues are allocated through this allocator. BumpPtrAllocator DIEValueAllocator; @@ -129,86 +117,31 @@ protected: /// The section this unit will be emitted in. const MCSection *Section; - /// A label at the start of the non-dwo section related to this unit. - MCSymbol *SectionSym; + DwarfUnit(unsigned UID, dwarf::Tag, DICompileUnit CU, AsmPrinter *A, + DwarfDebug *DW, DwarfFile *DWU); - /// The start of the unit within its section. - MCSymbol *LabelBegin; + void initSection(const MCSection *Section); - /// The end of the unit within its section. - MCSymbol *LabelEnd; + /// Add a string attribute data and value. + void addLocalString(DIE &Die, dwarf::Attribute Attribute, StringRef Str); - /// Skeleton unit associated with this unit. - DwarfUnit *Skeleton; + void addIndexedString(DIE &Die, dwarf::Attribute Attribute, StringRef Str); - DwarfUnit(unsigned UID, dwarf::Tag, DICompileUnit CU, AsmPrinter *A, - DwarfDebug *DW, DwarfFile *DWU); + bool applySubprogramDefinitionAttributes(DISubprogram SP, DIE &SPDie); public: virtual ~DwarfUnit(); - /// Set the skeleton unit associated with this unit. - void setSkeleton(DwarfUnit &Skel) { Skeleton = &Skel; } - - /// Get the skeleton unit associated with this unit. - DwarfUnit *getSkeleton() const { return Skeleton; } - - /// Pass in the SectionSym even though we could recreate it in every compile - /// unit (type units will have actually distinct symbols once they're in - /// comdat sections). - void initSection(const MCSection *Section, MCSymbol *SectionSym) { - assert(!this->Section); - this->Section = Section; - this->SectionSym = SectionSym; - this->LabelBegin = - Asm->GetTempSymbol(Section->getLabelBeginName(), getUniqueID()); - this->LabelEnd = - Asm->GetTempSymbol(Section->getLabelEndName(), getUniqueID()); - } - const MCSection *getSection() const { assert(Section); return Section; } - /// If there's a skeleton then return the section symbol for the skeleton - /// unit, otherwise return the section symbol for this unit. - MCSymbol *getLocalSectionSym() const { - if (Skeleton) - return Skeleton->getSectionSym(); - return getSectionSym(); - } - - MCSymbol *getSectionSym() const { - assert(Section); - return SectionSym; - } - - /// If there's a skeleton then return the begin label for the skeleton unit, - /// otherwise return the local label for this unit. - MCSymbol *getLocalLabelBegin() const { - if (Skeleton) - return Skeleton->getLabelBegin(); - return getLabelBegin(); - } - - MCSymbol *getLabelBegin() const { - assert(Section); - return LabelBegin; - } - - MCSymbol *getLabelEnd() const { - assert(Section); - return LabelEnd; - } - // Accessors. 
unsigned getUniqueID() const { return UniqueID; } uint16_t getLanguage() const { return CUNode.getLanguage(); } DICompileUnit getCUNode() const { return CUNode; } DIE &getUnitDie() { return UnitDie; } - const StringMap &getGlobalNames() const { return GlobalNames; } - const StringMap &getGlobalTypes() const { return GlobalTypes; } unsigned getDebugInfoOffset() const { return DebugInfoOffset; } void setDebugInfoOffset(unsigned DbgInfoOff) { DebugInfoOffset = DbgInfoOff; } @@ -216,29 +149,15 @@ public: /// hasContent - Return true if this compile unit has something to write out. bool hasContent() const { return !UnitDie.getChildren().empty(); } - /// addRange - Add an address range to the list of ranges for this unit. - void addRange(RangeSpan Range); - - /// getRanges - Get the list of ranges for this unit. - const SmallVectorImpl &getRanges() const { return CURanges; } - SmallVectorImpl &getRanges() { return CURanges; } - - /// addRangeList - Add an address range list to the list of range lists. - void addRangeList(RangeSpanList Ranges) { CURangeLists.push_back(Ranges); } - - /// getRangeLists - Get the vector of range lists. - const SmallVectorImpl &getRangeLists() const { - return CURangeLists; - } - SmallVectorImpl &getRangeLists() { return CURangeLists; } - /// getParentContextString - Get a string containing the language specific /// context for a global name. std::string getParentContextString(DIScope Context) const; - /// addGlobalName - Add a new global entity to the compile unit. - /// - void addGlobalName(StringRef Name, DIE &Die, DIScope Context); + /// Add a new global name to the compile unit. + virtual void addGlobalName(StringRef Name, DIE &Die, DIScope Context) {} + + /// Add a new global type to the compile unit. + virtual void addGlobalType(DIType Ty, const DIE &Die, DIScope Context) {} /// addAccelNamespace - Add a new name to the namespace accelerator table. void addAccelNamespace(StringRef Name, const DIE &Die); @@ -275,14 +194,7 @@ public: void addSInt(DIELoc &Die, Optional Form, int64_t Integer); /// addString - Add a string attribute data and value. - void addString(DIE &Die, dwarf::Attribute Attribute, const StringRef Str); - - /// addLocalString - Add a string attribute data and value. - void addLocalString(DIE &Die, dwarf::Attribute Attribute, - const StringRef Str); - - /// addExpr - Add a Dwarf expression attribute data and value. - void addExpr(DIELoc &Die, dwarf::Form Form, const MCExpr *Expr); + void addString(DIE &Die, dwarf::Attribute Attribute, StringRef Str); /// addLabel - Add a Dwarf label attribute data and value. void addLabel(DIE &Die, dwarf::Attribute Attribute, dwarf::Form Form, @@ -290,14 +202,6 @@ public: void addLabel(DIELoc &Die, dwarf::Form Form, const MCSymbol *Label); - /// addLocationList - Add a Dwarf loclistptr attribute data and value. - void addLocationList(DIE &Die, dwarf::Attribute Attribute, unsigned Index); - - /// addSectionLabel - Add a Dwarf section label attribute data and value. - /// - void addSectionLabel(DIE &Die, dwarf::Attribute Attribute, - const MCSymbol *Label); - /// addSectionOffset - Add an offset into a section attribute data and value. /// void addSectionOffset(DIE &Die, dwarf::Attribute Attribute, uint64_t Integer); @@ -306,10 +210,6 @@ public: /// form given and an op of either DW_FORM_addr or DW_FORM_GNU_addr_index. void addOpAddress(DIELoc &Die, const MCSymbol *Label); - /// addSectionDelta - Add a label delta attribute data and value. 
- void addSectionDelta(DIE &Die, dwarf::Attribute Attribute, const MCSymbol *Hi, - const MCSymbol *Lo); - /// addLabelDelta - Add a label delta attribute data and value. void addLabelDelta(DIE &Die, dwarf::Attribute Attribute, const MCSymbol *Hi, const MCSymbol *Lo); @@ -339,11 +239,6 @@ public: void addSourceLine(DIE &Die, DINameSpace NS); void addSourceLine(DIE &Die, DIObjCProperty Ty); - /// addAddress - Add an address attribute to a die based on the location - /// provided. - void addAddress(DIE &Die, dwarf::Attribute Attribute, - const MachineLocation &Location, bool Indirect = false); - /// addConstantValue - Add constant value entry in variable DIE. void addConstantValue(DIE &Die, const MachineOperand &MO, DIType Ty); void addConstantValue(DIE &Die, const ConstantInt *CI, DIType Ty); @@ -359,19 +254,12 @@ public: void addTemplateParams(DIE &Buffer, DIArray TParams); /// addRegisterOp - Add register operand. - void addRegisterOp(DIELoc &TheDie, unsigned Reg); + void addRegisterOpPiece(DIELoc &TheDie, unsigned Reg, + unsigned SizeInBits = 0, unsigned OffsetInBits = 0); /// addRegisterOffset - Add register offset. void addRegisterOffset(DIELoc &TheDie, unsigned Reg, int64_t Offset); - /// addComplexAddress - Start with the address based on the location provided, - /// and generate the DWARF information necessary to find the actual variable - /// (navigating the extra location information encoded in the type) based on - /// the starting location. Add the DWARF information to the die. - void addComplexAddress(const DbgVariable &DV, DIE &Die, - dwarf::Attribute Attribute, - const MachineLocation &Location); - // FIXME: Should be reformulated in terms of addComplexAddress. /// addBlockByrefAddress - Start with the address based on the location /// provided, and generate the DWARF information necessary to find the @@ -382,11 +270,6 @@ public: dwarf::Attribute Attribute, const MachineLocation &Location); - /// addVariableAddress - Add DW_AT_location attribute for a - /// DbgVariable based on provided MachineLocation. - void addVariableAddress(const DbgVariable &DV, DIE &Die, - MachineLocation Location); - /// addType - Add a new type attribute to the specified entity. This takes /// and attribute parameter because DW_AT_friend attributes are also /// type references. @@ -397,11 +280,10 @@ public: DIE *getOrCreateNameSpace(DINameSpace NS); /// getOrCreateSubprogramDIE - Create new DIE using SP. - DIE *getOrCreateSubprogramDIE(DISubprogram SP); + DIE *getOrCreateSubprogramDIE(DISubprogram SP, bool Minimal = false); - void applySubprogramAttributes(DISubprogram SP, DIE &SPDie); - void applySubprogramAttributesToDefinition(DISubprogram SP, DIE &SPDie); - void applyVariableAttributes(const DbgVariable &Var, DIE &VariableDie); + void applySubprogramAttributes(DISubprogram SP, DIE &SPDie, + bool Minimal = false); /// getOrCreateTypeDIE - Find existing DIE or create new DIE for the /// given DIType. @@ -417,12 +299,8 @@ public: /// vtables. void constructContainingTypeDIEs(); - /// constructVariableDIE - Construct a DIE for the given DbgVariable. - std::unique_ptr constructVariableDIE(DbgVariable &DV, - bool Abstract = false); - /// constructSubprogramArguments - Construct function argument DIEs. - void constructSubprogramArguments(DIE &Buffer, DIArray Args); + void constructSubprogramArguments(DIE &Buffer, DITypeArray Args); /// Create a DIE with the given Tag, add the DIE to its parent, and /// call insertDIE if MD is not null. 
@@ -453,12 +331,13 @@ protected: /// none currently exists, create a new ID and insert it in the line table. virtual unsigned getOrCreateSourceID(StringRef File, StringRef Directory) = 0; -private: - /// \brief Construct a DIE for the given DbgVariable without initializing the - /// DbgVariable's DIE reference. - std::unique_ptr constructVariableDIEImpl(const DbgVariable &DV, - bool Abstract); + /// resolve - Look in the DwarfDebug map for the MDNode that + /// corresponds to the reference. + template T resolve(DIRef Ref) const { + return DD->resolve(Ref); + } +private: /// constructTypeDIE - Construct basic type die from DIBasicType. void constructTypeDIE(DIE &Buffer, DIBasicType BTy); @@ -503,7 +382,7 @@ private: } // getIndexTyDie - Get an anonymous type for index type. - DIE *getIndexTyDie() { return IndexTyDie; } + DIE *getIndexTyDie(); // setIndexTyDie - Set D as anonymous type for index which can be reused // later. @@ -513,56 +392,22 @@ private: /// information entry. DIEEntry *createDIEEntry(DIE &Entry); - /// resolve - Look in the DwarfDebug map for the MDNode that - /// corresponds to the reference. - template T resolve(DIRef Ref) const { - return DD->resolve(Ref); - } - /// If this is a named finished type then include it in the list of types for /// the accelerator tables. void updateAcceleratorTables(DIScope Context, DIType Ty, const DIE &TyDIE); -}; - -class DwarfCompileUnit : public DwarfUnit { - /// The attribute index of DW_AT_stmt_list in the compile unit DIE, avoiding - /// the need to search for it in applyStmtList. - unsigned stmtListIndex; - -public: - DwarfCompileUnit(unsigned UID, DICompileUnit Node, AsmPrinter *A, - DwarfDebug *DW, DwarfFile *DWU); - - void initStmtList(MCSymbol *DwarfLineSectionSym); - - /// Apply the DW_AT_stmt_list from this compile unit to the specified DIE. - void applyStmtList(DIE &D); - - /// createGlobalVariableDIE - create global variable DIE. - void createGlobalVariableDIE(DIGlobalVariable GV); - - /// addLabelAddress - Add a dwarf label attribute data and value using - /// either DW_FORM_addr or DW_FORM_GNU_addr_index. - void addLabelAddress(DIE &Die, dwarf::Attribute Attribute, - const MCSymbol *Label); - - /// addLocalLabelAddress - Add a dwarf label attribute data and value using - /// DW_FORM_addr only. 
- void addLocalLabelAddress(DIE &Die, dwarf::Attribute Attribute, - const MCSymbol *Label); - DwarfCompileUnit &getCU() override { return *this; } - - unsigned getOrCreateSourceID(StringRef FileName, StringRef DirName) override; + virtual bool isDwoUnit() const = 0; }; class DwarfTypeUnit : public DwarfUnit { -private: uint64_t TypeSignature; const DIE *Ty; DwarfCompileUnit &CU; MCDwarfDwoLineTable *SplitLineTable; + unsigned getOrCreateSourceID(StringRef File, StringRef Directory) override; + bool isDwoUnit() const override; + public: DwarfTypeUnit(unsigned UID, DwarfCompileUnit &CU, AsmPrinter *A, DwarfDebug *DW, DwarfFile *DWU, @@ -578,11 +423,8 @@ public: return DwarfUnit::getHeaderSize() + sizeof(uint64_t) + // Type Signature sizeof(uint32_t); // Type DIE Offset } - void initSection(const MCSection *Section); + using DwarfUnit::initSection; DwarfCompileUnit &getCU() override { return CU; } - -protected: - unsigned getOrCreateSourceID(StringRef File, StringRef Directory) override; }; } // end llvm namespace #endif diff --git a/lib/CodeGen/AsmPrinter/EHStreamer.cpp b/lib/CodeGen/AsmPrinter/EHStreamer.cpp index 73f62bf..2bbffb3 100644 --- a/lib/CodeGen/AsmPrinter/EHStreamer.cpp +++ b/lib/CodeGen/AsmPrinter/EHStreamer.cpp @@ -237,7 +237,7 @@ computeCallSiteTable(SmallVectorImpl &CallSites, // instruction between the previous try-range and this one may throw, // create a call-site entry with no landing pad for the region between the // try-ranges. - if (SawPotentiallyThrowing && Asm->MAI->isExceptionHandlingDwarf()) { + if (SawPotentiallyThrowing && Asm->MAI->usesItaniumLSDAForExceptions()) { CallSiteEntry Site = { LastLabel, BeginLabel, nullptr, 0 }; CallSites.push_back(Site); PreviousIsInvoke = false; @@ -259,7 +259,7 @@ computeCallSiteTable(SmallVectorImpl &CallSites, }; // Try to merge with the previous call-site. SJLJ doesn't do this - if (PreviousIsInvoke && Asm->MAI->isExceptionHandlingDwarf()) { + if (PreviousIsInvoke && Asm->MAI->usesItaniumLSDAForExceptions()) { CallSiteEntry &Prev = CallSites.back(); if (Site.PadLabel == Prev.PadLabel && Site.Action == Prev.Action) { // Extend the range of the previous entry. @@ -269,7 +269,7 @@ computeCallSiteTable(SmallVectorImpl &CallSites, } // Otherwise, create a new call-site. - if (Asm->MAI->isExceptionHandlingDwarf()) + if (Asm->MAI->usesItaniumLSDAForExceptions()) CallSites.push_back(Site); else { // SjLj EH must maintain the call sites in the order assigned @@ -287,7 +287,7 @@ computeCallSiteTable(SmallVectorImpl &CallSites, // If some instruction between the previous try-range and the end of the // function may throw, create a call-site entry with no landing pad for the // region following the try-range. - if (SawPotentiallyThrowing && Asm->MAI->isExceptionHandlingDwarf()) { + if (SawPotentiallyThrowing && Asm->MAI->usesItaniumLSDAForExceptions()) { CallSiteEntry Site = { LastLabel, nullptr, nullptr, 0 }; CallSites.push_back(Site); } @@ -314,7 +314,7 @@ computeCallSiteTable(SmallVectorImpl &CallSites, /// 3. Type ID table contains references to all the C++ typeinfo for all /// catches in the function. This tables is reverse indexed base 1. 
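Aside: in the computeCallSiteTable changes above, a new call site is folded into the previous entry when both resolve to the same landing pad and action (Itanium-LSDA style only; SjLj keeps every site in its assigned order). A self-contained sketch of that merge rule, with the entries reduced to plain integers and the PreviousIsInvoke bookkeeping omitted (not the real CallSiteEntry):

#include <cstdio>
#include <vector>

// Simplified call-site entry: a [Begin, End) label range plus the landing
// pad / action pair chosen for that range (PadLabel 0 == no landing pad).
struct CallSite {
  unsigned Begin, End;
  unsigned PadLabel;
  unsigned Action;
};

// Append Site, extending the previous entry instead when the pad and action
// match, as the Itanium-LSDA path does in the hunk above.
static void addCallSite(std::vector<CallSite> &Sites, CallSite Site,
                        bool UsesItaniumLSDA) {
  if (UsesItaniumLSDA && !Sites.empty() &&
      Sites.back().PadLabel == Site.PadLabel &&
      Sites.back().Action == Site.Action) {
    Sites.back().End = Site.End; // extend the previous range
    return;
  }
  Sites.push_back(Site);
}

int main() {
  std::vector<CallSite> Sites;
  addCallSite(Sites, {0, 8, 1, 3}, true);
  addCallSite(Sites, {8, 16, 1, 3}, true);  // merged into the first entry
  addCallSite(Sites, {16, 24, 2, 5}, true); // different pad: new entry
  std::printf("%zu call sites\n", Sites.size()); // prints "2 call sites"
  return 0;
}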
void EHStreamer::emitExceptionTable() { - const std::vector &TypeInfos = MMI->getTypeInfos(); + const std::vector &TypeInfos = MMI->getTypeInfos(); const std::vector &FilterIds = MMI->getFilterIds(); const std::vector &PadInfos = MMI->getLandingPads(); @@ -520,7 +520,7 @@ void EHStreamer::emitExceptionTable() { } } else { // DWARF Exception handling - assert(Asm->MAI->isExceptionHandlingDwarf()); + assert(Asm->MAI->usesItaniumLSDAForExceptions()); // The call-site table is a list of all call sites that may throw an // exception (including C++ 'throw' statements) in the procedure @@ -649,7 +649,7 @@ void EHStreamer::emitExceptionTable() { } void EHStreamer::emitTypeInfos(unsigned TTypeEncoding) { - const std::vector &TypeInfos = MMI->getTypeInfos(); + const std::vector &TypeInfos = MMI->getTypeInfos(); const std::vector &FilterIds = MMI->getFilterIds(); bool VerboseAsm = Asm->OutStreamer.isVerboseAsm(); @@ -662,9 +662,9 @@ void EHStreamer::emitTypeInfos(unsigned TTypeEncoding) { Entry = TypeInfos.size(); } - for (std::vector::const_reverse_iterator + for (std::vector::const_reverse_iterator I = TypeInfos.rbegin(), E = TypeInfos.rend(); I != E; ++I) { - const GlobalVariable *GV = *I; + const GlobalValue *GV = *I; if (VerboseAsm) Asm->OutStreamer.AddComment("TypeInfo " + Twine(Entry--)); Asm->EmitTTypeReference(GV, TTypeEncoding); diff --git a/lib/CodeGen/AsmPrinter/EHStreamer.h b/lib/CodeGen/AsmPrinter/EHStreamer.h index 2b6ba78..7e9549d 100644 --- a/lib/CodeGen/AsmPrinter/EHStreamer.h +++ b/lib/CodeGen/AsmPrinter/EHStreamer.h @@ -11,8 +11,8 @@ // //===----------------------------------------------------------------------===// -#ifndef LLVM_CODEGEN_ASMPRINTER_EHSTREAMER_H -#define LLVM_CODEGEN_ASMPRINTER_EHSTREAMER_H +#ifndef LLVM_LIB_CODEGEN_ASMPRINTER_EHSTREAMER_H +#define LLVM_LIB_CODEGEN_ASMPRINTER_EHSTREAMER_H #include "AsmPrinterHandler.h" #include "llvm/ADT/DenseMap.h" diff --git a/lib/CodeGen/AsmPrinter/ErlangGCPrinter.cpp b/lib/CodeGen/AsmPrinter/ErlangGCPrinter.cpp index bfcbe6b..5bda5a9 100644 --- a/lib/CodeGen/AsmPrinter/ErlangGCPrinter.cpp +++ b/lib/CodeGen/AsmPrinter/ErlangGCPrinter.cpp @@ -28,6 +28,7 @@ #include "llvm/MC/MCSymbol.h" #include "llvm/Target/TargetLoweringObjectFile.h" #include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetSubtargetInfo.h" using namespace llvm; @@ -50,7 +51,8 @@ void ErlangGCPrinter::beginAssembly(AsmPrinter &AP) { } void ErlangGCPrinter::finishAssembly(AsmPrinter &AP) { MCStreamer &OS = AP.OutStreamer; - unsigned IntPtrSize = AP.TM.getDataLayout()->getPointerSize(); + unsigned IntPtrSize = + AP.TM.getSubtargetImpl()->getDataLayout()->getPointerSize(); // Put this in a custom .note section. AP.OutStreamer.SwitchSection(AP.getObjFileLowering().getContext() diff --git a/lib/CodeGen/AsmPrinter/OcamlGCPrinter.cpp b/lib/CodeGen/AsmPrinter/OcamlGCPrinter.cpp index 5a9ecd7..6480d048 100644 --- a/lib/CodeGen/AsmPrinter/OcamlGCPrinter.cpp +++ b/lib/CodeGen/AsmPrinter/OcamlGCPrinter.cpp @@ -26,6 +26,7 @@ #include "llvm/Support/FormattedStream.h" #include "llvm/Target/TargetLoweringObjectFile.h" #include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetSubtargetInfo.h" #include using namespace llvm; @@ -91,7 +92,8 @@ void OcamlGCMetadataPrinter::beginAssembly(AsmPrinter &AP) { /// either condition is detected in a function which uses the GC. 
/// void OcamlGCMetadataPrinter::finishAssembly(AsmPrinter &AP) { - unsigned IntPtrSize = AP.TM.getDataLayout()->getPointerSize(); + unsigned IntPtrSize = + AP.TM.getSubtargetImpl()->getDataLayout()->getPointerSize(); AP.OutStreamer.SwitchSection(AP.getObjFileLowering().getTextSection()); EmitCamlGlobal(getModule(), AP, "code_end"); diff --git a/lib/CodeGen/AsmPrinter/Win64Exception.cpp b/lib/CodeGen/AsmPrinter/Win64Exception.cpp index 81285d5..0f0ad75 100644 --- a/lib/CodeGen/AsmPrinter/Win64Exception.cpp +++ b/lib/CodeGen/AsmPrinter/Win64Exception.cpp @@ -11,7 +11,7 @@ // //===----------------------------------------------------------------------===// -#include "DwarfException.h" +#include "Win64Exception.h" #include "llvm/ADT/SmallString.h" #include "llvm/ADT/StringExtras.h" #include "llvm/ADT/Twine.h" diff --git a/lib/CodeGen/AsmPrinter/Win64Exception.h b/lib/CodeGen/AsmPrinter/Win64Exception.h new file mode 100644 index 0000000..538e132 --- /dev/null +++ b/lib/CodeGen/AsmPrinter/Win64Exception.h @@ -0,0 +1,52 @@ +//===-- Win64Exception.h - Windows Exception Handling ----------*- C++ -*--===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains support for writing windows exception info into asm files. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_CODEGEN_ASMPRINTER_WIN64EXCEPTION_H +#define LLVM_LIB_CODEGEN_ASMPRINTER_WIN64EXCEPTION_H + +#include "EHStreamer.h" + +namespace llvm { +class MachineFunction; + +class Win64Exception : public EHStreamer { + /// Per-function flag to indicate if personality info should be emitted. + bool shouldEmitPersonality; + + /// Per-function flag to indicate if the LSDA should be emitted. + bool shouldEmitLSDA; + + /// Per-function flag to indicate if frame moves info should be emitted. + bool shouldEmitMoves; + +public: + //===--------------------------------------------------------------------===// + // Main entry points. + // + Win64Exception(AsmPrinter *A); + virtual ~Win64Exception(); + + /// Emit all exception information that should come after the content. + void endModule() override; + + /// Gather pre-function exception information. Assumes being emitted + /// immediately after the function entry point. + void beginFunction(const MachineFunction *MF) override; + + /// Gather and emit post-function exception information. + void endFunction(const MachineFunction *) override; +}; +} + +#endif + diff --git a/lib/CodeGen/AsmPrinter/WinCodeViewLineTables.cpp b/lib/CodeGen/AsmPrinter/WinCodeViewLineTables.cpp index 6a5c431..b5e0929 100644 --- a/lib/CodeGen/AsmPrinter/WinCodeViewLineTables.cpp +++ b/lib/CodeGen/AsmPrinter/WinCodeViewLineTables.cpp @@ -116,15 +116,67 @@ WinCodeViewLineTables::WinCodeViewLineTables(AsmPrinter *AP) Asm = AP; } +void WinCodeViewLineTables::endModule() { + if (FnDebugInfo.empty()) + return; + + assert(Asm != nullptr); + Asm->OutStreamer.SwitchSection( + Asm->getObjFileLowering().getCOFFDebugSymbolsSection()); + Asm->EmitInt32(COFF::DEBUG_SECTION_MAGIC); + + // The COFF .debug$S section consists of several subsections, each starting + // with a 4-byte control code (e.g. 0xF1, 0xF2, etc) and then a 4-byte length + // of the payload followed by the payload itself. The subsections are 4-byte + // aligned. + + // Emit per-function debug information. 
This code is extracted into a + // separate function for readability. + for (size_t I = 0, E = VisitedFunctions.size(); I != E; ++I) + emitDebugInfoForFunction(VisitedFunctions[I]); + + // This subsection holds a file index to offset in string table table. + Asm->OutStreamer.AddComment("File index to string table offset subsection"); + Asm->EmitInt32(COFF::DEBUG_INDEX_SUBSECTION); + size_t NumFilenames = FileNameRegistry.Infos.size(); + Asm->EmitInt32(8 * NumFilenames); + for (size_t I = 0, E = FileNameRegistry.Filenames.size(); I != E; ++I) { + StringRef Filename = FileNameRegistry.Filenames[I]; + // For each unique filename, just write its offset in the string table. + Asm->EmitInt32(FileNameRegistry.Infos[Filename].StartOffset); + // The function name offset is not followed by any additional data. + Asm->EmitInt32(0); + } + + // This subsection holds the string table. + Asm->OutStreamer.AddComment("String table"); + Asm->EmitInt32(COFF::DEBUG_STRING_TABLE_SUBSECTION); + Asm->EmitInt32(FileNameRegistry.LastOffset); + // The payload starts with a null character. + Asm->EmitInt8(0); + + for (size_t I = 0, E = FileNameRegistry.Filenames.size(); I != E; ++I) { + // Just emit unique filenames one by one, separated by a null character. + Asm->OutStreamer.EmitBytes(FileNameRegistry.Filenames[I]); + Asm->EmitInt8(0); + } + + // No more subsections. Fill with zeros to align the end of the section by 4. + Asm->OutStreamer.EmitFill((-FileNameRegistry.LastOffset) % 4, 0); + + clear(); +} + static void EmitLabelDiff(MCStreamer &Streamer, - const MCSymbol *From, const MCSymbol *To) { + const MCSymbol *From, const MCSymbol *To, + unsigned int Size = 4) { MCSymbolRefExpr::VariantKind Variant = MCSymbolRefExpr::VK_None; MCContext &Context = Streamer.getContext(); const MCExpr *FromRef = MCSymbolRefExpr::Create(From, Variant, Context), *ToRef = MCSymbolRefExpr::Create(To, Variant, Context); const MCExpr *AddrDelta = MCBinaryExpr::Create(MCBinaryExpr::Sub, ToRef, FromRef, Context); - Streamer.EmitValue(AddrDelta, 4); + Streamer.EmitValue(AddrDelta, Size); } void WinCodeViewLineTables::emitDebugInfoForFunction(const Function *GV) { @@ -138,6 +190,51 @@ void WinCodeViewLineTables::emitDebugInfoForFunction(const Function *GV) { return; assert(FI.End && "Don't know where the function ends?"); + StringRef FuncName = getDISubprogram(GV).getDisplayName(), + GVName = GV->getName(); + // FIXME Clang currently sets DisplayName to "bar" for a C++ + // "namespace_foo::bar" function, see PR21528. Luckily, dbghelp.dll is trying + // to demangle display names anyways, so let's just put a mangled name into + // the symbols subsection until Clang gives us what we need. + if (GVName.startswith("\01?")) + FuncName = GVName.substr(1); + // Emit a symbol subsection, required by VS2012+ to find function boundaries. 
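
The endModule code above follows the layout described at the top of the function: every .debug$S subsection is a 4-byte control code, a 4-byte payload length, the payload itself, and zero padding up to the next 4-byte boundary. A minimal standalone sketch of that framing using plain byte buffers rather than the MC streamer (names and the 0xF3 control code are illustrative, not the COFF constants):

    #include <cstdint>
    #include <cstdio>
    #include <string>
    #include <vector>

    // Append a little-endian 32-bit value, like EmitInt32 does.
    static void appendU32(std::vector<uint8_t> &Out, uint32_t V) {
      for (int I = 0; I < 4; ++I)
        Out.push_back(static_cast<uint8_t>(V >> (8 * I)));
    }

    // One subsection: control code, payload length, payload, pad to 4 bytes.
    static void appendSubsection(std::vector<uint8_t> &Out, uint32_t ControlCode,
                                 const std::string &Payload) {
      appendU32(Out, ControlCode);
      appendU32(Out, static_cast<uint32_t>(Payload.size()));
      Out.insert(Out.end(), Payload.begin(), Payload.end());
      // Same effect as EmitFill((-Size) % 4, 0): zero-pad to a 4-byte boundary.
      Out.insert(Out.end(), (4 - Payload.size() % 4) % 4, 0);
    }

    int main() {
      std::vector<uint8_t> Section;
      appendSubsection(Section, 0xF3u, std::string("a.cpp", 6)); // includes the NUL
      std::printf("section is %zu bytes\n", Section.size());
    }
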
+ MCSymbol *SymbolsBegin = Asm->MMI->getContext().CreateTempSymbol(), + *SymbolsEnd = Asm->MMI->getContext().CreateTempSymbol(); + Asm->OutStreamer.AddComment("Symbol subsection for " + Twine(FuncName)); + Asm->EmitInt32(COFF::DEBUG_SYMBOL_SUBSECTION); + EmitLabelDiff(Asm->OutStreamer, SymbolsBegin, SymbolsEnd); + Asm->OutStreamer.EmitLabel(SymbolsBegin); + { + MCSymbol *ProcSegmentBegin = Asm->MMI->getContext().CreateTempSymbol(), + *ProcSegmentEnd = Asm->MMI->getContext().CreateTempSymbol(); + EmitLabelDiff(Asm->OutStreamer, ProcSegmentBegin, ProcSegmentEnd, 2); + Asm->OutStreamer.EmitLabel(ProcSegmentBegin); + + Asm->EmitInt16(COFF::DEBUG_SYMBOL_TYPE_PROC_START); + // Some bytes of this segment don't seem to be required for basic debugging, + // so just fill them with zeroes. + Asm->OutStreamer.EmitFill(12, 0); + // This is the important bit that tells the debugger where the function + // code is located and what's its size: + EmitLabelDiff(Asm->OutStreamer, Fn, FI.End); + Asm->OutStreamer.EmitFill(12, 0); + Asm->OutStreamer.EmitCOFFSecRel32(Fn); + Asm->OutStreamer.EmitCOFFSectionIndex(Fn); + Asm->EmitInt8(0); + // Emit the function display name as a null-terminated string. + Asm->OutStreamer.EmitBytes(FuncName); + Asm->EmitInt8(0); + Asm->OutStreamer.EmitLabel(ProcSegmentEnd); + + // We're done with this function. + Asm->EmitInt16(0x0002); + Asm->EmitInt16(COFF::DEBUG_SYMBOL_TYPE_PROC_END); + } + Asm->OutStreamer.EmitLabel(SymbolsEnd); + // Every subsection must be aligned to a 4-byte boundary. + Asm->OutStreamer.EmitFill((-FuncName.size()) % 4, 0); + // PCs/Instructions are grouped into segments sharing the same filename. // Pre-calculate the lengths (in instructions) of these segments and store // them in a map for convenience. Each index in the map is the sequential @@ -154,18 +251,19 @@ void WinCodeViewLineTables::emitDebugInfoForFunction(const Function *GV) { } FilenameSegmentLengths[LastSegmentEnd] = FI.Instrs.size() - LastSegmentEnd; - // Emit the control code of the subsection followed by the payload size. - Asm->OutStreamer.AddComment( - "Linetable subsection for " + Twine(Fn->getName())); + // Emit a line table subsection, requred to do PC-to-file:line lookup. + Asm->OutStreamer.AddComment("Line table subsection for " + Twine(FuncName)); Asm->EmitInt32(COFF::DEBUG_LINE_TABLE_SUBSECTION); - MCSymbol *SubsectionBegin = Asm->MMI->getContext().CreateTempSymbol(), - *SubsectionEnd = Asm->MMI->getContext().CreateTempSymbol(); - EmitLabelDiff(Asm->OutStreamer, SubsectionBegin, SubsectionEnd); - Asm->OutStreamer.EmitLabel(SubsectionBegin); + MCSymbol *LineTableBegin = Asm->MMI->getContext().CreateTempSymbol(), + *LineTableEnd = Asm->MMI->getContext().CreateTempSymbol(); + EmitLabelDiff(Asm->OutStreamer, LineTableBegin, LineTableEnd); + Asm->OutStreamer.EmitLabel(LineTableBegin); // Identify the function this subsection is for. Asm->OutStreamer.EmitCOFFSecRel32(Fn); Asm->OutStreamer.EmitCOFFSectionIndex(Fn); + // Insert padding after a 16-bit section index. + Asm->EmitInt16(0); // Length of the function's code, in bytes. 
EmitLabelDiff(Asm->OutStreamer, Fn, FI.End); @@ -209,56 +307,7 @@ void WinCodeViewLineTables::emitDebugInfoForFunction(const Function *GV) { if (FileSegmentEnd) Asm->OutStreamer.EmitLabel(FileSegmentEnd); - Asm->OutStreamer.EmitLabel(SubsectionEnd); -} - -void WinCodeViewLineTables::endModule() { - if (FnDebugInfo.empty()) - return; - - assert(Asm != nullptr); - Asm->OutStreamer.SwitchSection( - Asm->getObjFileLowering().getCOFFDebugSymbolsSection()); - Asm->EmitInt32(COFF::DEBUG_SECTION_MAGIC); - - // The COFF .debug$S section consists of several subsections, each starting - // with a 4-byte control code (e.g. 0xF1, 0xF2, etc) and then a 4-byte length - // of the payload followed by the payload itself. The subsections are 4-byte - // aligned. - - for (size_t I = 0, E = VisitedFunctions.size(); I != E; ++I) - emitDebugInfoForFunction(VisitedFunctions[I]); - - // This subsection holds a file index to offset in string table table. - Asm->OutStreamer.AddComment("File index to string table offset subsection"); - Asm->EmitInt32(COFF::DEBUG_INDEX_SUBSECTION); - size_t NumFilenames = FileNameRegistry.Infos.size(); - Asm->EmitInt32(8 * NumFilenames); - for (size_t I = 0, E = FileNameRegistry.Filenames.size(); I != E; ++I) { - StringRef Filename = FileNameRegistry.Filenames[I]; - // For each unique filename, just write it's offset in the string table. - Asm->EmitInt32(FileNameRegistry.Infos[Filename].StartOffset); - // The function name offset is not followed by any additional data. - Asm->EmitInt32(0); - } - - // This subsection holds the string table. - Asm->OutStreamer.AddComment("String table"); - Asm->EmitInt32(COFF::DEBUG_STRING_TABLE_SUBSECTION); - Asm->EmitInt32(FileNameRegistry.LastOffset); - // The payload starts with a null character. - Asm->EmitInt8(0); - - for (size_t I = 0, E = FileNameRegistry.Filenames.size(); I != E; ++I) { - // Just emit unique filenames one by one, separated by a null character. - Asm->OutStreamer.EmitBytes(FileNameRegistry.Filenames[I]); - Asm->EmitInt8(0); - } - - // No more subsections. Fill with zeros to align the end of the section by 4. - Asm->OutStreamer.EmitFill((-FileNameRegistry.LastOffset) % 4, 0); - - clear(); + Asm->OutStreamer.EmitLabel(LineTableEnd); } void WinCodeViewLineTables::beginFunction(const MachineFunction *MF) { diff --git a/lib/CodeGen/AsmPrinter/WinCodeViewLineTables.h b/lib/CodeGen/AsmPrinter/WinCodeViewLineTables.h index 0734d97..8492eac 100644 --- a/lib/CodeGen/AsmPrinter/WinCodeViewLineTables.h +++ b/lib/CodeGen/AsmPrinter/WinCodeViewLineTables.h @@ -11,8 +11,8 @@ // //===----------------------------------------------------------------------===// -#ifndef CODEGEN_ASMPRINTER_WINCODEVIEWLINETABLES_H__ -#define CODEGEN_ASMPRINTER_WINCODEVIEWLINETABLES_H__ +#ifndef LLVM_LIB_CODEGEN_ASMPRINTER_WINCODEVIEWLINETABLES_H +#define LLVM_LIB_CODEGEN_ASMPRINTER_WINCODEVIEWLINETABLES_H #include "AsmPrinterHandler.h" #include "llvm/ADT/DenseMap.h" diff --git a/lib/CodeGen/AtomicExpandLoadLinkedPass.cpp b/lib/CodeGen/AtomicExpandLoadLinkedPass.cpp deleted file mode 100644 index 421946d..0000000 --- a/lib/CodeGen/AtomicExpandLoadLinkedPass.cpp +++ /dev/null @@ -1,380 +0,0 @@ -//===-- AtomicExpandLoadLinkedPass.cpp - Expand atomic instructions -------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. 
-// -//===----------------------------------------------------------------------===// -// -// This file contains a pass (at IR level) to replace atomic instructions with -// appropriate (intrinsic-based) ldrex/strex loops. -// -//===----------------------------------------------------------------------===// - -#include "llvm/CodeGen/Passes.h" -#include "llvm/IR/Function.h" -#include "llvm/IR/IRBuilder.h" -#include "llvm/IR/Instructions.h" -#include "llvm/IR/Intrinsics.h" -#include "llvm/IR/Module.h" -#include "llvm/Support/Debug.h" -#include "llvm/Target/TargetLowering.h" -#include "llvm/Target/TargetMachine.h" -#include "llvm/Target/TargetSubtargetInfo.h" - -using namespace llvm; - -#define DEBUG_TYPE "arm-atomic-expand" - -namespace { - class AtomicExpandLoadLinked : public FunctionPass { - const TargetMachine *TM; - public: - static char ID; // Pass identification, replacement for typeid - explicit AtomicExpandLoadLinked(const TargetMachine *TM = nullptr) - : FunctionPass(ID), TM(TM) { - initializeAtomicExpandLoadLinkedPass(*PassRegistry::getPassRegistry()); - } - - bool runOnFunction(Function &F) override; - bool expandAtomicInsts(Function &F); - - bool expandAtomicLoad(LoadInst *LI); - bool expandAtomicStore(StoreInst *LI); - bool expandAtomicRMW(AtomicRMWInst *AI); - bool expandAtomicCmpXchg(AtomicCmpXchgInst *CI); - - AtomicOrdering insertLeadingFence(IRBuilder<> &Builder, AtomicOrdering Ord); - void insertTrailingFence(IRBuilder<> &Builder, AtomicOrdering Ord); - }; -} - -char AtomicExpandLoadLinked::ID = 0; -char &llvm::AtomicExpandLoadLinkedID = AtomicExpandLoadLinked::ID; -INITIALIZE_TM_PASS(AtomicExpandLoadLinked, "atomic-ll-sc", - "Expand Atomic calls in terms of load-linked & store-conditional", - false, false) - -FunctionPass *llvm::createAtomicExpandLoadLinkedPass(const TargetMachine *TM) { - return new AtomicExpandLoadLinked(TM); -} - -bool AtomicExpandLoadLinked::runOnFunction(Function &F) { - if (!TM || !TM->getSubtargetImpl()->enableAtomicExpandLoadLinked()) - return false; - - SmallVector AtomicInsts; - - // Changing control-flow while iterating through it is a bad idea, so gather a - // list of all atomic instructions before we start. - for (BasicBlock &BB : F) - for (Instruction &Inst : BB) { - if (isa(&Inst) || isa(&Inst) || - (isa(&Inst) && cast(&Inst)->isAtomic()) || - (isa(&Inst) && cast(&Inst)->isAtomic())) - AtomicInsts.push_back(&Inst); - } - - bool MadeChange = false; - for (Instruction *Inst : AtomicInsts) { - if (!TM->getTargetLowering()->shouldExpandAtomicInIR(Inst)) - continue; - - if (AtomicRMWInst *AI = dyn_cast(Inst)) - MadeChange |= expandAtomicRMW(AI); - else if (AtomicCmpXchgInst *CI = dyn_cast(Inst)) - MadeChange |= expandAtomicCmpXchg(CI); - else if (LoadInst *LI = dyn_cast(Inst)) - MadeChange |= expandAtomicLoad(LI); - else if (StoreInst *SI = dyn_cast(Inst)) - MadeChange |= expandAtomicStore(SI); - else - llvm_unreachable("Unknown atomic instruction"); - } - - return MadeChange; -} - -bool AtomicExpandLoadLinked::expandAtomicLoad(LoadInst *LI) { - // Load instructions don't actually need a leading fence, even in the - // SequentiallyConsistent case. - AtomicOrdering MemOpOrder = - TM->getTargetLowering()->getInsertFencesForAtomic() ? Monotonic - : LI->getOrdering(); - - // The only 64-bit load guaranteed to be single-copy atomic by the ARM ARM is - // an ldrexd (A3.5.3). 
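
Both the deleted pass above and the AtomicExpandPass that replaces it later in this patch first gather every atomic instruction into a worklist and only rewrite them afterwards, since erasing instructions and splitting blocks mid-walk would invalidate the traversal. The same collect-then-rewrite pattern with plain standard containers, as a sketch:

    #include <cstdio>
    #include <list>
    #include <vector>

    int main() {
      std::list<int> Insts = {1, 2, 3, 4, 5, 6};   // stand-in for instructions
      std::vector<std::list<int>::iterator> Worklist;
      for (auto It = Insts.begin(); It != Insts.end(); ++It)
        if (*It % 2 == 0)                          // stand-in for "is atomic"
          Worklist.push_back(It);
      // The traversal is finished, so mutating the container is now safe.
      for (auto It : Worklist)
        Insts.erase(It);
      std::printf("%zu instructions left\n", Insts.size());
    }
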
- IRBuilder<> Builder(LI); - Value *Val = TM->getTargetLowering()->emitLoadLinked( - Builder, LI->getPointerOperand(), MemOpOrder); - - insertTrailingFence(Builder, LI->getOrdering()); - - LI->replaceAllUsesWith(Val); - LI->eraseFromParent(); - - return true; -} - -bool AtomicExpandLoadLinked::expandAtomicStore(StoreInst *SI) { - // The only atomic 64-bit store on ARM is an strexd that succeeds, which means - // we need a loop and the entire instruction is essentially an "atomicrmw - // xchg" that ignores the value loaded. - IRBuilder<> Builder(SI); - AtomicRMWInst *AI = - Builder.CreateAtomicRMW(AtomicRMWInst::Xchg, SI->getPointerOperand(), - SI->getValueOperand(), SI->getOrdering()); - SI->eraseFromParent(); - - // Now we have an appropriate swap instruction, lower it as usual. - return expandAtomicRMW(AI); -} - -bool AtomicExpandLoadLinked::expandAtomicRMW(AtomicRMWInst *AI) { - AtomicOrdering Order = AI->getOrdering(); - Value *Addr = AI->getPointerOperand(); - BasicBlock *BB = AI->getParent(); - Function *F = BB->getParent(); - LLVMContext &Ctx = F->getContext(); - - // Given: atomicrmw some_op iN* %addr, iN %incr ordering - // - // The standard expansion we produce is: - // [...] - // fence? - // atomicrmw.start: - // %loaded = @load.linked(%addr) - // %new = some_op iN %loaded, %incr - // %stored = @store_conditional(%new, %addr) - // %try_again = icmp i32 ne %stored, 0 - // br i1 %try_again, label %loop, label %atomicrmw.end - // atomicrmw.end: - // fence? - // [...] - BasicBlock *ExitBB = BB->splitBasicBlock(AI, "atomicrmw.end"); - BasicBlock *LoopBB = BasicBlock::Create(Ctx, "atomicrmw.start", F, ExitBB); - - // This grabs the DebugLoc from AI. - IRBuilder<> Builder(AI); - - // The split call above "helpfully" added a branch at the end of BB (to the - // wrong place), but we might want a fence too. It's easiest to just remove - // the branch entirely. - std::prev(BB->end())->eraseFromParent(); - Builder.SetInsertPoint(BB); - AtomicOrdering MemOpOrder = insertLeadingFence(Builder, Order); - Builder.CreateBr(LoopBB); - - // Start the main loop block now that we've taken care of the preliminaries. 
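
The deleted expansion above loops until the store-conditional succeeds. Plain C++ has no load-linked/store-conditional, but a compare_exchange_weak loop has the same retry shape; a hedged illustration of that shape (not the pass itself), using nand as the sample operation:

    #include <atomic>
    #include <cstdio>

    // Returns the old value, like atomicrmw does.
    static int atomicNand(std::atomic<int> &A, int Inc) {
      int Loaded = A.load(std::memory_order_relaxed);
      int NewVal;
      do {
        NewVal = ~(Loaded & Inc);                       // %new = some_op %loaded, %incr
      } while (!A.compare_exchange_weak(Loaded, NewVal, // retry, like the %try_again branch
                                        std::memory_order_seq_cst,
                                        std::memory_order_relaxed));
      return Loaded;
    }

    int main() {
      std::atomic<int> A{10};
      int Old = atomicNand(A, 6);
      std::printf("old=%d new=%d\n", Old, A.load());
    }
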
- Builder.SetInsertPoint(LoopBB); - Value *Loaded = - TM->getTargetLowering()->emitLoadLinked(Builder, Addr, MemOpOrder); - - Value *NewVal; - switch (AI->getOperation()) { - case AtomicRMWInst::Xchg: - NewVal = AI->getValOperand(); - break; - case AtomicRMWInst::Add: - NewVal = Builder.CreateAdd(Loaded, AI->getValOperand(), "new"); - break; - case AtomicRMWInst::Sub: - NewVal = Builder.CreateSub(Loaded, AI->getValOperand(), "new"); - break; - case AtomicRMWInst::And: - NewVal = Builder.CreateAnd(Loaded, AI->getValOperand(), "new"); - break; - case AtomicRMWInst::Nand: - NewVal = Builder.CreateNot(Builder.CreateAnd(Loaded, AI->getValOperand()), - "new"); - break; - case AtomicRMWInst::Or: - NewVal = Builder.CreateOr(Loaded, AI->getValOperand(), "new"); - break; - case AtomicRMWInst::Xor: - NewVal = Builder.CreateXor(Loaded, AI->getValOperand(), "new"); - break; - case AtomicRMWInst::Max: - NewVal = Builder.CreateICmpSGT(Loaded, AI->getValOperand()); - NewVal = Builder.CreateSelect(NewVal, Loaded, AI->getValOperand(), "new"); - break; - case AtomicRMWInst::Min: - NewVal = Builder.CreateICmpSLE(Loaded, AI->getValOperand()); - NewVal = Builder.CreateSelect(NewVal, Loaded, AI->getValOperand(), "new"); - break; - case AtomicRMWInst::UMax: - NewVal = Builder.CreateICmpUGT(Loaded, AI->getValOperand()); - NewVal = Builder.CreateSelect(NewVal, Loaded, AI->getValOperand(), "new"); - break; - case AtomicRMWInst::UMin: - NewVal = Builder.CreateICmpULE(Loaded, AI->getValOperand()); - NewVal = Builder.CreateSelect(NewVal, Loaded, AI->getValOperand(), "new"); - break; - default: - llvm_unreachable("Unknown atomic op"); - } - - Value *StoreSuccess = TM->getTargetLowering()->emitStoreConditional( - Builder, NewVal, Addr, MemOpOrder); - Value *TryAgain = Builder.CreateICmpNE( - StoreSuccess, ConstantInt::get(IntegerType::get(Ctx, 32), 0), "tryagain"); - Builder.CreateCondBr(TryAgain, LoopBB, ExitBB); - - Builder.SetInsertPoint(ExitBB, ExitBB->begin()); - insertTrailingFence(Builder, Order); - - AI->replaceAllUsesWith(Loaded); - AI->eraseFromParent(); - - return true; -} - -bool AtomicExpandLoadLinked::expandAtomicCmpXchg(AtomicCmpXchgInst *CI) { - AtomicOrdering SuccessOrder = CI->getSuccessOrdering(); - AtomicOrdering FailureOrder = CI->getFailureOrdering(); - Value *Addr = CI->getPointerOperand(); - BasicBlock *BB = CI->getParent(); - Function *F = BB->getParent(); - LLVMContext &Ctx = F->getContext(); - - // Given: cmpxchg some_op iN* %addr, iN %desired, iN %new success_ord fail_ord - // - // The full expansion we produce is: - // [...] - // fence? - // cmpxchg.start: - // %loaded = @load.linked(%addr) - // %should_store = icmp eq %loaded, %desired - // br i1 %should_store, label %cmpxchg.trystore, - // label %cmpxchg.failure - // cmpxchg.trystore: - // %stored = @store_conditional(%new, %addr) - // %success = icmp eq i32 %stored, 0 - // br i1 %success, label %cmpxchg.success, label %loop/%cmpxchg.failure - // cmpxchg.success: - // fence? - // br label %cmpxchg.end - // cmpxchg.failure: - // fence? - // br label %cmpxchg.end - // cmpxchg.end: - // %success = phi i1 [true, %cmpxchg.success], [false, %cmpxchg.failure] - // %restmp = insertvalue { iN, i1 } undef, iN %loaded, 0 - // %res = insertvalue { iN, i1 } %restmp, i1 %success, 1 - // [...] 
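
The deleted expansion sketched above ends by packing the observed value and a success flag back into an { iN, i1 } struct. A portable compare_exchange_strong hands back exactly that pair, since it writes the value it saw into the expected argument on failure; a small standalone sketch:

    #include <atomic>
    #include <cstdio>
    #include <utility>

    static std::pair<int, bool> cmpxchg(std::atomic<int> &A, int Desired, int New) {
      int Loaded = Desired;                          // the value we expect to find
      bool Success = A.compare_exchange_strong(Loaded, New);
      return {Loaded, Success};                      // mirrors %restmp / %res above
    }

    int main() {
      std::atomic<int> A{7};
      auto R = cmpxchg(A, 7, 9);                     // succeeds: A was 7
      std::printf("loaded=%d success=%d now=%d\n", R.first, R.second, A.load());
    }
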
- BasicBlock *ExitBB = BB->splitBasicBlock(CI, "cmpxchg.end"); - auto FailureBB = BasicBlock::Create(Ctx, "cmpxchg.failure", F, ExitBB); - auto SuccessBB = BasicBlock::Create(Ctx, "cmpxchg.success", F, FailureBB); - auto TryStoreBB = BasicBlock::Create(Ctx, "cmpxchg.trystore", F, SuccessBB); - auto LoopBB = BasicBlock::Create(Ctx, "cmpxchg.start", F, TryStoreBB); - - // This grabs the DebugLoc from CI - IRBuilder<> Builder(CI); - - // The split call above "helpfully" added a branch at the end of BB (to the - // wrong place), but we might want a fence too. It's easiest to just remove - // the branch entirely. - std::prev(BB->end())->eraseFromParent(); - Builder.SetInsertPoint(BB); - AtomicOrdering MemOpOrder = insertLeadingFence(Builder, SuccessOrder); - Builder.CreateBr(LoopBB); - - // Start the main loop block now that we've taken care of the preliminaries. - Builder.SetInsertPoint(LoopBB); - Value *Loaded = - TM->getTargetLowering()->emitLoadLinked(Builder, Addr, MemOpOrder); - Value *ShouldStore = - Builder.CreateICmpEQ(Loaded, CI->getCompareOperand(), "should_store"); - - // If the the cmpxchg doesn't actually need any ordering when it fails, we can - // jump straight past that fence instruction (if it exists). - Builder.CreateCondBr(ShouldStore, TryStoreBB, FailureBB); - - Builder.SetInsertPoint(TryStoreBB); - Value *StoreSuccess = TM->getTargetLowering()->emitStoreConditional( - Builder, CI->getNewValOperand(), Addr, MemOpOrder); - StoreSuccess = Builder.CreateICmpEQ( - StoreSuccess, ConstantInt::get(Type::getInt32Ty(Ctx), 0), "success"); - Builder.CreateCondBr(StoreSuccess, SuccessBB, - CI->isWeak() ? FailureBB : LoopBB); - - // Make sure later instructions don't get reordered with a fence if necessary. - Builder.SetInsertPoint(SuccessBB); - insertTrailingFence(Builder, SuccessOrder); - Builder.CreateBr(ExitBB); - - Builder.SetInsertPoint(FailureBB); - insertTrailingFence(Builder, FailureOrder); - Builder.CreateBr(ExitBB); - - // Finally, we have control-flow based knowledge of whether the cmpxchg - // succeeded or not. We expose this to later passes by converting any - // subsequent "icmp eq/ne %loaded, %oldval" into a use of an appropriate PHI. - - // Setup the builder so we can create any PHIs we need. - Builder.SetInsertPoint(ExitBB, ExitBB->begin()); - PHINode *Success = Builder.CreatePHI(Type::getInt1Ty(Ctx), 2); - Success->addIncoming(ConstantInt::getTrue(Ctx), SuccessBB); - Success->addIncoming(ConstantInt::getFalse(Ctx), FailureBB); - - // Look for any users of the cmpxchg that are just comparing the loaded value - // against the desired one, and replace them with the CFG-derived version. - SmallVector PrunedInsts; - for (auto User : CI->users()) { - ExtractValueInst *EV = dyn_cast(User); - if (!EV) - continue; - - assert(EV->getNumIndices() == 1 && EV->getIndices()[0] <= 1 && - "weird extraction from { iN, i1 }"); - - if (EV->getIndices()[0] == 0) - EV->replaceAllUsesWith(Loaded); - else - EV->replaceAllUsesWith(Success); - - PrunedInsts.push_back(EV); - } - - // We can remove the instructions now we're no longer iterating through them. - for (auto EV : PrunedInsts) - EV->eraseFromParent(); - - if (!CI->use_empty()) { - // Some use of the full struct return that we don't understand has happened, - // so we've got to reconstruct it properly. 
- Value *Res; - Res = Builder.CreateInsertValue(UndefValue::get(CI->getType()), Loaded, 0); - Res = Builder.CreateInsertValue(Res, Success, 1); - - CI->replaceAllUsesWith(Res); - } - - CI->eraseFromParent(); - return true; -} - -AtomicOrdering AtomicExpandLoadLinked::insertLeadingFence(IRBuilder<> &Builder, - AtomicOrdering Ord) { - if (!TM->getTargetLowering()->getInsertFencesForAtomic()) - return Ord; - - if (Ord == Release || Ord == AcquireRelease || Ord == SequentiallyConsistent) - Builder.CreateFence(Release); - - // The exclusive operations don't need any barrier if we're adding separate - // fences. - return Monotonic; -} - -void AtomicExpandLoadLinked::insertTrailingFence(IRBuilder<> &Builder, - AtomicOrdering Ord) { - if (!TM->getTargetLowering()->getInsertFencesForAtomic()) - return; - - if (Ord == Acquire || Ord == AcquireRelease) - Builder.CreateFence(Acquire); - else if (Ord == SequentiallyConsistent) - Builder.CreateFence(SequentiallyConsistent); -} diff --git a/lib/CodeGen/AtomicExpandPass.cpp b/lib/CodeGen/AtomicExpandPass.cpp new file mode 100644 index 0000000..12f6bd7 --- /dev/null +++ b/lib/CodeGen/AtomicExpandPass.cpp @@ -0,0 +1,563 @@ +//===-- AtomicExpandPass.cpp - Expand atomic instructions -------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains a pass (at IR level) to replace atomic instructions with +// either (intrinsic-based) load-linked/store-conditional loops or AtomicCmpXchg. +// +//===----------------------------------------------------------------------===// + +#include "llvm/CodeGen/Passes.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/IRBuilder.h" +#include "llvm/IR/InstIterator.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/Intrinsics.h" +#include "llvm/IR/Module.h" +#include "llvm/Support/Debug.h" +#include "llvm/Target/TargetLowering.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetSubtargetInfo.h" + +using namespace llvm; + +#define DEBUG_TYPE "atomic-expand" + +namespace { + class AtomicExpand: public FunctionPass { + const TargetMachine *TM; + public: + static char ID; // Pass identification, replacement for typeid + explicit AtomicExpand(const TargetMachine *TM = nullptr) + : FunctionPass(ID), TM(TM) { + initializeAtomicExpandPass(*PassRegistry::getPassRegistry()); + } + + bool runOnFunction(Function &F) override; + + private: + bool bracketInstWithFences(Instruction *I, AtomicOrdering Order, + bool IsStore, bool IsLoad); + bool expandAtomicLoad(LoadInst *LI); + bool expandAtomicLoadToLL(LoadInst *LI); + bool expandAtomicLoadToCmpXchg(LoadInst *LI); + bool expandAtomicStore(StoreInst *SI); + bool expandAtomicRMW(AtomicRMWInst *AI); + bool expandAtomicRMWToLLSC(AtomicRMWInst *AI); + bool expandAtomicRMWToCmpXchg(AtomicRMWInst *AI); + bool expandAtomicCmpXchg(AtomicCmpXchgInst *CI); + bool isIdempotentRMW(AtomicRMWInst *AI); + bool simplifyIdempotentRMW(AtomicRMWInst *AI); + }; +} + +char AtomicExpand::ID = 0; +char &llvm::AtomicExpandID = AtomicExpand::ID; +INITIALIZE_TM_PASS(AtomicExpand, "atomic-expand", + "Expand Atomic calls in terms of either load-linked & store-conditional or cmpxchg", + false, false) + +FunctionPass *llvm::createAtomicExpandPass(const TargetMachine *TM) { + return new AtomicExpand(TM); +} + +bool AtomicExpand::runOnFunction(Function &F) { + if (!TM || 
!TM->getSubtargetImpl()->enableAtomicExpand()) + return false; + auto TargetLowering = TM->getSubtargetImpl()->getTargetLowering(); + + SmallVector AtomicInsts; + + // Changing control-flow while iterating through it is a bad idea, so gather a + // list of all atomic instructions before we start. + for (inst_iterator I = inst_begin(F), E = inst_end(F); I != E; ++I) { + if (I->isAtomic()) + AtomicInsts.push_back(&*I); + } + + bool MadeChange = false; + for (auto I : AtomicInsts) { + auto LI = dyn_cast(I); + auto SI = dyn_cast(I); + auto RMWI = dyn_cast(I); + auto CASI = dyn_cast(I); + assert((LI || SI || RMWI || CASI || isa(I)) && + "Unknown atomic instruction"); + + auto FenceOrdering = Monotonic; + bool IsStore, IsLoad; + if (TargetLowering->getInsertFencesForAtomic()) { + if (LI && isAtLeastAcquire(LI->getOrdering())) { + FenceOrdering = LI->getOrdering(); + LI->setOrdering(Monotonic); + IsStore = false; + IsLoad = true; + } else if (SI && isAtLeastRelease(SI->getOrdering())) { + FenceOrdering = SI->getOrdering(); + SI->setOrdering(Monotonic); + IsStore = true; + IsLoad = false; + } else if (RMWI && (isAtLeastRelease(RMWI->getOrdering()) || + isAtLeastAcquire(RMWI->getOrdering()))) { + FenceOrdering = RMWI->getOrdering(); + RMWI->setOrdering(Monotonic); + IsStore = IsLoad = true; + } else if (CASI && !TargetLowering->hasLoadLinkedStoreConditional() && + (isAtLeastRelease(CASI->getSuccessOrdering()) || + isAtLeastAcquire(CASI->getSuccessOrdering()))) { + // If a compare and swap is lowered to LL/SC, we can do smarter fence + // insertion, with a stronger one on the success path than on the + // failure path. As a result, fence insertion is directly done by + // expandAtomicCmpXchg in that case. + FenceOrdering = CASI->getSuccessOrdering(); + CASI->setSuccessOrdering(Monotonic); + CASI->setFailureOrdering(Monotonic); + IsStore = IsLoad = true; + } + + if (FenceOrdering != Monotonic) { + MadeChange |= bracketInstWithFences(I, FenceOrdering, IsStore, IsLoad); + } + } + + if (LI && TargetLowering->shouldExpandAtomicLoadInIR(LI)) { + MadeChange |= expandAtomicLoad(LI); + } else if (SI && TargetLowering->shouldExpandAtomicStoreInIR(SI)) { + MadeChange |= expandAtomicStore(SI); + } else if (RMWI) { + // There are two different ways of expanding RMW instructions: + // - into a load if it is idempotent + // - into a Cmpxchg/LL-SC loop otherwise + // we try them in that order. + MadeChange |= (isIdempotentRMW(RMWI) && + simplifyIdempotentRMW(RMWI)) || + (TargetLowering->shouldExpandAtomicRMWInIR(RMWI) && + expandAtomicRMW(RMWI)); + } else if (CASI && TargetLowering->hasLoadLinkedStoreConditional()) { + MadeChange |= expandAtomicCmpXchg(CASI); + } + } + return MadeChange; +} + +bool AtomicExpand::bracketInstWithFences(Instruction *I, AtomicOrdering Order, + bool IsStore, bool IsLoad) { + IRBuilder<> Builder(I); + + auto LeadingFence = + TM->getSubtargetImpl()->getTargetLowering()->emitLeadingFence( + Builder, Order, IsStore, IsLoad); + + auto TrailingFence = + TM->getSubtargetImpl()->getTargetLowering()->emitTrailingFence( + Builder, Order, IsStore, IsLoad); + // The trailing fence is emitted before the instruction instead of after + // because there is no easy way of setting Builder insertion point after + // an instruction. So we must erase it from the BB, and insert it back + // in the right place. + // We have a guard here because not every atomic operation generates a + // trailing fence. 
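
bracketInstWithFences above demotes the access itself to monotonic and surrounds it with explicit leading and trailing fences from the target. A rough std::atomic analogue of that idea, mirroring the intent rather than the target hooks:

    #include <atomic>

    std::atomic<int> Flag{0};
    int Data = 0;

    void publish(int V) {
      Data = V;
      std::atomic_thread_fence(std::memory_order_release); // leading fence
      Flag.store(1, std::memory_order_relaxed);            // access demoted to "monotonic"
    }

    int consume() {
      while (Flag.load(std::memory_order_relaxed) == 0) {   // demoted load
      }
      std::atomic_thread_fence(std::memory_order_acquire);  // trailing fence
      return Data;
    }

    int main() {
      publish(5);                 // single-threaded here, so consume returns at once
      return consume() == 5 ? 0 : 1;
    }
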
+ if (TrailingFence) { + TrailingFence->removeFromParent(); + TrailingFence->insertAfter(I); + } + + return (LeadingFence || TrailingFence); +} + +bool AtomicExpand::expandAtomicLoad(LoadInst *LI) { + if (TM->getSubtargetImpl() + ->getTargetLowering() + ->hasLoadLinkedStoreConditional()) + return expandAtomicLoadToLL(LI); + else + return expandAtomicLoadToCmpXchg(LI); +} + +bool AtomicExpand::expandAtomicLoadToLL(LoadInst *LI) { + auto TLI = TM->getSubtargetImpl()->getTargetLowering(); + IRBuilder<> Builder(LI); + + // On some architectures, load-linked instructions are atomic for larger + // sizes than normal loads. For example, the only 64-bit load guaranteed + // to be single-copy atomic by ARM is an ldrexd (A3.5.3). + Value *Val = + TLI->emitLoadLinked(Builder, LI->getPointerOperand(), LI->getOrdering()); + + LI->replaceAllUsesWith(Val); + LI->eraseFromParent(); + + return true; +} + +bool AtomicExpand::expandAtomicLoadToCmpXchg(LoadInst *LI) { + IRBuilder<> Builder(LI); + AtomicOrdering Order = LI->getOrdering(); + Value *Addr = LI->getPointerOperand(); + Type *Ty = cast(Addr->getType())->getElementType(); + Constant *DummyVal = Constant::getNullValue(Ty); + + Value *Pair = Builder.CreateAtomicCmpXchg( + Addr, DummyVal, DummyVal, Order, + AtomicCmpXchgInst::getStrongestFailureOrdering(Order)); + Value *Loaded = Builder.CreateExtractValue(Pair, 0, "loaded"); + + LI->replaceAllUsesWith(Loaded); + LI->eraseFromParent(); + + return true; +} + +bool AtomicExpand::expandAtomicStore(StoreInst *SI) { + // This function is only called on atomic stores that are too large to be + // atomic if implemented as a native store. So we replace them by an + // atomic swap, that can be implemented for example as a ldrex/strex on ARM + // or lock cmpxchg8/16b on X86, as these are atomic for larger sizes. + // It is the responsibility of the target to only return true in + // shouldExpandAtomicRMW in cases where this is required and possible. + IRBuilder<> Builder(SI); + AtomicRMWInst *AI = + Builder.CreateAtomicRMW(AtomicRMWInst::Xchg, SI->getPointerOperand(), + SI->getValueOperand(), SI->getOrdering()); + SI->eraseFromParent(); + + // Now we have an appropriate swap instruction, lower it as usual. + return expandAtomicRMW(AI); +} + +bool AtomicExpand::expandAtomicRMW(AtomicRMWInst *AI) { + if (TM->getSubtargetImpl() + ->getTargetLowering() + ->hasLoadLinkedStoreConditional()) + return expandAtomicRMWToLLSC(AI); + else + return expandAtomicRMWToCmpXchg(AI); +} + +/// Emit IR to implement the given atomicrmw operation on values in registers, +/// returning the new value. 
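
performAtomicOp, added just below, computes the value an atomicrmw would store back for each operation. A plain-integer version of the same switch (the enum and the fixed i32 width are stand-ins):

    #include <algorithm>
    #include <cstdint>

    enum class RMWOp { Xchg, Add, Sub, And, Nand, Or, Xor, Max, Min, UMax, UMin };

    static int32_t performOp(RMWOp Op, int32_t Loaded, int32_t Inc) {
      switch (Op) {
      case RMWOp::Xchg: return Inc;
      case RMWOp::Add:  return Loaded + Inc;
      case RMWOp::Sub:  return Loaded - Inc;
      case RMWOp::And:  return Loaded & Inc;
      case RMWOp::Nand: return ~(Loaded & Inc);
      case RMWOp::Or:   return Loaded | Inc;
      case RMWOp::Xor:  return Loaded ^ Inc;
      case RMWOp::Max:  return std::max(Loaded, Inc);   // signed compares
      case RMWOp::Min:  return std::min(Loaded, Inc);
      case RMWOp::UMax: return static_cast<int32_t>(std::max(
                            static_cast<uint32_t>(Loaded), static_cast<uint32_t>(Inc)));
      case RMWOp::UMin: return static_cast<int32_t>(std::min(
                            static_cast<uint32_t>(Loaded), static_cast<uint32_t>(Inc)));
      }
      return Loaded; // unreachable for a well-formed Op
    }

    int main() {
      return performOp(RMWOp::Nand, 10, 6) == ~(10 & 6) ? 0 : 1;
    }
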
+static Value *performAtomicOp(AtomicRMWInst::BinOp Op, IRBuilder<> &Builder, + Value *Loaded, Value *Inc) { + Value *NewVal; + switch (Op) { + case AtomicRMWInst::Xchg: + return Inc; + case AtomicRMWInst::Add: + return Builder.CreateAdd(Loaded, Inc, "new"); + case AtomicRMWInst::Sub: + return Builder.CreateSub(Loaded, Inc, "new"); + case AtomicRMWInst::And: + return Builder.CreateAnd(Loaded, Inc, "new"); + case AtomicRMWInst::Nand: + return Builder.CreateNot(Builder.CreateAnd(Loaded, Inc), "new"); + case AtomicRMWInst::Or: + return Builder.CreateOr(Loaded, Inc, "new"); + case AtomicRMWInst::Xor: + return Builder.CreateXor(Loaded, Inc, "new"); + case AtomicRMWInst::Max: + NewVal = Builder.CreateICmpSGT(Loaded, Inc); + return Builder.CreateSelect(NewVal, Loaded, Inc, "new"); + case AtomicRMWInst::Min: + NewVal = Builder.CreateICmpSLE(Loaded, Inc); + return Builder.CreateSelect(NewVal, Loaded, Inc, "new"); + case AtomicRMWInst::UMax: + NewVal = Builder.CreateICmpUGT(Loaded, Inc); + return Builder.CreateSelect(NewVal, Loaded, Inc, "new"); + case AtomicRMWInst::UMin: + NewVal = Builder.CreateICmpULE(Loaded, Inc); + return Builder.CreateSelect(NewVal, Loaded, Inc, "new"); + default: + llvm_unreachable("Unknown atomic op"); + } +} + +bool AtomicExpand::expandAtomicRMWToLLSC(AtomicRMWInst *AI) { + auto TLI = TM->getSubtargetImpl()->getTargetLowering(); + AtomicOrdering MemOpOrder = AI->getOrdering(); + Value *Addr = AI->getPointerOperand(); + BasicBlock *BB = AI->getParent(); + Function *F = BB->getParent(); + LLVMContext &Ctx = F->getContext(); + + // Given: atomicrmw some_op iN* %addr, iN %incr ordering + // + // The standard expansion we produce is: + // [...] + // fence? + // atomicrmw.start: + // %loaded = @load.linked(%addr) + // %new = some_op iN %loaded, %incr + // %stored = @store_conditional(%new, %addr) + // %try_again = icmp i32 ne %stored, 0 + // br i1 %try_again, label %loop, label %atomicrmw.end + // atomicrmw.end: + // fence? + // [...] + BasicBlock *ExitBB = BB->splitBasicBlock(AI, "atomicrmw.end"); + BasicBlock *LoopBB = BasicBlock::Create(Ctx, "atomicrmw.start", F, ExitBB); + + // This grabs the DebugLoc from AI. + IRBuilder<> Builder(AI); + + // The split call above "helpfully" added a branch at the end of BB (to the + // wrong place), but we might want a fence too. It's easiest to just remove + // the branch entirely. + std::prev(BB->end())->eraseFromParent(); + Builder.SetInsertPoint(BB); + Builder.CreateBr(LoopBB); + + // Start the main loop block now that we've taken care of the preliminaries. + Builder.SetInsertPoint(LoopBB); + Value *Loaded = TLI->emitLoadLinked(Builder, Addr, MemOpOrder); + + Value *NewVal = + performAtomicOp(AI->getOperation(), Builder, Loaded, AI->getValOperand()); + + Value *StoreSuccess = + TLI->emitStoreConditional(Builder, NewVal, Addr, MemOpOrder); + Value *TryAgain = Builder.CreateICmpNE( + StoreSuccess, ConstantInt::get(IntegerType::get(Ctx, 32), 0), "tryagain"); + Builder.CreateCondBr(TryAgain, LoopBB, ExitBB); + + Builder.SetInsertPoint(ExitBB, ExitBB->begin()); + + AI->replaceAllUsesWith(Loaded); + AI->eraseFromParent(); + + return true; +} + +bool AtomicExpand::expandAtomicRMWToCmpXchg(AtomicRMWInst *AI) { + AtomicOrdering MemOpOrder = + AI->getOrdering() == Unordered ? 
Monotonic : AI->getOrdering(); + Value *Addr = AI->getPointerOperand(); + BasicBlock *BB = AI->getParent(); + Function *F = BB->getParent(); + LLVMContext &Ctx = F->getContext(); + + // Given: atomicrmw some_op iN* %addr, iN %incr ordering + // + // The standard expansion we produce is: + // [...] + // %init_loaded = load atomic iN* %addr + // br label %loop + // loop: + // %loaded = phi iN [ %init_loaded, %entry ], [ %new_loaded, %loop ] + // %new = some_op iN %loaded, %incr + // %pair = cmpxchg iN* %addr, iN %loaded, iN %new + // %new_loaded = extractvalue { iN, i1 } %pair, 0 + // %success = extractvalue { iN, i1 } %pair, 1 + // br i1 %success, label %atomicrmw.end, label %loop + // atomicrmw.end: + // [...] + BasicBlock *ExitBB = BB->splitBasicBlock(AI, "atomicrmw.end"); + BasicBlock *LoopBB = BasicBlock::Create(Ctx, "atomicrmw.start", F, ExitBB); + + // This grabs the DebugLoc from AI. + IRBuilder<> Builder(AI); + + // The split call above "helpfully" added a branch at the end of BB (to the + // wrong place), but we want a load. It's easiest to just remove + // the branch entirely. + std::prev(BB->end())->eraseFromParent(); + Builder.SetInsertPoint(BB); + LoadInst *InitLoaded = Builder.CreateLoad(Addr); + // Atomics require at least natural alignment. + InitLoaded->setAlignment(AI->getType()->getPrimitiveSizeInBits()); + Builder.CreateBr(LoopBB); + + // Start the main loop block now that we've taken care of the preliminaries. + Builder.SetInsertPoint(LoopBB); + PHINode *Loaded = Builder.CreatePHI(AI->getType(), 2, "loaded"); + Loaded->addIncoming(InitLoaded, BB); + + Value *NewVal = + performAtomicOp(AI->getOperation(), Builder, Loaded, AI->getValOperand()); + + Value *Pair = Builder.CreateAtomicCmpXchg( + Addr, Loaded, NewVal, MemOpOrder, + AtomicCmpXchgInst::getStrongestFailureOrdering(MemOpOrder)); + Value *NewLoaded = Builder.CreateExtractValue(Pair, 0, "newloaded"); + Loaded->addIncoming(NewLoaded, LoopBB); + + Value *Success = Builder.CreateExtractValue(Pair, 1, "success"); + Builder.CreateCondBr(Success, ExitBB, LoopBB); + + Builder.SetInsertPoint(ExitBB, ExitBB->begin()); + + AI->replaceAllUsesWith(NewLoaded); + AI->eraseFromParent(); + + return true; +} + +bool AtomicExpand::expandAtomicCmpXchg(AtomicCmpXchgInst *CI) { + auto TLI = TM->getSubtargetImpl()->getTargetLowering(); + AtomicOrdering SuccessOrder = CI->getSuccessOrdering(); + AtomicOrdering FailureOrder = CI->getFailureOrdering(); + Value *Addr = CI->getPointerOperand(); + BasicBlock *BB = CI->getParent(); + Function *F = BB->getParent(); + LLVMContext &Ctx = F->getContext(); + // If getInsertFencesForAtomic() returns true, then the target does not want + // to deal with memory orders, and emitLeading/TrailingFence should take care + // of everything. Otherwise, emitLeading/TrailingFence are no-op and we + // should preserve the ordering. + AtomicOrdering MemOpOrder = + TLI->getInsertFencesForAtomic() ? Monotonic : SuccessOrder; + + // Given: cmpxchg some_op iN* %addr, iN %desired, iN %new success_ord fail_ord + // + // The full expansion we produce is: + // [...] + // fence? + // cmpxchg.start: + // %loaded = @load.linked(%addr) + // %should_store = icmp eq %loaded, %desired + // br i1 %should_store, label %cmpxchg.trystore, + // label %cmpxchg.failure + // cmpxchg.trystore: + // %stored = @store_conditional(%new, %addr) + // %success = icmp eq i32 %stored, 0 + // br i1 %success, label %cmpxchg.success, label %loop/%cmpxchg.failure + // cmpxchg.success: + // fence? 
+ // br label %cmpxchg.end + // cmpxchg.failure: + // fence? + // br label %cmpxchg.end + // cmpxchg.end: + // %success = phi i1 [true, %cmpxchg.success], [false, %cmpxchg.failure] + // %restmp = insertvalue { iN, i1 } undef, iN %loaded, 0 + // %res = insertvalue { iN, i1 } %restmp, i1 %success, 1 + // [...] + BasicBlock *ExitBB = BB->splitBasicBlock(CI, "cmpxchg.end"); + auto FailureBB = BasicBlock::Create(Ctx, "cmpxchg.failure", F, ExitBB); + auto SuccessBB = BasicBlock::Create(Ctx, "cmpxchg.success", F, FailureBB); + auto TryStoreBB = BasicBlock::Create(Ctx, "cmpxchg.trystore", F, SuccessBB); + auto LoopBB = BasicBlock::Create(Ctx, "cmpxchg.start", F, TryStoreBB); + + // This grabs the DebugLoc from CI + IRBuilder<> Builder(CI); + + // The split call above "helpfully" added a branch at the end of BB (to the + // wrong place), but we might want a fence too. It's easiest to just remove + // the branch entirely. + std::prev(BB->end())->eraseFromParent(); + Builder.SetInsertPoint(BB); + TLI->emitLeadingFence(Builder, SuccessOrder, /*IsStore=*/true, + /*IsLoad=*/true); + Builder.CreateBr(LoopBB); + + // Start the main loop block now that we've taken care of the preliminaries. + Builder.SetInsertPoint(LoopBB); + Value *Loaded = TLI->emitLoadLinked(Builder, Addr, MemOpOrder); + Value *ShouldStore = + Builder.CreateICmpEQ(Loaded, CI->getCompareOperand(), "should_store"); + + // If the the cmpxchg doesn't actually need any ordering when it fails, we can + // jump straight past that fence instruction (if it exists). + Builder.CreateCondBr(ShouldStore, TryStoreBB, FailureBB); + + Builder.SetInsertPoint(TryStoreBB); + Value *StoreSuccess = TLI->emitStoreConditional( + Builder, CI->getNewValOperand(), Addr, MemOpOrder); + StoreSuccess = Builder.CreateICmpEQ( + StoreSuccess, ConstantInt::get(Type::getInt32Ty(Ctx), 0), "success"); + Builder.CreateCondBr(StoreSuccess, SuccessBB, + CI->isWeak() ? FailureBB : LoopBB); + + // Make sure later instructions don't get reordered with a fence if necessary. + Builder.SetInsertPoint(SuccessBB); + TLI->emitTrailingFence(Builder, SuccessOrder, /*IsStore=*/true, + /*IsLoad=*/true); + Builder.CreateBr(ExitBB); + + Builder.SetInsertPoint(FailureBB); + TLI->emitTrailingFence(Builder, FailureOrder, /*IsStore=*/true, + /*IsLoad=*/true); + Builder.CreateBr(ExitBB); + + // Finally, we have control-flow based knowledge of whether the cmpxchg + // succeeded or not. We expose this to later passes by converting any + // subsequent "icmp eq/ne %loaded, %oldval" into a use of an appropriate PHI. + + // Setup the builder so we can create any PHIs we need. + Builder.SetInsertPoint(ExitBB, ExitBB->begin()); + PHINode *Success = Builder.CreatePHI(Type::getInt1Ty(Ctx), 2); + Success->addIncoming(ConstantInt::getTrue(Ctx), SuccessBB); + Success->addIncoming(ConstantInt::getFalse(Ctx), FailureBB); + + // Look for any users of the cmpxchg that are just comparing the loaded value + // against the desired one, and replace them with the CFG-derived version. + SmallVector PrunedInsts; + for (auto User : CI->users()) { + ExtractValueInst *EV = dyn_cast(User); + if (!EV) + continue; + + assert(EV->getNumIndices() == 1 && EV->getIndices()[0] <= 1 && + "weird extraction from { iN, i1 }"); + + if (EV->getIndices()[0] == 0) + EV->replaceAllUsesWith(Loaded); + else + EV->replaceAllUsesWith(Success); + + PrunedInsts.push_back(EV); + } + + // We can remove the instructions now we're no longer iterating through them. 
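
isIdempotentRMW, defined a little further down, spots read-modify-writes whose constant operand cannot change the stored value, so the pass can lower them as a fenced load instead. A standalone version of that check (the enum is a stand-in for AtomicRMWInst::BinOp):

    #include <cstdint>

    enum class RMWOp { Add, Sub, And, Nand, Or, Xor, Max, Min, UMax, UMin, Xchg };

    static bool isIdempotentRMW(RMWOp Op, int64_t C) {
      switch (Op) {
      case RMWOp::Add:
      case RMWOp::Sub:
      case RMWOp::Or:
      case RMWOp::Xor:
        return C == 0;        // x op 0 == x
      case RMWOp::And:
        return C == -1;       // x & all-ones == x
      default:
        return false;         // Min/Max/UMin/UMax would need INT_MIN/INT_MAX etc.
      }
    }

    int main() {
      return isIdempotentRMW(RMWOp::Or, 0) && !isIdempotentRMW(RMWOp::And, 0) ? 0 : 1;
    }
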
+ for (auto EV : PrunedInsts) + EV->eraseFromParent(); + + if (!CI->use_empty()) { + // Some use of the full struct return that we don't understand has happened, + // so we've got to reconstruct it properly. + Value *Res; + Res = Builder.CreateInsertValue(UndefValue::get(CI->getType()), Loaded, 0); + Res = Builder.CreateInsertValue(Res, Success, 1); + + CI->replaceAllUsesWith(Res); + } + + CI->eraseFromParent(); + return true; +} + +bool AtomicExpand::isIdempotentRMW(AtomicRMWInst* RMWI) { + auto C = dyn_cast(RMWI->getValOperand()); + if(!C) + return false; + + AtomicRMWInst::BinOp Op = RMWI->getOperation(); + switch(Op) { + case AtomicRMWInst::Add: + case AtomicRMWInst::Sub: + case AtomicRMWInst::Or: + case AtomicRMWInst::Xor: + return C->isZero(); + case AtomicRMWInst::And: + return C->isMinusOne(); + // FIXME: we could also treat Min/Max/UMin/UMax by the INT_MIN/INT_MAX/... + default: + return false; + } +} + +bool AtomicExpand::simplifyIdempotentRMW(AtomicRMWInst* RMWI) { + auto TLI = TM->getSubtargetImpl()->getTargetLowering(); + + if (auto ResultingLoad = TLI->lowerIdempotentRMWIntoFencedLoad(RMWI)) { + if (TLI->shouldExpandAtomicLoadInIR(ResultingLoad)) + expandAtomicLoad(ResultingLoad); + return true; + } + + return false; +} diff --git a/lib/CodeGen/BasicTargetTransformInfo.cpp b/lib/CodeGen/BasicTargetTransformInfo.cpp index b2737bf..b9b1fd8 100644 --- a/lib/CodeGen/BasicTargetTransformInfo.cpp +++ b/lib/CodeGen/BasicTargetTransformInfo.cpp @@ -42,7 +42,9 @@ class BasicTTI final : public ImmutablePass, public TargetTransformInfo { /// Estimate the cost overhead of SK_Alternate shuffle. unsigned getAltShuffleOverhead(Type *Ty) const; - const TargetLoweringBase *getTLI() const { return TM->getTargetLowering(); } + const TargetLoweringBase *getTLI() const { + return TM->getSubtargetImpl()->getTargetLowering(); + } public: BasicTTI() : ImmutablePass(ID), TM(nullptr) { @@ -90,7 +92,7 @@ public: unsigned getJumpBufSize() const override; bool shouldBuildLookupTables() const override; bool haveFastSqrt(Type *Ty) const override; - void getUnrollingPreferences(Loop *L, + void getUnrollingPreferences(const Function *F, Loop *L, UnrollingPreferences &UP) const override; /// @} @@ -99,10 +101,11 @@ public: /// @{ unsigned getNumberOfRegisters(bool Vector) const override; - unsigned getMaximumUnrollFactor() const override; + unsigned getMaxInterleaveFactor() const override; unsigned getRegisterBitWidth(bool Vector) const override; unsigned getArithmeticInstrCost(unsigned Opcode, Type *Ty, OperandValueKind, - OperandValueKind) const override; + OperandValueKind, OperandValueProperties, + OperandValueProperties) const override; unsigned getShuffleCost(ShuffleKind Kind, Type *Tp, int Index, Type *SubTp) const override; unsigned getCastInstrCost(unsigned Opcode, Type *Dst, @@ -186,9 +189,8 @@ unsigned BasicTTI::getJumpBufSize() const { bool BasicTTI::shouldBuildLookupTables() const { const TargetLoweringBase *TLI = getTLI(); - return TLI->supportJumpTables() && - (TLI->isOperationLegalOrCustom(ISD::BR_JT, MVT::Other) || - TLI->isOperationLegalOrCustom(ISD::BRIND, MVT::Other)); + return TLI->isOperationLegalOrCustom(ISD::BR_JT, MVT::Other) || + TLI->isOperationLegalOrCustom(ISD::BRIND, MVT::Other); } bool BasicTTI::haveFastSqrt(Type *Ty) const { @@ -197,7 +199,7 @@ bool BasicTTI::haveFastSqrt(Type *Ty) const { return TLI->isTypeLegal(VT) && TLI->isOperationLegalOrCustom(ISD::FSQRT, VT); } -void BasicTTI::getUnrollingPreferences(Loop *L, +void BasicTTI::getUnrollingPreferences(const Function *F, 
Loop *L, UnrollingPreferences &UP) const { // This unrolling functionality is target independent, but to provide some // motivation for its intended use, for x86: @@ -223,11 +225,11 @@ void BasicTTI::getUnrollingPreferences(Loop *L, // until someone finds a case where it matters in practice. unsigned MaxOps; - const TargetSubtargetInfo *ST = &TM->getSubtarget(); + const TargetSubtargetInfo *ST = &TM->getSubtarget(F); if (PartialUnrollingThreshold.getNumOccurrences() > 0) MaxOps = PartialUnrollingThreshold; - else if (ST->getSchedModel()->LoopMicroOpBufferSize > 0) - MaxOps = ST->getSchedModel()->LoopMicroOpBufferSize; + else if (ST->getSchedModel().LoopMicroOpBufferSize > 0) + MaxOps = ST->getSchedModel().LoopMicroOpBufferSize; else return; @@ -282,13 +284,14 @@ unsigned BasicTTI::getRegisterBitWidth(bool Vector) const { return 32; } -unsigned BasicTTI::getMaximumUnrollFactor() const { +unsigned BasicTTI::getMaxInterleaveFactor() const { return 1; } unsigned BasicTTI::getArithmeticInstrCost(unsigned Opcode, Type *Ty, - OperandValueKind, - OperandValueKind) const { + OperandValueKind, OperandValueKind, + OperandValueProperties, + OperandValueProperties) const { // Check if any of the operands are vector operands. const TargetLoweringBase *TLI = getTLI(); int ISD = TLI->InstructionOpcodeToISD(Opcode); @@ -465,7 +468,8 @@ unsigned BasicTTI::getCmpSelInstrCost(unsigned Opcode, Type *ValTy, std::pair LT = TLI->getTypeLegalizationCost(ValTy); - if (!TLI->isOperationExpand(ISD, LT.second)) { + if (!(ValTy->isVectorTy() && !LT.second.isVector()) && + !TLI->isOperationExpand(ISD, LT.second)) { // The operation is legal. Assume it costs 1. Multiply // by the type-legalization overhead. return LT.first * 1; @@ -561,6 +565,8 @@ unsigned BasicTTI::getIntrinsicInstrCost(Intrinsic::ID IID, Type *RetTy, case Intrinsic::log10: ISD = ISD::FLOG10; break; case Intrinsic::log2: ISD = ISD::FLOG2; break; case Intrinsic::fabs: ISD = ISD::FABS; break; + case Intrinsic::minnum: ISD = ISD::FMINNUM; break; + case Intrinsic::maxnum: ISD = ISD::FMAXNUM; break; case Intrinsic::copysign: ISD = ISD::FCOPYSIGN; break; case Intrinsic::floor: ISD = ISD::FFLOOR; break; case Intrinsic::ceil: ISD = ISD::FCEIL; break; @@ -572,6 +578,7 @@ unsigned BasicTTI::getIntrinsicInstrCost(Intrinsic::ID IID, Type *RetTy, case Intrinsic::pow: ISD = ISD::FPOW; break; case Intrinsic::fma: ISD = ISD::FMA; break; case Intrinsic::fmuladd: ISD = ISD::FMA; break; + // FIXME: We should return 0 whenever getIntrinsicCost == TCC_Free. case Intrinsic::lifetime_start: case Intrinsic::lifetime_end: return 0; @@ -582,7 +589,7 @@ unsigned BasicTTI::getIntrinsicInstrCost(Intrinsic::ID IID, Type *RetTy, if (TLI->isOperationLegalOrPromote(ISD, LT.second)) { // The operation is legal. Assume it costs 1. - // If the type is split to multiple registers, assume that thre is some + // If the type is split to multiple registers, assume that there is some // overhead to this. // TODO: Once we have extract/insert subvector cost we need to use them. 
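
The cost hunks above keep the pattern of charging one unit per legal part of the legalized type and more when the operation has to be expanded. A toy sketch of that shape, with invented numbers that are not LLVM's:

    #include <cstdio>

    struct LegalizedType {
      unsigned NumParts; // how many legal registers the type splits into (LT.first)
      bool Legal;        // is the operation legal or custom on the legalized type?
    };

    static unsigned arithmeticCost(const LegalizedType &LT) {
      if (LT.Legal)
        return LT.NumParts * 1; // "The operation is legal. Assume it costs 1."
      return LT.NumParts * 4;   // assumed expansion penalty, purely illustrative
    }

    int main() {
      std::printf("%u %u\n", arithmeticCost({1, true}), arithmeticCost({4, false}));
    }
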
if (LT.first > 1) diff --git a/lib/CodeGen/BranchFolding.cpp b/lib/CodeGen/BranchFolding.cpp index 7503e57..2128da1 100644 --- a/lib/CodeGen/BranchFolding.cpp +++ b/lib/CodeGen/BranchFolding.cpp @@ -20,6 +20,8 @@ #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallSet.h" #include "llvm/ADT/Statistic.h" +#include "llvm/CodeGen/MachineBlockFrequencyInfo.h" +#include "llvm/CodeGen/MachineBranchProbabilityInfo.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineJumpTableInfo.h" #include "llvm/CodeGen/MachineModuleInfo.h" @@ -32,8 +34,8 @@ #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetInstrInfo.h" -#include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/Target/TargetSubtargetInfo.h" #include using namespace llvm; @@ -70,6 +72,8 @@ namespace { bool runOnMachineFunction(MachineFunction &MF) override; void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.addRequired(); + AU.addRequired(); AU.addRequired(); MachineFunctionPass::getAnalysisUsage(AU); } @@ -91,22 +95,24 @@ bool BranchFolderPass::runOnMachineFunction(MachineFunction &MF) { // HW that requires structurized CFG. bool EnableTailMerge = !MF.getTarget().requiresStructuredCFG() && PassConfig->getEnableTailMerge(); - BranchFolder Folder(EnableTailMerge, /*CommonHoist=*/true); - return Folder.OptimizeFunction(MF, - MF.getTarget().getInstrInfo(), - MF.getTarget().getRegisterInfo(), + BranchFolder Folder(EnableTailMerge, /*CommonHoist=*/true, + getAnalysis(), + getAnalysis()); + return Folder.OptimizeFunction(MF, MF.getSubtarget().getInstrInfo(), + MF.getSubtarget().getRegisterInfo(), getAnalysisIfAvailable()); } - -BranchFolder::BranchFolder(bool defaultEnableTailMerge, bool CommonHoist) { +BranchFolder::BranchFolder(bool defaultEnableTailMerge, bool CommonHoist, + const MachineBlockFrequencyInfo &FreqInfo, + const MachineBranchProbabilityInfo &ProbInfo) + : EnableHoistCommonCode(CommonHoist), MBBFreqInfo(FreqInfo), + MBPI(ProbInfo) { switch (FlagEnableTailMerge) { case cl::BOU_UNSET: EnableTailMerge = defaultEnableTailMerge; break; case cl::BOU_TRUE: EnableTailMerge = true; break; case cl::BOU_FALSE: EnableTailMerge = false; break; } - - EnableHoistCommonCode = CommonHoist; } /// RemoveDeadBlock - Remove the specified dead machine basic block from the @@ -388,10 +394,8 @@ void BranchFolder::MaintainLiveIns(MachineBasicBlock *CurMBB, RS->enterBasicBlock(CurMBB); if (!CurMBB->empty()) RS->forward(std::prev(CurMBB->end())); - BitVector RegsLiveAtExit(TRI->getNumRegs()); - RS->getRegsUsed(RegsLiveAtExit, false); - for (unsigned int i = 0, e = TRI->getNumRegs(); i != e; i++) - if (RegsLiveAtExit[i]) + for (unsigned int i = 1, e = TRI->getNumRegs(); i != e; i++) + if (RS->isRegUsed(i, false)) NewMBB->addLiveIn(i); } } @@ -435,6 +439,9 @@ MachineBasicBlock *BranchFolder::SplitMBBAt(MachineBasicBlock &CurMBB, // Splice the code over. NewMBB->splice(NewMBB->end(), &CurMBB, BBI1, CurMBB.end()); + // NewMBB inherits CurMBB's block frequency. + MBBFreqInfo.setBlockFreq(NewMBB, MBBFreqInfo.getBlockFreq(&CurMBB)); + // For targets that use the register scavenger, we must maintain LiveIns. 
MaintainLiveIns(&CurMBB, NewMBB); @@ -504,6 +511,21 @@ BranchFolder::MergePotentialsElt::operator<(const MergePotentialsElt &o) const { #endif } +BlockFrequency +BranchFolder::MBFIWrapper::getBlockFreq(const MachineBasicBlock *MBB) const { + auto I = MergedBBFreq.find(MBB); + + if (I != MergedBBFreq.end()) + return I->second; + + return MBFI.getBlockFreq(MBB); +} + +void BranchFolder::MBFIWrapper::setBlockFreq(const MachineBasicBlock *MBB, + BlockFrequency F) { + MergedBBFreq[MBB] = F; +} + /// CountTerminators - Count the number of terminators in the given /// block and set I to the position of the first non-terminator, if there /// is one, or MBB->end() otherwise. @@ -806,6 +828,10 @@ bool BranchFolder::TryTailMergeBlocks(MachineBasicBlock *SuccBB, } MachineBasicBlock *MBB = SameTails[commonTailIndex].getBlock(); + + // Recompute commont tail MBB's edge weights and block frequency. + setCommonTailEdgeWeights(*MBB); + // MBB is common tail. Adjust all other BB's to jump to this one. // Traversal must be forwards so erases work. DEBUG(dbgs() << "\nUsing common tail in BB#" << MBB->getNumber() @@ -890,7 +916,7 @@ bool BranchFolder::TailMergeBlocks(MachineFunction &MF) { continue; // Visit each predecessor only once. - if (!UniquePreds.insert(PBB)) + if (!UniquePreds.insert(PBB).second) continue; // Skip blocks which may jump to a landing pad. Can't tail merge these. @@ -968,6 +994,44 @@ bool BranchFolder::TailMergeBlocks(MachineFunction &MF) { return MadeChange; } +void BranchFolder::setCommonTailEdgeWeights(MachineBasicBlock &TailMBB) { + SmallVector EdgeFreqLs(TailMBB.succ_size()); + BlockFrequency AccumulatedMBBFreq; + + // Aggregate edge frequency of successor edge j: + // edgeFreq(j) = sum (freq(bb) * edgeProb(bb, j)), + // where bb is a basic block that is in SameTails. + for (const auto &Src : SameTails) { + const MachineBasicBlock *SrcMBB = Src.getBlock(); + BlockFrequency BlockFreq = MBBFreqInfo.getBlockFreq(SrcMBB); + AccumulatedMBBFreq += BlockFreq; + + // It is not necessary to recompute edge weights if TailBB has less than two + // successors. 
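
setCommonTailEdgeWeights, a few lines below, sums 64-bit edge frequencies and then rescales them so the largest still fits in a 32-bit branch weight. The rescaling step in isolation (assumes a non-empty frequency list):

    #include <algorithm>
    #include <cstdint>
    #include <cstdio>
    #include <vector>

    static std::vector<uint32_t> toBranchWeights(const std::vector<uint64_t> &EdgeFreqs) {
      uint64_t MaxFreq = *std::max_element(EdgeFreqs.begin(), EdgeFreqs.end());
      uint64_t Scale = MaxFreq / UINT32_MAX + 1;  // guarantees Freq / Scale <= UINT32_MAX
      std::vector<uint32_t> Weights;
      for (uint64_t Freq : EdgeFreqs)
        Weights.push_back(static_cast<uint32_t>(Freq / Scale));
      return Weights;
    }

    int main() {
      for (uint32_t W : toBranchWeights({1ull << 40, 1ull << 20, 5}))
        std::printf("%u\n", W);
    }
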
+ if (TailMBB.succ_size() <= 1) + continue; + + auto EdgeFreq = EdgeFreqLs.begin(); + + for (auto SuccI = TailMBB.succ_begin(), SuccE = TailMBB.succ_end(); + SuccI != SuccE; ++SuccI, ++EdgeFreq) + *EdgeFreq += BlockFreq * MBPI.getEdgeProbability(SrcMBB, *SuccI); + } + + MBBFreqInfo.setBlockFreq(&TailMBB, AccumulatedMBBFreq); + + if (TailMBB.succ_size() <= 1) + return; + + auto MaxEdgeFreq = *std::max_element(EdgeFreqLs.begin(), EdgeFreqLs.end()); + uint64_t Scale = MaxEdgeFreq.getFrequency() / UINT32_MAX + 1; + auto EdgeFreq = EdgeFreqLs.begin(); + + for (auto SuccI = TailMBB.succ_begin(), SuccE = TailMBB.succ_end(); + SuccI != SuccE; ++SuccI, ++EdgeFreq) + TailMBB.setSuccWeight(SuccI, EdgeFreq->getFrequency() / Scale); +} + //===----------------------------------------------------------------------===// // Branch Optimization //===----------------------------------------------------------------------===// diff --git a/lib/CodeGen/BranchFolding.h b/lib/CodeGen/BranchFolding.h index 0d15ed7..3653a2c 100644 --- a/lib/CodeGen/BranchFolding.h +++ b/lib/CodeGen/BranchFolding.h @@ -7,14 +7,17 @@ // //===----------------------------------------------------------------------===// -#ifndef LLVM_CODEGEN_BRANCHFOLDING_HPP -#define LLVM_CODEGEN_BRANCHFOLDING_HPP +#ifndef LLVM_LIB_CODEGEN_BRANCHFOLDING_H +#define LLVM_LIB_CODEGEN_BRANCHFOLDING_H #include "llvm/ADT/SmallPtrSet.h" #include "llvm/CodeGen/MachineBasicBlock.h" +#include "llvm/Support/BlockFrequency.h" #include namespace llvm { + class MachineBlockFrequencyInfo; + class MachineBranchProbabilityInfo; class MachineFunction; class MachineModuleInfo; class RegScavenger; @@ -23,7 +26,9 @@ namespace llvm { class BranchFolder { public: - explicit BranchFolder(bool defaultEnableTailMerge, bool CommonHoist); + explicit BranchFolder(bool defaultEnableTailMerge, bool CommonHoist, + const MachineBlockFrequencyInfo &MBFI, + const MachineBranchProbabilityInfo &MBPI); bool OptimizeFunction(MachineFunction &MF, const TargetInstrInfo *tii, @@ -92,9 +97,26 @@ namespace llvm { MachineModuleInfo *MMI; RegScavenger *RS; + /// \brief This class keeps track of branch frequencies of newly created + /// blocks and tail-merged blocks. 
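
The MBFIWrapper class declared just below overlays frequencies recorded for newly created and tail-merged blocks on top of the underlying MachineBlockFrequencyInfo. A simplified standalone analogue with stand-in types:

    #include <cstdint>
    #include <unordered_map>

    using BlockId = unsigned;
    using BlockFrequency = uint64_t;
    using BaseFreqInfo = std::unordered_map<BlockId, BlockFrequency>;

    class FreqOverlay {
    public:
      explicit FreqOverlay(const BaseFreqInfo &Base) : Base(Base) {}

      BlockFrequency getBlockFreq(BlockId B) const {
        auto I = Merged.find(B);
        if (I != Merged.end())
          return I->second;            // frequency recorded for a new/merged block
        auto J = Base.find(B);
        return J == Base.end() ? 0 : J->second;
      }

      void setBlockFreq(BlockId B, BlockFrequency F) { Merged[B] = F; }

    private:
      const BaseFreqInfo &Base;
      std::unordered_map<BlockId, BlockFrequency> Merged;
    };

    int main() {
      BaseFreqInfo Analysis{{0, 100}, {1, 40}};
      FreqOverlay Freqs(Analysis);
      Freqs.setBlockFreq(2, 100);      // a block split from #0 inherits its frequency
      return Freqs.getBlockFreq(2) == 100 && Freqs.getBlockFreq(1) == 40 ? 0 : 1;
    }
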
+ class MBFIWrapper { + public: + MBFIWrapper(const MachineBlockFrequencyInfo &I) : MBFI(I) {} + BlockFrequency getBlockFreq(const MachineBasicBlock *MBB) const; + void setBlockFreq(const MachineBasicBlock *MBB, BlockFrequency F); + + private: + const MachineBlockFrequencyInfo &MBFI; + DenseMap MergedBBFreq; + }; + + MBFIWrapper MBBFreqInfo; + const MachineBranchProbabilityInfo &MBPI; + bool TailMergeBlocks(MachineFunction &MF); bool TryTailMergeBlocks(MachineBasicBlock* SuccBB, MachineBasicBlock* PredBB); + void setCommonTailEdgeWeights(MachineBasicBlock &TailMBB); void MaintainLiveIns(MachineBasicBlock *CurMBB, MachineBasicBlock *NewMBB); void ReplaceTailWithBranchTo(MachineBasicBlock::iterator OldInst, diff --git a/lib/CodeGen/CMakeLists.txt b/lib/CodeGen/CMakeLists.txt index 57c24e8..092346b 100644 --- a/lib/CodeGen/CMakeLists.txt +++ b/lib/CodeGen/CMakeLists.txt @@ -2,7 +2,7 @@ add_llvm_library(LLVMCodeGen AggressiveAntiDepBreaker.cpp AllocationOrder.cpp Analysis.cpp - AtomicExpandLoadLinkedPass.cpp + AtomicExpandPass.cpp BasicTargetTransformInfo.cpp BranchFolding.cpp CalcSpillWeights.cpp @@ -19,6 +19,7 @@ add_llvm_library(LLVMCodeGen ExecutionDepsFix.cpp ExpandISelPseudos.cpp ExpandPostRAPseudos.cpp + ForwardControlFlowIntegrity.cpp GCMetadata.cpp GCMetadataPrinter.cpp GCStrategy.cpp @@ -27,7 +28,6 @@ add_llvm_library(LLVMCodeGen InlineSpiller.cpp InterferenceCache.cpp IntrinsicLowering.cpp - JITCodeEmitter.cpp JumpInstrTables.cpp LLVMTargetMachine.cpp LatencyPriorityQueue.cpp @@ -48,9 +48,10 @@ add_llvm_library(LLVMCodeGen MachineBlockPlacement.cpp MachineBranchProbabilityInfo.cpp MachineCSE.cpp - MachineCodeEmitter.cpp + MachineCombiner.cpp MachineCopyPropagation.cpp MachineDominators.cpp + MachineDominanceFrontier.cpp MachineFunction.cpp MachineFunctionAnalysis.cpp MachineFunctionPass.cpp @@ -64,6 +65,7 @@ add_llvm_library(LLVMCodeGen MachinePassRegistry.cpp MachinePostDominators.cpp MachineRegisterInfo.cpp + MachineRegionInfo.cpp MachineSSAUpdater.cpp MachineScheduler.cpp MachineSink.cpp @@ -96,7 +98,6 @@ add_llvm_library(LLVMCodeGen SjLjEHPrepare.cpp SlotIndexes.cpp SpillPlacement.cpp - Spiller.cpp SplitKit.cpp StackColoring.cpp StackProtector.cpp diff --git a/lib/CodeGen/CalcSpillWeights.cpp b/lib/CodeGen/CalcSpillWeights.cpp index bc033f9..d08fae0 100644 --- a/lib/CodeGen/CalcSpillWeights.cpp +++ b/lib/CodeGen/CalcSpillWeights.cpp @@ -16,8 +16,8 @@ #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetInstrInfo.h" -#include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/Target/TargetSubtargetInfo.h" using namespace llvm; #define DEBUG_TYPE "calcspillweights" @@ -95,11 +95,12 @@ static bool isRematerializable(const LiveInterval &LI, void VirtRegAuxInfo::calculateSpillWeightAndHint(LiveInterval &li) { MachineRegisterInfo &mri = MF.getRegInfo(); - const TargetRegisterInfo &tri = *MF.getTarget().getRegisterInfo(); + const TargetRegisterInfo &tri = *MF.getSubtarget().getRegisterInfo(); MachineBasicBlock *mbb = nullptr; MachineLoop *loop = nullptr; bool isExiting = false; float totalWeight = 0; + unsigned numInstr = 0; // Number of instructions using li SmallPtrSet visited; // Find the best physreg hint and the best virtreg hint. 
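
Several hunks in this patch, including UniquePreds above and visited in the next hunk, change `if (!set.insert(x))` to `if (!set.insert(x).second)`: insert now returns an (iterator, inserted) pair rather than a bool. The same idiom with std::set:

    #include <cstdio>
    #include <set>

    int main() {
      std::set<int> Visited;
      for (int V : {1, 2, 1, 3, 2}) {
        if (!Visited.insert(V).second)
          continue;                     // .second is false if V was already present
        std::printf("first visit: %d\n", V);
      }
    }
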
@@ -116,9 +117,10 @@ VirtRegAuxInfo::calculateSpillWeightAndHint(LiveInterval &li) { I = mri.reg_instr_begin(li.reg), E = mri.reg_instr_end(); I != E; ) { MachineInstr *mi = &*(I++); + numInstr++; if (mi->isIdentityCopy() || mi->isImplicitDef() || mi->isDebugValue()) continue; - if (!visited.insert(mi)) + if (!visited.insert(mi).second) continue; float weight = 1.0f; @@ -186,8 +188,8 @@ VirtRegAuxInfo::calculateSpillWeightAndHint(LiveInterval &li) { // it is a preferred candidate for spilling. // FIXME: this gets much more complicated once we support non-trivial // re-materialization. - if (isRematerializable(li, LIS, *MF.getTarget().getInstrInfo())) + if (isRematerializable(li, LIS, *MF.getSubtarget().getInstrInfo())) totalWeight *= 0.5F; - li.weight = normalize(totalWeight, li.getSize()); + li.weight = normalize(totalWeight, li.getSize(), numInstr); } diff --git a/lib/CodeGen/CallingConvLower.cpp b/lib/CodeGen/CallingConvLower.cpp index add861a..56ecde0 100644 --- a/lib/CodeGen/CallingConvLower.cpp +++ b/lib/CodeGen/CallingConvLower.cpp @@ -19,16 +19,15 @@ #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetLowering.h" -#include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/Target/TargetSubtargetInfo.h" using namespace llvm; CCState::CCState(CallingConv::ID CC, bool isVarArg, MachineFunction &mf, - const TargetMachine &tm, SmallVectorImpl &locs, - LLVMContext &C) - : CallingConv(CC), IsVarArg(isVarArg), MF(mf), TM(tm), - TRI(*TM.getRegisterInfo()), Locs(locs), Context(C), - CallOrPrologue(Unknown) { + SmallVectorImpl &locs, LLVMContext &C) + : CallingConv(CC), IsVarArg(isVarArg), MF(mf), + TRI(*MF.getSubtarget().getRegisterInfo()), Locs(locs), Context(C), + CallOrPrologue(Unknown) { // No stack is used. StackOffset = 0; @@ -50,7 +49,8 @@ void CCState::HandleByVal(unsigned ValNo, MVT ValVT, if (MinAlign > (int)Align) Align = MinAlign; MF.getFrameInfo()->ensureMaxAlignment(Align); - TM.getTargetLowering()->HandleByVal(this, Size, Align); + MF.getSubtarget().getTargetLowering()->HandleByVal(this, Size, Align); + Size = unsigned(RoundUpToAlignment(Size, MinAlign)); unsigned Offset = AllocateStack(Size, Align); addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo)); } diff --git a/lib/CodeGen/CodeGen.cpp b/lib/CodeGen/CodeGen.cpp index b3beac3..307dec5 100644 --- a/lib/CodeGen/CodeGen.cpp +++ b/lib/CodeGen/CodeGen.cpp @@ -20,7 +20,7 @@ using namespace llvm; /// initializeCodeGen - Initialize all passes linked into the CodeGen library. 
void llvm::initializeCodeGen(PassRegistry &Registry) { - initializeAtomicExpandLoadLinkedPass(Registry); + initializeAtomicExpandPass(Registry); initializeBasicTTIPass(Registry); initializeBranchFolderPassPass(Registry); initializeCodeGenPreparePass(Registry); @@ -41,6 +41,7 @@ void llvm::initializeCodeGen(PassRegistry &Registry) { initializeMachineBlockPlacementPass(Registry); initializeMachineBlockPlacementStatsPass(Registry); initializeMachineCopyPropagationPass(Registry); + initializeMachineCombinerPass(Registry); initializeMachineCSEPass(Registry); initializeMachineDominatorTreePass(Registry); initializeMachinePostDominatorTreePass(Registry); diff --git a/lib/CodeGen/CodeGenPrepare.cpp b/lib/CodeGen/CodeGenPrepare.cpp index ccac40c..8d20848 100644 --- a/lib/CodeGen/CodeGenPrepare.cpp +++ b/lib/CodeGen/CodeGenPrepare.cpp @@ -18,6 +18,7 @@ #include "llvm/ADT/SmallSet.h" #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/InstructionSimplify.h" +#include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/IR/CallSite.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DataLayout.h" @@ -63,6 +64,7 @@ STATISTIC(NumRetsDup, "Number of return instructions duplicated"); STATISTIC(NumDbgValueMoved, "Number of debug value instructions moved"); STATISTIC(NumSelectsExpanded, "Number of selects turned into branches"); STATISTIC(NumAndCmpsMoved, "Number of and/cmp's pushed into branches"); +STATISTIC(NumStoreExtractExposed, "Number of store(extractelement) exposed"); static cl::opt DisableBranchOpts( "disable-cgp-branch-opts", cl::Hidden, cl::init(false), @@ -80,15 +82,29 @@ static cl::opt EnableAndCmpSinking( "enable-andcmp-sinking", cl::Hidden, cl::init(true), cl::desc("Enable sinkinig and/cmp into branches.")); +static cl::opt DisableStoreExtract( + "disable-cgp-store-extract", cl::Hidden, cl::init(false), + cl::desc("Disable store(extract) optimizations in CodeGenPrepare")); + +static cl::opt StressStoreExtract( + "stress-cgp-store-extract", cl::Hidden, cl::init(false), + cl::desc("Stress test store(extract) optimizations in CodeGenPrepare")); + namespace { typedef SmallPtrSet SetOfInstrs; -typedef DenseMap InstrToOrigTy; +struct TypeIsSExt { + Type *Ty; + bool IsSExt; + TypeIsSExt(Type *Ty, bool IsSExt) : Ty(Ty), IsSExt(IsSExt) {} +}; +typedef DenseMap InstrToOrigTy; class CodeGenPrepare : public FunctionPass { /// TLI - Keep a pointer of a TargetLowering to consult for determining /// transformation profitability. 
const TargetMachine *TM; const TargetLowering *TLI; + const TargetTransformInfo *TTI; const TargetLibraryInfo *TLInfo; DominatorTree *DT; @@ -118,7 +134,7 @@ typedef DenseMap InstrToOrigTy; public: static char ID; // Pass identification, replacement for typeid explicit CodeGenPrepare(const TargetMachine *TM = nullptr) - : FunctionPass(ID), TM(TM), TLI(nullptr) { + : FunctionPass(ID), TM(TM), TLI(nullptr), TTI(nullptr) { initializeCodeGenPreparePass(*PassRegistry::getPassRegistry()); } bool runOnFunction(Function &F) override; @@ -128,6 +144,7 @@ typedef DenseMap InstrToOrigTy; void getAnalysisUsage(AnalysisUsage &AU) const override { AU.addPreserved(); AU.addRequired(); + AU.addRequired(); } private: @@ -144,6 +161,7 @@ typedef DenseMap InstrToOrigTy; bool OptimizeExtUses(Instruction *I); bool OptimizeSelectInst(SelectInst *SI); bool OptimizeShuffleVectorInst(ShuffleVectorInst *SI); + bool OptimizeExtractElementInst(Instruction *Inst); bool DupRetToEnableTailCallOpts(BasicBlock *BB); bool PlaceDbgValues(Function &F); bool sinkAndCmp(Function &F); @@ -168,8 +186,10 @@ bool CodeGenPrepare::runOnFunction(Function &F) { PromotedInsts.clear(); ModifiedDT = false; - if (TM) TLI = TM->getTargetLowering(); + if (TM) + TLI = TM->getSubtargetImpl()->getTargetLowering(); TLInfo = &getAnalysis(); + TTI = &getAnalysis(); DominatorTreeWrapperPass *DTWP = getAnalysisIfAvailable(); DT = DTWP ? &DTWP->getDomTree() : nullptr; @@ -662,10 +682,13 @@ SinkShiftAndTruncate(BinaryOperator *ShiftI, Instruction *User, ConstantInt *CI, if (!ISDOpcode) continue; - // If the use is actually a legal node, there will not be an implicit - // truncate. - if (TLI.isOperationLegalOrCustom(ISDOpcode, - EVT::getEVT(TruncUser->getType()))) + // If the use is actually a legal node, there will not be an + // implicit truncate. + // FIXME: always querying the result type is just an + // approximation; some nodes' legality is determined by the + // operand or other means. There's no good way to find out though. + if (TLI.isOperationLegalOrCustom( + ISDOpcode, TLI.getValueType(TruncUser->getType(), true))) continue; // Don't bother for PHI nodes. @@ -978,7 +1001,7 @@ bool CodeGenPrepare::DupRetToEnableTailCallOpts(BasicBlock *BB) { } else { SmallPtrSet VisitedBBs; for (pred_iterator PI = pred_begin(BB), PE = pred_end(BB); PI != PE; ++PI) { - if (!VisitedBBs.insert(*PI)) + if (!VisitedBBs.insert(*PI).second) continue; BasicBlock::InstListType &InstList = (*PI)->getInstList(); @@ -1250,46 +1273,75 @@ class TypePromotionTransaction { /// \brief Build a truncate instruction. class TruncBuilder : public TypePromotionAction { + Value *Val; public: /// \brief Build a truncate instruction of \p Opnd producing a \p Ty /// result. /// trunc Opnd to Ty. TruncBuilder(Instruction *Opnd, Type *Ty) : TypePromotionAction(Opnd) { IRBuilder<> Builder(Opnd); - Inst = cast(Builder.CreateTrunc(Opnd, Ty, "promoted")); - DEBUG(dbgs() << "Do: TruncBuilder: " << *Inst << "\n"); + Val = Builder.CreateTrunc(Opnd, Ty, "promoted"); + DEBUG(dbgs() << "Do: TruncBuilder: " << *Val << "\n"); } - /// \brief Get the built instruction. - Instruction *getBuiltInstruction() { return Inst; } + /// \brief Get the built value. + Value *getBuiltValue() { return Val; } /// \brief Remove the built instruction. 
void undo() override { - DEBUG(dbgs() << "Undo: TruncBuilder: " << *Inst << "\n"); - Inst->eraseFromParent(); + DEBUG(dbgs() << "Undo: TruncBuilder: " << *Val << "\n"); + if (Instruction *IVal = dyn_cast(Val)) + IVal->eraseFromParent(); } }; /// \brief Build a sign extension instruction. class SExtBuilder : public TypePromotionAction { + Value *Val; public: /// \brief Build a sign extension instruction of \p Opnd producing a \p Ty /// result. /// sext Opnd to Ty. SExtBuilder(Instruction *InsertPt, Value *Opnd, Type *Ty) - : TypePromotionAction(Inst) { + : TypePromotionAction(InsertPt) { + IRBuilder<> Builder(InsertPt); + Val = Builder.CreateSExt(Opnd, Ty, "promoted"); + DEBUG(dbgs() << "Do: SExtBuilder: " << *Val << "\n"); + } + + /// \brief Get the built value. + Value *getBuiltValue() { return Val; } + + /// \brief Remove the built instruction. + void undo() override { + DEBUG(dbgs() << "Undo: SExtBuilder: " << *Val << "\n"); + if (Instruction *IVal = dyn_cast(Val)) + IVal->eraseFromParent(); + } + }; + + /// \brief Build a zero extension instruction. + class ZExtBuilder : public TypePromotionAction { + Value *Val; + public: + /// \brief Build a zero extension instruction of \p Opnd producing a \p Ty + /// result. + /// zext Opnd to Ty. + ZExtBuilder(Instruction *InsertPt, Value *Opnd, Type *Ty) + : TypePromotionAction(InsertPt) { IRBuilder<> Builder(InsertPt); - Inst = cast(Builder.CreateSExt(Opnd, Ty, "promoted")); - DEBUG(dbgs() << "Do: SExtBuilder: " << *Inst << "\n"); + Val = Builder.CreateZExt(Opnd, Ty, "promoted"); + DEBUG(dbgs() << "Do: ZExtBuilder: " << *Val << "\n"); } - /// \brief Get the built instruction. - Instruction *getBuiltInstruction() { return Inst; } + /// \brief Get the built value. + Value *getBuiltValue() { return Val; } /// \brief Remove the built instruction. void undo() override { - DEBUG(dbgs() << "Undo: SExtBuilder: " << *Inst << "\n"); - Inst->eraseFromParent(); + DEBUG(dbgs() << "Undo: ZExtBuilder: " << *Val << "\n"); + if (Instruction *IVal = dyn_cast(Val)) + IVal->eraseFromParent(); } }; @@ -1418,9 +1470,11 @@ public: /// Same as Value::mutateType. void mutateType(Instruction *Inst, Type *NewTy); /// Same as IRBuilder::createTrunc. - Instruction *createTrunc(Instruction *Opnd, Type *Ty); + Value *createTrunc(Instruction *Opnd, Type *Ty); /// Same as IRBuilder::createSExt. - Instruction *createSExt(Instruction *Inst, Value *Opnd, Type *Ty); + Value *createSExt(Instruction *Inst, Value *Opnd, Type *Ty); + /// Same as IRBuilder::createZExt. + Value *createZExt(Instruction *Inst, Value *Opnd, Type *Ty); /// Same as Instruction::moveBefore. 
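The builders above now hand back a Value rather than an Instruction, presumably because IRBuilder may constant-fold CreateTrunc/CreateSExt/CreateZExt, in which case there is nothing to erase on undo; the dyn_cast guard in each undo() handles exactly that case. The surrounding transaction machinery is the familiar undo-log pattern: every change is recorded as an action that knows how to reverse itself. A minimal sketch of that pattern, with hypothetical types rather than the LLVM classes:

    #include <memory>
    #include <utility>
    #include <vector>

    // Every mutation is recorded as an action that knows how to undo itself.
    struct Action {
      virtual ~Action() {}
      virtual void undo() = 0;
    };

    class Transaction {
      std::vector<std::unique_ptr<Action>> Actions;

    public:
      // Perform-and-record: the caller constructs the action (which applies the
      // change in its constructor) and the transaction keeps it for rollback.
      void record(std::unique_ptr<Action> A) { Actions.push_back(std::move(A)); }

      // Roll back every recorded change, most recent first.
      void rollback() {
        for (auto It = Actions.rbegin(), End = Actions.rend(); It != End; ++It)
          (*It)->undo();
        Actions.clear();
      }

      // Keep the changes by simply dropping the undo information.
      void commit() { Actions.clear(); }
    };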
void moveBefore(Instruction *Inst, Instruction *Before); /// @} @@ -1452,20 +1506,28 @@ void TypePromotionTransaction::mutateType(Instruction *Inst, Type *NewTy) { Actions.push_back(make_unique(Inst, NewTy)); } -Instruction *TypePromotionTransaction::createTrunc(Instruction *Opnd, - Type *Ty) { +Value *TypePromotionTransaction::createTrunc(Instruction *Opnd, + Type *Ty) { std::unique_ptr Ptr(new TruncBuilder(Opnd, Ty)); - Instruction *I = Ptr->getBuiltInstruction(); + Value *Val = Ptr->getBuiltValue(); Actions.push_back(std::move(Ptr)); - return I; + return Val; } -Instruction *TypePromotionTransaction::createSExt(Instruction *Inst, - Value *Opnd, Type *Ty) { +Value *TypePromotionTransaction::createSExt(Instruction *Inst, + Value *Opnd, Type *Ty) { std::unique_ptr Ptr(new SExtBuilder(Inst, Opnd, Ty)); - Instruction *I = Ptr->getBuiltInstruction(); + Value *Val = Ptr->getBuiltValue(); + Actions.push_back(std::move(Ptr)); + return Val; +} + +Value *TypePromotionTransaction::createZExt(Instruction *Inst, + Value *Opnd, Type *Ty) { + std::unique_ptr Ptr(new ZExtBuilder(Inst, Opnd, Ty)); + Value *Val = Ptr->getBuiltValue(); Actions.push_back(std::move(Ptr)); - return I; + return Val; } void TypePromotionTransaction::moveBefore(Instruction *Inst, @@ -1658,58 +1720,75 @@ static bool MightBeFoldableInst(Instruction *I) { /// \brief Hepler class to perform type promotion. class TypePromotionHelper { - /// \brief Utility function to check whether or not a sign extension of - /// \p Inst with \p ConsideredSExtType can be moved through \p Inst by either - /// using the operands of \p Inst or promoting \p Inst. + /// \brief Utility function to check whether or not a sign or zero extension + /// of \p Inst with \p ConsideredExtType can be moved through \p Inst by + /// either using the operands of \p Inst or promoting \p Inst. + /// The type of the extension is defined by \p IsSExt. /// In other words, check if: - /// sext (Ty Inst opnd1 opnd2 ... opndN) to ConsideredSExtType. + /// ext (Ty Inst opnd1 opnd2 ... opndN) to ConsideredExtType. /// #1 Promotion applies: - /// ConsideredSExtType Inst (sext opnd1 to ConsideredSExtType, ...). + /// ConsideredExtType Inst (ext opnd1 to ConsideredExtType, ...). /// #2 Operand reuses: - /// sext opnd1 to ConsideredSExtType. + /// ext opnd1 to ConsideredExtType. /// \p PromotedInsts maps the instructions to their type before promotion. - static bool canGetThrough(const Instruction *Inst, Type *ConsideredSExtType, - const InstrToOrigTy &PromotedInsts); + static bool canGetThrough(const Instruction *Inst, Type *ConsideredExtType, + const InstrToOrigTy &PromotedInsts, bool IsSExt); /// \brief Utility function to determine if \p OpIdx should be promoted when /// promoting \p Inst. - static bool shouldSExtOperand(const Instruction *Inst, int OpIdx) { + static bool shouldExtOperand(const Instruction *Inst, int OpIdx) { if (isa(Inst) && OpIdx == 0) return false; return true; } - /// \brief Utility function to promote the operand of \p SExt when this - /// operand is a promotable trunc or sext. + /// \brief Utility function to promote the operand of \p Ext when this + /// operand is a promotable trunc or sext or zext. /// \p PromotedInsts maps the instructions to their type before promotion. /// \p CreatedInsts[out] contains how many non-free instructions have been - /// created to promote the operand of SExt. + /// created to promote the operand of Ext. /// Should never be called directly. - /// \return The promoted value which is used instead of SExt. 
- static Value *promoteOperandForTruncAndSExt(Instruction *SExt, - TypePromotionTransaction &TPT, - InstrToOrigTy &PromotedInsts, - unsigned &CreatedInsts); + /// \return The promoted value which is used instead of Ext. + static Value *promoteOperandForTruncAndAnyExt(Instruction *Ext, + TypePromotionTransaction &TPT, + InstrToOrigTy &PromotedInsts, + unsigned &CreatedInsts); - /// \brief Utility function to promote the operand of \p SExt when this + /// \brief Utility function to promote the operand of \p Ext when this /// operand is promotable and is not a supported trunc or sext. /// \p PromotedInsts maps the instructions to their type before promotion. /// \p CreatedInsts[out] contains how many non-free instructions have been - /// created to promote the operand of SExt. + /// created to promote the operand of Ext. /// Should never be called directly. - /// \return The promoted value which is used instead of SExt. - static Value *promoteOperandForOther(Instruction *SExt, + /// \return The promoted value which is used instead of Ext. + static Value *promoteOperandForOther(Instruction *Ext, TypePromotionTransaction &TPT, InstrToOrigTy &PromotedInsts, - unsigned &CreatedInsts); + unsigned &CreatedInsts, bool IsSExt); + + /// \see promoteOperandForOther. + static Value *signExtendOperandForOther(Instruction *Ext, + TypePromotionTransaction &TPT, + InstrToOrigTy &PromotedInsts, + unsigned &CreatedInsts) { + return promoteOperandForOther(Ext, TPT, PromotedInsts, CreatedInsts, true); + } + + /// \see promoteOperandForOther. + static Value *zeroExtendOperandForOther(Instruction *Ext, + TypePromotionTransaction &TPT, + InstrToOrigTy &PromotedInsts, + unsigned &CreatedInsts) { + return promoteOperandForOther(Ext, TPT, PromotedInsts, CreatedInsts, false); + } public: - /// Type for the utility function that promotes the operand of SExt. - typedef Value *(*Action)(Instruction *SExt, TypePromotionTransaction &TPT, + /// Type for the utility function that promotes the operand of Ext. + typedef Value *(*Action)(Instruction *Ext, TypePromotionTransaction &TPT, InstrToOrigTy &PromotedInsts, unsigned &CreatedInsts); - /// \brief Given a sign extend instruction \p SExt, return the approriate - /// action to promote the operand of \p SExt instead of using SExt. + /// \brief Given a sign/zero extend instruction \p Ext, return the approriate + /// action to promote the operand of \p Ext instead of using Ext. /// \return NULL if no promotable action is possible with the current /// sign extension. /// \p InsertedTruncs keeps track of all the truncate instructions inserted by @@ -1717,36 +1796,42 @@ public: /// because we do not want to promote these instructions as CodeGenPrepare /// will reinsert them later. Thus creating an infinite loop: create/remove. /// \p PromotedInsts maps the instructions to their type before promotion. - static Action getAction(Instruction *SExt, const SetOfInstrs &InsertedTruncs, + static Action getAction(Instruction *Ext, const SetOfInstrs &InsertedTruncs, const TargetLowering &TLI, const InstrToOrigTy &PromotedInsts); }; bool TypePromotionHelper::canGetThrough(const Instruction *Inst, - Type *ConsideredSExtType, - const InstrToOrigTy &PromotedInsts) { - // We can always get through sext. - if (isa(Inst)) + Type *ConsideredExtType, + const InstrToOrigTy &PromotedInsts, + bool IsSExt) { + // We can always get through zext. + if (isa(Inst)) + return true; + + // sext(sext) is ok too. + if (IsSExt && isa(Inst)) return true; // We can get through binary operator, if it is legal. 
In other words, the // binary operator must have a nuw or nsw flag. const BinaryOperator *BinOp = dyn_cast(Inst); if (BinOp && isa(BinOp) && - (BinOp->hasNoUnsignedWrap() || BinOp->hasNoSignedWrap())) + ((!IsSExt && BinOp->hasNoUnsignedWrap()) || + (IsSExt && BinOp->hasNoSignedWrap()))) return true; // Check if we can do the following simplification. - // sext(trunc(sext)) --> sext + // ext(trunc(opnd)) --> ext(opnd) if (!isa(Inst)) return false; Value *OpndVal = Inst->getOperand(0); - // Check if we can use this operand in the sext. - // If the type is larger than the result type of the sign extension, + // Check if we can use this operand in the extension. + // If the type is larger than the result type of the extension, // we cannot. if (OpndVal->getType()->getIntegerBitWidth() > - ConsideredSExtType->getIntegerBitWidth()) + ConsideredExtType->getIntegerBitWidth()) return false; // If the operand of the truncate is not an instruction, we will not have @@ -1757,18 +1842,19 @@ bool TypePromotionHelper::canGetThrough(const Instruction *Inst, return false; // Check if the source of the type is narrow enough. - // I.e., check that trunc just drops sign extended bits. - // #1 get the type of the operand. + // I.e., check that trunc just drops extended bits of the same kind of + // the extension. + // #1 get the type of the operand and check the kind of the extended bits. const Type *OpndType; InstrToOrigTy::const_iterator It = PromotedInsts.find(Opnd); - if (It != PromotedInsts.end()) - OpndType = It->second; - else if (isa(Opnd)) - OpndType = cast(Opnd)->getOperand(0)->getType(); + if (It != PromotedInsts.end() && It->second.IsSExt == IsSExt) + OpndType = It->second.Ty; + else if ((IsSExt && isa(Opnd)) || (!IsSExt && isa(Opnd))) + OpndType = Opnd->getOperand(0)->getType(); else return false; - // #2 check that the truncate just drop sign extended bits. + // #2 check that the truncate just drop extended bits. if (Inst->getType()->getIntegerBitWidth() >= OpndType->getIntegerBitWidth()) return true; @@ -1776,149 +1862,167 @@ bool TypePromotionHelper::canGetThrough(const Instruction *Inst, } TypePromotionHelper::Action TypePromotionHelper::getAction( - Instruction *SExt, const SetOfInstrs &InsertedTruncs, + Instruction *Ext, const SetOfInstrs &InsertedTruncs, const TargetLowering &TLI, const InstrToOrigTy &PromotedInsts) { - Instruction *SExtOpnd = dyn_cast(SExt->getOperand(0)); - Type *SExtTy = SExt->getType(); - // If the operand of the sign extension is not an instruction, we cannot + assert((isa(Ext) || isa(Ext)) && + "Unexpected instruction type"); + Instruction *ExtOpnd = dyn_cast(Ext->getOperand(0)); + Type *ExtTy = Ext->getType(); + bool IsSExt = isa(Ext); + // If the operand of the extension is not an instruction, we cannot // get through. // If it, check we can get through. - if (!SExtOpnd || !canGetThrough(SExtOpnd, SExtTy, PromotedInsts)) + if (!ExtOpnd || !canGetThrough(ExtOpnd, ExtTy, PromotedInsts, IsSExt)) return nullptr; // Do not promote if the operand has been added by codegenprepare. // Otherwise, it means we are undoing an optimization that is likely to be // redone, thus causing potential infinite loop. - if (isa(SExtOpnd) && InsertedTruncs.count(SExtOpnd)) + if (isa(ExtOpnd) && InsertedTruncs.count(ExtOpnd)) return nullptr; // SExt or Trunc instructions. // Return the related handler. 
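The nuw/nsw requirement above is what makes it legal to push the extension through the binary operator: extending after a narrow add only matches adding the pre-extended operands when the narrow add cannot wrap. A small self-contained illustration of the zero-extension case, using ordinary C++ integer arithmetic purely for intuition:

    #include <cassert>
    #include <cstdint>

    int main() {
      // Without "nuw": the 8-bit add wraps, so widening before and after differ.
      uint8_t A = 200, B = 100;
      uint32_t ExtOfAdd = static_cast<uint8_t>(A + B); // zext(add i8 A, B)        -> 44
      uint32_t AddOfExt = uint32_t(A) + uint32_t(B);   // add i32 zext(A), zext(B) -> 300
      assert(ExtOfAdd != AddOfExt);

      // When the narrow add provably does not wrap, the two orders agree,
      // which is exactly when the promotion above is allowed to fire.
      uint8_t C = 20, D = 30;
      assert(uint32_t(static_cast<uint8_t>(C + D)) == uint32_t(C) + uint32_t(D));
      return 0;
    }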
- if (isa(SExtOpnd) || isa(SExtOpnd)) - return promoteOperandForTruncAndSExt; + if (isa(ExtOpnd) || isa(ExtOpnd) || + isa(ExtOpnd)) + return promoteOperandForTruncAndAnyExt; // Regular instruction. // Abort early if we will have to insert non-free instructions. - if (!SExtOpnd->hasOneUse() && - !TLI.isTruncateFree(SExtTy, SExtOpnd->getType())) + if (!ExtOpnd->hasOneUse() && !TLI.isTruncateFree(ExtTy, ExtOpnd->getType())) return nullptr; - return promoteOperandForOther; + return IsSExt ? signExtendOperandForOther : zeroExtendOperandForOther; } -Value *TypePromotionHelper::promoteOperandForTruncAndSExt( +Value *TypePromotionHelper::promoteOperandForTruncAndAnyExt( llvm::Instruction *SExt, TypePromotionTransaction &TPT, InstrToOrigTy &PromotedInsts, unsigned &CreatedInsts) { // By construction, the operand of SExt is an instruction. Otherwise we cannot // get through it and this method should not be called. Instruction *SExtOpnd = cast(SExt->getOperand(0)); - // Replace sext(trunc(opnd)) or sext(sext(opnd)) - // => sext(opnd). - TPT.setOperand(SExt, 0, SExtOpnd->getOperand(0)); + Value *ExtVal = SExt; + if (isa(SExtOpnd)) { + // Replace s|zext(zext(opnd)) + // => zext(opnd). + Value *ZExt = + TPT.createZExt(SExt, SExtOpnd->getOperand(0), SExt->getType()); + TPT.replaceAllUsesWith(SExt, ZExt); + TPT.eraseInstruction(SExt); + ExtVal = ZExt; + } else { + // Replace z|sext(trunc(opnd)) or sext(sext(opnd)) + // => z|sext(opnd). + TPT.setOperand(SExt, 0, SExtOpnd->getOperand(0)); + } CreatedInsts = 0; // Remove dead code. if (SExtOpnd->use_empty()) TPT.eraseInstruction(SExtOpnd); - // Check if the sext is still needed. - if (SExt->getType() != SExt->getOperand(0)->getType()) - return SExt; + // Check if the extension is still needed. + Instruction *ExtInst = dyn_cast(ExtVal); + if (!ExtInst || ExtInst->getType() != ExtInst->getOperand(0)->getType()) + return ExtVal; - // At this point we have: sext ty opnd to ty. - // Reassign the uses of SExt to the opnd and remove SExt. - Value *NextVal = SExt->getOperand(0); - TPT.eraseInstruction(SExt, NextVal); + // At this point we have: ext ty opnd to ty. + // Reassign the uses of ExtInst to the opnd and remove ExtInst. + Value *NextVal = ExtInst->getOperand(0); + TPT.eraseInstruction(ExtInst, NextVal); return NextVal; } -Value * -TypePromotionHelper::promoteOperandForOther(Instruction *SExt, - TypePromotionTransaction &TPT, - InstrToOrigTy &PromotedInsts, - unsigned &CreatedInsts) { - // By construction, the operand of SExt is an instruction. Otherwise we cannot +Value *TypePromotionHelper::promoteOperandForOther( + Instruction *Ext, TypePromotionTransaction &TPT, + InstrToOrigTy &PromotedInsts, unsigned &CreatedInsts, bool IsSExt) { + // By construction, the operand of Ext is an instruction. Otherwise we cannot // get through it and this method should not be called. - Instruction *SExtOpnd = cast(SExt->getOperand(0)); + Instruction *ExtOpnd = cast(Ext->getOperand(0)); CreatedInsts = 0; - if (!SExtOpnd->hasOneUse()) { - // SExtOpnd will be promoted. - // All its uses, but SExt, will need to use a truncated value of the + if (!ExtOpnd->hasOneUse()) { + // ExtOpnd will be promoted. + // All its uses, but Ext, will need to use a truncated value of the // promoted version. // Create the truncate now. - Instruction *Trunc = TPT.createTrunc(SExt, SExtOpnd->getType()); - Trunc->removeFromParent(); - // Insert it just after the definition. 
- Trunc->insertAfter(SExtOpnd); + Value *Trunc = TPT.createTrunc(Ext, ExtOpnd->getType()); + if (Instruction *ITrunc = dyn_cast(Trunc)) { + ITrunc->removeFromParent(); + // Insert it just after the definition. + ITrunc->insertAfter(ExtOpnd); + } - TPT.replaceAllUsesWith(SExtOpnd, Trunc); - // Restore the operand of SExt (which has been replace by the previous call + TPT.replaceAllUsesWith(ExtOpnd, Trunc); + // Restore the operand of Ext (which has been replace by the previous call // to replaceAllUsesWith) to avoid creating a cycle trunc <-> sext. - TPT.setOperand(SExt, 0, SExtOpnd); + TPT.setOperand(Ext, 0, ExtOpnd); } // Get through the Instruction: // 1. Update its type. - // 2. Replace the uses of SExt by Inst. - // 3. Sign extend each operand that needs to be sign extended. + // 2. Replace the uses of Ext by Inst. + // 3. Extend each operand that needs to be extended. // Remember the original type of the instruction before promotion. // This is useful to know that the high bits are sign extended bits. - PromotedInsts.insert( - std::pair(SExtOpnd, SExtOpnd->getType())); + PromotedInsts.insert(std::pair( + ExtOpnd, TypeIsSExt(ExtOpnd->getType(), IsSExt))); // Step #1. - TPT.mutateType(SExtOpnd, SExt->getType()); + TPT.mutateType(ExtOpnd, Ext->getType()); // Step #2. - TPT.replaceAllUsesWith(SExt, SExtOpnd); + TPT.replaceAllUsesWith(Ext, ExtOpnd); // Step #3. - Instruction *SExtForOpnd = SExt; + Instruction *ExtForOpnd = Ext; - DEBUG(dbgs() << "Propagate SExt to operands\n"); - for (int OpIdx = 0, EndOpIdx = SExtOpnd->getNumOperands(); OpIdx != EndOpIdx; + DEBUG(dbgs() << "Propagate Ext to operands\n"); + for (int OpIdx = 0, EndOpIdx = ExtOpnd->getNumOperands(); OpIdx != EndOpIdx; ++OpIdx) { - DEBUG(dbgs() << "Operand:\n" << *(SExtOpnd->getOperand(OpIdx)) << '\n'); - if (SExtOpnd->getOperand(OpIdx)->getType() == SExt->getType() || - !shouldSExtOperand(SExtOpnd, OpIdx)) { + DEBUG(dbgs() << "Operand:\n" << *(ExtOpnd->getOperand(OpIdx)) << '\n'); + if (ExtOpnd->getOperand(OpIdx)->getType() == Ext->getType() || + !shouldExtOperand(ExtOpnd, OpIdx)) { DEBUG(dbgs() << "No need to propagate\n"); continue; } - // Check if we can statically sign extend the operand. - Value *Opnd = SExtOpnd->getOperand(OpIdx); + // Check if we can statically extend the operand. + Value *Opnd = ExtOpnd->getOperand(OpIdx); if (const ConstantInt *Cst = dyn_cast(Opnd)) { - DEBUG(dbgs() << "Statically sign extend\n"); - TPT.setOperand( - SExtOpnd, OpIdx, - ConstantInt::getSigned(SExt->getType(), Cst->getSExtValue())); + DEBUG(dbgs() << "Statically extend\n"); + unsigned BitWidth = Ext->getType()->getIntegerBitWidth(); + APInt CstVal = IsSExt ? Cst->getValue().sext(BitWidth) + : Cst->getValue().zext(BitWidth); + TPT.setOperand(ExtOpnd, OpIdx, ConstantInt::get(Ext->getType(), CstVal)); continue; } // UndefValue are typed, so we have to statically sign extend them. if (isa(Opnd)) { - DEBUG(dbgs() << "Statically sign extend\n"); - TPT.setOperand(SExtOpnd, OpIdx, UndefValue::get(SExt->getType())); + DEBUG(dbgs() << "Statically extend\n"); + TPT.setOperand(ExtOpnd, OpIdx, UndefValue::get(Ext->getType())); continue; } // Otherwise we have to explicity sign extend the operand. - // Check if SExt was reused to sign extend an operand. - if (!SExtForOpnd) { + // Check if Ext was reused to extend an operand. + if (!ExtForOpnd) { // If yes, create a new one. 
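The constant case above no longer goes through getSExtValue; it widens the APInt directly, choosing sext or zext to match the kind of extension being moved. The effect on an 8-bit immediate widened to 32 bits, sketched with plain C++ integer conversions (widenImm is a made-up helper, illustrative only):

    #include <cassert>
    #include <cstdint>

    // Mirror of Cst->getValue().sext(BitWidth) vs .zext(BitWidth) for an 8-bit
    // immediate promoted to 32 bits.
    uint32_t widenImm(uint8_t Imm, bool IsSExt) {
      return IsSExt ? static_cast<uint32_t>(static_cast<int32_t>(static_cast<int8_t>(Imm)))
                    : static_cast<uint32_t>(Imm);
    }

    int main() {
      assert(widenImm(0xFF, /*IsSExt=*/true) == 0xFFFFFFFFu);  // -1 stays -1
      assert(widenImm(0xFF, /*IsSExt=*/false) == 0x000000FFu); // 255 stays 255
      return 0;
    }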
- DEBUG(dbgs() << "More operands to sext\n"); - SExtForOpnd = TPT.createSExt(SExt, Opnd, SExt->getType()); + DEBUG(dbgs() << "More operands to ext\n"); + ExtForOpnd = + cast(IsSExt ? TPT.createSExt(Ext, Opnd, Ext->getType()) + : TPT.createZExt(Ext, Opnd, Ext->getType())); ++CreatedInsts; } - TPT.setOperand(SExtForOpnd, 0, Opnd); + TPT.setOperand(ExtForOpnd, 0, Opnd); // Move the sign extension before the insertion point. - TPT.moveBefore(SExtForOpnd, SExtOpnd); - TPT.setOperand(SExtOpnd, OpIdx, SExtForOpnd); + TPT.moveBefore(ExtForOpnd, ExtOpnd); + TPT.setOperand(ExtOpnd, OpIdx, ExtForOpnd); // If more sext are required, new instructions will have to be created. - SExtForOpnd = nullptr; + ExtForOpnd = nullptr; } - if (SExtForOpnd == SExt) { - DEBUG(dbgs() << "Sign extension is useless now\n"); - TPT.eraseInstruction(SExt); + if (ExtForOpnd == Ext) { + DEBUG(dbgs() << "Extension is useless now\n"); + TPT.eraseInstruction(Ext); } - return SExtOpnd; + return ExtOpnd; } /// IsPromotionProfitable - Check whether or not promoting an instruction @@ -1951,8 +2055,8 @@ AddressingModeMatcher::IsPromotionProfitable(unsigned MatchedSize, if (!ISDOpcode) return true; // Otherwise, check if the promoted instruction is legal or not. - return TLI.isOperationLegalOrCustom(ISDOpcode, - EVT::getEVT(PromotedInst->getType())); + return TLI.isOperationLegalOrCustom( + ISDOpcode, TLI.getValueType(PromotedInst->getType())); } /// MatchOperationAddr - Given an instruction or constant expr, see if we can @@ -2036,7 +2140,8 @@ bool AddressingModeMatcher::MatchOperationAddr(User *AddrInst, unsigned Opcode, case Instruction::Shl: { // Can only handle X*C and X << C. ConstantInt *RHS = dyn_cast(AddrInst->getOperand(1)); - if (!RHS) return false; + if (!RHS) + return false; int64_t Scale = RHS->getSExtValue(); if (Opcode == Instruction::Shl) Scale = 1LL << Scale; @@ -2129,28 +2234,32 @@ bool AddressingModeMatcher::MatchOperationAddr(User *AddrInst, unsigned Opcode, return true; } - case Instruction::SExt: { - // Try to move this sext out of the way of the addressing mode. - Instruction *SExt = cast(AddrInst); + case Instruction::SExt: + case Instruction::ZExt: { + Instruction *Ext = dyn_cast(AddrInst); + if (!Ext) + return false; + + // Try to move this ext out of the way of the addressing mode. // Ask for a method for doing so. - TypePromotionHelper::Action TPH = TypePromotionHelper::getAction( - SExt, InsertedTruncs, TLI, PromotedInsts); + TypePromotionHelper::Action TPH = + TypePromotionHelper::getAction(Ext, InsertedTruncs, TLI, PromotedInsts); if (!TPH) return false; TypePromotionTransaction::ConstRestorationPt LastKnownGood = TPT.getRestorationPoint(); unsigned CreatedInsts = 0; - Value *PromotedOperand = TPH(SExt, TPT, PromotedInsts, CreatedInsts); + Value *PromotedOperand = TPH(Ext, TPT, PromotedInsts, CreatedInsts); // SExt has been moved away. // Thus either it will be rematched later in the recursive calls or it is // gone. Anyway, we must not fold it into the addressing mode at this point. // E.g., // op = add opnd, 1 - // idx = sext op + // idx = ext op // addr = gep base, idx // is now: - // promotedOpnd = sext opnd <- no match here + // promotedOpnd = ext opnd <- no match here // op = promoted_add promotedOpnd, 1 <- match (later in recursive calls) // addr = gep base, op <- match if (MovedAway) @@ -2289,10 +2398,10 @@ static bool IsOperandAMemoryOperand(CallInst *CI, InlineAsm *IA, Value *OpVal, /// Add the ultimately found memory instructions to MemoryUses. 
static bool FindAllMemoryUses(Instruction *I, SmallVectorImpl > &MemoryUses, - SmallPtrSet &ConsideredInsts, + SmallPtrSetImpl &ConsideredInsts, const TargetLowering &TLI) { // If we already considered this instruction, we're done. - if (!ConsideredInsts.insert(I)) + if (!ConsideredInsts.insert(I).second) return false; // If this is an obviously unfoldable instruction, bail out. @@ -2506,7 +2615,7 @@ bool CodeGenPrepare::OptimizeMemoryInst(Instruction *MemoryInst, Value *Addr, worklist.pop_back(); // Break use-def graph loops. - if (!Visited.insert(V)) { + if (!Visited.insert(V).second) { Consensus = nullptr; break; } @@ -2696,8 +2805,8 @@ bool CodeGenPrepare::OptimizeMemoryInst(Instruction *MemoryInst, Value *Addr, if (AddrMode.BaseOffs) { Value *V = ConstantInt::get(IntPtrTy, AddrMode.BaseOffs); if (ResultIndex) { - // We need to add this separately from the scale above to help with - // SDAG consecutive load/store merging. + // We need to add this separately from the scale above to help with + // SDAG consecutive load/store merging. if (ResultPtr->getType() != I8PtrTy) ResultPtr = Builder.CreateBitCast(ResultPtr, I8PtrTy); ResultPtr = Builder.CreateGEP(ResultPtr, ResultIndex, "sunkaddr"); @@ -3105,6 +3214,367 @@ bool CodeGenPrepare::OptimizeShuffleVectorInst(ShuffleVectorInst *SVI) { return MadeChange; } +namespace { +/// \brief Helper class to promote a scalar operation to a vector one. +/// This class is used to move an extractelement transition downward. +/// E.g., +/// a = vector_op <2 x i32> +/// b = extractelement <2 x i32> a, i32 0 +/// c = scalar_op b +/// store c +/// +/// => +/// a = vector_op <2 x i32> +/// c = vector_op a (equivalent to scalar_op on the related lane) +/// * d = extractelement <2 x i32> c, i32 0 +/// * store d +/// Assuming both extractelement and store can be combined, we get rid of the +/// transition. +class VectorPromoteHelper { + /// Used to perform some checks on the legality of vector operations. + const TargetLowering &TLI; + + /// Used to estimate the cost of the promoted chain. + const TargetTransformInfo &TTI; + + /// The transition being moved downwards. + Instruction *Transition; + /// The sequence of instructions to be promoted. + SmallVector InstsToBePromoted; + /// Cost of combining a store and an extract. + unsigned StoreExtractCombineCost; + /// Instruction that will be combined with the transition. + Instruction *CombineInst; + + /// \brief The instruction that represents the current end of the transition. + /// Since we are faking the promotion until we reach the end of the chain + /// of computation, we need a way to get the current end of the transition. + Instruction *getEndOfTransition() const { + if (InstsToBePromoted.empty()) + return Transition; + return InstsToBePromoted.back(); + } + + /// \brief Return the index of the original value in the transition. + /// E.g., for "extractelement <2 x i32> c, i32 1" the original value, + /// c, is at index 0. + unsigned getTransitionOriginalValueIdx() const { + assert(isa(Transition) && + "Other kind of transitions are not supported yet"); + return 0; + } + + /// \brief Return the index of the index in the transition. + /// E.g., for "extractelement <2 x i32> c, i32 0" the index + /// is at index 1. + unsigned getTransitionIdx() const { + assert(isa(Transition) && + "Other kind of transitions are not supported yet"); + return 1; + } + + /// \brief Get the type of the transition. + /// This is the type of the original value.
+ /// E.g., for "extractelement <2 x i32> c, i32 1" the type of the + /// transition is <2 x i32>. + Type *getTransitionType() const { + return Transition->getOperand(getTransitionOriginalValueIdx())->getType(); + } + + /// \brief Promote \p ToBePromoted by moving \p Def downward through. + /// I.e., we have the following sequence: + /// Def = Transition a to + /// b = ToBePromoted Def, ... + /// => + /// b = ToBePromoted a, ... + /// Def = Transition ToBePromoted to + void promoteImpl(Instruction *ToBePromoted); + + /// \brief Check whether or not it is profitable to promote all the + /// instructions enqueued to be promoted. + bool isProfitableToPromote() { + Value *ValIdx = Transition->getOperand(getTransitionOriginalValueIdx()); + unsigned Index = isa(ValIdx) + ? cast(ValIdx)->getZExtValue() + : -1; + Type *PromotedType = getTransitionType(); + + StoreInst *ST = cast(CombineInst); + unsigned AS = ST->getPointerAddressSpace(); + unsigned Align = ST->getAlignment(); + // Check if this store is supported. + if (!TLI.allowsMisalignedMemoryAccesses( + TLI.getValueType(ST->getValueOperand()->getType()), AS, Align)) { + // If this is not supported, there is no way we can combine + // the extract with the store. + return false; + } + + // The scalar chain of computation has to pay for the transition + // scalar to vector. + // The vector chain has to account for the combining cost. + uint64_t ScalarCost = + TTI.getVectorInstrCost(Transition->getOpcode(), PromotedType, Index); + uint64_t VectorCost = StoreExtractCombineCost; + for (const auto &Inst : InstsToBePromoted) { + // Compute the cost. + // By construction, all instructions being promoted are arithmetic ones. + // Moreover, one argument is a constant that can be viewed as a splat + // constant. + Value *Arg0 = Inst->getOperand(0); + bool IsArg0Constant = isa(Arg0) || isa(Arg0) || + isa(Arg0); + TargetTransformInfo::OperandValueKind Arg0OVK = + IsArg0Constant ? TargetTransformInfo::OK_UniformConstantValue + : TargetTransformInfo::OK_AnyValue; + TargetTransformInfo::OperandValueKind Arg1OVK = + !IsArg0Constant ? TargetTransformInfo::OK_UniformConstantValue + : TargetTransformInfo::OK_AnyValue; + ScalarCost += TTI.getArithmeticInstrCost( + Inst->getOpcode(), Inst->getType(), Arg0OVK, Arg1OVK); + VectorCost += TTI.getArithmeticInstrCost(Inst->getOpcode(), PromotedType, + Arg0OVK, Arg1OVK); + } + DEBUG(dbgs() << "Estimated cost of computation to be promoted:\nScalar: " + << ScalarCost << "\nVector: " << VectorCost << '\n'); + return ScalarCost > VectorCost; + } + + /// \brief Generate a constant vector with \p Val with the same + /// number of elements as the transition. + /// \p UseSplat defines whether or not \p Val should be replicated + /// accross the whole vector. + /// In other words, if UseSplat == true, we generate , + /// otherwise we generate a vector with as many undef as possible: + /// where \p Val is only + /// used at the index of the extract. + Value *getConstantVector(Constant *Val, bool UseSplat) const { + unsigned ExtractIdx = UINT_MAX; + if (!UseSplat) { + // If we cannot determine where the constant must be, we have to + // use a splat constant. 
+ Value *ValExtractIdx = Transition->getOperand(getTransitionIdx()); + if (ConstantInt *CstVal = dyn_cast(ValExtractIdx)) + ExtractIdx = CstVal->getSExtValue(); + else + UseSplat = true; + } + + unsigned End = getTransitionType()->getVectorNumElements(); + if (UseSplat) + return ConstantVector::getSplat(End, Val); + + SmallVector ConstVec; + UndefValue *UndefVal = UndefValue::get(Val->getType()); + for (unsigned Idx = 0; Idx != End; ++Idx) { + if (Idx == ExtractIdx) + ConstVec.push_back(Val); + else + ConstVec.push_back(UndefVal); + } + return ConstantVector::get(ConstVec); + } + + /// \brief Check if promoting to a vector type an operand at \p OperandIdx + /// in \p Use can trigger undefined behavior. + static bool canCauseUndefinedBehavior(const Instruction *Use, + unsigned OperandIdx) { + // This is not safe to introduce undef when the operand is on + // the right hand side of a division-like instruction. + if (OperandIdx != 1) + return false; + switch (Use->getOpcode()) { + default: + return false; + case Instruction::SDiv: + case Instruction::UDiv: + case Instruction::SRem: + case Instruction::URem: + return true; + case Instruction::FDiv: + case Instruction::FRem: + return !Use->hasNoNaNs(); + } + llvm_unreachable(nullptr); + } + +public: + VectorPromoteHelper(const TargetLowering &TLI, const TargetTransformInfo &TTI, + Instruction *Transition, unsigned CombineCost) + : TLI(TLI), TTI(TTI), Transition(Transition), + StoreExtractCombineCost(CombineCost), CombineInst(nullptr) { + assert(Transition && "Do not know how to promote null"); + } + + /// \brief Check if we can promote \p ToBePromoted to \p Type. + bool canPromote(const Instruction *ToBePromoted) const { + // We could support CastInst too. + return isa(ToBePromoted); + } + + /// \brief Check if it is profitable to promote \p ToBePromoted + /// by moving downward the transition through. + bool shouldPromote(const Instruction *ToBePromoted) const { + // Promote only if all the operands can be statically expanded. + // Indeed, we do not want to introduce any new kind of transitions. + for (const Use &U : ToBePromoted->operands()) { + const Value *Val = U.get(); + if (Val == getEndOfTransition()) { + // If the use is a division and the transition is on the rhs, + // we cannot promote the operation, otherwise we may create a + // division by zero. + if (canCauseUndefinedBehavior(ToBePromoted, U.getOperandNo())) + return false; + continue; + } + if (!isa(Val) && !isa(Val) && + !isa(Val)) + return false; + } + // Check that the resulting operation is legal. + int ISDOpcode = TLI.InstructionOpcodeToISD(ToBePromoted->getOpcode()); + if (!ISDOpcode) + return false; + return StressStoreExtract || + TLI.isOperationLegalOrCustom( + ISDOpcode, TLI.getValueType(getTransitionType(), true)); + } + + /// \brief Check whether or not \p Use can be combined + /// with the transition. + /// I.e., is it possible to do Use(Transition) => AnotherUse? + bool canCombine(const Instruction *Use) { return isa(Use); } + + /// \brief Record \p ToBePromoted as part of the chain to be promoted. + void enqueueForPromotion(Instruction *ToBePromoted) { + InstsToBePromoted.push_back(ToBePromoted); + } + + /// \brief Set the instruction that will be combined with the transition. + void recordCombineInstruction(Instruction *ToBeCombined) { + assert(canCombine(ToBeCombined) && "Unsupported instruction to combine"); + CombineInst = ToBeCombined; + } + + /// \brief Promote all the instructions enqueued for promotion if it is + /// is profitable. 
+ /// \return True if the promotion happened, false otherwise. + bool promote() { + // Check if there is something to promote. + // Right now, if we do not have anything to combine with, + // we assume the promotion is not profitable. + if (InstsToBePromoted.empty() || !CombineInst) + return false; + + // Check cost. + if (!StressStoreExtract && !isProfitableToPromote()) + return false; + + // Promote. + for (auto &ToBePromoted : InstsToBePromoted) + promoteImpl(ToBePromoted); + InstsToBePromoted.clear(); + return true; + } +}; +} // End of anonymous namespace. + +void VectorPromoteHelper::promoteImpl(Instruction *ToBePromoted) { + // At this point, we know that all the operands of ToBePromoted but Def + // can be statically promoted. + // For Def, we need to use its parameter in ToBePromoted: + // b = ToBePromoted ty1 a + // Def = Transition ty1 b to ty2 + // Move the transition down. + // 1. Replace all uses of the promoted operation by the transition. + // = ... b => = ... Def. + assert(ToBePromoted->getType() == Transition->getType() && + "The type of the result of the transition does not match " + "the final type"); + ToBePromoted->replaceAllUsesWith(Transition); + // 2. Update the type of the uses. + // b = ToBePromoted ty2 Def => b = ToBePromoted ty1 Def. + Type *TransitionTy = getTransitionType(); + ToBePromoted->mutateType(TransitionTy); + // 3. Update all the operands of the promoted operation with promoted + // operands. + // b = ToBePromoted ty1 Def => b = ToBePromoted ty1 a. + for (Use &U : ToBePromoted->operands()) { + Value *Val = U.get(); + Value *NewVal = nullptr; + if (Val == Transition) + NewVal = Transition->getOperand(getTransitionOriginalValueIdx()); + else if (isa(Val) || isa(Val) || + isa(Val)) { + // Use a splat constant if it is not safe to use undef. + NewVal = getConstantVector( + cast(Val), + isa(Val) || + canCauseUndefinedBehavior(ToBePromoted, U.getOperandNo())); + } else + assert(0 && "Did you modified shouldPromote and forgot to update this?"); + ToBePromoted->setOperand(U.getOperandNo(), NewVal); + } + Transition->removeFromParent(); + Transition->insertAfter(ToBePromoted); + Transition->setOperand(getTransitionOriginalValueIdx(), ToBePromoted); +} + +/// Some targets can do store(extractelement) with one instruction. +/// Try to push the extractelement towards the stores when the target +/// has this feature and this is profitable. +bool CodeGenPrepare::OptimizeExtractElementInst(Instruction *Inst) { + unsigned CombineCost = UINT_MAX; + if (DisableStoreExtract || !TLI || + (!StressStoreExtract && + !TLI->canCombineStoreAndExtract(Inst->getOperand(0)->getType(), + Inst->getOperand(1), CombineCost))) + return false; + + // At this point we know that Inst is a vector to scalar transition. + // Try to move it down the def-use chain, until: + // - We can combine the transition with its single use + // => we got rid of the transition. + // - We escape the current basic block + // => we would need to check that we are moving it at a cheaper place and + // we do not do that for now. + BasicBlock *Parent = Inst->getParent(); + DEBUG(dbgs() << "Found an interesting transition: " << *Inst << '\n'); + VectorPromoteHelper VPH(*TLI, *TTI, Inst, CombineCost); + // If the transition has more than one use, assume this is not going to be + // beneficial. 
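Before the walk below starts, it is worth picturing what getConstantVector above produces: when the extract index is a known constant and undef lanes are safe, only that lane carries the real value; when the index is unknown, or when an undef lane could end up on the right-hand side of a division (canCauseUndefinedBehavior), the value is splatted across every lane instead. A tiny stand-in using std::vector, with "U" marking undef lanes (makeLanes is a made-up helper; it assumes ExtractIdx < NumLanes):

    #include <string>
    #include <vector>

    std::vector<std::string> makeLanes(const std::string &Val, bool UseSplat,
                                       unsigned NumLanes, unsigned ExtractIdx) {
      std::vector<std::string> Lanes(NumLanes, UseSplat ? Val : "U");
      if (!UseSplat)
        Lanes[ExtractIdx] = Val; // only the extracted lane needs the real value
      return Lanes;
    }

    // makeLanes("7", /*UseSplat=*/false, 4, 2) -> { "U", "U", "7", "U" }
    // makeLanes("7", /*UseSplat=*/true,  4, 2) -> { "7", "7", "7", "7" }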
+ while (Inst->hasOneUse()) { + Instruction *ToBePromoted = cast(*Inst->user_begin()); + DEBUG(dbgs() << "Use: " << *ToBePromoted << '\n'); + + if (ToBePromoted->getParent() != Parent) { + DEBUG(dbgs() << "Instruction to promote is in a different block (" + << ToBePromoted->getParent()->getName() + << ") than the transition (" << Parent->getName() << ").\n"); + return false; + } + + if (VPH.canCombine(ToBePromoted)) { + DEBUG(dbgs() << "Assume " << *Inst << '\n' + << "will be combined with: " << *ToBePromoted << '\n'); + VPH.recordCombineInstruction(ToBePromoted); + bool Changed = VPH.promote(); + NumStoreExtractExposed += Changed; + return Changed; + } + + DEBUG(dbgs() << "Try promoting.\n"); + if (!VPH.canPromote(ToBePromoted) || !VPH.shouldPromote(ToBePromoted)) + return false; + + DEBUG(dbgs() << "Promoting is possible... Enqueue for promotion!\n"); + + VPH.enqueueForPromotion(ToBePromoted); + Inst = ToBePromoted; + } + return false; +} + bool CodeGenPrepare::OptimizeInst(Instruction *I) { if (PHINode *P = dyn_cast(I)) { // It is possible for very late stage optimizations (such as SimplifyCFG) @@ -3199,6 +3669,9 @@ bool CodeGenPrepare::OptimizeInst(Instruction *I) { if (ShuffleVectorInst *SVI = dyn_cast(I)) return OptimizeShuffleVectorInst(SVI); + if (isa(I)) + return OptimizeExtractElementInst(I); + return false; } diff --git a/lib/CodeGen/CriticalAntiDepBreaker.cpp b/lib/CodeGen/CriticalAntiDepBreaker.cpp index d2231ec..3d62d48 100644 --- a/lib/CodeGen/CriticalAntiDepBreaker.cpp +++ b/lib/CodeGen/CriticalAntiDepBreaker.cpp @@ -20,24 +20,20 @@ #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetInstrInfo.h" -#include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/Target/TargetSubtargetInfo.h" using namespace llvm; #define DEBUG_TYPE "post-RA-sched" -CriticalAntiDepBreaker:: -CriticalAntiDepBreaker(MachineFunction& MFi, const RegisterClassInfo &RCI) : - AntiDepBreaker(), MF(MFi), - MRI(MF.getRegInfo()), - TII(MF.getTarget().getInstrInfo()), - TRI(MF.getTarget().getRegisterInfo()), - RegClassInfo(RCI), - Classes(TRI->getNumRegs(), nullptr), - KillIndices(TRI->getNumRegs(), 0), - DefIndices(TRI->getNumRegs(), 0), - KeepRegs(TRI->getNumRegs(), false) {} +CriticalAntiDepBreaker::CriticalAntiDepBreaker(MachineFunction &MFi, + const RegisterClassInfo &RCI) + : AntiDepBreaker(), MF(MFi), MRI(MF.getRegInfo()), + TII(MF.getSubtarget().getInstrInfo()), + TRI(MF.getSubtarget().getRegisterInfo()), RegClassInfo(RCI), + Classes(TRI->getNumRegs(), nullptr), KillIndices(TRI->getNumRegs(), 0), + DefIndices(TRI->getNumRegs(), 0), KeepRegs(TRI->getNumRegs(), false) {} CriticalAntiDepBreaker::~CriticalAntiDepBreaker() { } @@ -273,19 +269,10 @@ void CriticalAntiDepBreaker::ScanInstruction(MachineInstr *MI, // Ignore two-addr defs. if (MI->isRegTiedToUseOperand(i)) continue; - // FIXME: we should use a SubRegIterator that includes self (as above), so - // we don't have to repeat all this code for the reg itself. - DefIndices[Reg] = Count; - KillIndices[Reg] = ~0u; - assert(((KillIndices[Reg] == ~0u) != - (DefIndices[Reg] == ~0u)) && - "Kill and Def maps aren't consistent for Reg!"); - KeepRegs.reset(Reg); - Classes[Reg] = nullptr; - RegRefs.erase(Reg); - // Repeat, for all subregs. 
- for (MCSubRegIterator SubRegs(Reg, TRI); SubRegs.isValid(); ++SubRegs) { - unsigned SubregReg = *SubRegs; + // For the reg itself and all subregs: update the def to current; + // reset the kill state, any restrictions, and references. + for (MCSubRegIterator SRI(Reg, TRI, true); SRI.isValid(); ++SRI) { + unsigned SubregReg = *SRI; DefIndices[SubregReg] = Count; KillIndices[SubregReg] = ~0u; KeepRegs.reset(SubregReg); @@ -317,19 +304,9 @@ void CriticalAntiDepBreaker::ScanInstruction(MachineInstr *MI, RegRefs.insert(std::make_pair(Reg, &MO)); - // FIXME: we should use an MCRegAliasIterator that includes self so we don't - // have to repeat all this code for the reg itself. - // It wasn't previously live but now it is, this is a kill. - if (KillIndices[Reg] == ~0u) { - KillIndices[Reg] = Count; - DefIndices[Reg] = ~0u; - assert(((KillIndices[Reg] == ~0u) != - (DefIndices[Reg] == ~0u)) && - "Kill and Def maps aren't consistent for Reg!"); - } - // Repeat, for all aliases. - for (MCRegAliasIterator AI(Reg, TRI, false); AI.isValid(); ++AI) { + // Repeat for all aliases. + for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI) { unsigned AliasReg = *AI; if (KillIndices[AliasReg] == ~0u) { KillIndices[AliasReg] = Count; diff --git a/lib/CodeGen/CriticalAntiDepBreaker.h b/lib/CodeGen/CriticalAntiDepBreaker.h index 45e4ff5..ceef74d 100644 --- a/lib/CodeGen/CriticalAntiDepBreaker.h +++ b/lib/CodeGen/CriticalAntiDepBreaker.h @@ -13,8 +13,8 @@ // //===----------------------------------------------------------------------===// -#ifndef LLVM_CODEGEN_CRITICALANTIDEPBREAKER_H -#define LLVM_CODEGEN_CRITICALANTIDEPBREAKER_H +#ifndef LLVM_LIB_CODEGEN_CRITICALANTIDEPBREAKER_H +#define LLVM_LIB_CODEGEN_CRITICALANTIDEPBREAKER_H #include "AntiDepBreaker.h" #include "llvm/ADT/BitVector.h" @@ -38,32 +38,32 @@ class TargetRegisterInfo; const TargetRegisterInfo *TRI; const RegisterClassInfo &RegClassInfo; - /// AllocatableSet - The set of allocatable registers. + /// The set of allocatable registers. /// We'll be ignoring anti-dependencies on non-allocatable registers, /// because they may not be safe to break. const BitVector AllocatableSet; - /// Classes - For live regs that are only used in one register class in a + /// For live regs that are only used in one register class in a /// live range, the register class. If the register is not live, the /// corresponding value is null. If the register is live but used in /// multiple register classes, the corresponding value is -1 casted to a /// pointer. std::vector Classes; - /// RegRefs - Map registers to all their references within a live range. + /// Map registers to all their references within a live range. std::multimap RegRefs; typedef std::multimap::const_iterator RegRefIter; - /// KillIndices - The index of the most recent kill (proceeding bottom-up), + /// The index of the most recent kill (proceeding bottom-up), /// or ~0u if the register is not live. std::vector KillIndices; - /// DefIndices - The index of the most recent complete def (proceeding + /// The index of the most recent complete def (proceeding /// bottom up), or ~0u if the register is live. std::vector DefIndices; - /// KeepRegs - A set of registers which are live and cannot be changed to + /// A set of registers which are live and cannot be changed to /// break anti-dependencies. 
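The ScanInstruction hunks above collapse the duplicated "handle Reg, then repeat for subregs/aliases" pattern into one loop by asking the iterator to include the register itself. A short sketch of that shape (resetDefKill is a hypothetical helper; it assumes, as the hunk does, that MCSubRegIterator takes an IncludeSelf flag):

    #include "llvm/MC/MCRegisterInfo.h"
    #include "llvm/Target/TargetRegisterInfo.h"
    #include <vector>
    using namespace llvm;

    // Reset bottom-up def/kill bookkeeping for Reg and all of its subregisters
    // in a single pass; IncludeSelf = true makes the first element Reg itself.
    void resetDefKill(unsigned Reg, unsigned Count, const TargetRegisterInfo *TRI,
                      std::vector<unsigned> &DefIndices,
                      std::vector<unsigned> &KillIndices) {
      for (MCSubRegIterator SRI(Reg, TRI, /*IncludeSelf=*/true); SRI.isValid(); ++SRI) {
        unsigned SubReg = *SRI;
        DefIndices[SubReg] = Count; // most recent def is the current instruction
        KillIndices[SubReg] = ~0u;  // no kill seen yet when scanning bottom-up
      }
    }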
BitVector KeepRegs; @@ -71,26 +71,23 @@ class TargetRegisterInfo; CriticalAntiDepBreaker(MachineFunction& MFi, const RegisterClassInfo&); ~CriticalAntiDepBreaker(); - /// Start - Initialize anti-dep breaking for a new basic block. + /// Initialize anti-dep breaking for a new basic block. void StartBlock(MachineBasicBlock *BB) override; - /// BreakAntiDependencies - Identifiy anti-dependencies along the critical - /// path + /// Identifiy anti-dependencies along the critical path /// of the ScheduleDAG and break them by renaming registers. - /// unsigned BreakAntiDependencies(const std::vector& SUnits, MachineBasicBlock::iterator Begin, MachineBasicBlock::iterator End, unsigned InsertPosIndex, DbgValueVector &DbgValues) override; - /// Observe - Update liveness information to account for the current + /// Update liveness information to account for the current /// instruction, which will not be scheduled. - /// void Observe(MachineInstr *MI, unsigned Count, unsigned InsertPosIndex) override; - /// Finish - Finish anti-dep breaking for a basic block. + /// Finish anti-dep breaking for a basic block. void FinishBlock() override; private: diff --git a/lib/CodeGen/DFAPacketizer.cpp b/lib/CodeGen/DFAPacketizer.cpp index bc6e9dc..0a188c0 100644 --- a/lib/CodeGen/DFAPacketizer.cpp +++ b/lib/CodeGen/DFAPacketizer.cpp @@ -106,16 +106,15 @@ namespace llvm { class DefaultVLIWScheduler : public ScheduleDAGInstrs { public: DefaultVLIWScheduler(MachineFunction &MF, MachineLoopInfo &MLI, - MachineDominatorTree &MDT, bool IsPostRA); + bool IsPostRA); // Schedule - Actual scheduling work. void schedule() override; }; } -DefaultVLIWScheduler::DefaultVLIWScheduler( - MachineFunction &MF, MachineLoopInfo &MLI, MachineDominatorTree &MDT, - bool IsPostRA) : - ScheduleDAGInstrs(MF, MLI, MDT, IsPostRA) { +DefaultVLIWScheduler::DefaultVLIWScheduler(MachineFunction &MF, + MachineLoopInfo &MLI, bool IsPostRA) + : ScheduleDAGInstrs(MF, &MLI, IsPostRA) { CanHandleTerminators = true; } @@ -125,12 +124,12 @@ void DefaultVLIWScheduler::schedule() { } // VLIWPacketizerList Ctor -VLIWPacketizerList::VLIWPacketizerList( - MachineFunction &MF, MachineLoopInfo &MLI, MachineDominatorTree &MDT, - bool IsPostRA) : TM(MF.getTarget()), MF(MF) { - TII = TM.getInstrInfo(); - ResourceTracker = TII->CreateTargetScheduleState(&TM, nullptr); - VLIWScheduler = new DefaultVLIWScheduler(MF, MLI, MDT, IsPostRA); +VLIWPacketizerList::VLIWPacketizerList(MachineFunction &MF, + MachineLoopInfo &MLI, bool IsPostRA) + : MF(MF) { + TII = MF.getSubtarget().getInstrInfo(); + ResourceTracker = TII->CreateTargetScheduleState(MF.getSubtarget()); + VLIWScheduler = new DefaultVLIWScheduler(MF, MLI, IsPostRA); } // VLIWPacketizerList Dtor diff --git a/lib/CodeGen/DeadMachineInstructionElim.cpp b/lib/CodeGen/DeadMachineInstructionElim.cpp index 2b144d8..48213c1 100644 --- a/lib/CodeGen/DeadMachineInstructionElim.cpp +++ b/lib/CodeGen/DeadMachineInstructionElim.cpp @@ -19,7 +19,8 @@ #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetInstrInfo.h" -#include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetSubtargetInfo.h" + using namespace llvm; #define DEBUG_TYPE "codegen-dce" @@ -90,8 +91,8 @@ bool DeadMachineInstructionElim::runOnMachineFunction(MachineFunction &MF) { bool AnyChanges = false; MRI = &MF.getRegInfo(); - TRI = MF.getTarget().getRegisterInfo(); - TII = MF.getTarget().getInstrInfo(); + TRI = MF.getSubtarget().getRegisterInfo(); + TII = MF.getSubtarget().getInstrInfo(); // Loop over all 
instructions in all blocks, from bottom to top, so that it's // more likely that chains of dependent but ultimately dead instructions will @@ -122,19 +123,10 @@ bool DeadMachineInstructionElim::runOnMachineFunction(MachineFunction &MF) { if (isDead(MI)) { DEBUG(dbgs() << "DeadMachineInstructionElim: DELETING: " << *MI); // It is possible that some DBG_VALUE instructions refer to this - // instruction. Examine each def operand for such references; - // if found, mark the DBG_VALUE as undef (but don't delete it). - for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { - const MachineOperand &MO = MI->getOperand(i); - if (!MO.isReg() || !MO.isDef()) - continue; - unsigned Reg = MO.getReg(); - if (!TargetRegisterInfo::isVirtualRegister(Reg)) - continue; - MRI->markUsesInDebugValueAsUndef(Reg); - } + // instruction. They get marked as undef and will be deleted + // in the live debug variable analysis. + MI->eraseFromParentAndMarkDBGValuesForRemoval(); AnyChanges = true; - MI->eraseFromParent(); ++NumDeletes; MIE = MBB->rend(); // MII is now pointing to the next instruction to process, diff --git a/lib/CodeGen/DwarfEHPrepare.cpp b/lib/CodeGen/DwarfEHPrepare.cpp index a195586..75b74d9 100644 --- a/lib/CodeGen/DwarfEHPrepare.cpp +++ b/lib/CodeGen/DwarfEHPrepare.cpp @@ -23,6 +23,7 @@ #include "llvm/MC/MCAsmInfo.h" #include "llvm/Pass.h" #include "llvm/Target/TargetLowering.h" +#include "llvm/Target/TargetSubtargetInfo.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" #include "llvm/Transforms/Utils/SSAUpdater.h" using namespace llvm; @@ -50,6 +51,11 @@ namespace { bool runOnFunction(Function &Fn) override; + bool doFinalization(Module &M) override { + RewindFunction = nullptr; + return false; + } + void getAnalysisUsage(AnalysisUsage &AU) const override { } const char *getPassName() const override { @@ -118,7 +124,7 @@ bool DwarfEHPrepare::InsertUnwindResumeCalls(Function &Fn) { return false; // Find the rewind function if we didn't already. - const TargetLowering *TLI = TM->getTargetLowering(); + const TargetLowering *TLI = TM->getSubtargetImpl()->getTargetLowering(); if (!RewindFunction) { LLVMContext &Ctx = Resumes[0]->getContext(); FunctionType *FTy = FunctionType::get(Type::getVoidTy(Ctx), diff --git a/lib/CodeGen/EarlyIfConversion.cpp b/lib/CodeGen/EarlyIfConversion.cpp index c470632..995606f 100644 --- a/lib/CodeGen/EarlyIfConversion.cpp +++ b/lib/CodeGen/EarlyIfConversion.cpp @@ -153,8 +153,8 @@ private: public: /// runOnMachineFunction - Initialize per-function data structures. 
void runOnMachineFunction(MachineFunction &MF) { - TII = MF.getTarget().getInstrInfo(); - TRI = MF.getTarget().getRegisterInfo(); + TII = MF.getSubtarget().getInstrInfo(); + TRI = MF.getSubtarget().getRegisterInfo(); MRI = &MF.getRegInfo(); LiveRegUnits.clear(); LiveRegUnits.setUniverse(TRI->getNumRegUnits()); @@ -245,7 +245,7 @@ bool SSAIfConv::canSpeculateInstrs(MachineBasicBlock *MBB) { MachineInstr *DefMI = MRI->getVRegDef(Reg); if (!DefMI || DefMI->getParent() != Head) continue; - if (InsertAfter.insert(DefMI)) + if (InsertAfter.insert(DefMI).second) DEBUG(dbgs() << "BB#" << MBB->getNumber() << " depends on " << *DefMI); if (DefMI->isTerminator()) { DEBUG(dbgs() << "Can't insert instructions below terminator.\n"); @@ -580,7 +580,7 @@ namespace { class EarlyIfConverter : public MachineFunctionPass { const TargetInstrInfo *TII; const TargetRegisterInfo *TRI; - const MCSchedModel *SchedModel; + MCSchedModel SchedModel; MachineRegisterInfo *MRI; MachineDominatorTree *DomTree; MachineLoopInfo *Loops; @@ -688,7 +688,7 @@ bool EarlyIfConverter::shouldConvertIf() { FBBTrace.getCriticalPath()); // Set a somewhat arbitrary limit on the critical path extension we accept. - unsigned CritLimit = SchedModel->MispredictPenalty/2; + unsigned CritLimit = SchedModel.MispredictPenalty/2; // If-conversion only makes sense when there is unexploited ILP. Compute the // maximum-ILP resource length of the trace after if-conversion. Compare it @@ -782,8 +782,8 @@ bool EarlyIfConverter::runOnMachineFunction(MachineFunction &MF) { .enableEarlyIfConversion()) return false; - TII = MF.getTarget().getInstrInfo(); - TRI = MF.getTarget().getRegisterInfo(); + TII = MF.getSubtarget().getInstrInfo(); + TRI = MF.getSubtarget().getRegisterInfo(); SchedModel = MF.getTarget().getSubtarget().getSchedModel(); MRI = &MF.getRegInfo(); diff --git a/lib/CodeGen/ErlangGC.cpp b/lib/CodeGen/ErlangGC.cpp index e976d7f..85b0893 100644 --- a/lib/CodeGen/ErlangGC.cpp +++ b/lib/CodeGen/ErlangGC.cpp @@ -21,6 +21,7 @@ #include "llvm/MC/MCSymbol.h" #include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetSubtargetInfo.h" using namespace llvm; @@ -53,7 +54,7 @@ ErlangGC::ErlangGC() { MCSymbol *ErlangGC::InsertLabel(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, DebugLoc DL) const { - const TargetInstrInfo* TII = MBB.getParent()->getTarget().getInstrInfo(); + const TargetInstrInfo *TII = MBB.getParent()->getSubtarget().getInstrInfo(); MCSymbol *Label = MBB.getParent()->getContext().CreateTempSymbol(); BuildMI(MBB, MI, DL, TII->get(TargetOpcode::GC_LABEL)).addSym(Label); return Label; diff --git a/lib/CodeGen/ExecutionDepsFix.cpp b/lib/CodeGen/ExecutionDepsFix.cpp index cf55b68..3680498 100644 --- a/lib/CodeGen/ExecutionDepsFix.cpp +++ b/lib/CodeGen/ExecutionDepsFix.cpp @@ -29,7 +29,8 @@ #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetInstrInfo.h" -#include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetSubtargetInfo.h" + using namespace llvm; #define DEBUG_TYPE "execution-fix" @@ -713,13 +714,13 @@ void ExeDepsFix::visitSoftInstr(MachineInstr *mi, unsigned mask) { bool ExeDepsFix::runOnMachineFunction(MachineFunction &mf) { MF = &mf; - TII = MF->getTarget().getInstrInfo(); - TRI = MF->getTarget().getRegisterInfo(); + TII = MF->getSubtarget().getInstrInfo(); + TRI = MF->getSubtarget().getRegisterInfo(); LiveRegs = nullptr; assert(NumRegs == RC->getNumRegs() && "Bad regclass"); DEBUG(dbgs() << "********** FIX EXECUTION 
DEPENDENCIES: " - << RC->getName() << " **********\n"); + << TRI->getRegClassName(RC) << " **********\n"); // If no relevant registers are used in the function, we can skip it // completely. diff --git a/lib/CodeGen/ExpandISelPseudos.cpp b/lib/CodeGen/ExpandISelPseudos.cpp index 90b62b5..55e809e 100644 --- a/lib/CodeGen/ExpandISelPseudos.cpp +++ b/lib/CodeGen/ExpandISelPseudos.cpp @@ -19,7 +19,7 @@ #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/Support/Debug.h" #include "llvm/Target/TargetLowering.h" -#include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetSubtargetInfo.h" using namespace llvm; #define DEBUG_TYPE "expand-isel-pseudos" @@ -46,7 +46,7 @@ INITIALIZE_PASS(ExpandISelPseudos, "expand-isel-pseudos", bool ExpandISelPseudos::runOnMachineFunction(MachineFunction &MF) { bool Changed = false; - const TargetLowering *TLI = MF.getTarget().getTargetLowering(); + const TargetLowering *TLI = MF.getSubtarget().getTargetLowering(); // Iterate through each instruction in the function, looking for pseudos. for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ++I) { diff --git a/lib/CodeGen/ExpandPostRAPseudos.cpp b/lib/CodeGen/ExpandPostRAPseudos.cpp index 8969bcc..e7bf143 100644 --- a/lib/CodeGen/ExpandPostRAPseudos.cpp +++ b/lib/CodeGen/ExpandPostRAPseudos.cpp @@ -20,8 +20,9 @@ #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetInstrInfo.h" -#include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/Target/TargetSubtargetInfo.h" + using namespace llvm; #define DEBUG_TYPE "postrapseudos" @@ -182,8 +183,8 @@ bool ExpandPostRA::runOnMachineFunction(MachineFunction &MF) { DEBUG(dbgs() << "Machine Function\n" << "********** EXPANDING POST-RA PSEUDO INSTRS **********\n" << "********** Function: " << MF.getName() << '\n'); - TRI = MF.getTarget().getRegisterInfo(); - TII = MF.getTarget().getInstrInfo(); + TRI = MF.getSubtarget().getRegisterInfo(); + TII = MF.getSubtarget().getInstrInfo(); bool MadeChange = false; diff --git a/lib/CodeGen/ForwardControlFlowIntegrity.cpp b/lib/CodeGen/ForwardControlFlowIntegrity.cpp new file mode 100644 index 0000000..5e7e853 --- /dev/null +++ b/lib/CodeGen/ForwardControlFlowIntegrity.cpp @@ -0,0 +1,374 @@ +//===-- ForwardControlFlowIntegrity.cpp: Forward-Edge CFI -----------------===// +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// \brief A pass that instruments code with fast checks for indirect calls and +/// hooks for a function to check violations. 
+/// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "cfi" + +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/Analysis/JumpInstrTableInfo.h" +#include "llvm/CodeGen/ForwardControlFlowIntegrity.h" +#include "llvm/CodeGen/JumpInstrTables.h" +#include "llvm/CodeGen/Passes.h" +#include "llvm/IR/Attributes.h" +#include "llvm/IR/CallSite.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/GlobalValue.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/InlineAsm.h" +#include "llvm/IR/IRBuilder.h" +#include "llvm/IR/LLVMContext.h" +#include "llvm/IR/Module.h" +#include "llvm/IR/Operator.h" +#include "llvm/IR/Type.h" +#include "llvm/IR/Verifier.h" +#include "llvm/Pass.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" + +using namespace llvm; + +STATISTIC(NumCFIIndirectCalls, + "Number of indirect call sites rewritten by the CFI pass"); + +char ForwardControlFlowIntegrity::ID = 0; +INITIALIZE_PASS_BEGIN(ForwardControlFlowIntegrity, "forward-cfi", + "Control-Flow Integrity", true, true) +INITIALIZE_PASS_DEPENDENCY(JumpInstrTableInfo); +INITIALIZE_PASS_DEPENDENCY(JumpInstrTables); +INITIALIZE_PASS_END(ForwardControlFlowIntegrity, "forward-cfi", + "Control-Flow Integrity", true, true) + +ModulePass *llvm::createForwardControlFlowIntegrityPass() { + return new ForwardControlFlowIntegrity(); +} + +ModulePass *llvm::createForwardControlFlowIntegrityPass( + JumpTable::JumpTableType JTT, CFIntegrity CFIType, bool CFIEnforcing, + StringRef CFIFuncName) { + return new ForwardControlFlowIntegrity(JTT, CFIType, CFIEnforcing, + CFIFuncName); +} + +// Checks to see if a given CallSite is making an indirect call, including +// cases where the indirect call is made through a bitcast. +static bool isIndirectCall(CallSite &CS) { + if (CS.getCalledFunction()) + return false; + + // Check the value to see if it is merely a bitcast of a function. In + // this case, it will translate to a direct function call in the resulting + // assembly, so we won't treat it as an indirect call here. + const Value *V = CS.getCalledValue(); + if (const ConstantExpr *CE = dyn_cast(V)) { + return !(CE->isCast() && isa(CE->getOperand(0))); + } + + // Otherwise, since we know it's a call, it must be an indirect call + return true; +} + +static const char cfi_failure_func_name[] = "__llvm_cfi_pointer_warning"; + +ForwardControlFlowIntegrity::ForwardControlFlowIntegrity() + : ModulePass(ID), IndirectCalls(), JTType(JumpTable::Single), + CFIType(CFIntegrity::Sub), CFIEnforcing(false), CFIFuncName("") { + initializeForwardControlFlowIntegrityPass(*PassRegistry::getPassRegistry()); +} + +ForwardControlFlowIntegrity::ForwardControlFlowIntegrity( + JumpTable::JumpTableType JTT, CFIntegrity CFIType, bool CFIEnforcing, + std::string CFIFuncName) + : ModulePass(ID), IndirectCalls(), JTType(JTT), CFIType(CFIType), + CFIEnforcing(CFIEnforcing), CFIFuncName(CFIFuncName) { + initializeForwardControlFlowIntegrityPass(*PassRegistry::getPassRegistry()); +} + +ForwardControlFlowIntegrity::~ForwardControlFlowIntegrity() {} + +void ForwardControlFlowIntegrity::getAnalysisUsage(AnalysisUsage &AU) const { + AU.addRequired(); + AU.addRequired(); +} + +void ForwardControlFlowIntegrity::getIndirectCalls(Module &M) { + // To get the indirect calls, we iterate over all functions and iterate over + // the list of basic blocks in each. 
We extract a total list of indirect calls + // before modifying any of them, since our modifications will modify the list + // of basic blocks. + for (Function &F : M) { + for (BasicBlock &BB : F) { + for (Instruction &I : BB) { + CallSite CS(&I); + if (!(CS && isIndirectCall(CS))) + continue; + + Value *CalledValue = CS.getCalledValue(); + + // Don't rewrite this instruction if the indirect call is actually just + // inline assembly, since our transformation will generate an invalid + // module in that case. + if (isa(CalledValue)) + continue; + + IndirectCalls.push_back(&I); + } + } + } +} + +void ForwardControlFlowIntegrity::updateIndirectCalls(Module &M, + CFITables &CFIT) { + Type *Int64Ty = Type::getInt64Ty(M.getContext()); + for (Instruction *I : IndirectCalls) { + CallSite CS(I); + Value *CalledValue = CS.getCalledValue(); + + // Get the function type for this call and look it up in the tables. + Type *VTy = CalledValue->getType(); + PointerType *PTy = dyn_cast(VTy); + Type *EltTy = PTy->getElementType(); + FunctionType *FunTy = dyn_cast(EltTy); + FunctionType *TransformedTy = JumpInstrTables::transformType(JTType, FunTy); + ++NumCFIIndirectCalls; + Constant *JumpTableStart = nullptr; + Constant *JumpTableMask = nullptr; + Constant *JumpTableSize = nullptr; + + // Some call sites have function types that don't correspond to any + // address-taken function in the module. This happens when function pointers + // are passed in from external code. + auto it = CFIT.find(TransformedTy); + if (it == CFIT.end()) { + // In this case, make sure that the function pointer will change by + // setting the mask and the start to be 0 so that the transformed + // function is 0. + JumpTableStart = ConstantInt::get(Int64Ty, 0); + JumpTableMask = ConstantInt::get(Int64Ty, 0); + JumpTableSize = ConstantInt::get(Int64Ty, 0); + } else { + JumpTableStart = it->second.StartValue; + JumpTableMask = it->second.MaskValue; + JumpTableSize = it->second.Size; + } + + rewriteFunctionPointer(M, I, CalledValue, JumpTableStart, JumpTableMask, + JumpTableSize); + } + + return; +} + +bool ForwardControlFlowIntegrity::runOnModule(Module &M) { + JumpInstrTableInfo *JITI = &getAnalysis(); + Type *Int64Ty = Type::getInt64Ty(M.getContext()); + Type *VoidPtrTy = Type::getInt8PtrTy(M.getContext()); + + // JumpInstrTableInfo stores information about the alignment of each entry. + // The alignment returned by JumpInstrTableInfo is alignment in bytes, not + // in the exponent. + ByteAlignment = JITI->entryByteAlignment(); + LogByteAlignment = llvm::Log2_64(ByteAlignment); + + // Set up tables for control-flow integrity based on information about the + // jump-instruction tables. + CFITables CFIT; + for (const auto &KV : JITI->getTables()) { + uint64_t Size = static_cast(KV.second.size()); + uint64_t TableSize = NextPowerOf2(Size); + + int64_t MaskValue = ((TableSize << LogByteAlignment) - 1) & -ByteAlignment; + Constant *JumpTableMaskValue = ConstantInt::get(Int64Ty, MaskValue); + Constant *JumpTableSize = ConstantInt::get(Int64Ty, Size); + + // The base of the table is defined to be the first jumptable function in + // the table. 
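+  // Worked example (illustrative only; assumes 8-byte table entries, so
+  // ByteAlignment == 8 and LogByteAlignment == 3): a table with Size == 5
+  // rounds up to TableSize == NextPowerOf2(5) == 8, giving
+  //   MaskValue = ((8 << 3) - 1) & -8 == 63 & -8 == 56 == 0b111000,
+  // i.e. the mask keeps only the entry-index bits above the alignment bits,
+  // and the base taken below is the first function in that table.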
+ Function *First = KV.second.begin()->second; + Constant *JumpTableStartValue = ConstantExpr::getBitCast(First, VoidPtrTy); + CFIT[KV.first].StartValue = JumpTableStartValue; + CFIT[KV.first].MaskValue = JumpTableMaskValue; + CFIT[KV.first].Size = JumpTableSize; + } + + if (CFIT.empty()) + return false; + + getIndirectCalls(M); + + if (!CFIEnforcing) { + addWarningFunction(M); + } + + // Update the instructions with the check and the indirect jump through our + // table. + updateIndirectCalls(M, CFIT); + + return true; +} + +void ForwardControlFlowIntegrity::addWarningFunction(Module &M) { + PointerType *CharPtrTy = Type::getInt8PtrTy(M.getContext()); + + // Get the type of the Warning Function: void (i8*, i8*), + // where the first argument is the name of the function in which the violation + // occurs, and the second is the function pointer that violates CFI. + SmallVector WarningFunArgs; + WarningFunArgs.push_back(CharPtrTy); + WarningFunArgs.push_back(CharPtrTy); + FunctionType *WarningFunTy = + FunctionType::get(Type::getVoidTy(M.getContext()), WarningFunArgs, false); + + if (!CFIFuncName.empty()) { + Constant *FailureFun = M.getOrInsertFunction(CFIFuncName, WarningFunTy); + if (!FailureFun) + report_fatal_error("Could not get or insert the function specified by" + " -cfi-func-name"); + } else { + // The default warning function swallows the warning and lets the call + // continue, since there's no generic way for it to print out this + // information. + Function *WarningFun = M.getFunction(cfi_failure_func_name); + if (!WarningFun) { + WarningFun = + Function::Create(WarningFunTy, GlobalValue::LinkOnceAnyLinkage, + cfi_failure_func_name, &M); + } + + BasicBlock *Entry = + BasicBlock::Create(M.getContext(), "entry", WarningFun, 0); + ReturnInst::Create(M.getContext(), Entry); + } +} + +void ForwardControlFlowIntegrity::rewriteFunctionPointer( + Module &M, Instruction *I, Value *FunPtr, Constant *JumpTableStart, + Constant *JumpTableMask, Constant *JumpTableSize) { + IRBuilder<> TempBuilder(I); + + Type *OrigFunType = FunPtr->getType(); + + BasicBlock *CurBB = cast(I->getParent()); + Function *CurF = cast(CurBB->getParent()); + Type *Int64Ty = Type::getInt64Ty(M.getContext()); + + Value *TI = TempBuilder.CreatePtrToInt(FunPtr, Int64Ty); + Value *TStartInt = TempBuilder.CreatePtrToInt(JumpTableStart, Int64Ty); + + Value *NewFunPtr = nullptr; + Value *Check = nullptr; + switch (CFIType) { + case CFIntegrity::Sub: { + // This is the subtract, mask, and add version. + // Subtract from the base. + Value *Sub = TempBuilder.CreateSub(TI, TStartInt); + + // Mask the difference to force this to be a table offset. + Value *And = TempBuilder.CreateAnd(Sub, JumpTableMask); + + // Add it back to the base. + Value *Result = TempBuilder.CreateAdd(And, TStartInt); + + // Convert it back into a function pointer that we can call. + NewFunPtr = TempBuilder.CreateIntToPtr(Result, OrigFunType); + break; + } + case CFIntegrity::Ror: { + // This is the subtract and rotate version. + // Rotate right by the alignment value. The optimizer should recognize + // this sequence as a rotation. + + // This cast is safe, since unsigned is always a subset of uint64_t. + uint64_t LogByteAlignment64 = static_cast(LogByteAlignment); + Constant *RightShift = ConstantInt::get(Int64Ty, LogByteAlignment64); + Constant *LeftShift = ConstantInt::get(Int64Ty, 64 - LogByteAlignment64); + + // Subtract from the base. + Value *Sub = TempBuilder.CreateSub(TI, TStartInt); + + // Create the equivalent of a rotate-right instruction. 
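+    // Illustrative note: rotating (TI - TStartInt) right by LogByteAlignment
+    // moves any misaligned low bits into the high bits of the result, so the
+    // single unsigned compare against JumpTableSize below rejects pointers
+    // that are out of range or not entry-aligned.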
+ Value *Shr = TempBuilder.CreateLShr(Sub, RightShift); + Value *Shl = TempBuilder.CreateShl(Sub, LeftShift); + Value *Or = TempBuilder.CreateOr(Shr, Shl); + + // Perform unsigned comparison to check for inclusion in the table. + Check = TempBuilder.CreateICmpULT(Or, JumpTableSize); + NewFunPtr = FunPtr; + break; + } + case CFIntegrity::Add: { + // This is the mask and add version. + // Mask the function pointer to turn it into an offset into the table. + Value *And = TempBuilder.CreateAnd(TI, JumpTableMask); + + // Then or this offset to the base and get the pointer value. + Value *Result = TempBuilder.CreateAdd(And, TStartInt); + + // Convert it back into a function pointer that we can call. + NewFunPtr = TempBuilder.CreateIntToPtr(Result, OrigFunType); + break; + } + } + + if (!CFIEnforcing) { + // If a check hasn't been added (in the rotation version), then check to see + // if it's the same as the original function. This check determines whether + // or not we call the CFI failure function. + if (!Check) + Check = TempBuilder.CreateICmpEQ(NewFunPtr, FunPtr); + BasicBlock *InvalidPtrBlock = + BasicBlock::Create(M.getContext(), "invalid.ptr", CurF, 0); + BasicBlock *ContinuationBB = CurBB->splitBasicBlock(I); + + // Remove the unconditional branch that connects the two blocks. + TerminatorInst *TermInst = CurBB->getTerminator(); + TermInst->eraseFromParent(); + + // Add a conditional branch that depends on the Check above. + BranchInst::Create(ContinuationBB, InvalidPtrBlock, Check, CurBB); + + // Call the warning function for this pointer, then continue. + Instruction *BI = BranchInst::Create(ContinuationBB, InvalidPtrBlock); + insertWarning(M, InvalidPtrBlock, BI, FunPtr); + } else { + // Modify the instruction to call this value. + CallSite CS(I); + CS.setCalledFunction(NewFunPtr); + } +} + +void ForwardControlFlowIntegrity::insertWarning(Module &M, BasicBlock *Block, + Instruction *I, Value *FunPtr) { + Function *ParentFun = cast(Block->getParent()); + + // Get the function to call right before the instruction. + Function *WarningFun = nullptr; + if (CFIFuncName.empty()) { + WarningFun = M.getFunction(cfi_failure_func_name); + } else { + WarningFun = M.getFunction(CFIFuncName); + } + + assert(WarningFun && "Could not find the CFI failure function"); + + Type *VoidPtrTy = Type::getInt8PtrTy(M.getContext()); + + IRBuilder<> WarningInserter(I); + // Create a mergeable GlobalVariable containing the name of the function. 
+ Value *ParentNameGV = + WarningInserter.CreateGlobalString(ParentFun->getName()); + Value *ParentNamePtr = WarningInserter.CreateBitCast(ParentNameGV, VoidPtrTy); + Value *FunVoidPtr = WarningInserter.CreateBitCast(FunPtr, VoidPtrTy); + WarningInserter.CreateCall2(WarningFun, ParentNamePtr, FunVoidPtr); +} diff --git a/lib/CodeGen/GCMetadata.cpp b/lib/CodeGen/GCMetadata.cpp index c3e4f3e..ed40982 100644 --- a/lib/CodeGen/GCMetadata.cpp +++ b/lib/CodeGen/GCMetadata.cpp @@ -73,7 +73,7 @@ GCStrategy *GCModuleInfo::getOrCreateStrategy(const Module *M, std::unique_ptr S = I->instantiate(); S->M = M; S->Name = Name; - StrategyMap.GetOrCreateValue(Name).setValue(S.get()); + StrategyMap[Name] = S.get(); StrategyList.push_back(std::move(S)); return StrategyList.back().get(); } diff --git a/lib/CodeGen/GCStrategy.cpp b/lib/CodeGen/GCStrategy.cpp index 1fdff6b..b346657 100644 --- a/lib/CodeGen/GCStrategy.cpp +++ b/lib/CodeGen/GCStrategy.cpp @@ -31,6 +31,7 @@ #include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/Target/TargetSubtargetInfo.h" using namespace llvm; @@ -377,7 +378,7 @@ void GCMachineCodeAnalysis::FindSafePoints(MachineFunction &MF) { } void GCMachineCodeAnalysis::FindStackOffsets(MachineFunction &MF) { - const TargetFrameLowering *TFI = TM->getFrameLowering(); + const TargetFrameLowering *TFI = TM->getSubtargetImpl()->getFrameLowering(); assert(TFI && "TargetRegisterInfo not available!"); for (GCFunctionInfo::roots_iterator RI = FI->roots_begin(); @@ -403,7 +404,7 @@ bool GCMachineCodeAnalysis::runOnMachineFunction(MachineFunction &MF) { TM = &MF.getTarget(); MMI = &getAnalysis(); - TII = TM->getInstrInfo(); + TII = TM->getSubtargetImpl()->getInstrInfo(); // Find the size of the stack frame. 
FI->setFrameSize(MF.getFrameInfo()->getStackSize()); diff --git a/lib/CodeGen/GlobalMerge.cpp b/lib/CodeGen/GlobalMerge.cpp index 027ee38..457d7d6 100644 --- a/lib/CodeGen/GlobalMerge.cpp +++ b/lib/CodeGen/GlobalMerge.cpp @@ -68,6 +68,7 @@ #include "llvm/Support/CommandLine.h" #include "llvm/Target/TargetLowering.h" #include "llvm/Target/TargetLoweringObjectFile.h" +#include "llvm/Target/TargetSubtargetInfo.h" using namespace llvm; #define DEBUG_TYPE "global-merge" @@ -142,7 +143,7 @@ INITIALIZE_TM_PASS(GlobalMerge, "global-merge", "Merge global variables", bool GlobalMerge::doMerge(SmallVectorImpl &Globals, Module &M, bool isConst, unsigned AddrSpace) const { - const TargetLowering *TLI = TM->getTargetLowering(); + const TargetLowering *TLI = TM->getSubtargetImpl()->getTargetLowering(); const DataLayout *DL = TLI->getDataLayout(); // FIXME: Infer the maximum possible offset depending on the actual users @@ -281,7 +282,7 @@ bool GlobalMerge::doInitialization(Module &M) { DenseMap > Globals, ConstGlobals, BSSGlobals; - const TargetLowering *TLI = TM->getTargetLowering(); + const TargetLowering *TLI = TM->getSubtargetImpl()->getTargetLowering(); const DataLayout *DL = TLI->getDataLayout(); unsigned MaxOffset = TLI->getMaximalGlobalOffset(); bool Changed = false; diff --git a/lib/CodeGen/IfConversion.cpp b/lib/CodeGen/IfConversion.cpp index 1502d5f..e84d25d 100644 --- a/lib/CodeGen/IfConversion.cpp +++ b/lib/CodeGen/IfConversion.cpp @@ -17,6 +17,7 @@ #include "llvm/ADT/SmallSet.h" #include "llvm/ADT/Statistic.h" #include "llvm/CodeGen/LivePhysRegs.h" +#include "llvm/CodeGen/MachineBlockFrequencyInfo.h" #include "llvm/CodeGen/MachineBranchProbabilityInfo.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineInstrBuilder.h" @@ -30,7 +31,6 @@ #include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetLowering.h" -#include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetRegisterInfo.h" #include "llvm/Target/TargetSubtargetInfo.h" @@ -161,6 +161,7 @@ namespace { const TargetLoweringBase *TLI; const TargetInstrInfo *TII; const TargetRegisterInfo *TRI; + const MachineBlockFrequencyInfo *MBFI; const MachineBranchProbabilityInfo *MBPI; MachineRegisterInfo *MRI; @@ -177,6 +178,7 @@ namespace { } void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.addRequired(); AU.addRequired(); MachineFunctionPass::getAnalysisUsage(AU); } @@ -269,15 +271,16 @@ INITIALIZE_PASS_DEPENDENCY(MachineBranchProbabilityInfo) INITIALIZE_PASS_END(IfConverter, "if-converter", "If Converter", false, false) bool IfConverter::runOnMachineFunction(MachineFunction &MF) { - TLI = MF.getTarget().getTargetLowering(); - TII = MF.getTarget().getInstrInfo(); - TRI = MF.getTarget().getRegisterInfo(); + TLI = MF.getSubtarget().getTargetLowering(); + TII = MF.getSubtarget().getInstrInfo(); + TRI = MF.getSubtarget().getRegisterInfo(); + MBFI = &getAnalysis(); MBPI = &getAnalysis(); MRI = &MF.getRegInfo(); const TargetSubtargetInfo &ST = MF.getTarget().getSubtarget(); - SchedModel.init(*ST.getSchedModel(), &ST, TII); + SchedModel.init(ST.getSchedModel(), &ST, TII); if (!TII) return false; @@ -286,9 +289,8 @@ bool IfConverter::runOnMachineFunction(MachineFunction &MF) { bool BFChange = false; if (!PreRegAlloc) { // Tail merge tend to expose more if-conversion opportunities. 
- BranchFolder BF(true, false); - BFChange = BF.OptimizeFunction(MF, TII, - MF.getTarget().getRegisterInfo(), + BranchFolder BF(true, false, *MBFI, *MBPI); + BFChange = BF.OptimizeFunction(MF, TII, MF.getSubtarget().getRegisterInfo(), getAnalysisIfAvailable()); } @@ -420,9 +422,8 @@ bool IfConverter::runOnMachineFunction(MachineFunction &MF) { BBAnalysis.clear(); if (MadeChange && IfCvtBranchFold) { - BranchFolder BF(false, false); - BF.OptimizeFunction(MF, TII, - MF.getTarget().getRegisterInfo(), + BranchFolder BF(false, false, *MBFI, *MBPI); + BF.OptimizeFunction(MF, TII, MF.getSubtarget().getRegisterInfo(), getAnalysisIfAvailable()); } @@ -940,9 +941,8 @@ static bool canFallThroughTo(MachineBasicBlock *BB, MachineBasicBlock *ToBB) { /// to determine if it can be if-converted. If predecessor is already enqueued, /// dequeue it! void IfConverter::InvalidatePreds(MachineBasicBlock *BB) { - for (MachineBasicBlock::pred_iterator PI = BB->pred_begin(), - E = BB->pred_end(); PI != E; ++PI) { - BBInfo &PBBI = BBAnalysis[(*PI)->getNumber()]; + for (const auto &Predecessor : BB->predecessors()) { + BBInfo &PBBI = BBAnalysis[Predecessor->getNumber()]; if (PBBI.IsDone || PBBI.BB == BB) continue; PBBI.IsAnalyzed = false; @@ -1184,6 +1184,7 @@ bool IfConverter::IfConvertTriangle(BBInfo &BBI, IfcvtKind Kind) { bool HasEarlyExit = CvtBBI->FalseBB != nullptr; uint64_t CvtNext = 0, CvtFalse = 0, BBNext = 0, BBCvt = 0, SumWeight = 0; uint32_t WeightScale = 0; + if (HasEarlyExit) { // Get weights before modifying CvtBBI->BB and BBI.BB. CvtNext = MBPI->getEdgeWeight(CvtBBI->BB, NextBBI->BB); @@ -1192,6 +1193,7 @@ bool IfConverter::IfConvertTriangle(BBInfo &BBI, IfcvtKind Kind) { BBCvt = MBPI->getEdgeWeight(BBI.BB, CvtBBI->BB); SumWeight = MBPI->getSumForBlock(CvtBBI->BB, WeightScale); } + if (CvtBBI->BB->pred_size() > 1) { BBI.NonPredSize -= TII->RemoveBranch(*BBI.BB); // Copy instructions in the true block, predicate them, and add them to diff --git a/lib/CodeGen/InlineSpiller.cpp b/lib/CodeGen/InlineSpiller.cpp index f3c8d3d..6a6e15d 100644 --- a/lib/CodeGen/InlineSpiller.cpp +++ b/lib/CodeGen/InlineSpiller.cpp @@ -34,7 +34,6 @@ #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetInstrInfo.h" -#include "llvm/Target/TargetMachine.h" using namespace llvm; @@ -139,21 +138,16 @@ private: ~InlineSpiller() {} public: - InlineSpiller(MachineFunctionPass &pass, - MachineFunction &mf, - VirtRegMap &vrm) - : MF(mf), - LIS(pass.getAnalysis()), - LSS(pass.getAnalysis()), - AA(&pass.getAnalysis()), - MDT(pass.getAnalysis()), - Loops(pass.getAnalysis()), - VRM(vrm), - MFI(*mf.getFrameInfo()), - MRI(mf.getRegInfo()), - TII(*mf.getTarget().getInstrInfo()), - TRI(*mf.getTarget().getRegisterInfo()), - MBFI(pass.getAnalysis()) {} + InlineSpiller(MachineFunctionPass &pass, MachineFunction &mf, VirtRegMap &vrm) + : MF(mf), LIS(pass.getAnalysis()), + LSS(pass.getAnalysis()), + AA(&pass.getAnalysis()), + MDT(pass.getAnalysis()), + Loops(pass.getAnalysis()), VRM(vrm), + MFI(*mf.getFrameInfo()), MRI(mf.getRegInfo()), + TII(*mf.getSubtarget().getInstrInfo()), + TRI(*mf.getSubtarget().getRegisterInfo()), + MBFI(pass.getAnalysis()) {} void spill(LiveRangeEdit &) override; @@ -190,11 +184,16 @@ private: } namespace llvm { + +Spiller::~Spiller() { } +void Spiller::anchor() { } + Spiller *createInlineSpiller(MachineFunctionPass &pass, MachineFunction &mf, VirtRegMap &vrm) { return new InlineSpiller(pass, mf, vrm); } + } 
//===----------------------------------------------------------------------===// @@ -824,7 +823,7 @@ void InlineSpiller::markValueUsed(LiveInterval *LI, VNInfo *VNI) { WorkList.push_back(std::make_pair(LI, VNI)); do { std::tie(LI, VNI) = WorkList.pop_back_val(); - if (!UsedValues.insert(VNI)) + if (!UsedValues.insert(VNI).second) continue; if (VNI->isPHIDef()) { @@ -853,6 +852,15 @@ void InlineSpiller::markValueUsed(LiveInterval *LI, VNInfo *VNI) { /// reMaterializeFor - Attempt to rematerialize before MI instead of reloading. bool InlineSpiller::reMaterializeFor(LiveInterval &VirtReg, MachineBasicBlock::iterator MI) { + + // Analyze instruction + SmallVector, 8> Ops; + MIBundleOperands::VirtRegInfo RI = + MIBundleOperands(MI).analyzeVirtReg(VirtReg.reg, &Ops); + + if (!RI.Reads) + return false; + SlotIndex UseIdx = LIS.getInstructionIndex(MI).getRegSlot(true); VNInfo *ParentVNI = VirtReg.getVNInfoAt(UseIdx.getBaseIndex()); @@ -883,9 +891,6 @@ bool InlineSpiller::reMaterializeFor(LiveInterval &VirtReg, // If the instruction also writes VirtReg.reg, it had better not require the // same register for uses and defs. - SmallVector, 8> Ops; - MIBundleOperands::VirtRegInfo RI = - MIBundleOperands(MI).analyzeVirtReg(VirtReg.reg, &Ops); if (RI.Tied) { markValueUsed(&VirtReg, ParentVNI); DEBUG(dbgs() << "\tcannot remat tied reg: " << UseIdx << '\t' << *MI); @@ -939,10 +944,15 @@ void InlineSpiller::reMaterializeAll() { for (unsigned i = 0, e = RegsToSpill.size(); i != e; ++i) { unsigned Reg = RegsToSpill[i]; LiveInterval &LI = LIS.getInterval(Reg); - for (MachineRegisterInfo::use_bundle_nodbg_iterator - RI = MRI.use_bundle_nodbg_begin(Reg), E = MRI.use_bundle_nodbg_end(); - RI != E; ) { - MachineInstr *MI = &*(RI++); + for (MachineRegisterInfo::reg_bundle_iterator + RegI = MRI.reg_bundle_begin(Reg), E = MRI.reg_bundle_end(); + RegI != E; ) { + MachineInstr *MI = &*(RegI++); + + // Debug values are not allowed to affect codegen. + if (MI->isDebugValue()) + continue; + anyRemat |= reMaterializeFor(LI, MI); } } @@ -1218,12 +1228,16 @@ void InlineSpiller::spillAroundUses(unsigned Reg) { // Modify DBG_VALUE now that the value is in a spill slot. bool IsIndirect = MI->isIndirectDebugValue(); uint64_t Offset = IsIndirect ? 
MI->getOperand(1).getImm() : 0; - const MDNode *MDPtr = MI->getOperand(2).getMetadata(); + const MDNode *Var = MI->getDebugVariable(); + const MDNode *Expr = MI->getDebugExpression(); DebugLoc DL = MI->getDebugLoc(); DEBUG(dbgs() << "Modifying debug info due to spill:" << "\t" << *MI); MachineBasicBlock *MBB = MI->getParent(); BuildMI(*MBB, MBB->erase(MI), DL, TII.get(TargetOpcode::DBG_VALUE)) - .addFrameIndex(StackSlot).addImm(Offset).addMetadata(MDPtr); + .addFrameIndex(StackSlot) + .addImm(Offset) + .addMetadata(Var) + .addMetadata(Expr); continue; } @@ -1363,7 +1377,7 @@ void InlineSpiller::spill(LiveRangeEdit &edit) { StackInt = nullptr; DEBUG(dbgs() << "Inline spilling " - << MRI.getRegClass(edit.getReg())->getName() + << TRI.getRegClassName(MRI.getRegClass(edit.getReg())) << ':' << edit.getParent() << "\nFrom original " << PrintReg(Original) << '\n'); assert(edit.getParent().isSpillable() && diff --git a/lib/CodeGen/InterferenceCache.h b/lib/CodeGen/InterferenceCache.h index 91a1da9..1791afb 100644 --- a/lib/CodeGen/InterferenceCache.h +++ b/lib/CodeGen/InterferenceCache.h @@ -12,8 +12,8 @@ // //===----------------------------------------------------------------------===// -#ifndef LLVM_CODEGEN_INTERFERENCECACHE -#define LLVM_CODEGEN_INTERFERENCECACHE +#ifndef LLVM_LIB_CODEGEN_INTERFERENCECACHE_H +#define LLVM_LIB_CODEGEN_INTERFERENCECACHE_H #include "llvm/CodeGen/LiveIntervalUnion.h" diff --git a/lib/CodeGen/IntrinsicLowering.cpp b/lib/CodeGen/IntrinsicLowering.cpp index a8b8600..2c95e9e 100644 --- a/lib/CodeGen/IntrinsicLowering.cpp +++ b/lib/CodeGen/IntrinsicLowering.cpp @@ -459,9 +459,10 @@ void IntrinsicLowering::LowerIntrinsicCall(CallInst *CI) { CI->replaceAllUsesWith(CI->getOperand(0)); break; + case Intrinsic::assume: case Intrinsic::var_annotation: - break; // Strip out annotate intrinsic - + break; // Strip out these intrinsics + case Intrinsic::memcpy: { Type *IntPtr = DL.getIntPtrType(Context); Value *Size = Builder.CreateIntCast(CI->getArgOperand(2), IntPtr, @@ -527,6 +528,34 @@ void IntrinsicLowering::LowerIntrinsicCall(CallInst *CI) { ReplaceFPIntrinsicWithCall(CI, "powf", "pow", "powl"); break; } + case Intrinsic::sin: { + ReplaceFPIntrinsicWithCall(CI, "sinf", "sin", "sinl"); + break; + } + case Intrinsic::cos: { + ReplaceFPIntrinsicWithCall(CI, "cosf", "cos", "cosl"); + break; + } + case Intrinsic::floor: { + ReplaceFPIntrinsicWithCall(CI, "floorf", "floor", "floorl"); + break; + } + case Intrinsic::ceil: { + ReplaceFPIntrinsicWithCall(CI, "ceilf", "ceil", "ceill"); + break; + } + case Intrinsic::trunc: { + ReplaceFPIntrinsicWithCall(CI, "truncf", "trunc", "truncl"); + break; + } + case Intrinsic::round: { + ReplaceFPIntrinsicWithCall(CI, "roundf", "round", "roundl"); + break; + } + case Intrinsic::copysign: { + ReplaceFPIntrinsicWithCall(CI, "copysignf", "copysign", "copysignl"); + break; + } case Intrinsic::flt_rounds: // Lower to "round to the nearest" if (!CI->getType()->isVoidTy()) diff --git a/lib/CodeGen/JITCodeEmitter.cpp b/lib/CodeGen/JITCodeEmitter.cpp deleted file mode 100644 index 96a5389..0000000 --- a/lib/CodeGen/JITCodeEmitter.cpp +++ /dev/null @@ -1,14 +0,0 @@ -//===-- llvm/CodeGen/JITCodeEmitter.cpp - Code emission --------*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. 
-// -//===----------------------------------------------------------------------===// - -#include "llvm/CodeGen/JITCodeEmitter.h" - -using namespace llvm; - -void JITCodeEmitter::anchor() { } diff --git a/lib/CodeGen/JumpInstrTables.cpp b/lib/CodeGen/JumpInstrTables.cpp index 61ef722..20f775c 100644 --- a/lib/CodeGen/JumpInstrTables.cpp +++ b/lib/CodeGen/JumpInstrTables.cpp @@ -163,7 +163,7 @@ void JumpInstrTables::getAnalysisUsage(AnalysisUsage &AU) const { Function *JumpInstrTables::insertEntry(Module &M, Function *Target) { FunctionType *OrigFunTy = Target->getFunctionType(); - FunctionType *FunTy = transformType(OrigFunTy); + FunctionType *FunTy = transformType(JTType, OrigFunTy); JumpMap::iterator it = Metadata.find(FunTy); if (Metadata.end() == it) { @@ -191,11 +191,12 @@ Function *JumpInstrTables::insertEntry(Module &M, Function *Target) { } bool JumpInstrTables::hasTable(FunctionType *FunTy) { - FunctionType *TransTy = transformType(FunTy); + FunctionType *TransTy = transformType(JTType, FunTy); return Metadata.end() != Metadata.find(TransTy); } -FunctionType *JumpInstrTables::transformType(FunctionType *FunTy) { +FunctionType *JumpInstrTables::transformType(JumpTable::JumpTableType JTT, + FunctionType *FunTy) { // Returning nullptr forces all types into the same table, since all types map // to the same type Type *VoidPtrTy = Type::getInt8PtrTy(FunTy->getContext()); @@ -211,7 +212,7 @@ FunctionType *JumpInstrTables::transformType(FunctionType *FunTy) { Type *Int32Ty = Type::getInt32Ty(FunTy->getContext()); FunctionType *VoidFnTy = FunctionType::get( Type::getVoidTy(FunTy->getContext()), EmptyParams, false); - switch (JTType) { + switch (JTT) { case JumpTable::Single: return FunctionType::get(RetTy, EmptyParams, false); @@ -251,16 +252,12 @@ FunctionType *JumpInstrTables::transformType(FunctionType *FunTy) { } bool JumpInstrTables::runOnModule(Module &M) { - // Make sure the module is well-formed, especially with respect to jumptable. - if (verifyModule(M)) - return false; - JITI = &getAnalysis(); - // Get the set of jumptable-annotated functions. + // Get the set of jumptable-annotated functions that have their address taken. DenseMap Functions; for (Function &F : M) { - if (F.hasFnAttribute(Attribute::JumpTable)) { + if (F.hasFnAttribute(Attribute::JumpTable) && F.hasAddressTaken()) { assert(F.hasUnnamedAddr() && "Attribute 'jumptable' requires 'unnamed_addr'"); Functions[&F] = nullptr; diff --git a/lib/CodeGen/LLVMTargetMachine.cpp b/lib/CodeGen/LLVMTargetMachine.cpp index df96b94..61face2 100644 --- a/lib/CodeGen/LLVMTargetMachine.cpp +++ b/lib/CodeGen/LLVMTargetMachine.cpp @@ -13,8 +13,10 @@ #include "llvm/Target/TargetMachine.h" +#include "llvm/Analysis/JumpInstrTableInfo.h" #include "llvm/Analysis/Passes.h" #include "llvm/CodeGen/AsmPrinter.h" +#include "llvm/CodeGen/ForwardControlFlowIntegrity.h" #include "llvm/CodeGen/JumpInstrTables.h" #include "llvm/CodeGen/MachineFunctionAnalysis.h" #include "llvm/CodeGen/MachineModuleInfo.h" @@ -48,8 +50,8 @@ EnableFastISelOption("fast-isel", cl::Hidden, cl::desc("Enable the \"fast\" instruction selector")); void LLVMTargetMachine::initAsmInfo() { - MCAsmInfo *TmpAsmInfo = TheTarget.createMCAsmInfo(*getRegisterInfo(), - TargetTriple); + MCAsmInfo *TmpAsmInfo = TheTarget.createMCAsmInfo( + *getSubtargetImpl()->getRegisterInfo(), getTargetTriple()); // TargetSelect.h moved to a different directory between LLVM 2.9 and 3.0, // and if the old one gets included then MCAsmInfo will be NULL and // we'll crash later. 
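The change just above, like most of the CodeGen hunks in this patch, follows one pattern: target hooks that used to be reached through the TargetMachine are now fetched from the per-function subtarget. A minimal sketch of the new access pattern, using only calls that appear in the hunks themselves (the helper name is invented for illustration):

    #include "llvm/CodeGen/MachineFunction.h"
    #include "llvm/Target/TargetInstrInfo.h"
    #include "llvm/Target/TargetRegisterInfo.h"
    #include "llvm/Target/TargetSubtargetInfo.h"

    using namespace llvm;

    // Hypothetical helper: passes now ask the subtarget, not the TargetMachine.
    static void cacheTargetHooks(const MachineFunction &MF,
                                 const TargetInstrInfo *&TII,
                                 const TargetRegisterInfo *&TRI) {
      TII = MF.getSubtarget().getInstrInfo();    // was MF.getTarget().getInstrInfo()
      TRI = MF.getSubtarget().getRegisterInfo(); // was MF.getTarget().getRegisterInfo()
    }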
@@ -110,9 +112,9 @@ static MCContext *addPassesToGenerateCode(LLVMTargetMachine *TM, // Install a MachineModuleInfo class, which is an immutable pass that holds // all the per-module stuff we're generating, including MCContext. - MachineModuleInfo *MMI = - new MachineModuleInfo(*TM->getMCAsmInfo(), *TM->getRegisterInfo(), - &TM->getTargetLowering()->getObjFileLowering()); + MachineModuleInfo *MMI = new MachineModuleInfo( + *TM->getMCAsmInfo(), *TM->getSubtargetImpl()->getRegisterInfo(), + &TM->getSubtargetImpl()->getTargetLowering()->getObjFileLowering()); PM.add(MMI); // Set up a MachineFunction for the rest of CodeGen to work on. @@ -143,8 +145,13 @@ bool LLVMTargetMachine::addPassesToEmitFile(PassManagerBase &PM, AnalysisID StopAfter) { // Passes to handle jumptable function annotations. These can't be handled at // JIT time, so we don't add them directly to addPassesToGenerateCode. - PM.add(createJumpInstrTableInfoPass()); + PM.add(createJumpInstrTableInfoPass( + getSubtargetImpl()->getInstrInfo()->getJumpInstrTableEntryBound())); PM.add(createJumpInstrTablesPass(Options.JTType)); + if (Options.FCFI) + PM.add(createForwardControlFlowIntegrityPass( + Options.JTType, Options.CFIType, Options.CFIEnforcing, + Options.getCFIFuncName())); // Add common CodeGen passes. MCContext *Context = addPassesToGenerateCode(this, PM, DisableVerify, @@ -165,10 +172,10 @@ bool LLVMTargetMachine::addPassesToEmitFile(PassManagerBase &PM, if (Options.MCOptions.MCSaveTempLabels) Context->setAllowTemporaryLabels(false); - const MCAsmInfo &MAI = *getMCAsmInfo(); - const MCRegisterInfo &MRI = *getRegisterInfo(); - const MCInstrInfo &MII = *getInstrInfo(); const MCSubtargetInfo &STI = getSubtarget(); + const MCAsmInfo &MAI = *getMCAsmInfo(); + const MCRegisterInfo &MRI = *getSubtargetImpl()->getRegisterInfo(); + const MCInstrInfo &MII = *getSubtargetImpl()->getInstrInfo(); std::unique_ptr AsmStreamer; switch (FileType) { @@ -201,9 +208,10 @@ bool LLVMTargetMachine::addPassesToEmitFile(PassManagerBase &PM, if (!MCE || !MAB) return true; - AsmStreamer.reset(getTarget().createMCObjectStreamer( - getTargetTriple(), *Context, *MAB, Out, MCE, STI, - Options.MCOptions.MCRelaxAll, Options.MCOptions.MCNoExecStack)); + AsmStreamer.reset( + getTarget() + .createMCObjectStreamer(getTargetTriple(), *Context, *MAB, Out, MCE, + STI, Options.MCOptions.MCRelaxAll)); break; } case CGFT_Null: @@ -226,26 +234,6 @@ bool LLVMTargetMachine::addPassesToEmitFile(PassManagerBase &PM, return false; } -/// addPassesToEmitMachineCode - Add passes to the specified pass manager to -/// get machine code emitted. This uses a JITCodeEmitter object to handle -/// actually outputting the machine code and resolving things like the address -/// of functions. This method should return true if machine code emission is -/// not supported. -/// -bool LLVMTargetMachine::addPassesToEmitMachineCode(PassManagerBase &PM, - JITCodeEmitter &JCE, - bool DisableVerify) { - // Add common CodeGen passes. - MCContext *Context = addPassesToGenerateCode(this, PM, DisableVerify, nullptr, - nullptr); - if (!Context) - return true; - - addCodeEmitter(PM, JCE); - - return false; // success! -} - /// addPassesToEmitMC - Add passes to the specified pass manager to get /// machine code emitted with the MCJIT. This method returns true if machine /// code is not supported. It fills the MCContext Ctx pointer which can be @@ -265,19 +253,20 @@ bool LLVMTargetMachine::addPassesToEmitMC(PassManagerBase &PM, // Create the code emitter for the target if it exists. 
If not, .o file // emission fails. - const MCRegisterInfo &MRI = *getRegisterInfo(); + const MCRegisterInfo &MRI = *getSubtargetImpl()->getRegisterInfo(); const MCSubtargetInfo &STI = getSubtarget(); - MCCodeEmitter *MCE = getTarget().createMCCodeEmitter(*getInstrInfo(), MRI, - STI, *Ctx); + MCCodeEmitter *MCE = getTarget().createMCCodeEmitter( + *getSubtargetImpl()->getInstrInfo(), MRI, STI, *Ctx); MCAsmBackend *MAB = getTarget().createMCAsmBackend(MRI, getTargetTriple(), TargetCPU); if (!MCE || !MAB) return true; std::unique_ptr AsmStreamer; - AsmStreamer.reset(getTarget().createMCObjectStreamer( - getTargetTriple(), *Ctx, *MAB, Out, MCE, STI, - Options.MCOptions.MCRelaxAll, Options.MCOptions.MCNoExecStack)); + AsmStreamer.reset(getTarget() + .createMCObjectStreamer(getTargetTriple(), *Ctx, *MAB, + Out, MCE, STI, + Options.MCOptions.MCRelaxAll)); // Create the AsmPrinter, which takes ownership of AsmStreamer if successful. FunctionPass *Printer = getTarget().createAsmPrinter(*this, *AsmStreamer); diff --git a/lib/CodeGen/LexicalScopes.cpp b/lib/CodeGen/LexicalScopes.cpp index d12c234..b621e3b 100644 --- a/lib/CodeGen/LexicalScopes.cpp +++ b/lib/CodeGen/LexicalScopes.cpp @@ -137,6 +137,8 @@ LexicalScope *LexicalScopes::findLexicalScope(DebugLoc DL) { /// getOrCreateLexicalScope - Find lexical scope for the given DebugLoc. If /// not available then create new lexical scope. LexicalScope *LexicalScopes::getOrCreateLexicalScope(DebugLoc DL) { + if (DL.isUnknown()) + return nullptr; MDNode *Scope = nullptr; MDNode *InlinedAt = nullptr; DL.getScopeAndInlinedAt(Scope, InlinedAt, MF->getFunction()->getContext()); @@ -172,9 +174,12 @@ LexicalScope *LexicalScopes::getOrCreateRegularScope(MDNode *Scope) { std::make_tuple(Parent, DIDescriptor(Scope), nullptr, false)).first; - if (!Parent && DIDescriptor(Scope).isSubprogram() && - DISubprogram(Scope).describes(MF->getFunction())) + if (!Parent) { + assert(DIDescriptor(Scope).isSubprogram()); + assert(DISubprogram(Scope).describes(MF->getFunction())); + assert(!CurrentFnLexicalScope); CurrentFnLexicalScope = &I->second; + } return &I->second; } @@ -285,7 +290,7 @@ void LexicalScopes::assignInstructionRanges( /// have machine instructions that belong to lexical scope identified by /// DebugLoc. void LexicalScopes::getMachineBasicBlocks( - DebugLoc DL, SmallPtrSet &MBBs) { + DebugLoc DL, SmallPtrSetImpl &MBBs) { MBBs.clear(); LexicalScope *Scope = getOrCreateLexicalScope(DL); if (!Scope) diff --git a/lib/CodeGen/LiveDebugVariables.cpp b/lib/CodeGen/LiveDebugVariables.cpp index 7d5646b..1624851 100644 --- a/lib/CodeGen/LiveDebugVariables.cpp +++ b/lib/CodeGen/LiveDebugVariables.cpp @@ -39,6 +39,7 @@ #include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/Target/TargetSubtargetInfo.h" #include @@ -109,7 +110,8 @@ public: namespace { class LDVImpl; class UserValue { - const MDNode *variable; ///< The debug info variable we are part of. + const MDNode *Variable; ///< The debug info variable we are part of. + const MDNode *Expression; ///< Any complex address expression. unsigned offset; ///< Byte offset into variable. bool IsIndirect; ///< true if this is a register-indirect+offset value. DebugLoc dl; ///< The debug location for the variable. This is @@ -139,11 +141,10 @@ class UserValue { public: /// UserValue - Create a new UserValue. 
- UserValue(const MDNode *var, unsigned o, bool i, DebugLoc L, - LocMap::Allocator &alloc) - : variable(var), offset(o), IsIndirect(i), dl(L), leader(this), - next(nullptr), locInts(alloc) - {} + UserValue(const MDNode *var, const MDNode *expr, unsigned o, bool i, + DebugLoc L, LocMap::Allocator &alloc) + : Variable(var), Expression(expr), offset(o), IsIndirect(i), dl(L), + leader(this), next(nullptr), locInts(alloc) {} /// getLeader - Get the leader of this value's equivalence class. UserValue *getLeader() { @@ -157,8 +158,10 @@ public: UserValue *getNext() const { return next; } /// match - Does this UserValue match the parameters? - bool match(const MDNode *Var, unsigned Offset, bool indirect) const { - return Var == variable && Offset == offset && indirect == IsIndirect; + bool match(const MDNode *Var, const MDNode *Expr, unsigned Offset, + bool indirect) const { + return Var == Variable && Expr == Expression && Offset == offset && + indirect == IsIndirect; } /// merge - Merge equivalence classes. @@ -267,7 +270,7 @@ public: LiveIntervals &LIS, const TargetInstrInfo &TRI); /// findDebugLoc - Return DebugLoc used for this DBG_VALUE instruction. A - /// variable may have more than one corresponding DBG_VALUE instructions. + /// variable may have more than one corresponding DBG_VALUE instructions. /// Only first one needs DebugLoc to identify variable's lexical scope /// in source file. DebugLoc findDebugLoc(); @@ -306,8 +309,8 @@ class LDVImpl { UVMap userVarMap; /// getUserValue - Find or create a UserValue. - UserValue *getUserValue(const MDNode *Var, unsigned Offset, - bool IsIndirect, DebugLoc DL); + UserValue *getUserValue(const MDNode *Var, const MDNode *Expr, + unsigned Offset, bool IsIndirect, DebugLoc DL); /// lookupVirtReg - Find the EC leader for VirtReg or null. UserValue *lookupVirtReg(unsigned VirtReg); @@ -344,6 +347,7 @@ public: "Dbg values are not emitted in LDV"); EmitDone = false; ModifiedMF = false; + LS.reset(); } /// mapVirtReg - Map virtual register to an equivalence class. 
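The LiveDebugVariables hunks that follow extend DBG_VALUE handling to a fourth, complex-address-expression operand and key each UserValue on (variable, expression, offset, indirectness). A hedged sketch of how the new operands are read, using only the MachineInstr accessors that appear in the hunks below (the wrapper function is invented for illustration):

    #include "llvm/CodeGen/MachineInstr.h"
    #include <cstdint>

    // Sketch: read a four-operand DBG_VALUE (location, offset, variable,
    // expression) through accessors rather than hard-coded operand indices.
    static void inspectDebugValue(llvm::MachineInstr &MI) {
      if (!MI.isDebugValue())
        return;
      const llvm::MDNode *Var = MI.getDebugVariable();    // third operand
      const llvm::MDNode *Expr = MI.getDebugExpression(); // fourth operand, new here
      bool IsIndirect = MI.isIndirectDebugValue();
      uint64_t Offset = IsIndirect ? MI.getOperand(1).getImm() : 0;
      (void)Var; (void)Expr; (void)Offset;
    }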
@@ -360,8 +364,8 @@ public: } // namespace void UserValue::print(raw_ostream &OS, const TargetMachine *TM) { - DIVariable DV(variable); - OS << "!\""; + DIVariable DV(Variable); + OS << "!\""; DV.printExtendedName(OS); OS << "\"\t"; if (offset) @@ -421,19 +425,20 @@ void UserValue::mapVirtRegs(LDVImpl *LDV) { LDV->mapVirtReg(locations[i].getReg(), this); } -UserValue *LDVImpl::getUserValue(const MDNode *Var, unsigned Offset, - bool IsIndirect, DebugLoc DL) { +UserValue *LDVImpl::getUserValue(const MDNode *Var, const MDNode *Expr, + unsigned Offset, bool IsIndirect, + DebugLoc DL) { UserValue *&Leader = userVarMap[Var]; if (Leader) { UserValue *UV = Leader->getLeader(); Leader = UV; for (; UV; UV = UV->getNext()) - if (UV->match(Var, Offset, IsIndirect)) + if (UV->match(Var, Expr, Offset, IsIndirect)) return UV; } userValues.push_back( - make_unique(Var, Offset, IsIndirect, DL, allocator)); + make_unique(Var, Expr, Offset, IsIndirect, DL, allocator)); UserValue *UV = userValues.back().get(); Leader = UserValue::merge(Leader, UV); return UV; @@ -453,7 +458,7 @@ UserValue *LDVImpl::lookupVirtReg(unsigned VirtReg) { bool LDVImpl::handleDebugValue(MachineInstr *MI, SlotIndex Idx) { // DBG_VALUE loc, offset, variable - if (MI->getNumOperands() != 3 || + if (MI->getNumOperands() != 4 || !(MI->getOperand(1).isReg() || MI->getOperand(1).isImm()) || !MI->getOperand(2).isMetadata()) { DEBUG(dbgs() << "Can't handle " << *MI); @@ -463,9 +468,11 @@ bool LDVImpl::handleDebugValue(MachineInstr *MI, SlotIndex Idx) { // Get or create the UserValue for (variable,offset). bool IsIndirect = MI->isIndirectDebugValue(); unsigned Offset = IsIndirect ? MI->getOperand(1).getImm() : 0; - const MDNode *Var = MI->getOperand(2).getMetadata(); + const MDNode *Var = MI->getDebugVariable(); + const MDNode *Expr = MI->getDebugExpression(); //here. 
- UserValue *UV = getUserValue(Var, Offset, IsIndirect, MI->getDebugLoc()); + UserValue *UV = + getUserValue(Var, Expr, Offset, IsIndirect, MI->getDebugLoc()); UV->addDef(Idx, MI->getOperand(0)); return true; } @@ -698,7 +705,7 @@ bool LDVImpl::runOnMachineFunction(MachineFunction &mf) { MF = &mf; LIS = &pass.getAnalysis(); MDT = &pass.getAnalysis(); - TRI = mf.getTarget().getRegisterInfo(); + TRI = mf.getSubtarget().getRegisterInfo(); LS.initialize(mf); DEBUG(dbgs() << "********** COMPUTING LIVE DEBUG VARIABLES: " << mf.getName() << " **********\n"); @@ -710,11 +717,25 @@ bool LDVImpl::runOnMachineFunction(MachineFunction &mf) { return Changed; } +static void removeDebugValues(MachineFunction &mf) { + for (MachineBasicBlock &MBB : mf) { + for (auto MBBI = MBB.begin(), MBBE = MBB.end(); MBBI != MBBE; ) { + if (!MBBI->isDebugValue()) { + ++MBBI; + continue; + } + MBBI = MBB.erase(MBBI); + } + } +} + bool LiveDebugVariables::runOnMachineFunction(MachineFunction &mf) { if (!EnableLDV) return false; - if (!FunctionDIs.count(mf.getFunction())) + if (!FunctionDIs.count(mf.getFunction())) { + removeDebugValues(mf); return false; + } if (!pImpl) pImpl = new LDVImpl(this); return static_cast(pImpl)->runOnMachineFunction(mf); @@ -936,10 +957,13 @@ void UserValue::insertDebugValue(MachineBasicBlock *MBB, SlotIndex Idx, if (Loc.isReg()) BuildMI(*MBB, I, findDebugLoc(), TII.get(TargetOpcode::DBG_VALUE), - IsIndirect, Loc.getReg(), offset, variable); + IsIndirect, Loc.getReg(), offset, Variable, Expression); else BuildMI(*MBB, I, findDebugLoc(), TII.get(TargetOpcode::DBG_VALUE)) - .addOperand(Loc).addImm(offset).addMetadata(variable); + .addOperand(Loc) + .addImm(offset) + .addMetadata(Variable) + .addMetadata(Expression); } void UserValue::emitDebugValues(VirtRegMap *VRM, LiveIntervals &LIS, @@ -979,7 +1003,7 @@ void LDVImpl::emitDebugValues(VirtRegMap *VRM) { DEBUG(dbgs() << "********** EMITTING LIVE DEBUG VARIABLES **********\n"); if (!MF) return; - const TargetInstrInfo *TII = MF->getTarget().getInstrInfo(); + const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo(); for (unsigned i = 0, e = userValues.size(); i != e; ++i) { DEBUG(userValues[i]->print(dbgs(), &MF->getTarget())); userValues[i]->rewriteLocations(*VRM, *TRI); diff --git a/lib/CodeGen/LiveDebugVariables.h b/lib/CodeGen/LiveDebugVariables.h index 7ec0d17..7e3b361 100644 --- a/lib/CodeGen/LiveDebugVariables.h +++ b/lib/CodeGen/LiveDebugVariables.h @@ -18,8 +18,8 @@ // //===----------------------------------------------------------------------===// -#ifndef LLVM_CODEGEN_LIVEDEBUGVARIABLES_H -#define LLVM_CODEGEN_LIVEDEBUGVARIABLES_H +#ifndef LLVM_LIB_CODEGEN_LIVEDEBUGVARIABLES_H +#define LLVM_LIB_CODEGEN_LIVEDEBUGVARIABLES_H #include "llvm/ADT/ArrayRef.h" #include "llvm/IR/DebugInfo.h" @@ -72,4 +72,4 @@ private: } // namespace llvm -#endif // LLVM_CODEGEN_LIVEDEBUGVARIABLES_H +#endif diff --git a/lib/CodeGen/LiveInterval.cpp b/lib/CodeGen/LiveInterval.cpp index ce8ce96..ddb0032 100644 --- a/lib/CodeGen/LiveInterval.cpp +++ b/lib/CodeGen/LiveInterval.cpp @@ -206,7 +206,7 @@ void LiveRange::RenumberValues() { valnos.clear(); for (const_iterator I = begin(), E = end(); I != E; ++I) { VNInfo *VNI = I->valno; - if (!Seen.insert(VNI)) + if (!Seen.insert(VNI).second) continue; assert(!VNI->isUnused() && "Unused valno used by live segment"); VNI->id = (unsigned)valnos.size(); diff --git a/lib/CodeGen/LiveIntervalAnalysis.cpp b/lib/CodeGen/LiveIntervalAnalysis.cpp index 1559560..1742e63 100644 --- a/lib/CodeGen/LiveIntervalAnalysis.cpp +++ 
b/lib/CodeGen/LiveIntervalAnalysis.cpp @@ -34,8 +34,8 @@ #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetInstrInfo.h" -#include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/Target/TargetSubtargetInfo.h" #include #include #include @@ -110,9 +110,8 @@ void LiveIntervals::releaseMemory() { bool LiveIntervals::runOnMachineFunction(MachineFunction &fn) { MF = &fn; MRI = &MF->getRegInfo(); - TM = &fn.getTarget(); - TRI = TM->getRegisterInfo(); - TII = TM->getInstrInfo(); + TRI = MF->getSubtarget().getRegisterInfo(); + TII = MF->getSubtarget().getInstrInfo(); AA = &getAnalysis(); Indexes = &getAnalysis(); DomTree = &getAnalysis(); @@ -380,12 +379,13 @@ bool LiveIntervals::shrinkToUses(LiveInterval *li, (void)ExtVNI; assert(ExtVNI == VNI && "Unexpected existing value number"); // Is this a PHIDef we haven't seen before? - if (!VNI->isPHIDef() || VNI->def != BlockStart || !UsedPHIs.insert(VNI)) + if (!VNI->isPHIDef() || VNI->def != BlockStart || + !UsedPHIs.insert(VNI).second) continue; // The PHI is live, make sure the predecessors are live-out. for (MachineBasicBlock::const_pred_iterator PI = MBB->pred_begin(), PE = MBB->pred_end(); PI != PE; ++PI) { - if (!LiveOut.insert(*PI)) + if (!LiveOut.insert(*PI).second) continue; SlotIndex Stop = getMBBEndIdx(*PI); // A predecessor is not required to have a live-out value for a PHI. @@ -402,7 +402,7 @@ bool LiveIntervals::shrinkToUses(LiveInterval *li, // Make sure VNI is live-out from the predecessors. for (MachineBasicBlock::const_pred_iterator PI = MBB->pred_begin(), PE = MBB->pred_end(); PI != PE; ++PI) { - if (!LiveOut.insert(*PI)) + if (!LiveOut.insert(*PI).second) continue; SlotIndex Stop = getMBBEndIdx(*PI); assert(li->getVNInfoBefore(Stop) == VNI && @@ -785,7 +785,7 @@ private: /// Update a single live range, assuming an instruction has been moved from /// OldIdx to NewIdx. 
void updateRange(LiveRange &LR, unsigned Reg) { - if (!Updated.insert(&LR)) + if (!Updated.insert(&LR).second) return; DEBUG({ dbgs() << " "; diff --git a/lib/CodeGen/LiveRangeCalc.h b/lib/CodeGen/LiveRangeCalc.h index 67ab559..345d6c4 100644 --- a/lib/CodeGen/LiveRangeCalc.h +++ b/lib/CodeGen/LiveRangeCalc.h @@ -19,8 +19,8 @@ // //===----------------------------------------------------------------------===// -#ifndef LLVM_CODEGEN_LIVERANGECALC_H -#define LLVM_CODEGEN_LIVERANGECALC_H +#ifndef LLVM_LIB_CODEGEN_LIVERANGECALC_H +#define LLVM_LIB_CODEGEN_LIVERANGECALC_H #include "llvm/ADT/BitVector.h" #include "llvm/ADT/IndexedMap.h" diff --git a/lib/CodeGen/LiveRangeEdit.cpp b/lib/CodeGen/LiveRangeEdit.cpp index c27d630..a0fb712 100644 --- a/lib/CodeGen/LiveRangeEdit.cpp +++ b/lib/CodeGen/LiveRangeEdit.cpp @@ -411,8 +411,11 @@ LiveRangeEdit::calculateRegClassAndHint(MachineFunction &MF, for (unsigned I = 0, Size = size(); I < Size; ++I) { LiveInterval &LI = LIS.getInterval(get(I)); if (MRI.recomputeRegClass(LI.reg, MF.getTarget())) - DEBUG(dbgs() << "Inflated " << PrintReg(LI.reg) << " to " - << MRI.getRegClass(LI.reg)->getName() << '\n'); + DEBUG({ + const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo(); + dbgs() << "Inflated " << PrintReg(LI.reg) << " to " + << TRI->getRegClassName(MRI.getRegClass(LI.reg)) << '\n'; + }); VRAI.calculateSpillWeightAndHint(LI); } } diff --git a/lib/CodeGen/LiveRegMatrix.cpp b/lib/CodeGen/LiveRegMatrix.cpp index de2ce22..a8cae08 100644 --- a/lib/CodeGen/LiveRegMatrix.cpp +++ b/lib/CodeGen/LiveRegMatrix.cpp @@ -19,7 +19,6 @@ #include "llvm/CodeGen/VirtRegMap.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetRegisterInfo.h" using namespace llvm; @@ -48,7 +47,7 @@ void LiveRegMatrix::getAnalysisUsage(AnalysisUsage &AU) const { } bool LiveRegMatrix::runOnMachineFunction(MachineFunction &MF) { - TRI = MF.getTarget().getRegisterInfo(); + TRI = MF.getSubtarget().getRegisterInfo(); MRI = &MF.getRegInfo(); LIS = &getAnalysis(); VRM = &getAnalysis(); diff --git a/lib/CodeGen/LiveStackAnalysis.cpp b/lib/CodeGen/LiveStackAnalysis.cpp index b3161a4..8a6ac25 100644 --- a/lib/CodeGen/LiveStackAnalysis.cpp +++ b/lib/CodeGen/LiveStackAnalysis.cpp @@ -20,6 +20,7 @@ #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/Target/TargetSubtargetInfo.h" #include using namespace llvm; @@ -49,7 +50,7 @@ void LiveStacks::releaseMemory() { } bool LiveStacks::runOnMachineFunction(MachineFunction &MF) { - TRI = MF.getTarget().getRegisterInfo(); + TRI = MF.getSubtarget().getRegisterInfo(); // FIXME: No analysis is being done right now. We are relying on the // register allocators to provide the information. 
return false; @@ -80,7 +81,7 @@ void LiveStacks::print(raw_ostream &OS, const Module*) const { int Slot = I->first; const TargetRegisterClass *RC = getIntervalRegClass(Slot); if (RC) - OS << " [" << RC->getName() << "]\n"; + OS << " [" << TRI->getRegClassName(RC) << "]\n"; else OS << " [Unknown]\n"; } diff --git a/lib/CodeGen/LiveVariables.cpp b/lib/CodeGen/LiveVariables.cpp index 758b216..c4bca5f 100644 --- a/lib/CodeGen/LiveVariables.cpp +++ b/lib/CodeGen/LiveVariables.cpp @@ -37,7 +37,6 @@ #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Target/TargetInstrInfo.h" -#include "llvm/Target/TargetMachine.h" #include using namespace llvm; @@ -497,17 +496,135 @@ void LiveVariables::UpdatePhysRegDefs(MachineInstr *MI, } } +void LiveVariables::runOnInstr(MachineInstr *MI, + SmallVectorImpl &Defs) { + assert(!MI->isDebugValue()); + // Process all of the operands of the instruction... + unsigned NumOperandsToProcess = MI->getNumOperands(); + + // Unless it is a PHI node. In this case, ONLY process the DEF, not any + // of the uses. They will be handled in other basic blocks. + if (MI->isPHI()) + NumOperandsToProcess = 1; + + // Clear kill and dead markers. LV will recompute them. + SmallVector UseRegs; + SmallVector DefRegs; + SmallVector RegMasks; + for (unsigned i = 0; i != NumOperandsToProcess; ++i) { + MachineOperand &MO = MI->getOperand(i); + if (MO.isRegMask()) { + RegMasks.push_back(i); + continue; + } + if (!MO.isReg() || MO.getReg() == 0) + continue; + unsigned MOReg = MO.getReg(); + if (MO.isUse()) { + MO.setIsKill(false); + if (MO.readsReg()) + UseRegs.push_back(MOReg); + } else /*MO.isDef()*/ { + MO.setIsDead(false); + DefRegs.push_back(MOReg); + } + } + + MachineBasicBlock *MBB = MI->getParent(); + // Process all uses. + for (unsigned i = 0, e = UseRegs.size(); i != e; ++i) { + unsigned MOReg = UseRegs[i]; + if (TargetRegisterInfo::isVirtualRegister(MOReg)) + HandleVirtRegUse(MOReg, MBB, MI); + else if (!MRI->isReserved(MOReg)) + HandlePhysRegUse(MOReg, MI); + } + + // Process all masked registers. (Call clobbers). + for (unsigned i = 0, e = RegMasks.size(); i != e; ++i) + HandleRegMask(MI->getOperand(RegMasks[i])); + + // Process all defs. + for (unsigned i = 0, e = DefRegs.size(); i != e; ++i) { + unsigned MOReg = DefRegs[i]; + if (TargetRegisterInfo::isVirtualRegister(MOReg)) + HandleVirtRegDef(MOReg, MI); + else if (!MRI->isReserved(MOReg)) + HandlePhysRegDef(MOReg, MI, Defs); + } + UpdatePhysRegDefs(MI, Defs); +} + +void LiveVariables::runOnBlock(MachineBasicBlock *MBB, const unsigned NumRegs) { + // Mark live-in registers as live-in. + SmallVector Defs; + for (MachineBasicBlock::livein_iterator II = MBB->livein_begin(), + EE = MBB->livein_end(); II != EE; ++II) { + assert(TargetRegisterInfo::isPhysicalRegister(*II) && + "Cannot have a live-in virtual register!"); + HandlePhysRegDef(*II, nullptr, Defs); + } + + // Loop over all of the instructions, processing them. + DistanceMap.clear(); + unsigned Dist = 0; + for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end(); + I != E; ++I) { + MachineInstr *MI = I; + if (MI->isDebugValue()) + continue; + DistanceMap.insert(std::make_pair(MI, Dist++)); + + runOnInstr(MI, Defs); + } + + // Handle any virtual assignments from PHI nodes which might be at the + // bottom of this basic block. We check all of our successor blocks to see + // if they have PHI nodes, and if so, we simulate an assignment at the end + // of the current block. 
+ if (!PHIVarInfo[MBB->getNumber()].empty()) { + SmallVectorImpl &VarInfoVec = PHIVarInfo[MBB->getNumber()]; + + for (SmallVectorImpl::iterator I = VarInfoVec.begin(), + E = VarInfoVec.end(); I != E; ++I) + // Mark it alive only in the block we are representing. + MarkVirtRegAliveInBlock(getVarInfo(*I),MRI->getVRegDef(*I)->getParent(), + MBB); + } + + // MachineCSE may CSE instructions which write to non-allocatable physical + // registers across MBBs. Remember if any reserved register is liveout. + SmallSet LiveOuts; + for (MachineBasicBlock::const_succ_iterator SI = MBB->succ_begin(), + SE = MBB->succ_end(); SI != SE; ++SI) { + MachineBasicBlock *SuccMBB = *SI; + if (SuccMBB->isLandingPad()) + continue; + for (MachineBasicBlock::livein_iterator LI = SuccMBB->livein_begin(), + LE = SuccMBB->livein_end(); LI != LE; ++LI) { + unsigned LReg = *LI; + if (!TRI->isInAllocatableClass(LReg)) + // Ignore other live-ins, e.g. those that are live into landing pads. + LiveOuts.insert(LReg); + } + } + + // Loop over PhysRegDef / PhysRegUse, killing any registers that are + // available at the end of the basic block. + for (unsigned i = 0; i != NumRegs; ++i) + if ((PhysRegDef[i] || PhysRegUse[i]) && !LiveOuts.count(i)) + HandlePhysRegDef(i, nullptr, Defs); +} + bool LiveVariables::runOnMachineFunction(MachineFunction &mf) { MF = &mf; MRI = &mf.getRegInfo(); - TRI = MF->getTarget().getRegisterInfo(); - - unsigned NumRegs = TRI->getNumRegs(); - PhysRegDef = new MachineInstr*[NumRegs]; - PhysRegUse = new MachineInstr*[NumRegs]; - PHIVarInfo = new SmallVector[MF->getNumBlockIDs()]; - std::fill(PhysRegDef, PhysRegDef + NumRegs, nullptr); - std::fill(PhysRegUse, PhysRegUse + NumRegs, nullptr); + TRI = MF->getSubtarget().getRegisterInfo(); + + const unsigned NumRegs = TRI->getNumRegs(); + PhysRegDef.assign(NumRegs, nullptr); + PhysRegUse.assign(NumRegs, nullptr); + PHIVarInfo.resize(MF->getNumBlockIDs()); PHIJoins.clear(); // FIXME: LiveIntervals will be updated to remove its dependence on @@ -525,124 +642,11 @@ bool LiveVariables::runOnMachineFunction(MachineFunction &mf) { MachineBasicBlock *Entry = MF->begin(); SmallPtrSet Visited; - for (df_ext_iterator > - DFI = df_ext_begin(Entry, Visited), E = df_ext_end(Entry, Visited); - DFI != E; ++DFI) { - MachineBasicBlock *MBB = *DFI; - - // Mark live-in registers as live-in. - SmallVector Defs; - for (MachineBasicBlock::livein_iterator II = MBB->livein_begin(), - EE = MBB->livein_end(); II != EE; ++II) { - assert(TargetRegisterInfo::isPhysicalRegister(*II) && - "Cannot have a live-in virtual register!"); - HandlePhysRegDef(*II, nullptr, Defs); - } - - // Loop over all of the instructions, processing them. - DistanceMap.clear(); - unsigned Dist = 0; - for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end(); - I != E; ++I) { - MachineInstr *MI = I; - if (MI->isDebugValue()) - continue; - DistanceMap.insert(std::make_pair(MI, Dist++)); - - // Process all of the operands of the instruction... - unsigned NumOperandsToProcess = MI->getNumOperands(); - - // Unless it is a PHI node. In this case, ONLY process the DEF, not any - // of the uses. They will be handled in other basic blocks. - if (MI->isPHI()) - NumOperandsToProcess = 1; - - // Clear kill and dead markers. LV will recompute them. 
- SmallVector UseRegs; - SmallVector DefRegs; - SmallVector RegMasks; - for (unsigned i = 0; i != NumOperandsToProcess; ++i) { - MachineOperand &MO = MI->getOperand(i); - if (MO.isRegMask()) { - RegMasks.push_back(i); - continue; - } - if (!MO.isReg() || MO.getReg() == 0) - continue; - unsigned MOReg = MO.getReg(); - if (MO.isUse()) { - MO.setIsKill(false); - if (MO.readsReg()) - UseRegs.push_back(MOReg); - } else /*MO.isDef()*/ { - MO.setIsDead(false); - DefRegs.push_back(MOReg); - } - } - - // Process all uses. - for (unsigned i = 0, e = UseRegs.size(); i != e; ++i) { - unsigned MOReg = UseRegs[i]; - if (TargetRegisterInfo::isVirtualRegister(MOReg)) - HandleVirtRegUse(MOReg, MBB, MI); - else if (!MRI->isReserved(MOReg)) - HandlePhysRegUse(MOReg, MI); - } - - // Process all masked registers. (Call clobbers). - for (unsigned i = 0, e = RegMasks.size(); i != e; ++i) - HandleRegMask(MI->getOperand(RegMasks[i])); - - // Process all defs. - for (unsigned i = 0, e = DefRegs.size(); i != e; ++i) { - unsigned MOReg = DefRegs[i]; - if (TargetRegisterInfo::isVirtualRegister(MOReg)) - HandleVirtRegDef(MOReg, MI); - else if (!MRI->isReserved(MOReg)) - HandlePhysRegDef(MOReg, MI, Defs); - } - UpdatePhysRegDefs(MI, Defs); - } - - // Handle any virtual assignments from PHI nodes which might be at the - // bottom of this basic block. We check all of our successor blocks to see - // if they have PHI nodes, and if so, we simulate an assignment at the end - // of the current block. - if (!PHIVarInfo[MBB->getNumber()].empty()) { - SmallVectorImpl &VarInfoVec = PHIVarInfo[MBB->getNumber()]; - - for (SmallVectorImpl::iterator I = VarInfoVec.begin(), - E = VarInfoVec.end(); I != E; ++I) - // Mark it alive only in the block we are representing. - MarkVirtRegAliveInBlock(getVarInfo(*I),MRI->getVRegDef(*I)->getParent(), - MBB); - } - - // MachineCSE may CSE instructions which write to non-allocatable physical - // registers across MBBs. Remember if any reserved register is liveout. - SmallSet LiveOuts; - for (MachineBasicBlock::const_succ_iterator SI = MBB->succ_begin(), - SE = MBB->succ_end(); SI != SE; ++SI) { - MachineBasicBlock *SuccMBB = *SI; - if (SuccMBB->isLandingPad()) - continue; - for (MachineBasicBlock::livein_iterator LI = SuccMBB->livein_begin(), - LE = SuccMBB->livein_end(); LI != LE; ++LI) { - unsigned LReg = *LI; - if (!TRI->isInAllocatableClass(LReg)) - // Ignore other live-ins, e.g. those that are live into landing pads. - LiveOuts.insert(LReg); - } - } - - // Loop over PhysRegDef / PhysRegUse, killing any registers that are - // available at the end of the basic block. 
- for (unsigned i = 0; i != NumRegs; ++i) - if ((PhysRegDef[i] || PhysRegUse[i]) && !LiveOuts.count(i)) - HandlePhysRegDef(i, nullptr, Defs); + for (MachineBasicBlock *MBB : depth_first_ext(Entry, Visited)) { + runOnBlock(MBB, NumRegs); - std::fill(PhysRegDef, PhysRegDef + NumRegs, nullptr); - std::fill(PhysRegUse, PhysRegUse + NumRegs, nullptr); + PhysRegDef.assign(NumRegs, nullptr); + PhysRegUse.assign(NumRegs, nullptr); } // Convert and transfer the dead / killed information we have gathered into @@ -664,9 +668,9 @@ bool LiveVariables::runOnMachineFunction(MachineFunction &mf) { assert(Visited.count(&*i) != 0 && "unreachable basic block found"); #endif - delete[] PhysRegDef; - delete[] PhysRegUse; - delete[] PHIVarInfo; + PhysRegDef.clear(); + PhysRegUse.clear(); + PHIVarInfo.clear(); return false; } diff --git a/lib/CodeGen/LocalStackSlotAllocation.cpp b/lib/CodeGen/LocalStackSlotAllocation.cpp index 36885e8..5c5712f 100644 --- a/lib/CodeGen/LocalStackSlotAllocation.cpp +++ b/lib/CodeGen/LocalStackSlotAllocation.cpp @@ -36,6 +36,7 @@ #include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetFrameLowering.h" #include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/Target/TargetSubtargetInfo.h" using namespace llvm; @@ -102,7 +103,7 @@ INITIALIZE_PASS_END(LocalStackSlotPass, "localstackalloc", bool LocalStackSlotPass::runOnMachineFunction(MachineFunction &MF) { MachineFrameInfo *MFI = MF.getFrameInfo(); - const TargetRegisterInfo *TRI = MF.getTarget().getRegisterInfo(); + const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo(); unsigned LocalObjectCount = MFI->getObjectIndexEnd(); // If the target doesn't want/need this pass, or if there are no locals @@ -183,7 +184,7 @@ void LocalStackSlotPass::AssignProtectedObjSet(const StackObjSet &UnassignedObjs void LocalStackSlotPass::calculateFrameObjectOffsets(MachineFunction &Fn) { // Loop over all of the stack objects, assigning sequential addresses... 
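The LiveVariables hunks above swap the manually managed new[]/delete[] arrays PhysRegDef, PhysRegUse and PHIVarInfo for member containers that are re-initialized with assign()/resize() and released with clear(). A small sketch of that ownership pattern with plain std::vector (Instr and PerRegState are placeholder names, not LLVM types):

#include <cassert>
#include <vector>

struct Instr; // stand-in for MachineInstr

// Owning containers replacing a hand-managed "new Instr*[NumRegs]" array:
// assign() both sizes the vector and resets every slot, and clear() releases
// the storage, so no std::fill or delete[] is needed.
struct PerRegState {
  std::vector<Instr *> PhysRegDef;
  std::vector<Instr *> PhysRegUse;

  void enterFunction(unsigned NumRegs) {
    PhysRegDef.assign(NumRegs, nullptr);
    PhysRegUse.assign(NumRegs, nullptr);
  }
  void leaveFunction() {
    PhysRegDef.clear();
    PhysRegUse.clear();
  }
};

int main() {
  PerRegState S;
  S.enterFunction(16);
  assert(S.PhysRegDef.size() == 16 && S.PhysRegDef[0] == nullptr);
  S.leaveFunction();
  assert(S.PhysRegDef.empty());
  return 0;
}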
MachineFrameInfo *MFI = Fn.getFrameInfo(); - const TargetFrameLowering &TFI = *Fn.getTarget().getFrameLowering(); + const TargetFrameLowering &TFI = *Fn.getSubtarget().getFrameLowering(); bool StackGrowsDown = TFI.getStackGrowthDirection() == TargetFrameLowering::StackGrowsDown; int64_t Offset = 0; @@ -272,8 +273,8 @@ bool LocalStackSlotPass::insertFrameReferenceRegisters(MachineFunction &Fn) { bool UsedBaseReg = false; MachineFrameInfo *MFI = Fn.getFrameInfo(); - const TargetRegisterInfo *TRI = Fn.getTarget().getRegisterInfo(); - const TargetFrameLowering &TFI = *Fn.getTarget().getFrameLowering(); + const TargetRegisterInfo *TRI = Fn.getSubtarget().getRegisterInfo(); + const TargetFrameLowering &TFI = *Fn.getSubtarget().getFrameLowering(); bool StackGrowsDown = TFI.getStackGrowthDirection() == TargetFrameLowering::StackGrowsDown; diff --git a/lib/CodeGen/MachineBasicBlock.cpp b/lib/CodeGen/MachineBasicBlock.cpp index 08fef5f..3058b1a 100644 --- a/lib/CodeGen/MachineBasicBlock.cpp +++ b/lib/CodeGen/MachineBasicBlock.cpp @@ -32,6 +32,7 @@ #include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/Target/TargetSubtargetInfo.h" #include using namespace llvm; @@ -54,7 +55,8 @@ MCSymbol *MachineBasicBlock::getSymbol() const { const MachineFunction *MF = getParent(); MCContext &Ctx = MF->getContext(); const TargetMachine &TM = MF->getTarget(); - const char *Prefix = TM.getDataLayout()->getPrivateGlobalPrefix(); + const char *Prefix = + TM.getSubtargetImpl()->getDataLayout()->getPrivateGlobalPrefix(); CachedMCSymbol = Ctx.GetOrCreateSymbol(Twine(Prefix) + "BB" + Twine(MF->getFunctionNumber()) + "_" + Twine(getNumber())); @@ -290,7 +292,7 @@ void MachineBasicBlock::print(raw_ostream &OS, SlotIndexes *Indexes) const { OS << '\n'; - const TargetRegisterInfo *TRI = MF->getTarget().getRegisterInfo(); + const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo(); if (!livein_empty()) { if (Indexes) OS << '\t'; OS << " Live Ins:"; @@ -359,7 +361,7 @@ MachineBasicBlock::addLiveIn(unsigned PhysReg, const TargetRegisterClass *RC) { bool LiveIn = isLiveIn(PhysReg); iterator I = SkipPHIsAndLabels(begin()), E = end(); MachineRegisterInfo &MRI = getParent()->getRegInfo(); - const TargetInstrInfo &TII = *getParent()->getTarget().getInstrInfo(); + const TargetInstrInfo &TII = *getParent()->getSubtarget().getInstrInfo(); // Look for an existing copy. if (LiveIn) @@ -390,7 +392,7 @@ void MachineBasicBlock::moveAfter(MachineBasicBlock *NewBefore) { } void MachineBasicBlock::updateTerminator() { - const TargetInstrInfo *TII = getParent()->getTarget().getInstrInfo(); + const TargetInstrInfo *TII = getParent()->getSubtarget().getInstrInfo(); // A block with no successors has no concerns with fall-through edges. if (this->succ_empty()) return; @@ -645,7 +647,7 @@ bool MachineBasicBlock::canFallThrough() { // Analyze the branches, if any, at the end of the block. MachineBasicBlock *TBB = nullptr, *FBB = nullptr; SmallVector Cond; - const TargetInstrInfo *TII = getParent()->getTarget().getInstrInfo(); + const TargetInstrInfo *TII = getParent()->getSubtarget().getInstrInfo(); if (TII->AnalyzeBranch(*this, TBB, FBB, Cond)) { // If we couldn't analyze the branch, examine the last instruction. 
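The most pervasive change in these hunks is mechanical: per-function code now reaches TargetInstrInfo, TargetRegisterInfo, TargetFrameLowering and the DataLayout through MachineFunction::getSubtarget() or TargetMachine::getSubtargetImpl() rather than directly through the TargetMachine. A compilable sketch of that accessor shape; every type below is a stand-in, not one of LLVM's real classes:

struct RegisterInfoLike {};

struct SubtargetLike {
  RegisterInfoLike RI;
  const RegisterInfoLike *getRegisterInfo() const { return &RI; }
};

struct TargetMachineLike {
  SubtargetLike ST;
  const SubtargetLike *getSubtargetImpl() const { return &ST; }
};

struct MachineFunctionLike {
  TargetMachineLike TM;
  const SubtargetLike &getSubtarget() const { return *TM.getSubtargetImpl(); }
};

int main() {
  MachineFunctionLike MF;
  // Old pattern removed by this patch: MF.getTarget().getRegisterInfo()
  // New pattern: the subtarget, not the target machine, hands out the
  // codegen interfaces.
  const RegisterInfoLike *TRI = MF.getSubtarget().getRegisterInfo();
  (void)TRI;
  return 0;
}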
// If the block doesn't end in a known control barrier, assume fallthrough @@ -690,7 +692,7 @@ MachineBasicBlock::SplitCriticalEdge(MachineBasicBlock *Succ, Pass *P) { // We may need to update this's terminator, but we can't do that if // AnalyzeBranch fails. If this uses a jump table, we won't touch it. - const TargetInstrInfo *TII = MF->getTarget().getInstrInfo(); + const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo(); MachineBasicBlock *TBB = nullptr, *FBB = nullptr; SmallVector Cond; if (TII->AnalyzeBranch(*this, TBB, FBB, Cond)) @@ -795,7 +797,8 @@ MachineBasicBlock::SplitCriticalEdge(MachineBasicBlock *Succ, Pass *P) { NMBB->addSuccessor(Succ); if (!NMBB->isLayoutSuccessor(Succ)) { Cond.clear(); - MF->getTarget().getInstrInfo()->InsertBranch(*NMBB, Succ, nullptr, Cond, dl); + MF->getSubtarget().getInstrInfo()->InsertBranch(*NMBB, Succ, nullptr, Cond, + dl); if (Indexes) { for (instr_iterator I = NMBB->instr_begin(), E = NMBB->instr_end(); @@ -823,7 +826,7 @@ MachineBasicBlock::SplitCriticalEdge(MachineBasicBlock *Succ, Pass *P) { NMBB->addLiveIn(*I); // Update LiveVariables. - const TargetRegisterInfo *TRI = MF->getTarget().getRegisterInfo(); + const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo(); if (LV) { // Restore kills of virtual registers that were killed by the terminators. while (!KilledRegs.empty()) { @@ -903,31 +906,8 @@ MachineBasicBlock::SplitCriticalEdge(MachineBasicBlock *Succ, Pass *P) { } if (MachineDominatorTree *MDT = - P->getAnalysisIfAvailable()) { - // Update dominator information. - MachineDomTreeNode *SucccDTNode = MDT->getNode(Succ); - - bool IsNewIDom = true; - for (const_pred_iterator PI = Succ->pred_begin(), E = Succ->pred_end(); - PI != E; ++PI) { - MachineBasicBlock *PredBB = *PI; - if (PredBB == NMBB) - continue; - if (!MDT->dominates(SucccDTNode, MDT->getNode(PredBB))) { - IsNewIDom = false; - break; - } - } - - // We know "this" dominates the newly created basic block. - MachineDomTreeNode *NewDTNode = MDT->addNewBlock(NMBB, this); - - // If all the other predecessors of "Succ" are dominated by "Succ" itself - // then the new block is the new immediate dominator of "Succ". Otherwise, - // the new block doesn't dominate anything. - if (IsNewIDom) - MDT->changeImmediateDominator(SucccDTNode, NewDTNode); - } + P->getAnalysisIfAvailable()) + MDT->recordSplitCriticalEdge(this, Succ, NMBB); if (MachineLoopInfo *MLI = P->getAnalysisIfAvailable()) if (MachineLoop *TIL = MLI->getLoopFor(this)) { @@ -1086,7 +1066,7 @@ bool MachineBasicBlock::CorrectExtraCFGEdges(MachineBasicBlock *DestA, MachineBasicBlock::succ_iterator SI = succ_begin(); while (SI != succ_end()) { const MachineBasicBlock *MBB = *SI; - if (!SeenMBBs.insert(MBB) || + if (!SeenMBBs.insert(MBB).second || (MBB != DestA && MBB != DestB && !MBB->isLandingPad())) { // This is a superfluous edge, remove it. 
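The SplitCriticalEdge hunk above drops the inline recomputation of the immediate dominator and instead calls MachineDominatorTree::recordSplitCriticalEdge(). Together with the MachineDominators.cpp hunk further down (which clears CriticalEdgesToSplit and NewBBs per function), this suggests a record-now, apply-later scheme; the sketch below shows only that generic shape, and every name in it is invented:

#include <cassert>
#include <vector>

struct Block; // opaque stand-in for a basic block

struct EdgeSplit {
  Block *From;
  Block *To;
  Block *NewBlock;
};

class DomTreeLike {
  std::vector<EdgeSplit> Pending;
  unsigned AppliedSplits = 0;

public:
  // Cheap call made at the point where the CFG edge is split.
  void recordSplitCriticalEdge(Block *From, Block *To, Block *NewBlock) {
    Pending.push_back({From, To, NewBlock});
  }
  // Run lazily, before the tree is next queried.
  void applyPendingSplits() {
    AppliedSplits += Pending.size(); // a real tree would rewire its nodes here
    Pending.clear();
  }
  unsigned appliedSplits() const { return AppliedSplits; }
};

int main() {
  DomTreeLike DT;
  Block *A = nullptr, *B = nullptr, *N = nullptr; // placeholder blocks
  DT.recordSplitCriticalEdge(A, B, N);
  DT.recordSplitCriticalEdge(A, B, N);
  DT.applyPendingSplits();
  assert(DT.appliedSplits() == 2);
  return 0;
}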
SI = removeSuccessor(SI); diff --git a/lib/CodeGen/MachineBlockPlacement.cpp b/lib/CodeGen/MachineBlockPlacement.cpp index 74af1e2..08fd200 100644 --- a/lib/CodeGen/MachineBlockPlacement.cpp +++ b/lib/CodeGen/MachineBlockPlacement.cpp @@ -42,6 +42,7 @@ #include "llvm/Support/Debug.h" #include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetLowering.h" +#include "llvm/Target/TargetSubtargetInfo.h" #include using namespace llvm; @@ -812,7 +813,7 @@ void MachineBlockPlacement::buildLoopChains(MachineFunction &F, BE = L.block_end(); BI != BE; ++BI) { BlockChain &Chain = *BlockToChain[*BI]; - if (!UpdatedPreds.insert(&Chain)) + if (!UpdatedPreds.insert(&Chain).second) continue; assert(Chain.LoopPredecessors == 0); @@ -913,7 +914,7 @@ void MachineBlockPlacement::buildCFGChains(MachineFunction &F) { for (MachineFunction::iterator FI = F.begin(), FE = F.end(); FI != FE; ++FI) { MachineBasicBlock *BB = &*FI; BlockChain &Chain = *BlockToChain[BB]; - if (!UpdatedPreds.insert(&Chain)) + if (!UpdatedPreds.insert(&Chain).second) continue; assert(Chain.LoopPredecessors == 0); @@ -1111,8 +1112,8 @@ bool MachineBlockPlacement::runOnMachineFunction(MachineFunction &F) { MBPI = &getAnalysis(); MBFI = &getAnalysis(); MLI = &getAnalysis(); - TII = F.getTarget().getInstrInfo(); - TLI = F.getTarget().getTargetLowering(); + TII = F.getSubtarget().getInstrInfo(); + TLI = F.getSubtarget().getTargetLowering(); assert(BlockToChain.empty()); buildCFGChains(F); diff --git a/lib/CodeGen/MachineCSE.cpp b/lib/CodeGen/MachineCSE.cpp index c2ab76e..ae26967 100644 --- a/lib/CodeGen/MachineCSE.cpp +++ b/lib/CodeGen/MachineCSE.cpp @@ -25,6 +25,7 @@ #include "llvm/Support/Debug.h" #include "llvm/Support/RecyclingAllocator.h" #include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Target/TargetSubtargetInfo.h" using namespace llvm; #define DEBUG_TYPE "machine-cse" @@ -78,7 +79,8 @@ namespace { SmallVector Exps; unsigned CurrVN; - bool PerformTrivialCoalescing(MachineInstr *MI, MachineBasicBlock *MBB); + bool PerformTrivialCopyPropagation(MachineInstr *MI, + MachineBasicBlock *MBB); bool isPhysDefTriviallyDead(unsigned Reg, MachineBasicBlock::const_iterator I, MachineBasicBlock::const_iterator E) const; @@ -112,8 +114,12 @@ INITIALIZE_AG_DEPENDENCY(AliasAnalysis) INITIALIZE_PASS_END(MachineCSE, "machine-cse", "Machine Common Subexpression Elimination", false, false) -bool MachineCSE::PerformTrivialCoalescing(MachineInstr *MI, - MachineBasicBlock *MBB) { +/// The source register of a COPY machine instruction can be propagated to all +/// its users, and this propagation could increase the probability of finding +/// common subexpressions. If the COPY has only one user, the COPY itself can +/// be removed. +bool MachineCSE::PerformTrivialCopyPropagation(MachineInstr *MI, + MachineBasicBlock *MBB) { bool Changed = false; for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { MachineOperand &MO = MI->getOperand(i); @@ -122,10 +128,7 @@ bool MachineCSE::PerformTrivialCoalescing(MachineInstr *MI, unsigned Reg = MO.getReg(); if (!TargetRegisterInfo::isVirtualRegister(Reg)) continue; - if (!MRI->hasOneNonDBGUse(Reg)) - // Only coalesce single use copies. This ensure the copy will be - // deleted. 
- continue; + bool OnlyOneUse = MRI->hasOneNonDBGUse(Reg); MachineInstr *DefMI = MRI->getVRegDef(Reg); if (!DefMI->isCopy()) continue; @@ -153,10 +156,14 @@ bool MachineCSE::PerformTrivialCoalescing(MachineInstr *MI, continue; DEBUG(dbgs() << "Coalescing: " << *DefMI); DEBUG(dbgs() << "*** to: " << *MI); + // Propagate SrcReg of copies to MI. MO.setReg(SrcReg); MRI->clearKillFlags(SrcReg); - DefMI->eraseFromParent(); - ++NumCoalesces; + // Coalesce single use copies. + if (OnlyOneUse) { + DefMI->eraseFromParent(); + ++NumCoalesces; + } Changed = true; } @@ -453,13 +460,15 @@ bool MachineCSE::ProcessBlock(MachineBasicBlock *MBB) { bool FoundCSE = VNT.count(MI); if (!FoundCSE) { - // Look for trivial copy coalescing opportunities. - if (PerformTrivialCoalescing(MI, MBB)) { + // Using trivial copy propagation to find more CSE opportunities. + if (PerformTrivialCopyPropagation(MI, MBB)) { Changed = true; // After coalescing MI itself may become a copy. if (MI->isCopyLike()) continue; + + // Try again to see if CSE is possible. FoundCSE = VNT.count(MI); } } @@ -663,8 +672,8 @@ bool MachineCSE::runOnMachineFunction(MachineFunction &MF) { if (skipOptnoneFunction(*MF.getFunction())) return false; - TII = MF.getTarget().getInstrInfo(); - TRI = MF.getTarget().getRegisterInfo(); + TII = MF.getSubtarget().getInstrInfo(); + TRI = MF.getSubtarget().getRegisterInfo(); MRI = &MF.getRegInfo(); AA = &getAnalysis(); DT = &getAnalysis(); diff --git a/lib/CodeGen/MachineCodeEmitter.cpp b/lib/CodeGen/MachineCodeEmitter.cpp deleted file mode 100644 index 81b4978..0000000 --- a/lib/CodeGen/MachineCodeEmitter.cpp +++ /dev/null @@ -1,14 +0,0 @@ -//===-- llvm/CodeGen/MachineCodeEmitter.cpp - Code emission -----*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// - -#include "llvm/CodeGen/MachineCodeEmitter.h" - -using namespace llvm; - -void MachineCodeEmitter::anchor() { } diff --git a/lib/CodeGen/MachineCombiner.cpp b/lib/CodeGen/MachineCombiner.cpp new file mode 100644 index 0000000..2931258 --- /dev/null +++ b/lib/CodeGen/MachineCombiner.cpp @@ -0,0 +1,435 @@ +//===---- MachineCombiner.cpp - Instcombining on SSA form machine code ----===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// The machine combiner pass uses machine trace metrics to ensure the combined +// instructions does not lengthen the critical path or the resource depth. 
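The MachineCSE hunk above relaxes the old single-use restriction: the source register of a COPY is now propagated into the current user regardless of how many users the COPY has, but the COPY itself is erased only when that user was its sole non-debug user. A toy model of the decision, with plain integers standing in for virtual registers and a use counter standing in for the MRI queries:

#include <cassert>

struct Copy {
  int SrcReg;   // register the COPY reads
  int Uses;     // remaining non-debug users of the COPY's result
  bool Erased;
};

// Returns the register the current user should read after propagation.
int propagateThrough(Copy &C) {
  int NewReg = C.SrcReg; // always rewrite the operand to the COPY's source
  --C.Uses;
  if (C.Uses == 0)       // only a now-dead (single-use) COPY gets erased
    C.Erased = true;
  return NewReg;
}

int main() {
  Copy C{/*SrcReg=*/7, /*Uses=*/2, /*Erased=*/false};
  assert(propagateThrough(C) == 7 && !C.Erased); // another user remains
  assert(propagateThrough(C) == 7 && C.Erased);  // last user: COPY removed
  return 0;
}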
+//===----------------------------------------------------------------------===// +#define DEBUG_TYPE "machine-combiner" + +#include "llvm/ADT/Statistic.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/CodeGen/MachineDominators.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineLoopInfo.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/MachineTraceMetrics.h" +#include "llvm/CodeGen/Passes.h" +#include "llvm/CodeGen/TargetSchedule.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/Target/TargetSubtargetInfo.h" + +using namespace llvm; + +STATISTIC(NumInstCombined, "Number of machineinst combined"); + +namespace { +class MachineCombiner : public MachineFunctionPass { + const TargetInstrInfo *TII; + const TargetRegisterInfo *TRI; + MCSchedModel SchedModel; + MachineRegisterInfo *MRI; + MachineTraceMetrics *Traces; + MachineTraceMetrics::Ensemble *MinInstr; + + TargetSchedModel TSchedModel; + + /// OptSize - True if optimizing for code size. + bool OptSize; + +public: + static char ID; + MachineCombiner() : MachineFunctionPass(ID) { + initializeMachineCombinerPass(*PassRegistry::getPassRegistry()); + } + void getAnalysisUsage(AnalysisUsage &AU) const override; + bool runOnMachineFunction(MachineFunction &MF) override; + const char *getPassName() const override { return "Machine InstCombiner"; } + +private: + bool doSubstitute(unsigned NewSize, unsigned OldSize); + bool combineInstructions(MachineBasicBlock *); + MachineInstr *getOperandDef(const MachineOperand &MO); + unsigned getDepth(SmallVectorImpl &InsInstrs, + DenseMap &InstrIdxForVirtReg, + MachineTraceMetrics::Trace BlockTrace); + unsigned getLatency(MachineInstr *Root, MachineInstr *NewRoot, + MachineTraceMetrics::Trace BlockTrace); + bool + preservesCriticalPathLen(MachineBasicBlock *MBB, MachineInstr *Root, + MachineTraceMetrics::Trace BlockTrace, + SmallVectorImpl &InsInstrs, + DenseMap &InstrIdxForVirtReg); + bool preservesResourceLen(MachineBasicBlock *MBB, + MachineTraceMetrics::Trace BlockTrace, + SmallVectorImpl &InsInstrs, + SmallVectorImpl &DelInstrs); + void instr2instrSC(SmallVectorImpl &Instrs, + SmallVectorImpl &InstrsSC); +}; +} + +char MachineCombiner::ID = 0; +char &llvm::MachineCombinerID = MachineCombiner::ID; + +INITIALIZE_PASS_BEGIN(MachineCombiner, "machine-combiner", + "Machine InstCombiner", false, false) +INITIALIZE_PASS_DEPENDENCY(MachineTraceMetrics) +INITIALIZE_PASS_END(MachineCombiner, "machine-combiner", "Machine InstCombiner", + false, false) + +void MachineCombiner::getAnalysisUsage(AnalysisUsage &AU) const { + AU.setPreservesCFG(); + AU.addPreserved(); + AU.addPreserved(); + AU.addRequired(); + AU.addPreserved(); + MachineFunctionPass::getAnalysisUsage(AU); +} + +MachineInstr *MachineCombiner::getOperandDef(const MachineOperand &MO) { + MachineInstr *DefInstr = nullptr; + // We need a virtual register definition. + if (MO.isReg() && TargetRegisterInfo::isVirtualRegister(MO.getReg())) + DefInstr = MRI->getUniqueVRegDef(MO.getReg()); + // PHI's have no depth etc. + if (DefInstr && DefInstr->isPHI()) + DefInstr = nullptr; + return DefInstr; +} + +/// getDepth - Computes depth of instructions in vector \InsInstr. 
+/// +/// \param InsInstrs is a vector of machine instructions +/// \param InstrIdxForVirtReg is a dense map of virtual register to index +/// of defining machine instruction in \p InsInstrs +/// \param BlockTrace is a trace of machine instructions +/// +/// \returns Depth of last instruction in \InsInstrs ("NewRoot") +unsigned +MachineCombiner::getDepth(SmallVectorImpl &InsInstrs, + DenseMap &InstrIdxForVirtReg, + MachineTraceMetrics::Trace BlockTrace) { + + SmallVector InstrDepth; + assert(TSchedModel.hasInstrSchedModel() && "Missing machine model\n"); + + // Foreach instruction in in the new sequence compute the depth based on the + // operands. Use the trace information when possible. For new operands which + // are tracked in the InstrIdxForVirtReg map depth is looked up in InstrDepth + for (auto *InstrPtr : InsInstrs) { // for each Use + unsigned IDepth = 0; + DEBUG(dbgs() << "NEW INSTR "; InstrPtr->dump(); dbgs() << "\n";); + for (unsigned i = 0, e = InstrPtr->getNumOperands(); i != e; ++i) { + const MachineOperand &MO = InstrPtr->getOperand(i); + // Check for virtual register operand. + if (!(MO.isReg() && TargetRegisterInfo::isVirtualRegister(MO.getReg()))) + continue; + if (!MO.isUse()) + continue; + unsigned DepthOp = 0; + unsigned LatencyOp = 0; + DenseMap::iterator II = + InstrIdxForVirtReg.find(MO.getReg()); + if (II != InstrIdxForVirtReg.end()) { + // Operand is new virtual register not in trace + assert(II->second < InstrDepth.size() && "Bad Index"); + MachineInstr *DefInstr = InsInstrs[II->second]; + assert(DefInstr && + "There must be a definition for a new virtual register"); + DepthOp = InstrDepth[II->second]; + LatencyOp = TSchedModel.computeOperandLatency( + DefInstr, DefInstr->findRegisterDefOperandIdx(MO.getReg()), + InstrPtr, InstrPtr->findRegisterUseOperandIdx(MO.getReg())); + } else { + MachineInstr *DefInstr = getOperandDef(MO); + if (DefInstr) { + DepthOp = BlockTrace.getInstrCycles(DefInstr).Depth; + LatencyOp = TSchedModel.computeOperandLatency( + DefInstr, DefInstr->findRegisterDefOperandIdx(MO.getReg()), + InstrPtr, InstrPtr->findRegisterUseOperandIdx(MO.getReg())); + } + } + IDepth = std::max(IDepth, DepthOp + LatencyOp); + } + InstrDepth.push_back(IDepth); + } + unsigned NewRootIdx = InsInstrs.size() - 1; + return InstrDepth[NewRootIdx]; +} + +/// getLatency - Computes instruction latency as max of latency of defined +/// operands +/// +/// \param Root is a machine instruction that could be replaced by NewRoot. +/// It is used to compute a more accurate latency information for NewRoot in +/// case there is a dependent instruction in the same trace (\p BlockTrace) +/// \param NewRoot is the instruction for which the latency is computed +/// \param BlockTrace is a trace of machine instructions +/// +/// \returns Latency of \p NewRoot +unsigned MachineCombiner::getLatency(MachineInstr *Root, MachineInstr *NewRoot, + MachineTraceMetrics::Trace BlockTrace) { + + assert(TSchedModel.hasInstrSchedModel() && "Missing machine model\n"); + + // Check each definition in NewRoot and compute the latency + unsigned NewRootLatency = 0; + + for (unsigned i = 0, e = NewRoot->getNumOperands(); i != e; ++i) { + const MachineOperand &MO = NewRoot->getOperand(i); + // Check for virtual register operand. 
+ if (!(MO.isReg() && TargetRegisterInfo::isVirtualRegister(MO.getReg()))) + continue; + if (!MO.isDef()) + continue; + // Get the first instruction that uses MO + MachineRegisterInfo::reg_iterator RI = MRI->reg_begin(MO.getReg()); + RI++; + MachineInstr *UseMO = RI->getParent(); + unsigned LatencyOp = 0; + if (UseMO && BlockTrace.isDepInTrace(Root, UseMO)) { + LatencyOp = TSchedModel.computeOperandLatency( + NewRoot, NewRoot->findRegisterDefOperandIdx(MO.getReg()), UseMO, + UseMO->findRegisterUseOperandIdx(MO.getReg())); + } else { + LatencyOp = TSchedModel.computeInstrLatency(NewRoot->getOpcode()); + } + NewRootLatency = std::max(NewRootLatency, LatencyOp); + } + return NewRootLatency; +} + +/// preservesCriticalPathLen - True when the new instruction sequence does not +/// lengthen the critical path. The DAGCombine code sequence ends in MI +/// (Machine Instruction) Root. The new code sequence ends in MI NewRoot. A +/// necessary condition for the new sequence to replace the old sequence is that +/// it cannot lengthen the critical path. This is decided by the formula +/// (NewRootDepth + NewRootLatency) <= (RootDepth + RootLatency + RootSlack). +/// The slack is the number of cycles Root can be delayed before the critical +/// path becomes longer. +bool MachineCombiner::preservesCriticalPathLen( + MachineBasicBlock *MBB, MachineInstr *Root, + MachineTraceMetrics::Trace BlockTrace, + SmallVectorImpl &InsInstrs, + DenseMap &InstrIdxForVirtReg) { + + assert(TSchedModel.hasInstrSchedModel() && "Missing machine model\n"); + // NewRoot is the last instruction in the \p InsInstrs vector + // Get depth and latency of NewRoot + unsigned NewRootIdx = InsInstrs.size() - 1; + MachineInstr *NewRoot = InsInstrs[NewRootIdx]; + unsigned NewRootDepth = getDepth(InsInstrs, InstrIdxForVirtReg, BlockTrace); + unsigned NewRootLatency = getLatency(Root, NewRoot, BlockTrace); + + // Get depth, latency and slack of Root + unsigned RootDepth = BlockTrace.getInstrCycles(Root).Depth; + unsigned RootLatency = TSchedModel.computeInstrLatency(Root); + unsigned RootSlack = BlockTrace.getInstrSlack(Root); + + DEBUG(dbgs() << "DEPENDENCE DATA FOR " << Root << "\n"; + dbgs() << " NewRootDepth: " << NewRootDepth + << " NewRootLatency: " << NewRootLatency << "\n"; + dbgs() << " RootDepth: " << RootDepth << " RootLatency: " << RootLatency + << " RootSlack: " << RootSlack << "\n"; + dbgs() << " NewRootDepth + NewRootLatency " + << NewRootDepth + NewRootLatency << "\n"; + dbgs() << " RootDepth + RootLatency + RootSlack " + << RootDepth + RootLatency + RootSlack << "\n";); + + /// True when the new sequence does not lengthen the critical path.
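A worked, standalone form of the acceptance test stated in the comment above; the cycle counts below are invented and only show how the slack term can admit a rewrite whose fused latency is higher:

#include <cassert>

bool preservesCriticalPath(unsigned NewRootDepth, unsigned NewRootLatency,
                           unsigned RootDepth, unsigned RootLatency,
                           unsigned RootSlack) {
  return NewRootDepth + NewRootLatency <= RootDepth + RootLatency + RootSlack;
}

int main() {
  // A MUL (latency 3) feeding an ADD (latency 1) rewritten into one MADD:
  // with MADD latency 4 the fused result is ready in the same cycle, so the
  // rewrite is accepted even with zero slack.
  assert(preservesCriticalPath(/*NewRootDepth=*/0, /*NewRootLatency=*/4,
                               /*RootDepth=*/3, /*RootLatency=*/1,
                               /*RootSlack=*/0));
  // A 6-cycle MADD is rejected when the ADD has no slack...
  assert(!preservesCriticalPath(0, 6, 3, 1, 0));
  // ...but accepted when the ADD could have been delayed by two cycles anyway.
  assert(preservesCriticalPath(0, 6, 3, 1, 2));
  return 0;
}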
+ return ((NewRootDepth + NewRootLatency) <= + (RootDepth + RootLatency + RootSlack)); +} + +/// helper routine to convert instructions into SC +void MachineCombiner::instr2instrSC( + SmallVectorImpl &Instrs, + SmallVectorImpl &InstrsSC) { + for (auto *InstrPtr : Instrs) { + unsigned Opc = InstrPtr->getOpcode(); + unsigned Idx = TII->get(Opc).getSchedClass(); + const MCSchedClassDesc *SC = SchedModel.getSchedClassDesc(Idx); + InstrsSC.push_back(SC); + } +} +/// preservesResourceLen - True when the new instructions do not increase +/// resource length +bool MachineCombiner::preservesResourceLen( + MachineBasicBlock *MBB, MachineTraceMetrics::Trace BlockTrace, + SmallVectorImpl &InsInstrs, + SmallVectorImpl &DelInstrs) { + + // Compute current resource length + + //ArrayRef MBBarr(MBB); + SmallVector MBBarr; + MBBarr.push_back(MBB); + unsigned ResLenBeforeCombine = BlockTrace.getResourceLength(MBBarr); + + // Deal with SC rather than Instructions. + SmallVector InsInstrsSC; + SmallVector DelInstrsSC; + + instr2instrSC(InsInstrs, InsInstrsSC); + instr2instrSC(DelInstrs, DelInstrsSC); + + ArrayRef MSCInsArr = makeArrayRef(InsInstrsSC); + ArrayRef MSCDelArr = makeArrayRef(DelInstrsSC); + + // Compute new resource length + unsigned ResLenAfterCombine = + BlockTrace.getResourceLength(MBBarr, MSCInsArr, MSCDelArr); + + DEBUG(dbgs() << "RESOURCE DATA: \n"; + dbgs() << " resource len before: " << ResLenBeforeCombine + << " after: " << ResLenAfterCombine << "\n";); + + return ResLenAfterCombine <= ResLenBeforeCombine; +} + +/// \returns true when new instruction sequence should be generated +/// independent if it lenghtens critical path or not +bool MachineCombiner::doSubstitute(unsigned NewSize, unsigned OldSize) { + if (OptSize && (NewSize < OldSize)) + return true; + if (!TSchedModel.hasInstrSchedModel()) + return true; + return false; +} + +/// combineInstructions - substitute a slow code sequence with a faster one by +/// evaluating instruction combining pattern. +/// The prototype of such a pattern is MUl + ADD -> MADD. Performs instruction +/// combining based on machine trace metrics. Only combine a sequence of +/// instructions when this neither lengthens the critical path nor increases +/// resource pressure. When optimizing for codesize always combine when the new +/// sequence is shorter. +bool MachineCombiner::combineInstructions(MachineBasicBlock *MBB) { + bool Changed = false; + DEBUG(dbgs() << "Combining MBB " << MBB->getName() << "\n"); + + auto BlockIter = MBB->begin(); + + while (BlockIter != MBB->end()) { + auto &MI = *BlockIter++; + + DEBUG(dbgs() << "INSTR "; MI.dump(); dbgs() << "\n";); + SmallVector Pattern; + // The motivating example is: + // + // MUL Other MUL_op1 MUL_op2 Other + // \ / \ | / + // ADD/SUB => MADD/MSUB + // (=Root) (=NewRoot) + + // The DAGCombine code always replaced MUL + ADD/SUB by MADD. While this is + // usually beneficial for code size it unfortunately can hurt performance + // when the ADD is on the critical path, but the MUL is not. With the + // substitution the MUL becomes part of the critical path (in form of the + // MADD) and can lengthen it on architectures where the MADD latency is + // longer than the ADD latency. + // + // For each instruction we check if it can be the root of a combiner + // pattern. Then for each pattern the new code sequence in form of MI is + // generated and evaluated. 
When the efficiency criteria (don't lengthen + // critical path, don't use more resources) is met the new sequence gets + // hooked up into the basic block before the old sequence is removed. + // + // The algorithm does not try to evaluate all patterns and pick the best. + // This is only an artificial restriction though. In practice there is + // mostly one pattern and hasPattern() can order patterns based on an + // internal cost heuristic. + + if (TII->hasPattern(MI, Pattern)) { + for (auto P : Pattern) { + SmallVector InsInstrs; + SmallVector DelInstrs; + DenseMap InstrIdxForVirtReg; + if (!MinInstr) + MinInstr = Traces->getEnsemble(MachineTraceMetrics::TS_MinInstrCount); + MachineTraceMetrics::Trace BlockTrace = MinInstr->getTrace(MBB); + Traces->verifyAnalysis(); + TII->genAlternativeCodeSequence(MI, P, InsInstrs, DelInstrs, + InstrIdxForVirtReg); + // Found pattern, but did not generate alternative sequence. + // This can happen e.g. when an immediate could not be materialized + // in a single instruction. + if (!InsInstrs.size()) + continue; + // Substitute when we optimize for codesize and the new sequence has + // fewer instructions OR + // the new sequence neither lenghten the critical path nor increases + // resource pressure. + if (doSubstitute(InsInstrs.size(), DelInstrs.size()) || + (preservesCriticalPathLen(MBB, &MI, BlockTrace, InsInstrs, + InstrIdxForVirtReg) && + preservesResourceLen(MBB, BlockTrace, InsInstrs, DelInstrs))) { + for (auto *InstrPtr : InsInstrs) + MBB->insert((MachineBasicBlock::iterator) & MI, + (MachineInstr *)InstrPtr); + for (auto *InstrPtr : DelInstrs) + InstrPtr->eraseFromParentAndMarkDBGValuesForRemoval(); + + Changed = true; + ++NumInstCombined; + + Traces->invalidate(MBB); + Traces->verifyAnalysis(); + // Eagerly stop after the first pattern fired + break; + } else { + // Cleanup instructions of the alternative code sequence. There is no + // use for them. + for (auto *InstrPtr : InsInstrs) { + MachineFunction *MF = MBB->getParent(); + MF->DeleteMachineInstr((MachineInstr *)InstrPtr); + } + } + InstrIdxForVirtReg.clear(); + } + } + } + + return Changed; +} + +bool MachineCombiner::runOnMachineFunction(MachineFunction &MF) { + const TargetSubtargetInfo &STI = + MF.getTarget().getSubtarget(); + TII = STI.getInstrInfo(); + TRI = STI.getRegisterInfo(); + SchedModel = STI.getSchedModel(); + TSchedModel.init(SchedModel, &STI, TII); + MRI = &MF.getRegInfo(); + Traces = &getAnalysis(); + MinInstr = 0; + + OptSize = MF.getFunction()->getAttributes().hasAttribute( + AttributeSet::FunctionIndex, Attribute::OptimizeForSize); + + DEBUG(dbgs() << getPassName() << ": " << MF.getName() << '\n'); + if (!TII->useMachineCombiner()) { + DEBUG(dbgs() << " Skipping pass: Target does not support machine combiner\n"); + return false; + } + + bool Changed = false; + + // Try to combine instructions. 
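Condensed, compilable form of the accept/reject decision combineInstructions() applies to each generated alternative: substitute unconditionally when doSubstitute() fires (a size win while optimizing for size, or no scheduling model at all), otherwise require that neither the critical path nor the resource length regresses. The names below are local stand-ins, not LLVM's:

#include <cassert>

struct Candidate {
  unsigned NewSize, OldSize;
  bool HaveSchedModel;
  bool KeepsCriticalPath, KeepsResourceLen;
};

bool shouldCombine(const Candidate &C, bool OptSize) {
  if (OptSize && C.NewSize < C.OldSize)
    return true;                 // size win under -Os: always take it
  if (!C.HaveSchedModel)
    return true;                 // no model to judge by: take it
  return C.KeepsCriticalPath && C.KeepsResourceLen;
}

int main() {
  Candidate Shrinks{1, 2, true, false, false};
  assert(shouldCombine(Shrinks, /*OptSize=*/true));   // smaller, taken
  assert(!shouldCombine(Shrinks, /*OptSize=*/false)); // hurts the schedule
  Candidate Neutral{2, 2, true, true, true};
  assert(shouldCombine(Neutral, false));              // no regression, taken
  return 0;
}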
+ for (auto &MBB : MF) + Changed |= combineInstructions(&MBB); + + return Changed; +} diff --git a/lib/CodeGen/MachineCopyPropagation.cpp b/lib/CodeGen/MachineCopyPropagation.cpp index 3119a35..cbd6272 100644 --- a/lib/CodeGen/MachineCopyPropagation.cpp +++ b/lib/CodeGen/MachineCopyPropagation.cpp @@ -25,6 +25,7 @@ #include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/Target/TargetSubtargetInfo.h" using namespace llvm; #define DEBUG_TYPE "codegen-cp" @@ -335,8 +336,8 @@ bool MachineCopyPropagation::runOnMachineFunction(MachineFunction &MF) { bool Changed = false; - TRI = MF.getTarget().getRegisterInfo(); - TII = MF.getTarget().getInstrInfo(); + TRI = MF.getSubtarget().getRegisterInfo(); + TII = MF.getSubtarget().getInstrInfo(); MRI = &MF.getRegInfo(); for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ++I) diff --git a/lib/CodeGen/MachineDominanceFrontier.cpp b/lib/CodeGen/MachineDominanceFrontier.cpp new file mode 100644 index 0000000..0bee846 --- /dev/null +++ b/lib/CodeGen/MachineDominanceFrontier.cpp @@ -0,0 +1,54 @@ +//===- MachineDominanceFrontier.cpp ---------------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "llvm/CodeGen/MachineDominanceFrontier.h" +#include "llvm/CodeGen/MachineDominators.h" +#include "llvm/Analysis/DominanceFrontierImpl.h" +#include "llvm/CodeGen/Passes.h" + + +using namespace llvm; + +namespace llvm { +template class DominanceFrontierBase; +template class ForwardDominanceFrontierBase; +} + + +char MachineDominanceFrontier::ID = 0; + +INITIALIZE_PASS_BEGIN(MachineDominanceFrontier, "machine-domfrontier", + "Machine Dominance Frontier Construction", true, true) +INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree) +INITIALIZE_PASS_END(MachineDominanceFrontier, "machine-domfrontier", + "Machine Dominance Frontier Construction", true, true) + +MachineDominanceFrontier::MachineDominanceFrontier() + : MachineFunctionPass(ID), + Base() { + initializeMachineDominanceFrontierPass(*PassRegistry::getPassRegistry()); +} + +char &llvm::MachineDominanceFrontierID = MachineDominanceFrontier::ID; + +bool MachineDominanceFrontier::runOnMachineFunction(MachineFunction &) { + releaseMemory(); + Base.analyze(getAnalysis().getBase()); + return false; +} + +void MachineDominanceFrontier::releaseMemory() { + Base.releaseMemory(); +} + +void MachineDominanceFrontier::getAnalysisUsage(AnalysisUsage &AU) const { + AU.setPreservesAll(); + AU.addRequired(); + MachineFunctionPass::getAnalysisUsage(AU); +} diff --git a/lib/CodeGen/MachineDominators.cpp b/lib/CodeGen/MachineDominators.cpp index 04c8ecb..df60cf3 100644 --- a/lib/CodeGen/MachineDominators.cpp +++ b/lib/CodeGen/MachineDominators.cpp @@ -35,6 +35,8 @@ void MachineDominatorTree::getAnalysisUsage(AnalysisUsage &AU) const { } bool MachineDominatorTree::runOnMachineFunction(MachineFunction &F) { + CriticalEdgesToSplit.clear(); + NewBBs.clear(); DT->recalculate(F); return false; diff --git a/lib/CodeGen/MachineFunction.cpp b/lib/CodeGen/MachineFunction.cpp index 6138aef..8a2b610 100644 --- a/lib/CodeGen/MachineFunction.cpp +++ b/lib/CodeGen/MachineFunction.cpp @@ -36,6 +36,7 @@ #include "llvm/Target/TargetFrameLowering.h" #include "llvm/Target/TargetLowering.h" #include 
"llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetSubtargetInfo.h" using namespace llvm; #define DEBUG_TYPE "codegen" @@ -52,17 +53,19 @@ void ilist_traits::deleteNode(MachineBasicBlock *MBB) { } MachineFunction::MachineFunction(const Function *F, const TargetMachine &TM, - unsigned FunctionNum, MachineModuleInfo &mmi, - GCModuleInfo* gmi) - : Fn(F), Target(TM), Ctx(mmi.getContext()), MMI(mmi), GMI(gmi) { - if (TM.getRegisterInfo()) - RegInfo = new (Allocator) MachineRegisterInfo(TM); + unsigned FunctionNum, MachineModuleInfo &mmi) + : Fn(F), Target(TM), STI(TM.getSubtargetImpl()), Ctx(mmi.getContext()), + MMI(mmi) { + if (STI->getRegisterInfo()) + RegInfo = new (Allocator) MachineRegisterInfo(this); else RegInfo = nullptr; MFInfo = nullptr; - FrameInfo = - new (Allocator) MachineFrameInfo(TM,!F->hasFnAttribute("no-realign-stack")); + FrameInfo = new (Allocator) + MachineFrameInfo(STI->getFrameLowering()->getStackAlignment(), + STI->getFrameLowering()->isStackRealignable(), + !F->hasFnAttribute("no-realign-stack")); if (Fn->getAttributes().hasAttribute(AttributeSet::FunctionIndex, Attribute::StackAlignment)) @@ -70,13 +73,13 @@ MachineFunction::MachineFunction(const Function *F, const TargetMachine &TM, getStackAlignment(AttributeSet::FunctionIndex)); ConstantPool = new (Allocator) MachineConstantPool(TM); - Alignment = TM.getTargetLowering()->getMinFunctionAlignment(); + Alignment = STI->getTargetLowering()->getMinFunctionAlignment(); // FIXME: Shouldn't use pref alignment if explicit alignment is set on Fn. if (!Fn->getAttributes().hasAttribute(AttributeSet::FunctionIndex, Attribute::OptimizeForSize)) Alignment = std::max(Alignment, - TM.getTargetLowering()->getPrefFunctionAlignment()); + STI->getTargetLowering()->getPrefFunctionAlignment()); FunctionNumber = FunctionNum; JumpTableInfo = nullptr; @@ -229,10 +232,10 @@ MachineFunction::DeleteMachineBasicBlock(MachineBasicBlock *MBB) { MachineMemOperand * MachineFunction::getMachineMemOperand(MachinePointerInfo PtrInfo, unsigned f, uint64_t s, unsigned base_alignment, - const MDNode *TBAAInfo, + const AAMDNodes &AAInfo, const MDNode *Ranges) { return new (Allocator) MachineMemOperand(PtrInfo, f, s, base_alignment, - TBAAInfo, Ranges); + AAInfo, Ranges); } MachineMemOperand * @@ -243,12 +246,12 @@ MachineFunction::getMachineMemOperand(const MachineMemOperand *MMO, MachineMemOperand(MachinePointerInfo(MMO->getValue(), MMO->getOffset()+Offset), MMO->getFlags(), Size, - MMO->getBaseAlignment(), nullptr); + MMO->getBaseAlignment()); return new (Allocator) MachineMemOperand(MachinePointerInfo(MMO->getPseudoValue(), MMO->getOffset()+Offset), MMO->getFlags(), Size, - MMO->getBaseAlignment(), nullptr); + MMO->getBaseAlignment()); } MachineInstr::mmo_iterator @@ -279,7 +282,7 @@ MachineFunction::extractLoadMemRefs(MachineInstr::mmo_iterator Begin, getMachineMemOperand((*I)->getPointerInfo(), (*I)->getFlags() & ~MachineMemOperand::MOStore, (*I)->getSize(), (*I)->getBaseAlignment(), - (*I)->getTBAAInfo()); + (*I)->getAAInfo()); Result[Index] = JustLoad; } ++Index; @@ -311,7 +314,7 @@ MachineFunction::extractStoreMemRefs(MachineInstr::mmo_iterator Begin, getMachineMemOperand((*I)->getPointerInfo(), (*I)->getFlags() & ~MachineMemOperand::MOLoad, (*I)->getSize(), (*I)->getBaseAlignment(), - (*I)->getTBAAInfo()); + (*I)->getAAInfo()); Result[Index] = JustStore; } ++Index; @@ -350,7 +353,7 @@ void MachineFunction::print(raw_ostream &OS, SlotIndexes *Indexes) const { // Print Constant Pool ConstantPool->print(OS); - const TargetRegisterInfo *TRI = 
getTarget().getRegisterInfo(); + const TargetRegisterInfo *TRI = getSubtarget().getRegisterInfo(); if (RegInfo && !RegInfo->livein_empty()) { OS << "Function Live Ins: "; @@ -459,7 +462,7 @@ unsigned MachineFunction::addLiveIn(unsigned PReg, /// normal 'L' label is returned. MCSymbol *MachineFunction::getJTISymbol(unsigned JTI, MCContext &Ctx, bool isLinkerPrivate) const { - const DataLayout *DL = getTarget().getDataLayout(); + const DataLayout *DL = getSubtarget().getDataLayout(); assert(JumpTableInfo && "No jump tables"); assert(JTI < JumpTableInfo->getJumpTables().size() && "Invalid JTI!"); @@ -474,7 +477,7 @@ MCSymbol *MachineFunction::getJTISymbol(unsigned JTI, MCContext &Ctx, /// getPICBaseSymbol - Return a function-local symbol to represent the PIC /// base. MCSymbol *MachineFunction::getPICBaseSymbol() const { - const DataLayout *DL = getTarget().getDataLayout(); + const DataLayout *DL = getSubtarget().getDataLayout(); return Ctx.GetOrCreateSymbol(Twine(DL->getPrivateGlobalPrefix())+ Twine(getFunctionNumber())+"$pb"); } @@ -483,15 +486,11 @@ MCSymbol *MachineFunction::getPICBaseSymbol() const { // MachineFrameInfo implementation //===----------------------------------------------------------------------===// -const TargetFrameLowering *MachineFrameInfo::getFrameLowering() const { - return TM.getFrameLowering(); -} - /// ensureMaxAlignment - Make sure the function is at least Align bytes /// aligned. void MachineFrameInfo::ensureMaxAlignment(unsigned Align) { - if (!getFrameLowering()->isStackRealignable() || !RealignOption) - assert(Align <= getFrameLowering()->getStackAlignment() && + if (!StackRealignable || !RealignOption) + assert(Align <= StackAlignment && "For targets without stack realignment, Align is out of limit!"); if (MaxAlignment < Align) MaxAlignment = Align; } @@ -513,11 +512,10 @@ static inline unsigned clampStackAlignment(bool ShouldClamp, unsigned Align, int MachineFrameInfo::CreateStackObject(uint64_t Size, unsigned Alignment, bool isSS, const AllocaInst *Alloca) { assert(Size != 0 && "Cannot allocate zero size stack objects!"); - Alignment = - clampStackAlignment(!getFrameLowering()->isStackRealignable() || - !RealignOption, - Alignment, getFrameLowering()->getStackAlignment()); - Objects.push_back(StackObject(Size, Alignment, 0, false, isSS, Alloca)); + Alignment = clampStackAlignment(!StackRealignable || !RealignOption, + Alignment, StackAlignment); + Objects.push_back(StackObject(Size, Alignment, 0, false, isSS, Alloca, + !isSS)); int Index = (int)Objects.size() - NumFixedObjects - 1; assert(Index >= 0 && "Bad frame index!"); ensureMaxAlignment(Alignment); @@ -530,9 +528,8 @@ int MachineFrameInfo::CreateStackObject(uint64_t Size, unsigned Alignment, /// int MachineFrameInfo::CreateSpillStackObject(uint64_t Size, unsigned Alignment) { - Alignment = clampStackAlignment( - !getFrameLowering()->isStackRealignable() || !RealignOption, Alignment, - getFrameLowering()->getStackAlignment()); + Alignment = clampStackAlignment(!StackRealignable || !RealignOption, + Alignment, StackAlignment); CreateStackObject(Size, Alignment, true); int Index = (int)Objects.size() - NumFixedObjects - 1; ensureMaxAlignment(Alignment); @@ -547,10 +544,9 @@ int MachineFrameInfo::CreateSpillStackObject(uint64_t Size, int MachineFrameInfo::CreateVariableSizedObject(unsigned Alignment, const AllocaInst *Alloca) { HasVarSizedObjects = true; - Alignment = clampStackAlignment( - !getFrameLowering()->isStackRealignable() || !RealignOption, Alignment, - getFrameLowering()->getStackAlignment()); 
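The clampStackAlignment() call sites rewritten in these MachineFrameInfo hunks now read the cached StackAlignment and StackRealignable flags instead of asking TargetFrameLowering each time. As far as the call sites show, the rule is: keep the requested alignment when the stack can be realigned (and realignment is enabled), otherwise cap it at the base stack alignment. A hedged sketch of that rule only; clampAlignment and its parameters are illustrative names:

#include <cassert>

unsigned clampAlignment(bool CanRealign, unsigned Requested,
                        unsigned StackAlign) {
  // If the target can realign the stack, or the request already fits the
  // base alignment, honor the request; otherwise clamp it.
  if (CanRealign || Requested <= StackAlign)
    return Requested;
  return StackAlign;
}

int main() {
  assert(clampAlignment(/*CanRealign=*/true, 32, 16) == 32);  // realignable
  assert(clampAlignment(/*CanRealign=*/false, 32, 16) == 16); // clamped
  assert(clampAlignment(false, 8, 16) == 8);                  // already fine
  return 0;
}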
- Objects.push_back(StackObject(0, Alignment, 0, false, false, Alloca)); + Alignment = clampStackAlignment(!StackRealignable || !RealignOption, + Alignment, StackAlignment); + Objects.push_back(StackObject(0, Alignment, 0, false, false, Alloca, true)); ensureMaxAlignment(Alignment); return (int)Objects.size()-NumFixedObjects-1; } @@ -561,20 +557,18 @@ int MachineFrameInfo::CreateVariableSizedObject(unsigned Alignment, /// index with a negative value. /// int MachineFrameInfo::CreateFixedObject(uint64_t Size, int64_t SPOffset, - bool Immutable) { + bool Immutable, bool isAliased) { assert(Size != 0 && "Cannot allocate zero size fixed stack objects!"); // The alignment of the frame index can be determined from its offset from // the incoming frame position. If the frame object is at offset 32 and // the stack is guaranteed to be 16-byte aligned, then we know that the // object is 16-byte aligned. - unsigned StackAlign = getFrameLowering()->getStackAlignment(); - unsigned Align = MinAlign(SPOffset, StackAlign); - Align = clampStackAlignment(!getFrameLowering()->isStackRealignable() || - !RealignOption, - Align, getFrameLowering()->getStackAlignment()); + unsigned Align = MinAlign(SPOffset, StackAlignment); + Align = clampStackAlignment(!StackRealignable || !RealignOption, Align, + StackAlignment); Objects.insert(Objects.begin(), StackObject(Size, Align, SPOffset, Immutable, /*isSS*/ false, - /*Alloca*/ nullptr)); + /*Alloca*/ nullptr, isAliased)); return -++NumFixedObjects; } @@ -582,15 +576,14 @@ int MachineFrameInfo::CreateFixedObject(uint64_t Size, int64_t SPOffset, /// on the stack. Returns an index with a negative value. int MachineFrameInfo::CreateFixedSpillStackObject(uint64_t Size, int64_t SPOffset) { - unsigned StackAlign = getFrameLowering()->getStackAlignment(); - unsigned Align = MinAlign(SPOffset, StackAlign); - Align = clampStackAlignment(!getFrameLowering()->isStackRealignable() || - !RealignOption, - Align, getFrameLowering()->getStackAlignment()); + unsigned Align = MinAlign(SPOffset, StackAlignment); + Align = clampStackAlignment(!StackRealignable || !RealignOption, Align, + StackAlignment); Objects.insert(Objects.begin(), StackObject(Size, Align, SPOffset, /*Immutable*/ true, /*isSS*/ true, - /*Alloca*/ nullptr)); + /*Alloca*/ nullptr, + /*isAliased*/ false)); return -++NumFixedObjects; } @@ -600,7 +593,7 @@ MachineFrameInfo::getPristineRegs(const MachineBasicBlock *MBB) const { const MachineFunction *MF = MBB->getParent(); assert(MF && "MBB must be part of a MachineFunction"); const TargetMachine &TM = MF->getTarget(); - const TargetRegisterInfo *TRI = TM.getRegisterInfo(); + const TargetRegisterInfo *TRI = TM.getSubtargetImpl()->getRegisterInfo(); BitVector BV(TRI->getNumRegs()); // Before CSI is calculated, no registers are considered pristine. 
They can be @@ -625,8 +618,8 @@ MachineFrameInfo::getPristineRegs(const MachineBasicBlock *MBB) const { } unsigned MachineFrameInfo::estimateStackSize(const MachineFunction &MF) const { - const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering(); - const TargetRegisterInfo *RegInfo = MF.getTarget().getRegisterInfo(); + const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering(); + const TargetRegisterInfo *RegInfo = MF.getSubtarget().getRegisterInfo(); unsigned MaxAlign = getMaxAlignment(); int Offset = 0; @@ -676,7 +669,7 @@ unsigned MachineFrameInfo::estimateStackSize(const MachineFunction &MF) const { void MachineFrameInfo::print(const MachineFunction &MF, raw_ostream &OS) const{ if (Objects.empty()) return; - const TargetFrameLowering *FI = MF.getTarget().getFrameLowering(); + const TargetFrameLowering *FI = MF.getSubtarget().getFrameLowering(); int ValOffset = (FI ? FI->getOffsetOfLocalArea() : 0); OS << "Frame Objects:\n"; @@ -820,7 +813,7 @@ void MachineJumpTableInfo::dump() const { print(dbgs()); } void MachineConstantPoolValue::anchor() { } const DataLayout *MachineConstantPool::getDataLayout() const { - return TM.getDataLayout(); + return TM.getSubtargetImpl()->getDataLayout(); } Type *MachineConstantPoolEntry::getType() const { @@ -836,6 +829,37 @@ unsigned MachineConstantPoolEntry::getRelocationInfo() const { return Val.ConstVal->getRelocationInfo(); } +SectionKind +MachineConstantPoolEntry::getSectionKind(const DataLayout *DL) const { + SectionKind Kind; + switch (getRelocationInfo()) { + default: + llvm_unreachable("Unknown section kind"); + case 2: + Kind = SectionKind::getReadOnlyWithRel(); + break; + case 1: + Kind = SectionKind::getReadOnlyWithRelLocal(); + break; + case 0: + switch (DL->getTypeAllocSize(getType())) { + case 4: + Kind = SectionKind::getMergeableConst4(); + break; + case 8: + Kind = SectionKind::getMergeableConst8(); + break; + case 16: + Kind = SectionKind::getMergeableConst16(); + break; + default: + Kind = SectionKind::getMergeableConst(); + break; + } + } + return Kind; +} + MachineConstantPool::~MachineConstantPool() { for (unsigned i = 0, e = Constants.size(); i != e; ++i) if (Constants[i].isMachineConstantPoolEntry()) diff --git a/lib/CodeGen/MachineFunctionAnalysis.cpp b/lib/CodeGen/MachineFunctionAnalysis.cpp index 46cd60a..f6f34ba 100644 --- a/lib/CodeGen/MachineFunctionAnalysis.cpp +++ b/lib/CodeGen/MachineFunctionAnalysis.cpp @@ -46,8 +46,7 @@ bool MachineFunctionAnalysis::doInitialization(Module &M) { bool MachineFunctionAnalysis::runOnFunction(Function &F) { assert(!MF && "MachineFunctionAnalysis already initialized!"); MF = new MachineFunction(&F, TM, NextFnNum++, - getAnalysis(), - getAnalysisIfAvailable()); + getAnalysis()); return false; } diff --git a/lib/CodeGen/MachineInstr.cpp b/lib/CodeGen/MachineInstr.cpp index 5122165..7ad0d94 100644 --- a/lib/CodeGen/MachineInstr.cpp +++ b/lib/CodeGen/MachineInstr.cpp @@ -39,6 +39,7 @@ #include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/Target/TargetSubtargetInfo.h" using namespace llvm; //===----------------------------------------------------------------------===// @@ -105,23 +106,41 @@ void MachineOperand::setIsDef(bool Val) { IsDef = Val; } +// If this operand is currently a register operand, and if this is in a +// function, deregister the operand from the register's use/def list. 
+void MachineOperand::removeRegFromUses() { + if (!isReg() || !isOnRegUseList()) + return; + + if (MachineInstr *MI = getParent()) { + if (MachineBasicBlock *MBB = MI->getParent()) { + if (MachineFunction *MF = MBB->getParent()) + MF->getRegInfo().removeRegOperandFromUseList(this); + } + } +} + /// ChangeToImmediate - Replace this operand with a new immediate operand of /// the specified value. If an operand is known to be an immediate already, /// the setImm method should be used. void MachineOperand::ChangeToImmediate(int64_t ImmVal) { assert((!isReg() || !isTied()) && "Cannot change a tied operand into an imm"); - // If this operand is currently a register operand, and if this is in a - // function, deregister the operand from the register's use/def list. - if (isReg() && isOnRegUseList()) - if (MachineInstr *MI = getParent()) - if (MachineBasicBlock *MBB = MI->getParent()) - if (MachineFunction *MF = MBB->getParent()) - MF->getRegInfo().removeRegOperandFromUseList(this); + + removeRegFromUses(); OpKind = MO_Immediate; Contents.ImmVal = ImmVal; } +void MachineOperand::ChangeToFPImmediate(const ConstantFP *FPImm) { + assert((!isReg() || !isTied()) && "Cannot change a tied operand into an imm"); + + removeRegFromUses(); + + OpKind = MO_FPImmediate; + Contents.CFP = FPImm; +} + /// ChangeToRegister - Replace this operand with a new register operand of /// the specified value. If an operand is known to be an register already, /// the setReg method should be used. @@ -265,7 +284,8 @@ void MachineOperand::print(raw_ostream &OS, const TargetMachine *TM) const { if (const MachineBasicBlock *MBB = MI->getParent()) if (const MachineFunction *MF = MBB->getParent()) TM = &MF->getTarget(); - const TargetRegisterInfo *TRI = TM ? TM->getRegisterInfo() : nullptr; + const TargetRegisterInfo *TRI = + TM ? TM->getSubtargetImpl()->getRegisterInfo() : nullptr; switch (getType()) { case MachineOperand::MO_Register: @@ -429,11 +449,11 @@ MachinePointerInfo MachinePointerInfo::getStack(int64_t Offset) { MachineMemOperand::MachineMemOperand(MachinePointerInfo ptrinfo, unsigned f, uint64_t s, unsigned int a, - const MDNode *TBAAInfo, + const AAMDNodes &AAInfo, const MDNode *Ranges) : PtrInfo(ptrinfo), Size(s), Flags((f & ((1 << MOMaxBits) - 1)) | ((Log2_32(a) + 1) << MOMaxBits)), - TBAAInfo(TBAAInfo), Ranges(Ranges) { + AAInfo(AAInfo), Ranges(Ranges) { assert((PtrInfo.V.isNull() || PtrInfo.V.is() || isa(PtrInfo.V.get()->getType())) && "invalid pointer value"); @@ -514,7 +534,7 @@ raw_ostream &llvm::operator<<(raw_ostream &OS, const MachineMemOperand &MMO) { OS << "(align=" << MMO.getAlignment() << ")"; // Print TBAA info. - if (const MDNode *TBAAInfo = MMO.getTBAAInfo()) { + if (const MDNode *TBAAInfo = MMO.getAAInfo().TBAA) { OS << "(tbaa="; if (TBAAInfo->getNumOperands() > 0) TBAAInfo->getOperand(0)->printAsOperand(OS, /*PrintType=*/false); @@ -523,6 +543,34 @@ raw_ostream &llvm::operator<<(raw_ostream &OS, const MachineMemOperand &MMO) { OS << ")"; } + // Print AA scope info. + if (const MDNode *ScopeInfo = MMO.getAAInfo().Scope) { + OS << "(alias.scope="; + if (ScopeInfo->getNumOperands() > 0) + for (unsigned i = 0, ie = ScopeInfo->getNumOperands(); i != ie; ++i) { + ScopeInfo->getOperand(i)->printAsOperand(OS, /*PrintType=*/false); + if (i != ie-1) + OS << ","; + } + else + OS << ""; + OS << ")"; + } + + // Print AA noalias scope info. 
+ if (const MDNode *NoAliasInfo = MMO.getAAInfo().NoAlias) { + OS << "(noalias="; + if (NoAliasInfo->getNumOperands() > 0) + for (unsigned i = 0, ie = NoAliasInfo->getNumOperands(); i != ie; ++i) { + NoAliasInfo->getOperand(i)->printAsOperand(OS, /*PrintType=*/false); + if (i != ie-1) + OS << ","; + } + else + OS << ""; + OS << ")"; + } + // Print nontemporal info. if (MMO.isNonTemporal()) OS << "(nontemporal)"; @@ -865,6 +913,27 @@ void MachineInstr::eraseFromParent() { getParent()->erase(this); } +void MachineInstr::eraseFromParentAndMarkDBGValuesForRemoval() { + assert(getParent() && "Not embedded in a basic block!"); + MachineBasicBlock *MBB = getParent(); + MachineFunction *MF = MBB->getParent(); + assert(MF && "Not embedded in a function!"); + + MachineInstr *MI = (MachineInstr *)this; + MachineRegisterInfo &MRI = MF->getRegInfo(); + + for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { + const MachineOperand &MO = MI->getOperand(i); + if (!MO.isReg() || !MO.isDef()) + continue; + unsigned Reg = MO.getReg(); + if (!TargetRegisterInfo::isVirtualRegister(Reg)) + continue; + MRI.markUsesInDebugValueAsUndef(Reg); + } + MI->eraseFromParent(); +} + void MachineInstr::eraseFromBundle() { assert(getParent() && "Not embedded in a basic block!"); getParent()->erase_instr(this); @@ -1379,7 +1448,7 @@ bool MachineInstr::isInvariantLoad(AliasAnalysis *AA) const { // If we have an AliasAnalysis, ask it whether the memory is constant. if (AA && AA->pointsToConstantMemory( AliasAnalysis::Location(V, (*I)->getSize(), - (*I)->getTBAAInfo()))) + (*I)->getAAInfo()))) continue; } @@ -1489,8 +1558,8 @@ void MachineInstr::print(raw_ostream &OS, const TargetMachine *TM, OS << " = "; // Print the opcode name. - if (TM && TM->getInstrInfo()) - OS << TM->getInstrInfo()->getName(getOpcode()); + if (TM && TM->getSubtargetImpl()->getInstrInfo()) + OS << TM->getSubtargetImpl()->getInstrInfo()->getName(getOpcode()); else OS << "UNKNOWN"; @@ -1538,17 +1607,17 @@ void MachineInstr::print(raw_ostream &OS, const TargetMachine *TM, // call instructions much less noisy on targets where calls clobber lots // of registers. Don't rely on MO.isDead() because we may be called before // LiveVariables is run, or we may be looking at a non-allocatable reg. - if (MF && isCall() && + if (MRI && isCall() && MO.isReg() && MO.isImplicit() && MO.isDef()) { unsigned Reg = MO.getReg(); if (TargetRegisterInfo::isPhysicalRegister(Reg)) { - const MachineRegisterInfo &MRI = MF->getRegInfo(); - if (MRI.use_empty(Reg)) { + if (MRI->use_empty(Reg)) { bool HasAliasLive = false; - for (MCRegAliasIterator AI(Reg, TM->getRegisterInfo(), true); + for (MCRegAliasIterator AI( + Reg, TM->getSubtargetImpl()->getRegisterInfo(), true); AI.isValid(); ++AI) { unsigned AliasReg = *AI; - if (!MRI.use_empty(AliasReg)) { + if (!MRI->use_empty(AliasReg)) { HasAliasLive = true; break; } @@ -1573,12 +1642,16 @@ void MachineInstr::print(raw_ostream &OS, const TargetMachine *TM, if (isDebugValue() && MO.isMetadata()) { // Pretty print DBG_VALUE instructions. 
const MDNode *MD = MO.getMetadata(); - if (const MDString *MDS = dyn_cast(MD->getOperand(2))) - OS << "!\"" << MDS->getString() << '\"'; + DIDescriptor DI(MD); + DIVariable DIV(MD); + + if (DI.isVariable() && !DIV.getName().empty()) + OS << "!\"" << DIV.getName() << '\"'; else MO.print(OS, TM); } else if (TM && (isInsertSubreg() || isRegSequence()) && MO.isImm()) { - OS << TM->getRegisterInfo()->getSubRegIndexName(MO.getImm()); + OS << TM->getSubtargetImpl()->getRegisterInfo()->getSubRegIndexName( + MO.getImm()); } else if (i == AsmDescOp && MO.isImm()) { // Pretty print the inline asm operand descriptor. OS << '$' << AsmOpCount++; @@ -1595,9 +1668,12 @@ void MachineInstr::print(raw_ostream &OS, const TargetMachine *TM, unsigned RCID = 0; if (InlineAsm::hasRegClassConstraint(Flag, RCID)) { - if (TM) - OS << ':' << TM->getRegisterInfo()->getRegClass(RCID)->getName(); - else + if (TM) { + const TargetRegisterInfo *TRI = + TM->getSubtargetImpl()->getRegisterInfo(); + OS << ':' + << TRI->getRegClassName(TRI->getRegClass(RCID)); + } else OS << ":RC" << RCID; } @@ -1646,7 +1722,8 @@ void MachineInstr::print(raw_ostream &OS, const TargetMachine *TM, if (!HaveSemi) OS << ";"; HaveSemi = true; for (unsigned i = 0; i != VirtRegs.size(); ++i) { const TargetRegisterClass *RC = MRI->getRegClass(VirtRegs[i]); - OS << " " << RC->getName() << ':' << PrintReg(VirtRegs[i]); + OS << " " << MRI->getTargetRegisterInfo()->getRegClassName(RC) + << ':' << PrintReg(VirtRegs[i]); for (unsigned j = i+1; j != VirtRegs.size();) { if (MRI->getRegClass(VirtRegs[j]) != RC) { ++j; @@ -1672,6 +1749,8 @@ void MachineInstr::print(raw_ostream &OS, const TargetMachine *TM, OS << " ]"; } } + if (isIndirectDebugValue()) + OS << " indirect"; } else if (!debugLoc.isUnknown() && MF) { if (!HaveSemi) OS << ";"; OS << " dbg:"; diff --git a/lib/CodeGen/MachineInstrBundle.cpp b/lib/CodeGen/MachineInstrBundle.cpp index 962169e..0690f08 100644 --- a/lib/CodeGen/MachineInstrBundle.cpp +++ b/lib/CodeGen/MachineInstrBundle.cpp @@ -16,6 +16,7 @@ #include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/Target/TargetSubtargetInfo.h" using namespace llvm; namespace { @@ -103,12 +104,12 @@ void llvm::finalizeBundle(MachineBasicBlock &MBB, assert(FirstMI != LastMI && "Empty bundle?"); MIBundleBuilder Bundle(MBB, FirstMI, LastMI); - const TargetMachine &TM = MBB.getParent()->getTarget(); - const TargetInstrInfo *TII = TM.getInstrInfo(); - const TargetRegisterInfo *TRI = TM.getRegisterInfo(); + MachineFunction &MF = *MBB.getParent(); + const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo(); + const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo(); - MachineInstrBuilder MIB = BuildMI(*MBB.getParent(), FirstMI->getDebugLoc(), - TII->get(TargetOpcode::BUNDLE)); + MachineInstrBuilder MIB = + BuildMI(MF, FirstMI->getDebugLoc(), TII->get(TargetOpcode::BUNDLE)); Bundle.prepend(MIB); SmallVector LocalDefs; @@ -140,7 +141,7 @@ void llvm::finalizeBundle(MachineBasicBlock &MBB, // Internal def is now killed. 
KilledDefSet.insert(Reg); } else { - if (ExternUseSet.insert(Reg)) { + if (ExternUseSet.insert(Reg).second) { ExternUses.push_back(Reg); if (MO.isUndef()) UndefUseSet.insert(Reg); @@ -157,7 +158,7 @@ void llvm::finalizeBundle(MachineBasicBlock &MBB, if (!Reg) continue; - if (LocalDefSet.insert(Reg)) { + if (LocalDefSet.insert(Reg).second) { LocalDefs.push_back(Reg); if (MO.isDead()) { DeadDefSet.insert(Reg); @@ -173,7 +174,7 @@ void llvm::finalizeBundle(MachineBasicBlock &MBB, if (!MO.isDead()) { for (MCSubRegIterator SubRegs(Reg, TRI); SubRegs.isValid(); ++SubRegs) { unsigned SubReg = *SubRegs; - if (LocalDefSet.insert(SubReg)) + if (LocalDefSet.insert(SubReg).second) LocalDefs.push_back(SubReg); } } @@ -185,7 +186,7 @@ void llvm::finalizeBundle(MachineBasicBlock &MBB, SmallSet Added; for (unsigned i = 0, e = LocalDefs.size(); i != e; ++i) { unsigned Reg = LocalDefs[i]; - if (Added.insert(Reg)) { + if (Added.insert(Reg).second) { // If it's not live beyond end of the bundle, mark it dead. bool isDead = DeadDefSet.count(Reg) || KilledDefSet.count(Reg); MIB.addReg(Reg, getDefRegState(true) | getDeadRegState(isDead) | diff --git a/lib/CodeGen/MachineLICM.cpp b/lib/CodeGen/MachineLICM.cpp index 94cdab5..2ab0467 100644 --- a/lib/CodeGen/MachineLICM.cpp +++ b/lib/CodeGen/MachineLICM.cpp @@ -39,6 +39,7 @@ #include "llvm/Target/TargetLowering.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/Target/TargetSubtargetInfo.h" using namespace llvm; #define DEBUG_TYPE "machine-licm" @@ -61,7 +62,6 @@ STATISTIC(NumPostRAHoisted, namespace { class MachineLICM : public MachineFunctionPass { - const TargetMachine *TM; const TargetInstrInfo *TII; const TargetLoweringBase *TLI; const TargetRegisterInfo *TRI; @@ -142,9 +142,6 @@ namespace { RegPressure.clear(); RegLimit.clear(); BackTrace.clear(); - for (DenseMap >::iterator - CI = CSEMap.begin(), CE = CSEMap.end(); CI != CE; ++CI) - CI->second.clear(); CSEMap.clear(); } @@ -324,13 +321,12 @@ bool MachineLICM::runOnMachineFunction(MachineFunction &MF) { return false; Changed = FirstInLoop = false; - TM = &MF.getTarget(); - TII = TM->getInstrInfo(); - TLI = TM->getTargetLowering(); - TRI = TM->getRegisterInfo(); + TII = MF.getSubtarget().getInstrInfo(); + TLI = MF.getSubtarget().getTargetLowering(); + TRI = MF.getSubtarget().getRegisterInfo(); MFI = MF.getFrameInfo(); MRI = &MF.getRegInfo(); - InstrItins = TM->getInstrItineraryData(); + InstrItins = MF.getSubtarget().getInstrItineraryData(); PreRegAlloc = MRI->isSSA(); @@ -822,7 +818,7 @@ void MachineLICM::InitRegPressure(MachineBasicBlock *BB) { if (!TargetRegisterInfo::isVirtualRegister(Reg)) continue; - bool isNew = RegSeen.insert(Reg); + bool isNew = RegSeen.insert(Reg).second; unsigned RCId, RCCost; getRegisterClassIDAndCost(MI, Reg, i, RCId, RCCost); if (MO.isDef()) @@ -854,7 +850,7 @@ void MachineLICM::UpdateRegPressure(const MachineInstr *MI) { if (!TargetRegisterInfo::isVirtualRegister(Reg)) continue; - bool isNew = RegSeen.insert(Reg); + bool isNew = RegSeen.insert(Reg).second; if (MO.isDef()) Defs.push_back(Reg); else if (!isNew && isOperandKill(MO, MRI)) { @@ -1299,15 +1295,7 @@ void MachineLICM::InitCSEMap(MachineBasicBlock *BB) { for (MachineBasicBlock::iterator I = BB->begin(),E = BB->end(); I != E; ++I) { const MachineInstr *MI = &*I; unsigned Opcode = MI->getOpcode(); - DenseMap >::iterator - CI = CSEMap.find(Opcode); - if (CI != CSEMap.end()) - CI->second.push_back(MI); - else { - std::vector CSEMIs; - CSEMIs.push_back(MI); - 
CSEMap.insert(std::make_pair(Opcode, CSEMIs)); - } + CSEMap[Opcode].push_back(MI); } } @@ -1447,11 +1435,8 @@ bool MachineLICM::Hoist(MachineInstr *MI, MachineBasicBlock *Preheader) { // Add to the CSE map. if (CI != CSEMap.end()) CI->second.push_back(MI); - else { - std::vector CSEMIs; - CSEMIs.push_back(MI); - CSEMap.insert(std::make_pair(Opcode, CSEMIs)); - } + else + CSEMap[Opcode].push_back(MI); } ++NumHoisted; diff --git a/lib/CodeGen/MachineModuleInfo.cpp b/lib/CodeGen/MachineModuleInfo.cpp index 4976e35..eb3c0bf 100644 --- a/lib/CodeGen/MachineModuleInfo.cpp +++ b/lib/CodeGen/MachineModuleInfo.cpp @@ -270,7 +270,6 @@ MachineModuleInfo::~MachineModuleInfo() { bool MachineModuleInfo::doInitialization(Module &M) { ObjFileMMI = nullptr; - CompactUnwindEncoding = 0; CurCallSite = 0; CallsEHReturn = 0; CallsUnwindInit = 0; @@ -312,7 +311,6 @@ void MachineModuleInfo::EndFunction() { FilterEnds.clear(); CallsEHReturn = 0; CallsUnwindInit = 0; - CompactUnwindEncoding = 0; VariableDbgInfos.clear(); } @@ -429,7 +427,7 @@ void MachineModuleInfo::addPersonality(MachineBasicBlock *LandingPad, /// void MachineModuleInfo:: addCatchTypeInfo(MachineBasicBlock *LandingPad, - ArrayRef TyInfo) { + ArrayRef TyInfo) { LandingPadInfo &LP = getOrCreateLandingPadInfo(LandingPad); for (unsigned N = TyInfo.size(); N; --N) LP.TypeIds.push_back(getTypeIDFor(TyInfo[N - 1])); @@ -439,7 +437,7 @@ addCatchTypeInfo(MachineBasicBlock *LandingPad, /// void MachineModuleInfo:: addFilterTypeInfo(MachineBasicBlock *LandingPad, - ArrayRef TyInfo) { + ArrayRef TyInfo) { LandingPadInfo &LP = getOrCreateLandingPadInfo(LandingPad); std::vector IdsInFilter(TyInfo.size()); for (unsigned I = 0, E = TyInfo.size(); I != E; ++I) @@ -508,7 +506,7 @@ void MachineModuleInfo::setCallSiteLandingPad(MCSymbol *Sym, /// getTypeIDFor - Return the type id for the specified typeinfo. This is /// function wide. -unsigned MachineModuleInfo::getTypeIDFor(const GlobalVariable *TI) { +unsigned MachineModuleInfo::getTypeIDFor(const GlobalValue *TI) { for (unsigned i = 0, N = TypeInfos.size(); i != N; ++i) if (TypeInfos[i] == TI) return i + 1; diff --git a/lib/CodeGen/MachineRegionInfo.cpp b/lib/CodeGen/MachineRegionInfo.cpp new file mode 100644 index 0000000..5a5035e --- /dev/null +++ b/lib/CodeGen/MachineRegionInfo.cpp @@ -0,0 +1,140 @@ + +#include "llvm/CodeGen/MachineRegionInfo.h" +#include "llvm/CodeGen/MachinePostDominators.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/Analysis/RegionInfoImpl.h" + +#define DEBUG_TYPE "region" + +using namespace llvm; + +STATISTIC(numMachineRegions, "The # of machine regions"); +STATISTIC(numMachineSimpleRegions, "The # of simple machine regions"); + +namespace llvm { +template class RegionBase>; +template class RegionNodeBase>; +template class RegionInfoBase>; +} + +//===----------------------------------------------------------------------===// +// MachineRegion implementation +// + +MachineRegion::MachineRegion(MachineBasicBlock *Entry, MachineBasicBlock *Exit, + MachineRegionInfo* RI, + MachineDominatorTree *DT, MachineRegion *Parent) : + RegionBase>(Entry, Exit, RI, DT, Parent) { + +} + +MachineRegion::~MachineRegion() { } + +//===----------------------------------------------------------------------===// +// MachineRegionInfo implementation +// + +MachineRegionInfo::MachineRegionInfo() : + RegionInfoBase>() { + +} + +MachineRegionInfo::~MachineRegionInfo() { + +} + +void MachineRegionInfo::updateStatistics(MachineRegion *R) { + ++numMachineRegions; + + // TODO: Slow. 
Should only be enabled if -stats is used.
+  if (R->isSimple())
+    ++numMachineSimpleRegions;
+}
+
+void MachineRegionInfo::recalculate(MachineFunction &F,
+                                    MachineDominatorTree *DT_,
+                                    MachinePostDominatorTree *PDT_,
+                                    MachineDominanceFrontier *DF_) {
+  DT = DT_;
+  PDT = PDT_;
+  DF = DF_;
+
+  MachineBasicBlock *Entry = GraphTraits<MachineFunction*>::getEntryNode(&F);
+
+  TopLevelRegion = new MachineRegion(Entry, nullptr, this, DT, nullptr);
+  updateStatistics(TopLevelRegion);
+  calculate(F);
+}
+
+//===----------------------------------------------------------------------===//
+// MachineRegionInfoPass implementation
+//
+
+MachineRegionInfoPass::MachineRegionInfoPass() : MachineFunctionPass(ID) {
+  initializeMachineRegionInfoPassPass(*PassRegistry::getPassRegistry());
+}
+
+MachineRegionInfoPass::~MachineRegionInfoPass() {
+
+}
+
+bool MachineRegionInfoPass::runOnMachineFunction(MachineFunction &F) {
+  releaseMemory();
+
+  auto DT = &getAnalysis<MachineDominatorTree>();
+  auto PDT = &getAnalysis<MachinePostDominatorTree>();
+  auto DF = &getAnalysis<MachineDominanceFrontier>();
+
+  RI.recalculate(F, DT, PDT, DF);
+  return false;
+}
+
+void MachineRegionInfoPass::releaseMemory() {
+  RI.releaseMemory();
+}
+
+void MachineRegionInfoPass::verifyAnalysis() const {
+  // Only do verification when user wants to, otherwise this expensive check
+  // will be invoked by PMDataManager::verifyPreservedAnalysis when
+  // a regionpass (marked PreservedAll) finish.
+  if (MachineRegionInfo::VerifyRegionInfo)
+    RI.verifyAnalysis();
+}
+
+void MachineRegionInfoPass::getAnalysisUsage(AnalysisUsage &AU) const {
+  AU.setPreservesAll();
+  AU.addRequiredTransitive<MachineDominatorTree>();
+  AU.addRequired<MachinePostDominatorTree>();
+  AU.addRequired<MachineDominanceFrontier>();
+}
+
+void MachineRegionInfoPass::print(raw_ostream &OS, const Module *) const {
+  RI.print(OS);
+}
+
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+void MachineRegionInfoPass::dump() const {
+  RI.dump();
+}
+#endif
+
+char MachineRegionInfoPass::ID = 0;
+
+INITIALIZE_PASS_BEGIN(MachineRegionInfoPass, "regions",
+                      "Detect single entry single exit regions", true, true)
+INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
+INITIALIZE_PASS_DEPENDENCY(MachinePostDominatorTree)
+INITIALIZE_PASS_DEPENDENCY(MachineDominanceFrontier)
+INITIALIZE_PASS_END(MachineRegionInfoPass, "regions",
+                    "Detect single entry single exit regions", true, true)
+
+// Create methods available outside of this file, to use them
+// "include/llvm/LinkAllPasses.h". Otherwise the pass would be deleted by
+// the link time optimization.
+
+namespace llvm {
+  FunctionPass *createMachineRegionInfoPass() {
+    return new MachineRegionInfoPass();
+  }
+}
+
diff --git a/lib/CodeGen/MachineRegisterInfo.cpp b/lib/CodeGen/MachineRegisterInfo.cpp
index f560259..e9612f3 100644
--- a/lib/CodeGen/MachineRegisterInfo.cpp
+++ b/lib/CodeGen/MachineRegisterInfo.cpp
@@ -16,28 +16,22 @@
 #include "llvm/Support/raw_os_ostream.h"
 #include "llvm/Target/TargetInstrInfo.h"
 #include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
 using namespace llvm;
 
 // Pin the vtable to this file.
 void MachineRegisterInfo::Delegate::anchor() {}
 
-MachineRegisterInfo::MachineRegisterInfo(const TargetMachine &TM)
-  : TM(TM), TheDelegate(nullptr), IsSSA(true), TracksLiveness(true) {
+MachineRegisterInfo::MachineRegisterInfo(const MachineFunction *MF)
+  : MF(MF), TheDelegate(nullptr), IsSSA(true), TracksLiveness(true) {
   VRegInfo.reserve(256);
   RegAllocHints.reserve(256);
   UsedRegUnits.resize(getTargetRegisterInfo()->getNumRegUnits());
   UsedPhysRegMask.resize(getTargetRegisterInfo()->getNumRegs());
 
   // Create the physreg use/def lists.
- PhysRegUseDefLists = - new MachineOperand*[getTargetRegisterInfo()->getNumRegs()]; - memset(PhysRegUseDefLists, 0, - sizeof(MachineOperand*)*getTargetRegisterInfo()->getNumRegs()); -} - -MachineRegisterInfo::~MachineRegisterInfo() { - delete [] PhysRegUseDefLists; + PhysRegUseDefLists.resize(getTargetRegisterInfo()->getNumRegs(), nullptr); } /// setRegClass - Set the register class of the specified virtual register. @@ -67,7 +61,7 @@ MachineRegisterInfo::constrainRegClass(unsigned Reg, bool MachineRegisterInfo::recomputeRegClass(unsigned Reg, const TargetMachine &TM) { - const TargetInstrInfo *TII = TM.getInstrInfo(); + const TargetInstrInfo *TII = TM.getSubtargetImpl()->getInstrInfo(); const TargetRegisterClass *OldRC = getRegClass(Reg); const TargetRegisterClass *NewRC = getTargetRegisterInfo()->getLargestLegalSuperClass(OldRC); @@ -283,18 +277,25 @@ void MachineRegisterInfo::moveOperands(MachineOperand *Dst, /// replaceRegWith - Replace all instances of FromReg with ToReg in the /// machine function. This is like llvm-level X->replaceAllUsesWith(Y), /// except that it also changes any definitions of the register as well. +/// If ToReg is a physical register we apply the sub register to obtain the +/// final/proper physical register. void MachineRegisterInfo::replaceRegWith(unsigned FromReg, unsigned ToReg) { assert(FromReg != ToReg && "Cannot replace a reg with itself"); + const TargetRegisterInfo *TRI = getTargetRegisterInfo(); + // TODO: This could be more efficient by bulk changing the operands. for (reg_iterator I = reg_begin(FromReg), E = reg_end(); I != E; ) { MachineOperand &O = *I; ++I; - O.setReg(ToReg); + if (TargetRegisterInfo::isPhysicalRegister(ToReg)) { + O.substPhysReg(ToReg, *TRI); + } else { + O.setReg(ToReg); + } } } - /// getVRegDef - Return the machine instr that defines the specified virtual /// register or null if none is found. This assumes that the code is in SSA /// form, so there should only be one definition. diff --git a/lib/CodeGen/MachineSSAUpdater.cpp b/lib/CodeGen/MachineSSAUpdater.cpp index d9173a2..71a6eba 100644 --- a/lib/CodeGen/MachineSSAUpdater.cpp +++ b/lib/CodeGen/MachineSSAUpdater.cpp @@ -24,8 +24,8 @@ #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetInstrInfo.h" -#include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/Target/TargetSubtargetInfo.h" #include "llvm/Transforms/Utils/SSAUpdaterImpl.h" using namespace llvm; @@ -39,7 +39,7 @@ static AvailableValsTy &getAvailableVals(void *AV) { MachineSSAUpdater::MachineSSAUpdater(MachineFunction &MF, SmallVectorImpl *NewPHI) : AV(nullptr), InsertedPHIs(NewPHI) { - TII = MF.getTarget().getInstrInfo(); + TII = MF.getSubtarget().getInstrInfo(); MRI = &MF.getRegInfo(); } diff --git a/lib/CodeGen/MachineScheduler.cpp b/lib/CodeGen/MachineScheduler.cpp index 44191f7..261942f 100644 --- a/lib/CodeGen/MachineScheduler.cpp +++ b/lib/CodeGen/MachineScheduler.cpp @@ -40,6 +40,9 @@ cl::opt ForceTopDown("misched-topdown", cl::Hidden, cl::desc("Force top-down list scheduling")); cl::opt ForceBottomUp("misched-bottomup", cl::Hidden, cl::desc("Force bottom-up list scheduling")); +cl::opt +DumpCriticalPathLength("misched-dcpl", cl::Hidden, + cl::desc("Print critical path length to stdout")); } #ifndef NDEBUG @@ -378,7 +381,7 @@ static bool isSchedBoundary(MachineBasicBlock::iterator MI, /// Main driver for both MachineScheduler and PostMachineScheduler. 
void MachineSchedulerBase::scheduleRegions(ScheduleDAGInstrs &Scheduler) { - const TargetInstrInfo *TII = MF->getTarget().getInstrInfo(); + const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo(); bool IsPostRA = Scheduler.isPostRA(); // Visit all machine basic blocks. @@ -451,6 +454,11 @@ void MachineSchedulerBase::scheduleRegions(ScheduleDAGInstrs &Scheduler) { else dbgs() << "End"; dbgs() << " RegionInstrs: " << NumRegionInstrs << " Remaining: " << RemainingInstrs << "\n"); + if (DumpCriticalPathLength) { + errs() << MF->getName(); + errs() << ":BB# " << MBB->getNumber(); + errs() << " " << MBB->getName() << " \n"; + } // Schedule a region: possibly reorder instructions. // This invalidates 'RegionEnd' and 'I'. @@ -2355,14 +2363,15 @@ void GenericScheduler::initialize(ScheduleDAGMI *dag) { // Initialize the HazardRecognizers. If itineraries don't exist, are empty, or // are disabled, then these HazardRecs will be disabled. const InstrItineraryData *Itin = SchedModel->getInstrItineraries(); - const TargetMachine &TM = DAG->MF.getTarget(); if (!Top.HazardRec) { Top.HazardRec = - TM.getInstrInfo()->CreateTargetMIHazardRecognizer(Itin, DAG); + DAG->MF.getSubtarget().getInstrInfo()->CreateTargetMIHazardRecognizer( + Itin, DAG); } if (!Bot.HazardRec) { Bot.HazardRec = - TM.getInstrInfo()->CreateTargetMIHazardRecognizer(Itin, DAG); + DAG->MF.getSubtarget().getInstrInfo()->CreateTargetMIHazardRecognizer( + Itin, DAG); } } @@ -2370,8 +2379,8 @@ void GenericScheduler::initialize(ScheduleDAGMI *dag) { void GenericScheduler::initPolicy(MachineBasicBlock::iterator Begin, MachineBasicBlock::iterator End, unsigned NumRegionInstrs) { - const TargetMachine &TM = Context->MF->getTarget(); - const TargetLowering *TLI = TM.getTargetLowering(); + const MachineFunction &MF = *Begin->getParent()->getParent(); + const TargetLowering *TLI = MF.getSubtarget().getTargetLowering(); // Avoid setting up the register pressure tracker for small regions to save // compile time. As a rough heuristic, only track pressure when the number of @@ -2391,8 +2400,8 @@ void GenericScheduler::initPolicy(MachineBasicBlock::iterator Begin, RegionPolicy.OnlyBottomUp = true; // Allow the subtarget to override default policy. - const TargetSubtargetInfo &ST = TM.getSubtarget(); - ST.overrideSchedPolicy(RegionPolicy, Begin, End, NumRegionInstrs); + MF.getSubtarget().overrideSchedPolicy(RegionPolicy, Begin, End, + NumRegionInstrs); // After subtarget overrides, apply command line options. if (!EnableRegPressure) @@ -2460,7 +2469,10 @@ void GenericScheduler::registerRoots() { if ((*I)->getDepth() > Rem.CriticalPath) Rem.CriticalPath = (*I)->getDepth(); } - DEBUG(dbgs() << "Critical Path: " << Rem.CriticalPath << '\n'); + DEBUG(dbgs() << "Critical Path(GS-RR ): " << Rem.CriticalPath << '\n'); + if (DumpCriticalPathLength) { + errs() << "Critical Path(GS-RR ): " << Rem.CriticalPath << " \n"; + } if (EnableCyclicPath) { Rem.CyclicCritPath = DAG->computeCyclicCriticalPath(); @@ -2482,8 +2494,8 @@ static bool tryPressure(const PressureChange &TryP, } // If one candidate decreases and the other increases, go with it. // Invalid candidates have UnitInc==0. - if (tryLess(TryP.getUnitInc() < 0, CandP.getUnitInc() < 0, TryCand, Cand, - Reason)) { + if (tryGreater(TryP.getUnitInc() < 0, CandP.getUnitInc() < 0, TryCand, Cand, + Reason)) { return true; } // If the candidates are decreasing pressure, reverse priority. @@ -2885,10 +2897,10 @@ void PostGenericScheduler::initialize(ScheduleDAGMI *Dag) { // Initialize the HazardRecognizers. 
If itineraries don't exist, are empty, // or are disabled, then these HazardRecs will be disabled. const InstrItineraryData *Itin = SchedModel->getInstrItineraries(); - const TargetMachine &TM = DAG->MF.getTarget(); if (!Top.HazardRec) { Top.HazardRec = - TM.getInstrInfo()->CreateTargetMIHazardRecognizer(Itin, DAG); + DAG->MF.getSubtarget().getInstrInfo()->CreateTargetMIHazardRecognizer( + Itin, DAG); } } @@ -2902,7 +2914,10 @@ void PostGenericScheduler::registerRoots() { if ((*I)->getDepth() > Rem.CriticalPath) Rem.CriticalPath = (*I)->getDepth(); } - DEBUG(dbgs() << "Critical Path: " << Rem.CriticalPath << '\n'); + DEBUG(dbgs() << "Critical Path: (PGS-RR) " << Rem.CriticalPath << '\n'); + if (DumpCriticalPathLength) { + errs() << "Critical Path(PGS-RR ): " << Rem.CriticalPath << " \n"; + } } /// Apply a set of heursitics to a new candidate for PostRA scheduling. diff --git a/lib/CodeGen/MachineSink.cpp b/lib/CodeGen/MachineSink.cpp index 0ae495c..ba25bca 100644 --- a/lib/CodeGen/MachineSink.cpp +++ b/lib/CodeGen/MachineSink.cpp @@ -17,18 +17,21 @@ //===----------------------------------------------------------------------===// #include "llvm/CodeGen/Passes.h" +#include "llvm/ADT/SetVector.h" #include "llvm/ADT/SmallSet.h" #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/AliasAnalysis.h" +#include "llvm/CodeGen/MachineBlockFrequencyInfo.h" #include "llvm/CodeGen/MachineDominators.h" #include "llvm/CodeGen/MachineLoopInfo.h" +#include "llvm/CodeGen/MachinePostDominators.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetInstrInfo.h" -#include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/Target/TargetSubtargetInfo.h" using namespace llvm; #define DEBUG_TYPE "machine-sink" @@ -38,6 +41,12 @@ SplitEdges("machine-sink-split", cl::desc("Split critical edges during machine sinking"), cl::init(true), cl::Hidden); +static cl::opt +UseBlockFreqInfo("machine-sink-bfi", + cl::desc("Use block frequency info to find successors to sink"), + cl::init(true), cl::Hidden); + + STATISTIC(NumSunk, "Number of machine instructions sunk"); STATISTIC(NumSplit, "Number of critical edges split"); STATISTIC(NumCoalesces, "Number of copies coalesced"); @@ -46,14 +55,20 @@ namespace { class MachineSinking : public MachineFunctionPass { const TargetInstrInfo *TII; const TargetRegisterInfo *TRI; - MachineRegisterInfo *MRI; // Machine register information - MachineDominatorTree *DT; // Machine dominator tree + MachineRegisterInfo *MRI; // Machine register information + MachineDominatorTree *DT; // Machine dominator tree + MachinePostDominatorTree *PDT; // Machine post dominator tree MachineLoopInfo *LI; + const MachineBlockFrequencyInfo *MBFI; AliasAnalysis *AA; // Remember which edges have been considered for breaking. SmallSet, 8> CEBCandidates; + // Remember which edges we are about to split. + // This is different from CEBCandidates since those edges + // will be split. 
+ SetVector > ToSplit; public: static char ID; // Pass identification @@ -68,9 +83,13 @@ namespace { MachineFunctionPass::getAnalysisUsage(AU); AU.addRequired(); AU.addRequired(); + AU.addRequired(); AU.addRequired(); AU.addPreserved(); + AU.addPreserved(); AU.addPreserved(); + if (UseBlockFreqInfo) + AU.addRequired(); } void releaseMemory() override { @@ -82,10 +101,22 @@ namespace { bool isWorthBreakingCriticalEdge(MachineInstr *MI, MachineBasicBlock *From, MachineBasicBlock *To); - MachineBasicBlock *SplitCriticalEdge(MachineInstr *MI, - MachineBasicBlock *From, - MachineBasicBlock *To, - bool BreakPHIEdge); + /// \brief Postpone the splitting of the given critical + /// edge (\p From, \p To). + /// + /// We do not split the edges on the fly. Indeed, this invalidates + /// the dominance information and thus triggers a lot of updates + /// of that information underneath. + /// Instead, we postpone all the splits after each iteration of + /// the main loop. That way, the information is at least valid + /// for the lifetime of an iteration. + /// + /// \return True if the edge is marked as toSplit, false otherwise. + /// False can be retruned if, for instance, this is not profitable. + bool PostponeSplitCriticalEdge(MachineInstr *MI, + MachineBasicBlock *From, + MachineBasicBlock *To, + bool BreakPHIEdge); bool SinkInstruction(MachineInstr *MI, bool &SawStore); bool AllUsesDominatedByBlock(unsigned Reg, MachineBasicBlock *MBB, MachineBasicBlock *DefMBB, @@ -135,6 +166,11 @@ bool MachineSinking::PerformTrivialForwardCoalescing(MachineInstr *MI, DEBUG(dbgs() << "*** to: " << *MI); MRI->replaceRegWith(DstReg, SrcReg); MI->eraseFromParent(); + + // Conservatively, clear any kill flags, since it's possible that they are no + // longer correct. + MRI->clearKillFlags(SrcReg); + ++NumCoalesces; return true; } @@ -213,12 +249,13 @@ bool MachineSinking::runOnMachineFunction(MachineFunction &MF) { DEBUG(dbgs() << "******** Machine Sinking ********\n"); - const TargetMachine &TM = MF.getTarget(); - TII = TM.getInstrInfo(); - TRI = TM.getRegisterInfo(); + TII = MF.getSubtarget().getInstrInfo(); + TRI = MF.getSubtarget().getRegisterInfo(); MRI = &MF.getRegInfo(); DT = &getAnalysis(); + PDT = &getAnalysis(); LI = &getAnalysis(); + MBFI = UseBlockFreqInfo ? &getAnalysis() : nullptr; AA = &getAnalysis(); bool EverMadeChange = false; @@ -228,10 +265,24 @@ bool MachineSinking::runOnMachineFunction(MachineFunction &MF) { // Process all basic blocks. CEBCandidates.clear(); + ToSplit.clear(); for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ++I) MadeChange |= ProcessBlock(*I); + // If we have anything we marked as toSplit, split it now. + for (auto &Pair : ToSplit) { + auto NewSucc = Pair.first->SplitCriticalEdge(Pair.second, this); + if (NewSucc != nullptr) { + DEBUG(dbgs() << " *** Splitting critical edge:" + " BB#" << Pair.first->getNumber() + << " -- BB#" << NewSucc->getNumber() + << " -- BB#" << Pair.second->getNumber() << '\n'); + MadeChange = true; + ++NumSplit; + } else + DEBUG(dbgs() << " *** Not legal to break critical edge\n"); + } // If this iteration over the code changed anything, keep iterating. if (!MadeChange) break; EverMadeChange = true; @@ -289,7 +340,7 @@ bool MachineSinking::isWorthBreakingCriticalEdge(MachineInstr *MI, // If the pass has already considered breaking this edge (during this pass // through the function), then let's go ahead and break it. This means // sinking multiple "cheap" instructions into the same block. 
- if (!CEBCandidates.insert(std::make_pair(From, To))) + if (!CEBCandidates.insert(std::make_pair(From, To)).second) return true; if (!MI->isCopy() && !TII->isAsCheapAsAMove(MI)) @@ -328,21 +379,21 @@ bool MachineSinking::isWorthBreakingCriticalEdge(MachineInstr *MI, return false; } -MachineBasicBlock *MachineSinking::SplitCriticalEdge(MachineInstr *MI, - MachineBasicBlock *FromBB, - MachineBasicBlock *ToBB, - bool BreakPHIEdge) { +bool MachineSinking::PostponeSplitCriticalEdge(MachineInstr *MI, + MachineBasicBlock *FromBB, + MachineBasicBlock *ToBB, + bool BreakPHIEdge) { if (!isWorthBreakingCriticalEdge(MI, FromBB, ToBB)) - return nullptr; + return false; // Avoid breaking back edge. From == To means backedge for single BB loop. if (!SplitEdges || FromBB == ToBB) - return nullptr; + return false; // Check for backedges of more "complex" loops. if (LI->getLoopFor(FromBB) == LI->getLoopFor(ToBB) && LI->isLoopHeader(ToBB)) - return nullptr; + return false; // It's not always legal to break critical edges and sink the computation // to the edge. @@ -389,11 +440,13 @@ MachineBasicBlock *MachineSinking::SplitCriticalEdge(MachineInstr *MI, if (*PI == FromBB) continue; if (!DT->dominates(ToBB, *PI)) - return nullptr; + return false; } } - return FromBB->SplitCriticalEdge(ToBB, this); + ToSplit.insert(std::make_pair(FromBB, ToBB)); + + return true; } static bool AvoidsSinking(MachineInstr *MI, MachineRegisterInfo *MRI) { @@ -419,23 +472,6 @@ static void collectDebugValues(MachineInstr *MI, } } -/// isPostDominatedBy - Return true if A is post dominated by B. -static bool isPostDominatedBy(MachineBasicBlock *A, MachineBasicBlock *B) { - - // FIXME - Use real post dominator. - if (A->succ_size() != 2) - return false; - MachineBasicBlock::succ_iterator I = A->succ_begin(); - if (B == *I) - ++I; - MachineBasicBlock *OtherSuccBlock = *I; - if (OtherSuccBlock->succ_size() != 1 || - *(OtherSuccBlock->succ_begin()) != B) - return false; - - return true; -} - /// isProfitableToSinkTo - Return true if it is profitable to sink MI. bool MachineSinking::isProfitableToSinkTo(unsigned Reg, MachineInstr *MI, MachineBasicBlock *MBB, @@ -447,8 +483,13 @@ bool MachineSinking::isProfitableToSinkTo(unsigned Reg, MachineInstr *MI, return false; // It is profitable if SuccToSinkTo does not post dominate current block. - if (!isPostDominatedBy(MBB, SuccToSinkTo)) - return true; + if (!PDT->dominates(SuccToSinkTo, MBB)) + return true; + + // It is profitable to sink an instruction from a deeper loop to a shallower + // loop, even if the latter post-dominates the former (PR21115). + if (LI->getLoopDepth(MBB) > LI->getLoopDepth(SuccToSinkTo)) + return true; // Check if only use in post dominated block is PHI instruction. bool NonPHIUse = false; @@ -539,14 +580,20 @@ MachineBasicBlock *MachineSinking::FindSuccToSinkTo(MachineInstr *MI, } // Otherwise, we should look at all the successors and decide which one - // we should sink to. - // We give successors with smaller loop depth higher priority. - SmallVector Succs(MBB->succ_begin(), MBB->succ_end()); - // Sort Successors according to their loop depth. + // we should sink to. If we have reliable block frequency information + // (frequency != 0) available, give successors with smaller frequencies + // higher priority, otherwise prioritize smaller loop depths. + SmallVector Succs(MBB->succ_begin(), + MBB->succ_end()); + // Sort Successors according to their loop depth or block frequency info. 
std::stable_sort( Succs.begin(), Succs.end(), - [this](const MachineBasicBlock *LHS, const MachineBasicBlock *RHS) { - return LI->getLoopDepth(LHS) < LI->getLoopDepth(RHS); + [this](const MachineBasicBlock *L, const MachineBasicBlock *R) { + uint64_t LHSFreq = MBFI ? MBFI->getBlockFreq(L).getFrequency() : 0; + uint64_t RHSFreq = MBFI ? MBFI->getBlockFreq(R).getFrequency() : 0; + bool HasBlockFreq = LHSFreq != 0 && RHSFreq != 0; + return HasBlockFreq ? LHSFreq < RHSFreq + : LI->getLoopDepth(L) < LI->getLoopDepth(R); }); for (SmallVectorImpl::iterator SI = Succs.begin(), E = Succs.end(); SI != E; ++SI) { @@ -655,21 +702,16 @@ bool MachineSinking::SinkInstruction(MachineInstr *MI, bool &SawStore) { if (!TryBreak) DEBUG(dbgs() << "Sinking along critical edge.\n"); else { - MachineBasicBlock *NewSucc = - SplitCriticalEdge(MI, ParentBlock, SuccToSinkTo, BreakPHIEdge); - if (!NewSucc) { + // Mark this edge as to be split. + // If the edge can actually be split, the next iteration of the main loop + // will sink MI in the newly created block. + bool Status = + PostponeSplitCriticalEdge(MI, ParentBlock, SuccToSinkTo, BreakPHIEdge); + if (!Status) DEBUG(dbgs() << " *** PUNTING: Not legal or profitable to " - "break critical edge\n"); - return false; - } else { - DEBUG(dbgs() << " *** Splitting critical edge:" - " BB#" << ParentBlock->getNumber() - << " -- BB#" << NewSucc->getNumber() - << " -- BB#" << SuccToSinkTo->getNumber() << '\n'); - SuccToSinkTo = NewSucc; - ++NumSplit; - BreakPHIEdge = false; - } + "break critical edge\n"); + // The instruction will not be sunk this time. + return false; } } @@ -677,20 +719,13 @@ bool MachineSinking::SinkInstruction(MachineInstr *MI, bool &SawStore) { // BreakPHIEdge is true if all the uses are in the successor MBB being // sunken into and they are all PHI nodes. In this case, machine-sink must // break the critical edge first. - MachineBasicBlock *NewSucc = SplitCriticalEdge(MI, ParentBlock, - SuccToSinkTo, BreakPHIEdge); - if (!NewSucc) { + bool Status = PostponeSplitCriticalEdge(MI, ParentBlock, + SuccToSinkTo, BreakPHIEdge); + if (!Status) DEBUG(dbgs() << " *** PUNTING: Not legal or profitable to " "break critical edge\n"); - return false; - } - - DEBUG(dbgs() << " *** Splitting critical edge:" - " BB#" << ParentBlock->getNumber() - << " -- BB#" << NewSucc->getNumber() - << " -- BB#" << SuccToSinkTo->getNumber() << '\n'); - SuccToSinkTo = NewSucc; - ++NumSplit; + // The instruction will not be sunk this time. + return false; } // Determine where to insert into. Skip phi nodes. 
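Note: the MachineSink hunks above order sink candidates by block frequency when MachineBlockFrequencyInfo supplies a non-zero frequency for both successors, and fall back to loop depth otherwise. The following is a minimal standalone sketch of that ordering heuristic only; BlockInfo, Freq, LoopDepth, and sortSinkCandidates are hypothetical stand-ins for the real MachineBasicBlock/MBFI/MachineLoopInfo queries and are not part of the patch.

#include <algorithm>
#include <cstdint>
#include <cstdio>
#include <vector>

// Hypothetical stand-in for a sink candidate; the real pass queries
// MachineBlockFrequencyInfo and MachineLoopInfo for each successor block.
struct BlockInfo {
  int Number;          // placeholder block id, for printing only
  uint64_t Freq;       // 0 means "no reliable frequency available"
  unsigned LoopDepth;
};

// Order candidates the way the patched FindSuccToSinkTo does: prefer the
// lower block frequency when both sides have one, otherwise the shallower
// loop depth.
static void sortSinkCandidates(std::vector<BlockInfo> &Succs) {
  std::stable_sort(Succs.begin(), Succs.end(),
                   [](const BlockInfo &L, const BlockInfo &R) {
                     bool HasBlockFreq = L.Freq != 0 && R.Freq != 0;
                     return HasBlockFreq ? L.Freq < R.Freq
                                         : L.LoopDepth < R.LoopDepth;
                   });
}

int main() {
  std::vector<BlockInfo> Succs = {{0, 120, 2}, {1, 40, 3}, {2, 0, 1}};
  sortSinkCandidates(Succs);
  for (const BlockInfo &B : Succs)
    std::printf("BB#%d freq=%llu depth=%u\n", B.Number,
                (unsigned long long)B.Freq, B.LoopDepth);
  return 0;
}

A stable sort keeps the original successor order for candidates that compare equal, which matters because the pass then takes the first candidate that passes the legality and profitability checks.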
diff --git a/lib/CodeGen/MachineTraceMetrics.cpp b/lib/CodeGen/MachineTraceMetrics.cpp index 1bbf0ad..2cf87eb 100644 --- a/lib/CodeGen/MachineTraceMetrics.cpp +++ b/lib/CodeGen/MachineTraceMetrics.cpp @@ -52,13 +52,13 @@ void MachineTraceMetrics::getAnalysisUsage(AnalysisUsage &AU) const { bool MachineTraceMetrics::runOnMachineFunction(MachineFunction &Func) { MF = &Func; - TII = MF->getTarget().getInstrInfo(); - TRI = MF->getTarget().getRegisterInfo(); + TII = MF->getSubtarget().getInstrInfo(); + TRI = MF->getSubtarget().getRegisterInfo(); MRI = &MF->getRegInfo(); Loops = &getAnalysis(); const TargetSubtargetInfo &ST = MF->getTarget().getSubtarget(); - SchedModel.init(*ST.getSchedModel(), &ST, TII); + SchedModel.init(ST.getSchedModel(), &ST, TII); BlockInfo.resize(MF->getNumBlockIDs()); ProcResourceCycles.resize(MF->getNumBlockIDs() * SchedModel.getNumProcResourceKinds()); @@ -135,8 +135,7 @@ MachineTraceMetrics::getProcResourceCycles(unsigned MBBNum) const { "getResources() must be called before getProcResourceCycles()"); unsigned PRKinds = SchedModel.getNumProcResourceKinds(); assert((MBBNum+1) * PRKinds <= ProcResourceCycles.size()); - return ArrayRef(ProcResourceCycles.data() + MBBNum * PRKinds, - PRKinds); + return makeArrayRef(ProcResourceCycles.data() + MBBNum * PRKinds, PRKinds); } @@ -256,8 +255,7 @@ MachineTraceMetrics::Ensemble:: getProcResourceDepths(unsigned MBBNum) const { unsigned PRKinds = MTM.SchedModel.getNumProcResourceKinds(); assert((MBBNum+1) * PRKinds <= ProcResourceDepths.size()); - return ArrayRef(ProcResourceDepths.data() + MBBNum * PRKinds, - PRKinds); + return makeArrayRef(ProcResourceDepths.data() + MBBNum * PRKinds, PRKinds); } /// Get an array of processor resource heights for MBB. Indexed by processor @@ -270,8 +268,7 @@ MachineTraceMetrics::Ensemble:: getProcResourceHeights(unsigned MBBNum) const { unsigned PRKinds = MTM.SchedModel.getNumProcResourceKinds(); assert((MBBNum+1) * PRKinds <= ProcResourceHeights.size()); - return ArrayRef(ProcResourceHeights.data() + MBBNum * PRKinds, - PRKinds); + return makeArrayRef(ProcResourceHeights.data() + MBBNum * PRKinds, PRKinds); } //===----------------------------------------------------------------------===// @@ -452,7 +449,7 @@ public: } // To is a new block. Mark the block as visited in case the CFG has cycles // that MachineLoopInfo didn't recognize as a natural loop. - return LB.Visited.insert(To); + return LB.Visited.insert(To).second; } }; } @@ -1169,6 +1166,7 @@ MachineTraceMetrics::Trace::getPHIDepth(const MachineInstr *PHI) const { return DepCycle; } +/// When bottom is set include instructions in current block in estimate. unsigned MachineTraceMetrics::Trace::getResourceDepth(bool Bottom) const { // Find the limiting processor resource. // Numbers have been pre-scaled to be comparable. @@ -1185,7 +1183,9 @@ unsigned MachineTraceMetrics::Trace::getResourceDepth(bool Bottom) const { // Convert to cycle count. 
PRMax = TE.MTM.getCycles(PRMax); + /// All instructions before current block unsigned Instrs = TBI.InstrDepth; + // plus instructions in current block if (Bottom) Instrs += TE.MTM.BlockInfo[getBlockNum()].InstrCount; if (unsigned IW = TE.MTM.SchedModel.getIssueWidth()) @@ -1194,44 +1194,72 @@ unsigned MachineTraceMetrics::Trace::getResourceDepth(bool Bottom) const { return std::max(Instrs, PRMax); } - -unsigned MachineTraceMetrics::Trace:: -getResourceLength(ArrayRef Extrablocks, - ArrayRef ExtraInstrs) const { +unsigned MachineTraceMetrics::Trace::getResourceLength( + ArrayRef Extrablocks, + ArrayRef ExtraInstrs, + ArrayRef RemoveInstrs) const { // Add up resources above and below the center block. ArrayRef PRDepths = TE.getProcResourceDepths(getBlockNum()); ArrayRef PRHeights = TE.getProcResourceHeights(getBlockNum()); unsigned PRMax = 0; - for (unsigned K = 0; K != PRDepths.size(); ++K) { - unsigned PRCycles = PRDepths[K] + PRHeights[K]; - for (unsigned I = 0; I != Extrablocks.size(); ++I) - PRCycles += TE.MTM.getProcResourceCycles(Extrablocks[I]->getNumber())[K]; - for (unsigned I = 0; I != ExtraInstrs.size(); ++I) { - const MCSchedClassDesc* SC = ExtraInstrs[I]; + + // Capture computing cycles from extra instructions + auto extraCycles = [this](ArrayRef Instrs, + unsigned ResourceIdx) + ->unsigned { + unsigned Cycles = 0; + for (unsigned I = 0; I != Instrs.size(); ++I) { + const MCSchedClassDesc *SC = Instrs[I]; if (!SC->isValid()) continue; for (TargetSchedModel::ProcResIter - PI = TE.MTM.SchedModel.getWriteProcResBegin(SC), - PE = TE.MTM.SchedModel.getWriteProcResEnd(SC); PI != PE; ++PI) { - if (PI->ProcResourceIdx != K) + PI = TE.MTM.SchedModel.getWriteProcResBegin(SC), + PE = TE.MTM.SchedModel.getWriteProcResEnd(SC); + PI != PE; ++PI) { + if (PI->ProcResourceIdx != ResourceIdx) continue; - PRCycles += (PI->Cycles * TE.MTM.SchedModel.getResourceFactor(K)); + Cycles += + (PI->Cycles * TE.MTM.SchedModel.getResourceFactor(ResourceIdx)); } } + return Cycles; + }; + + for (unsigned K = 0; K != PRDepths.size(); ++K) { + unsigned PRCycles = PRDepths[K] + PRHeights[K]; + for (unsigned I = 0; I != Extrablocks.size(); ++I) + PRCycles += TE.MTM.getProcResourceCycles(Extrablocks[I]->getNumber())[K]; + PRCycles += extraCycles(ExtraInstrs, K); + PRCycles -= extraCycles(RemoveInstrs, K); PRMax = std::max(PRMax, PRCycles); } // Convert to cycle count. PRMax = TE.MTM.getCycles(PRMax); + // Instrs: #instructions in current trace outside current block. unsigned Instrs = TBI.InstrDepth + TBI.InstrHeight; + // Add instruction count from the extra blocks. for (unsigned i = 0, e = Extrablocks.size(); i != e; ++i) Instrs += TE.MTM.getResources(Extrablocks[i])->InstrCount; + Instrs += ExtraInstrs.size(); + Instrs -= RemoveInstrs.size(); if (unsigned IW = TE.MTM.SchedModel.getIssueWidth()) Instrs /= IW; // Assume issue width 1 without a schedule model. 
return std::max(Instrs, PRMax); } +bool MachineTraceMetrics::Trace::isDepInTrace(const MachineInstr *DefMI, + const MachineInstr *UseMI) const { + if (DefMI->getParent() == UseMI->getParent()) + return true; + + const TraceBlockInfo &DepTBI = TE.BlockInfo[DefMI->getParent()->getNumber()]; + const TraceBlockInfo &TBI = TE.BlockInfo[UseMI->getParent()->getNumber()]; + + return DepTBI.isUsefulDominator(TBI); +} + void MachineTraceMetrics::Ensemble::print(raw_ostream &OS) const { OS << getName() << " ensemble:\n"; for (unsigned i = 0, e = BlockInfo.size(); i != e; ++i) { diff --git a/lib/CodeGen/MachineVerifier.cpp b/lib/CodeGen/MachineVerifier.cpp index 8515b0f..99f0583 100644 --- a/lib/CodeGen/MachineVerifier.cpp +++ b/lib/CodeGen/MachineVerifier.cpp @@ -46,6 +46,7 @@ #include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/Target/TargetSubtargetInfo.h" using namespace llvm; namespace { @@ -214,9 +215,9 @@ namespace { void report(const char *msg, const MachineBasicBlock *MBB, const LiveInterval &LI); void report(const char *msg, const MachineFunction *MF, - const LiveRange &LR); + const LiveRange &LR, unsigned Reg); void report(const char *msg, const MachineBasicBlock *MBB, - const LiveRange &LR); + const LiveRange &LR, unsigned Reg); void verifyInlineAsm(const MachineInstr *MI); @@ -275,11 +276,12 @@ void MachineFunction::verify(Pass *p, const char *Banner) const { bool MachineVerifier::runOnMachineFunction(MachineFunction &MF) { raw_ostream *OutFile = nullptr; if (OutFileName) { - std::string ErrorInfo; - OutFile = new raw_fd_ostream(OutFileName, ErrorInfo, + std::error_code EC; + OutFile = new raw_fd_ostream(OutFileName, EC, sys::fs::F_Append | sys::fs::F_Text); - if (!ErrorInfo.empty()) { - errs() << "Error opening '" << OutFileName << "': " << ErrorInfo << '\n'; + if (EC) { + errs() << "Error opening '" << OutFileName << "': " << EC.message() + << '\n'; exit(1); } @@ -292,8 +294,8 @@ bool MachineVerifier::runOnMachineFunction(MachineFunction &MF) { this->MF = &MF; TM = &MF.getTarget(); - TII = TM->getInstrInfo(); - TRI = TM->getRegisterInfo(); + TII = MF.getSubtarget().getInstrInfo(); + TRI = MF.getSubtarget().getRegisterInfo(); MRI = &MF.getRegInfo(); LiveVars = nullptr; @@ -430,15 +432,17 @@ void MachineVerifier::report(const char *msg, const MachineBasicBlock *MBB, } void MachineVerifier::report(const char *msg, const MachineBasicBlock *MBB, - const LiveRange &LR) { + const LiveRange &LR, unsigned Reg) { report(msg, MBB); - *OS << "- liverange: " << LR << "\n"; + *OS << "- liverange: " << LR << '\n'; + *OS << "- register: " << PrintReg(Reg, TRI) << '\n'; } void MachineVerifier::report(const char *msg, const MachineFunction *MF, - const LiveRange &LR) { + const LiveRange &LR, unsigned Reg) { report(msg, MF); - *OS << "- liverange: " << LR << "\n"; + *OS << "- liverange: " << LR << '\n'; + *OS << "- register: " << PrintReg(Reg, TRI) << '\n'; } void MachineVerifier::markReachable(const MachineBasicBlock *MBB) { @@ -905,7 +909,7 @@ MachineVerifier::visitMachineOperand(const MachineOperand *MO, unsigned MONum) { if (!DRC->contains(Reg)) { report("Illegal physical register for instruction", MO, MONum); *OS << TRI->getName(Reg) << " is not a " - << DRC->getName() << " register.\n"; + << TRI->getRegClassName(DRC) << " register.\n"; } } } else { @@ -916,13 +920,13 @@ MachineVerifier::visitMachineOperand(const MachineOperand *MO, unsigned MONum) { TRI->getSubClassWithSubReg(RC, SubIdx); if (!SRC) { 
report("Invalid subregister index for virtual register", MO, MONum); - *OS << "Register class " << RC->getName() + *OS << "Register class " << TRI->getRegClassName(RC) << " does not support subreg index " << SubIdx << "\n"; return; } if (RC != SRC) { report("Invalid register class for subregister index", MO, MONum); - *OS << "Register class " << RC->getName() + *OS << "Register class " << TRI->getRegClassName(RC) << " does not fully support subreg index " << SubIdx << "\n"; return; } @@ -944,8 +948,9 @@ MachineVerifier::visitMachineOperand(const MachineOperand *MO, unsigned MONum) { } if (!RC->hasSuperClassEq(DRC)) { report("Illegal virtual register for instruction", MO, MONum); - *OS << "Expected a " << DRC->getName() << " register, but got a " - << RC->getName() << " register\n"; + *OS << "Expected a " << TRI->getRegClassName(DRC) + << " register, but got a " << TRI->getRegClassName(RC) + << " register\n"; } } } @@ -1357,13 +1362,13 @@ void MachineVerifier::verifyLiveRangeValue(const LiveRange &LR, const VNInfo *DefVNI = LR.getVNInfoAt(VNI->def); if (!DefVNI) { - report("Valno not live at def and not marked unused", MF, LR); + report("Valno not live at def and not marked unused", MF, LR, Reg); *OS << "Valno #" << VNI->id << '\n'; return; } if (DefVNI != VNI) { - report("Live segment at def has different valno", MF, LR); + report("Live segment at def has different valno", MF, LR, Reg); *OS << "Valno #" << VNI->id << " is defined at " << VNI->def << " where valno #" << DefVNI->id << " is live\n"; return; @@ -1371,7 +1376,7 @@ void MachineVerifier::verifyLiveRangeValue(const LiveRange &LR, const MachineBasicBlock *MBB = LiveInts->getMBBFromIndex(VNI->def); if (!MBB) { - report("Invalid definition index", MF, LR); + report("Invalid definition index", MF, LR, Reg); *OS << "Valno #" << VNI->id << " is defined at " << VNI->def << " in " << LR << '\n'; return; @@ -1379,7 +1384,7 @@ void MachineVerifier::verifyLiveRangeValue(const LiveRange &LR, if (VNI->isPHIDef()) { if (VNI->def != LiveInts->getMBBStartIdx(MBB)) { - report("PHIDef value is not defined at MBB start", MBB, LR); + report("PHIDef value is not defined at MBB start", MBB, LR, Reg); *OS << "Valno #" << VNI->id << " is defined at " << VNI->def << ", not at the beginning of BB#" << MBB->getNumber() << '\n'; } @@ -1389,7 +1394,7 @@ void MachineVerifier::verifyLiveRangeValue(const LiveRange &LR, // Non-PHI def. const MachineInstr *MI = LiveInts->getInstructionFromIndex(VNI->def); if (!MI) { - report("No instruction at def index", MBB, LR); + report("No instruction at def index", MBB, LR, Reg); *OS << "Valno #" << VNI->id << " is defined at " << VNI->def << '\n'; return; } @@ -1422,12 +1427,13 @@ void MachineVerifier::verifyLiveRangeValue(const LiveRange &LR, // DEF slots. 
if (isEarlyClobber) { if (!VNI->def.isEarlyClobber()) { - report("Early clobber def must be at an early-clobber slot", MBB, LR); + report("Early clobber def must be at an early-clobber slot", MBB, LR, + Reg); *OS << "Valno #" << VNI->id << " is defined at " << VNI->def << '\n'; } } else if (!VNI->def.isRegister()) { report("Non-PHI, non-early clobber def must be at a register slot", - MBB, LR); + MBB, LR, Reg); *OS << "Valno #" << VNI->id << " is defined at " << VNI->def << '\n'; } } @@ -1441,31 +1447,31 @@ void MachineVerifier::verifyLiveRangeSegment(const LiveRange &LR, assert(VNI && "Live segment has no valno"); if (VNI->id >= LR.getNumValNums() || VNI != LR.getValNumInfo(VNI->id)) { - report("Foreign valno in live segment", MF, LR); + report("Foreign valno in live segment", MF, LR, Reg); *OS << S << " has a bad valno\n"; } if (VNI->isUnused()) { - report("Live segment valno is marked unused", MF, LR); + report("Live segment valno is marked unused", MF, LR, Reg); *OS << S << '\n'; } const MachineBasicBlock *MBB = LiveInts->getMBBFromIndex(S.start); if (!MBB) { - report("Bad start of live segment, no basic block", MF, LR); + report("Bad start of live segment, no basic block", MF, LR, Reg); *OS << S << '\n'; return; } SlotIndex MBBStartIdx = LiveInts->getMBBStartIdx(MBB); if (S.start != MBBStartIdx && S.start != VNI->def) { - report("Live segment must begin at MBB entry or valno def", MBB, LR); + report("Live segment must begin at MBB entry or valno def", MBB, LR, Reg); *OS << S << '\n'; } const MachineBasicBlock *EndMBB = LiveInts->getMBBFromIndex(S.end.getPrevSlot()); if (!EndMBB) { - report("Bad end of live segment, no basic block", MF, LR); + report("Bad end of live segment, no basic block", MF, LR, Reg); *OS << S << '\n'; return; } @@ -1483,14 +1489,14 @@ void MachineVerifier::verifyLiveRangeSegment(const LiveRange &LR, const MachineInstr *MI = LiveInts->getInstructionFromIndex(S.end.getPrevSlot()); if (!MI) { - report("Live segment doesn't end at a valid instruction", EndMBB, LR); + report("Live segment doesn't end at a valid instruction", EndMBB, LR, Reg); *OS << S << '\n'; return; } // The block slot must refer to a basic block boundary. if (S.end.isBlock()) { - report("Live segment ends at B slot of an instruction", EndMBB, LR); + report("Live segment ends at B slot of an instruction", EndMBB, LR, Reg); *OS << S << '\n'; } @@ -1498,7 +1504,8 @@ void MachineVerifier::verifyLiveRangeSegment(const LiveRange &LR, // Segment ends on the dead slot. // That means there must be a dead def. if (!SlotIndex::isSameInstr(S.start, S.end)) { - report("Live segment ending at dead slot spans instructions", EndMBB, LR); + report("Live segment ending at dead slot spans instructions", EndMBB, LR, + Reg); *OS << S << '\n'; } } @@ -1508,7 +1515,7 @@ void MachineVerifier::verifyLiveRangeSegment(const LiveRange &LR, if (S.end.isEarlyClobber()) { if (I+1 == LR.end() || (I+1)->start != S.end) { report("Live segment ending at early clobber slot must be " - "redefined by an EC def in the same instruction", EndMBB, LR); + "redefined by an EC def in the same instruction", EndMBB, LR, Reg); *OS << S << '\n'; } } @@ -1566,7 +1573,7 @@ void MachineVerifier::verifyLiveRangeSegment(const LiveRange &LR, // All predecessors must have a live-out value. 
if (!PVNI) { - report("Register not marked live out of predecessor", *PI, LR); + report("Register not marked live out of predecessor", *PI, LR, Reg); *OS << "Valno #" << VNI->id << " live into BB#" << MFI->getNumber() << '@' << LiveInts->getMBBStartIdx(MFI) << ", not live before " << PEnd << '\n'; @@ -1575,7 +1582,7 @@ void MachineVerifier::verifyLiveRangeSegment(const LiveRange &LR, // Only PHI-defs can take different predecessor values. if (!IsPHI && PVNI != VNI) { - report("Different value live out of predecessor", *PI, LR); + report("Different value live out of predecessor", *PI, LR, Reg); *OS << "Valno #" << PVNI->id << " live out of BB#" << (*PI)->getNumber() << '@' << PEnd << "\nValno #" << VNI->id << " live into BB#" << MFI->getNumber() diff --git a/lib/CodeGen/OptimizePHIs.cpp b/lib/CodeGen/OptimizePHIs.cpp index 95a2934..a1042e7 100644 --- a/lib/CodeGen/OptimizePHIs.cpp +++ b/lib/CodeGen/OptimizePHIs.cpp @@ -20,6 +20,7 @@ #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/IR/Function.h" #include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Target/TargetSubtargetInfo.h" using namespace llvm; #define DEBUG_TYPE "phi-opt" @@ -66,7 +67,7 @@ bool OptimizePHIs::runOnMachineFunction(MachineFunction &Fn) { return false; MRI = &Fn.getRegInfo(); - TII = Fn.getTarget().getInstrInfo(); + TII = Fn.getSubtarget().getInstrInfo(); // Find dead PHI cycles and PHI cycles that can be replaced by a single // value. InstCombine does these optimizations, but DAG legalization may @@ -91,7 +92,7 @@ bool OptimizePHIs::IsSingleValuePHICycle(MachineInstr *MI, unsigned DstReg = MI->getOperand(0).getReg(); // See if we already saw this register. - if (!PHIsInCycle.insert(MI)) + if (!PHIsInCycle.insert(MI).second) return true; // Don't scan crazily complex things. @@ -136,7 +137,7 @@ bool OptimizePHIs::IsDeadPHICycle(MachineInstr *MI, InstrSet &PHIsInCycle) { "PHI destination is not a virtual register"); // See if we already saw this register. - if (!PHIsInCycle.insert(MI)) + if (!PHIsInCycle.insert(MI).second) return true; // Don't scan crazily complex things. diff --git a/lib/CodeGen/PHIElimination.cpp b/lib/CodeGen/PHIElimination.cpp index c8d0819..def2e3d 100644 --- a/lib/CodeGen/PHIElimination.cpp +++ b/lib/CodeGen/PHIElimination.cpp @@ -30,7 +30,7 @@ #include "llvm/Support/Compiler.h" #include "llvm/Support/Debug.h" #include "llvm/Target/TargetInstrInfo.h" -#include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetSubtargetInfo.h" #include using namespace llvm; @@ -150,9 +150,7 @@ bool PHIElimination::runOnMachineFunction(MachineFunction &MF) { Changed |= EliminatePHINodes(MF, *I); // Remove dead IMPLICIT_DEF instructions. - for (SmallPtrSet::iterator I = ImpDefs.begin(), - E = ImpDefs.end(); I != E; ++I) { - MachineInstr *DefMI = *I; + for (MachineInstr *DefMI : ImpDefs) { unsigned DefReg = DefMI->getOperand(0).getReg(); if (MRI->use_nodbg_empty(DefReg)) { if (LIS) @@ -240,7 +238,7 @@ void PHIElimination::LowerPHINode(MachineBasicBlock &MBB, // Insert a register to register copy at the top of the current block (but // after any remaining phi nodes) which copies the new incoming register // into the phi node destination. - const TargetInstrInfo *TII = MF.getTarget().getInstrInfo(); + const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo(); if (isSourceDefinedByImplicitDef(MPhi, MRI)) // If all sources of a PHI node are implicit_def, just emit an // implicit_def instead of a copy. 
@@ -369,7 +367,7 @@ void PHIElimination::LowerPHINode(MachineBasicBlock &MBB, // Check to make sure we haven't already emitted the copy for this block. // This can happen because PHI nodes may have multiple entries for the same // basic block. - if (!MBBsInsertedInto.insert(&opBlock)) + if (!MBBsInsertedInto.insert(&opBlock).second) continue; // If the copy has already been emitted, we're done. // Find a safe location to insert the copy, this may be the first terminator diff --git a/lib/CodeGen/PHIEliminationUtils.h b/lib/CodeGen/PHIEliminationUtils.h index 48234ae..b997d7a 100644 --- a/lib/CodeGen/PHIEliminationUtils.h +++ b/lib/CodeGen/PHIEliminationUtils.h @@ -7,8 +7,8 @@ // //===----------------------------------------------------------------------===// -#ifndef LLVM_CODEGEN_PHIELIMINATIONUTILS_H -#define LLVM_CODEGEN_PHIELIMINATIONUTILS_H +#ifndef LLVM_LIB_CODEGEN_PHIELIMINATIONUTILS_H +#define LLVM_LIB_CODEGEN_PHIELIMINATIONUTILS_H #include "llvm/CodeGen/MachineBasicBlock.h" diff --git a/lib/CodeGen/Passes.cpp b/lib/CodeGen/Passes.cpp index 249b2d0..ec71d86 100644 --- a/lib/CodeGen/Passes.cpp +++ b/lib/CodeGen/Passes.cpp @@ -27,6 +27,7 @@ #include "llvm/Target/TargetLowering.h" #include "llvm/Target/TargetSubtargetInfo.h" #include "llvm/Transforms/Scalar.h" +#include "llvm/Transforms/Utils/SymbolRewriter.h" using namespace llvm; @@ -71,6 +72,8 @@ static cl::opt DisableCGP("disable-cgp", cl::Hidden, cl::desc("Disable Codegen Prepare")); static cl::opt DisableCopyProp("disable-copyprop", cl::Hidden, cl::desc("Disable Copy Propagation pass")); +static cl::opt DisablePartialLibcallInlining("disable-partial-libcall-inlining", + cl::Hidden, cl::desc("Disable Partial Libcall Inlining")); static cl::opt PrintLSR("print-lsr-output", cl::Hidden, cl::desc("Print LLVM IR produced by the loop-reduce pass")); static cl::opt PrintISelInput("print-isel-input", cl::Hidden, @@ -97,6 +100,10 @@ static cl::opt MISchedPostRA("misched-postra", cl::Hidden, static cl::opt EarlyLiveIntervals("early-live-intervals", cl::Hidden, cl::desc("Run live interval analysis earlier in the pipeline")); +static cl::opt UseCFLAA("use-cfl-aa-in-codegen", + cl::init(false), cl::Hidden, + cl::desc("Enable the new, experimental CFL alias analysis in CodeGen")); + /// Allow standard passes to be disabled by command line options. This supports /// simple binary flags that either suppress the pass or do nothing. /// i.e. -disable-mypass=false has no effect. @@ -374,7 +381,10 @@ void TargetPassConfig::addIRPasses() { // Add TypeBasedAliasAnalysis before BasicAliasAnalysis so that // BasicAliasAnalysis wins if they disagree. This is intended to help // support "obvious" type-punning idioms. + if (UseCFLAA) + addPass(createCFLAliasAnalysisPass()); addPass(createTypeBasedAliasAnalysisPass()); + addPass(createScopedNoAliasAAPass()); addPass(createBasicAliasAnalysisPass()); // Before running any passes, run the verifier to determine if the input @@ -399,6 +409,9 @@ void TargetPassConfig::addIRPasses() { // Prepare expensive constants for SelectionDAG. 
if (getOptLevel() != CodeGenOpt::None && !DisableConstantHoisting) addPass(createConstantHoistingPass()); + + if (getOptLevel() != CodeGenOpt::None && !DisablePartialLibcallInlining) + addPass(createPartiallyInlineLibCallsPass()); } /// Turn exception handling constructs into something the code generators can @@ -416,7 +429,7 @@ void TargetPassConfig::addPassesToHandleExceptions() { // FALLTHROUGH case ExceptionHandling::DwarfCFI: case ExceptionHandling::ARM: - case ExceptionHandling::WinEH: + case ExceptionHandling::ItaniumWinEH: addPass(createDwarfEHPass(TM)); break; case ExceptionHandling::None: @@ -433,6 +446,7 @@ void TargetPassConfig::addPassesToHandleExceptions() { void TargetPassConfig::addCodeGenPrepare() { if (getOptLevel() != CodeGenOpt::None && !DisableCGP) addPass(createCodeGenPreparePass(TM)); + addPass(createRewriteSymbolsPass()); } /// Add common passes that perform LLVM IR to IR transforms in preparation for @@ -601,6 +615,9 @@ void TargetPassConfig::addMachineSSAOptimization() { printAndVerify("After Machine LICM, CSE and Sinking passes"); addPass(&PeepholeOptimizerID); + // Clean-up the dead code that may have been generated by peephole + // rewriting. + addPass(&DeadMachineInstructionElimID); printAndVerify("After codegen peephole optimization pass"); } @@ -675,6 +692,12 @@ FunctionPass *TargetPassConfig::createRegAllocPass(bool Optimized) { return createTargetRegisterAllocator(Optimized); } +/// Return true if the default global register allocator is in use and +/// has not be overriden on the command line with '-regalloc=...' +bool TargetPassConfig::usingDefaultRegAlloc() const { + return RegAlloc.getNumOccurrences() == 0; +} + /// Add the minimum set of target-independent passes that are required for /// register allocation. No coalescing or scheduling. void TargetPassConfig::addFastRegAlloc(FunctionPass *RegAllocPass) { @@ -709,6 +732,7 @@ void TargetPassConfig::addOptimizedRegAlloc(FunctionPass *RegAllocPass) { addPass(&TwoAddressInstructionPassID); addPass(&RegisterCoalescerID); + printAndVerify("After Register Coalescing"); // PreRA instruction scheduling. if (addPass(&MachineSchedulerID)) diff --git a/lib/CodeGen/PeepholeOptimizer.cpp b/lib/CodeGen/PeepholeOptimizer.cpp index 716cb1f..a296aea 100644 --- a/lib/CodeGen/PeepholeOptimizer.cpp +++ b/lib/CodeGen/PeepholeOptimizer.cpp @@ -46,7 +46,7 @@ // if it loads to virtual registers and the virtual register defined has // a single use. // -// - Optimize Copies and Bitcast: +// - Optimize Copies and Bitcast (more generally, target specific copies): // // Rewrite copies and bitcasts to avoid cross register bank copies // when possible. 
@@ -78,6 +78,8 @@ #include "llvm/Support/Debug.h" #include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/Target/TargetSubtargetInfo.h" +#include using namespace llvm; #define DEBUG_TYPE "peephole-opt" @@ -92,7 +94,7 @@ DisablePeephole("disable-peephole", cl::Hidden, cl::init(false), cl::desc("Disable the peephole optimizer")); static cl::opt -DisableAdvCopyOpt("disable-adv-copy-opt", cl::Hidden, cl::init(true), +DisableAdvCopyOpt("disable-adv-copy-opt", cl::Hidden, cl::init(false), cl::desc("Disable advanced copy optimization")); STATISTIC(NumReuse, "Number of extension results reused"); @@ -100,12 +102,13 @@ STATISTIC(NumCmps, "Number of compares eliminated"); STATISTIC(NumImmFold, "Number of move immediate folded"); STATISTIC(NumLoadFold, "Number of loads folded"); STATISTIC(NumSelects, "Number of selects optimized"); -STATISTIC(NumCopiesBitcasts, "Number of copies/bitcasts optimized"); +STATISTIC(NumUncoalescableCopies, "Number of uncoalescable copies optimized"); +STATISTIC(NumRewrittenCopies, "Number of copies rewritten"); namespace { class PeepholeOptimizer : public MachineFunctionPass { - const TargetMachine *TM; const TargetInstrInfo *TII; + const TargetRegisterInfo *TRI; MachineRegisterInfo *MRI; MachineDominatorTree *DT; // Machine dominator tree @@ -129,9 +132,14 @@ namespace { private: bool optimizeCmpInstr(MachineInstr *MI, MachineBasicBlock *MBB); bool optimizeExtInstr(MachineInstr *MI, MachineBasicBlock *MBB, - SmallPtrSet &LocalMIs); + SmallPtrSetImpl &LocalMIs); bool optimizeSelect(MachineInstr *MI); + bool optimizeCondBranch(MachineInstr *MI); bool optimizeCopyOrBitcast(MachineInstr *MI); + bool optimizeCoalescableCopy(MachineInstr *MI); + bool optimizeUncoalescableCopy(MachineInstr *MI, + SmallPtrSetImpl &LocalMIs); + bool findNextSource(unsigned &Reg, unsigned &SubReg); bool isMoveImmediate(MachineInstr *MI, SmallSet &ImmDefRegs, DenseMap &ImmDefMIs); @@ -140,6 +148,25 @@ namespace { DenseMap &ImmDefMIs); bool isLoadFoldable(MachineInstr *MI, SmallSet &FoldAsLoadDefCandidates); + + /// \brief Check whether \p MI is understood by the register coalescer + /// but may require some rewriting. + bool isCoalescableCopy(const MachineInstr &MI) { + // SubregToRegs are not interesting, because they are already register + // coalescer friendly. + return MI.isCopy() || (!DisableAdvCopyOpt && + (MI.isRegSequence() || MI.isInsertSubreg() || + MI.isExtractSubreg())); + } + + /// \brief Check whether \p MI is a copy like instruction that is + /// not recognized by the register coalescer. + bool isUncoalescableCopy(const MachineInstr &MI) { + return MI.isBitcast() || + (!DisableAdvCopyOpt && + (MI.isRegSequenceLike() || MI.isInsertSubregLike() || + MI.isExtractSubregLike())); + } }; /// \brief Helper class to track the possible sources of a value defined by @@ -176,63 +203,87 @@ namespace { /// the ValueTracker class but that would have complicated the code of /// the users of this class. bool UseAdvancedTracking; - /// Optional MachineRegisterInfo used to perform some complex + /// MachineRegisterInfo used to perform tracking. + const MachineRegisterInfo &MRI; + /// Optional TargetInstrInfo used to perform some complex /// tracking. - const MachineRegisterInfo *MRI; + const TargetInstrInfo *TII; /// \brief Dispatcher to the right underlying implementation of /// getNextSource. 
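Two ADT idioms recur in these hunks: helper signatures switch from SmallPtrSet<T, N> & to the size-erased base SmallPtrSetImpl<T> &, so one helper serves callers regardless of the inline size they chose, and SmallPtrSet::insert() now returns a std::pair<iterator, bool>, which is why the PHIElimination hunk earlier tests .insert(&opBlock).second. A small self-contained sketch (the helper name recordOnce is made up):

    #include "llvm/ADT/SmallPtrSet.h"

    // Returns true only the first time Obj is recorded in Seen.
    template <typename T>
    static bool recordOnce(llvm::SmallPtrSetImpl<T *> &Seen, T *Obj) {
      return Seen.insert(Obj).second;
    }

    // Callers may pick any inline size without changing the helper:
    //   llvm::SmallPtrSet<MachineInstr *, 16> LocalMIs;
    //   if (recordOnce(LocalMIs, MI)) { /* first time we see MI */ }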
- bool getNextSourceImpl(unsigned &SrcIdx, unsigned &SrcSubReg); + bool getNextSourceImpl(unsigned &SrcReg, unsigned &SrcSubReg); /// \brief Specialized version of getNextSource for Copy instructions. - bool getNextSourceFromCopy(unsigned &SrcIdx, unsigned &SrcSubReg); + bool getNextSourceFromCopy(unsigned &SrcReg, unsigned &SrcSubReg); /// \brief Specialized version of getNextSource for Bitcast instructions. - bool getNextSourceFromBitcast(unsigned &SrcIdx, unsigned &SrcSubReg); + bool getNextSourceFromBitcast(unsigned &SrcReg, unsigned &SrcSubReg); /// \brief Specialized version of getNextSource for RegSequence /// instructions. - bool getNextSourceFromRegSequence(unsigned &SrcIdx, unsigned &SrcSubReg); + bool getNextSourceFromRegSequence(unsigned &SrcReg, unsigned &SrcSubReg); /// \brief Specialized version of getNextSource for InsertSubreg /// instructions. - bool getNextSourceFromInsertSubreg(unsigned &SrcIdx, unsigned &SrcSubReg); + bool getNextSourceFromInsertSubreg(unsigned &SrcReg, unsigned &SrcSubReg); /// \brief Specialized version of getNextSource for ExtractSubreg /// instructions. - bool getNextSourceFromExtractSubreg(unsigned &SrcIdx, unsigned &SrcSubReg); + bool getNextSourceFromExtractSubreg(unsigned &SrcReg, unsigned &SrcSubReg); /// \brief Specialized version of getNextSource for SubregToReg /// instructions. - bool getNextSourceFromSubregToReg(unsigned &SrcIdx, unsigned &SrcSubReg); + bool getNextSourceFromSubregToReg(unsigned &SrcReg, unsigned &SrcSubReg); public: - /// \brief Create a ValueTracker instance for the value defines by \p MI - /// at the operand index \p DefIdx. + /// \brief Create a ValueTracker instance for the value defined by \p Reg. /// \p DefSubReg represents the sub register index the value tracker will - /// track. It does not need to match the sub register index used in \p MI. + /// track. It does not need to match the sub register index used in the + /// definition of \p Reg. /// \p UseAdvancedTracking specifies whether or not the value tracker looks /// through complex instructions. By default (false), it handles only copy /// and bitcast instructions. - /// \p MRI useful to perform some complex checks. + /// If \p Reg is a physical register, a value tracker constructed with + /// this constructor will not find any alternative source. + /// Indeed, when \p Reg is a physical register that constructor does not + /// know which definition of \p Reg it should track. + /// Use the next constructor to track a physical register. + ValueTracker(unsigned Reg, unsigned DefSubReg, + const MachineRegisterInfo &MRI, + bool UseAdvancedTracking = false, + const TargetInstrInfo *TII = nullptr) + : Def(nullptr), DefIdx(0), DefSubReg(DefSubReg), Reg(Reg), + UseAdvancedTracking(UseAdvancedTracking), MRI(MRI), TII(TII) { + if (!TargetRegisterInfo::isPhysicalRegister(Reg)) { + Def = MRI.getVRegDef(Reg); + DefIdx = MRI.def_begin(Reg).getOperandNo(); + } + } + + /// \brief Create a ValueTracker instance for the value defined by + /// the pair \p MI, \p DefIdx. + /// Unlike the other constructor, the value tracker produced by this one + /// may be able to find a new source when the definition is a physical + /// register. + /// This could be useful to rewrite target specific instructions into + /// generic copy instructions. 
ValueTracker(const MachineInstr &MI, unsigned DefIdx, unsigned DefSubReg, + const MachineRegisterInfo &MRI, bool UseAdvancedTracking = false, - const MachineRegisterInfo *MRI = nullptr) + const TargetInstrInfo *TII = nullptr) : Def(&MI), DefIdx(DefIdx), DefSubReg(DefSubReg), - UseAdvancedTracking(UseAdvancedTracking), MRI(MRI) { - assert(Def->getOperand(DefIdx).isDef() && - Def->getOperand(DefIdx).isReg() && - "Definition does not match machine instruction"); - // Initially the value is in the defined register. + UseAdvancedTracking(UseAdvancedTracking), MRI(MRI), TII(TII) { + assert(DefIdx < Def->getDesc().getNumDefs() && + Def->getOperand(DefIdx).isReg() && "Invalid definition"); Reg = Def->getOperand(DefIdx).getReg(); } /// \brief Following the use-def chain, get the next available source /// for the tracked value. - /// When the returned value is not nullptr, getReg() gives the register + /// When the returned value is not nullptr, \p SrcReg gives the register /// that contain the tracked value. /// \note The sub register index returned in \p SrcSubReg must be used - /// on that getReg() to access the actual value. + /// on \p SrcReg to access the actual value. /// \return Unless the returned value is nullptr (i.e., no source found), - /// \p SrcIdx gives the index of the next source in the returned - /// instruction and \p SrcSubReg the index to be used on that source to - /// get the tracked value. When nullptr is returned, no alternative source - /// has been found. - const MachineInstr *getNextSource(unsigned &SrcIdx, unsigned &SrcSubReg); + /// \p SrcReg gives the register of the next source used in the returned + /// instruction and \p SrcSubReg the sub-register index to be used on that + /// source to get the tracked value. When nullptr is returned, no + /// alternative source has been found. + const MachineInstr *getNextSource(unsigned &SrcReg, unsigned &SrcSubReg); /// \brief Get the last register where the initial value can be found. /// Initially this is the register of the definition. @@ -261,7 +312,7 @@ INITIALIZE_PASS_END(PeepholeOptimizer, "peephole-opts", /// debug uses. bool PeepholeOptimizer:: optimizeExtInstr(MachineInstr *MI, MachineBasicBlock *MBB, - SmallPtrSet &LocalMIs) { + SmallPtrSetImpl &LocalMIs) { unsigned SrcReg, DstReg, SubIdx; if (!TII->isCoalescableExtInstr(*MI, SrcReg, DstReg, SubIdx)) return false; @@ -277,7 +328,7 @@ optimizeExtInstr(MachineInstr *MI, MachineBasicBlock *MBB, // Ensure DstReg can get a register class that actually supports // sub-registers. Don't change the class until we commit. const TargetRegisterClass *DstRC = MRI->getRegClass(DstReg); - DstRC = TM->getRegisterInfo()->getSubClassWithSubReg(DstRC, SubIdx); + DstRC = TRI->getSubClassWithSubReg(DstRC, SubIdx); if (!DstRC) return false; @@ -286,8 +337,8 @@ optimizeExtInstr(MachineInstr *MI, MachineBasicBlock *MBB, // register. // If UseSrcSubIdx is Set, SubIdx also applies to SrcReg, and only uses of // SrcReg:SubIdx should be replaced. - bool UseSrcSubIdx = TM->getRegisterInfo()-> - getSubClassWithSubReg(MRI->getRegClass(SrcReg), SubIdx) != nullptr; + bool UseSrcSubIdx = + TRI->getSubClassWithSubReg(MRI->getRegClass(SrcReg), SubIdx) != nullptr; // The source has other uses. See if we can replace the other uses with use of // the result of the extension. 
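ValueTracker now has two constructors, and the choice matters for physical registers. A short fragment showing how each is meant to be called, assuming we are inside PeepholeOptimizer.cpp (where ValueTracker is defined) and that Reg, DefSubReg, CurMI, DefIdx, MRI and TII are placeholder values the caller already has:

    // 1) Track the value held by a virtual register. The tracker finds the
    //    defining instruction through MRI itself; given a physical register
    //    this form reports no alternative source.
    ValueTracker VRegTracker(Reg, DefSubReg, *MRI,
                             /*UseAdvancedTracking=*/true, TII);

    // 2) Track the value defined by operand DefIdx of a known instruction.
    //    This form also works when that definition is a physical register,
    //    e.g. when rewriting target-specific copies into generic COPYs.
    ValueTracker DefTracker(*CurMI, DefIdx, DefSubReg, *MRI,
                            /*UseAdvancedTracking=*/true, TII);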
@@ -447,6 +498,12 @@ bool PeepholeOptimizer::optimizeSelect(MachineInstr *MI) { return true; } +/// \brief Check if a simpler conditional branch can be +// generated +bool PeepholeOptimizer::optimizeCondBranch(MachineInstr *MI) { + return TII->optimizeCondBranch(MI); +} + /// \brief Check if the registers defined by the pair (RegisterClass, SubReg) /// share the same register file. static bool shareSameRegisterFile(const TargetRegisterInfo &TRI, @@ -477,85 +534,34 @@ static bool shareSameRegisterFile(const TargetRegisterInfo &TRI, return TRI.getCommonSubClass(DefRC, SrcRC) != nullptr; } -/// \brief Get the index of the definition and source for \p Copy -/// instruction. -/// \pre Copy.isCopy() or Copy.isBitcast(). -/// \return True if the Copy instruction has only one register source -/// and one register definition. Otherwise, \p DefIdx and \p SrcIdx -/// are invalid. -static bool getCopyOrBitcastDefUseIdx(const MachineInstr &Copy, - unsigned &DefIdx, unsigned &SrcIdx) { - assert((Copy.isCopy() || Copy.isBitcast()) && "Wrong operation type."); - if (Copy.isCopy()) { - // Copy instruction are supposed to be: Def = Src. - if (Copy.getDesc().getNumOperands() != 2) - return false; - DefIdx = 0; - SrcIdx = 1; - assert(Copy.getOperand(DefIdx).isDef() && "Use comes before def!"); - return true; - } - // Bitcast case. - // Bitcasts with more than one def are not supported. - if (Copy.getDesc().getNumDefs() != 1) - return false; - // Initialize SrcIdx to an undefined operand. - SrcIdx = Copy.getDesc().getNumOperands(); - for (unsigned OpIdx = 0, EndOpIdx = SrcIdx; OpIdx != EndOpIdx; ++OpIdx) { - const MachineOperand &MO = Copy.getOperand(OpIdx); - if (!MO.isReg() || !MO.getReg()) - continue; - if (MO.isDef()) - DefIdx = OpIdx; - else if (SrcIdx != EndOpIdx) - // Multiple sources? - return false; - SrcIdx = OpIdx; - } - return true; -} - -/// \brief Optimize a copy or bitcast instruction to avoid cross -/// register bank copy. The optimization looks through a chain of -/// copies and try to find a source that has a compatible register -/// class. -/// Two register classes are considered to be compatible if they share -/// the same register bank. -/// New copies issued by this optimization are register allocator -/// friendly. This optimization does not remove any copy as it may -/// overconstraint the register allocator, but replaces some when -/// possible. -/// \pre \p MI is a Copy (MI->isCopy() is true) -/// \return True, when \p MI has been optimized. In that case, \p MI has -/// been removed from its parent. -bool PeepholeOptimizer::optimizeCopyOrBitcast(MachineInstr *MI) { - unsigned DefIdx, SrcIdx; - if (!MI || !getCopyOrBitcastDefUseIdx(*MI, DefIdx, SrcIdx)) +/// \brief Try to find the next source that share the same register file +/// for the value defined by \p Reg and \p SubReg. +/// When true is returned, \p Reg and \p SubReg are updated with the +/// register number and sub-register index of the new source. +/// \return False if no alternative sources are available. True otherwise. +bool PeepholeOptimizer::findNextSource(unsigned &Reg, unsigned &SubReg) { + // Do not try to find a new source for a physical register. + // So far we do not have any motivating example for doing that. + // Thus, instead of maintaining untested code, we will revisit that if + // that changes at some point. 
+ if (TargetRegisterInfo::isPhysicalRegister(Reg)) return false; - const MachineOperand &MODef = MI->getOperand(DefIdx); - assert(MODef.isReg() && "Copies must be between registers."); - unsigned Def = MODef.getReg(); - - if (TargetRegisterInfo::isPhysicalRegister(Def)) - return false; - - const TargetRegisterClass *DefRC = MRI->getRegClass(Def); - unsigned DefSubReg = MODef.getSubReg(); + const TargetRegisterClass *DefRC = MRI->getRegClass(Reg); + unsigned DefSubReg = SubReg; unsigned Src; unsigned SrcSubReg; bool ShouldRewrite = false; - const TargetRegisterInfo &TRI = *TM->getRegisterInfo(); // Follow the chain of copies until we reach the top of the use-def chain // or find a more suitable source. - ValueTracker ValTracker(*MI, DefIdx, DefSubReg, !DisableAdvCopyOpt, MRI); + ValueTracker ValTracker(Reg, DefSubReg, *MRI, !DisableAdvCopyOpt, TII); do { - unsigned CopySrcIdx, CopySrcSubReg; - if (!ValTracker.getNextSource(CopySrcIdx, CopySrcSubReg)) + unsigned CopySrcReg, CopySrcSubReg; + if (!ValTracker.getNextSource(CopySrcReg, CopySrcSubReg)) break; - Src = ValTracker.getReg(); + Src = CopySrcReg; SrcSubReg = CopySrcSubReg; // Do not extend the live-ranges of physical registers as they add @@ -569,29 +575,411 @@ bool PeepholeOptimizer::optimizeCopyOrBitcast(MachineInstr *MI) { const TargetRegisterClass *SrcRC = MRI->getRegClass(Src); // If this source does not incur a cross register bank copy, use it. - ShouldRewrite = shareSameRegisterFile(TRI, DefRC, DefSubReg, SrcRC, + ShouldRewrite = shareSameRegisterFile(*TRI, DefRC, DefSubReg, SrcRC, SrcSubReg); } while (!ShouldRewrite); // If we did not find a more suitable source, there is nothing to optimize. - if (!ShouldRewrite || Src == MI->getOperand(SrcIdx).getReg()) + if (!ShouldRewrite || Src == Reg) return false; - // Rewrite the copy to avoid a cross register bank penalty. - unsigned NewVR = TargetRegisterInfo::isPhysicalRegister(Def) ? Def : - MRI->createVirtualRegister(DefRC); - MachineInstr *NewCopy = BuildMI(*MI->getParent(), MI, MI->getDebugLoc(), - TII->get(TargetOpcode::COPY), NewVR) - .addReg(Src, 0, SrcSubReg); - NewCopy->getOperand(0).setSubReg(DefSubReg); - - MRI->replaceRegWith(Def, NewVR); - MRI->clearKillFlags(NewVR); - // We extended the lifetime of Src. - // Clear the kill flags to account for that. - MRI->clearKillFlags(Src); + Reg = Src; + SubReg = SrcSubReg; + return true; +} + +namespace { +/// \brief Helper class to rewrite the arguments of a copy-like instruction. +class CopyRewriter { +protected: + /// The copy-like instruction. + MachineInstr &CopyLike; + /// The index of the source being rewritten. + unsigned CurrentSrcIdx; + +public: + CopyRewriter(MachineInstr &MI) : CopyLike(MI), CurrentSrcIdx(0) {} + + virtual ~CopyRewriter() {} + + /// \brief Get the next rewritable source (SrcReg, SrcSubReg) and + /// the related value that it affects (TrackReg, TrackSubReg). + /// A source is considered rewritable if its register class and the + /// register class of the related TrackReg may not be register + /// coalescer friendly. In other words, given a copy-like instruction + /// not all the arguments may be returned at rewritable source, since + /// some arguments are none to be register coalescer friendly. + /// + /// Each call of this method moves the current source to the next + /// rewritable source. + /// For instance, let CopyLike be the instruction to rewrite. + /// CopyLike has one definition and one source: + /// dst.dstSubIdx = CopyLike src.srcSubIdx. 
+ /// + /// The first call will give the first rewritable source, i.e., + /// the only source this instruction has: + /// (SrcReg, SrcSubReg) = (src, srcSubIdx). + /// This source defines the whole definition, i.e., + /// (TrackReg, TrackSubReg) = (dst, dstSubIdx). + /// + /// The second and subsequent calls will return false, as there is only one + /// rewritable source. + /// + /// \return True if a rewritable source has been found, false otherwise. + /// The output arguments are valid if and only if true is returned. + virtual bool getNextRewritableSource(unsigned &SrcReg, unsigned &SrcSubReg, + unsigned &TrackReg, + unsigned &TrackSubReg) { + // If CurrentSrcIdx == 1, this means this function has already been + // called once. CopyLike has one definition and one argument, thus, + // there is nothing else to rewrite. + if (!CopyLike.isCopy() || CurrentSrcIdx == 1) + return false; + // This is the first call to getNextRewritableSource. + // Move the CurrentSrcIdx to remember that we made that call. + CurrentSrcIdx = 1; + // The rewritable source is the argument. + const MachineOperand &MOSrc = CopyLike.getOperand(1); + SrcReg = MOSrc.getReg(); + SrcSubReg = MOSrc.getSubReg(); + // What we track are the alternative sources of the definition. + const MachineOperand &MODef = CopyLike.getOperand(0); + TrackReg = MODef.getReg(); + TrackSubReg = MODef.getSubReg(); + return true; + } + + /// \brief Rewrite the current source with \p NewReg and \p NewSubReg + /// if possible. + /// \return True if the rewriting was possible, false otherwise. + virtual bool RewriteCurrentSource(unsigned NewReg, unsigned NewSubReg) { + if (!CopyLike.isCopy() || CurrentSrcIdx != 1) + return false; + MachineOperand &MOSrc = CopyLike.getOperand(CurrentSrcIdx); + MOSrc.setReg(NewReg); + MOSrc.setSubReg(NewSubReg); + return true; + } +}; + +/// \brief Specialized rewriter for INSERT_SUBREG instruction. +class InsertSubregRewriter : public CopyRewriter { +public: + InsertSubregRewriter(MachineInstr &MI) : CopyRewriter(MI) { + assert(MI.isInsertSubreg() && "Invalid instruction"); + } + + /// \brief See CopyRewriter::getNextRewritableSource. + /// Here CopyLike has the following form: + /// dst = INSERT_SUBREG Src1, Src2.src2SubIdx, subIdx. + /// Src1 has the same register class as dst, hence, there is + /// nothing to rewrite. + /// Src2.src2SubIdx may not be register coalescer friendly. + /// Therefore, the first call to this method returns: + /// (SrcReg, SrcSubReg) = (Src2, src2SubIdx). + /// (TrackReg, TrackSubReg) = (dst, subIdx). + /// + /// Subsequent calls will return false. + bool getNextRewritableSource(unsigned &SrcReg, unsigned &SrcSubReg, + unsigned &TrackReg, + unsigned &TrackSubReg) override { + // If we already get the only source we can rewrite, return false. + if (CurrentSrcIdx == 2) + return false; + // We are looking at v2 = INSERT_SUBREG v0, v1, sub0. + CurrentSrcIdx = 2; + const MachineOperand &MOInsertedReg = CopyLike.getOperand(2); + SrcReg = MOInsertedReg.getReg(); + SrcSubReg = MOInsertedReg.getSubReg(); + const MachineOperand &MODef = CopyLike.getOperand(0); + + // We want to track something that is compatible with the + // partial definition. + TrackReg = MODef.getReg(); + if (MODef.getSubReg()) + // Bails if we have to compose sub-register indices.
+ return false; + TrackSubReg = (unsigned)CopyLike.getOperand(3).getImm(); + return true; + } + bool RewriteCurrentSource(unsigned NewReg, unsigned NewSubReg) override { + if (CurrentSrcIdx != 2) + return false; + // We are rewriting the inserted reg. + MachineOperand &MO = CopyLike.getOperand(CurrentSrcIdx); + MO.setReg(NewReg); + MO.setSubReg(NewSubReg); + return true; + } +}; + +/// \brief Specialized rewriter for EXTRACT_SUBREG instruction. +class ExtractSubregRewriter : public CopyRewriter { + const TargetInstrInfo &TII; + +public: + ExtractSubregRewriter(MachineInstr &MI, const TargetInstrInfo &TII) + : CopyRewriter(MI), TII(TII) { + assert(MI.isExtractSubreg() && "Invalid instruction"); + } + + /// \brief See CopyRewriter::getNextRewritableSource. + /// Here CopyLike has the following form: + /// dst.dstSubIdx = EXTRACT_SUBREG Src, subIdx. + /// There is only one rewritable source: Src.subIdx, + /// which defines dst.dstSubIdx. + bool getNextRewritableSource(unsigned &SrcReg, unsigned &SrcSubReg, + unsigned &TrackReg, + unsigned &TrackSubReg) override { + // If we already get the only source we can rewrite, return false. + if (CurrentSrcIdx == 1) + return false; + // We are looking at v1 = EXTRACT_SUBREG v0, sub0. + CurrentSrcIdx = 1; + const MachineOperand &MOExtractedReg = CopyLike.getOperand(1); + SrcReg = MOExtractedReg.getReg(); + // If we have to compose sub-register indices, bails out. + if (MOExtractedReg.getSubReg()) + return false; + + SrcSubReg = CopyLike.getOperand(2).getImm(); + + // We want to track something that is compatible with the definition. + const MachineOperand &MODef = CopyLike.getOperand(0); + TrackReg = MODef.getReg(); + TrackSubReg = MODef.getSubReg(); + return true; + } + + bool RewriteCurrentSource(unsigned NewReg, unsigned NewSubReg) override { + // The only source we can rewrite is the input register. + if (CurrentSrcIdx != 1) + return false; + + CopyLike.getOperand(CurrentSrcIdx).setReg(NewReg); + + // If we find a source that does not require to extract something, + // rewrite the operation with a copy. + if (!NewSubReg) { + // Move the current index to an invalid position. + // We do not want another call to this method to be able + // to do any change. + CurrentSrcIdx = -1; + // Rewrite the operation as a COPY. + // Get rid of the sub-register index. + CopyLike.RemoveOperand(2); + // Morph the operation into a COPY. + CopyLike.setDesc(TII.get(TargetOpcode::COPY)); + return true; + } + CopyLike.getOperand(CurrentSrcIdx + 1).setImm(NewSubReg); + return true; + } +}; + +/// \brief Specialized rewriter for REG_SEQUENCE instruction. +class RegSequenceRewriter : public CopyRewriter { +public: + RegSequenceRewriter(MachineInstr &MI) : CopyRewriter(MI) { + assert(MI.isRegSequence() && "Invalid instruction"); + } + + /// \brief See CopyRewriter::getNextRewritableSource. + /// Here CopyLike has the following form: + /// dst = REG_SEQUENCE Src1.src1SubIdx, subIdx1, Src2.src2SubIdx, subIdx2. + /// Each call will return a different source, walking all the available + /// source. + /// + /// The first call returns: + /// (SrcReg, SrcSubReg) = (Src1, src1SubIdx). + /// (TrackReg, TrackSubReg) = (dst, subIdx1). + /// + /// The second call returns: + /// (SrcReg, SrcSubReg) = (Src2, src2SubIdx). + /// (TrackReg, TrackSubReg) = (dst, subIdx2). + /// + /// And so on, until all the sources have been traversed, then + /// it returns false. 
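The rewriters above all expose the same two-call protocol: getNextRewritableSource() enumerates (source, tracked-value) pairs and RewriteCurrentSource() patches the operand that was last enumerated. The fragment below condenses the driver loop that optimizeCoalescableCopy() implements further down; it is a restatement for readability, not additional code, and assumes the surrounding PeepholeOptimizer members (TII, MRI, findNextSource):

    std::unique_ptr<CopyRewriter> Rewriter(getCopyRewriter(*MI, *TII));
    if (!Rewriter)
      return false; // No rewriter understands this opcode.
    unsigned SrcReg, SrcSubReg, TrackReg, TrackSubReg;
    while (Rewriter->getNextRewritableSource(SrcReg, SrcSubReg, TrackReg,
                                             TrackSubReg)) {
      unsigned NewSrc = TrackReg, NewSubReg = TrackSubReg;
      // Walk the use-def chain; keep the operand if nothing better exists.
      if (!findNextSource(NewSrc, NewSubReg) || NewSrc == SrcReg)
        continue;
      if (Rewriter->RewriteCurrentSource(NewSrc, NewSubReg))
        MRI->clearKillFlags(NewSrc); // NewSrc's live range may have grown.
    }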
+ bool getNextRewritableSource(unsigned &SrcReg, unsigned &SrcSubReg, + unsigned &TrackReg, + unsigned &TrackSubReg) override { + // We are looking at v0 = REG_SEQUENCE v1, sub1, v2, sub2, etc. + + // If this is the first call, move to the first argument. + if (CurrentSrcIdx == 0) { + CurrentSrcIdx = 1; + } else { + // Otherwise, move to the next argument and check that it is valid. + CurrentSrcIdx += 2; + if (CurrentSrcIdx >= CopyLike.getNumOperands()) + return false; + } + const MachineOperand &MOInsertedReg = CopyLike.getOperand(CurrentSrcIdx); + SrcReg = MOInsertedReg.getReg(); + // If we have to compose sub-register indices, bails out. + if ((SrcSubReg = MOInsertedReg.getSubReg())) + return false; + + // We want to track something that is compatible with the related + // partial definition. + TrackSubReg = CopyLike.getOperand(CurrentSrcIdx + 1).getImm(); + + const MachineOperand &MODef = CopyLike.getOperand(0); + TrackReg = MODef.getReg(); + // If we have to compose sub-registers, bails. + return MODef.getSubReg() == 0; + } + + bool RewriteCurrentSource(unsigned NewReg, unsigned NewSubReg) override { + // We cannot rewrite out of bound operands. + // Moreover, rewritable sources are at odd positions. + if ((CurrentSrcIdx & 1) != 1 || CurrentSrcIdx > CopyLike.getNumOperands()) + return false; + + MachineOperand &MO = CopyLike.getOperand(CurrentSrcIdx); + MO.setReg(NewReg); + MO.setSubReg(NewSubReg); + return true; + } +}; +} // End namespace. + +/// \brief Get the appropriated CopyRewriter for \p MI. +/// \return A pointer to a dynamically allocated CopyRewriter or nullptr +/// if no rewriter works for \p MI. +static CopyRewriter *getCopyRewriter(MachineInstr &MI, + const TargetInstrInfo &TII) { + switch (MI.getOpcode()) { + default: + return nullptr; + case TargetOpcode::COPY: + return new CopyRewriter(MI); + case TargetOpcode::INSERT_SUBREG: + return new InsertSubregRewriter(MI); + case TargetOpcode::EXTRACT_SUBREG: + return new ExtractSubregRewriter(MI, TII); + case TargetOpcode::REG_SEQUENCE: + return new RegSequenceRewriter(MI); + } + llvm_unreachable(nullptr); +} + +/// \brief Optimize generic copy instructions to avoid cross +/// register bank copy. The optimization looks through a chain of +/// copies and tries to find a source that has a compatible register +/// class. +/// Two register classes are considered to be compatible if they share +/// the same register bank. +/// New copies issued by this optimization are register allocator +/// friendly. This optimization does not remove any copy as it may +/// overconstraint the register allocator, but replaces some operands +/// when possible. +/// \pre isCoalescableCopy(*MI) is true. +/// \return True, when \p MI has been rewritten. False otherwise. +bool PeepholeOptimizer::optimizeCoalescableCopy(MachineInstr *MI) { + assert(MI && isCoalescableCopy(*MI) && "Invalid argument"); + assert(MI->getDesc().getNumDefs() == 1 && + "Coalescer can understand multiple defs?!"); + const MachineOperand &MODef = MI->getOperand(0); + // Do not rewrite physical definitions. + if (TargetRegisterInfo::isPhysicalRegister(MODef.getReg())) + return false; + + bool Changed = false; + // Get the right rewriter for the current copy. + std::unique_ptr CpyRewriter(getCopyRewriter(*MI, *TII)); + // If none exists, bails out. + if (!CpyRewriter) + return false; + // Rewrite each rewritable source. 
+ unsigned SrcReg, SrcSubReg, TrackReg, TrackSubReg; + while (CpyRewriter->getNextRewritableSource(SrcReg, SrcSubReg, TrackReg, + TrackSubReg)) { + unsigned NewSrc = TrackReg; + unsigned NewSubReg = TrackSubReg; + // Try to find a more suitable source. + // If we failed to do so, or get the actual source, + // move to the next source. + if (!findNextSource(NewSrc, NewSubReg) || SrcReg == NewSrc) + continue; + // Rewrite source. + if (CpyRewriter->RewriteCurrentSource(NewSrc, NewSubReg)) { + // We may have extended the live-range of NewSrc, account for that. + MRI->clearKillFlags(NewSrc); + Changed = true; + } + } + // TODO: We could have a clean-up method to tidy the instruction. + // E.g., v0 = INSERT_SUBREG v1, v1.sub0, sub0 + // => v0 = COPY v1 + // Currently we haven't seen motivating example for that and we + // want to avoid untested code. + NumRewrittenCopies += Changed == true; + return Changed; +} + +/// \brief Optimize copy-like instructions to create +/// register coalescer friendly instruction. +/// The optimization tries to kill-off the \p MI by looking +/// through a chain of copies to find a source that has a compatible +/// register class. +/// If such a source is found, it replace \p MI by a generic COPY +/// operation. +/// \pre isUncoalescableCopy(*MI) is true. +/// \return True, when \p MI has been optimized. In that case, \p MI has +/// been removed from its parent. +/// All COPY instructions created, are inserted in \p LocalMIs. +bool PeepholeOptimizer::optimizeUncoalescableCopy( + MachineInstr *MI, SmallPtrSetImpl &LocalMIs) { + assert(MI && isUncoalescableCopy(*MI) && "Invalid argument"); + + // Check if we can rewrite all the values defined by this instruction. + SmallVector< + std::pair, + 4> RewritePairs; + for (const MachineOperand &MODef : MI->defs()) { + if (MODef.isDead()) + // We can ignore those. + continue; + + // If a physical register is here, this is probably for a good reason. + // Do not rewrite that. + if (TargetRegisterInfo::isPhysicalRegister(MODef.getReg())) + return false; + + // If we do not know how to rewrite this definition, there is no point + // in trying to kill this instruction. + TargetInstrInfo::RegSubRegPair Def(MODef.getReg(), MODef.getSubReg()); + TargetInstrInfo::RegSubRegPair Src = Def; + if (!findNextSource(Src.Reg, Src.SubReg)) + return false; + RewritePairs.push_back(std::make_pair(Def, Src)); + } + // The change is possible for all defs, do it. + for (const auto &PairDefSrc : RewritePairs) { + const auto &Def = PairDefSrc.first; + const auto &Src = PairDefSrc.second; + // Rewrite the "copy" in a way the register coalescer understands. + assert(!TargetRegisterInfo::isPhysicalRegister(Def.Reg) && + "We do not rewrite physical registers"); + const TargetRegisterClass *DefRC = MRI->getRegClass(Def.Reg); + unsigned NewVR = MRI->createVirtualRegister(DefRC); + MachineInstr *NewCopy = BuildMI(*MI->getParent(), MI, MI->getDebugLoc(), + TII->get(TargetOpcode::COPY), + NewVR).addReg(Src.Reg, 0, Src.SubReg); + NewCopy->getOperand(0).setSubReg(Def.SubReg); + if (Def.SubReg) + NewCopy->getOperand(0).setIsUndef(); + LocalMIs.insert(NewCopy); + MRI->replaceRegWith(Def.Reg, NewVR); + MRI->clearKillFlags(NewVR); + // We extended the lifetime of Src. + // Clear the kill flags to account for that. + MRI->clearKillFlags(Src.Reg); + } + // MI is now dead. 
MI->eraseFromParent(); - ++NumCopiesBitcasts; + ++NumUncoalescableCopies; return true; } @@ -673,8 +1061,8 @@ bool PeepholeOptimizer::runOnMachineFunction(MachineFunction &MF) { if (DisablePeephole) return false; - TM = &MF.getTarget(); - TII = TM->getInstrInfo(); + TII = MF.getSubtarget().getInstrInfo(); + TRI = MF.getSubtarget().getRegisterInfo(); MRI = &MF.getRegInfo(); DT = Aggressive ? &getAnalysis() : nullptr; @@ -684,7 +1072,7 @@ bool PeepholeOptimizer::runOnMachineFunction(MachineFunction &MF) { MachineBasicBlock *MBB = &*I; bool SeenMoveImm = false; - SmallPtrSet LocalMIs; + SmallPtrSet LocalMIs; SmallSet ImmDefRegs; DenseMap ImmDefMIs; SmallSet FoldAsLoadDefCandidates; @@ -711,7 +1099,8 @@ bool PeepholeOptimizer::runOnMachineFunction(MachineFunction &MF) { if (MI->mayStore() || MI->isCall()) FoldAsLoadDefCandidates.clear(); - if (((MI->isBitcast() || MI->isCopy()) && optimizeCopyOrBitcast(MI)) || + if ((isUncoalescableCopy(*MI) && + optimizeUncoalescableCopy(MI, LocalMIs)) || (MI->isCompare() && optimizeCmpInstr(MI, MBB)) || (MI->isSelect() && optimizeSelect(MI))) { // MI is deleted. @@ -720,6 +1109,17 @@ bool PeepholeOptimizer::runOnMachineFunction(MachineFunction &MF) { continue; } + if (MI->isConditionalBranch() && optimizeCondBranch(MI)) { + Changed = true; + continue; + } + + if (isCoalescableCopy(*MI) && optimizeCoalescableCopy(MI)) { + // MI is just rewritten. + Changed = true; + continue; + } + if (isMoveImmediate(MI, ImmDefRegs, ImmDefMIs)) { SeenMoveImm = true; } else { @@ -781,24 +1181,25 @@ bool PeepholeOptimizer::runOnMachineFunction(MachineFunction &MF) { return Changed; } -bool ValueTracker::getNextSourceFromCopy(unsigned &SrcIdx, +bool ValueTracker::getNextSourceFromCopy(unsigned &SrcReg, unsigned &SrcSubReg) { assert(Def->isCopy() && "Invalid definition"); // Copy instruction are supposed to be: Def = Src. // If someone breaks this assumption, bad things will happen everywhere. - assert(Def->getDesc().getNumOperands() == 2 && "Invalid number of operands"); + assert(Def->getNumOperands() == 2 && "Invalid number of operands"); if (Def->getOperand(DefIdx).getSubReg() != DefSubReg) // If we look for a different subreg, it means we want a subreg of src. // Bails as we do not support composing subreg yet. return false; // Otherwise, we want the whole source. - SrcIdx = 1; - SrcSubReg = Def->getOperand(SrcIdx).getSubReg(); + const MachineOperand &Src = Def->getOperand(1); + SrcReg = Src.getReg(); + SrcSubReg = Src.getSubReg(); return true; } -bool ValueTracker::getNextSourceFromBitcast(unsigned &SrcIdx, +bool ValueTracker::getNextSourceFromBitcast(unsigned &SrcReg, unsigned &SrcSubReg) { assert(Def->isBitcast() && "Invalid definition"); @@ -814,7 +1215,7 @@ bool ValueTracker::getNextSourceFromBitcast(unsigned &SrcIdx, // Bails as we do not support composing subreg yet. 
return false; - SrcIdx = Def->getDesc().getNumOperands(); + unsigned SrcIdx = Def->getNumOperands(); for (unsigned OpIdx = DefIdx + 1, EndOpIdx = SrcIdx; OpIdx != EndOpIdx; ++OpIdx) { const MachineOperand &MO = Def->getOperand(OpIdx); @@ -826,13 +1227,16 @@ bool ValueTracker::getNextSourceFromBitcast(unsigned &SrcIdx, return false; SrcIdx = OpIdx; } - SrcSubReg = Def->getOperand(SrcIdx).getSubReg(); + const MachineOperand &Src = Def->getOperand(SrcIdx); + SrcReg = Src.getReg(); + SrcSubReg = Src.getSubReg(); return true; } -bool ValueTracker::getNextSourceFromRegSequence(unsigned &SrcIdx, +bool ValueTracker::getNextSourceFromRegSequence(unsigned &SrcReg, unsigned &SrcSubReg) { - assert(Def->isRegSequence() && "Invalid definition"); + assert((Def->isRegSequence() || Def->isRegSequenceLike()) && + "Invalid definition"); if (Def->getOperand(DefIdx).getSubReg()) // If we are composing subreg, bails out. @@ -851,19 +1255,26 @@ bool ValueTracker::getNextSourceFromRegSequence(unsigned &SrcIdx, // turn that into an assertion. return false; + if (!TII) + // We could handle the REG_SEQUENCE here, but we do not want to + // duplicate the code from the generic TII. + return false; + + SmallVector RegSeqInputRegs; + if (!TII->getRegSequenceInputs(*Def, DefIdx, RegSeqInputRegs)) + return false; + // We are looking at: // Def = REG_SEQUENCE v0, sub0, v1, sub1, ... // Check if one of the operand defines the subreg we are interested in. - for (unsigned OpIdx = DefIdx + 1, EndOpIdx = Def->getNumOperands(); - OpIdx != EndOpIdx; OpIdx += 2) { - const MachineOperand &MOSubIdx = Def->getOperand(OpIdx + 1); - assert(MOSubIdx.isImm() && - "One of the subindex of the reg_sequence is not an immediate"); - if (MOSubIdx.getImm() == DefSubReg) { - assert(Def->getOperand(OpIdx).isReg() && - "One of the source of the reg_sequence is not a register"); - SrcIdx = OpIdx; - SrcSubReg = Def->getOperand(SrcIdx).getSubReg(); + for (auto &RegSeqInput : RegSeqInputRegs) { + if (RegSeqInput.SubIdx == DefSubReg) { + if (RegSeqInput.SubReg) + // Bails if we have to compose sub registers. + return false; + + SrcReg = RegSeqInput.Reg; + SrcSubReg = RegSeqInput.SubReg; return true; } } @@ -874,61 +1285,68 @@ bool ValueTracker::getNextSourceFromRegSequence(unsigned &SrcIdx, return false; } -bool ValueTracker::getNextSourceFromInsertSubreg(unsigned &SrcIdx, +bool ValueTracker::getNextSourceFromInsertSubreg(unsigned &SrcReg, unsigned &SrcSubReg) { - assert(Def->isInsertSubreg() && "Invalid definition"); + assert((Def->isInsertSubreg() || Def->isInsertSubregLike()) && + "Invalid definition"); + if (Def->getOperand(DefIdx).getSubReg()) // If we are composing subreg, bails out. // Same remark as getNextSourceFromRegSequence. // I.e., this may be turned into an assert. return false; + if (!TII) + // We could handle the REG_SEQUENCE here, but we do not want to + // duplicate the code from the generic TII. + return false; + + TargetInstrInfo::RegSubRegPair BaseReg; + TargetInstrInfo::RegSubRegPairAndIdx InsertedReg; + if (!TII->getInsertSubregInputs(*Def, DefIdx, BaseReg, InsertedReg)) + return false; + // We are looking at: // Def = INSERT_SUBREG v0, v1, sub1 // There are two cases: // 1. DefSubReg == sub1, get v1. // 2. DefSubReg != sub1, the value may be available through v0. - // #1 Check if the inserted register matches the require sub index. 
- unsigned InsertedSubReg = Def->getOperand(3).getImm(); - if (InsertedSubReg == DefSubReg) { - SrcIdx = 2; - SrcSubReg = Def->getOperand(SrcIdx).getSubReg(); + // #1 Check if the inserted register matches the required sub index. + if (InsertedReg.SubIdx == DefSubReg) { + SrcReg = InsertedReg.Reg; + SrcSubReg = InsertedReg.SubReg; return true; } // #2 Otherwise, if the sub register we are looking for is not partial // defined by the inserted element, we can look through the main // register (v0). - // To check the overlapping we need a MRI and a TRI. - if (!MRI) - return false; - const MachineOperand &MODef = Def->getOperand(DefIdx); - const MachineOperand &MOBase = Def->getOperand(1); // If the result register (Def) and the base register (v0) do not // have the same register class or if we have to compose // subregisters, bails out. - if (MRI->getRegClass(MODef.getReg()) != MRI->getRegClass(MOBase.getReg()) || - MOBase.getSubReg()) + if (MRI.getRegClass(MODef.getReg()) != MRI.getRegClass(BaseReg.Reg) || + BaseReg.SubReg) return false; - // Get the TRI and check if inserted sub register overlaps with the - // sub register we are tracking. - const TargetRegisterInfo *TRI = MRI->getTargetRegisterInfo(); + // Get the TRI and check if the inserted sub-register overlaps with the + // sub-register we are tracking. + const TargetRegisterInfo *TRI = MRI.getTargetRegisterInfo(); if (!TRI || (TRI->getSubRegIndexLaneMask(DefSubReg) & - TRI->getSubRegIndexLaneMask(InsertedSubReg)) != 0) + TRI->getSubRegIndexLaneMask(InsertedReg.SubIdx)) != 0) return false; // At this point, the value is available in v0 via the same subreg // we used for Def. - SrcIdx = 1; + SrcReg = BaseReg.Reg; SrcSubReg = DefSubReg; return true; } -bool ValueTracker::getNextSourceFromExtractSubreg(unsigned &SrcIdx, +bool ValueTracker::getNextSourceFromExtractSubreg(unsigned &SrcReg, unsigned &SrcSubReg) { - assert(Def->isExtractSubreg() && "Invalid definition"); + assert((Def->isExtractSubreg() || + Def->isExtractSubregLike()) && "Invalid definition"); // We are looking at: // Def = EXTRACT_SUBREG v0, sub0 @@ -937,17 +1355,26 @@ bool ValueTracker::getNextSourceFromExtractSubreg(unsigned &SrcIdx, if (DefSubReg) return false; + if (!TII) + // We could handle the EXTRACT_SUBREG here, but we do not want to + // duplicate the code from the generic TII. + return false; + + TargetInstrInfo::RegSubRegPairAndIdx ExtractSubregInputReg; + if (!TII->getExtractSubregInputs(*Def, DefIdx, ExtractSubregInputReg)) + return false; + // Bails if we have to compose sub registers. // Likewise, if v0.subreg != 0, we would have to compose v0.subreg with sub0. - if (Def->getOperand(1).getSubReg()) + if (ExtractSubregInputReg.SubReg) return false; // Otherwise, the value is available in the v0.sub0. 
- SrcIdx = 1; - SrcSubReg = Def->getOperand(2).getImm(); + SrcReg = ExtractSubregInputReg.Reg; + SrcSubReg = ExtractSubregInputReg.SubIdx; return true; } -bool ValueTracker::getNextSourceFromSubregToReg(unsigned &SrcIdx, +bool ValueTracker::getNextSourceFromSubregToReg(unsigned &SrcReg, unsigned &SrcSubReg) { assert(Def->isSubregToReg() && "Invalid definition"); // We are looking at: @@ -964,37 +1391,37 @@ bool ValueTracker::getNextSourceFromSubregToReg(unsigned &SrcIdx, if (Def->getOperand(2).getSubReg()) return false; - SrcIdx = 2; + SrcReg = Def->getOperand(2).getReg(); SrcSubReg = Def->getOperand(3).getImm(); return true; } -bool ValueTracker::getNextSourceImpl(unsigned &SrcIdx, unsigned &SrcSubReg) { +bool ValueTracker::getNextSourceImpl(unsigned &SrcReg, unsigned &SrcSubReg) { assert(Def && "This method needs a valid definition"); assert( (DefIdx < Def->getDesc().getNumDefs() || Def->getDesc().isVariadic()) && Def->getOperand(DefIdx).isDef() && "Invalid DefIdx"); if (Def->isCopy()) - return getNextSourceFromCopy(SrcIdx, SrcSubReg); + return getNextSourceFromCopy(SrcReg, SrcSubReg); if (Def->isBitcast()) - return getNextSourceFromBitcast(SrcIdx, SrcSubReg); + return getNextSourceFromBitcast(SrcReg, SrcSubReg); // All the remaining cases involve "complex" instructions. // Bails if we did not ask for the advanced tracking. if (!UseAdvancedTracking) return false; - if (Def->isRegSequence()) - return getNextSourceFromRegSequence(SrcIdx, SrcSubReg); - if (Def->isInsertSubreg()) - return getNextSourceFromInsertSubreg(SrcIdx, SrcSubReg); - if (Def->isExtractSubreg()) - return getNextSourceFromExtractSubreg(SrcIdx, SrcSubReg); + if (Def->isRegSequence() || Def->isRegSequenceLike()) + return getNextSourceFromRegSequence(SrcReg, SrcSubReg); + if (Def->isInsertSubreg() || Def->isInsertSubregLike()) + return getNextSourceFromInsertSubreg(SrcReg, SrcSubReg); + if (Def->isExtractSubreg() || Def->isExtractSubregLike()) + return getNextSourceFromExtractSubreg(SrcReg, SrcSubReg); if (Def->isSubregToReg()) - return getNextSourceFromSubregToReg(SrcIdx, SrcSubReg); + return getNextSourceFromSubregToReg(SrcReg, SrcSubReg); return false; } -const MachineInstr *ValueTracker::getNextSource(unsigned &SrcIdx, +const MachineInstr *ValueTracker::getNextSource(unsigned &SrcReg, unsigned &SrcSubReg) { // If we reach a point where we cannot move up in the use-def chain, // there is nothing we can get. @@ -1003,20 +1430,18 @@ const MachineInstr *ValueTracker::getNextSource(unsigned &SrcIdx, const MachineInstr *PrevDef = nullptr; // Try to find the next source. - if (getNextSourceImpl(SrcIdx, SrcSubReg)) { + if (getNextSourceImpl(SrcReg, SrcSubReg)) { // Update definition, definition index, and subregister for the // next call of getNextSource. - const MachineOperand &MO = Def->getOperand(SrcIdx); - assert(MO.isReg() && !MO.isDef() && "Source is invalid"); // Update the current register. - Reg = MO.getReg(); + Reg = SrcReg; // Update the return value before moving up in the use-def chain. PrevDef = Def; // If we can still move up in the use-def chain, move to the next // defintion. 
if (!TargetRegisterInfo::isPhysicalRegister(Reg)) { - Def = MRI->getVRegDef(Reg); - DefIdx = MRI->def_begin(Reg).getOperandNo(); + Def = MRI.getVRegDef(Reg); + DefIdx = MRI.def_begin(Reg).getOperandNo(); DefSubReg = SrcSubReg; return PrevDef; } diff --git a/lib/CodeGen/PostRASchedulerList.cpp b/lib/CodeGen/PostRASchedulerList.cpp index db3933e..89e1d11 100644 --- a/lib/CodeGen/PostRASchedulerList.cpp +++ b/lib/CodeGen/PostRASchedulerList.cpp @@ -41,7 +41,6 @@ #include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetLowering.h" -#include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetRegisterInfo.h" #include "llvm/Target/TargetSubtargetInfo.h" using namespace llvm; @@ -98,6 +97,11 @@ namespace { } bool runOnMachineFunction(MachineFunction &Fn) override; + + bool enablePostRAScheduler( + const TargetSubtargetInfo &ST, CodeGenOpt::Level OptLevel, + TargetSubtargetInfo::AntiDepBreakMode &Mode, + TargetSubtargetInfo::RegClassVector &CriticalPathRCs) const; }; char PostRAScheduler::ID = 0; @@ -132,10 +136,10 @@ namespace { public: SchedulePostRATDList( - MachineFunction &MF, MachineLoopInfo &MLI, MachineDominatorTree &MDT, - AliasAnalysis *AA, const RegisterClassInfo&, - TargetSubtargetInfo::AntiDepBreakMode AntiDepMode, - SmallVectorImpl &CriticalPathRCs); + MachineFunction &MF, MachineLoopInfo &MLI, AliasAnalysis *AA, + const RegisterClassInfo &, + TargetSubtargetInfo::AntiDepBreakMode AntiDepMode, + SmallVectorImpl &CriticalPathRCs); ~SchedulePostRATDList(); @@ -188,16 +192,17 @@ INITIALIZE_PASS(PostRAScheduler, "post-RA-sched", "Post RA top-down list latency scheduler", false, false) SchedulePostRATDList::SchedulePostRATDList( - MachineFunction &MF, MachineLoopInfo &MLI, MachineDominatorTree &MDT, - AliasAnalysis *AA, const RegisterClassInfo &RCI, - TargetSubtargetInfo::AntiDepBreakMode AntiDepMode, - SmallVectorImpl &CriticalPathRCs) - : ScheduleDAGInstrs(MF, MLI, MDT, /*IsPostRA=*/true), AA(AA), EndIndex(0) { - - const TargetMachine &TM = MF.getTarget(); - const InstrItineraryData *InstrItins = TM.getInstrItineraryData(); + MachineFunction &MF, MachineLoopInfo &MLI, AliasAnalysis *AA, + const RegisterClassInfo &RCI, + TargetSubtargetInfo::AntiDepBreakMode AntiDepMode, + SmallVectorImpl &CriticalPathRCs) + : ScheduleDAGInstrs(MF, &MLI, /*IsPostRA=*/true), AA(AA), EndIndex(0) { + + const InstrItineraryData *InstrItins = + MF.getSubtarget().getInstrItineraryData(); HazardRec = - TM.getInstrInfo()->CreateTargetPostRAHazardRecognizer(InstrItins, this); + MF.getSubtarget().getInstrInfo()->CreateTargetPostRAHazardRecognizer( + InstrItins, this); assert((AntiDepMode == TargetSubtargetInfo::ANTIDEP_NONE || MRI.tracksLiveness()) && @@ -245,13 +250,23 @@ void SchedulePostRATDList::dumpSchedule() const { } #endif +bool PostRAScheduler::enablePostRAScheduler( + const TargetSubtargetInfo &ST, + CodeGenOpt::Level OptLevel, + TargetSubtargetInfo::AntiDepBreakMode &Mode, + TargetSubtargetInfo::RegClassVector &CriticalPathRCs) const { + Mode = ST.getAntiDepBreakMode(); + ST.getCriticalPathRCs(CriticalPathRCs); + return ST.enablePostMachineScheduler() && + OptLevel >= ST.getOptLevelToEnablePostRAScheduler(); +} + bool PostRAScheduler::runOnMachineFunction(MachineFunction &Fn) { if (skipOptnoneFunction(*Fn.getFunction())) return false; - TII = Fn.getTarget().getInstrInfo(); + TII = Fn.getSubtarget().getInstrInfo(); MachineLoopInfo &MLI = getAnalysis(); - MachineDominatorTree &MDT = getAnalysis(); AliasAnalysis *AA = &getAnalysis(); TargetPassConfig 
*PassConfig = &getAnalysis(); @@ -267,9 +282,10 @@ bool PostRAScheduler::runOnMachineFunction(MachineFunction &Fn) { } else { // Check that post-RA scheduling is enabled for this target. // This may upgrade the AntiDepMode. - const TargetSubtargetInfo &ST = Fn.getTarget().getSubtarget(); - if (!ST.enablePostRAScheduler(PassConfig->getOptLevel(), AntiDepMode, - CriticalPathRCs)) + const TargetSubtargetInfo &ST = + Fn.getTarget().getSubtarget(); + if (!enablePostRAScheduler(ST, PassConfig->getOptLevel(), + AntiDepMode, CriticalPathRCs)) return false; } @@ -284,7 +300,7 @@ bool PostRAScheduler::runOnMachineFunction(MachineFunction &Fn) { DEBUG(dbgs() << "PostRAScheduler\n"); - SchedulePostRATDList Scheduler(Fn, MLI, MDT, AA, RegClassInfo, AntiDepMode, + SchedulePostRATDList Scheduler(Fn, MLI, AA, RegClassInfo, AntiDepMode, CriticalPathRCs); // Loop over all of the basic blocks @@ -543,10 +559,10 @@ void SchedulePostRATDList::ListScheduleTopDown() { if (HT == ScheduleHazardRecognizer::NoHazard) { if (HazardRec->ShouldPreferAnother(CurSUnit)) { if (!NotPreferredSUnit) { - // If this is the first non-preferred node for this cycle, then - // record it and continue searching for a preferred node. If this - // is not the first non-preferred node, then treat it as though - // there had been a hazard. + // If this is the first non-preferred node for this cycle, then + // record it and continue searching for a preferred node. If this + // is not the first non-preferred node, then treat it as though + // there had been a hazard. NotPreferredSUnit = CurSUnit; continue; } diff --git a/lib/CodeGen/ProcessImplicitDefs.cpp b/lib/CodeGen/ProcessImplicitDefs.cpp index 3129927..b153800 100644 --- a/lib/CodeGen/ProcessImplicitDefs.cpp +++ b/lib/CodeGen/ProcessImplicitDefs.cpp @@ -16,6 +16,7 @@ #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Target/TargetSubtargetInfo.h" using namespace llvm; @@ -138,8 +139,8 @@ bool ProcessImplicitDefs::runOnMachineFunction(MachineFunction &MF) { bool Changed = false; - TII = MF.getTarget().getInstrInfo(); - TRI = MF.getTarget().getRegisterInfo(); + TII = MF.getSubtarget().getInstrInfo(); + TRI = MF.getSubtarget().getRegisterInfo(); MRI = &MF.getRegInfo(); assert(MRI->isSSA() && "ProcessImplicitDefs only works on SSA form."); assert(WorkList.empty() && "Inconsistent worklist state"); diff --git a/lib/CodeGen/PrologEpilogInserter.cpp b/lib/CodeGen/PrologEpilogInserter.cpp index b98d210..06530b9 100644 --- a/lib/CodeGen/PrologEpilogInserter.cpp +++ b/lib/CodeGen/PrologEpilogInserter.cpp @@ -41,6 +41,7 @@ #include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/Target/TargetSubtargetInfo.h" #include using namespace llvm; @@ -110,8 +111,8 @@ typedef SmallSetVector StackObjSet; /// bool PEI::runOnMachineFunction(MachineFunction &Fn) { const Function* F = Fn.getFunction(); - const TargetRegisterInfo *TRI = Fn.getTarget().getRegisterInfo(); - const TargetFrameLowering *TFI = Fn.getTarget().getFrameLowering(); + const TargetRegisterInfo *TRI = Fn.getSubtarget().getRegisterInfo(); + const TargetFrameLowering *TFI = Fn.getSubtarget().getFrameLowering(); assert(!Fn.getRegInfo().getNumVirtRegs() && "Regalloc must assign all vregs"); @@ -185,8 +186,8 @@ bool PEI::runOnMachineFunction(MachineFunction &Fn) { /// variables for the function's frame information and eliminate call frame /// pseudo instructions. 
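The hunks in this area (PostRASchedulerList above, ProcessImplicitDefs and PrologEpilogInserter below, RegAllocFast later) repeatedly replace Fn.getTarget().getX() with queries through the function's subtarget. A minimal sketch of the target-hook lookup they converge on; the helper name is illustrative only.

    #include "llvm/CodeGen/MachineFunction.h"
    #include "llvm/Target/TargetFrameLowering.h"
    #include "llvm/Target/TargetInstrInfo.h"
    #include "llvm/Target/TargetRegisterInfo.h"
    #include "llvm/Target/TargetSubtargetInfo.h"
    using namespace llvm;

    // Fetch per-function target hooks through the subtarget rather than the
    // TargetMachine, matching the accessors used in the hunks around here.
    static void queryTargetHooks(const MachineFunction &MF) {
      const TargetSubtargetInfo &ST = MF.getSubtarget();
      const TargetInstrInfo *TII = ST.getInstrInfo();
      const TargetRegisterInfo *TRI = ST.getRegisterInfo();
      const TargetFrameLowering *TFI = ST.getFrameLowering();
      (void)TII; (void)TRI; (void)TFI; // Silence unused-variable warnings.
    }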
void PEI::calculateCallsInformation(MachineFunction &Fn) { - const TargetInstrInfo &TII = *Fn.getTarget().getInstrInfo(); - const TargetFrameLowering *TFI = Fn.getTarget().getFrameLowering(); + const TargetInstrInfo &TII = *Fn.getSubtarget().getInstrInfo(); + const TargetFrameLowering *TFI = Fn.getSubtarget().getFrameLowering(); MachineFrameInfo *MFI = Fn.getFrameInfo(); unsigned MaxCallFrameSize = 0; @@ -239,8 +240,8 @@ void PEI::calculateCallsInformation(MachineFunction &Fn) { /// calculateCalleeSavedRegisters - Scan the function for modified callee saved /// registers. void PEI::calculateCalleeSavedRegisters(MachineFunction &F) { - const TargetRegisterInfo *RegInfo = F.getTarget().getRegisterInfo(); - const TargetFrameLowering *TFI = F.getTarget().getFrameLowering(); + const TargetRegisterInfo *RegInfo = F.getSubtarget().getRegisterInfo(); + const TargetFrameLowering *TFI = F.getSubtarget().getFrameLowering(); MachineFrameInfo *MFI = F.getFrameInfo(); // Get the callee saved register list... @@ -337,9 +338,9 @@ void PEI::insertCSRSpillsAndRestores(MachineFunction &Fn) { if (CSI.empty()) return; - const TargetInstrInfo &TII = *Fn.getTarget().getInstrInfo(); - const TargetFrameLowering *TFI = Fn.getTarget().getFrameLowering(); - const TargetRegisterInfo *TRI = Fn.getTarget().getRegisterInfo(); + const TargetInstrInfo &TII = *Fn.getSubtarget().getInstrInfo(); + const TargetFrameLowering *TFI = Fn.getSubtarget().getFrameLowering(); + const TargetRegisterInfo *TRI = Fn.getSubtarget().getRegisterInfo(); MachineBasicBlock::iterator I; // Spill using target interface. @@ -445,7 +446,7 @@ AssignProtectedObjSet(const StackObjSet &UnassignedObjs, /// abstract stack objects. /// void PEI::calculateFrameObjectOffsets(MachineFunction &Fn) { - const TargetFrameLowering &TFI = *Fn.getTarget().getFrameLowering(); + const TargetFrameLowering &TFI = *Fn.getSubtarget().getFrameLowering(); StackProtector *SP = &getAnalysis(); bool StackGrowsDown = @@ -515,7 +516,7 @@ void PEI::calculateFrameObjectOffsets(MachineFunction &Fn) { // Make sure the special register scavenging spill slot is closest to the // incoming stack pointer if a frame pointer is required and is closer // to the incoming rather than the final stack pointer. - const TargetRegisterInfo *RegInfo = Fn.getTarget().getRegisterInfo(); + const TargetRegisterInfo *RegInfo = Fn.getSubtarget().getRegisterInfo(); bool EarlyScavengingSlots = (TFI.hasFP(Fn) && TFI.isFPCloseToIncomingSP() && RegInfo->useFPForScavengingIndex(Fn) && @@ -670,7 +671,7 @@ void PEI::calculateFrameObjectOffsets(MachineFunction &Fn) { /// prolog and epilog code to the function. /// void PEI::insertPrologEpilogCode(MachineFunction &Fn) { - const TargetFrameLowering &TFI = *Fn.getTarget().getFrameLowering(); + const TargetFrameLowering &TFI = *Fn.getSubtarget().getFrameLowering(); // Add prologue to the function... TFI.emitPrologue(Fn); @@ -710,8 +711,7 @@ void PEI::replaceFrameIndices(MachineFunction &Fn) { SmallPtrSet Reachable; // Iterate over the reachable blocks in DFS order. - for (df_ext_iterator > - DFI = df_ext_begin(&Fn, Reachable), DFE = df_ext_end(&Fn, Reachable); + for (auto DFI = df_ext_begin(&Fn, Reachable), DFE = df_ext_end(&Fn, Reachable); DFI != DFE; ++DFI) { int SPAdj = 0; // Check the exit state of the DFS stack predecessor. 
@@ -738,11 +738,11 @@ void PEI::replaceFrameIndices(MachineFunction &Fn) { void PEI::replaceFrameIndices(MachineBasicBlock *BB, MachineFunction &Fn, int &SPAdj) { - const TargetMachine &TM = Fn.getTarget(); - assert(TM.getRegisterInfo() && "TM::getRegisterInfo() must be implemented!"); - const TargetInstrInfo &TII = *Fn.getTarget().getInstrInfo(); - const TargetRegisterInfo &TRI = *TM.getRegisterInfo(); - const TargetFrameLowering *TFI = TM.getFrameLowering(); + assert(Fn.getSubtarget().getRegisterInfo() && + "getRegisterInfo() must be implemented!"); + const TargetInstrInfo &TII = *Fn.getSubtarget().getInstrInfo(); + const TargetRegisterInfo &TRI = *Fn.getSubtarget().getRegisterInfo(); + const TargetFrameLowering *TFI = Fn.getSubtarget().getFrameLowering(); bool StackGrowsDown = TFI->getStackGrowthDirection() == TargetFrameLowering::StackGrowsDown; int FrameSetupOpcode = TII.getCallFrameSetupOpcode(); @@ -837,7 +837,8 @@ void PEI::replaceFrameIndices(MachineBasicBlock *BB, MachineFunction &Fn, /// FIXME: Iterating over the instruction stream is unnecessary. We can simply /// iterate over the vreg use list, which at this point only contains machine /// operands for which eliminateFrameIndex need a new scratch reg. -void PEI::scavengeFrameVirtualRegs(MachineFunction &Fn) { +void +PEI::scavengeFrameVirtualRegs(MachineFunction &Fn) { // Run through the instructions and find any virtual registers. for (MachineFunction::iterator BB = Fn.begin(), E = Fn.end(); BB != E; ++BB) { @@ -888,12 +889,16 @@ void PEI::scavengeFrameVirtualRegs(MachineFunction &Fn) { // Replace this reference to the virtual register with the // scratch register. assert (ScratchReg && "Missing scratch register!"); + MachineRegisterInfo &MRI = Fn.getRegInfo(); Fn.getRegInfo().replaceRegWith(Reg, ScratchReg); + + // Make sure MRI now accounts this register as used. + MRI.setPhysRegUsed(ScratchReg); // Because this instruction was processed by the RS before this // register was allocated, make sure that the RS now records the // register as being used. - RS->setUsed(ScratchReg); + RS->setRegUsed(ScratchReg); } } diff --git a/lib/CodeGen/PrologEpilogInserter.h b/lib/CodeGen/PrologEpilogInserter.h index 5a6d39a..f88b8ef 100644 --- a/lib/CodeGen/PrologEpilogInserter.h +++ b/lib/CodeGen/PrologEpilogInserter.h @@ -16,8 +16,8 @@ // //===----------------------------------------------------------------------===// -#ifndef LLVM_CODEGEN_PEI_H -#define LLVM_CODEGEN_PEI_H +#ifndef LLVM_LIB_CODEGEN_PROLOGEPILOGINSERTER_H +#define LLVM_LIB_CODEGEN_PROLOGEPILOGINSERTER_H #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/SparseBitVector.h" diff --git a/lib/CodeGen/PseudoSourceValue.cpp b/lib/CodeGen/PseudoSourceValue.cpp index 12b2c90..b1c341d 100644 --- a/lib/CodeGen/PseudoSourceValue.cpp +++ b/lib/CodeGen/PseudoSourceValue.cpp @@ -107,13 +107,9 @@ bool FixedStackPseudoSourceValue::isConstant(const MachineFrameInfo *MFI) const{ } bool FixedStackPseudoSourceValue::isAliased(const MachineFrameInfo *MFI) const { - // Negative frame indices are used for special things that don't - // appear in LLVM IR. Non-negative indices may be used for things - // like static allocas. if (!MFI) - return FI >= 0; - // Spill slots should not alias others. 
- return !MFI->isFixedObjectIndex(FI) && !MFI->isSpillSlotObjectIndex(FI); + return true; + return MFI->isAliasedObjectIndex(FI); } bool FixedStackPseudoSourceValue::mayAlias(const MachineFrameInfo *MFI) const { diff --git a/lib/CodeGen/RegAllocBase.cpp b/lib/CodeGen/RegAllocBase.cpp index 894aee7..122afd1 100644 --- a/lib/CodeGen/RegAllocBase.cpp +++ b/lib/CodeGen/RegAllocBase.cpp @@ -21,7 +21,6 @@ #include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/VirtRegMap.h" -#include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetRegisterInfo.h" #ifndef NDEBUG #include "llvm/ADT/SparseBitVector.h" @@ -102,7 +101,7 @@ void RegAllocBase::allocatePhysRegs() { // register if possible and populate a list of new live intervals that // result from splitting. DEBUG(dbgs() << "\nselectOrSplit " - << MRI->getRegClass(VirtReg->reg)->getName() + << TRI->getRegClassName(MRI->getRegClass(VirtReg->reg)) << ':' << *VirtReg << " w=" << VirtReg->weight << '\n'); typedef SmallVector VirtRegVec; VirtRegVec SplitVRegs; diff --git a/lib/CodeGen/RegAllocBase.h b/lib/CodeGen/RegAllocBase.h index b333c36..bbd79cd 100644 --- a/lib/CodeGen/RegAllocBase.h +++ b/lib/CodeGen/RegAllocBase.h @@ -34,8 +34,8 @@ // //===----------------------------------------------------------------------===// -#ifndef LLVM_CODEGEN_REGALLOCBASE -#define LLVM_CODEGEN_REGALLOCBASE +#ifndef LLVM_LIB_CODEGEN_REGALLOCBASE_H +#define LLVM_LIB_CODEGEN_REGALLOCBASE_H #include "llvm/CodeGen/LiveInterval.h" #include "llvm/CodeGen/RegisterClassInfo.h" @@ -106,4 +106,4 @@ private: } // end namespace llvm -#endif // !defined(LLVM_CODEGEN_REGALLOCBASE) +#endif diff --git a/lib/CodeGen/RegAllocBasic.cpp b/lib/CodeGen/RegAllocBasic.cpp index b722098..0090332 100644 --- a/lib/CodeGen/RegAllocBasic.cpp +++ b/lib/CodeGen/RegAllocBasic.cpp @@ -33,7 +33,6 @@ #include "llvm/PassAnalysisSupport.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetRegisterInfo.h" #include #include @@ -157,7 +156,7 @@ void RABasic::getAnalysisUsage(AnalysisUsage &AU) const { } void RABasic::releaseMemory() { - SpillerInstance.reset(nullptr); + SpillerInstance.reset(); } diff --git a/lib/CodeGen/RegAllocFast.cpp b/lib/CodeGen/RegAllocFast.cpp index 97b9f76..8fc10b4 100644 --- a/lib/CodeGen/RegAllocFast.cpp +++ b/lib/CodeGen/RegAllocFast.cpp @@ -33,7 +33,7 @@ #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetInstrInfo.h" -#include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetSubtargetInfo.h" #include using namespace llvm; @@ -53,7 +53,6 @@ namespace { RAFast() : MachineFunctionPass(ID), StackSlotForVirtReg(-1), isBulkSpilling(false) {} private: - const TargetMachine *TM; MachineFunction *MF; MachineRegisterInfo *MRI; const TargetRegisterInfo *TRI; @@ -298,7 +297,8 @@ void RAFast::spillVirtReg(MachineBasicBlock::iterator MI, LiveDbgValueMap[LRI->VirtReg]; for (unsigned li = 0, le = LRIDbgValues.size(); li != le; ++li) { MachineInstr *DBG = LRIDbgValues[li]; - const MDNode *MDPtr = DBG->getOperand(2).getMetadata(); + const MDNode *Var = DBG->getDebugVariable(); + const MDNode *Expr = DBG->getDebugExpression(); bool IsIndirect = DBG->isIndirectDebugValue(); uint64_t Offset = IsIndirect ? 
DBG->getOperand(1).getImm() : 0; DebugLoc DL; @@ -308,10 +308,13 @@ void RAFast::spillVirtReg(MachineBasicBlock::iterator MI, DL = (--EI)->getDebugLoc(); } else DL = MI->getDebugLoc(); - MachineBasicBlock *MBB = DBG->getParent(); MachineInstr *NewDV = BuildMI(*MBB, MI, DL, TII->get(TargetOpcode::DBG_VALUE)) - .addFrameIndex(FI).addImm(Offset).addMetadata(MDPtr); + .addFrameIndex(FI) + .addImm(Offset) + .addMetadata(Var) + .addMetadata(Expr); + assert(NewDV->getParent() == MBB && "dangling parent pointer"); (void)NewDV; DEBUG(dbgs() << "Inserting debug info due to spill:" << "\n" << *NewDV); } @@ -545,7 +548,7 @@ RAFast::LiveRegMap::iterator RAFast::allocVirtReg(MachineInstr *MI, } DEBUG(dbgs() << "Allocating " << PrintReg(VirtReg) << " from " - << RC->getName() << "\n"); + << TRI->getRegClassName(RC) << "\n"); unsigned BestReg = 0, BestCost = spillImpossible; for (ArrayRef::iterator I = AO.begin(), E = AO.end(); I != E; ++I){ @@ -705,7 +708,7 @@ void RAFast::handleThroughOperands(MachineInstr *MI, continue; if (MO.isEarlyClobber() || MI->isRegTiedToDefOperand(i) || (MO.getSubReg() && MI->readsVirtualRegister(Reg))) { - if (ThroughRegs.insert(Reg)) + if (ThroughRegs.insert(Reg).second) DEBUG(dbgs() << ' ' << PrintReg(Reg)); } } @@ -862,13 +865,16 @@ void RAFast::AllocateBasicBlock() { // Modify DBG_VALUE now that the value is in a spill slot. bool IsIndirect = MI->isIndirectDebugValue(); uint64_t Offset = IsIndirect ? MI->getOperand(1).getImm() : 0; - const MDNode *MDPtr = - MI->getOperand(MI->getNumOperands()-1).getMetadata(); + const MDNode *Var = MI->getDebugVariable(); + const MDNode *Expr = MI->getDebugExpression(); DebugLoc DL = MI->getDebugLoc(); MachineBasicBlock *MBB = MI->getParent(); MachineInstr *NewDV = BuildMI(*MBB, MBB->erase(MI), DL, TII->get(TargetOpcode::DBG_VALUE)) - .addFrameIndex(SS).addImm(Offset).addMetadata(MDPtr); + .addFrameIndex(SS) + .addImm(Offset) + .addMetadata(Var) + .addMetadata(Expr); DEBUG(dbgs() << "Modifying debug info due to spill:" << "\t" << *NewDV); // Scan NewDV operands from the beginning. @@ -1070,9 +1076,8 @@ bool RAFast::runOnMachineFunction(MachineFunction &Fn) { << "********** Function: " << Fn.getName() << '\n'); MF = &Fn; MRI = &MF->getRegInfo(); - TM = &Fn.getTarget(); - TRI = TM->getRegisterInfo(); - TII = TM->getInstrInfo(); + TRI = MF->getSubtarget().getRegisterInfo(); + TII = MF->getSubtarget().getInstrInfo(); MRI->freezeReservedRegs(Fn); RegClassInfo.runOnMachineFunction(Fn); UsedInInstr.clear(); @@ -1093,9 +1098,8 @@ bool RAFast::runOnMachineFunction(MachineFunction &Fn) { } // Add the clobber lists for all the instructions we skipped earlier. 
- for (SmallPtrSet::const_iterator - I = SkippedInstrs.begin(), E = SkippedInstrs.end(); I != E; ++I) - if (const uint16_t *Defs = (*I)->getImplicitDefs()) + for (const MCInstrDesc *Desc : SkippedInstrs) + if (const uint16_t *Defs = Desc->getImplicitDefs()) while (*Defs) MRI->setPhysRegUsed(*Defs++); diff --git a/lib/CodeGen/RegAllocGreedy.cpp b/lib/CodeGen/RegAllocGreedy.cpp index 901b993..8ef5dcd 100644 --- a/lib/CodeGen/RegAllocGreedy.cpp +++ b/lib/CodeGen/RegAllocGreedy.cpp @@ -486,7 +486,7 @@ void RAGreedy::LRE_DidCloneVirtReg(unsigned New, unsigned Old) { } void RAGreedy::releaseMemory() { - SpillerInstance.reset(nullptr); + SpillerInstance.reset(); ExtraRegInfo.clear(); GlobalCand.clear(); } @@ -514,7 +514,7 @@ void RAGreedy::enqueue(PQueue &CurQueue, LiveInterval *LI) { // Giant live ranges fall back to the global assignment heuristic, which // prevents excessive spilling in pathological cases. bool ReverseLocal = TRI->reverseLocalAssignment(); - bool ForceGlobal = !ReverseLocal && TRI->mayOverrideLocalAssignment() && + bool ForceGlobal = !ReverseLocal && (Size / SlotIndex::InstrDist) > (2 * MRI->getRegClass(Reg)->getNumRegs()); if (ExtraRegInfo[Reg].Stage == RS_Assign && !ForceGlobal && !LI->empty() && @@ -817,7 +817,7 @@ unsigned RAGreedy::tryEvict(LiveInterval &VirtReg, const TargetRegisterClass *RC = MRI->getRegClass(VirtReg.reg); unsigned MinCost = RegClassInfo.getMinCost(RC); if (MinCost >= CostPerUseLimit) { - DEBUG(dbgs() << RC->getName() << " minimum cost = " << MinCost + DEBUG(dbgs() << TRI->getRegClassName(RC) << " minimum cost = " << MinCost << ", no cheaper registers to be found.\n"); return 0; } @@ -967,14 +967,12 @@ void RAGreedy::addThroughConstraints(InterferenceCache::Cursor Intf, BCS[B].Exit = SpillPlacement::PrefSpill; if (++B == GroupSize) { - ArrayRef Array(BCS, B); - SpillPlacer->addConstraints(Array); + SpillPlacer->addConstraints(makeArrayRef(BCS, B)); B = 0; } } - ArrayRef Array(BCS, B); - SpillPlacer->addConstraints(Array); + SpillPlacer->addConstraints(makeArrayRef(BCS, B)); SpillPlacer->addLinks(makeArrayRef(TBS, T)); } @@ -1013,7 +1011,7 @@ void RAGreedy::growRegion(GlobalSplitCandidate &Cand) { // Compute through constraints from the interference, or assume that all // through blocks prefer spilling when forming compact regions. - ArrayRef NewBlocks = makeArrayRef(ActiveBlocks).slice(AddedTo); + auto NewBlocks = makeArrayRef(ActiveBlocks).slice(AddedTo); if (Cand.PhysReg) addThroughConstraints(Cand.Intf, NewBlocks); else @@ -1791,9 +1789,11 @@ unsigned RAGreedy::tryLocalSplit(LiveInterval &VirtReg, AllocationOrder &Order, // instructions. // // Try to guess the size of the new interval. - const float EstWeight = normalizeSpillWeight(blockFreq * (NewGaps + 1), - Uses[SplitBefore].distance(Uses[SplitAfter]) + - (LiveBefore + LiveAfter)*SlotIndex::InstrDist); + const float EstWeight = normalizeSpillWeight( + blockFreq * (NewGaps + 1), + Uses[SplitBefore].distance(Uses[SplitAfter]) + + (LiveBefore + LiveAfter) * SlotIndex::InstrDist, + 1); // Would this split be possible to allocate? // Never allocate all gaps, we wouldn't be making progress. 
DEBUG(dbgs() << " w=" << EstWeight); @@ -2319,13 +2319,13 @@ bool RAGreedy::runOnMachineFunction(MachineFunction &mf) { << "********** Function: " << mf.getName() << '\n'); MF = &mf; - const TargetMachine &TM = MF->getTarget(); - TRI = TM.getRegisterInfo(); - TII = TM.getInstrInfo(); + TRI = MF->getSubtarget().getRegisterInfo(); + TII = MF->getSubtarget().getInstrInfo(); RCI.runOnMachineFunction(mf); EnableLocalReassign = EnableLocalReassignment || - TM.getSubtargetImpl()->enableRALocalReassignment(TM.getOptLevel()); + MF->getSubtarget().enableRALocalReassignment( + MF->getTarget().getOptLevel()); if (VerifyEnabled) MF->verify(this, "Before greedy register allocator"); diff --git a/lib/CodeGen/RegAllocPBQP.cpp b/lib/CodeGen/RegAllocPBQP.cpp index b8d2325..eb7e5633 100644 --- a/lib/CodeGen/RegAllocPBQP.cpp +++ b/lib/CodeGen/RegAllocPBQP.cpp @@ -49,9 +49,10 @@ #include "llvm/Support/FileSystem.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetInstrInfo.h" -#include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetSubtargetInfo.h" #include #include +#include #include #include #include @@ -61,17 +62,17 @@ using namespace llvm; #define DEBUG_TYPE "regalloc" static RegisterRegAlloc -registerPBQPRepAlloc("pbqp", "PBQP register allocator", +RegisterPBQPRepAlloc("pbqp", "PBQP register allocator", createDefaultPBQPRegisterAllocator); static cl::opt -pbqpCoalescing("pbqp-coalescing", +PBQPCoalescing("pbqp-coalescing", cl::desc("Attempt coalescing during PBQP register allocation."), cl::init(false), cl::Hidden); #ifndef NDEBUG static cl::opt -pbqpDumpGraphs("pbqp-dump-graphs", +PBQPDumpGraphs("pbqp-dump-graphs", cl::desc("Dump graphs for each function/round in the compilation unit."), cl::init(false), cl::Hidden); #endif @@ -88,8 +89,8 @@ public: static char ID; /// Construct a PBQP register allocator. - RegAllocPBQP(std::unique_ptr &b, char *cPassID=nullptr) - : MachineFunctionPass(ID), builder(b.release()), customPassID(cPassID) { + RegAllocPBQP(char *cPassID = nullptr) + : MachineFunctionPass(ID), customPassID(cPassID) { initializeSlotIndexesPass(*PassRegistry::getPassRegistry()); initializeLiveIntervalsPass(*PassRegistry::getPassRegistry()); initializeLiveStacksPass(*PassRegistry::getPassRegistry()); @@ -117,301 +118,320 @@ private: typedef std::map CoalesceMap; typedef std::set RegSet; - std::unique_ptr builder; - char *customPassID; - MachineFunction *mf; - const TargetMachine *tm; - const TargetRegisterInfo *tri; - const TargetInstrInfo *tii; - MachineRegisterInfo *mri; - const MachineBlockFrequencyInfo *mbfi; - - std::unique_ptr spiller; - LiveIntervals *lis; - LiveStacks *lss; - VirtRegMap *vrm; - - RegSet vregsToAlloc, emptyIntervalVRegs; + RegSet VRegsToAlloc, EmptyIntervalVRegs; /// \brief Finds the initial set of vreg intervals to allocate. - void findVRegIntervalsToAlloc(); + void findVRegIntervalsToAlloc(const MachineFunction &MF, LiveIntervals &LIS); + + /// \brief Constructs an initial graph. + void initializeGraph(PBQPRAGraph &G); /// \brief Given a solved PBQP problem maps this solution back to a register /// assignment. - bool mapPBQPToRegAlloc(const PBQPRAProblem &problem, - const PBQP::Solution &solution); + bool mapPBQPToRegAlloc(const PBQPRAGraph &G, + const PBQP::Solution &Solution, + VirtRegMap &VRM, + Spiller &VRegSpiller); /// \brief Postprocessing before final spilling. Sets basic block "live in" /// variables. 
- void finalizeAlloc() const; + void finalizeAlloc(MachineFunction &MF, LiveIntervals &LIS, + VirtRegMap &VRM) const; }; char RegAllocPBQP::ID = 0; -} // End anonymous namespace. - -unsigned PBQPRAProblem::getVRegForNode(PBQPRAGraph::NodeId node) const { - Node2VReg::const_iterator vregItr = node2VReg.find(node); - assert(vregItr != node2VReg.end() && "No vreg for node."); - return vregItr->second; -} - -PBQPRAGraph::NodeId PBQPRAProblem::getNodeForVReg(unsigned vreg) const { - VReg2Node::const_iterator nodeItr = vreg2Node.find(vreg); - assert(nodeItr != vreg2Node.end() && "No node for vreg."); - return nodeItr->second; - -} +/// @brief Set spill costs for each node in the PBQP reg-alloc graph. +class SpillCosts : public PBQPRAConstraint { +public: + void apply(PBQPRAGraph &G) override { + LiveIntervals &LIS = G.getMetadata().LIS; + + // A minimum spill costs, so that register constraints can can be set + // without normalization in the [0.0:MinSpillCost( interval. + const PBQP::PBQPNum MinSpillCost = 10.0; + + for (auto NId : G.nodeIds()) { + PBQP::PBQPNum SpillCost = + LIS.getInterval(G.getNodeMetadata(NId).getVReg()).weight; + if (SpillCost == 0.0) + SpillCost = std::numeric_limits::min(); + else + SpillCost += MinSpillCost; + PBQPRAGraph::RawVector NodeCosts(G.getNodeCosts(NId)); + NodeCosts[PBQP::RegAlloc::getSpillOptionIdx()] = SpillCost; + G.setNodeCosts(NId, std::move(NodeCosts)); + } + } +}; -const PBQPRAProblem::AllowedSet& - PBQPRAProblem::getAllowedSet(unsigned vreg) const { - AllowedSetMap::const_iterator allowedSetItr = allowedSets.find(vreg); - assert(allowedSetItr != allowedSets.end() && "No pregs for vreg."); - const AllowedSet &allowedSet = allowedSetItr->second; - return allowedSet; -} +/// @brief Add interference edges between overlapping vregs. +class Interference : public PBQPRAConstraint { +private: -unsigned PBQPRAProblem::getPRegForOption(unsigned vreg, unsigned option) const { - assert(isPRegOption(vreg, option) && "Not a preg option."); +private: - const AllowedSet& allowedSet = getAllowedSet(vreg); - assert(option <= allowedSet.size() && "Option outside allowed set."); - return allowedSet[option - 1]; -} + typedef const PBQP::RegAlloc::AllowedRegVector* AllowedRegVecPtr; + typedef std::pair IMatrixKey; + typedef DenseMap IMatrixCache; -PBQPRAProblem *PBQPBuilder::build(MachineFunction *mf, const LiveIntervals *lis, - const MachineBlockFrequencyInfo *mbfi, - const RegSet &vregs) { + // Holds (Interval, CurrentSegmentID, and NodeId). The first two are required + // for the fast interference graph construction algorithm. The last is there + // to save us from looking up node ids via the VRegToNode map in the graph + // metadata. + typedef std::tuple + IntervalInfo; - LiveIntervals *LIS = const_cast(lis); - MachineRegisterInfo *mri = &mf->getRegInfo(); - const TargetRegisterInfo *tri = mf->getTarget().getRegisterInfo(); + static SlotIndex getStartPoint(const IntervalInfo &I) { + return std::get<0>(I)->segments[std::get<1>(I)].start; + } - std::unique_ptr p(new PBQPRAProblem()); - PBQPRAGraph &g = p->getGraph(); - RegSet pregs; + static SlotIndex getEndPoint(const IntervalInfo &I) { + return std::get<0>(I)->segments[std::get<1>(I)].end; + } - // Collect the set of preg intervals, record that they're used in the MF. 
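The SpillCosts constraint above keeps slot 0 of each node's cost vector for the spill option and leaves slots 1..N for the allowed physical registers; adding MinSpillCost to every non-zero spill weight reserves the [0.0, MinSpillCost) band so that later constraints can bias register options without renormalizing anything. A minimal standalone sketch of that layout, folding the vector construction (done in initializeGraph further down) and the spill-cost assignment into one helper, and using a plain std::vector in place of the PBQP vector types:

#include <limits>
#include <vector>

// Slot 0 is the spill option; slots 1..N are the allowed physical registers.
std::vector<float> buildNodeCosts(unsigned NumAllowedRegs, float SpillWeight) {
  const float MinSpillCost = 10.0f; // the band below this is free for biases
  std::vector<float> Costs(NumAllowedRegs + 1, 0.0f);
  if (SpillWeight == 0.0f)
    Costs[0] = std::numeric_limits<float>::min(); // effectively free to spill
  else
    Costs[0] = SpillWeight + MinSpillCost;
  return Costs;
}

Keeping the spill option in a dedicated slot is also what lets the other constraints below address the I-th allowed register simply as index I + 1.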
- for (unsigned Reg = 1, e = tri->getNumRegs(); Reg != e; ++Reg) { - if (mri->def_empty(Reg)) - continue; - pregs.insert(Reg); - mri->setPhysRegUsed(Reg); + static PBQP::GraphBase::NodeId getNodeId(const IntervalInfo &I) { + return std::get<2>(I); } - // Iterate over vregs. - for (RegSet::const_iterator vregItr = vregs.begin(), vregEnd = vregs.end(); - vregItr != vregEnd; ++vregItr) { - unsigned vreg = *vregItr; - const TargetRegisterClass *trc = mri->getRegClass(vreg); - LiveInterval *vregLI = &LIS->getInterval(vreg); + static bool lowestStartPoint(const IntervalInfo &I1, + const IntervalInfo &I2) { + // Condition reversed because priority queue has the *highest* element at + // the front, rather than the lowest. + return getStartPoint(I1) > getStartPoint(I2); + } - // Record any overlaps with regmask operands. - BitVector regMaskOverlaps; - LIS->checkRegMaskInterference(*vregLI, regMaskOverlaps); + static bool lowestEndPoint(const IntervalInfo &I1, + const IntervalInfo &I2) { + SlotIndex E1 = getEndPoint(I1); + SlotIndex E2 = getEndPoint(I2); - // Compute an initial allowed set for the current vreg. - typedef std::vector VRAllowed; - VRAllowed vrAllowed; - ArrayRef rawOrder = trc->getRawAllocationOrder(*mf); - for (unsigned i = 0; i != rawOrder.size(); ++i) { - unsigned preg = rawOrder[i]; - if (mri->isReserved(preg)) - continue; + if (E1 < E2) + return true; - // vregLI crosses a regmask operand that clobbers preg. - if (!regMaskOverlaps.empty() && !regMaskOverlaps.test(preg)) - continue; + if (E1 > E2) + return false; - // vregLI overlaps fixed regunit interference. - bool Interference = false; - for (MCRegUnitIterator Units(preg, tri); Units.isValid(); ++Units) { - if (vregLI->overlaps(LIS->getRegUnit(*Units))) { - Interference = true; - break; - } - } - if (Interference) - continue; - - // preg is usable for this virtual register. - vrAllowed.push_back(preg); - } - - PBQP::Vector nodeCosts(vrAllowed.size() + 1, 0); + // If two intervals end at the same point, we need a way to break the tie or + // the set will assume they're actually equal and refuse to insert a + // "duplicate". Just compare the vregs - fast and guaranteed unique. + return std::get<0>(I1)->reg < std::get<0>(I2)->reg; + } - PBQP::PBQPNum spillCost = (vregLI->weight != 0.0) ? - vregLI->weight : std::numeric_limits::min(); + static bool isAtLastSegment(const IntervalInfo &I) { + return std::get<1>(I) == std::get<0>(I)->size() - 1; + } - addSpillCosts(nodeCosts, spillCost); + static IntervalInfo nextSegment(const IntervalInfo &I) { + return std::make_tuple(std::get<0>(I), std::get<1>(I) + 1, std::get<2>(I)); + } - // Construct the node. - PBQPRAGraph::NodeId nId = g.addNode(std::move(nodeCosts)); +public: - // Record the mapping and allowed set in the problem. - p->recordVReg(vreg, nId, vrAllowed.begin(), vrAllowed.end()); + void apply(PBQPRAGraph &G) override { + // The following is loosely based on the linear scan algorithm introduced in + // "Linear Scan Register Allocation" by Poletto and Sarkar. This version + // isn't linear, because the size of the active set isn't bound by the + // number of registers, but rather the size of the largest clique in the + // graph. Still, we expect this to be better than N^2. + LiveIntervals &LIS = G.getMetadata().LIS; + + // Interferenc matrices are incredibly regular - they're only a function of + // the allowed sets, so we cache them to avoid the overhead of constructing + // and uniquing them. 
+ IMatrixCache C; + + typedef std::set IntervalSet; + typedef std::priority_queue, + decltype(&lowestStartPoint)> IntervalQueue; + IntervalSet Active(lowestEndPoint); + IntervalQueue Inactive(lowestStartPoint); + + // Start by building the inactive set. + for (auto NId : G.nodeIds()) { + unsigned VReg = G.getNodeMetadata(NId).getVReg(); + LiveInterval &LI = LIS.getInterval(VReg); + assert(!LI.empty() && "PBQP graph contains node for empty interval"); + Inactive.push(std::make_tuple(&LI, 0, NId)); + } - } + while (!Inactive.empty()) { + // Tentatively grab the "next" interval - this choice may be overriden + // below. + IntervalInfo Cur = Inactive.top(); + + // Retire any active intervals that end before Cur starts. + IntervalSet::iterator RetireItr = Active.begin(); + while (RetireItr != Active.end() && + (getEndPoint(*RetireItr) <= getStartPoint(Cur))) { + // If this interval has subsequent segments, add the next one to the + // inactive list. + if (!isAtLastSegment(*RetireItr)) + Inactive.push(nextSegment(*RetireItr)); + + ++RetireItr; + } + Active.erase(Active.begin(), RetireItr); + + // One of the newly retired segments may actually start before the + // Cur segment, so re-grab the front of the inactive list. + Cur = Inactive.top(); + Inactive.pop(); + + // At this point we know that Cur overlaps all active intervals. Add the + // interference edges. + PBQP::GraphBase::NodeId NId = getNodeId(Cur); + for (const auto &A : Active) { + PBQP::GraphBase::NodeId MId = getNodeId(A); + + // Check that we haven't already added this edge + // FIXME: findEdge is expensive in the worst case (O(max_clique(G))). + // It might be better to replace this with a local bit-matrix. + if (G.findEdge(NId, MId) != PBQPRAGraph::invalidEdgeId()) + continue; - for (RegSet::const_iterator vr1Itr = vregs.begin(), vrEnd = vregs.end(); - vr1Itr != vrEnd; ++vr1Itr) { - unsigned vr1 = *vr1Itr; - const LiveInterval &l1 = lis->getInterval(vr1); - const PBQPRAProblem::AllowedSet &vr1Allowed = p->getAllowedSet(vr1); - - for (RegSet::const_iterator vr2Itr = std::next(vr1Itr); vr2Itr != vrEnd; - ++vr2Itr) { - unsigned vr2 = *vr2Itr; - const LiveInterval &l2 = lis->getInterval(vr2); - const PBQPRAProblem::AllowedSet &vr2Allowed = p->getAllowedSet(vr2); - - assert(!l2.empty() && "Empty interval in vreg set?"); - if (l1.overlaps(l2)) { - PBQP::Matrix edgeCosts(vr1Allowed.size()+1, vr2Allowed.size()+1, 0); - addInterferenceCosts(edgeCosts, vr1Allowed, vr2Allowed, tri); - - g.addEdge(p->getNodeForVReg(vr1), p->getNodeForVReg(vr2), - std::move(edgeCosts)); + // This is a new edge - add it to the graph. + createInterferenceEdge(G, NId, MId, C); } + + // Finally, add Cur to the Active set. 
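The sweep that apply() implements here works as the comment above describes: intervals wait in an "inactive" priority queue ordered by start point, every active interval that ends before the next one starts is retired, and whatever is still active must overlap it, so an interference edge is added for each before the new interval itself joins the active set (the Active.insert step that follows just below). A standalone sketch of the same sweep, reduced to single-segment intervals with an illustrative Seg type, so there is no re-queuing of later segments and no cost matrices:

#include <queue>
#include <set>
#include <utility>
#include <vector>

struct Seg { unsigned Id; unsigned Start, End; }; // half-open [Start, End)

static bool lowerStart(const Seg &A, const Seg &B) {
  // std::priority_queue keeps the largest element on top, so reverse the
  // comparison to pop the smallest start point first.
  return A.Start > B.Start;
}

struct LowerEnd {
  bool operator()(const Seg &A, const Seg &B) const {
    if (A.End != B.End)
      return A.End < B.End;
    return A.Id < B.Id; // tie-break so equal end points stay distinct
  }
};

std::vector<std::pair<unsigned, unsigned>>
buildInterferenceEdges(const std::vector<Seg> &Segs) {
  std::priority_queue<Seg, std::vector<Seg>, bool (*)(const Seg &, const Seg &)>
      Inactive(lowerStart);
  for (const Seg &S : Segs)
    Inactive.push(S);

  std::set<Seg, LowerEnd> Active;
  std::vector<std::pair<unsigned, unsigned>> Edges;

  while (!Inactive.empty()) {
    Seg Cur = Inactive.top();
    Inactive.pop();

    // Retire every active segment that ends at or before Cur's start point.
    auto RetireEnd = Active.begin();
    while (RetireEnd != Active.end() && RetireEnd->End <= Cur.Start)
      ++RetireEnd;
    Active.erase(Active.begin(), RetireEnd);

    // Anything still active started no later than Cur and ends after Cur
    // starts, so it overlaps Cur: record one edge per remaining segment.
    for (const Seg &A : Active)
      Edges.emplace_back(Cur.Id, A.Id);

    Active.insert(Cur);
  }
  return Edges;
}

As the comment above notes, the cost of the real version is bounded by the largest clique in the interference graph rather than by the number of registers, because edges are only ever added between genuinely overlapping nodes.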
+ Active.insert(Cur); } } - return p.release(); -} +private: -void PBQPBuilder::addSpillCosts(PBQP::Vector &costVec, - PBQP::PBQPNum spillCost) { - costVec[0] = spillCost; -} + void createInterferenceEdge(PBQPRAGraph &G, PBQPRAGraph::NodeId NId, + PBQPRAGraph::NodeId MId, IMatrixCache &C) { -void PBQPBuilder::addInterferenceCosts( - PBQP::Matrix &costMat, - const PBQPRAProblem::AllowedSet &vr1Allowed, - const PBQPRAProblem::AllowedSet &vr2Allowed, - const TargetRegisterInfo *tri) { - assert(costMat.getRows() == vr1Allowed.size() + 1 && "Matrix height mismatch."); - assert(costMat.getCols() == vr2Allowed.size() + 1 && "Matrix width mismatch."); + const TargetRegisterInfo &TRI = + *G.getMetadata().MF.getTarget().getSubtargetImpl()->getRegisterInfo(); - for (unsigned i = 0; i != vr1Allowed.size(); ++i) { - unsigned preg1 = vr1Allowed[i]; + const auto &NRegs = G.getNodeMetadata(NId).getAllowedRegs(); + const auto &MRegs = G.getNodeMetadata(MId).getAllowedRegs(); - for (unsigned j = 0; j != vr2Allowed.size(); ++j) { - unsigned preg2 = vr2Allowed[j]; + // Try looking the edge costs up in the IMatrixCache first. + IMatrixKey K(&NRegs, &MRegs); + IMatrixCache::iterator I = C.find(K); + if (I != C.end()) { + G.addEdgeBypassingCostAllocator(NId, MId, I->second); + return; + } - if (tri->regsOverlap(preg1, preg2)) { - costMat[i + 1][j + 1] = std::numeric_limits::infinity(); + PBQPRAGraph::RawMatrix M(NRegs.size() + 1, MRegs.size() + 1, 0); + for (unsigned I = 0; I != NRegs.size(); ++I) { + unsigned PRegN = NRegs[I]; + for (unsigned J = 0; J != MRegs.size(); ++J) { + unsigned PRegM = MRegs[J]; + if (TRI.regsOverlap(PRegN, PRegM)) + M[I + 1][J + 1] = std::numeric_limits::infinity(); } } + + PBQPRAGraph::EdgeId EId = G.addEdge(NId, MId, std::move(M)); + C[K] = G.getEdgeCostsPtr(EId); } -} +}; -PBQPRAProblem *PBQPBuilderWithCoalescing::build(MachineFunction *mf, - const LiveIntervals *lis, - const MachineBlockFrequencyInfo *mbfi, - const RegSet &vregs) { - std::unique_ptr p(PBQPBuilder::build(mf, lis, mbfi, vregs)); - PBQPRAGraph &g = p->getGraph(); +class Coalescing : public PBQPRAConstraint { +public: + void apply(PBQPRAGraph &G) override { + MachineFunction &MF = G.getMetadata().MF; + MachineBlockFrequencyInfo &MBFI = G.getMetadata().MBFI; + CoalescerPair CP(*MF.getTarget().getSubtargetImpl()->getRegisterInfo()); + + // Scan the machine function and add a coalescing cost whenever CoalescerPair + // gives the Ok. + for (const auto &MBB : MF) { + for (const auto &MI : MBB) { + + // Skip not-coalescable or already coalesced copies. + if (!CP.setRegisters(&MI) || CP.getSrcReg() == CP.getDstReg()) + continue; - const TargetMachine &tm = mf->getTarget(); - CoalescerPair cp(*tm.getRegisterInfo()); + unsigned DstReg = CP.getDstReg(); + unsigned SrcReg = CP.getSrcReg(); - // Scan the machine function and add a coalescing cost whenever CoalescerPair - // gives the Ok. - for (const auto &mbb : *mf) { - for (const auto &mi : mbb) { - if (!cp.setRegisters(&mi)) { - continue; // Not coalescable. - } + const float Scale = 1.0f / MBFI.getEntryFreq(); + PBQP::PBQPNum CBenefit = MBFI.getBlockFreq(&MBB).getFrequency() * Scale; - if (cp.getSrcReg() == cp.getDstReg()) { - continue; // Already coalesced. - } + if (CP.isPhys()) { + if (!MF.getRegInfo().isAllocatable(DstReg)) + continue; - unsigned dst = cp.getDstReg(), - src = cp.getSrcReg(); + PBQPRAGraph::NodeId NId = G.getMetadata().getNodeIdForVReg(SrcReg); - const float copyFactor = 0.5; // Cost of copy relative to load. 
Current - // value plucked randomly out of the air. + const PBQPRAGraph::NodeMetadata::AllowedRegVector &Allowed = + G.getNodeMetadata(NId).getAllowedRegs(); - PBQP::PBQPNum cBenefit = - copyFactor * LiveIntervals::getSpillWeight(false, true, mbfi, &mi); + unsigned PRegOpt = 0; + while (PRegOpt < Allowed.size() && Allowed[PRegOpt] != DstReg) + ++PRegOpt; - if (cp.isPhys()) { - if (!mf->getRegInfo().isAllocatable(dst)) { - continue; - } - - const PBQPRAProblem::AllowedSet &allowed = p->getAllowedSet(src); - unsigned pregOpt = 0; - while (pregOpt < allowed.size() && allowed[pregOpt] != dst) { - ++pregOpt; - } - if (pregOpt < allowed.size()) { - ++pregOpt; // +1 to account for spill option. - PBQPRAGraph::NodeId node = p->getNodeForVReg(src); - llvm::dbgs() << "Reading node costs for node " << node << "\n"; - llvm::dbgs() << "Source node: " << &g.getNodeCosts(node) << "\n"; - PBQP::Vector newCosts(g.getNodeCosts(node)); - addPhysRegCoalesce(newCosts, pregOpt, cBenefit); - g.setNodeCosts(node, newCosts); - } - } else { - const PBQPRAProblem::AllowedSet *allowed1 = &p->getAllowedSet(dst); - const PBQPRAProblem::AllowedSet *allowed2 = &p->getAllowedSet(src); - PBQPRAGraph::NodeId node1 = p->getNodeForVReg(dst); - PBQPRAGraph::NodeId node2 = p->getNodeForVReg(src); - PBQPRAGraph::EdgeId edge = g.findEdge(node1, node2); - if (edge == g.invalidEdgeId()) { - PBQP::Matrix costs(allowed1->size() + 1, allowed2->size() + 1, 0); - addVirtRegCoalesce(costs, *allowed1, *allowed2, cBenefit); - g.addEdge(node1, node2, costs); + if (PRegOpt < Allowed.size()) { + PBQPRAGraph::RawVector NewCosts(G.getNodeCosts(NId)); + NewCosts[PRegOpt + 1] -= CBenefit; + G.setNodeCosts(NId, std::move(NewCosts)); + } } else { - if (g.getEdgeNode1Id(edge) == node2) { - std::swap(node1, node2); - std::swap(allowed1, allowed2); + PBQPRAGraph::NodeId N1Id = G.getMetadata().getNodeIdForVReg(DstReg); + PBQPRAGraph::NodeId N2Id = G.getMetadata().getNodeIdForVReg(SrcReg); + const PBQPRAGraph::NodeMetadata::AllowedRegVector *Allowed1 = + &G.getNodeMetadata(N1Id).getAllowedRegs(); + const PBQPRAGraph::NodeMetadata::AllowedRegVector *Allowed2 = + &G.getNodeMetadata(N2Id).getAllowedRegs(); + + PBQPRAGraph::EdgeId EId = G.findEdge(N1Id, N2Id); + if (EId == G.invalidEdgeId()) { + PBQPRAGraph::RawMatrix Costs(Allowed1->size() + 1, + Allowed2->size() + 1, 0); + addVirtRegCoalesce(Costs, *Allowed1, *Allowed2, CBenefit); + G.addEdge(N1Id, N2Id, std::move(Costs)); + } else { + if (G.getEdgeNode1Id(EId) == N2Id) { + std::swap(N1Id, N2Id); + std::swap(Allowed1, Allowed2); + } + PBQPRAGraph::RawMatrix Costs(G.getEdgeCosts(EId)); + addVirtRegCoalesce(Costs, *Allowed1, *Allowed2, CBenefit); + G.setEdgeCosts(EId, std::move(Costs)); } - PBQP::Matrix costs(g.getEdgeCosts(edge)); - addVirtRegCoalesce(costs, *allowed1, *allowed2, cBenefit); - g.setEdgeCosts(edge, costs); } } } } - return p.release(); -} - -void PBQPBuilderWithCoalescing::addPhysRegCoalesce(PBQP::Vector &costVec, - unsigned pregOption, - PBQP::PBQPNum benefit) { - costVec[pregOption] += -benefit; -} - -void PBQPBuilderWithCoalescing::addVirtRegCoalesce( - PBQP::Matrix &costMat, - const PBQPRAProblem::AllowedSet &vr1Allowed, - const PBQPRAProblem::AllowedSet &vr2Allowed, - PBQP::PBQPNum benefit) { - - assert(costMat.getRows() == vr1Allowed.size() + 1 && "Size mismatch."); - assert(costMat.getCols() == vr2Allowed.size() + 1 && "Size mismatch."); - - for (unsigned i = 0; i != vr1Allowed.size(); ++i) { - unsigned preg1 = vr1Allowed[i]; - for (unsigned j = 0; j != vr2Allowed.size(); ++j) { - 
unsigned preg2 = vr2Allowed[j]; +private: - if (preg1 == preg2) { - costMat[i + 1][j + 1] += -benefit; + void addVirtRegCoalesce( + PBQPRAGraph::RawMatrix &CostMat, + const PBQPRAGraph::NodeMetadata::AllowedRegVector &Allowed1, + const PBQPRAGraph::NodeMetadata::AllowedRegVector &Allowed2, + PBQP::PBQPNum Benefit) { + assert(CostMat.getRows() == Allowed1.size() + 1 && "Size mismatch."); + assert(CostMat.getCols() == Allowed2.size() + 1 && "Size mismatch."); + for (unsigned I = 0; I != Allowed1.size(); ++I) { + unsigned PReg1 = Allowed1[I]; + for (unsigned J = 0; J != Allowed2.size(); ++J) { + unsigned PReg2 = Allowed2[J]; + if (PReg1 == PReg2) + CostMat[I + 1][J + 1] -= Benefit; } } } -} +}; + +} // End anonymous namespace. + +// Out-of-line destructor/anchor for PBQPRAConstraint. +PBQPRAConstraint::~PBQPRAConstraint() {} +void PBQPRAConstraint::anchor() {} +void PBQPRAConstraintList::anchor() {} void RegAllocPBQP::getAnalysisUsage(AnalysisUsage &au) const { au.setPreservesCFG(); @@ -437,118 +457,197 @@ void RegAllocPBQP::getAnalysisUsage(AnalysisUsage &au) const { MachineFunctionPass::getAnalysisUsage(au); } -void RegAllocPBQP::findVRegIntervalsToAlloc() { +void RegAllocPBQP::findVRegIntervalsToAlloc(const MachineFunction &MF, + LiveIntervals &LIS) { + const MachineRegisterInfo &MRI = MF.getRegInfo(); // Iterate over all live ranges. - for (unsigned i = 0, e = mri->getNumVirtRegs(); i != e; ++i) { - unsigned Reg = TargetRegisterInfo::index2VirtReg(i); - if (mri->reg_nodbg_empty(Reg)) + for (unsigned I = 0, E = MRI.getNumVirtRegs(); I != E; ++I) { + unsigned Reg = TargetRegisterInfo::index2VirtReg(I); + if (MRI.reg_nodbg_empty(Reg)) continue; - LiveInterval *li = &lis->getInterval(Reg); + LiveInterval &LI = LIS.getInterval(Reg); // If this live interval is non-empty we will use pbqp to allocate it. // Empty intervals we allocate in a simple post-processing stage in // finalizeAlloc. - if (!li->empty()) { - vregsToAlloc.insert(li->reg); + if (!LI.empty()) { + VRegsToAlloc.insert(LI.reg); } else { - emptyIntervalVRegs.insert(li->reg); + EmptyIntervalVRegs.insert(LI.reg); + } + } +} + +static bool isACalleeSavedRegister(unsigned reg, const TargetRegisterInfo &TRI, + const MachineFunction &MF) { + const MCPhysReg *CSR = TRI.getCalleeSavedRegs(&MF); + for (unsigned i = 0; CSR[i] != 0; ++i) + if (TRI.regsOverlap(reg, CSR[i])) + return true; + return false; +} + +void RegAllocPBQP::initializeGraph(PBQPRAGraph &G) { + MachineFunction &MF = G.getMetadata().MF; + + LiveIntervals &LIS = G.getMetadata().LIS; + const MachineRegisterInfo &MRI = G.getMetadata().MF.getRegInfo(); + const TargetRegisterInfo &TRI = + *G.getMetadata().MF.getTarget().getSubtargetImpl()->getRegisterInfo(); + + for (auto VReg : VRegsToAlloc) { + const TargetRegisterClass *TRC = MRI.getRegClass(VReg); + LiveInterval &VRegLI = LIS.getInterval(VReg); + + // Record any overlaps with regmask operands. + BitVector RegMaskOverlaps; + LIS.checkRegMaskInterference(VRegLI, RegMaskOverlaps); + + // Compute an initial allowed set for the current vreg. + std::vector VRegAllowed; + ArrayRef RawPRegOrder = TRC->getRawAllocationOrder(MF); + for (unsigned I = 0; I != RawPRegOrder.size(); ++I) { + unsigned PReg = RawPRegOrder[I]; + if (MRI.isReserved(PReg)) + continue; + + // vregLI crosses a regmask operand that clobbers preg. + if (!RegMaskOverlaps.empty() && !RegMaskOverlaps.test(PReg)) + continue; + + // vregLI overlaps fixed regunit interference. 
+ bool Interference = false; + for (MCRegUnitIterator Units(PReg, &TRI); Units.isValid(); ++Units) { + if (VRegLI.overlaps(LIS.getRegUnit(*Units))) { + Interference = true; + break; + } + } + if (Interference) + continue; + + // preg is usable for this virtual register. + VRegAllowed.push_back(PReg); } + + PBQPRAGraph::RawVector NodeCosts(VRegAllowed.size() + 1, 0); + + // Tweak cost of callee saved registers, as using then force spilling and + // restoring them. This would only happen in the prologue / epilogue though. + for (unsigned i = 0; i != VRegAllowed.size(); ++i) + if (isACalleeSavedRegister(VRegAllowed[i], TRI, MF)) + NodeCosts[1 + i] += 1.0; + + PBQPRAGraph::NodeId NId = G.addNode(std::move(NodeCosts)); + G.getNodeMetadata(NId).setVReg(VReg); + G.getNodeMetadata(NId).setAllowedRegs( + G.getMetadata().getAllowedRegs(std::move(VRegAllowed))); + G.getMetadata().setNodeIdForVReg(VReg, NId); } } -bool RegAllocPBQP::mapPBQPToRegAlloc(const PBQPRAProblem &problem, - const PBQP::Solution &solution) { +bool RegAllocPBQP::mapPBQPToRegAlloc(const PBQPRAGraph &G, + const PBQP::Solution &Solution, + VirtRegMap &VRM, + Spiller &VRegSpiller) { + MachineFunction &MF = G.getMetadata().MF; + LiveIntervals &LIS = G.getMetadata().LIS; + const TargetRegisterInfo &TRI = + *MF.getTarget().getSubtargetImpl()->getRegisterInfo(); + (void)TRI; + // Set to true if we have any spills - bool anotherRoundNeeded = false; + bool AnotherRoundNeeded = false; // Clear the existing allocation. - vrm->clearAllVirt(); + VRM.clearAllVirt(); - const PBQPRAGraph &g = problem.getGraph(); // Iterate over the nodes mapping the PBQP solution to a register // assignment. - for (auto NId : g.nodeIds()) { - unsigned vreg = problem.getVRegForNode(NId); - unsigned alloc = solution.getSelection(NId); - - if (problem.isPRegOption(vreg, alloc)) { - unsigned preg = problem.getPRegForOption(vreg, alloc); - DEBUG(dbgs() << "VREG " << PrintReg(vreg, tri) << " -> " - << tri->getName(preg) << "\n"); - assert(preg != 0 && "Invalid preg selected."); - vrm->assignVirt2Phys(vreg, preg); - } else if (problem.isSpillOption(vreg, alloc)) { - vregsToAlloc.erase(vreg); - SmallVector newSpills; - LiveRangeEdit LRE(&lis->getInterval(vreg), newSpills, *mf, *lis, vrm); - spiller->spill(LRE); - - DEBUG(dbgs() << "VREG " << PrintReg(vreg, tri) << " -> SPILLED (Cost: " + for (auto NId : G.nodeIds()) { + unsigned VReg = G.getNodeMetadata(NId).getVReg(); + unsigned AllocOption = Solution.getSelection(NId); + + if (AllocOption != PBQP::RegAlloc::getSpillOptionIdx()) { + unsigned PReg = G.getNodeMetadata(NId).getAllowedRegs()[AllocOption - 1]; + DEBUG(dbgs() << "VREG " << PrintReg(VReg, &TRI) << " -> " + << TRI.getName(PReg) << "\n"); + assert(PReg != 0 && "Invalid preg selected."); + VRM.assignVirt2Phys(VReg, PReg); + } else { + VRegsToAlloc.erase(VReg); + SmallVector NewSpills; + LiveRangeEdit LRE(&LIS.getInterval(VReg), NewSpills, MF, LIS, &VRM); + VRegSpiller.spill(LRE); + + DEBUG(dbgs() << "VREG " << PrintReg(VReg, &TRI) << " -> SPILLED (Cost: " << LRE.getParent().weight << ", New vregs: "); // Copy any newly inserted live intervals into the list of regs to // allocate. 
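initializeGraph above also nudges the cost of every callee-saved candidate up by 1.0: the first use of a callee-saved register forces a save and restore in the prologue/epilogue, so the solver should only pick one when it genuinely beats the alternatives. A standalone sketch of that bias, with isCalleeSaved standing in for the getCalleeSavedRegs/regsOverlap walk performed by isACalleeSavedRegister:

#include <vector>

// Adds a small penalty to every callee-saved option in a node cost vector
// laid out as {spill, reg1, ..., regN}.
void biasAgainstCSRs(std::vector<float> &NodeCosts,
                     const std::vector<unsigned> &AllowedRegs,
                     bool (*isCalleeSaved)(unsigned)) {
  for (unsigned I = 0; I != AllowedRegs.size(); ++I)
    if (isCalleeSaved(AllowedRegs[I]))
      NodeCosts[1 + I] += 1.0f; // slot 0 is the spill option
}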
- for (LiveRangeEdit::iterator itr = LRE.begin(), end = LRE.end(); - itr != end; ++itr) { - LiveInterval &li = lis->getInterval(*itr); - assert(!li.empty() && "Empty spill range."); - DEBUG(dbgs() << PrintReg(li.reg, tri) << " "); - vregsToAlloc.insert(li.reg); + for (LiveRangeEdit::iterator I = LRE.begin(), E = LRE.end(); + I != E; ++I) { + LiveInterval &LI = LIS.getInterval(*I); + assert(!LI.empty() && "Empty spill range."); + DEBUG(dbgs() << PrintReg(LI.reg, &TRI) << " "); + VRegsToAlloc.insert(LI.reg); } DEBUG(dbgs() << ")\n"); // We need another round if spill intervals were added. - anotherRoundNeeded |= !LRE.empty(); - } else { - llvm_unreachable("Unknown allocation option."); + AnotherRoundNeeded |= !LRE.empty(); } } - return !anotherRoundNeeded; + return !AnotherRoundNeeded; } +void RegAllocPBQP::finalizeAlloc(MachineFunction &MF, + LiveIntervals &LIS, + VirtRegMap &VRM) const { + MachineRegisterInfo &MRI = MF.getRegInfo(); -void RegAllocPBQP::finalizeAlloc() const { // First allocate registers for the empty intervals. for (RegSet::const_iterator - itr = emptyIntervalVRegs.begin(), end = emptyIntervalVRegs.end(); - itr != end; ++itr) { - LiveInterval *li = &lis->getInterval(*itr); + I = EmptyIntervalVRegs.begin(), E = EmptyIntervalVRegs.end(); + I != E; ++I) { + LiveInterval &LI = LIS.getInterval(*I); - unsigned physReg = mri->getSimpleHint(li->reg); + unsigned PReg = MRI.getSimpleHint(LI.reg); - if (physReg == 0) { - const TargetRegisterClass *liRC = mri->getRegClass(li->reg); - physReg = liRC->getRawAllocationOrder(*mf).front(); + if (PReg == 0) { + const TargetRegisterClass &RC = *MRI.getRegClass(LI.reg); + PReg = RC.getRawAllocationOrder(MF).front(); } - vrm->assignVirt2Phys(li->reg, physReg); + VRM.assignVirt2Phys(LI.reg, PReg); } } -bool RegAllocPBQP::runOnMachineFunction(MachineFunction &MF) { +static inline float normalizePBQPSpillWeight(float UseDefFreq, unsigned Size, + unsigned NumInstr) { + // All intervals have a spill weight that is mostly proportional to the number + // of uses, with uses in loops having a bigger weight. + return NumInstr * normalizeSpillWeight(UseDefFreq, Size, 1); +} - mf = &MF; - tm = &mf->getTarget(); - tri = tm->getRegisterInfo(); - tii = tm->getInstrInfo(); - mri = &mf->getRegInfo(); +bool RegAllocPBQP::runOnMachineFunction(MachineFunction &MF) { + LiveIntervals &LIS = getAnalysis(); + MachineBlockFrequencyInfo &MBFI = + getAnalysis(); - lis = &getAnalysis(); - lss = &getAnalysis(); - mbfi = &getAnalysis(); + calculateSpillWeightsAndHints(LIS, MF, getAnalysis(), MBFI, + normalizePBQPSpillWeight); - calculateSpillWeightsAndHints(*lis, MF, getAnalysis(), - *mbfi); + VirtRegMap &VRM = getAnalysis(); - vrm = &getAnalysis(); - spiller.reset(createInlineSpiller(*this, MF, *vrm)); + std::unique_ptr VRegSpiller(createInlineSpiller(*this, MF, VRM)); - mri->freezeReservedRegs(MF); + MF.getRegInfo().freezeReservedRegs(MF); - DEBUG(dbgs() << "PBQP Register Allocating for " << mf->getName() << "\n"); + DEBUG(dbgs() << "PBQP Register Allocating for " << MF.getName() << "\n"); // Allocator main loop: // @@ -560,72 +659,72 @@ bool RegAllocPBQP::runOnMachineFunction(MachineFunction &MF) { // This process is continued till no more spills are generated. // Find the vreg intervals in need of allocation. - findVRegIntervalsToAlloc(); + findVRegIntervalsToAlloc(MF, LIS); #ifndef NDEBUG - const Function* func = mf->getFunction(); - std::string fqn = - func->getParent()->getModuleIdentifier() + "." 
+ - func->getName().str(); + const Function &F = *MF.getFunction(); + std::string FullyQualifiedName = + F.getParent()->getModuleIdentifier() + "." + F.getName().str(); #endif // If there are non-empty intervals allocate them using pbqp. - if (!vregsToAlloc.empty()) { + if (!VRegsToAlloc.empty()) { - bool pbqpAllocComplete = false; - unsigned round = 0; + const TargetSubtargetInfo &Subtarget = *MF.getTarget().getSubtargetImpl(); + std::unique_ptr ConstraintsRoot = + llvm::make_unique(); + ConstraintsRoot->addConstraint(llvm::make_unique()); + ConstraintsRoot->addConstraint(llvm::make_unique()); + if (PBQPCoalescing) + ConstraintsRoot->addConstraint(llvm::make_unique()); + ConstraintsRoot->addConstraint(Subtarget.getCustomPBQPConstraints()); - while (!pbqpAllocComplete) { - DEBUG(dbgs() << " PBQP Regalloc round " << round << ":\n"); + bool PBQPAllocComplete = false; + unsigned Round = 0; - std::unique_ptr problem( - builder->build(mf, lis, mbfi, vregsToAlloc)); + while (!PBQPAllocComplete) { + DEBUG(dbgs() << " PBQP Regalloc round " << Round << ":\n"); + + PBQPRAGraph G(PBQPRAGraph::GraphMetadata(MF, LIS, MBFI)); + initializeGraph(G); + ConstraintsRoot->apply(G); #ifndef NDEBUG - if (pbqpDumpGraphs) { - std::ostringstream rs; - rs << round; - std::string graphFileName(fqn + "." + rs.str() + ".pbqpgraph"); - std::string tmp; - raw_fd_ostream os(graphFileName.c_str(), tmp, sys::fs::F_Text); - DEBUG(dbgs() << "Dumping graph for round " << round << " to \"" - << graphFileName << "\"\n"); - problem->getGraph().dump(os); + if (PBQPDumpGraphs) { + std::ostringstream RS; + RS << Round; + std::string GraphFileName = FullyQualifiedName + "." + RS.str() + + ".pbqpgraph"; + std::error_code EC; + raw_fd_ostream OS(GraphFileName, EC, sys::fs::F_Text); + DEBUG(dbgs() << "Dumping graph for round " << Round << " to \"" + << GraphFileName << "\"\n"); + G.dumpToStream(OS); } #endif - PBQP::Solution solution = - PBQP::RegAlloc::solve(problem->getGraph()); - - pbqpAllocComplete = mapPBQPToRegAlloc(*problem, solution); - - ++round; + PBQP::Solution Solution = PBQP::RegAlloc::solve(G); + PBQPAllocComplete = mapPBQPToRegAlloc(G, Solution, VRM, *VRegSpiller); + ++Round; } } // Finalise allocation, allocate empty ranges. 
- finalizeAlloc(); - vregsToAlloc.clear(); - emptyIntervalVRegs.clear(); + finalizeAlloc(MF, LIS, VRM); + VRegsToAlloc.clear(); + EmptyIntervalVRegs.clear(); - DEBUG(dbgs() << "Post alloc VirtRegMap:\n" << *vrm << "\n"); + DEBUG(dbgs() << "Post alloc VirtRegMap:\n" << VRM << "\n"); return true; } -FunctionPass * -llvm::createPBQPRegisterAllocator(std::unique_ptr &builder, - char *customPassID) { - return new RegAllocPBQP(builder, customPassID); +FunctionPass *llvm::createPBQPRegisterAllocator(char *customPassID) { + return new RegAllocPBQP(customPassID); } FunctionPass* llvm::createDefaultPBQPRegisterAllocator() { - std::unique_ptr Builder; - if (pbqpCoalescing) - Builder.reset(new PBQPBuilderWithCoalescing()); - else - Builder.reset(new PBQPBuilder()); - return createPBQPRegisterAllocator(Builder); + return createPBQPRegisterAllocator(); } #undef DEBUG_TYPE diff --git a/lib/CodeGen/RegisterClassInfo.cpp b/lib/CodeGen/RegisterClassInfo.cpp index 8b5445c..e0d1aa2 100644 --- a/lib/CodeGen/RegisterClassInfo.cpp +++ b/lib/CodeGen/RegisterClassInfo.cpp @@ -20,7 +20,6 @@ #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Target/TargetMachine.h" using namespace llvm; @@ -38,8 +37,8 @@ void RegisterClassInfo::runOnMachineFunction(const MachineFunction &mf) { MF = &mf; // Allocate new array the first time we see a new target. - if (MF->getTarget().getRegisterInfo() != TRI) { - TRI = MF->getTarget().getRegisterInfo(); + if (MF->getSubtarget().getRegisterInfo() != TRI) { + TRI = MF->getSubtarget().getRegisterInfo(); RegClass.reset(new RCInfo[TRI->getNumRegClasses()]); unsigned NumPSets = TRI->getNumRegPressureSets(); PSetLimits.reset(new unsigned[NumPSets]); @@ -138,7 +137,7 @@ void RegisterClassInfo::compute(const TargetRegisterClass *RC) const { RCI.LastCostChange = LastCostChange; DEBUG({ - dbgs() << "AllocationOrder(" << RC->getName() << ") = ["; + dbgs() << "AllocationOrder(" << TRI->getRegClassName(RC) << ") = ["; for (unsigned I = 0; I != RCI.NumRegs; ++I) dbgs() << ' ' << PrintReg(RCI.Order[I], TRI); dbgs() << (RCI.ProperSubClass ? " ] (sub-class)\n" : " ]\n"); diff --git a/lib/CodeGen/RegisterCoalescer.cpp b/lib/CodeGen/RegisterCoalescer.cpp index 65b0528..2d2dc92 100644 --- a/lib/CodeGen/RegisterCoalescer.cpp +++ b/lib/CodeGen/RegisterCoalescer.cpp @@ -94,11 +94,11 @@ namespace { /// blocks exclusively containing copies. bool JoinSplitEdges; - /// WorkList - Copy instructions yet to be coalesced. + /// Copy instructions yet to be coalesced. SmallVector WorkList; SmallVector LocalWorkList; - /// ErasedInstrs - Set of instruction pointers that have been erased, and + /// Set of instruction pointers that have been erased, and /// that may be present in WorkList. SmallPtrSet ErasedInstrs; @@ -114,21 +114,21 @@ namespace { /// LiveRangeEdit callback. void LRE_WillEraseInstruction(MachineInstr *MI) override; - /// coalesceLocals - coalesce the LocalWorkList. + /// Coalesce the LocalWorkList. void coalesceLocals(); - /// joinAllIntervals - join compatible live intervals + /// Join compatible live intervals void joinAllIntervals(); - /// copyCoalesceInMBB - Coalesce copies in the specified MBB, putting + /// Coalesce copies in the specified MBB, putting /// copies that cannot yet be coalesced into WorkList. void copyCoalesceInMBB(MachineBasicBlock *MBB); - /// copyCoalesceWorkList - Try to coalesce all copies in CurrList. Return + /// Try to coalesce all copies in CurrList. Return /// true if any progress was made. 
bool copyCoalesceWorkList(MutableArrayRef CurrList); - /// joinCopy - Attempt to join intervals corresponding to SrcReg/DstReg, + /// Attempt to join intervals corresponding to SrcReg/DstReg, /// which are the src/dst of the copy instruction CopyMI. This returns /// true if the copy was successfully coalesced away. If it is not /// currently possible to coalesce this interval, but it may be possible if @@ -136,7 +136,7 @@ namespace { /// 'Again'. bool joinCopy(MachineInstr *TheCopy, bool &Again); - /// joinIntervals - Attempt to join these two intervals. On failure, this + /// Attempt to join these two intervals. On failure, this /// returns false. The output "SrcInt" will not have been modified, so we /// can use this information below to update aliases. bool joinIntervals(CoalescerPair &CP); @@ -147,39 +147,39 @@ namespace { /// Attempt joining with a reserved physreg. bool joinReservedPhysReg(CoalescerPair &CP); - /// adjustCopiesBackFrom - We found a non-trivially-coalescable copy. If + /// We found a non-trivially-coalescable copy. If /// the source value number is defined by a copy from the destination reg /// see if we can merge these two destination reg valno# into a single /// value number, eliminating a copy. bool adjustCopiesBackFrom(const CoalescerPair &CP, MachineInstr *CopyMI); - /// hasOtherReachingDefs - Return true if there are definitions of IntB + /// Return true if there are definitions of IntB /// other than BValNo val# that can reach uses of AValno val# of IntA. bool hasOtherReachingDefs(LiveInterval &IntA, LiveInterval &IntB, VNInfo *AValNo, VNInfo *BValNo); - /// removeCopyByCommutingDef - We found a non-trivially-coalescable copy. + /// We found a non-trivially-coalescable copy. /// If the source value number is defined by a commutable instruction and /// its other operand is coalesced to the copy dest register, see if we /// can transform the copy into a noop by commuting the definition. bool removeCopyByCommutingDef(const CoalescerPair &CP,MachineInstr *CopyMI); - /// reMaterializeTrivialDef - If the source of a copy is defined by a + /// If the source of a copy is defined by a /// trivial computation, replace the copy by rematerialize the definition. bool reMaterializeTrivialDef(CoalescerPair &CP, MachineInstr *CopyMI, bool &IsDefCopy); - /// canJoinPhys - Return true if a physreg copy should be joined. + /// Return true if a physreg copy should be joined. bool canJoinPhys(const CoalescerPair &CP); - /// updateRegDefsUses - Replace all defs and uses of SrcReg to DstReg and + /// Replace all defs and uses of SrcReg to DstReg and /// update the subregister number if it is not zero. If DstReg is a /// physical register and the existing subregister number of the def / use /// being updated is not zero, make sure to set it to the correct physical /// subregister. void updateRegDefsUses(unsigned SrcReg, unsigned DstReg, unsigned SubIdx); - /// eliminateUndefCopy - Handle copies of undef values. + /// Handle copies of undef values. bool eliminateUndefCopy(MachineInstr *CopyMI, const CoalescerPair &CP); public: @@ -192,10 +192,10 @@ namespace { void releaseMemory() override; - /// runOnMachineFunction - pass entry point + /// This is the pass entry point. bool runOnMachineFunction(MachineFunction&) override; - /// print - Implement the dump method. + /// Implement the dump method. 
void print(raw_ostream &O, const Module* = nullptr) const override; }; } /// end anonymous namespace @@ -407,7 +407,7 @@ void RegisterCoalescer::LRE_WillEraseInstruction(MachineInstr *MI) { ErasedInstrs.insert(MI); } -/// adjustCopiesBackFrom - We found a non-trivially-coalescable copy with IntA +/// We found a non-trivially-coalescable copy with IntA /// being the source and IntB being the dest, thus this defines a value number /// in IntB. If the source value number (in IntA) is defined by a copy from B, /// see if we can merge these two pieces of B into a single value number, @@ -512,7 +512,7 @@ bool RegisterCoalescer::adjustCopiesBackFrom(const CoalescerPair &CP, return true; } -/// hasOtherReachingDefs - Return true if there are definitions of IntB +/// Return true if there are definitions of IntB /// other than BValNo val# that can reach uses of AValno val# of IntA. bool RegisterCoalescer::hasOtherReachingDefs(LiveInterval &IntA, LiveInterval &IntB, @@ -542,7 +542,7 @@ bool RegisterCoalescer::hasOtherReachingDefs(LiveInterval &IntA, return false; } -/// removeCopyByCommutingDef - We found a non-trivially-coalescable copy with +/// We found a non-trivially-coalescable copy with /// IntA being the source and IntB being the dest, thus this defines a value /// number in IntB. If the source value number (in IntA) is defined by a /// commutable instruction and its other operand is coalesced to the copy dest @@ -725,7 +725,7 @@ bool RegisterCoalescer::removeCopyByCommutingDef(const CoalescerPair &CP, return true; } -/// reMaterializeTrivialDef - If the source of a copy is defined by a trivial +/// If the source of a copy is defined by a trivial /// computation, replace the copy by rematerialize the definition. bool RegisterCoalescer::reMaterializeTrivialDef(CoalescerPair &CP, MachineInstr *CopyMI, @@ -904,7 +904,7 @@ bool RegisterCoalescer::reMaterializeTrivialDef(CoalescerPair &CP, return true; } -/// eliminateUndefCopy - ProcessImpicitDefs may leave some copies of +/// ProcessImpicitDefs may leave some copies of /// values, it only removes local variables. When we have a copy like: /// /// %vreg1 = COPY %vreg2 @@ -944,11 +944,10 @@ bool RegisterCoalescer::eliminateUndefCopy(MachineInstr *CopyMI, return true; } -/// updateRegDefsUses - Replace all defs and uses of SrcReg to DstReg and -/// update the subregister number if it is not zero. If DstReg is a -/// physical register and the existing subregister number of the def / use -/// being updated is not zero, make sure to set it to the correct physical -/// subregister. +/// Replace all defs and uses of SrcReg to DstReg and update the subregister +/// number if it is not zero. If DstReg is a physical register and the existing +/// subregister number of the def / use being updated is not zero, make sure to +/// set it to the correct physical subregister. void RegisterCoalescer::updateRegDefsUses(unsigned SrcReg, unsigned DstReg, unsigned SubIdx) { @@ -966,7 +965,7 @@ void RegisterCoalescer::updateRegDefsUses(unsigned SrcReg, // the UseMI operands removes them from the SrcReg use-def chain, but when // SrcReg is DstReg we could encounter UseMI twice if it has multiple // operands mentioning the virtual register. - if (SrcReg == DstReg && !Visited.insert(UseMI)) + if (SrcReg == DstReg && !Visited.insert(UseMI).second) continue; SmallVector Ops; @@ -1003,7 +1002,7 @@ void RegisterCoalescer::updateRegDefsUses(unsigned SrcReg, } } -/// canJoinPhys - Return true if a copy involving a physreg should be joined. 
+/// Return true if a copy involving a physreg should be joined. bool RegisterCoalescer::canJoinPhys(const CoalescerPair &CP) { /// Always join simple intervals that are defined by a single copy from a /// reserved register. This doesn't increase register pressure, so it is @@ -1021,7 +1020,7 @@ bool RegisterCoalescer::canJoinPhys(const CoalescerPair &CP) { return false; } -/// joinCopy - Attempt to join intervals corresponding to SrcReg/DstReg, +/// Attempt to join intervals corresponding to SrcReg/DstReg, /// which are the src/dst of the copy instruction CopyMI. This returns true /// if the copy was successfully coalesced away. If it is not currently /// possible to coalesce this interval, but it may be possible if other @@ -1037,6 +1036,22 @@ bool RegisterCoalescer::joinCopy(MachineInstr *CopyMI, bool &Again) { return false; } + if (CP.getNewRC()) { + auto SrcRC = MRI->getRegClass(CP.getSrcReg()); + auto DstRC = MRI->getRegClass(CP.getDstReg()); + unsigned SrcIdx = CP.getSrcIdx(); + unsigned DstIdx = CP.getDstIdx(); + if (CP.isFlipped()) { + std::swap(SrcIdx, DstIdx); + std::swap(SrcRC, DstRC); + } + if (!TRI->shouldCoalesce(CopyMI, SrcRC, SrcIdx, DstRC, DstIdx, + CP.getNewRC())) { + DEBUG(dbgs() << "\tSubtarget bailed on coalescing.\n"); + return false; + } + } + // Dead code elimination. This really should be handled by MachineDCE, but // sometimes dead copies slip through, and we can't generate invalid live // ranges. @@ -1090,9 +1105,14 @@ bool RegisterCoalescer::joinCopy(MachineInstr *CopyMI, bool &Again) { return false; } } else { + // When possible, let DstReg be the larger interval. + if (!CP.isPartial() && LIS->getInterval(CP.getSrcReg()).size() > + LIS->getInterval(CP.getDstReg()).size()) + CP.flip(); + DEBUG({ - dbgs() << "\tConsidering merging to " << CP.getNewRC()->getName() - << " with "; + dbgs() << "\tConsidering merging to " + << TRI->getRegClassName(CP.getNewRC()) << " with "; if (CP.getDstIdx() && CP.getSrcIdx()) dbgs() << PrintReg(CP.getDstReg()) << " in " << TRI->getSubRegIndexName(CP.getDstIdx()) << " and " @@ -1102,11 +1122,6 @@ bool RegisterCoalescer::joinCopy(MachineInstr *CopyMI, bool &Again) { dbgs() << PrintReg(CP.getSrcReg(), TRI) << " in " << PrintReg(CP.getDstReg(), TRI, CP.getSrcIdx()) << '\n'; }); - - // When possible, let DstReg be the larger interval. - if (!CP.isPartial() && LIS->getInterval(CP.getSrcReg()).size() > - LIS->getInterval(CP.getDstReg()).size()) - CP.flip(); } // Okay, attempt to join these two intervals. On failure, this returns false. @@ -1171,7 +1186,9 @@ bool RegisterCoalescer::joinCopy(MachineInstr *CopyMI, bool &Again) { TRI->UpdateRegAllocHint(CP.getSrcReg(), CP.getDstReg(), *MF); DEBUG({ - dbgs() << "\tJoined. Result = "; + dbgs() << "\tSuccess: " << PrintReg(CP.getSrcReg(), TRI, CP.getSrcIdx()) + << " -> " << PrintReg(CP.getDstReg(), TRI, CP.getDstIdx()) << '\n'; + dbgs() << "\tResult = "; if (CP.isPhys()) dbgs() << PrintReg(CP.getDstReg(), TRI); else @@ -1423,7 +1440,7 @@ public: /// Add erased instructions to ErasedInstrs. /// Add foreign virtual registers to ShrinkRegs if their live range ended at /// the erased instrs. - void eraseInstrs(SmallPtrSet &ErasedInstrs, + void eraseInstrs(SmallPtrSetImpl &ErasedInstrs, SmallVectorImpl &ShrinkRegs); /// Get the value assignments suitable for passing to LiveInterval::join. 
@@ -1936,7 +1953,7 @@ void JoinVals::pruneValues(JoinVals &Other, } } -void JoinVals::eraseInstrs(SmallPtrSet &ErasedInstrs, +void JoinVals::eraseInstrs(SmallPtrSetImpl &ErasedInstrs, SmallVectorImpl &ShrinkRegs) { for (unsigned i = 0, e = LI.getNumValNums(); i != e; ++i) { // Get the def location before markUnused() below invalidates it. @@ -2035,8 +2052,7 @@ bool RegisterCoalescer::joinVirtRegs(CoalescerPair &CP) { return true; } -/// joinIntervals - Attempt to join these two intervals. On failure, this -/// returns false. +/// Attempt to join these two intervals. On failure, this returns false. bool RegisterCoalescer::joinIntervals(CoalescerPair &CP) { return CP.isPhys() ? joinReservedPhysReg(CP) : joinVirtRegs(CP); } @@ -2208,8 +2224,8 @@ bool RegisterCoalescer::runOnMachineFunction(MachineFunction &fn) { MF = &fn; MRI = &fn.getRegInfo(); TM = &fn.getTarget(); - TRI = TM->getRegisterInfo(); - TII = TM->getInstrInfo(); + TRI = TM->getSubtargetImpl()->getRegisterInfo(); + TII = TM->getSubtargetImpl()->getInstrInfo(); LIS = &getAnalysis(); AA = &getAnalysis(); Loops = &getAnalysis(); @@ -2250,7 +2266,7 @@ bool RegisterCoalescer::runOnMachineFunction(MachineFunction &fn) { continue; if (MRI->recomputeRegClass(Reg, *TM)) { DEBUG(dbgs() << PrintReg(Reg) << " inflated to " - << MRI->getRegClass(Reg)->getName() << '\n'); + << TRI->getRegClassName(MRI->getRegClass(Reg)) << '\n'); ++NumInflated; } } @@ -2261,7 +2277,7 @@ bool RegisterCoalescer::runOnMachineFunction(MachineFunction &fn) { return true; } -/// print - Implement the dump method. +/// Implement the dump method. void RegisterCoalescer::print(raw_ostream &O, const Module* m) const { LIS->print(O, m); } diff --git a/lib/CodeGen/RegisterCoalescer.h b/lib/CodeGen/RegisterCoalescer.h index e57ceab..04067a1 100644 --- a/lib/CodeGen/RegisterCoalescer.h +++ b/lib/CodeGen/RegisterCoalescer.h @@ -12,8 +12,8 @@ // //===----------------------------------------------------------------------===// -#ifndef LLVM_CODEGEN_REGISTER_COALESCER_H -#define LLVM_CODEGEN_REGISTER_COALESCER_H +#ifndef LLVM_LIB_CODEGEN_REGISTERCOALESCER_H +#define LLVM_LIB_CODEGEN_REGISTERCOALESCER_H namespace llvm { @@ -22,38 +22,36 @@ namespace llvm { class TargetRegisterClass; class TargetInstrInfo; - /// CoalescerPair - A helper class for register coalescers. When deciding if + /// A helper class for register coalescers. When deciding if /// two registers can be coalesced, CoalescerPair can determine if a copy /// instruction would become an identity copy after coalescing. class CoalescerPair { const TargetRegisterInfo &TRI; - /// DstReg - The register that will be left after coalescing. It can be a + /// The register that will be left after coalescing. It can be a /// virtual or physical register. unsigned DstReg; - /// SrcReg - the virtual register that will be coalesced into dstReg. + /// The virtual register that will be coalesced into dstReg. unsigned SrcReg; - /// DstIdx - The sub-register index of the old DstReg in the new coalesced - /// register. + /// The sub-register index of the old DstReg in the new coalesced register. unsigned DstIdx; - /// SrcIdx - The sub-register index of the old SrcReg in the new coalesced - /// register. + /// The sub-register index of the old SrcReg in the new coalesced register. unsigned SrcIdx; - /// Partial - True when the original copy was a partial subregister copy. + /// True when the original copy was a partial subregister copy. bool Partial; - /// CrossClass - True when both regs are virtual, and newRC is constrained. 
+ /// True when both regs are virtual and newRC is constrained. bool CrossClass; - /// Flipped - True when DstReg and SrcReg are reversed from the original + /// True when DstReg and SrcReg are reversed from the original /// copy instruction. bool Flipped; - /// NewRC - The register class of the coalesced register, or NULL if DstReg + /// The register class of the coalesced register, or NULL if DstReg /// is a physreg. This register class may be a super-register of both /// SrcReg and DstReg. const TargetRegisterClass *NewRC; @@ -70,49 +68,47 @@ namespace llvm { : TRI(tri), DstReg(PhysReg), SrcReg(VirtReg), DstIdx(0), SrcIdx(0), Partial(false), CrossClass(false), Flipped(false), NewRC(nullptr) {} - /// setRegisters - set registers to match the copy instruction MI. Return + /// Set registers to match the copy instruction MI. Return /// false if MI is not a coalescable copy instruction. bool setRegisters(const MachineInstr*); - /// flip - Swap SrcReg and DstReg. Return false if swapping is impossible + /// Swap SrcReg and DstReg. Return false if swapping is impossible /// because DstReg is a physical register, or SubIdx is set. bool flip(); - /// isCoalescable - Return true if MI is a copy instruction that will become + /// Return true if MI is a copy instruction that will become /// an identity copy after coalescing. bool isCoalescable(const MachineInstr*) const; - /// isPhys - Return true if DstReg is a physical register. + /// Return true if DstReg is a physical register. bool isPhys() const { return !NewRC; } - /// isPartial - Return true if the original copy instruction did not copy + /// Return true if the original copy instruction did not copy /// the full register, but was a subreg operation. bool isPartial() const { return Partial; } - /// isCrossClass - Return true if DstReg is virtual and NewRC is a smaller + /// Return true if DstReg is virtual and NewRC is a smaller /// register class than DstReg's. bool isCrossClass() const { return CrossClass; } - /// isFlipped - Return true when getSrcReg is the register being defined by + /// Return true when getSrcReg is the register being defined by /// the original copy instruction. bool isFlipped() const { return Flipped; } - /// getDstReg - Return the register (virtual or physical) that will remain + /// Return the register (virtual or physical) that will remain /// after coalescing. unsigned getDstReg() const { return DstReg; } - /// getSrcReg - Return the virtual register that will be coalesced away. + /// Return the virtual register that will be coalesced away. unsigned getSrcReg() const { return SrcReg; } - /// getDstIdx - Return the subregister index that DstReg will be coalesced - /// into, or 0. + /// Return the subregister index that DstReg will be coalesced into, or 0. unsigned getDstIdx() const { return DstIdx; } - /// getSrcIdx - Return the subregister index that SrcReg will be coalesced - /// into, or 0. + /// Return the subregister index that SrcReg will be coalesced into, or 0. unsigned getSrcIdx() const { return SrcIdx; } - /// getNewRC - Return the register class of the coalesced register. + /// Return the register class of the coalesced register. 
const TargetRegisterClass *getNewRC() const { return NewRC; } }; } // End llvm namespace diff --git a/lib/CodeGen/RegisterPressure.cpp b/lib/CodeGen/RegisterPressure.cpp index 617e459..9925efb 100644 --- a/lib/CodeGen/RegisterPressure.cpp +++ b/lib/CodeGen/RegisterPressure.cpp @@ -19,7 +19,6 @@ #include "llvm/CodeGen/RegisterClassInfo.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Target/TargetMachine.h" using namespace llvm; @@ -185,7 +184,7 @@ void RegPressureTracker::init(const MachineFunction *mf, reset(); MF = mf; - TRI = MF->getTarget().getRegisterInfo(); + TRI = MF->getSubtarget().getRegisterInfo(); RCI = rci; MRI = &MF->getRegInfo(); MBB = mbb; diff --git a/lib/CodeGen/RegisterScavenging.cpp b/lib/CodeGen/RegisterScavenging.cpp index 72b6285..7626dd2 100644 --- a/lib/CodeGen/RegisterScavenging.cpp +++ b/lib/CodeGen/RegisterScavenging.cpp @@ -24,24 +24,16 @@ #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetInstrInfo.h" -#include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/Target/TargetSubtargetInfo.h" using namespace llvm; #define DEBUG_TYPE "reg-scavenging" -/// setUsed - Set the register and its sub-registers as being used. -void RegScavenger::setUsed(unsigned Reg) { - for (MCSubRegIterator SubRegs(Reg, TRI, /*IncludeSelf=*/true); - SubRegs.isValid(); ++SubRegs) - RegsAvailable.reset(*SubRegs); -} - -bool RegScavenger::isAliasUsed(unsigned Reg) const { - for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI) - if (isUsed(*AI, *AI == Reg)) - return true; - return false; +/// setUsed - Set the register units of this register as used. +void RegScavenger::setRegUsed(unsigned Reg) { + for (MCRegUnitIterator RUI(Reg, TRI); RUI.isValid(); ++RUI) + RegUnitsAvailable.reset(*RUI); } void RegScavenger::initRegState() { @@ -51,8 +43,8 @@ void RegScavenger::initRegState() { I->Restore = nullptr; } - // All registers started out unused. - RegsAvailable.set(); + // All register units start out unused. + RegUnitsAvailable.set(); if (!MBB) return; @@ -60,22 +52,21 @@ void RegScavenger::initRegState() { // Live-in registers are in use. for (MachineBasicBlock::livein_iterator I = MBB->livein_begin(), E = MBB->livein_end(); I != E; ++I) - setUsed(*I); + setRegUsed(*I); // Pristine CSRs are also unavailable. BitVector PR = MBB->getParent()->getFrameInfo()->getPristineRegs(MBB); for (int I = PR.find_first(); I>0; I = PR.find_next(I)) - setUsed(I); + setRegUsed(I); } void RegScavenger::enterBasicBlock(MachineBasicBlock *mbb) { MachineFunction &MF = *mbb->getParent(); - const TargetMachine &TM = MF.getTarget(); - TII = TM.getInstrInfo(); - TRI = TM.getRegisterInfo(); + TII = MF.getSubtarget().getInstrInfo(); + TRI = MF.getSubtarget().getRegisterInfo(); MRI = &MF.getRegInfo(); - assert((NumPhysRegs == 0 || NumPhysRegs == TRI->getNumRegs()) && + assert((NumRegUnits == 0 || NumRegUnits == TRI->getNumRegUnits()) && "Target changed?"); // It is not possible to use the register scavenger after late optimization @@ -85,17 +76,11 @@ void RegScavenger::enterBasicBlock(MachineBasicBlock *mbb) { // Self-initialize. if (!MBB) { - NumPhysRegs = TRI->getNumRegs(); - RegsAvailable.resize(NumPhysRegs); - KillRegs.resize(NumPhysRegs); - DefRegs.resize(NumPhysRegs); - - // Create callee-saved registers bitvector. 
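The register scavenger hunks above replace whole-register and sub-register bookkeeping with register units. A small sketch of the unit-based idiom (free-standing helpers for illustration; the scavenger keeps this state in member bit vectors):

    // A register counts as used if any of its register units is unavailable;
    // marking it used clears the availability bit of every unit it covers.
    #include "llvm/ADT/BitVector.h"
    #include "llvm/MC/MCRegisterInfo.h"

    using namespace llvm;

    static void markRegUsed(unsigned Reg, const MCRegisterInfo &MCRI,
                            BitVector &UnitsAvailable) {
      for (MCRegUnitIterator RUI(Reg, &MCRI); RUI.isValid(); ++RUI)
        UnitsAvailable.reset(*RUI);
    }

    static bool isRegUsed(unsigned Reg, const MCRegisterInfo &MCRI,
                          const BitVector &UnitsAvailable) {
      for (MCRegUnitIterator RUI(Reg, &MCRI); RUI.isValid(); ++RUI)
        if (!UnitsAvailable.test(*RUI))
          return true; // some unit of Reg overlaps a live register
      return false;
    }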
- CalleeSavedRegs.resize(NumPhysRegs); - const MCPhysReg *CSRegs = TRI->getCalleeSavedRegs(&MF); - if (CSRegs != nullptr) - for (unsigned i = 0; CSRegs[i]; ++i) - CalleeSavedRegs.set(CSRegs[i]); + NumRegUnits = TRI->getNumRegUnits(); + RegUnitsAvailable.resize(NumRegUnits); + KillRegUnits.resize(NumRegUnits); + DefRegUnits.resize(NumRegUnits); + TmpRegUnits.resize(NumRegUnits); } MBB = mbb; @@ -104,10 +89,9 @@ void RegScavenger::enterBasicBlock(MachineBasicBlock *mbb) { Tracking = false; } -void RegScavenger::addRegWithSubRegs(BitVector &BV, unsigned Reg) { - for (MCSubRegIterator SubRegs(Reg, TRI, /*IncludeSelf=*/true); - SubRegs.isValid(); ++SubRegs) - BV.set(*SubRegs); +void RegScavenger::addRegUnits(BitVector &BV, unsigned Reg) { + for (MCRegUnitIterator RUI(Reg, TRI); RUI.isValid(); ++RUI) + BV.set(*RUI); } void RegScavenger::determineKillsAndDefs() { @@ -122,12 +106,25 @@ void RegScavenger::determineKillsAndDefs() { // predicated, conservatively assume "kill" markers do not actually kill the // register. Similarly ignores "dead" markers. bool isPred = TII->isPredicated(MI); - KillRegs.reset(); - DefRegs.reset(); + KillRegUnits.reset(); + DefRegUnits.reset(); for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { const MachineOperand &MO = MI->getOperand(i); - if (MO.isRegMask()) - (isPred ? DefRegs : KillRegs).setBitsNotInMask(MO.getRegMask()); + if (MO.isRegMask()) { + + TmpRegUnits.clear(); + for (unsigned RU = 0, RUEnd = TRI->getNumRegUnits(); RU != RUEnd; ++RU) { + for (MCRegUnitRootIterator RURI(RU, TRI); RURI.isValid(); ++RURI) { + if (MO.clobbersPhysReg(*RURI)) { + TmpRegUnits.set(RU); + break; + } + } + } + + // Apply the mask. + (isPred ? DefRegUnits : KillRegUnits) |= TmpRegUnits; + } if (!MO.isReg()) continue; unsigned Reg = MO.getReg(); @@ -139,13 +136,13 @@ void RegScavenger::determineKillsAndDefs() { if (MO.isUndef()) continue; if (!isPred && MO.isKill()) - addRegWithSubRegs(KillRegs, Reg); + addRegUnits(KillRegUnits, Reg); } else { assert(MO.isDef()); if (!isPred && MO.isDead()) - addRegWithSubRegs(KillRegs, Reg); + addRegUnits(KillRegUnits, Reg); else - addRegWithSubRegs(DefRegs, Reg); + addRegUnits(DefRegUnits, Reg); } } } @@ -158,8 +155,8 @@ void RegScavenger::unprocess() { determineKillsAndDefs(); // Commit the changes. - setUsed(KillRegs); - setUnused(DefRegs); + setUsed(KillRegUnits); + setUnused(DefRegUnits); } if (MBBI == MBB->begin()) { @@ -208,7 +205,7 @@ void RegScavenger::forward() { if (MO.isUse()) { if (MO.isUndef()) continue; - if (!isUsed(Reg)) { + if (!isRegUsed(Reg)) { // Check if it's partial live: e.g. // D0 = insert_subreg D0, S0 // ... D0 @@ -219,15 +216,23 @@ void RegScavenger::forward() { // insert_subreg around causes both correctness and performance issues. bool SubUsed = false; for (MCSubRegIterator SubRegs(Reg, TRI); SubRegs.isValid(); ++SubRegs) - if (isUsed(*SubRegs)) { + if (isRegUsed(*SubRegs)) { SubUsed = true; break; } - if (!SubUsed) { + bool SuperUsed = false; + for (MCSuperRegIterator SR(Reg, TRI); SR.isValid(); ++SR) { + if (isRegUsed(*SR)) { + SuperUsed = true; + break; + } + } + if (!SubUsed && !SuperUsed) { MBB->getParent()->verify(nullptr, "In Register Scavenger"); llvm_unreachable("Using an undefined register!"); } (void)SubUsed; + (void)SuperUsed; } } else { assert(MO.isDef()); @@ -243,23 +248,23 @@ void RegScavenger::forward() { #endif // NDEBUG // Commit the changes. 
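The determineKillsAndDefs hunk above converts a register-mask operand into the set of clobbered register units by walking each unit's root registers. A free-standing sketch of that conversion (the wrapper function is illustrative):

    // A register unit is clobbered by a regmask if any of its root registers
    // is clobbered; checking the roots suffices because all other registers
    // containing the unit are built from them.
    #include "llvm/ADT/BitVector.h"
    #include "llvm/CodeGen/MachineOperand.h"
    #include "llvm/Target/TargetRegisterInfo.h"
    #include <cassert>

    using namespace llvm;

    static BitVector clobberedRegUnits(const MachineOperand &MaskMO,
                                       const TargetRegisterInfo *TRI) {
      assert(MaskMO.isRegMask() && "expected a register-mask operand");
      BitVector Units(TRI->getNumRegUnits());
      for (unsigned RU = 0, E = TRI->getNumRegUnits(); RU != E; ++RU)
        for (MCRegUnitRootIterator Root(RU, TRI); Root.isValid(); ++Root)
          if (MaskMO.clobbersPhysReg(*Root)) {
            Units.set(RU);
            break;
          }
      return Units;
    }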
- setUnused(KillRegs); - setUsed(DefRegs); + setUnused(KillRegUnits); + setUsed(DefRegUnits); } -void RegScavenger::getRegsUsed(BitVector &used, bool includeReserved) { - used = RegsAvailable; - used.flip(); - if (includeReserved) - used |= MRI->getReservedRegs(); - else - used.reset(MRI->getReservedRegs()); +bool RegScavenger::isRegUsed(unsigned Reg, bool includeReserved) const { + if (includeReserved && isReserved(Reg)) + return true; + for (MCRegUnitIterator RUI(Reg, TRI); RUI.isValid(); ++RUI) + if (!RegUnitsAvailable.test(*RUI)) + return true; + return false; } unsigned RegScavenger::FindUnusedReg(const TargetRegisterClass *RC) const { for (TargetRegisterClass::iterator I = RC->begin(), E = RC->end(); I != E; ++I) - if (!isAliasUsed(*I)) { + if (!isRegUsed(*I)) { DEBUG(dbgs() << "Scavenger found unused reg: " << TRI->getName(*I) << "\n"); return *I; @@ -273,13 +278,13 @@ BitVector RegScavenger::getRegsAvailable(const TargetRegisterClass *RC) { BitVector Mask(TRI->getNumRegs()); for (TargetRegisterClass::iterator I = RC->begin(), E = RC->end(); I != E; ++I) - if (!isAliasUsed(*I)) + if (!isRegUsed(*I)) Mask.set(*I); return Mask; } /// findSurvivorReg - Return the candidate register that is unused for the -/// longest after StargMII. UseMI is set to the instruction where the search +/// longest after StartMII. UseMI is set to the instruction where the search /// stopped. /// /// No more than InstrLimit instructions are inspected. @@ -375,9 +380,7 @@ unsigned RegScavenger::scavengeRegister(const TargetRegisterClass *RC, } // Try to find a register that's unused if there is one, as then we won't - // have to spill. Search explicitly rather than masking out based on - // RegsAvailable, as RegsAvailable does not take aliases into account. - // That's what getRegsAvailable() is for. + // have to spill. BitVector Available = getRegsAvailable(RC); Available &= Candidates; if (Available.any()) @@ -388,7 +391,7 @@ unsigned RegScavenger::scavengeRegister(const TargetRegisterClass *RC, unsigned SReg = findSurvivorReg(I, Candidates, 25, UseMI); // If we found an unused register there is no reason to spill it. 
- if (!isAliasUsed(SReg)) { + if (!isRegUsed(SReg)) { DEBUG(dbgs() << "Scavenged register: " << TRI->getName(SReg) << "\n"); return SReg; } diff --git a/lib/CodeGen/ScheduleDAG.cpp b/lib/CodeGen/ScheduleDAG.cpp index 6a2a080..6f8b337 100644 --- a/lib/CodeGen/ScheduleDAG.cpp +++ b/lib/CodeGen/ScheduleDAG.cpp @@ -21,6 +21,7 @@ #include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/Target/TargetSubtargetInfo.h" #include using namespace llvm; @@ -35,11 +36,9 @@ static cl::opt StressSchedOpt( void SchedulingPriorityQueue::anchor() { } ScheduleDAG::ScheduleDAG(MachineFunction &mf) - : TM(mf.getTarget()), - TII(TM.getInstrInfo()), - TRI(TM.getRegisterInfo()), - MF(mf), MRI(mf.getRegInfo()), - EntrySU(), ExitSU() { + : TM(mf.getTarget()), TII(TM.getSubtargetImpl()->getInstrInfo()), + TRI(TM.getSubtargetImpl()->getRegisterInfo()), MF(mf), + MRI(mf.getRegInfo()), EntrySU(), ExitSU() { #ifndef NDEBUG StressSched = StressSchedOpt; #endif diff --git a/lib/CodeGen/ScheduleDAGInstrs.cpp b/lib/CodeGen/ScheduleDAGInstrs.cpp index 0f8b21c..d8d8422 100644 --- a/lib/CodeGen/ScheduleDAGInstrs.cpp +++ b/lib/CodeGen/ScheduleDAGInstrs.cpp @@ -50,12 +50,11 @@ static cl::opt UseTBAA("use-tbaa-in-sched-mi", cl::Hidden, cl::init(true), cl::desc("Enable use of TBAA during MI GAD construction")); ScheduleDAGInstrs::ScheduleDAGInstrs(MachineFunction &mf, - const MachineLoopInfo &mli, - const MachineDominatorTree &mdt, + const MachineLoopInfo *mli, bool IsPostRAFlag, bool RemoveKillFlags, LiveIntervals *lis) - : ScheduleDAG(mf), MLI(mli), MDT(mdt), MFI(mf.getFrameInfo()), LIS(lis), + : ScheduleDAG(mf), MLI(mli), MFI(mf.getFrameInfo()), LIS(lis), IsPostRA(IsPostRAFlag), RemoveKillFlags(RemoveKillFlags), CanHandleTerminators(false), FirstDbgValue(nullptr) { assert((IsPostRA || LIS) && "PreRA scheduling requires LiveIntervals"); @@ -64,7 +63,7 @@ ScheduleDAGInstrs::ScheduleDAGInstrs(MachineFunction &mf, "Virtual registers must be removed prior to PostRA scheduling"); const TargetSubtargetInfo &ST = TM.getSubtarget(); - SchedModel.init(*ST.getSchedModel(), &ST, TII); + SchedModel.init(ST.getSchedModel(), &ST, TII); } /// getUnderlyingObjectFromInt - This is the function that does the work of @@ -110,7 +109,7 @@ static void getUnderlyingObjects(const Value *V, for (SmallVectorImpl::iterator I = Objs.begin(), IE = Objs.end(); I != IE; ++I) { V = *I; - if (!Visited.insert(V)) + if (!Visited.insert(V).second) continue; if (Operator::getOpcode(V) == Instruction::IntToPtr) { const Value *O = @@ -512,9 +511,18 @@ static inline bool isUnsafeMemoryObject(MachineInstr *MI, static bool MIsNeedChainEdge(AliasAnalysis *AA, const MachineFrameInfo *MFI, MachineInstr *MIa, MachineInstr *MIb) { + const MachineFunction *MF = MIa->getParent()->getParent(); + const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo(); + // Cover a trivial case - no edge is need to itself. if (MIa == MIb) return false; + + // Let the target decide if memory accesses cannot possibly overlap. + if ((MIa->mayLoad() || MIa->mayStore()) && + (MIb->mayLoad() || MIb->mayStore())) + if (TII->areMemAccessesTriviallyDisjoint(MIa, MIb, AA)) + return false; // FIXME: Need to handle multiple memory operands to support all targets. 
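Several hunks here (getUnderlyingObjects, iterateChainSucc) adapt to SmallPtrSet::insert returning an (iterator, inserted) pair instead of a bool. A short sketch of the resulting dedup idiom (the helper is illustrative):

    // insert().second is true only when the element was newly inserted, which
    // makes visited-set checks read as "skip if already seen".
    #include "llvm/ADT/SmallPtrSet.h"
    #include "llvm/ADT/SmallVector.h"

    template <typename T>
    static unsigned countUniquePointers(const llvm::SmallVectorImpl<T *> &Items) {
      llvm::SmallPtrSet<T *, 16> Seen;
      unsigned Unique = 0;
      for (T *Item : Items)
        if (Seen.insert(Item).second) // false for repeats
          ++Unique;
      return Unique;
    }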
if (!MIa->hasOneMemOperand() || !MIb->hasOneMemOperand()) @@ -563,9 +571,9 @@ static bool MIsNeedChainEdge(AliasAnalysis *AA, const MachineFrameInfo *MFI, AliasAnalysis::AliasResult AAResult = AA->alias( AliasAnalysis::Location(MMOa->getValue(), Overlapa, - UseTBAA ? MMOa->getTBAAInfo() : nullptr), + UseTBAA ? MMOa->getAAInfo() : AAMDNodes()), AliasAnalysis::Location(MMOb->getValue(), Overlapb, - UseTBAA ? MMOb->getTBAAInfo() : nullptr)); + UseTBAA ? MMOb->getAAInfo() : AAMDNodes())); return (AAResult != AliasAnalysis::NoAlias); } @@ -575,12 +583,12 @@ static bool MIsNeedChainEdge(AliasAnalysis *AA, const MachineFrameInfo *MFI, static unsigned iterateChainSucc(AliasAnalysis *AA, const MachineFrameInfo *MFI, SUnit *SUa, SUnit *SUb, SUnit *ExitSU, unsigned *Depth, - SmallPtrSet &Visited) { + SmallPtrSetImpl &Visited) { if (!SUa || !SUb || SUb == ExitSU) return *Depth; // Remember visited nodes. - if (!Visited.insert(SUb)) + if (!Visited.insert(SUb).second) return *Depth; // If there is _some_ dependency already in place, do not // descend any further. @@ -656,7 +664,7 @@ void addChainDependency (AliasAnalysis *AA, const MachineFrameInfo *MFI, bool isNormalMemory = false) { // If this is a false dependency, // do not add the edge, but rememeber the rejected node. - if (!AA || MIsNeedChainEdge(AA, MFI, SUa->getInstr(), SUb->getInstr())) { + if (MIsNeedChainEdge(AA, MFI, SUa->getInstr(), SUb->getInstr())) { SDep Dep(SUa, isNormalMemory ? SDep::MayAliasMem : SDep::Barrier); Dep.setLatency(TrueMemOrderLatency); SUb->addPred(Dep); diff --git a/lib/CodeGen/ScheduleDAGPrinter.cpp b/lib/CodeGen/ScheduleDAGPrinter.cpp index f59c6cf..b2e4617 100644 --- a/lib/CodeGen/ScheduleDAGPrinter.cpp +++ b/lib/CodeGen/ScheduleDAGPrinter.cpp @@ -21,7 +21,6 @@ #include "llvm/Support/Debug.h" #include "llvm/Support/GraphWriter.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetRegisterInfo.h" #include using namespace llvm; diff --git a/lib/CodeGen/ScoreboardHazardRecognizer.cpp b/lib/CodeGen/ScoreboardHazardRecognizer.cpp index 004c685..38833a4 100644 --- a/lib/CodeGen/ScoreboardHazardRecognizer.cpp +++ b/lib/CodeGen/ScoreboardHazardRecognizer.cpp @@ -78,7 +78,7 @@ ScoreboardHazardRecognizer(const InstrItineraryData *II, DEBUG(dbgs() << "Disabled scoreboard hazard recognizer\n"); else { // A nonempty itinerary must have a SchedModel. 
- IssueWidth = ItinData->SchedModel->IssueWidth; + IssueWidth = ItinData->SchedModel.IssueWidth; DEBUG(dbgs() << "Using scoreboard hazard recognizer: Depth = " << ScoreboardDepth << '\n'); } diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index 7c42e4d..a1291ed 100644 --- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -17,7 +17,9 @@ //===----------------------------------------------------------------------===// #include "llvm/CodeGen/SelectionDAG.h" +#include "llvm/ADT/SmallBitVector.h" #include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/SetVector.h" #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/CodeGen/MachineFrameInfo.h" @@ -32,7 +34,6 @@ #include "llvm/Support/MathExtras.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetLowering.h" -#include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetOptions.h" #include "llvm/Target/TargetRegisterInfo.h" #include "llvm/Target/TargetSubtargetInfo.h" @@ -76,6 +77,10 @@ namespace { "slicing"), cl::init(false)); + static cl::opt + MaySplitLoadIndex("combiner-split-load-index", cl::Hidden, cl::init(true), + cl::desc("DAG combiner may split indexing from loads")); + //------------------------------ DAGCombiner ---------------------------------// class DAGCombiner { @@ -87,56 +92,70 @@ namespace { bool LegalTypes; bool ForCodeSize; - // Worklist of all of the nodes that need to be simplified. - // - // This has the semantics that when adding to the worklist, - // the item added must be next to be processed. It should - // also only appear once. The naive approach to this takes - // linear time. - // - // To reduce the insert/remove time to logarithmic, we use - // a set and a vector to maintain our worklist. - // - // The set contains the items on the worklist, but does not - // maintain the order they should be visited. - // - // The vector maintains the order nodes should be visited, but may - // contain duplicate or removed nodes. When choosing a node to - // visit, we pop off the order stack until we find an item that is - // also in the contents set. All operations are O(log N). - SmallPtrSet WorkListContents; - SmallVector WorkListOrder; + /// \brief Worklist of all of the nodes that need to be simplified. + /// + /// This must behave as a stack -- new nodes to process are pushed onto the + /// back and when processing we pop off of the back. + /// + /// The worklist will not contain duplicates but may contain null entries + /// due to nodes being deleted from the underlying DAG. + SmallVector Worklist; + + /// \brief Mapping from an SDNode to its position on the worklist. + /// + /// This is used to find and remove nodes from the worklist (by nulling + /// them) when they are deleted from the underlying DAG. It relies on + /// stable indices of nodes within the worklist. + DenseMap WorklistMap; + + /// \brief Set of nodes which have been combined (at least once). + /// + /// This is used to allow us to reliably add any operands of a DAG node + /// which have not yet been combined to the worklist. + SmallPtrSet CombinedNodes; // AA - Used for DAG load/store alias analysis. AliasAnalysis &AA; - /// AddUsersToWorkList - When an instruction is simplified, add all users of - /// the instruction to the work lists because they might get more simplified - /// now. 
- /// - void AddUsersToWorkList(SDNode *N) { + /// When an instruction is simplified, add all users of the instruction to + /// the work lists because they might get more simplified now. + void AddUsersToWorklist(SDNode *N) { for (SDNode *Node : N->uses()) - AddToWorkList(Node); + AddToWorklist(Node); } - /// visit - call the node-specific routine that knows how to fold each - /// particular type of node. + /// Call the node-specific routine that folds each particular type of node. SDValue visit(SDNode *N); public: - /// AddToWorkList - Add to the work list making sure its instance is at the - /// back (next to be processed.) - void AddToWorkList(SDNode *N) { - WorkListContents.insert(N); - WorkListOrder.push_back(N); + /// Add to the worklist making sure its instance is at the back (next to be + /// processed.) + void AddToWorklist(SDNode *N) { + // Skip handle nodes as they can't usefully be combined and confuse the + // zero-use deletion strategy. + if (N->getOpcode() == ISD::HANDLENODE) + return; + + if (WorklistMap.insert(std::make_pair(N, Worklist.size())).second) + Worklist.push_back(N); } - /// removeFromWorkList - remove all instances of N from the worklist. - /// - void removeFromWorkList(SDNode *N) { - WorkListContents.erase(N); + /// Remove all instances of N from the worklist. + void removeFromWorklist(SDNode *N) { + CombinedNodes.erase(N); + + auto It = WorklistMap.find(N); + if (It == WorklistMap.end()) + return; // Not in the worklist. + + // Null out the entry rather than erasing it to avoid a linear operation. + Worklist[It->second] = nullptr; + WorklistMap.erase(It); } + void deleteAndRecombine(SDNode *N); + bool recursivelyDeleteUnusedNodes(SDNode *N); + SDValue CombineTo(SDNode *N, const SDValue *To, unsigned NumTo, bool AddTo = true); @@ -154,9 +173,9 @@ namespace { private: - /// SimplifyDemandedBits - Check the specified integer node value to see if - /// it can be simplified or if things it uses can be simplified by bit - /// propagation. If so, return true. + /// Check the specified integer node value to see if it can be simplified or + /// if things it uses can be simplified by bit propagation. + /// If so, return true. bool SimplifyDemandedBits(SDValue Op) { unsigned BitWidth = Op.getValueType().getScalarType().getSizeInBits(); APInt Demanded = APInt::getAllOnesValue(BitWidth); @@ -167,6 +186,7 @@ namespace { bool CombineToPreIndexedLoadStore(SDNode *N); bool CombineToPostIndexedLoadStore(SDNode *N); + SDValue SplitIndexingFromLoad(LoadSDNode *LD); bool SliceUpLoad(SDNode *N); /// \brief Replace an ISD::EXTRACT_VECTOR_ELT of a load with a narrowed @@ -192,7 +212,7 @@ namespace { SDValue Trunc, SDValue ExtLoad, SDLoc DL, ISD::NodeType ExtType); - /// combine - call the node-specific routine that knows how to fold each + /// Call the node-specific routine that knows how to fold each /// particular type of node. If that doesn't do anything, try the /// target-specific DAG combines. 
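The worklist rewrite above keeps two structures in sync: a vector supplying LIFO order (which may hold nulls) and a map from node to its slot, so membership tests and removals are O(1) and removal never shifts the vector. A simplified, self-contained model of that scheme (the class name and template are illustrative):

    #include "llvm/ADT/DenseMap.h"
    #include "llvm/ADT/SmallVector.h"
    #include <utility>

    template <typename NodeT> class CombinerWorklist {
      llvm::SmallVector<NodeT *, 64> Order;       // LIFO order, may contain nulls
      llvm::DenseMap<NodeT *, unsigned> Position; // node -> index into Order

    public:
      void add(NodeT *N) {
        // Only push if N is not already queued; the map enforces uniqueness.
        if (Position.insert(std::make_pair(N, Order.size())).second)
          Order.push_back(N);
      }

      void remove(NodeT *N) {
        auto It = Position.find(N);
        if (It == Position.end())
          return;
        Order[It->second] = nullptr; // null out instead of erasing (O(1))
        Position.erase(It);
      }

      NodeT *pop() {
        // Skip null slots left behind by remove().
        while (!Order.empty())
          if (NodeT *N = Order.pop_back_val()) {
            Position.erase(N);
            return N;
          }
        return nullptr;
      }

      bool empty() const { return Position.empty(); }
    };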
SDValue combine(SDNode *N); @@ -256,6 +276,7 @@ namespace { SDValue visitFMA(SDNode *N); SDValue visitFDIV(SDNode *N); SDValue visitFREM(SDNode *N); + SDValue visitFSQRT(SDNode *N); SDValue visitFCOPYSIGN(SDNode *N); SDValue visitSINT_TO_FP(SDNode *N); SDValue visitUINT_TO_FP(SDNode *N); @@ -269,6 +290,8 @@ namespace { SDValue visitFCEIL(SDNode *N); SDValue visitFTRUNC(SDNode *N); SDValue visitFFLOOR(SDNode *N); + SDValue visitFMINNUM(SDNode *N); + SDValue visitFMAXNUM(SDNode *N); SDValue visitBRCOND(SDNode *N); SDValue visitBR_CC(SDNode *N); SDValue visitLOAD(SDNode *N); @@ -304,7 +327,12 @@ namespace { SDValue CombineConsecutiveLoads(SDNode *N, EVT VT); SDValue ConstantFoldBITCASTofBUILD_VECTOR(SDNode *, EVT); SDValue BuildSDIV(SDNode *N); + SDValue BuildSDIVPow2(SDNode *N); SDValue BuildUDIV(SDNode *N); + SDValue BuildReciprocalEstimate(SDValue Op); + SDValue BuildRsqrtEstimate(SDValue Op); + SDValue BuildRsqrtNROneConst(SDValue Op, SDValue Est, unsigned Iterations); + SDValue BuildRsqrtNRTwoConst(SDValue Op, SDValue Est, unsigned Iterations); SDValue MatchBSwapHWordLow(SDNode *N, SDValue N0, SDValue N1, bool DemandHighBits = true); SDValue MatchBSwapHWord(SDNode *N, SDValue N0, SDValue N1); @@ -321,17 +349,16 @@ namespace { SDValue GetDemandedBits(SDValue V, const APInt &Mask); - /// GatherAllAliases - Walk up chain skipping non-aliasing memory nodes, + /// Walk up chain skipping non-aliasing memory nodes, /// looking for aliasing nodes and adding them to the Aliases vector. void GatherAllAliases(SDNode *N, SDValue OriginalChain, SmallVectorImpl &Aliases); - /// isAlias - Return true if there is any possibility that the two addresses - /// overlap. + /// Return true if there is any possibility that the two addresses overlap. bool isAlias(LSBaseSDNode *Op0, LSBaseSDNode *Op1) const; - /// FindBetterChain - Walk up chain skipping non-aliasing memory nodes, - /// looking for a better chain (aliasing node.) + /// Walk up chain skipping non-aliasing memory nodes, looking for a better + /// chain (aliasing node.) SDValue FindBetterChain(SDNode *N, SDValue Chain); /// Merge consecutive store operations into a wide store. @@ -359,13 +386,13 @@ namespace { FnAttrs.hasAttribute(AttributeSet::FunctionIndex, Attribute::MinSize); } - /// Run - runs the dag combiner on all nodes in the work list + /// Runs the dag combiner on all nodes in the work list void Run(CombineLevel AtLevel); SelectionDAG &getDAG() const { return DAG; } - /// getShiftAmountTy - Returns a type large enough to hold any valid - /// shift amount - before type legalization these can be huge. + /// Returns a type large enough to hold any valid shift amount - before type + /// legalization these can be huge. EVT getShiftAmountTy(EVT LHSTy) { assert(LHSTy.isInteger() && "Shift amount is not an integer type!"); if (LHSTy.isVector()) @@ -374,15 +401,14 @@ namespace { : TLI.getPointerTy(); } - /// isTypeLegal - This method returns true if we are running before type - /// legalization or if the specified VT is legal. + /// This method returns true if we are running before type legalization or + /// if the specified VT is legal. 
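The BuildReciprocalEstimate/BuildRsqrtEstimate declarations added above refine a target-supplied estimate with Newton-Raphson steps. As a scalar model of the one-constant reciprocal-square-root refinement (the DAG helpers build the corresponding arithmetic as nodes; the function name and iteration count here are illustrative):

    // Newton-Raphson for f(e) = 1/e^2 - A converges to 1/sqrt(A):
    //   e' = e * (1.5 - 0.5 * A * e * e)
    // Each step roughly doubles the number of correct bits of a coarse estimate.
    static float refineRsqrt(float A, float Est, unsigned Iterations) {
      for (unsigned i = 0; i != Iterations; ++i)
        Est = Est * (1.5f - 0.5f * A * Est * Est);
      return Est;
    }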
bool isTypeLegal(const EVT &VT) { if (!LegalTypes) return true; return TLI.isTypeLegal(VT); } - /// getSetCCResultType - Convenience wrapper around - /// TargetLowering::getSetCCResultType + /// Convenience wrapper around TargetLowering::getSetCCResultType EVT getSetCCResultType(EVT VT) const { return TLI.getSetCCResultType(*DAG.getContext(), VT); } @@ -391,16 +417,16 @@ namespace { namespace { -/// WorkListRemover - This class is a DAGUpdateListener that removes any deleted +/// This class is a DAGUpdateListener that removes any deleted /// nodes from the worklist. -class WorkListRemover : public SelectionDAG::DAGUpdateListener { +class WorklistRemover : public SelectionDAG::DAGUpdateListener { DAGCombiner &DC; public: - explicit WorkListRemover(DAGCombiner &dc) + explicit WorklistRemover(DAGCombiner &dc) : SelectionDAG::DAGUpdateListener(dc.getDAG()), DC(dc) {} void NodeDeleted(SDNode *N, SDNode *E) override { - DC.removeFromWorkList(N); + DC.removeFromWorklist(N); } }; } @@ -410,11 +436,11 @@ public: //===----------------------------------------------------------------------===// void TargetLowering::DAGCombinerInfo::AddToWorklist(SDNode *N) { - ((DAGCombiner*)DC)->AddToWorkList(N); + ((DAGCombiner*)DC)->AddToWorklist(N); } void TargetLowering::DAGCombinerInfo::RemoveFromWorklist(SDNode *N) { - ((DAGCombiner*)DC)->removeFromWorkList(N); + ((DAGCombiner*)DC)->removeFromWorklist(N); } SDValue TargetLowering::DAGCombinerInfo:: @@ -442,9 +468,24 @@ CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO) { // Helper Functions //===----------------------------------------------------------------------===// -/// isNegatibleForFree - Return 1 if we can compute the negated form of the -/// specified expression for the same cost as the expression itself, or 2 if we -/// can compute the negated form more cheaply than the expression itself. +void DAGCombiner::deleteAndRecombine(SDNode *N) { + removeFromWorklist(N); + + // If the operands of this node are only used by the node, they will now be + // dead. Make sure to re-visit them and recursively delete dead nodes. + for (const SDValue &Op : N->ops()) + // For an operand generating multiple values, one of the values may + // become dead allowing further simplification (e.g. split index + // arithmetic from an indexed load). + if (Op->hasOneUse() || Op->getNumValues() > 1) + AddToWorklist(Op.getNode()); + + DAG.DeleteNode(N); +} + +/// Return 1 if we can compute the negated form of the specified expression for +/// the same cost as the expression itself, or 2 if we can compute the negated +/// form more cheaply than the expression itself. static char isNegatibleForFree(SDValue Op, bool LegalOperations, const TargetLowering &TLI, const TargetOptions *Options, @@ -507,10 +548,10 @@ static char isNegatibleForFree(SDValue Op, bool LegalOperations, } } -/// GetNegatedExpression - If isNegatibleForFree returns true, this function -/// returns the newly negated expression. +/// If isNegatibleForFree returns true, return the newly negated expression. static SDValue GetNegatedExpression(SDValue Op, SelectionDAG &DAG, bool LegalOperations, unsigned Depth = 0) { + const TargetOptions &Options = DAG.getTarget().Options; // fneg is removable even if it has multiple uses. if (Op.getOpcode() == ISD::FNEG) return Op.getOperand(0); @@ -527,12 +568,11 @@ static SDValue GetNegatedExpression(SDValue Op, SelectionDAG &DAG, } case ISD::FADD: // FIXME: determine better conditions for this xform. 
- assert(DAG.getTarget().Options.UnsafeFPMath); + assert(Options.UnsafeFPMath); // fold (fneg (fadd A, B)) -> (fsub (fneg A), B) if (isNegatibleForFree(Op.getOperand(0), LegalOperations, - DAG.getTargetLoweringInfo(), - &DAG.getTarget().Options, Depth+1)) + DAG.getTargetLoweringInfo(), &Options, Depth+1)) return DAG.getNode(ISD::FSUB, SDLoc(Op), Op.getValueType(), GetNegatedExpression(Op.getOperand(0), DAG, LegalOperations, Depth+1), @@ -544,7 +584,7 @@ static SDValue GetNegatedExpression(SDValue Op, SelectionDAG &DAG, Op.getOperand(0)); case ISD::FSUB: // We can't turn -(A-B) into B-A when we honor signed zeros. - assert(DAG.getTarget().Options.UnsafeFPMath); + assert(Options.UnsafeFPMath); // fold (fneg (fsub 0, B)) -> B if (ConstantFPSDNode *N0CFP = dyn_cast(Op.getOperand(0))) @@ -557,12 +597,11 @@ static SDValue GetNegatedExpression(SDValue Op, SelectionDAG &DAG, case ISD::FMUL: case ISD::FDIV: - assert(!DAG.getTarget().Options.HonorSignDependentRoundingFPMath()); + assert(!Options.HonorSignDependentRoundingFPMath()); // fold (fneg (fmul X, Y)) -> (fmul (fneg X), Y) if (isNegatibleForFree(Op.getOperand(0), LegalOperations, - DAG.getTargetLoweringInfo(), - &DAG.getTarget().Options, Depth+1)) + DAG.getTargetLoweringInfo(), &Options, Depth+1)) return DAG.getNode(Op.getOpcode(), SDLoc(Op), Op.getValueType(), GetNegatedExpression(Op.getOperand(0), DAG, LegalOperations, Depth+1), @@ -587,7 +626,7 @@ static SDValue GetNegatedExpression(SDValue Op, SelectionDAG &DAG, } } -// isSetCCEquivalent - Return true if this node is a setcc, or is a select_cc +// Return true if this node is a setcc, or is a select_cc // that selects between the target values used for true and false, making it // equivalent to a setcc. Also, set the incoming LHS, RHS, and CC references to // the appropriate nodes based on the type of node we are checking. This @@ -606,15 +645,19 @@ bool DAGCombiner::isSetCCEquivalent(SDValue N, SDValue &LHS, SDValue &RHS, !TLI.isConstFalseVal(N.getOperand(3).getNode())) return false; + if (TLI.getBooleanContents(N.getValueType()) == + TargetLowering::UndefinedBooleanContent) + return false; + LHS = N.getOperand(0); RHS = N.getOperand(1); CC = N.getOperand(4); return true; } -// isOneUseSetCC - Return true if this is a SetCC-equivalent operation with only -// one use. If this is true, it allows the users to invert the operation for -// free when it is profitable to do so. +/// Return true if this is a SetCC-equivalent operation with only one use. +/// If this is true, it allows the users to invert the operation for free when +/// it is profitable to do so. bool DAGCombiner::isOneUseSetCC(SDValue N) const { SDValue N0, N1, N2; if (isSetCCEquivalent(N, N0, N1, N2) && N.getNode()->hasOneUse()) @@ -622,7 +665,7 @@ bool DAGCombiner::isOneUseSetCC(SDValue N) const { return false; } -/// isConstantSplatVector - Returns true if N is a BUILD_VECTOR node whose +/// Returns true if N is a BUILD_VECTOR node whose /// elements are all the same constant or undefined. static bool isConstantSplatVector(SDNode *N, APInt& SplatValue) { BuildVectorSDNode *C = dyn_cast(N); @@ -643,7 +686,7 @@ static SDNode *isConstantBuildVectorOrConstantInt(SDValue N) { if (isa(N)) return N.getNode(); BuildVectorSDNode *BV = dyn_cast(N); - if(BV && BV->isConstant()) + if (BV && BV->isConstant()) return BV; return nullptr; } @@ -669,6 +712,23 @@ static ConstantSDNode *isConstOrConstSplat(SDValue N) { return nullptr; } +// \brief Returns the SDNode if it is a constant splat BuildVector or constant +// float. 
+static ConstantFPSDNode *isConstOrConstSplatFP(SDValue N) { + if (ConstantFPSDNode *CN = dyn_cast(N)) + return CN; + + if (BuildVectorSDNode *BV = dyn_cast(N)) { + BitVector UndefElements; + ConstantFPSDNode *CN = BV->getConstantFPSplatNode(&UndefElements); + + if (CN && UndefElements.none()) + return CN; + } + + return nullptr; +} + SDValue DAGCombiner::ReassociateOps(unsigned Opc, SDLoc DL, SDValue N0, SDValue N1) { EVT VT = N0.getValueType(); @@ -687,7 +747,7 @@ SDValue DAGCombiner::ReassociateOps(unsigned Opc, SDLoc DL, SDValue OpNode = DAG.getNode(Opc, SDLoc(N0), VT, N0.getOperand(0), N1); if (!OpNode.getNode()) return SDValue(); - AddToWorkList(OpNode.getNode()); + AddToWorklist(OpNode.getNode()); return DAG.getNode(Opc, DL, VT, OpNode, N0.getOperand(1)); } } @@ -708,7 +768,7 @@ SDValue DAGCombiner::ReassociateOps(unsigned Opc, SDLoc DL, SDValue OpNode = DAG.getNode(Opc, SDLoc(N0), VT, N1.getOperand(0), N0); if (!OpNode.getNode()) return SDValue(); - AddToWorkList(OpNode.getNode()); + AddToWorklist(OpNode.getNode()); return DAG.getNode(Opc, DL, VT, OpNode, N1.getOperand(1)); } } @@ -730,14 +790,14 @@ SDValue DAGCombiner::CombineTo(SDNode *N, const SDValue *To, unsigned NumTo, assert((!To[i].getNode() || N->getValueType(i) == To[i].getValueType()) && "Cannot combine value to value of different type!")); - WorkListRemover DeadNodes(*this); + WorklistRemover DeadNodes(*this); DAG.ReplaceAllUsesWith(N, To); if (AddTo) { // Push the new nodes and any users onto the worklist for (unsigned i = 0, e = NumTo; i != e; ++i) { if (To[i].getNode()) { - AddToWorkList(To[i].getNode()); - AddUsersToWorkList(To[i].getNode()); + AddToWorklist(To[i].getNode()); + AddUsersToWorklist(To[i].getNode()); } } } @@ -745,14 +805,8 @@ SDValue DAGCombiner::CombineTo(SDNode *N, const SDValue *To, unsigned NumTo, // Finally, if the node is now dead, remove it from the graph. The node // may not be dead if the replacement process recursively simplified to // something else needing this node. - if (N->use_empty()) { - // Nodes can be reintroduced into the worklist. Make sure we do not - // process a node that has been replaced. - removeFromWorkList(N); - - // Finally, since the node is now dead, remove it from the graph. - DAG.DeleteNode(N); - } + if (N->use_empty()) + deleteAndRecombine(N); return SDValue(N, 0); } @@ -760,32 +814,22 @@ void DAGCombiner:: CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO) { // Replace all uses. If any nodes become isomorphic to other nodes and // are deleted, make sure to remove them from our worklist. - WorkListRemover DeadNodes(*this); + WorklistRemover DeadNodes(*this); DAG.ReplaceAllUsesOfValueWith(TLO.Old, TLO.New); // Push the new node and any (possibly new) users onto the worklist. - AddToWorkList(TLO.New.getNode()); - AddUsersToWorkList(TLO.New.getNode()); + AddToWorklist(TLO.New.getNode()); + AddUsersToWorklist(TLO.New.getNode()); // Finally, if the node is now dead, remove it from the graph. The node // may not be dead if the replacement process recursively simplified to // something else needing this node. - if (TLO.Old.getNode()->use_empty()) { - removeFromWorkList(TLO.Old.getNode()); - - // If the operands of this node are only used by the node, they will now - // be dead. Make sure to visit them first to delete dead nodes early. 
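ReassociateOps (above) regroups chains of a single associative, commutative opcode so that constants meet and fold. A trivial check of the identity it relies on, using unsigned wrap-around to mirror two's-complement DAG arithmetic (the helper is illustrative):

    #include <cassert>
    #include <cstdint>

    static void checkReassociation(uint32_t X, uint32_t C1, uint32_t C2) {
      // (X + C1) + C2 == X + (C1 + C2); after regrouping, C1 + C2 constant-folds.
      assert(((X + C1) + C2) == (X + (C1 + C2)));
    }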
- for (unsigned i = 0, e = TLO.Old.getNode()->getNumOperands(); i != e; ++i) - if (TLO.Old.getNode()->getOperand(i).getNode()->hasOneUse()) - AddToWorkList(TLO.Old.getNode()->getOperand(i).getNode()); - - DAG.DeleteNode(TLO.Old.getNode()); - } + if (TLO.Old.getNode()->use_empty()) + deleteAndRecombine(TLO.Old.getNode()); } -/// SimplifyDemandedBits - Check the specified integer node value to see if -/// it can be simplified or if things it uses can be simplified by bit -/// propagation. If so, return true. +/// Check the specified integer node value to see if it can be simplified or if +/// things it uses can be simplified by bit propagation. If so, return true. bool DAGCombiner::SimplifyDemandedBits(SDValue Op, const APInt &Demanded) { TargetLowering::TargetLoweringOpt TLO(DAG, LegalTypes, LegalOperations); APInt KnownZero, KnownOne; @@ -793,7 +837,7 @@ bool DAGCombiner::SimplifyDemandedBits(SDValue Op, const APInt &Demanded) { return false; // Revisit the node. - AddToWorkList(Op.getNode()); + AddToWorklist(Op.getNode()); // Replace the old value with the new one. ++NodesCombined; @@ -817,12 +861,11 @@ void DAGCombiner::ReplaceLoadWithPromotedLoad(SDNode *Load, SDNode *ExtLoad) { dbgs() << "\nWith: "; Trunc.getNode()->dump(&DAG); dbgs() << '\n'); - WorkListRemover DeadNodes(*this); + WorklistRemover DeadNodes(*this); DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 0), Trunc); DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 1), SDValue(ExtLoad, 1)); - removeFromWorkList(Load); - DAG.DeleteNode(Load); - AddToWorkList(Trunc.getNode()); + deleteAndRecombine(Load); + AddToWorklist(Trunc.getNode()); } SDValue DAGCombiner::PromoteOperand(SDValue Op, EVT PVT, bool &Replace) { @@ -872,7 +915,7 @@ SDValue DAGCombiner::SExtPromoteOperand(SDValue Op, EVT PVT) { SDValue NewOp = PromoteOperand(Op, PVT, Replace); if (!NewOp.getNode()) return SDValue(); - AddToWorkList(NewOp.getNode()); + AddToWorklist(NewOp.getNode()); if (Replace) ReplaceLoadWithPromotedLoad(Op.getNode(), NewOp.getNode()); @@ -887,16 +930,16 @@ SDValue DAGCombiner::ZExtPromoteOperand(SDValue Op, EVT PVT) { SDValue NewOp = PromoteOperand(Op, PVT, Replace); if (!NewOp.getNode()) return SDValue(); - AddToWorkList(NewOp.getNode()); + AddToWorklist(NewOp.getNode()); if (Replace) ReplaceLoadWithPromotedLoad(Op.getNode(), NewOp.getNode()); return DAG.getZeroExtendInReg(NewOp, dl, OldVT); } -/// PromoteIntBinOp - Promote the specified integer binary operation if the -/// target indicates it is beneficial. e.g. On x86, it's usually better to -/// promote i16 operations to i32 since i16 instructions are longer. +/// Promote the specified integer binary operation if the target indicates it is +/// beneficial. e.g. On x86, it's usually better to promote i16 operations to +/// i32 since i16 instructions are longer. SDValue DAGCombiner::PromoteIntBinOp(SDValue Op) { if (!LegalOperations) return SDValue(); @@ -934,9 +977,9 @@ SDValue DAGCombiner::PromoteIntBinOp(SDValue Op) { return SDValue(); } - AddToWorkList(NN0.getNode()); + AddToWorklist(NN0.getNode()); if (NN1.getNode()) - AddToWorkList(NN1.getNode()); + AddToWorklist(NN1.getNode()); if (Replace0) ReplaceLoadWithPromotedLoad(N0.getNode(), NN0.getNode()); @@ -952,9 +995,9 @@ SDValue DAGCombiner::PromoteIntBinOp(SDValue Op) { return SDValue(); } -/// PromoteIntShiftOp - Promote the specified integer shift operation if the -/// target indicates it is beneficial. e.g. On x86, it's usually better to -/// promote i16 operations to i32 since i16 instructions are longer. 
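PromoteIntBinOp (above) widens small integer operations when the target prefers the wider form. The transformation is sound because doing the arithmetic in the wider type and truncating gives the same narrow result; a quick check of that fact for an i16 add done in i32 (illustrative):

    #include <cassert>
    #include <cstdint>

    static void checkPromotedAdd(uint16_t A, uint16_t B) {
      uint16_t Narrow = (uint16_t)(A + B);                      // i16 add, wraps mod 2^16
      uint16_t Widened = (uint16_t)((uint32_t)A + (uint32_t)B); // promote, add, truncate
      assert(Narrow == Widened);
    }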
+/// Promote the specified integer shift operation if the target indicates it is +/// beneficial. e.g. On x86, it's usually better to promote i16 operations to +/// i32 since i16 instructions are longer. SDValue DAGCombiner::PromoteIntShiftOp(SDValue Op) { if (!LegalOperations) return SDValue(); @@ -986,7 +1029,7 @@ SDValue DAGCombiner::PromoteIntShiftOp(SDValue Op) { if (!N0.getNode()) return SDValue(); - AddToWorkList(N0.getNode()); + AddToWorklist(N0.getNode()); if (Replace) ReplaceLoadWithPromotedLoad(Op.getOperand(0).getNode(), N0.getNode()); @@ -1066,17 +1109,45 @@ bool DAGCombiner::PromoteLoad(SDValue Op) { dbgs() << "\nTo: "; Result.getNode()->dump(&DAG); dbgs() << '\n'); - WorkListRemover DeadNodes(*this); + WorklistRemover DeadNodes(*this); DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result); DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), NewLD.getValue(1)); - removeFromWorkList(N); - DAG.DeleteNode(N); - AddToWorkList(Result.getNode()); + deleteAndRecombine(N); + AddToWorklist(Result.getNode()); return true; } return false; } +/// \brief Recursively delete a node which has no uses and any operands for +/// which it is the only use. +/// +/// Note that this both deletes the nodes and removes them from the worklist. +/// It also adds any nodes who have had a user deleted to the worklist as they +/// may now have only one use and subject to other combines. +bool DAGCombiner::recursivelyDeleteUnusedNodes(SDNode *N) { + if (!N->use_empty()) + return false; + + SmallSetVector Nodes; + Nodes.insert(N); + do { + N = Nodes.pop_back_val(); + if (!N) + continue; + + if (N->use_empty()) { + for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) + Nodes.insert(N->getOperand(i).getNode()); + + removeFromWorklist(N); + DAG.DeleteNode(N); + } else { + AddToWorklist(N); + } + } while (!Nodes.empty()); + return true; +} //===----------------------------------------------------------------------===// // Main DAG Combiner implementation @@ -1088,44 +1159,69 @@ void DAGCombiner::Run(CombineLevel AtLevel) { LegalOperations = Level >= AfterLegalizeVectorOps; LegalTypes = Level >= AfterLegalizeTypes; + // Early exit if this basic block is in an optnone function. + AttributeSet FnAttrs = + DAG.getMachineFunction().getFunction()->getAttributes(); + if (FnAttrs.hasAttribute(AttributeSet::FunctionIndex, + Attribute::OptimizeNone)) + return; + // Add all the dag nodes to the worklist. for (SelectionDAG::allnodes_iterator I = DAG.allnodes_begin(), E = DAG.allnodes_end(); I != E; ++I) - AddToWorkList(I); + AddToWorklist(I); // Create a dummy node (which is not added to allnodes), that adds a reference // to the root node, preventing it from being deleted, and tracking any // changes of the root. HandleSDNode Dummy(DAG.getRoot()); - // The root of the dag may dangle to deleted nodes until the dag combiner is - // done. Set it to null to avoid confusion. - DAG.setRoot(SDValue()); - // while the worklist isn't empty, find a node and // try and combine it. - while (!WorkListContents.empty()) { + while (!WorklistMap.empty()) { SDNode *N; - // The WorkListOrder holds the SDNodes in order, but it may contain - // duplicates. - // In order to avoid a linear scan, we use a set (O(log N)) to hold what the - // worklist *should* contain, and check the node we want to visit is should - // actually be visited. + // The Worklist holds the SDNodes in order, but it may contain null entries. 
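recursivelyDeleteUnusedNodes (above) sweeps a dead node and any operands that become use-free once it is gone, using a uniquing worklist. A generic sketch of the pattern; NodeT's useEmpty()/operands() interface and the graph's erase() are assumed for illustration and are not LLVM APIs:

    #include "llvm/ADT/SetVector.h"

    template <typename NodeT, typename GraphT>
    static bool deleteDeadSubgraph(NodeT *Root, GraphT &G) {
      if (!Root->useEmpty())
        return false; // still referenced; nothing to do
      llvm::SmallSetVector<NodeT *, 16> Pending;
      Pending.insert(Root);
      while (!Pending.empty()) {
        NodeT *N = Pending.pop_back_val();
        if (!N->useEmpty())
          continue; // something else still uses it; leave it alone
        for (NodeT *Op : N->operands())
          Pending.insert(Op); // may become dead once N is erased
        G.erase(N);
      }
      return true;
    }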
do { - N = WorkListOrder.pop_back_val(); - } while (!WorkListContents.erase(N)); + N = Worklist.pop_back_val(); + } while (!N); + + bool GoodWorklistEntry = WorklistMap.erase(N); + (void)GoodWorklistEntry; + assert(GoodWorklistEntry && + "Found a worklist entry without a corresponding map entry!"); // If N has no uses, it is dead. Make sure to revisit all N's operands once // N is deleted from the DAG, since they too may now be dead or may have a // reduced number of uses, allowing other xforms. - if (N->use_empty() && N != &Dummy) { - for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) - AddToWorkList(N->getOperand(i).getNode()); - - DAG.DeleteNode(N); + if (recursivelyDeleteUnusedNodes(N)) continue; + + WorklistRemover DeadNodes(*this); + + // If this combine is running after legalizing the DAG, re-legalize any + // nodes pulled off the worklist. + if (Level == AfterLegalizeDAG) { + SmallSetVector UpdatedNodes; + bool NIsValid = DAG.LegalizeOp(N, UpdatedNodes); + + for (SDNode *LN : UpdatedNodes) { + AddToWorklist(LN); + AddUsersToWorklist(LN); + } + if (!NIsValid) + continue; } + DEBUG(dbgs() << "\nCombining: "; N->dump(&DAG)); + + // Add any operands of the new node which have not yet been combined to the + // worklist as well. Because the worklist uniques things already, this + // won't repeatedly process the same operand. + CombinedNodes.insert(N); + for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) + if (!CombinedNodes.count(N->getOperand(i).getNode())) + AddToWorklist(N->getOperand(i).getNode()); + SDValue RV = combine(N); if (!RV.getNode()) @@ -1144,15 +1240,11 @@ void DAGCombiner::Run(CombineLevel AtLevel) { RV.getNode()->getOpcode() != ISD::DELETED_NODE && "Node was deleted but visit returned new node!"); - DEBUG(dbgs() << "\nReplacing.3 "; - N->dump(&DAG); - dbgs() << "\nWith: "; - RV.getNode()->dump(&DAG); - dbgs() << '\n'); + DEBUG(dbgs() << " ... into: "; + RV.getNode()->dump(&DAG)); // Transfer debug value. DAG.TransferDbgValues(SDValue(N, 0), RV); - WorkListRemover DeadNodes(*this); if (N->getNumValues() == RV.getNode()->getNumValues()) DAG.ReplaceAllUsesWith(N, RV.getNode()); else { @@ -1163,26 +1255,14 @@ void DAGCombiner::Run(CombineLevel AtLevel) { } // Push the new node and any users onto the worklist - AddToWorkList(RV.getNode()); - AddUsersToWorkList(RV.getNode()); - - // Add any uses of the old node to the worklist in case this node is the - // last one that uses them. They may become dead after this node is - // deleted. - for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) - AddToWorkList(N->getOperand(i).getNode()); + AddToWorklist(RV.getNode()); + AddUsersToWorklist(RV.getNode()); // Finally, if the node is now dead, remove it from the graph. The node // may not be dead if the replacement process recursively simplified to - // something else needing this node. - if (N->use_empty()) { - // Nodes can be reintroduced into the worklist. Make sure we do not - // process a node that has been replaced. - removeFromWorkList(N); - - // Finally, since the node is now dead, remove it from the graph. - DAG.DeleteNode(N); - } + // something else needing this node. This will also take care of adding any + // operands which have lost a user to the worklist. + recursivelyDeleteUnusedNodes(N); } // If the root changed (e.g. it was a dead load, update the root). 
@@ -1244,6 +1324,7 @@ SDValue DAGCombiner::visit(SDNode *N) { case ISD::FMA: return visitFMA(N); case ISD::FDIV: return visitFDIV(N); case ISD::FREM: return visitFREM(N); + case ISD::FSQRT: return visitFSQRT(N); case ISD::FCOPYSIGN: return visitFCOPYSIGN(N); case ISD::SINT_TO_FP: return visitSINT_TO_FP(N); case ISD::UINT_TO_FP: return visitUINT_TO_FP(N); @@ -1255,6 +1336,8 @@ SDValue DAGCombiner::visit(SDNode *N) { case ISD::FNEG: return visitFNEG(N); case ISD::FABS: return visitFABS(N); case ISD::FFLOOR: return visitFFLOOR(N); + case ISD::FMINNUM: return visitFMINNUM(N); + case ISD::FMAXNUM: return visitFMAXNUM(N); case ISD::FCEIL: return visitFCEIL(N); case ISD::FTRUNC: return visitFTRUNC(N); case ISD::BRCOND: return visitBRCOND(N); @@ -1347,8 +1430,8 @@ SDValue DAGCombiner::combine(SDNode *N) { return RV; } -/// getInputChainForNode - Given a node, return its input chain if it has one, -/// otherwise return a null sd operand. +/// Given a node, return its input chain if it has one, otherwise return a null +/// sd operand. static SDValue getInputChainForNode(SDNode *N) { if (unsigned NumOps = N->getNumOperands()) { if (N->getOperand(0).getValueType() == MVT::Other) @@ -1402,7 +1485,7 @@ SDValue DAGCombiner::visitTokenFactor(SDNode *N) { // Queue up for processing. TFs.push_back(Op.getNode()); // Clean up in case the token factor is removed. - AddToWorkList(Op.getNode()); + AddToWorklist(Op.getNode()); Changed = true; break; } @@ -1410,7 +1493,7 @@ SDValue DAGCombiner::visitTokenFactor(SDNode *N) { default: // Only add if it isn't already in the list. - if (SeenOps.insert(Op.getNode())) + if (SeenOps.insert(Op.getNode()).second) Ops.push_back(Op); else Changed = true; @@ -1440,44 +1523,21 @@ SDValue DAGCombiner::visitTokenFactor(SDNode *N) { /// MERGE_VALUES can always be eliminated. SDValue DAGCombiner::visitMERGE_VALUES(SDNode *N) { - WorkListRemover DeadNodes(*this); + WorklistRemover DeadNodes(*this); // Replacing results may cause a different MERGE_VALUES to suddenly // be CSE'd with N, and carry its uses with it. Iterate until no // uses remain, to ensure that the node can be safely deleted. // First add the users of this node to the work list so that they // can be tried again once they have new operands. - AddUsersToWorkList(N); + AddUsersToWorklist(N); do { for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) DAG.ReplaceAllUsesOfValueWith(SDValue(N, i), N->getOperand(i)); } while (!N->use_empty()); - removeFromWorkList(N); - DAG.DeleteNode(N); + deleteAndRecombine(N); return SDValue(N, 0); // Return N so it doesn't get rechecked! 
} -static -SDValue combineShlAddConstant(SDLoc DL, SDValue N0, SDValue N1, - SelectionDAG &DAG) { - EVT VT = N0.getValueType(); - SDValue N00 = N0.getOperand(0); - SDValue N01 = N0.getOperand(1); - ConstantSDNode *N01C = dyn_cast(N01); - - if (N01C && N00.getOpcode() == ISD::ADD && N00.getNode()->hasOneUse() && - isa(N00.getOperand(1))) { - // fold (add (shl (add x, c1), c2), ) -> (add (add (shl x, c2), c1<getOperand(0); SDValue N1 = N->getOperand(1); @@ -1594,16 +1654,6 @@ SDValue DAGCombiner::visitADD(SDNode *N) { } } - // fold (add (shl (add x, c1), c2), ) -> (add (add (shl x, c2), c1<hasOneUse()) { - SDValue Result = combineShlAddConstant(SDLoc(N), N0, N1, DAG); - if (Result.getNode()) return Result; - } - if (N1.getOpcode() == ISD::SHL && N1.getNode()->hasOneUse()) { - SDValue Result = combineShlAddConstant(SDLoc(N), N1, N0, DAG); - if (Result.getNode()) return Result; - } - // fold (add x, shl(0 - y, n)) -> sub(x, shl(y, n)) if (N1.getOpcode() == ISD::SHL && N1.getOperand(0).getOpcode() == ISD::SUB) @@ -1647,6 +1697,17 @@ SDValue DAGCombiner::visitADD(SDNode *N) { return DAG.getNode(ISD::SUB, DL, VT, N1, ZExt); } + // add X, (sextinreg Y i1) -> sub X, (and Y 1) + if (N1.getOpcode() == ISD::SIGN_EXTEND_INREG) { + VTSDNode *TN = cast(N1.getOperand(1)); + if (TN->getVT() == MVT::i1) { + SDLoc DL(N); + SDValue ZExt = DAG.getNode(ISD::AND, DL, VT, N1.getOperand(0), + DAG.getConstant(1, VT)); + return DAG.getNode(ISD::SUB, DL, VT, N0, ZExt); + } + } + return SDValue(); } @@ -1812,6 +1873,17 @@ SDValue DAGCombiner::visitSUB(SDNode *N) { VT); } + // sub X, (sextinreg Y i1) -> add X, (and Y 1) + if (N1.getOpcode() == ISD::SIGN_EXTEND_INREG) { + VTSDNode *TN = cast(N1.getOperand(1)); + if (TN->getVT() == MVT::i1) { + SDLoc DL(N); + SDValue ZExt = DAG.getNode(ISD::AND, DL, VT, N1.getOperand(0), + DAG.getConstant(1, VT)); + return DAG.getNode(ISD::ADD, DL, VT, N0, ZExt); + } + } + return SDValue(); } @@ -1933,7 +2005,7 @@ SDValue DAGCombiner::visitMUL(SDNode *N) { isa(N0.getOperand(1)))) { SDValue C3 = DAG.getNode(ISD::SHL, SDLoc(N), VT, N1, N0.getOperand(1)); - AddToWorkList(C3.getNode()); + AddToWorklist(C3.getNode()); return DAG.getNode(ISD::MUL, SDLoc(N), VT, N0.getOperand(0), C3); } @@ -2016,9 +2088,14 @@ SDValue DAGCombiner::visitSDIV(SDNode *N) { (-N1C->getAPIntValue()).isPowerOf2())) { // If dividing by powers of two is cheap, then don't perform the following // fold. - if (TLI.isPow2DivCheap()) + if (TLI.isPow2SDivCheap()) return SDValue(); + // Target-specific implementation of sdiv x, pow2. + SDValue Res = BuildSDIVPow2(N); + if (Res.getNode()) + return Res; + unsigned lg2 = N1C->getAPIntValue().countTrailingZeros(); // Splat the sign bit into the register @@ -2026,7 +2103,7 @@ SDValue DAGCombiner::visitSDIV(SDNode *N) { DAG.getNode(ISD::SRA, SDLoc(N), VT, N0, DAG.getConstant(VT.getScalarSizeInBits() - 1, getShiftAmountTy(N0.getValueType()))); - AddToWorkList(SGN.getNode()); + AddToWorklist(SGN.getNode()); // Add (N0 < 0) ? 
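The new fold above rewrites add X, (sext_inreg Y, i1) as sub X, (and Y, 1) (and the sub form as an add): sign-extending bit 0 of Y produces either 0 or all-ones, i.e. 0 or -1. A quick check of the identity in 32-bit wrap-around arithmetic (the helper is illustrative):

    #include <cassert>
    #include <cstdint>

    static void checkSextInRegI1Fold(uint32_t X, uint32_t Y) {
      uint32_t SExt = (Y & 1) ? ~0u : 0u; // sext_inreg Y, i1 -> 0 or -1
      assert(X + SExt == X - (Y & 1));    // add of 0/-1 equals sub of 0/1
      assert(X - SExt == X + (Y & 1));    // the mirrored sub fold
    }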
abs2 - 1 : 0; SDValue SRL = @@ -2034,8 +2111,8 @@ SDValue DAGCombiner::visitSDIV(SDNode *N) { DAG.getConstant(VT.getScalarSizeInBits() - lg2, getShiftAmountTy(SGN.getValueType()))); SDValue ADD = DAG.getNode(ISD::ADD, SDLoc(N), VT, N0, SRL); - AddToWorkList(SRL.getNode()); - AddToWorkList(ADD.getNode()); // Divide by pow2 + AddToWorklist(SRL.getNode()); + AddToWorklist(ADD.getNode()); // Divide by pow2 SDValue SRA = DAG.getNode(ISD::SRA, SDLoc(N), VT, ADD, DAG.getConstant(lg2, getShiftAmountTy(ADD.getValueType()))); @@ -2044,7 +2121,7 @@ SDValue DAGCombiner::visitSDIV(SDNode *N) { if (N1C->getAPIntValue().isNonNegative()) return SRA; - AddToWorkList(SRA.getNode()); + AddToWorklist(SRA.getNode()); return DAG.getNode(ISD::SUB, SDLoc(N), VT, DAG.getConstant(0, VT), SRA); } @@ -2096,7 +2173,7 @@ SDValue DAGCombiner::visitUDIV(SDNode *N) { DAG.getConstant(SHC->getAPIntValue() .logBase2(), ADDVT)); - AddToWorkList(Add.getNode()); + AddToWorklist(Add.getNode()); return DAG.getNode(ISD::SRL, SDLoc(N), VT, N0, Add); } } @@ -2138,13 +2215,13 @@ SDValue DAGCombiner::visitSREM(SDNode *N) { // X%C to the equivalent of X-X/C*C. if (N1C && !N1C->isNullValue()) { SDValue Div = DAG.getNode(ISD::SDIV, SDLoc(N), VT, N0, N1); - AddToWorkList(Div.getNode()); + AddToWorklist(Div.getNode()); SDValue OptimizedDiv = combine(Div.getNode()); if (OptimizedDiv.getNode() && OptimizedDiv.getNode() != Div.getNode()) { SDValue Mul = DAG.getNode(ISD::MUL, SDLoc(N), VT, OptimizedDiv, N1); SDValue Sub = DAG.getNode(ISD::SUB, SDLoc(N), VT, N0, Mul); - AddToWorkList(Mul.getNode()); + AddToWorklist(Mul.getNode()); return Sub; } } @@ -2181,7 +2258,7 @@ SDValue DAGCombiner::visitUREM(SDNode *N) { DAG.getNode(ISD::ADD, SDLoc(N), VT, N1, DAG.getConstant(APInt::getAllOnesValue(VT.getSizeInBits()), VT)); - AddToWorkList(Add.getNode()); + AddToWorklist(Add.getNode()); return DAG.getNode(ISD::AND, SDLoc(N), VT, N0, Add); } } @@ -2191,13 +2268,13 @@ SDValue DAGCombiner::visitUREM(SDNode *N) { // X%C to the equivalent of X-X/C*C. if (N1C && !N1C->isNullValue()) { SDValue Div = DAG.getNode(ISD::UDIV, SDLoc(N), VT, N0, N1); - AddToWorkList(Div.getNode()); + AddToWorklist(Div.getNode()); SDValue OptimizedDiv = combine(Div.getNode()); if (OptimizedDiv.getNode() && OptimizedDiv.getNode() != Div.getNode()) { SDValue Mul = DAG.getNode(ISD::MUL, SDLoc(N), VT, OptimizedDiv, N1); SDValue Sub = DAG.getNode(ISD::SUB, SDLoc(N), VT, N0, Mul); - AddToWorkList(Mul.getNode()); + AddToWorklist(Mul.getNode()); return Sub; } } @@ -2286,10 +2363,9 @@ SDValue DAGCombiner::visitMULHU(SDNode *N) { return SDValue(); } -/// SimplifyNodeWithTwoResults - Perform optimizations common to nodes that -/// compute two values. LoOp and HiOp give the opcodes for the two computations -/// that are being performed. Return true if a simplification was made. -/// +/// Perform optimizations common to nodes that compute two values. LoOp and HiOp +/// give the opcodes for the two computations that are being performed. Return +/// true if a simplification was made. SDValue DAGCombiner::SimplifyNodeWithTwoResults(SDNode *N, unsigned LoOp, unsigned HiOp) { // If the high half is not needed, just compute the low half. 
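The visitSDIV code above lowers a signed divide by 2^k to shifts: splat the sign bit, shift it right to produce a bias of 2^k - 1 for negative dividends, add, then arithmetic-shift right by k (negating afterwards when the divisor was negative). A scalar model of that sequence, assuming the usual arithmetic right shift of negative signed values and a positive power-of-two divisor with 1 <= k <= 31 (illustrative):

    #include <cassert>
    #include <cstdint>

    static int32_t sdivByPow2(int32_t X, unsigned Lg2) {
      assert(Lg2 >= 1 && Lg2 <= 31);
      uint32_t Sign = (uint32_t)(X >> 31);              // sra X, 31: 0 or all-ones
      uint32_t Bias = Sign >> (32 - Lg2);               // srl: 2^k - 1 if X < 0, else 0
      int32_t Adjusted = (int32_t)((uint32_t)X + Bias); // add X, bias
      return Adjusted >> Lg2;                           // sra by k now rounds toward zero
    }

    // e.g. sdivByPow2(-7, 1) == -3 and sdivByPow2(7, 1) == 3, matching C's -7/2 and 7/2.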
@@ -2297,8 +2373,7 @@ SDValue DAGCombiner::SimplifyNodeWithTwoResults(SDNode *N, unsigned LoOp, if (!HiExists && (!LegalOperations || TLI.isOperationLegalOrCustom(LoOp, N->getValueType(0)))) { - SDValue Res = DAG.getNode(LoOp, SDLoc(N), N->getValueType(0), - ArrayRef(N->op_begin(), N->op_end())); + SDValue Res = DAG.getNode(LoOp, SDLoc(N), N->getValueType(0), N->ops()); return CombineTo(N, Res, Res); } @@ -2307,8 +2382,7 @@ SDValue DAGCombiner::SimplifyNodeWithTwoResults(SDNode *N, unsigned LoOp, if (!LoExists && (!LegalOperations || TLI.isOperationLegal(HiOp, N->getValueType(1)))) { - SDValue Res = DAG.getNode(HiOp, SDLoc(N), N->getValueType(1), - ArrayRef(N->op_begin(), N->op_end())); + SDValue Res = DAG.getNode(HiOp, SDLoc(N), N->getValueType(1), N->ops()); return CombineTo(N, Res, Res); } @@ -2318,9 +2392,8 @@ SDValue DAGCombiner::SimplifyNodeWithTwoResults(SDNode *N, unsigned LoOp, // If the two computed results can be simplified separately, separate them. if (LoExists) { - SDValue Lo = DAG.getNode(LoOp, SDLoc(N), N->getValueType(0), - ArrayRef(N->op_begin(), N->op_end())); - AddToWorkList(Lo.getNode()); + SDValue Lo = DAG.getNode(LoOp, SDLoc(N), N->getValueType(0), N->ops()); + AddToWorklist(Lo.getNode()); SDValue LoOpt = combine(Lo.getNode()); if (LoOpt.getNode() && LoOpt.getNode() != Lo.getNode() && (!LegalOperations || @@ -2329,9 +2402,8 @@ SDValue DAGCombiner::SimplifyNodeWithTwoResults(SDNode *N, unsigned LoOp, } if (HiExists) { - SDValue Hi = DAG.getNode(HiOp, SDLoc(N), N->getValueType(1), - ArrayRef(N->op_begin(), N->op_end())); - AddToWorkList(Hi.getNode()); + SDValue Hi = DAG.getNode(HiOp, SDLoc(N), N->getValueType(1), N->ops()); + AddToWorklist(Hi.getNode()); SDValue HiOpt = combine(Hi.getNode()); if (HiOpt.getNode() && HiOpt != Hi && (!LegalOperations || @@ -2436,8 +2508,8 @@ SDValue DAGCombiner::visitUDIVREM(SDNode *N) { return SDValue(); } -/// SimplifyBinOpWithSameOpcodeHands - If this is a binary operator with -/// two operands of the same opcode, try to simplify it. +/// If this is a binary operator with two operands of the same opcode, try to +/// simplify it. 
SDValue DAGCombiner::SimplifyBinOpWithSameOpcodeHands(SDNode *N) { SDValue N0 = N->getOperand(0), N1 = N->getOperand(1); EVT VT = N0.getValueType(); @@ -2470,7 +2542,7 @@ SDValue DAGCombiner::SimplifyBinOpWithSameOpcodeHands(SDNode *N) { SDValue ORNode = DAG.getNode(N->getOpcode(), SDLoc(N0), N0.getOperand(0).getValueType(), N0.getOperand(0), N1.getOperand(0)); - AddToWorkList(ORNode.getNode()); + AddToWorklist(ORNode.getNode()); return DAG.getNode(N0.getOpcode(), SDLoc(N), VT, ORNode); } @@ -2484,7 +2556,7 @@ SDValue DAGCombiner::SimplifyBinOpWithSameOpcodeHands(SDNode *N) { SDValue ORNode = DAG.getNode(N->getOpcode(), SDLoc(N0), N0.getOperand(0).getValueType(), N0.getOperand(0), N1.getOperand(0)); - AddToWorkList(ORNode.getNode()); + AddToWorklist(ORNode.getNode()); return DAG.getNode(N0.getOpcode(), SDLoc(N), VT, ORNode, N0.getOperand(1)); } @@ -2509,7 +2581,7 @@ SDValue DAGCombiner::SimplifyBinOpWithSameOpcodeHands(SDNode *N) { if (In0Ty.isInteger() && In1Ty.isInteger() && In0Ty == In1Ty) { SDValue Op = DAG.getNode(N->getOpcode(), DL, In0Ty, In0, In1); SDValue BC = DAG.getNode(N0.getOpcode(), DL, VT, Op); - AddToWorkList(Op.getNode()); + AddToWorklist(Op.getNode()); return BC; } } @@ -2556,7 +2628,7 @@ SDValue DAGCombiner::SimplifyBinOpWithSameOpcodeHands(SDNode *N) { if (N0.getOperand(1) == N1.getOperand(1) && ShOp.getNode()) { SDValue NewNode = DAG.getNode(N->getOpcode(), SDLoc(N), VT, N0->getOperand(0), N1->getOperand(0)); - AddToWorkList(NewNode.getNode()); + AddToWorklist(NewNode.getNode()); return DAG.getVectorShuffle(VT, SDLoc(N), NewNode, ShOp, &SVN0->getMask()[0]); } @@ -2577,7 +2649,7 @@ SDValue DAGCombiner::SimplifyBinOpWithSameOpcodeHands(SDNode *N) { if (N0->getOperand(0) == N1->getOperand(0) && ShOp.getNode()) { SDValue NewNode = DAG.getNode(N->getOpcode(), SDLoc(N), VT, N0->getOperand(1), N1->getOperand(1)); - AddToWorkList(NewNode.getNode()); + AddToWorklist(NewNode.getNode()); return DAG.getVectorShuffle(VT, SDLoc(N), ShOp, NewNode, &SVN0->getMask()[0]); } @@ -2603,9 +2675,17 @@ SDValue DAGCombiner::visitAND(SDNode *N) { // fold (and x, 0) -> 0, vector edition if (ISD::isBuildVectorAllZeros(N0.getNode())) - return N0; + // do not return N0, because undef node may exist in N0 + return DAG.getConstant( + APInt::getNullValue( + N0.getValueType().getScalarType().getSizeInBits()), + N0.getValueType()); if (ISD::isBuildVectorAllZeros(N1.getNode())) - return N1; + // do not return N1, because undef node may exist in N1 + return DAG.getConstant( + APInt::getNullValue( + N1.getValueType().getScalarType().getSizeInBits()), + N1.getValueType()); // fold (and x, -1) -> x, vector edition if (ISD::isBuildVectorAllOnes(N0.getNode())) @@ -2768,21 +2848,21 @@ SDValue DAGCombiner::visitAND(SDNode *N) { if (cast(LR)->isNullValue() && Op1 == ISD::SETEQ) { SDValue ORNode = DAG.getNode(ISD::OR, SDLoc(N0), LR.getValueType(), LL, RL); - AddToWorkList(ORNode.getNode()); + AddToWorklist(ORNode.getNode()); return DAG.getSetCC(SDLoc(N), VT, ORNode, LR, Op1); } // fold (and (seteq X, -1), (seteq Y, -1)) -> (seteq (and X, Y), -1) if (cast(LR)->isAllOnesValue() && Op1 == ISD::SETEQ) { SDValue ANDNode = DAG.getNode(ISD::AND, SDLoc(N0), LR.getValueType(), LL, RL); - AddToWorkList(ANDNode.getNode()); + AddToWorklist(ANDNode.getNode()); return DAG.getSetCC(SDLoc(N), VT, ANDNode, LR, Op1); } // fold (and (setgt X, -1), (setgt Y, -1)) -> (setgt (or X, Y), -1) if (cast(LR)->isAllOnesValue() && Op1 == ISD::SETGT) { SDValue ORNode = DAG.getNode(ISD::OR, SDLoc(N0), LR.getValueType(), LL, RL); - 
AddToWorkList(ORNode.getNode()); + AddToWorklist(ORNode.getNode()); return DAG.getSetCC(SDLoc(N), VT, ORNode, LR, Op1); } } @@ -2795,7 +2875,7 @@ SDValue DAGCombiner::visitAND(SDNode *N) { cast(RR)->isNullValue()))) { SDValue ADDNode = DAG.getNode(ISD::ADD, SDLoc(N0), LL.getValueType(), LL, DAG.getConstant(1, LL.getValueType())); - AddToWorkList(ADDNode.getNode()); + AddToWorklist(ADDNode.getNode()); return DAG.getSetCC(SDLoc(N), VT, ADDNode, DAG.getConstant(2, LL.getValueType()), ISD::SETUGE); } @@ -2843,7 +2923,7 @@ SDValue DAGCombiner::visitAND(SDNode *N) { SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(N0), VT, LN0->getChain(), LN0->getBasePtr(), MemVT, LN0->getMemOperand()); - AddToWorkList(N); + AddToWorklist(N); CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1)); return SDValue(N, 0); // Return N so it doesn't get rechecked! } @@ -2863,7 +2943,7 @@ SDValue DAGCombiner::visitAND(SDNode *N) { SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(N0), VT, LN0->getChain(), LN0->getBasePtr(), MemVT, LN0->getMemOperand()); - AddToWorkList(N); + AddToWorklist(N); CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1)); return SDValue(N, 0); // Return N so it doesn't get rechecked! } @@ -2894,7 +2974,7 @@ SDValue DAGCombiner::visitAND(SDNode *N) { DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(LN0), LoadResultTy, LN0->getChain(), LN0->getBasePtr(), ExtVT, LN0->getMemOperand()); - AddToWorkList(N); + AddToWorklist(N); CombineTo(LN0, NewLoad, NewLoad.getValue(1)); return SDValue(N, 0); // Return N so it doesn't get rechecked! } @@ -2921,7 +3001,7 @@ SDValue DAGCombiner::visitAND(SDNode *N) { Alignment = MinAlign(Alignment, PtrOff); } - AddToWorkList(NewPtr.getNode()); + AddToWorklist(NewPtr.getNode()); EVT LoadResultTy = HasAnyExt ? LN0->getValueType(0) : VT; SDValue Load = @@ -2929,8 +3009,8 @@ SDValue DAGCombiner::visitAND(SDNode *N) { LN0->getChain(), NewPtr, LN0->getPointerInfo(), ExtVT, LN0->isVolatile(), LN0->isNonTemporal(), - Alignment, LN0->getTBAAInfo()); - AddToWorkList(N); + LN0->isInvariant(), Alignment, LN0->getAAInfo()); + AddToWorklist(N); CombineTo(LN0, Load, Load.getValue(1)); return SDValue(N, 0); // Return N so it doesn't get rechecked! } @@ -2976,8 +3056,7 @@ SDValue DAGCombiner::visitAND(SDNode *N) { return SDValue(); } -/// MatchBSwapHWord - Match (a >> 8) | (a << 8) as (bswap a) >> 16 -/// +/// Match (a >> 8) | (a << 8) as (bswap a) >> 16. SDValue DAGCombiner::MatchBSwapHWordLow(SDNode *N, SDValue N0, SDValue N1, bool DemandHighBits) { if (!LegalOperations) @@ -3082,10 +3161,13 @@ SDValue DAGCombiner::MatchBSwapHWordLow(SDNode *N, SDValue N0, SDValue N1, return Res; } -/// isBSwapHWordElement - Return true if the specified node is an element -/// that makes up a 32-bit packed halfword byteswap. i.e. -/// ((x&0xff)<<8)|((x&0xff00)>>8)|((x&0x00ff0000)<<8)|((x&0xff000000)>>8) -static bool isBSwapHWordElement(SDValue N, SmallVectorImpl &Parts) { +/// Return true if the specified node is an element that makes up a 32-bit +/// packed halfword byteswap. +/// ((x & 0x000000ff) << 8) | +/// ((x & 0x0000ff00) >> 8) | +/// ((x & 0x00ff0000) << 8) | +/// ((x & 0xff000000) >> 8) +static bool isBSwapHWordElement(SDValue N, MutableArrayRef Parts) { if (!N.getNode()->hasOneUse()) return false; @@ -3152,8 +3234,11 @@ static bool isBSwapHWordElement(SDValue N, SmallVectorImpl &Parts) { return true; } -/// MatchBSwapHWord - Match a 32-bit packed halfword bswap. That is -/// ((x&0xff)<<8)|((x&0xff00)>>8)|((x&0x00ff0000)<<8)|((x&0xff000000)>>8) +/// Match a 32-bit packed halfword bswap. 
That is +/// ((x & 0x000000ff) << 8) | +/// ((x & 0x0000ff00) >> 8) | +/// ((x & 0x00ff0000) << 8) | +/// ((x & 0xff000000) >> 8) /// => (rotl (bswap x), 16) SDValue DAGCombiner::MatchBSwapHWord(SDNode *N, SDValue N0, SDValue N1) { if (!LegalOperations) @@ -3165,7 +3250,6 @@ SDValue DAGCombiner::MatchBSwapHWord(SDNode *N, SDValue N0, SDValue N1) { if (!TLI.isOperationLegal(ISD::BSWAP, VT)) return SDValue(); - SmallVector Parts(4, (SDNode*)nullptr); // Look for either // (or (or (and), (and)), (or (and), (and))) // (or (or (or (and), (and)), (and)), (and)) @@ -3173,6 +3257,7 @@ SDValue DAGCombiner::MatchBSwapHWord(SDNode *N, SDValue N0, SDValue N1) { return SDValue(); SDValue N00 = N0.getOperand(0); SDValue N01 = N0.getOperand(1); + SDNode *Parts[4] = {}; if (N1.getOpcode() == ISD::OR && N00.getNumOperands() == 2 && N01.getNumOperands() == 2) { @@ -3246,15 +3331,25 @@ SDValue DAGCombiner::visitOR(SDNode *N) { // fold (or x, -1) -> -1, vector edition if (ISD::isBuildVectorAllOnes(N0.getNode())) - return N0; + // do not return N0, because undef node may exist in N0 + return DAG.getConstant( + APInt::getAllOnesValue( + N0.getValueType().getScalarType().getSizeInBits()), + N0.getValueType()); if (ISD::isBuildVectorAllOnes(N1.getNode())) - return N1; + // do not return N1, because undef node may exist in N1 + return DAG.getConstant( + APInt::getAllOnesValue( + N1.getValueType().getScalarType().getSizeInBits()), + N1.getValueType()); // fold (or (shuf A, V_0, MA), (shuf B, V_0, MB)) -> (shuf A, B, Mask1) // fold (or (shuf A, V_0, MA), (shuf B, V_0, MB)) -> (shuf B, A, Mask2) // Do this only if the resulting shuffle is legal. if (isa(N0) && isa(N1) && + // Avoid folding a node with illegal type. + TLI.isTypeLegal(VT) && N0->getOperand(1) == N1->getOperand(1) && ISD::isBuildVectorAllZeros(N0.getOperand(1).getNode())) { bool CanFold = true; @@ -3366,7 +3461,7 @@ SDValue DAGCombiner::visitOR(SDNode *N) { (Op1 == ISD::SETNE || Op1 == ISD::SETLT)) { SDValue ORNode = DAG.getNode(ISD::OR, SDLoc(LR), LR.getValueType(), LL, RL); - AddToWorkList(ORNode.getNode()); + AddToWorklist(ORNode.getNode()); return DAG.getSetCC(SDLoc(N), VT, ORNode, LR, Op1); } // fold (or (setne X, -1), (setne Y, -1)) -> (setne (and X, Y), -1) @@ -3375,7 +3470,7 @@ SDValue DAGCombiner::visitOR(SDNode *N) { (Op1 == ISD::SETNE || Op1 == ISD::SETGT)) { SDValue ANDNode = DAG.getNode(ISD::AND, SDLoc(LR), LR.getValueType(), LL, RL); - AddToWorkList(ANDNode.getNode()); + AddToWorklist(ANDNode.getNode()); return DAG.getSetCC(SDLoc(N), VT, ANDNode, LR, Op1); } } @@ -3438,7 +3533,7 @@ SDValue DAGCombiner::visitOR(SDNode *N) { return SDValue(); } -/// MatchRotateHalf - Match "(X shl/srl V1) & V2" where V2 may not be present. +/// Match "(X shl/srl V1) & V2" where V2 may not be present. 
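Note: the halfword-bswap matchers above rely on the identity spelled out in their comments: the four masked byte moves are exactly (rotl (bswap x), 16). A standalone check of that identity (illustrative only):

  #include <cassert>
  #include <cstdint>

  static uint32_t bswap32(uint32_t x) {
    return ((x & 0x000000ffu) << 24) | ((x & 0x0000ff00u) << 8) |
           ((x & 0x00ff0000u) >> 8)  | ((x & 0xff000000u) >> 24);
  }

  static uint32_t rotl32(uint32_t x, unsigned n) {
    return (x << n) | (x >> (32 - n));
  }

  int main() {
    const uint32_t samples[] = {0u, 0x12345678u, 0xdeadbeefu,
                                0xffffffffu, 0x00ff00ffu};
    for (uint32_t x : samples) {
      // The packed-halfword byteswap pattern the combiner matches.
      uint32_t pattern = ((x & 0x000000ffu) << 8) | ((x & 0x0000ff00u) >> 8) |
                         ((x & 0x00ff0000u) << 8) | ((x & 0xff000000u) >> 8);
      assert(pattern == rotl32(bswap32(x), 16));
    }
    return 0;
  }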
static bool MatchRotateHalf(SDValue Op, SDValue &Shift, SDValue &Mask) { if (Op.getOpcode() == ISD::AND) { if (isa(Op.getOperand(1))) { @@ -3735,7 +3830,7 @@ SDValue DAGCombiner::visitXOR(SDNode *N) { return RXOR; // fold !(x cc y) -> (x !cc y) - if (N1C && N1C->getAPIntValue() == 1 && isSetCCEquivalent(N0, LHS, RHS, CC)) { + if (TLI.isConstTrueVal(N1.getNode()) && isSetCCEquivalent(N0, LHS, RHS, CC)) { bool isInt = LHS.getValueType().isInteger(); ISD::CondCode NotCC = ISD::getSetCCInverse(cast(CC)->get(), isInt); @@ -3761,7 +3856,7 @@ SDValue DAGCombiner::visitXOR(SDNode *N) { SDValue V = N0.getOperand(0); V = DAG.getNode(ISD::XOR, SDLoc(N0), V.getValueType(), V, DAG.getConstant(1, V.getValueType())); - AddToWorkList(V.getNode()); + AddToWorklist(V.getNode()); return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), VT, V); } @@ -3773,7 +3868,7 @@ SDValue DAGCombiner::visitXOR(SDNode *N) { unsigned NewOpcode = N0.getOpcode() == ISD::AND ? ISD::OR : ISD::AND; LHS = DAG.getNode(ISD::XOR, SDLoc(LHS), VT, LHS, N1); // LHS = ~LHS RHS = DAG.getNode(ISD::XOR, SDLoc(RHS), VT, RHS, N1); // RHS = ~RHS - AddToWorkList(LHS.getNode()); AddToWorkList(RHS.getNode()); + AddToWorklist(LHS.getNode()); AddToWorklist(RHS.getNode()); return DAG.getNode(NewOpcode, SDLoc(N), VT, LHS, RHS); } } @@ -3785,7 +3880,7 @@ SDValue DAGCombiner::visitXOR(SDNode *N) { unsigned NewOpcode = N0.getOpcode() == ISD::AND ? ISD::OR : ISD::AND; LHS = DAG.getNode(ISD::XOR, SDLoc(LHS), VT, LHS, N1); // LHS = ~LHS RHS = DAG.getNode(ISD::XOR, SDLoc(RHS), VT, RHS, N1); // RHS = ~RHS - AddToWorkList(LHS.getNode()); AddToWorkList(RHS.getNode()); + AddToWorklist(LHS.getNode()); AddToWorklist(RHS.getNode()); return DAG.getNode(NewOpcode, SDLoc(N), VT, LHS, RHS); } } @@ -3794,7 +3889,7 @@ SDValue DAGCombiner::visitXOR(SDNode *N) { N0->getOperand(1) == N1) { SDValue X = N0->getOperand(0); SDValue NotX = DAG.getNOT(SDLoc(X), X, VT); - AddToWorkList(NotX.getNode()); + AddToWorklist(NotX.getNode()); return DAG.getNode(ISD::AND, SDLoc(N), VT, NotX, N1); } // fold (xor (xor x, c1), c2) -> (xor x, (xor c1, c2)) @@ -3828,8 +3923,8 @@ SDValue DAGCombiner::visitXOR(SDNode *N) { return SDValue(); } -/// visitShiftByConstant - Handle transforms common to the three shifts, when -/// the shift amount is a constant. +/// Handle transforms common to the three shifts, when the shift amount is a +/// constant. SDValue DAGCombiner::visitShiftByConstant(SDNode *N, ConstantSDNode *Amt) { // We can't and shouldn't fold opaque constants. if (Amt->isOpaque()) @@ -4059,7 +4154,7 @@ SDValue DAGCombiner::visitSHL(SDNode *N) { EVT CountVT = NewOp0.getOperand(1).getValueType(); SDValue NewSHL = DAG.getNode(ISD::SHL, SDLoc(N), NewOp0.getValueType(), NewOp0, DAG.getConstant(c2, CountVT)); - AddToWorkList(NewSHL.getNode()); + AddToWorklist(NewSHL.getNode()); return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N0), VT, NewSHL); } } @@ -4101,6 +4196,18 @@ SDValue DAGCombiner::visitSHL(SDNode *N) { HiBitsMask); } + // fold (shl (add x, c1), c2) -> (add (shl x, c2), c1 << c2) + // Variant of version done on multiply, except mul by a power of 2 is turned + // into a shift. 
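Note: the new (shl (add x, c1), c2) -> (add (shl x, c2), c1 << c2) fold, whose code follows just below, mirrors the existing fold on multiplies and relies on shifts distributing over addition in modular (two's-complement) arithmetic. A standalone check (illustrative only):

  #include <cassert>
  #include <cstdint>

  int main() {
    // (x + c1) << c2  ==  (x << c2) + (c1 << c2)   (mod 2^32)
    const uint32_t xs[] = {0u, 1u, 0x7fffffffu, 0xfffffff0u, 0x12345678u};
    const uint32_t c1 = 0x1234u;
    const unsigned c2 = 5;
    for (uint32_t x : xs)
      assert(((x + c1) << c2) == ((x << c2) + (c1 << c2)));
    return 0;
  }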
+ APInt Val; + if (N1C && N0.getOpcode() == ISD::ADD && N0.getNode()->hasOneUse() && + (isa(N0.getOperand(1)) || + isConstantSplatVector(N0.getOperand(1).getNode(), Val))) { + SDValue Shl0 = DAG.getNode(ISD::SHL, SDLoc(N0), VT, N0.getOperand(0), N1); + SDValue Shl1 = DAG.getNode(ISD::SHL, SDLoc(N1), VT, N0.getOperand(1), N1); + return DAG.getNode(ISD::ADD, SDLoc(N), VT, Shl0, Shl1); + } + if (N1C) { SDValue NewSHL = visitShiftByConstant(N, N1C); if (NewSHL.getNode()) @@ -4345,7 +4452,7 @@ SDValue DAGCombiner::visitSRL(SDNode *N) { SDValue SmallShift = DAG.getNode(ISD::SRL, SDLoc(N0), SmallVT, N0.getOperand(0), DAG.getConstant(ShiftAmt, getShiftAmountTy(SmallVT))); - AddToWorkList(SmallShift.getNode()); + AddToWorklist(SmallShift.getNode()); APInt Mask = APInt::getAllOnesValue(OpSizeInBits).lshr(ShiftAmt); return DAG.getNode(ISD::AND, SDLoc(N), VT, DAG.getNode(ISD::ANY_EXTEND, SDLoc(N), VT, SmallShift), @@ -4387,7 +4494,7 @@ SDValue DAGCombiner::visitSRL(SDNode *N) { if (ShAmt) { Op = DAG.getNode(ISD::SRL, SDLoc(N0), VT, Op, DAG.getConstant(ShAmt, getShiftAmountTy(Op.getValueType()))); - AddToWorkList(Op.getNode()); + AddToWorklist(Op.getNode()); } return DAG.getNode(ISD::XOR, SDLoc(N), VT, @@ -4439,12 +4546,12 @@ SDValue DAGCombiner::visitSRL(SDNode *N) { if (N->hasOneUse()) { SDNode *Use = *N->use_begin(); if (Use->getOpcode() == ISD::BRCOND) - AddToWorkList(Use); + AddToWorklist(Use); else if (Use->getOpcode() == ISD::TRUNCATE && Use->hasOneUse()) { // Also look pass the truncate. Use = *Use->use_begin(); if (Use->getOpcode() == ISD::BRCOND) - AddToWorkList(Use); + AddToWorklist(Use); } } @@ -4545,7 +4652,7 @@ SDValue DAGCombiner::visitSELECT(SDNode *N) { N0, DAG.getConstant(1, VT0)); XORNode = DAG.getNode(ISD::XOR, SDLoc(N0), VT0, N0, DAG.getConstant(1, VT0)); - AddToWorkList(XORNode.getNode()); + AddToWorklist(XORNode.getNode()); if (VT.bitsGT(VT0)) return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), VT, XORNode); return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, XORNode); @@ -4553,13 +4660,13 @@ SDValue DAGCombiner::visitSELECT(SDNode *N) { // fold (select C, 0, X) -> (and (not C), X) if (VT == VT0 && VT == MVT::i1 && N1C && N1C->isNullValue()) { SDValue NOTNode = DAG.getNOT(SDLoc(N0), N0, VT); - AddToWorkList(NOTNode.getNode()); + AddToWorklist(NOTNode.getNode()); return DAG.getNode(ISD::AND, SDLoc(N), VT, NOTNode, N2); } // fold (select C, X, 1) -> (or (not C), X) if (VT == VT0 && VT == MVT::i1 && N2C && N2C->getAPIntValue() == 1) { SDValue NOTNode = DAG.getNOT(SDLoc(N0), N0, VT); - AddToWorkList(NOTNode.getNode()); + AddToWorklist(NOTNode.getNode()); return DAG.getNode(ISD::OR, SDLoc(N), VT, NOTNode, N1); } // fold (select C, X, 0) -> (and C, X) @@ -4582,7 +4689,7 @@ SDValue DAGCombiner::visitSELECT(SDNode *N) { if (N0.getOpcode() == ISD::SETCC) { if ((!LegalOperations && TLI.isOperationLegalOrCustom(ISD::SELECT_CC, VT)) || - TLI.isOperationLegal(ISD::SELECT_CC, VT)) + TLI.isOperationLegal(ISD::SELECT_CC, VT)) return DAG.getNode(ISD::SELECT_CC, SDLoc(N), VT, N0.getOperand(0), N0.getOperand(1), N1, N2, N0.getOperand(2)); @@ -4616,12 +4723,17 @@ static SDValue ConvertSelectToConcatVector(SDNode *N, SelectionDAG &DAG) { SDValue Cond = N->getOperand(0); SDValue LHS = N->getOperand(1); SDValue RHS = N->getOperand(2); - MVT VT = N->getSimpleValueType(0); + EVT VT = N->getValueType(0); int NumElems = VT.getVectorNumElements(); assert(LHS.getOpcode() == ISD::CONCAT_VECTORS && RHS.getOpcode() == ISD::CONCAT_VECTORS && Cond.getOpcode() == ISD::BUILD_VECTOR); + // CONCAT_VECTOR can take an arbitrary 
number of arguments. We only care about + // binary ones here. + if (LHS->getNumOperands() != 2 || RHS->getNumOperands() != 2) + return SDValue(); + // We're sure we have an even number of elements due to the // concat_vectors we have as arguments to vselect. // Skip BV elements until we find one that's not an UNDEF @@ -4690,8 +4802,8 @@ SDValue DAGCombiner::visitVSELECT(SDNode *N) { ISD::SRA, DL, VT, LHS, DAG.getConstant(VT.getScalarType().getSizeInBits() - 1, VT)); SDValue Add = DAG.getNode(ISD::ADD, DL, VT, LHS, Shift); - AddToWorkList(Shift.getNode()); - AddToWorkList(Add.getNode()); + AddToWorklist(Shift.getNode()); + AddToWorklist(Add.getNode()); return DAG.getNode(ISD::XOR, DL, VT, Add, Shift); } } @@ -4718,8 +4830,8 @@ SDValue DAGCombiner::visitVSELECT(SDNode *N) { // Add the new VSELECT nodes to the work list in case they need to be split // again. - AddToWorkList(Lo.getNode()); - AddToWorkList(Hi.getNode()); + AddToWorklist(Lo.getNode()); + AddToWorklist(Hi.getNode()); return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, Lo, Hi); } @@ -4761,7 +4873,7 @@ SDValue DAGCombiner::visitSELECT_CC(SDNode *N) { SDValue SCC = SimplifySetCC(getSetCCResultType(N0.getValueType()), N0, N1, CC, SDLoc(N), false); if (SCC.getNode()) { - AddToWorkList(SCC.getNode()); + AddToWorklist(SCC.getNode()); if (ConstantSDNode *SCCC = dyn_cast(SCC.getNode())) { if (!SCCC->isNullValue()) @@ -4958,7 +5070,7 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) { if (NarrowLoad.getNode() != N0.getNode()) { CombineTo(N0.getNode(), NarrowLoad); // CombineTo deleted the truncate, if needed, but not what's under it. - AddToWorkList(oye); + AddToWorklist(oye); } return SDValue(N, 0); // Return N so it doesn't get rechecked! } @@ -5134,14 +5246,10 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) { if (!LegalOperations || TLI.isOperationLegal(ISD::SETCC, SetCCVT)) { SDLoc DL(N); ISD::CondCode CC = cast(N0.getOperand(2))->get(); - SDValue SetCC = DAG.getSetCC(DL, - SetCCVT, + SDValue SetCC = DAG.getSetCC(DL, SetCCVT, N0.getOperand(0), N0.getOperand(1), CC); - EVT SelectVT = getSetCCResultType(VT); - return DAG.getSelect(DL, VT, - DAG.getSExtOrTrunc(SetCC, DL, SelectVT), + return DAG.getSelect(DL, VT, SetCC, NegOne, DAG.getConstant(0, VT)); - } } } @@ -5239,7 +5347,7 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) { if (NarrowLoad.getNode() != N0.getNode()) { CombineTo(N0.getNode(), NarrowLoad); // CombineTo deleted the truncate, if needed, but not what's under it. - AddToWorkList(oye); + AddToWorklist(oye); } return SDValue(N, 0); // Return N so it doesn't get rechecked! } @@ -5257,7 +5365,7 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) { if (NarrowLoad.getNode() != N0.getNode()) { CombineTo(N0.getNode(), NarrowLoad); // CombineTo deleted the truncate, if needed, but not what's under it. - AddToWorkList(oye); + AddToWorklist(oye); } return SDValue(N, 0); // Return N so it doesn't get rechecked! 
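Note: the sra/add/xor sequence built in the VSELECT hunk above is the classic branchless integer absolute value. A scalar sketch of why it works (illustrative only; the arithmetic right shift of a negative value is implementation-defined before C++20 but behaves this way on the targets of interest):

  #include <cassert>
  #include <cstdint>

  // Branchless |x| using the combiner's sequence:
  //   s   = x >> 31      (arithmetic shift: 0 for x >= 0, -1 for x < 0)
  //   abs = (x + s) ^ s   ( = x when s == 0, = ~(x - 1) = -x when s == -1 )
  static int32_t abs_sra(int32_t x) {
    int32_t s = x >> 31;
    return (x + s) ^ s;
  }

  int main() {
    const int32_t xs[] = {0, 1, -1, 42, -42, 2147483647};
    for (int32_t x : xs)
      assert(abs_sra(x) == (x < 0 ? -x : x));
    return 0;
  }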
} @@ -5265,10 +5373,10 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) { SDValue Op = N0.getOperand(0); if (Op.getValueType().bitsLT(VT)) { Op = DAG.getNode(ISD::ANY_EXTEND, SDLoc(N), VT, Op); - AddToWorkList(Op.getNode()); + AddToWorklist(Op.getNode()); } else if (Op.getValueType().bitsGT(VT)) { Op = DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, Op); - AddToWorkList(Op.getNode()); + AddToWorklist(Op.getNode()); } return DAG.getZeroExtendInReg(Op, SDLoc(N), N0.getValueType().getScalarType()); @@ -5487,7 +5595,7 @@ SDValue DAGCombiner::visitANY_EXTEND(SDNode *N) { if (NarrowLoad.getNode() != N0.getNode()) { CombineTo(N0.getNode(), NarrowLoad); // CombineTo deleted the truncate, if needed, but not what's under it. - AddToWorkList(oye); + AddToWorklist(oye); } return SDValue(N, 0); // Return N so it doesn't get rechecked! } @@ -5528,8 +5636,7 @@ SDValue DAGCombiner::visitANY_EXTEND(SDNode *N) { // scalars. if (ISD::isNON_EXTLoad(N0.getNode()) && !VT.isVector() && ISD::isUNINDEXEDLoad(N0.getNode()) && - ((!LegalOperations && !cast(N0)->isVolatile()) || - TLI.isLoadExtLegal(ISD::EXTLOAD, N0.getValueType()))) { + TLI.isLoadExtLegal(ISD::EXTLOAD, N0.getValueType())) { bool DoXform = true; SmallVector SetCCs; if (!N0.hasOneUse()) @@ -5614,9 +5721,9 @@ SDValue DAGCombiner::visitANY_EXTEND(SDNode *N) { return SDValue(); } -/// GetDemandedBits - See if the specified operand can be simplified with the -/// knowledge that only the bits specified by Mask are used. If so, return the -/// simpler operand, otherwise return a null SDValue. +/// See if the specified operand can be simplified with the knowledge that only +/// the bits specified by Mask are used. If so, return the simpler operand, +/// otherwise return a null SDValue. SDValue DAGCombiner::GetDemandedBits(SDValue V, const APInt &Mask) { switch (V.getOpcode()) { default: break; @@ -5657,11 +5764,11 @@ SDValue DAGCombiner::GetDemandedBits(SDValue V, const APInt &Mask) { return SDValue(); } -/// ReduceLoadWidth - If the result of a wider load is shifted to right of N -/// bits and then truncated to a narrower type and where N is a multiple -/// of number of bits of the narrower type, transform it to a narrower load -/// from address + N / num of bits of new type. If the result is to be -/// extended, also fold the extension to form a extending load. +/// If the result of a wider load is shifted to right of N bits and then +/// truncated to a narrower type and where N is a multiple of number of bits of +/// the narrower type, transform it to a narrower load from address + N / num of +/// bits of new type. If the result is to be extended, also fold the extension +/// to form a extending load. 
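Note: the ReduceLoadWidth transform described above turns (trunc (srl (load p), N)) into a narrow load at an adjusted offset; on a little-endian layout the shifted byte simply lives at p + N/8. A host-side sketch of that equivalence (assumes a little-endian host, illustrative only):

  #include <cassert>
  #include <cstdint>
  #include <cstring>

  int main() {
    unsigned char buf[4] = {0x11, 0x22, 0x33, 0x44};
    uint32_t wide;
    std::memcpy(&wide, buf, sizeof(wide));     // 32-bit load of p

    uint8_t narrowed = static_cast<uint8_t>(wide >> 16); // (trunc (srl (load p), 16))
    uint8_t direct   = buf[2];                            // 8-bit load of p + 2

    assert(narrowed == direct); // holds for little-endian byte order
    return 0;
  }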
SDValue DAGCombiner::ReduceLoadWidth(SDNode *N) { unsigned Opc = N->getOpcode(); @@ -5786,22 +5893,22 @@ SDValue DAGCombiner::ReduceLoadWidth(SDNode *N) { SDValue NewPtr = DAG.getNode(ISD::ADD, SDLoc(LN0), PtrType, LN0->getBasePtr(), DAG.getConstant(PtrOff, PtrType)); - AddToWorkList(NewPtr.getNode()); + AddToWorklist(NewPtr.getNode()); SDValue Load; if (ExtType == ISD::NON_EXTLOAD) Load = DAG.getLoad(VT, SDLoc(N0), LN0->getChain(), NewPtr, LN0->getPointerInfo().getWithOffset(PtrOff), LN0->isVolatile(), LN0->isNonTemporal(), - LN0->isInvariant(), NewAlign, LN0->getTBAAInfo()); + LN0->isInvariant(), NewAlign, LN0->getAAInfo()); else Load = DAG.getExtLoad(ExtType, SDLoc(N0), VT, LN0->getChain(),NewPtr, LN0->getPointerInfo().getWithOffset(PtrOff), ExtVT, LN0->isVolatile(), LN0->isNonTemporal(), - NewAlign, LN0->getTBAAInfo()); + LN0->isInvariant(), NewAlign, LN0->getAAInfo()); // Replace the old load's chain with the new load's chain. - WorkListRemover DeadNodes(*this); + WorklistRemover DeadNodes(*this); DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), Load.getValue(1)); // Shift the result left, if we've swallowed a left shift. @@ -5900,7 +6007,7 @@ SDValue DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N) { LN0->getMemOperand()); CombineTo(N, ExtLoad); CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1)); - AddToWorkList(ExtLoad.getNode()); + AddToWorklist(ExtLoad.getNode()); return SDValue(N, 0); // Return N so it doesn't get rechecked! } // fold (sext_inreg (zextload x)) -> (sextload x) iff load has one use @@ -6022,6 +6129,19 @@ SDValue DAGCombiner::visitTRUNCATE(SDNode *N) { } } + // trunc (select c, a, b) -> select c, (trunc a), (trunc b) + if (N0.getOpcode() == ISD::SELECT) { + EVT SrcVT = N0.getValueType(); + if ((!LegalOperations || TLI.isOperationLegal(ISD::SELECT, SrcVT)) && + TLI.isTruncateFree(SrcVT, VT)) { + SDLoc SL(N0); + SDValue Cond = N0.getOperand(0); + SDValue TruncOp0 = DAG.getNode(ISD::TRUNCATE, SL, VT, N0.getOperand(1)); + SDValue TruncOp1 = DAG.getNode(ISD::TRUNCATE, SL, VT, N0.getOperand(2)); + return DAG.getNode(ISD::SELECT, SDLoc(N), VT, Cond, TruncOp0, TruncOp1); + } + } + // Fold a series of buildvector, bitcast, and truncate if possible. // For example fold // (2xi32 trunc (bitcast ((4xi32)buildvector x, x, y, y) 2xi64)) to @@ -6121,7 +6241,7 @@ SDValue DAGCombiner::visitTRUNCATE(SDNode *N) { continue; } SDValue NV = DAG.getNode(ISD::TRUNCATE, SDLoc(V), VTs[i], V); - AddToWorkList(NV.getNode()); + AddToWorklist(NV.getNode()); Opnds.push_back(NV); } return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, Opnds); @@ -6143,7 +6263,7 @@ static SDNode *getBuildPairElt(SDNode *N, unsigned i) { return Elt.getOperand(Elt.getResNo()).getNode(); } -/// CombineConsecutiveLoads - build_pair (load, load) -> load +/// build_pair (load, load) -> load /// if load locations are consecutive. SDValue DAGCombiner::CombineConsecutiveLoads(SDNode *N, EVT VT) { assert(N->getOpcode() == ISD::BUILD_PAIR); @@ -6209,7 +6329,7 @@ SDValue DAGCombiner::visitBITCAST(SDNode *N) { // Ideally this won't happen very often, because instcombine // and the earlier dagcombine runs (where illegal nodes are // permitted) should have folded most of them already. 
- DAG.DeleteNode(Res.getNode()); + deleteAndRecombine(Res.getNode()); } } @@ -6238,12 +6358,8 @@ SDValue DAGCombiner::visitBITCAST(SDNode *N) { LN0->getBasePtr(), LN0->getPointerInfo(), LN0->isVolatile(), LN0->isNonTemporal(), LN0->isInvariant(), OrigAlign, - LN0->getTBAAInfo()); - AddToWorkList(N); - CombineTo(N0.getNode(), - DAG.getNode(ISD::BITCAST, SDLoc(N0), - N0.getValueType(), Load), - Load.getValue(1)); + LN0->getAAInfo()); + DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), Load.getValue(1)); return Load; } } @@ -6257,7 +6373,7 @@ SDValue DAGCombiner::visitBITCAST(SDNode *N) { !VT.isVector() && !N0.getValueType().isVector()) { SDValue NewConv = DAG.getNode(ISD::BITCAST, SDLoc(N0), VT, N0.getOperand(0)); - AddToWorkList(NewConv.getNode()); + AddToWorklist(NewConv.getNode()); APInt SignBit = APInt::getSignBit(VT.getSizeInBits()); if (N0.getOpcode() == ISD::FNEG) @@ -6280,34 +6396,34 @@ SDValue DAGCombiner::visitBITCAST(SDNode *N) { if (isTypeLegal(IntXVT)) { SDValue X = DAG.getNode(ISD::BITCAST, SDLoc(N0), IntXVT, N0.getOperand(1)); - AddToWorkList(X.getNode()); + AddToWorklist(X.getNode()); // If X has a different width than the result/lhs, sext it or truncate it. unsigned VTWidth = VT.getSizeInBits(); if (OrigXWidth < VTWidth) { X = DAG.getNode(ISD::SIGN_EXTEND, SDLoc(N), VT, X); - AddToWorkList(X.getNode()); + AddToWorklist(X.getNode()); } else if (OrigXWidth > VTWidth) { // To get the sign bit in the right place, we have to shift it right // before truncating. X = DAG.getNode(ISD::SRL, SDLoc(X), X.getValueType(), X, DAG.getConstant(OrigXWidth-VTWidth, X.getValueType())); - AddToWorkList(X.getNode()); + AddToWorklist(X.getNode()); X = DAG.getNode(ISD::TRUNCATE, SDLoc(X), VT, X); - AddToWorkList(X.getNode()); + AddToWorklist(X.getNode()); } APInt SignBit = APInt::getSignBit(VT.getSizeInBits()); X = DAG.getNode(ISD::AND, SDLoc(X), VT, X, DAG.getConstant(SignBit, VT)); - AddToWorkList(X.getNode()); + AddToWorklist(X.getNode()); SDValue Cst = DAG.getNode(ISD::BITCAST, SDLoc(N0), VT, N0.getOperand(0)); Cst = DAG.getNode(ISD::AND, SDLoc(Cst), VT, Cst, DAG.getConstant(~SignBit, VT)); - AddToWorkList(Cst.getNode()); + AddToWorklist(Cst.getNode()); return DAG.getNode(ISD::OR, SDLoc(N), VT, X, Cst); } @@ -6328,9 +6444,8 @@ SDValue DAGCombiner::visitBUILD_PAIR(SDNode *N) { return CombineConsecutiveLoads(N, VT); } -/// ConstantFoldBITCASTofBUILD_VECTOR - We know that BV is a build_vector -/// node with Constant, ConstantFP or Undef operands. DstEltVT indicates the -/// destination element value type. +/// We know that BV is a build_vector node with Constant, ConstantFP or Undef +/// operands. DstEltVT indicates the destination element value type. 
SDValue DAGCombiner:: ConstantFoldBITCASTofBUILD_VECTOR(SDNode *BV, EVT DstEltVT) { EVT SrcEltVT = BV->getValueType(0).getVectorElementType(); @@ -6363,7 +6478,7 @@ ConstantFoldBITCASTofBUILD_VECTOR(SDNode *BV, EVT DstEltVT) { Op = DAG.getNode(ISD::TRUNCATE, SDLoc(BV), SrcEltVT, Op); Ops.push_back(DAG.getNode(ISD::BITCAST, SDLoc(BV), DstEltVT, Op)); - AddToWorkList(Ops.back().getNode()); + AddToWorklist(Ops.back().getNode()); } return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(BV), VT, Ops); } @@ -6466,6 +6581,7 @@ SDValue DAGCombiner::visitFADD(SDNode *N) { ConstantFPSDNode *N0CFP = dyn_cast(N0); ConstantFPSDNode *N1CFP = dyn_cast(N1); EVT VT = N->getValueType(0); + const TargetOptions &Options = DAG.getTarget().Options; // fold vector ops if (VT.isVector()) { @@ -6476,193 +6592,143 @@ SDValue DAGCombiner::visitFADD(SDNode *N) { // fold (fadd c1, c2) -> c1 + c2 if (N0CFP && N1CFP) return DAG.getNode(ISD::FADD, SDLoc(N), VT, N0, N1); + // canonicalize constant to RHS if (N0CFP && !N1CFP) return DAG.getNode(ISD::FADD, SDLoc(N), VT, N1, N0); - // fold (fadd A, 0) -> A - if (DAG.getTarget().Options.UnsafeFPMath && N1CFP && - N1CFP->getValueAPF().isZero()) - return N0; + // fold (fadd A, (fneg B)) -> (fsub A, B) if ((!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FSUB, VT)) && - isNegatibleForFree(N1, LegalOperations, TLI, &DAG.getTarget().Options) == 2) + isNegatibleForFree(N1, LegalOperations, TLI, &Options) == 2) return DAG.getNode(ISD::FSUB, SDLoc(N), VT, N0, GetNegatedExpression(N1, DAG, LegalOperations)); + // fold (fadd (fneg A), B) -> (fsub B, A) if ((!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FSUB, VT)) && - isNegatibleForFree(N0, LegalOperations, TLI, &DAG.getTarget().Options) == 2) + isNegatibleForFree(N0, LegalOperations, TLI, &Options) == 2) return DAG.getNode(ISD::FSUB, SDLoc(N), VT, N1, GetNegatedExpression(N0, DAG, LegalOperations)); - // If allowed, fold (fadd (fadd x, c1), c2) -> (fadd x, (fadd c1, c2)) - if (DAG.getTarget().Options.UnsafeFPMath && N1CFP && - N0.getOpcode() == ISD::FADD && N0.getNode()->hasOneUse() && - isa(N0.getOperand(1))) - return DAG.getNode(ISD::FADD, SDLoc(N), VT, N0.getOperand(0), - DAG.getNode(ISD::FADD, SDLoc(N), VT, - N0.getOperand(1), N1)); + // If 'unsafe math' is enabled, fold lots of things. + if (Options.UnsafeFPMath) { + // No FP constant should be created after legalization as Instruction + // Selection pass has a hard time dealing with FP constants. + bool AllowNewConst = (Level < AfterLegalizeDAG); - // No FP constant should be created after legalization as Instruction - // Selection pass has hard time in dealing with FP constant. - // - // We don't need test this condition for transformation like following, as - // the DAG being transformed implies it is legal to take FP constant as - // operand. - // - // (fadd (fmul c, x), x) -> (fmul c+1, x) - // - bool AllowNewFpConst = (Level < AfterLegalizeDAG); - - // If allow, fold (fadd (fneg x), x) -> 0.0 - if (AllowNewFpConst && DAG.getTarget().Options.UnsafeFPMath && - N0.getOpcode() == ISD::FNEG && N0.getOperand(0) == N1) - return DAG.getConstantFP(0.0, VT); - - // If allow, fold (fadd x, (fneg x)) -> 0.0 - if (AllowNewFpConst && DAG.getTarget().Options.UnsafeFPMath && - N1.getOpcode() == ISD::FNEG && N1.getOperand(0) == N0) - return DAG.getConstantFP(0.0, VT); - - // In unsafe math mode, we can fold chains of FADD's of the same value - // into multiplications. This transform is not safe in general because - // we are reducing the number of rounding steps. 
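Note: the visitFADD rework below moves the FADD-chain folds, e.g. (fadd (fmul x, c), x) -> (fmul x, c+1) and (fadd x, (fadd x, x)) -> (fmul x, 3.0), under the Options.UnsafeFPMath guard, because they change the number of rounding steps. A small sketch of the last-bit differences such reassociation can introduce (illustrative only; compile without fast-math flags):

  #include <cstdio>

  int main() {
    // x*c + x rounds twice; x*(c+1) rounds once, so results may differ
    // in the last bit.
    const float c = 0.1f;
    int mismatches = 0;
    for (int i = 1; i <= 1000000; ++i) {
      float x = static_cast<float>(i) * 0.001f;
      float folded   = x * (c + 1.0f);
      float unfolded = x * c + x;
      if (folded != unfolded)
        ++mismatches;
    }
    std::printf("last-bit mismatches: %d of 1000000\n", mismatches);
    return 0;
  }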
- if (DAG.getTarget().Options.UnsafeFPMath && - TLI.isOperationLegalOrCustom(ISD::FMUL, VT) && - !N0CFP && !N1CFP) { - if (N0.getOpcode() == ISD::FMUL) { - ConstantFPSDNode *CFP00 = dyn_cast(N0.getOperand(0)); - ConstantFPSDNode *CFP01 = dyn_cast(N0.getOperand(1)); - - // (fadd (fmul c, x), x) -> (fmul x, c+1) - if (CFP00 && !CFP01 && N0.getOperand(1) == N1) { - SDValue NewCFP = DAG.getNode(ISD::FADD, SDLoc(N), VT, - SDValue(CFP00, 0), - DAG.getConstantFP(1.0, VT)); - return DAG.getNode(ISD::FMUL, SDLoc(N), VT, - N1, NewCFP); - } + // fold (fadd A, 0) -> A + if (N1CFP && N1CFP->getValueAPF().isZero()) + return N0; - // (fadd (fmul x, c), x) -> (fmul x, c+1) - if (CFP01 && !CFP00 && N0.getOperand(0) == N1) { - SDValue NewCFP = DAG.getNode(ISD::FADD, SDLoc(N), VT, - SDValue(CFP01, 0), - DAG.getConstantFP(1.0, VT)); - return DAG.getNode(ISD::FMUL, SDLoc(N), VT, - N1, NewCFP); - } + // fold (fadd (fadd x, c1), c2) -> (fadd x, (fadd c1, c2)) + if (N1CFP && N0.getOpcode() == ISD::FADD && N0.getNode()->hasOneUse() && + isa(N0.getOperand(1))) + return DAG.getNode(ISD::FADD, SDLoc(N), VT, N0.getOperand(0), + DAG.getNode(ISD::FADD, SDLoc(N), VT, + N0.getOperand(1), N1)); + + // If allowed, fold (fadd (fneg x), x) -> 0.0 + if (AllowNewConst && N0.getOpcode() == ISD::FNEG && N0.getOperand(0) == N1) + return DAG.getConstantFP(0.0, VT); + + // If allowed, fold (fadd x, (fneg x)) -> 0.0 + if (AllowNewConst && N1.getOpcode() == ISD::FNEG && N1.getOperand(0) == N0) + return DAG.getConstantFP(0.0, VT); + + // We can fold chains of FADD's of the same value into multiplications. + // This transform is not safe in general because we are reducing the number + // of rounding steps. + if (TLI.isOperationLegalOrCustom(ISD::FMUL, VT) && !N0CFP && !N1CFP) { + if (N0.getOpcode() == ISD::FMUL) { + ConstantFPSDNode *CFP00 = dyn_cast(N0.getOperand(0)); + ConstantFPSDNode *CFP01 = dyn_cast(N0.getOperand(1)); + + // (fadd (fmul x, c), x) -> (fmul x, c+1) + if (CFP01 && !CFP00 && N0.getOperand(0) == N1) { + SDValue NewCFP = DAG.getNode(ISD::FADD, SDLoc(N), VT, + SDValue(CFP01, 0), + DAG.getConstantFP(1.0, VT)); + return DAG.getNode(ISD::FMUL, SDLoc(N), VT, N1, NewCFP); + } - // (fadd (fmul c, x), (fadd x, x)) -> (fmul x, c+2) - if (CFP00 && !CFP01 && N1.getOpcode() == ISD::FADD && - N1.getOperand(0) == N1.getOperand(1) && - N0.getOperand(1) == N1.getOperand(0)) { - SDValue NewCFP = DAG.getNode(ISD::FADD, SDLoc(N), VT, - SDValue(CFP00, 0), - DAG.getConstantFP(2.0, VT)); - return DAG.getNode(ISD::FMUL, SDLoc(N), VT, - N0.getOperand(1), NewCFP); + // (fadd (fmul x, c), (fadd x, x)) -> (fmul x, c+2) + if (CFP01 && !CFP00 && N1.getOpcode() == ISD::FADD && + N1.getOperand(0) == N1.getOperand(1) && + N0.getOperand(0) == N1.getOperand(0)) { + SDValue NewCFP = DAG.getNode(ISD::FADD, SDLoc(N), VT, + SDValue(CFP01, 0), + DAG.getConstantFP(2.0, VT)); + return DAG.getNode(ISD::FMUL, SDLoc(N), VT, + N0.getOperand(0), NewCFP); + } } - // (fadd (fmul x, c), (fadd x, x)) -> (fmul x, c+2) - if (CFP01 && !CFP00 && N1.getOpcode() == ISD::FADD && - N1.getOperand(0) == N1.getOperand(1) && - N0.getOperand(0) == N1.getOperand(0)) { - SDValue NewCFP = DAG.getNode(ISD::FADD, SDLoc(N), VT, - SDValue(CFP01, 0), - DAG.getConstantFP(2.0, VT)); - return DAG.getNode(ISD::FMUL, SDLoc(N), VT, - N0.getOperand(0), NewCFP); - } - } + if (N1.getOpcode() == ISD::FMUL) { + ConstantFPSDNode *CFP10 = dyn_cast(N1.getOperand(0)); + ConstantFPSDNode *CFP11 = dyn_cast(N1.getOperand(1)); - if (N1.getOpcode() == ISD::FMUL) { - ConstantFPSDNode *CFP10 = 
dyn_cast(N1.getOperand(0)); - ConstantFPSDNode *CFP11 = dyn_cast(N1.getOperand(1)); + // (fadd x, (fmul x, c)) -> (fmul x, c+1) + if (CFP11 && !CFP10 && N1.getOperand(0) == N0) { + SDValue NewCFP = DAG.getNode(ISD::FADD, SDLoc(N), VT, + SDValue(CFP11, 0), + DAG.getConstantFP(1.0, VT)); + return DAG.getNode(ISD::FMUL, SDLoc(N), VT, N0, NewCFP); + } - // (fadd x, (fmul c, x)) -> (fmul x, c+1) - if (CFP10 && !CFP11 && N1.getOperand(1) == N0) { - SDValue NewCFP = DAG.getNode(ISD::FADD, SDLoc(N), VT, - SDValue(CFP10, 0), - DAG.getConstantFP(1.0, VT)); - return DAG.getNode(ISD::FMUL, SDLoc(N), VT, - N0, NewCFP); + // (fadd (fadd x, x), (fmul x, c)) -> (fmul x, c+2) + if (CFP11 && !CFP10 && N0.getOpcode() == ISD::FADD && + N0.getOperand(0) == N0.getOperand(1) && + N1.getOperand(0) == N0.getOperand(0)) { + SDValue NewCFP = DAG.getNode(ISD::FADD, SDLoc(N), VT, + SDValue(CFP11, 0), + DAG.getConstantFP(2.0, VT)); + return DAG.getNode(ISD::FMUL, SDLoc(N), VT, N1.getOperand(0), NewCFP); + } } - // (fadd x, (fmul x, c)) -> (fmul x, c+1) - if (CFP11 && !CFP10 && N1.getOperand(0) == N0) { - SDValue NewCFP = DAG.getNode(ISD::FADD, SDLoc(N), VT, - SDValue(CFP11, 0), - DAG.getConstantFP(1.0, VT)); - return DAG.getNode(ISD::FMUL, SDLoc(N), VT, - N0, NewCFP); + if (N0.getOpcode() == ISD::FADD && AllowNewConst) { + ConstantFPSDNode *CFP = dyn_cast(N0.getOperand(0)); + // (fadd (fadd x, x), x) -> (fmul x, 3.0) + if (!CFP && N0.getOperand(0) == N0.getOperand(1) && + (N0.getOperand(0) == N1)) + return DAG.getNode(ISD::FMUL, SDLoc(N), VT, + N1, DAG.getConstantFP(3.0, VT)); } - - // (fadd (fadd x, x), (fmul c, x)) -> (fmul x, c+2) - if (CFP10 && !CFP11 && N0.getOpcode() == ISD::FADD && - N0.getOperand(0) == N0.getOperand(1) && - N1.getOperand(1) == N0.getOperand(0)) { - SDValue NewCFP = DAG.getNode(ISD::FADD, SDLoc(N), VT, - SDValue(CFP10, 0), - DAG.getConstantFP(2.0, VT)); - return DAG.getNode(ISD::FMUL, SDLoc(N), VT, - N1.getOperand(1), NewCFP); + if (N1.getOpcode() == ISD::FADD && AllowNewConst) { + ConstantFPSDNode *CFP10 = dyn_cast(N1.getOperand(0)); + // (fadd x, (fadd x, x)) -> (fmul x, 3.0) + if (!CFP10 && N1.getOperand(0) == N1.getOperand(1) && + N1.getOperand(0) == N0) + return DAG.getNode(ISD::FMUL, SDLoc(N), VT, + N0, DAG.getConstantFP(3.0, VT)); } - // (fadd (fadd x, x), (fmul x, c)) -> (fmul x, c+2) - if (CFP11 && !CFP10 && N0.getOpcode() == ISD::FADD && + // (fadd (fadd x, x), (fadd x, x)) -> (fmul x, 4.0) + if (AllowNewConst && + N0.getOpcode() == ISD::FADD && N1.getOpcode() == ISD::FADD && N0.getOperand(0) == N0.getOperand(1) && - N1.getOperand(0) == N0.getOperand(0)) { - SDValue NewCFP = DAG.getNode(ISD::FADD, SDLoc(N), VT, - SDValue(CFP11, 0), - DAG.getConstantFP(2.0, VT)); - return DAG.getNode(ISD::FMUL, SDLoc(N), VT, - N1.getOperand(0), NewCFP); - } - } - - if (N0.getOpcode() == ISD::FADD && AllowNewFpConst) { - ConstantFPSDNode *CFP = dyn_cast(N0.getOperand(0)); - // (fadd (fadd x, x), x) -> (fmul x, 3.0) - if (!CFP && N0.getOperand(0) == N0.getOperand(1) && - (N0.getOperand(0) == N1)) - return DAG.getNode(ISD::FMUL, SDLoc(N), VT, - N1, DAG.getConstantFP(3.0, VT)); - } - - if (N1.getOpcode() == ISD::FADD && AllowNewFpConst) { - ConstantFPSDNode *CFP10 = dyn_cast(N1.getOperand(0)); - // (fadd x, (fadd x, x)) -> (fmul x, 3.0) - if (!CFP10 && N1.getOperand(0) == N1.getOperand(1) && - N1.getOperand(0) == N0) + N1.getOperand(0) == N1.getOperand(1) && + N0.getOperand(0) == N1.getOperand(0)) return DAG.getNode(ISD::FMUL, SDLoc(N), VT, - N0, DAG.getConstantFP(3.0, VT)); + N0.getOperand(0), 
DAG.getConstantFP(4.0, VT)); } - - // (fadd (fadd x, x), (fadd x, x)) -> (fmul x, 4.0) - if (AllowNewFpConst && - N0.getOpcode() == ISD::FADD && N1.getOpcode() == ISD::FADD && - N0.getOperand(0) == N0.getOperand(1) && - N1.getOperand(0) == N1.getOperand(1) && - N0.getOperand(0) == N1.getOperand(0)) - return DAG.getNode(ISD::FMUL, SDLoc(N), VT, - N0.getOperand(0), - DAG.getConstantFP(4.0, VT)); - } + } // enable-unsafe-fp-math // FADD -> FMA combines: - if ((DAG.getTarget().Options.AllowFPOpFusion == FPOpFusion::Fast || - DAG.getTarget().Options.UnsafeFPMath) && - DAG.getTarget().getTargetLowering()->isFMAFasterThanFMulAndFAdd(VT) && + if ((Options.AllowFPOpFusion == FPOpFusion::Fast || Options.UnsafeFPMath) && + TLI.isFMAFasterThanFMulAndFAdd(VT) && (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FMA, VT))) { // fold (fadd (fmul x, y), z) -> (fma x, y, z) - if (N0.getOpcode() == ISD::FMUL && N0->hasOneUse()) + if (N0.getOpcode() == ISD::FMUL && + (N0->hasOneUse() || TLI.enableAggressiveFMAFusion(VT))) return DAG.getNode(ISD::FMA, SDLoc(N), VT, N0.getOperand(0), N0.getOperand(1), N1); // fold (fadd x, (fmul y, z)) -> (fma y, z, x) // Note: Commutes FADD operands. - if (N1.getOpcode() == ISD::FMUL && N1->hasOneUse()) + if (N1.getOpcode() == ISD::FMUL && + (N1->hasOneUse() || TLI.enableAggressiveFMAFusion(VT))) return DAG.getNode(ISD::FMA, SDLoc(N), VT, N1.getOperand(0), N1.getOperand(1), N0); } @@ -6673,10 +6739,11 @@ SDValue DAGCombiner::visitFADD(SDNode *N) { SDValue DAGCombiner::visitFSUB(SDNode *N) { SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); - ConstantFPSDNode *N0CFP = dyn_cast(N0); - ConstantFPSDNode *N1CFP = dyn_cast(N1); + ConstantFPSDNode *N0CFP = isConstOrConstSplatFP(N0); + ConstantFPSDNode *N1CFP = isConstOrConstSplatFP(N1); EVT VT = N->getValueType(0); SDLoc dl(N); + const TargetOptions &Options = DAG.getTarget().Options; // fold vector ops if (VT.isVector()) { @@ -6687,60 +6754,60 @@ SDValue DAGCombiner::visitFSUB(SDNode *N) { // fold (fsub c1, c2) -> c1-c2 if (N0CFP && N1CFP) return DAG.getNode(ISD::FSUB, SDLoc(N), VT, N0, N1); - // fold (fsub A, 0) -> A - if (DAG.getTarget().Options.UnsafeFPMath && - N1CFP && N1CFP->getValueAPF().isZero()) - return N0; - // fold (fsub 0, B) -> -B - if (DAG.getTarget().Options.UnsafeFPMath && - N0CFP && N0CFP->getValueAPF().isZero()) { - if (isNegatibleForFree(N1, LegalOperations, TLI, &DAG.getTarget().Options)) - return GetNegatedExpression(N1, DAG, LegalOperations); - if (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT)) - return DAG.getNode(ISD::FNEG, dl, VT, N1); - } + // fold (fsub A, (fneg B)) -> (fadd A, B) - if (isNegatibleForFree(N1, LegalOperations, TLI, &DAG.getTarget().Options)) + if (isNegatibleForFree(N1, LegalOperations, TLI, &Options)) return DAG.getNode(ISD::FADD, dl, VT, N0, GetNegatedExpression(N1, DAG, LegalOperations)); - // If 'unsafe math' is enabled, fold - // (fsub x, x) -> 0.0 & - // (fsub x, (fadd x, y)) -> (fneg y) & - // (fsub x, (fadd y, x)) -> (fneg y) - if (DAG.getTarget().Options.UnsafeFPMath) { + // If 'unsafe math' is enabled, fold lots of things. 
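Note: the FSUB folds being hoisted under the UnsafeFPMath guard below, (fsub x, x) -> 0.0, (fsub 0, B) -> -B, and (fsub A, 0) -> A, are not valid under strict IEEE semantics. A quick check of the corner cases they give up (illustrative only; compile without fast-math flags):

  #include <cassert>
  #include <cmath>
  #include <limits>

  int main() {
    const double inf = std::numeric_limits<double>::infinity();
    const double nan = std::numeric_limits<double>::quiet_NaN();

    // (fsub x, x) -> 0.0 is wrong for NaN and infinity:
    assert(std::isnan(inf - inf));   // Inf - Inf is NaN, not 0.0
    assert(std::isnan(nan - nan));   // NaN - NaN is NaN, not 0.0

    // (fsub 0, B) -> (fneg B) differs for B == +0.0:
    double sub = 0.0 - 0.0;          // +0.0 under round-to-nearest
    double neg = -0.0;               // negation flips the sign bit
    assert(!std::signbit(sub));
    assert(std::signbit(neg));
    return 0;
  }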
+ if (Options.UnsafeFPMath) { + // (fsub A, 0) -> A + if (N1CFP && N1CFP->getValueAPF().isZero()) + return N0; + + // (fsub 0, B) -> -B + if (N0CFP && N0CFP->getValueAPF().isZero()) { + if (isNegatibleForFree(N1, LegalOperations, TLI, &Options)) + return GetNegatedExpression(N1, DAG, LegalOperations); + if (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT)) + return DAG.getNode(ISD::FNEG, dl, VT, N1); + } + + // (fsub x, x) -> 0.0 if (N0 == N1) return DAG.getConstantFP(0.0f, VT); + // (fsub x, (fadd x, y)) -> (fneg y) + // (fsub x, (fadd y, x)) -> (fneg y) if (N1.getOpcode() == ISD::FADD) { SDValue N10 = N1->getOperand(0); SDValue N11 = N1->getOperand(1); - if (N10 == N0 && isNegatibleForFree(N11, LegalOperations, TLI, - &DAG.getTarget().Options)) + if (N10 == N0 && isNegatibleForFree(N11, LegalOperations, TLI, &Options)) return GetNegatedExpression(N11, DAG, LegalOperations); - if (N11 == N0 && isNegatibleForFree(N10, LegalOperations, TLI, - &DAG.getTarget().Options)) + if (N11 == N0 && isNegatibleForFree(N10, LegalOperations, TLI, &Options)) return GetNegatedExpression(N10, DAG, LegalOperations); } } // FSUB -> FMA combines: - if ((DAG.getTarget().Options.AllowFPOpFusion == FPOpFusion::Fast || - DAG.getTarget().Options.UnsafeFPMath) && - DAG.getTarget().getTargetLowering()->isFMAFasterThanFMulAndFAdd(VT) && + if ((Options.AllowFPOpFusion == FPOpFusion::Fast || Options.UnsafeFPMath) && + TLI.isFMAFasterThanFMulAndFAdd(VT) && (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FMA, VT))) { // fold (fsub (fmul x, y), z) -> (fma x, y, (fneg z)) - if (N0.getOpcode() == ISD::FMUL && N0->hasOneUse()) + if (N0.getOpcode() == ISD::FMUL && + (N0->hasOneUse() || TLI.enableAggressiveFMAFusion(VT))) return DAG.getNode(ISD::FMA, dl, VT, N0.getOperand(0), N0.getOperand(1), DAG.getNode(ISD::FNEG, dl, VT, N1)); // fold (fsub x, (fmul y, z)) -> (fma (fneg y), z, x) // Note: Commutes FSUB operands. - if (N1.getOpcode() == ISD::FMUL && N1->hasOneUse()) + if (N1.getOpcode() == ISD::FMUL && + (N1->hasOneUse() || TLI.enableAggressiveFMAFusion(VT))) return DAG.getNode(ISD::FMA, dl, VT, DAG.getNode(ISD::FNEG, dl, VT, N1.getOperand(0)), @@ -6749,7 +6816,8 @@ SDValue DAGCombiner::visitFSUB(SDNode *N) { // fold (fsub (fneg (fmul, x, y)), z) -> (fma (fneg x), y, (fneg z)) if (N0.getOpcode() == ISD::FNEG && N0.getOperand(0).getOpcode() == ISD::FMUL && - N0->hasOneUse() && N0.getOperand(0).hasOneUse()) { + ((N0->hasOneUse() && N0.getOperand(0).hasOneUse()) || + TLI.enableAggressiveFMAFusion(VT))) { SDValue N00 = N0.getOperand(0).getOperand(0); SDValue N01 = N0.getOperand(0).getOperand(1); return DAG.getNode(ISD::FMA, dl, VT, @@ -6764,47 +6832,82 @@ SDValue DAGCombiner::visitFSUB(SDNode *N) { SDValue DAGCombiner::visitFMUL(SDNode *N) { SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); - ConstantFPSDNode *N0CFP = dyn_cast(N0); - ConstantFPSDNode *N1CFP = dyn_cast(N1); + ConstantFPSDNode *N0CFP = isConstOrConstSplatFP(N0); + ConstantFPSDNode *N1CFP = isConstOrConstSplatFP(N1); EVT VT = N->getValueType(0); - const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + const TargetOptions &Options = DAG.getTarget().Options; // fold vector ops if (VT.isVector()) { + // This just handles C1 * C2 for vectors. Other vector folds are below. SDValue FoldedVOp = SimplifyVBinOp(N); - if (FoldedVOp.getNode()) return FoldedVOp; + if (FoldedVOp.getNode()) + return FoldedVOp; + // Canonicalize vector constant to RHS. 
+ if (N0.getOpcode() == ISD::BUILD_VECTOR && + N1.getOpcode() != ISD::BUILD_VECTOR) + if (auto *BV0 = dyn_cast(N0)) + if (BV0->isConstant()) + return DAG.getNode(N->getOpcode(), SDLoc(N), VT, N1, N0); } // fold (fmul c1, c2) -> c1*c2 if (N0CFP && N1CFP) return DAG.getNode(ISD::FMUL, SDLoc(N), VT, N0, N1); + // canonicalize constant to RHS if (N0CFP && !N1CFP) return DAG.getNode(ISD::FMUL, SDLoc(N), VT, N1, N0); - // fold (fmul A, 0) -> 0 - if (DAG.getTarget().Options.UnsafeFPMath && - N1CFP && N1CFP->getValueAPF().isZero()) - return N1; - // fold (fmul A, 0) -> 0, vector edition. - if (DAG.getTarget().Options.UnsafeFPMath && - ISD::isBuildVectorAllZeros(N1.getNode())) - return N1; + // fold (fmul A, 1.0) -> A if (N1CFP && N1CFP->isExactlyValue(1.0)) return N0; + + if (Options.UnsafeFPMath) { + // fold (fmul A, 0) -> 0 + if (N1CFP && N1CFP->getValueAPF().isZero()) + return N1; + + // fold (fmul (fmul x, c1), c2) -> (fmul x, (fmul c1, c2)) + if (N0.getOpcode() == ISD::FMUL) { + // Fold scalars or any vector constants (not just splats). + // This fold is done in general by InstCombine, but extra fmul insts + // may have been generated during lowering. + SDValue N01 = N0.getOperand(1); + auto *BV1 = dyn_cast(N1); + auto *BV01 = dyn_cast(N01); + if ((N1CFP && isConstOrConstSplatFP(N01)) || + (BV1 && BV01 && BV1->isConstant() && BV01->isConstant())) { + SDLoc SL(N); + SDValue MulConsts = DAG.getNode(ISD::FMUL, SL, VT, N01, N1); + return DAG.getNode(ISD::FMUL, SL, VT, N0.getOperand(0), MulConsts); + } + } + + // fold (fmul (fadd x, x), c) -> (fmul x, (fmul 2.0, c)) + // Undo the fmul 2.0, x -> fadd x, x transformation, since if it occurs + // during an early run of DAGCombiner can prevent folding with fmuls + // inserted during lowering. + if (N0.getOpcode() == ISD::FADD && N0.getOperand(0) == N0.getOperand(1)) { + SDLoc SL(N); + const SDValue Two = DAG.getConstantFP(2.0, VT); + SDValue MulConsts = DAG.getNode(ISD::FMUL, SL, VT, Two, N1); + return DAG.getNode(ISD::FMUL, SDLoc(N), VT, N0.getOperand(0), MulConsts); + } + } + // fold (fmul X, 2.0) -> (fadd X, X) if (N1CFP && N1CFP->isExactlyValue(+2.0)) return DAG.getNode(ISD::FADD, SDLoc(N), VT, N0, N0); + // fold (fmul X, -1.0) -> (fneg X) if (N1CFP && N1CFP->isExactlyValue(-1.0)) if (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT)) return DAG.getNode(ISD::FNEG, SDLoc(N), VT, N0); // fold (fmul (fneg X), (fneg Y)) -> (fmul X, Y) - if (char LHSNeg = isNegatibleForFree(N0, LegalOperations, TLI, - &DAG.getTarget().Options)) { - if (char RHSNeg = isNegatibleForFree(N1, LegalOperations, TLI, - &DAG.getTarget().Options)) { + if (char LHSNeg = isNegatibleForFree(N0, LegalOperations, TLI, &Options)) { + if (char RHSNeg = isNegatibleForFree(N1, LegalOperations, TLI, &Options)) { // Both can be negated for free, check to see if at least one is cheaper // negated. 
if (LHSNeg == 2 || RHSNeg == 2) @@ -6814,14 +6917,6 @@ SDValue DAGCombiner::visitFMUL(SDNode *N) { } } - // If allowed, fold (fmul (fmul x, c1), c2) -> (fmul x, (fmul c1, c2)) - if (DAG.getTarget().Options.UnsafeFPMath && - N1CFP && N0.getOpcode() == ISD::FMUL && - N0.getNode()->hasOneUse() && isa(N0.getOperand(1))) - return DAG.getNode(ISD::FMUL, SDLoc(N), VT, N0.getOperand(0), - DAG.getNode(ISD::FMUL, SDLoc(N), VT, - N0.getOperand(1), N1)); - return SDValue(); } @@ -6833,8 +6928,16 @@ SDValue DAGCombiner::visitFMA(SDNode *N) { ConstantFPSDNode *N1CFP = dyn_cast(N1); EVT VT = N->getValueType(0); SDLoc dl(N); + const TargetOptions &Options = DAG.getTarget().Options; - if (DAG.getTarget().Options.UnsafeFPMath) { + // Constant fold FMA. + if (isa(N0) && + isa(N1) && + isa(N2)) { + return DAG.getNode(ISD::FMA, dl, VT, N0, N1, N2); + } + + if (Options.UnsafeFPMath) { if (N0CFP && N0CFP->isZero()) return N2; if (N1CFP && N1CFP->isZero()) @@ -6850,7 +6953,7 @@ SDValue DAGCombiner::visitFMA(SDNode *N) { return DAG.getNode(ISD::FMA, SDLoc(N), VT, N1, N0, N2); // (fma x, c1, (fmul x, c2)) -> (fmul x, c1+c2) - if (DAG.getTarget().Options.UnsafeFPMath && N1CFP && + if (Options.UnsafeFPMath && N1CFP && N2.getOpcode() == ISD::FMUL && N0 == N2.getOperand(0) && N2.getOperand(1).getOpcode() == ISD::ConstantFP) { @@ -6860,7 +6963,7 @@ SDValue DAGCombiner::visitFMA(SDNode *N) { // (fma (fmul x, c1), c2, y) -> (fma x, c1*c2, y) - if (DAG.getTarget().Options.UnsafeFPMath && + if (Options.UnsafeFPMath && N0.getOpcode() == ISD::FMUL && N1CFP && N0.getOperand(1).getOpcode() == ISD::ConstantFP) { return DAG.getNode(ISD::FMA, dl, VT, @@ -6878,19 +6981,19 @@ SDValue DAGCombiner::visitFMA(SDNode *N) { if (N1CFP->isExactlyValue(-1.0) && (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT))) { SDValue RHSNeg = DAG.getNode(ISD::FNEG, dl, VT, N0); - AddToWorkList(RHSNeg.getNode()); + AddToWorklist(RHSNeg.getNode()); return DAG.getNode(ISD::FADD, dl, VT, N2, RHSNeg); } } // (fma x, c, x) -> (fmul x, (c+1)) - if (DAG.getTarget().Options.UnsafeFPMath && N1CFP && N0 == N2) + if (Options.UnsafeFPMath && N1CFP && N0 == N2) return DAG.getNode(ISD::FMUL, dl, VT, N0, DAG.getNode(ISD::FADD, dl, VT, N1, DAG.getConstantFP(1.0, VT))); // (fma x, c, (fneg x)) -> (fmul x, (c-1)) - if (DAG.getTarget().Options.UnsafeFPMath && N1CFP && + if (Options.UnsafeFPMath && N1CFP && N2.getOpcode() == ISD::FNEG && N2.getOperand(0) == N0) return DAG.getNode(ISD::FMUL, dl, VT, N0, DAG.getNode(ISD::FADD, dl, VT, @@ -6906,7 +7009,8 @@ SDValue DAGCombiner::visitFDIV(SDNode *N) { ConstantFPSDNode *N0CFP = dyn_cast(N0); ConstantFPSDNode *N1CFP = dyn_cast(N1); EVT VT = N->getValueType(0); - const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + SDLoc DL(N); + const TargetOptions &Options = DAG.getTarget().Options; // fold vector ops if (VT.isVector()) { @@ -6918,30 +7022,79 @@ SDValue DAGCombiner::visitFDIV(SDNode *N) { if (N0CFP && N1CFP) return DAG.getNode(ISD::FDIV, SDLoc(N), VT, N0, N1); - // fold (fdiv X, c2) -> fmul X, 1/c2 if losing precision is acceptable. - if (N1CFP && DAG.getTarget().Options.UnsafeFPMath) { - // Compute the reciprocal 1.0 / c2. - APFloat N1APF = N1CFP->getValueAPF(); - APFloat Recip(N1APF.getSemantics(), 1); // 1.0 - APFloat::opStatus st = Recip.divide(N1APF, APFloat::rmNearestTiesToEven); - // Only do the transform if the reciprocal is a legal fp immediate that - // isn't too nasty (eg NaN, denormal, ...). 
- if ((st == APFloat::opOK || st == APFloat::opInexact) && // Not too nasty - (!LegalOperations || - // FIXME: custom lowering of ConstantFP might fail (see e.g. ARM - // backend)... we should handle this gracefully after Legalize. - // TLI.isOperationLegalOrCustom(llvm::ISD::ConstantFP, VT) || - TLI.isOperationLegal(llvm::ISD::ConstantFP, VT) || - TLI.isFPImmLegal(Recip, VT))) - return DAG.getNode(ISD::FMUL, SDLoc(N), VT, N0, - DAG.getConstantFP(Recip, VT)); + if (Options.UnsafeFPMath) { + // fold (fdiv X, c2) -> fmul X, 1/c2 if losing precision is acceptable. + if (N1CFP) { + // Compute the reciprocal 1.0 / c2. + APFloat N1APF = N1CFP->getValueAPF(); + APFloat Recip(N1APF.getSemantics(), 1); // 1.0 + APFloat::opStatus st = Recip.divide(N1APF, APFloat::rmNearestTiesToEven); + // Only do the transform if the reciprocal is a legal fp immediate that + // isn't too nasty (eg NaN, denormal, ...). + if ((st == APFloat::opOK || st == APFloat::opInexact) && // Not too nasty + (!LegalOperations || + // FIXME: custom lowering of ConstantFP might fail (see e.g. ARM + // backend)... we should handle this gracefully after Legalize. + // TLI.isOperationLegalOrCustom(llvm::ISD::ConstantFP, VT) || + TLI.isOperationLegal(llvm::ISD::ConstantFP, VT) || + TLI.isFPImmLegal(Recip, VT))) + return DAG.getNode(ISD::FMUL, SDLoc(N), VT, N0, + DAG.getConstantFP(Recip, VT)); + } + + // If this FDIV is part of a reciprocal square root, it may be folded + // into a target-specific square root estimate instruction. + if (N1.getOpcode() == ISD::FSQRT) { + if (SDValue RV = BuildRsqrtEstimate(N1.getOperand(0))) { + return DAG.getNode(ISD::FMUL, DL, VT, N0, RV); + } + } else if (N1.getOpcode() == ISD::FP_EXTEND && + N1.getOperand(0).getOpcode() == ISD::FSQRT) { + if (SDValue RV = BuildRsqrtEstimate(N1.getOperand(0).getOperand(0))) { + RV = DAG.getNode(ISD::FP_EXTEND, SDLoc(N1), VT, RV); + AddToWorklist(RV.getNode()); + return DAG.getNode(ISD::FMUL, DL, VT, N0, RV); + } + } else if (N1.getOpcode() == ISD::FP_ROUND && + N1.getOperand(0).getOpcode() == ISD::FSQRT) { + if (SDValue RV = BuildRsqrtEstimate(N1.getOperand(0).getOperand(0))) { + RV = DAG.getNode(ISD::FP_ROUND, SDLoc(N1), VT, RV, N1.getOperand(1)); + AddToWorklist(RV.getNode()); + return DAG.getNode(ISD::FMUL, DL, VT, N0, RV); + } + } else if (N1.getOpcode() == ISD::FMUL) { + // Look through an FMUL. Even though this won't remove the FDIV directly, + // it's still worthwhile to get rid of the FSQRT if possible. + SDValue SqrtOp; + SDValue OtherOp; + if (N1.getOperand(0).getOpcode() == ISD::FSQRT) { + SqrtOp = N1.getOperand(0); + OtherOp = N1.getOperand(1); + } else if (N1.getOperand(1).getOpcode() == ISD::FSQRT) { + SqrtOp = N1.getOperand(1); + OtherOp = N1.getOperand(0); + } + if (SqrtOp.getNode()) { + // We found a FSQRT, so try to make this fold: + // x / (y * sqrt(z)) -> x * (rsqrt(z) / y) + if (SDValue RV = BuildRsqrtEstimate(SqrtOp.getOperand(0))) { + RV = DAG.getNode(ISD::FDIV, SDLoc(N1), VT, RV, OtherOp); + AddToWorklist(RV.getNode()); + return DAG.getNode(ISD::FMUL, DL, VT, N0, RV); + } + } + } + + // Fold into a reciprocal estimate and multiply instead of a real divide. 
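Note: the reciprocal and rsqrt rewrites gated on UnsafeFPMath here, (fdiv X, c) -> (fmul X, 1/c) and x / (y * sqrt(z)) -> x * (rsqrt(z) / y), trade one correctly rounded division for two roundings, so individual results can move by an ulp. A host-side count of how often that happens for division by 3 (illustrative only):

  #include <cstdio>

  int main() {
    const float c = 3.0f;
    const float recip = 1.0f / c;   // rounded once here...
    int mismatches = 0;
    for (int i = 1; i <= 1000000; ++i) {
      float x = static_cast<float>(i);
      if (x / c != x * recip)       // ...and again in the multiply
        ++mismatches;
    }
    std::printf("values where x/3 != x*(1/3): %d of 1000000\n", mismatches);
    return 0;
  }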
+ if (SDValue RV = BuildReciprocalEstimate(N1)) { + AddToWorklist(RV.getNode()); + return DAG.getNode(ISD::FMUL, DL, VT, N0, RV); + } } // (fdiv (fneg X), (fneg Y)) -> (fdiv X, Y) - if (char LHSNeg = isNegatibleForFree(N0, LegalOperations, TLI, - &DAG.getTarget().Options)) { - if (char RHSNeg = isNegatibleForFree(N1, LegalOperations, TLI, - &DAG.getTarget().Options)) { + if (char LHSNeg = isNegatibleForFree(N0, LegalOperations, TLI, &Options)) { + if (char RHSNeg = isNegatibleForFree(N1, LegalOperations, TLI, &Options)) { // Both can be negated for free, check to see if at least one is cheaper // negated. if (LHSNeg == 2 || RHSNeg == 2) @@ -6968,6 +7121,31 @@ SDValue DAGCombiner::visitFREM(SDNode *N) { return SDValue(); } +SDValue DAGCombiner::visitFSQRT(SDNode *N) { + if (DAG.getTarget().Options.UnsafeFPMath) { + // Compute this as X * (1/sqrt(X)) = X * (X ** -0.5) + if (SDValue RV = BuildRsqrtEstimate(N->getOperand(0))) { + EVT VT = RV.getValueType(); + RV = DAG.getNode(ISD::FMUL, SDLoc(N), VT, N->getOperand(0), RV); + AddToWorklist(RV.getNode()); + + // Unfortunately, RV is now NaN if the input was exactly 0. + // Select out this case and force the answer to 0. + SDValue Zero = DAG.getConstantFP(0.0, VT); + SDValue ZeroCmp = + DAG.getSetCC(SDLoc(N), TLI.getSetCCResultType(*DAG.getContext(), VT), + N->getOperand(0), Zero, ISD::SETEQ); + AddToWorklist(ZeroCmp.getNode()); + AddToWorklist(RV.getNode()); + + RV = DAG.getNode(VT.isVector() ? ISD::VSELECT : ISD::SELECT, + SDLoc(N), VT, ZeroCmp, Zero, RV); + return RV; + } + } + return SDValue(); +} + SDValue DAGCombiner::visitFCOPYSIGN(SDNode *N) { SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); @@ -7162,7 +7340,7 @@ SDValue DAGCombiner::visitFP_ROUND(SDNode *N) { if (N0.getOpcode() == ISD::FCOPYSIGN && N0.getNode()->hasOneUse()) { SDValue Tmp = DAG.getNode(ISD::FP_ROUND, SDLoc(N0), VT, N0.getOperand(0), N1); - AddToWorkList(Tmp.getNode()); + AddToWorklist(Tmp.getNode()); return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT, Tmp, N0.getOperand(1)); } @@ -7213,8 +7391,7 @@ SDValue DAGCombiner::visitFP_EXTEND(SDNode *N) { // fold (fpext (load x)) -> (fpext (fptrunc (extload x))) if (ISD::isNormalLoad(N0.getNode()) && N0.hasOneUse() && - ((!LegalOperations && !cast(N0)->isVolatile()) || - TLI.isLoadExtLegal(ISD::EXTLOAD, N0.getValueType()))) { + TLI.isLoadExtLegal(ISD::EXTLOAD, N0.getValueType())) { LoadSDNode *LN0 = cast(N0); SDValue ExtLoad = DAG.getExtLoad(ISD::EXTLOAD, SDLoc(N), VT, LN0->getChain(), @@ -7231,6 +7408,43 @@ SDValue DAGCombiner::visitFP_EXTEND(SDNode *N) { return SDValue(); } +SDValue DAGCombiner::visitFCEIL(SDNode *N) { + SDValue N0 = N->getOperand(0); + ConstantFPSDNode *N0CFP = dyn_cast(N0); + EVT VT = N->getValueType(0); + + // fold (fceil c1) -> fceil(c1) + if (N0CFP) + return DAG.getNode(ISD::FCEIL, SDLoc(N), VT, N0); + + return SDValue(); +} + +SDValue DAGCombiner::visitFTRUNC(SDNode *N) { + SDValue N0 = N->getOperand(0); + ConstantFPSDNode *N0CFP = dyn_cast(N0); + EVT VT = N->getValueType(0); + + // fold (ftrunc c1) -> ftrunc(c1) + if (N0CFP) + return DAG.getNode(ISD::FTRUNC, SDLoc(N), VT, N0); + + return SDValue(); +} + +SDValue DAGCombiner::visitFFLOOR(SDNode *N) { + SDValue N0 = N->getOperand(0); + ConstantFPSDNode *N0CFP = dyn_cast(N0); + EVT VT = N->getValueType(0); + + // fold (ffloor c1) -> ffloor(c1) + if (N0CFP) + return DAG.getNode(ISD::FFLOOR, SDLoc(N), VT, N0); + + return SDValue(); +} + +// FIXME: FNEG and FABS have a lot in common; refactor. 
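Note: the new visitFSQRT builds sqrt(x) as x * rsqrt(x) and then selects out x == 0, because 0 * Inf would otherwise produce NaN. The shape of that guard, sketched with a plain 1/sqrt standing in for the target's estimate instruction (illustrative only; compile without fast-math flags):

  #include <cassert>
  #include <cmath>

  // sqrt(x) computed as x * rsqrt(x), plus the zero-select emitted by the
  // combine: (select (setcc x, 0, eq), 0, x * rsqrt(x)).
  static double sqrt_via_rsqrt(double x) {
    double rsqrt = 1.0 / std::sqrt(x);
    double r = x * rsqrt;
    return (x == 0.0) ? 0.0 : r;
  }

  int main() {
    assert(std::isnan(0.0 * (1.0 / std::sqrt(0.0)))); // the problem being fixed
    assert(sqrt_via_rsqrt(0.0) == 0.0);
    assert(sqrt_via_rsqrt(4.0) == 2.0);
    return 0;
  }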
SDValue DAGCombiner::visitFNEG(SDNode *N) { SDValue N0 = N->getOperand(0); EVT VT = N->getValueType(0); @@ -7240,24 +7454,36 @@ SDValue DAGCombiner::visitFNEG(SDNode *N) { if (FoldedVOp.getNode()) return FoldedVOp; } + // Constant fold FNEG. + if (isa(N0)) + return DAG.getNode(ISD::FNEG, SDLoc(N), VT, N->getOperand(0)); + if (isNegatibleForFree(N0, LegalOperations, DAG.getTargetLoweringInfo(), &DAG.getTarget().Options)) return GetNegatedExpression(N0, DAG, LegalOperations); - // Transform fneg(bitconvert(x)) -> bitconvert(x^sign) to avoid loading + // Transform fneg(bitconvert(x)) -> bitconvert(x ^ sign) to avoid loading // constant pool values. - if (!TLI.isFNegFree(VT) && N0.getOpcode() == ISD::BITCAST && - !VT.isVector() && - N0.getNode()->hasOneUse() && - N0.getOperand(0).getValueType().isInteger()) { + if (!TLI.isFNegFree(VT) && + N0.getOpcode() == ISD::BITCAST && + N0.getNode()->hasOneUse()) { SDValue Int = N0.getOperand(0); EVT IntVT = Int.getValueType(); if (IntVT.isInteger() && !IntVT.isVector()) { + APInt SignMask; + if (N0.getValueType().isVector()) { + // For a vector, get a mask such as 0x80... per scalar element + // and splat it. + SignMask = APInt::getSignBit(N0.getValueType().getScalarSizeInBits()); + SignMask = APInt::getSplat(IntVT.getSizeInBits(), SignMask); + } else { + // For a scalar, just generate 0x80... + SignMask = APInt::getSignBit(IntVT.getSizeInBits()); + } Int = DAG.getNode(ISD::XOR, SDLoc(N0), IntVT, Int, - DAG.getConstant(APInt::getSignBit(IntVT.getSizeInBits()), IntVT)); - AddToWorkList(Int.getNode()); - return DAG.getNode(ISD::BITCAST, SDLoc(N), - VT, Int); + DAG.getConstant(SignMask, IntVT)); + AddToWorklist(Int.getNode()); + return DAG.getNode(ISD::BITCAST, SDLoc(N), VT, Int); } } @@ -7279,45 +7505,50 @@ SDValue DAGCombiner::visitFNEG(SDNode *N) { return SDValue(); } -SDValue DAGCombiner::visitFCEIL(SDNode *N) { +SDValue DAGCombiner::visitFMINNUM(SDNode *N) { SDValue N0 = N->getOperand(0); - ConstantFPSDNode *N0CFP = dyn_cast(N0); - EVT VT = N->getValueType(0); - - // fold (fceil c1) -> fceil(c1) - if (N0CFP) - return DAG.getNode(ISD::FCEIL, SDLoc(N), VT, N0); - - return SDValue(); -} + SDValue N1 = N->getOperand(1); + const ConstantFPSDNode *N0CFP = dyn_cast(N0); + const ConstantFPSDNode *N1CFP = dyn_cast(N1); -SDValue DAGCombiner::visitFTRUNC(SDNode *N) { - SDValue N0 = N->getOperand(0); - ConstantFPSDNode *N0CFP = dyn_cast(N0); - EVT VT = N->getValueType(0); + if (N0CFP && N1CFP) { + const APFloat &C0 = N0CFP->getValueAPF(); + const APFloat &C1 = N1CFP->getValueAPF(); + return DAG.getConstantFP(minnum(C0, C1), N->getValueType(0)); + } - // fold (ftrunc c1) -> ftrunc(c1) - if (N0CFP) - return DAG.getNode(ISD::FTRUNC, SDLoc(N), VT, N0); + if (N0CFP) { + EVT VT = N->getValueType(0); + // Canonicalize to constant on RHS. 
+ return DAG.getNode(ISD::FMINNUM, SDLoc(N), VT, N1, N0); + } return SDValue(); } -SDValue DAGCombiner::visitFFLOOR(SDNode *N) { +SDValue DAGCombiner::visitFMAXNUM(SDNode *N) { SDValue N0 = N->getOperand(0); - ConstantFPSDNode *N0CFP = dyn_cast(N0); - EVT VT = N->getValueType(0); + SDValue N1 = N->getOperand(1); + const ConstantFPSDNode *N0CFP = dyn_cast(N0); + const ConstantFPSDNode *N1CFP = dyn_cast(N1); - // fold (ffloor c1) -> ffloor(c1) - if (N0CFP) - return DAG.getNode(ISD::FFLOOR, SDLoc(N), VT, N0); + if (N0CFP && N1CFP) { + const APFloat &C0 = N0CFP->getValueAPF(); + const APFloat &C1 = N1CFP->getValueAPF(); + return DAG.getConstantFP(maxnum(C0, C1), N->getValueType(0)); + } + + if (N0CFP) { + EVT VT = N->getValueType(0); + // Canonicalize to constant on RHS. + return DAG.getNode(ISD::FMAXNUM, SDLoc(N), VT, N1, N0); + } return SDValue(); } SDValue DAGCombiner::visitFABS(SDNode *N) { SDValue N0 = N->getOperand(0); - ConstantFPSDNode *N0CFP = dyn_cast(N0); EVT VT = N->getValueType(0); if (VT.isVector()) { @@ -7326,30 +7557,40 @@ SDValue DAGCombiner::visitFABS(SDNode *N) { } // fold (fabs c1) -> fabs(c1) - if (N0CFP) + if (isa(N0)) return DAG.getNode(ISD::FABS, SDLoc(N), VT, N0); + // fold (fabs (fabs x)) -> (fabs x) if (N0.getOpcode() == ISD::FABS) return N->getOperand(0); + // fold (fabs (fneg x)) -> (fabs x) // fold (fabs (fcopysign x, y)) -> (fabs x) if (N0.getOpcode() == ISD::FNEG || N0.getOpcode() == ISD::FCOPYSIGN) return DAG.getNode(ISD::FABS, SDLoc(N), VT, N0.getOperand(0)); - // Transform fabs(bitconvert(x)) -> bitconvert(x&~sign) to avoid loading + // Transform fabs(bitconvert(x)) -> bitconvert(x & ~sign) to avoid loading // constant pool values. if (!TLI.isFAbsFree(VT) && - N0.getOpcode() == ISD::BITCAST && N0.getNode()->hasOneUse() && - N0.getOperand(0).getValueType().isInteger() && - !N0.getOperand(0).getValueType().isVector()) { + N0.getOpcode() == ISD::BITCAST && + N0.getNode()->hasOneUse()) { SDValue Int = N0.getOperand(0); EVT IntVT = Int.getValueType(); if (IntVT.isInteger() && !IntVT.isVector()) { + APInt SignMask; + if (N0.getValueType().isVector()) { + // For a vector, get a mask such as 0x7f... per scalar element + // and splat it. + SignMask = ~APInt::getSignBit(N0.getValueType().getScalarSizeInBits()); + SignMask = APInt::getSplat(IntVT.getSizeInBits(), SignMask); + } else { + // For a scalar, just generate 0x7f... + SignMask = ~APInt::getSignBit(IntVT.getSizeInBits()); + } Int = DAG.getNode(ISD::AND, SDLoc(N0), IntVT, Int, - DAG.getConstant(~APInt::getSignBit(IntVT.getSizeInBits()), IntVT)); - AddToWorkList(Int.getNode()); - return DAG.getNode(ISD::BITCAST, SDLoc(N), - N->getValueType(0), Int); + DAG.getConstant(SignMask, IntVT)); + AddToWorklist(Int.getNode()); + return DAG.getNode(ISD::BITCAST, SDLoc(N), N->getValueType(0), Int); } } @@ -7429,15 +7670,12 @@ SDValue DAGCombiner::visitBRCOND(SDNode *N) { // will convert it back to (X & C1) >> C2. CombineTo(N, NewBRCond, false); // Truncate is dead. - if (Trunc) { - removeFromWorkList(Trunc); - DAG.DeleteNode(Trunc); - } + if (Trunc) + deleteAndRecombine(Trunc); // Replace the uses of SRL with SETCC - WorkListRemover DeadNodes(*this); + WorklistRemover DeadNodes(*this); DAG.ReplaceAllUsesOfValueWith(N1, SetCC); - removeFromWorkList(N1.getNode()); - DAG.DeleteNode(N1.getNode()); + deleteAndRecombine(N1.getNode()); return SDValue(N, 0); // Return N so it doesn't get rechecked! 
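The FNEG and FABS combines above both avoid loading a constant-pool FP value by bitcasting to the same-width integer type and flipping or clearing the sign bit. A minimal standalone sketch of that bit trick, assuming IEEE-754 doubles and not taken from the patch:

    #include <cassert>
    #include <cstdint>
    #include <cstring>

    // fneg(x) as an integer XOR with the sign bit.
    static double fneg_bits(double x) {
      uint64_t bits;
      std::memcpy(&bits, &x, sizeof(bits));
      bits ^= (1ULL << 63);            // flip the sign bit
      std::memcpy(&x, &bits, sizeof(bits));
      return x;
    }

    // fabs(x) as an integer AND with the complement of the sign bit.
    static double fabs_bits(double x) {
      uint64_t bits;
      std::memcpy(&bits, &x, sizeof(bits));
      bits &= ~(1ULL << 63);           // clear the sign bit
      std::memcpy(&x, &bits, sizeof(bits));
      return x;
    }

    int main() {
      assert(fneg_bits(1.5) == -1.5);
      assert(fabs_bits(-2.25) == 2.25);
      return 0;
    }

For vector operands the combines build the same mask per scalar element and splat it across the wide integer type, which is what the APInt::getSplat calls in the hunks above do.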
} } @@ -7464,10 +7702,9 @@ SDValue DAGCombiner::visitBRCOND(SDNode *N) { dbgs() << "\nWith: "; Tmp.getNode()->dump(&DAG); dbgs() << '\n'); - WorkListRemover DeadNodes(*this); + WorklistRemover DeadNodes(*this); DAG.ReplaceAllUsesOfValueWith(N1, Tmp); - removeFromWorkList(TheXor); - DAG.DeleteNode(TheXor); + deleteAndRecombine(TheXor); return DAG.getNode(ISD::BRCOND, SDLoc(N), MVT::Other, Chain, Tmp, N2); } @@ -7495,10 +7732,9 @@ SDValue DAGCombiner::visitBRCOND(SDNode *N) { Op0, Op1, Equal ? ISD::SETEQ : ISD::SETNE); // Replace the uses of XOR with SETCC - WorkListRemover DeadNodes(*this); + WorklistRemover DeadNodes(*this); DAG.ReplaceAllUsesOfValueWith(N1, SetCC); - removeFromWorkList(N1.getNode()); - DAG.DeleteNode(N1.getNode()); + deleteAndRecombine(N1.getNode()); return DAG.getNode(ISD::BRCOND, SDLoc(N), MVT::Other, Chain, SetCC, N2); } @@ -7523,7 +7759,7 @@ SDValue DAGCombiner::visitBR_CC(SDNode *N) { SDValue Simp = SimplifySetCC(getSetCCResultType(CondLHS.getValueType()), CondLHS, CondRHS, CC->get(), SDLoc(N), false); - if (Simp.getNode()) AddToWorkList(Simp.getNode()); + if (Simp.getNode()) AddToWorklist(Simp.getNode()); // fold to a simpler setcc if (Simp.getNode() && Simp.getOpcode() == ISD::SETCC) @@ -7535,9 +7771,8 @@ SDValue DAGCombiner::visitBR_CC(SDNode *N) { return SDValue(); } -/// canFoldInAddressingMode - Return true if 'Use' is a load or a store that -/// uses N as its base pointer and that N may be folded in the load / store -/// addressing mode. +/// Return true if 'Use' is a load or a store that uses N as its base pointer +/// and that N may be folded in the load / store addressing mode. static bool canFoldInAddressingMode(SDNode *N, SDNode *Use, SelectionDAG &DAG, const TargetLowering &TLI) { @@ -7576,12 +7811,11 @@ static bool canFoldInAddressingMode(SDNode *N, SDNode *Use, return TLI.isLegalAddressingMode(AM, VT.getTypeForEVT(*DAG.getContext())); } -/// CombineToPreIndexedLoadStore - Try turning a load / store into a -/// pre-indexed load / store when the base pointer is an add or subtract -/// and it has other uses besides the load / store. After the -/// transformation, the new indexed load / store has effectively folded -/// the add / subtract in and all of its other uses are redirected to the -/// new load / store. +/// Try turning a load/store into a pre-indexed load/store when the base +/// pointer is an add or subtract and it has other uses besides the load/store. +/// After the transformation, the new indexed load/store has effectively folded +/// the add/subtract in and all of its other uses are redirected to the +/// new load/store. bool DAGCombiner::CombineToPreIndexedLoadStore(SDNode *N) { if (Level < AfterLegalizeDAG) return false; @@ -7733,7 +7967,7 @@ bool DAGCombiner::CombineToPreIndexedLoadStore(SDNode *N) { dbgs() << "\nWith: "; Result.getNode()->dump(&DAG); dbgs() << '\n'); - WorkListRemover DeadNodes(*this); + WorklistRemover DeadNodes(*this); if (isLoad) { DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(0)); DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Result.getValue(2)); @@ -7742,7 +7976,7 @@ bool DAGCombiner::CombineToPreIndexedLoadStore(SDNode *N) { } // Finally, since the node is now dead, remove it from the graph. 
- DAG.DeleteNode(N); + deleteAndRecombine(N); if (Swapped) std::swap(BasePtr, Offset); @@ -7792,23 +8026,20 @@ bool DAGCombiner::CombineToPreIndexedLoadStore(SDNode *N) { SDLoc(OtherUses[i]), OtherUses[i]->getValueType(0), NewOp1, NewOp2); DAG.ReplaceAllUsesOfValueWith(SDValue(OtherUses[i], 0), NewUse); - removeFromWorkList(OtherUses[i]); - DAG.DeleteNode(OtherUses[i]); + deleteAndRecombine(OtherUses[i]); } // Replace the uses of Ptr with uses of the updated base value. DAG.ReplaceAllUsesOfValueWith(Ptr, Result.getValue(isLoad ? 1 : 0)); - removeFromWorkList(Ptr.getNode()); - DAG.DeleteNode(Ptr.getNode()); + deleteAndRecombine(Ptr.getNode()); return true; } -/// CombineToPostIndexedLoadStore - Try to combine a load / store with a -/// add / sub of the base pointer node into a post-indexed load / store. -/// The transformation folded the add / subtract into the new indexed -/// load / store effectively and all of its uses are redirected to the -/// new load / store. +/// Try to combine a load/store with a add/sub of the base pointer node into a +/// post-indexed load/store. The transformation folded the add/subtract into the +/// new indexed load/store effectively and all of its uses are redirected to the +/// new load/store. bool DAGCombiner::CombineToPostIndexedLoadStore(SDNode *N) { if (Level < AfterLegalizeDAG) return false; @@ -7903,7 +8134,7 @@ bool DAGCombiner::CombineToPostIndexedLoadStore(SDNode *N) { dbgs() << "\nWith: "; Result.getNode()->dump(&DAG); dbgs() << '\n'); - WorkListRemover DeadNodes(*this); + WorklistRemover DeadNodes(*this); if (isLoad) { DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(0)); DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Result.getValue(2)); @@ -7912,13 +8143,12 @@ bool DAGCombiner::CombineToPostIndexedLoadStore(SDNode *N) { } // Finally, since the node is now dead, remove it from the graph. - DAG.DeleteNode(N); + deleteAndRecombine(N); // Replace the uses of Use with uses of the updated base value. DAG.ReplaceAllUsesOfValueWith(SDValue(Op, 0), Result.getValue(isLoad ? 1 : 0)); - removeFromWorkList(Op); - DAG.DeleteNode(Op); + deleteAndRecombine(Op); return true; } } @@ -7927,6 +8157,30 @@ bool DAGCombiner::CombineToPostIndexedLoadStore(SDNode *N) { return false; } +/// \brief Return the base-pointer arithmetic from an indexed \p LD. +SDValue DAGCombiner::SplitIndexingFromLoad(LoadSDNode *LD) { + ISD::MemIndexedMode AM = LD->getAddressingMode(); + assert(AM != ISD::UNINDEXED); + SDValue BP = LD->getOperand(1); + SDValue Inc = LD->getOperand(2); + + // Some backends use TargetConstants for load offsets, but don't expect + // TargetConstants in general ADD nodes. We can convert these constants into + // regular Constants (if the constant is not opaque). + assert((Inc.getOpcode() != ISD::TargetConstant || + !cast(Inc)->isOpaque()) && + "Cannot split out indexing using opaque target constants"); + if (Inc.getOpcode() == ISD::TargetConstant) { + ConstantSDNode *ConstInc = cast(Inc); + Inc = DAG.getConstant(*ConstInc->getConstantIntValue(), + ConstInc->getValueType(0)); + } + + unsigned Opc = + (AM == ISD::PRE_INC || AM == ISD::POST_INC ? 
ISD::ADD : ISD::SUB); + return DAG.getNode(Opc, SDLoc(LD), BP.getSimpleValueType(), BP, Inc); +} + SDValue DAGCombiner::visitLOAD(SDNode *N) { LoadSDNode *LD = cast(N); SDValue Chain = LD->getChain(); @@ -7950,33 +8204,46 @@ SDValue DAGCombiner::visitLOAD(SDNode *N) { dbgs() << "\nWith chain: "; Chain.getNode()->dump(&DAG); dbgs() << "\n"); - WorkListRemover DeadNodes(*this); + WorklistRemover DeadNodes(*this); DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Chain); - if (N->use_empty()) { - removeFromWorkList(N); - DAG.DeleteNode(N); - } + if (N->use_empty()) + deleteAndRecombine(N); return SDValue(N, 0); // Return N so it doesn't get rechecked! } } else { // Indexed loads. assert(N->getValueType(2) == MVT::Other && "Malformed indexed loads?"); - if (!N->hasAnyUseOfValue(0) && !N->hasAnyUseOfValue(1)) { + + // If this load has an opaque TargetConstant offset, then we cannot split + // the indexing into an add/sub directly (that TargetConstant may not be + // valid for a different type of node, and we cannot convert an opaque + // target constant into a regular constant). + bool HasOTCInc = LD->getOperand(2).getOpcode() == ISD::TargetConstant && + cast(LD->getOperand(2))->isOpaque(); + + if (!N->hasAnyUseOfValue(0) && + ((MaySplitLoadIndex && !HasOTCInc) || !N->hasAnyUseOfValue(1))) { SDValue Undef = DAG.getUNDEF(N->getValueType(0)); + SDValue Index; + if (N->hasAnyUseOfValue(1) && MaySplitLoadIndex && !HasOTCInc) { + Index = SplitIndexingFromLoad(LD); + // Try to fold the base pointer arithmetic into subsequent loads and + // stores. + AddUsersToWorklist(N); + } else + Index = DAG.getUNDEF(N->getValueType(1)); DEBUG(dbgs() << "\nReplacing.7 "; N->dump(&DAG); dbgs() << "\nWith: "; Undef.getNode()->dump(&DAG); dbgs() << " and 2 other values\n"); - WorkListRemover DeadNodes(*this); + WorklistRemover DeadNodes(*this); DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Undef); - DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), - DAG.getUNDEF(N->getValueType(1))); + DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Index); DAG.ReplaceAllUsesOfValueWith(SDValue(N, 2), Chain); - removeFromWorkList(N); - DAG.DeleteNode(N); + deleteAndRecombine(N); return SDValue(N, 0); // Return N so it doesn't get rechecked! } } @@ -8004,15 +8271,15 @@ SDValue DAGCombiner::visitLOAD(SDNode *N) { LD->getValueType(0), Chain, Ptr, LD->getPointerInfo(), LD->getMemoryVT(), - LD->isVolatile(), LD->isNonTemporal(), Align, - LD->getTBAAInfo()); + LD->isVolatile(), LD->isNonTemporal(), + LD->isInvariant(), Align, LD->getAAInfo()); return CombineTo(N, NewLoad, SDValue(NewLoad.getNode(), 1), true); } } } - bool UseAA = CombinerAA.getNumOccurrences() > 0 ? CombinerAA : - TLI.getTargetMachine().getSubtarget().useAA(); + bool UseAA = CombinerAA.getNumOccurrences() > 0 ? CombinerAA + : DAG.getSubtarget().useAA(); #ifndef NDEBUG if (CombinerAAOnlyFunc.getNumOccurrences() && CombinerAAOnlyFunc != DAG.getMachineFunction().getName()) @@ -8042,7 +8309,7 @@ SDValue DAGCombiner::visitLOAD(SDNode *N) { MVT::Other, Chain, ReplLoad.getValue(1)); // Make sure the new and old chains are cleaned up. - AddToWorkList(Token.getNode()); + AddToWorklist(Token.getNode()); // Replace uses with load result and token factor. Don't add users // to work list. @@ -8342,7 +8609,7 @@ struct LoadedSlice { // At this point, we know that we perform a cross-register-bank copy. // Check if it is expensive. 
- const TargetRegisterInfo *TRI = TLI.getTargetMachine().getRegisterInfo(); + const TargetRegisterInfo *TRI = DAG->getSubtarget().getRegisterInfo(); // Assume bitcasts are cheap, unless both register classes do not // explicitly share a common sub class. if (!TRI || TRI->getCommonSubClass(ArgRC, ResRC)) @@ -8606,9 +8873,9 @@ bool DAGCombiner::SliceUpLoad(SDNode *N) { return true; } -/// CheckForMaskedLoad - Check to see if V is (and load (ptr), imm), where the -/// load is having specific bytes cleared out. If so, return the byte size -/// being masked out and the shift amount. +/// Check to see if V is (and load (ptr), imm), where the load is having +/// specific bytes cleared out. If so, return the byte size being masked out +/// and the shift amount. static std::pair CheckForMaskedLoad(SDValue V, SDValue Ptr, SDValue Chain) { std::pair Result(0, 0); @@ -8681,9 +8948,9 @@ CheckForMaskedLoad(SDValue V, SDValue Ptr, SDValue Chain) { } -/// ShrinkLoadReplaceStoreWithStore - Check to see if IVal is something that -/// provides a value as specified by MaskInfo. If so, replace the specified -/// store with a narrower store of truncated IVal. +/// Check to see if IVal is something that provides a value as specified by +/// MaskInfo. If so, replace the specified store with a narrower store of +/// truncated IVal. static SDNode * ShrinkLoadReplaceStoreWithStore(const std::pair &MaskInfo, SDValue IVal, StoreSDNode *St, @@ -8738,10 +9005,10 @@ ShrinkLoadReplaceStoreWithStore(const std::pair &MaskInfo, } -/// ReduceLoadOpStoreWidth - Look for sequence of load / op / store where op is -/// one of 'or', 'xor', and 'and' of immediates. If 'op' is only touching some -/// of the loaded bits, try narrowing the load and store if it would end up -/// being a win for performance or code size. +/// Look for sequence of load / op / store where op is one of 'or', 'xor', and +/// 'and' of immediates. If 'op' is only touching some of the loaded bits, try +/// narrowing the load and store if it would end up being a win for performance +/// or code size. SDValue DAGCombiner::ReduceLoadOpStoreWidth(SDNode *N) { StoreSDNode *ST = cast(N); if (ST->isVolatile()) @@ -8841,7 +9108,7 @@ SDValue DAGCombiner::ReduceLoadOpStoreWidth(SDNode *N) { LD->getPointerInfo().getWithOffset(PtrOff), LD->isVolatile(), LD->isNonTemporal(), LD->isInvariant(), NewAlign, - LD->getTBAAInfo()); + LD->getAAInfo()); SDValue NewVal = DAG.getNode(Opc, SDLoc(Value), NewVT, NewLD, DAG.getConstant(NewImm, NewVT)); SDValue NewST = DAG.getStore(Chain, SDLoc(N), @@ -8849,10 +9116,10 @@ SDValue DAGCombiner::ReduceLoadOpStoreWidth(SDNode *N) { ST->getPointerInfo().getWithOffset(PtrOff), false, false, NewAlign); - AddToWorkList(NewPtr.getNode()); - AddToWorkList(NewLD.getNode()); - AddToWorkList(NewVal.getNode()); - WorkListRemover DeadNodes(*this); + AddToWorklist(NewPtr.getNode()); + AddToWorklist(NewLD.getNode()); + AddToWorklist(NewVal.getNode()); + WorklistRemover DeadNodes(*this); DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), NewLD.getValue(1)); ++OpsNarrowed; return NewST; @@ -8862,10 +9129,9 @@ SDValue DAGCombiner::ReduceLoadOpStoreWidth(SDNode *N) { return SDValue(); } -/// TransformFPLoadStorePair - For a given floating point load / store pair, -/// if the load value isn't used by any other operations, then consider -/// transforming the pair to integer load / store operations if the target -/// deems the transformation profitable. 
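ReduceLoadOpStoreWidth above narrows a load / op / store sequence when the or/xor/and immediate only touches some of the loaded bytes. As a rough illustration of why that is legal, OR-ing 0x00FF0000 into a 32-bit value in memory only changes one byte, so a single byte store suffices. The sketch below checks that equivalence; it assumes a little-endian host, and the byte offset would differ on big-endian targets:

    #include <cassert>
    #include <cstdint>
    #include <cstring>

    int main() {
      uint32_t a = 0x12345678, b = 0x12345678;

      // Wide form: load 32 bits, OR an immediate touching only byte 2, store.
      a |= 0x00FF0000u;

      // Narrow form: store the one affected byte directly
      // (byte offset 2 on a little-endian host).
      uint8_t byteVal = 0xFF;
      std::memcpy(reinterpret_cast<uint8_t *>(&b) + 2, &byteVal, 1);

      assert(a == b);   // both end up 0x12FF5678
      return 0;
    }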
+/// For a given floating point load / store pair, if the load value isn't used +/// by any other operations, then consider transforming the pair to integer +/// load / store operations if the target deems the transformation profitable. SDValue DAGCombiner::TransformFPLoadStorePair(SDNode *N) { StoreSDNode *ST = cast(N); SDValue Chain = ST->getChain(); @@ -8907,9 +9173,9 @@ SDValue DAGCombiner::TransformFPLoadStorePair(SDNode *N) { ST->getPointerInfo(), false, false, STAlign); - AddToWorkList(NewLD.getNode()); - AddToWorkList(NewST.getNode()); - WorkListRemover DeadNodes(*this); + AddToWorklist(NewLD.getNode()); + AddToWorklist(NewST.getNode()); + WorklistRemover DeadNodes(*this); DAG.ReplaceAllUsesOfValueWith(Value.getValue(1), NewLD.getValue(1)); ++LdStFP2Int; return NewST; @@ -9039,7 +9305,7 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode* St) { return false; // Only look at ends of store sequences. - SDValue Chain = SDValue(St, 1); + SDValue Chain = SDValue(St, 0); if (Chain->hasOneUse() && Chain->use_begin()->getOpcode() == ISD::STORE) return false; @@ -9070,7 +9336,7 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode* St) { StoreSDNode *Index = St; while (Index) { // If the chain has more than one use, then we can't reorder the mem ops. - if (Index != St && !SDValue(Index, 1)->hasOneUse()) + if (Index != St && !SDValue(Index, 0)->hasOneUse()) break; // Find the base pointer and offset for this memory node. @@ -9301,8 +9567,7 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode* St) { // Since we know that St is redundant, just iterate. while (!St->use_empty()) DAG.ReplaceAllUsesWith(SDValue(St, 0), St->getChain()); - removeFromWorkList(St); - DAG.DeleteNode(St); + deleteAndRecombine(St); } return true; @@ -9361,6 +9626,13 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode* St) { if (LoadNodes.size() < 2) return false; + // If we have load/store pair instructions and we only have two values, + // don't bother. + unsigned RequiredAlignment; + if (LoadNodes.size() == 2 && TLI.hasPairedLoad(MemVT, RequiredAlignment) && + St->getAlignment() >= RequiredAlignment) + return false; + // Scan the memory operations on the chain and find the first non-consecutive // load memory address. These variables hold the index in the store node // array. @@ -9476,8 +9748,7 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode* St) { continue; StoreSDNode *St = cast(StoreNodes[i].MemNode); DAG.ReplaceAllUsesOfValueWith(SDValue(St, 0), St->getChain()); - removeFromWorkList(St); - DAG.DeleteNode(St); + deleteAndRecombine(St); } return true; @@ -9503,7 +9774,7 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) { return DAG.getStore(Chain, SDLoc(N), Value.getOperand(0), Ptr, ST->getPointerInfo(), ST->isVolatile(), ST->isNonTemporal(), OrigAlign, - ST->getTBAAInfo()); + ST->getAAInfo()); } // Turn 'store undef, Ptr' -> nothing. 
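TransformFPLoadStorePair above turns a floating-point load whose only use is a store into an integer load/store of the same width when the target deems that profitable, so the value never needs to pass through an FP register. A standalone sketch of the equivalence in plain C++ (illustrative only):

    #include <cassert>
    #include <cstdint>
    #include <cstring>

    // Copy a double from Src to Dst without treating it as an FP value:
    // a 64-bit integer load/store moves exactly the same bytes.
    static void copy_as_int(const double *Src, double *Dst) {
      uint64_t bits;
      std::memcpy(&bits, Src, sizeof(bits));   // integer-width load
      std::memcpy(Dst, &bits, sizeof(bits));   // integer-width store
    }

    int main() {
      double in = 3.14159, out = 0.0;
      copy_as_int(&in, &out);
      assert(out == in);
      return 0;
    }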
@@ -9557,19 +9828,19 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) { unsigned Alignment = ST->getAlignment(); bool isVolatile = ST->isVolatile(); bool isNonTemporal = ST->isNonTemporal(); - const MDNode *TBAAInfo = ST->getTBAAInfo(); + AAMDNodes AAInfo = ST->getAAInfo(); SDValue St0 = DAG.getStore(Chain, SDLoc(ST), Lo, Ptr, ST->getPointerInfo(), isVolatile, isNonTemporal, - ST->getAlignment(), TBAAInfo); + ST->getAlignment(), AAInfo); Ptr = DAG.getNode(ISD::ADD, SDLoc(N), Ptr.getValueType(), Ptr, DAG.getConstant(4, Ptr.getValueType())); Alignment = MinAlign(Alignment, 4U); SDValue St1 = DAG.getStore(Chain, SDLoc(ST), Hi, Ptr, ST->getPointerInfo().getWithOffset(4), isVolatile, isNonTemporal, - Alignment, TBAAInfo); + Alignment, AAInfo); return DAG.getNode(ISD::TokenFactor, SDLoc(N), MVT::Other, St0, St1); } @@ -9586,7 +9857,7 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) { return DAG.getTruncStore(Chain, SDLoc(N), Value, Ptr, ST->getPointerInfo(), ST->getMemoryVT(), ST->isVolatile(), ST->isNonTemporal(), Align, - ST->getTBAAInfo()); + ST->getAAInfo()); } } @@ -9596,8 +9867,8 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) { if (NewST.getNode()) return NewST; - bool UseAA = CombinerAA.getNumOccurrences() > 0 ? CombinerAA : - TLI.getTargetMachine().getSubtarget().useAA(); + bool UseAA = CombinerAA.getNumOccurrences() > 0 ? CombinerAA + : DAG.getSubtarget().useAA(); #ifndef NDEBUG if (CombinerAAOnlyFunc.getNumOccurrences() && CombinerAAOnlyFunc != DAG.getMachineFunction().getName()) @@ -9625,7 +9896,7 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) { MVT::Other, Chain, ReplStore); // Make sure the new and old chains are cleaned up. - AddToWorkList(Token.getNode()); + AddToWorklist(Token.getNode()); // Don't add users to work list. return CombineTo(N, Token, false); @@ -9647,7 +9918,7 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) { APInt::getLowBitsSet( Value.getValueType().getScalarType().getSizeInBits(), ST->getMemoryVT().getScalarType().getSizeInBits())); - AddToWorkList(Value.getNode()); + AddToWorklist(Value.getNode()); if (Shorter.getNode()) return DAG.getTruncStore(Chain, SDLoc(N), Shorter, Ptr, ST->getMemoryVT(), ST->getMemOperand()); @@ -9674,6 +9945,17 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) { } } + // If this is a store followed by a store with the same value to the same + // location, then the store is dead/noop. + if (StoreSDNode *ST1 = dyn_cast(Chain)) { + if (ST1->getBasePtr() == Ptr && ST->getMemoryVT() == ST1->getMemoryVT() && + ST1->getValue() == Value && ST->isUnindexed() && !ST->isVolatile() && + ST1->isUnindexed() && !ST1->isVolatile()) { + // The store is dead, remove it. + return Chain; + } + } + // If this is an FP_ROUND or TRUNC followed by a store, fold this into a // truncating store. We can do this even if this is already a truncstore. if ((Value.getOpcode() == ISD::FP_ROUND || Value.getOpcode() == ISD::TRUNCATE) @@ -9741,7 +10023,7 @@ SDValue DAGCombiner::visitINSERT_VECTOR_ELT(SDNode *N) { // Swap nodes. SDValue NewOp = DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(N), VT, InVec.getOperand(0), InVal, EltNo); - AddToWorkList(NewOp.getNode()); + AddToWorklist(NewOp.getNode()); return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(InVec.getNode()), VT, NewOp, InVec.getOperand(1), InVec.getOperand(2)); } @@ -9829,32 +10111,32 @@ SDValue DAGCombiner::ReplaceExtractVectorEltOfLoadWithNarrowedLoad( ISD::LoadExtType ExtType = TLI.isLoadExtLegal(ISD::ZEXTLOAD, VecEltVT) ? 
ISD::ZEXTLOAD : ISD::EXTLOAD; - Load = DAG.getExtLoad(ExtType, SDLoc(EVE), ResultVT, OriginalLoad->getChain(), - NewPtr, MPI, VecEltVT, OriginalLoad->isVolatile(), - OriginalLoad->isNonTemporal(), Align, - OriginalLoad->getTBAAInfo()); + Load = DAG.getExtLoad( + ExtType, SDLoc(EVE), ResultVT, OriginalLoad->getChain(), NewPtr, MPI, + VecEltVT, OriginalLoad->isVolatile(), OriginalLoad->isNonTemporal(), + OriginalLoad->isInvariant(), Align, OriginalLoad->getAAInfo()); Chain = Load.getValue(1); } else { Load = DAG.getLoad( VecEltVT, SDLoc(EVE), OriginalLoad->getChain(), NewPtr, MPI, OriginalLoad->isVolatile(), OriginalLoad->isNonTemporal(), - OriginalLoad->isInvariant(), Align, OriginalLoad->getTBAAInfo()); + OriginalLoad->isInvariant(), Align, OriginalLoad->getAAInfo()); Chain = Load.getValue(1); if (ResultVT.bitsLT(VecEltVT)) Load = DAG.getNode(ISD::TRUNCATE, SDLoc(EVE), ResultVT, Load); else Load = DAG.getNode(ISD::BITCAST, SDLoc(EVE), ResultVT, Load); } - WorkListRemover DeadNodes(*this); + WorklistRemover DeadNodes(*this); SDValue From[] = { SDValue(EVE, 0), SDValue(OriginalLoad, 1) }; SDValue To[] = { Load, Chain }; DAG.ReplaceAllUsesOfValuesWith(From, To, 2); // Since we're explicitly calling ReplaceAllUses, add the new node to the // worklist explicitly as well. - AddToWorkList(Load.getNode()); - AddUsersToWorkList(Load.getNode()); // Add users too + AddToWorklist(Load.getNode()); + AddUsersToWorklist(Load.getNode()); // Add users too // Make sure to revisit this node to clean it up; it will usually be dead. - AddToWorkList(EVE); + AddToWorklist(EVE); ++OpsNarrowed; return SDValue(EVE, 0); } @@ -9952,7 +10234,8 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) { // (vextract (vN[if]M load $addr), i) -> ([if]M load $addr + i * size) if (!LegalOperations && !ConstEltNo && InVec.hasOneUse() && - ISD::isNormalLoad(InVec.getNode())) { + ISD::isNormalLoad(InVec.getNode()) && + !N->getOperand(1)->hasPredecessor(InVec.getNode())) { SDValue Index = N->getOperand(1); if (LoadSDNode *OrigLoad = dyn_cast(InVec)) return ReplaceExtractVectorEltOfLoadWithNarrowedLoad(N, VT, Index, @@ -10135,7 +10418,7 @@ SDValue DAGCombiner::reduceBuildVecExtToExtBuildVec(SDNode *N) { SDValue BV = DAG.getNode(ISD::BUILD_VECTOR, dl, VecVT, Ops); // The new BUILD_VECTOR node has the potential to be further optimized. - AddToWorkList(BV.getNode()); + AddToWorklist(BV.getNode()); // Bitcast to the desired type. return DAG.getNode(ISD::BITCAST, dl, VT, BV); } @@ -10201,7 +10484,7 @@ SDValue DAGCombiner::reduceBuildVecConvertToConvertBuildVec(SDNode *N) { Opnds.push_back(In.getOperand(0)); } SDValue BV = DAG.getNode(ISD::BUILD_VECTOR, dl, NVT, Opnds); - AddToWorkList(BV.getNode()); + AddToWorklist(BV.getNode()); return DAG.getNode(Opcode, dl, VT, BV); } @@ -10227,9 +10510,12 @@ SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) { // operations. If so, and if the EXTRACT_VECTOR_ELT vector inputs come from // at most two distinct vectors, turn this into a shuffle node. + // Only type-legal BUILD_VECTOR nodes are converted to shuffle nodes. + if (!isTypeLegal(VT)) + return SDValue(); + // May only combine to shuffle after legalize if shuffle is legal. 
- if (LegalOperations && - !TLI.isOperationLegalOrCustom(ISD::VECTOR_SHUFFLE, VT)) + if (LegalOperations && !TLI.isOperationLegal(ISD::VECTOR_SHUFFLE, VT)) return SDValue(); SDValue VecIn1, VecIn2; @@ -10319,10 +10605,6 @@ SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) { VecIn1.getValueType() != VT) return SDValue(); - // Only type-legal BUILD_VECTOR nodes are converted to shuffle nodes. - if (!isTypeLegal(VT)) - return SDValue(); - // Return the new VECTOR_SHUFFLE node. SDValue Ops[2]; Ops[0] = VecIn1; @@ -10513,6 +10795,92 @@ SDValue DAGCombiner::visitEXTRACT_SUBVECTOR(SDNode* N) { return SDValue(); } +static SDValue simplifyShuffleOperandRecursively(SmallBitVector &UsedElements, + SDValue V, SelectionDAG &DAG) { + SDLoc DL(V); + EVT VT = V.getValueType(); + + switch (V.getOpcode()) { + default: + return V; + + case ISD::CONCAT_VECTORS: { + EVT OpVT = V->getOperand(0).getValueType(); + int OpSize = OpVT.getVectorNumElements(); + SmallBitVector OpUsedElements(OpSize, false); + bool FoundSimplification = false; + SmallVector NewOps; + NewOps.reserve(V->getNumOperands()); + for (int i = 0, NumOps = V->getNumOperands(); i < NumOps; ++i) { + SDValue Op = V->getOperand(i); + bool OpUsed = false; + for (int j = 0; j < OpSize; ++j) + if (UsedElements[i * OpSize + j]) { + OpUsedElements[j] = true; + OpUsed = true; + } + NewOps.push_back( + OpUsed ? simplifyShuffleOperandRecursively(OpUsedElements, Op, DAG) + : DAG.getUNDEF(OpVT)); + FoundSimplification |= Op == NewOps.back(); + OpUsedElements.reset(); + } + if (FoundSimplification) + V = DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, NewOps); + return V; + } + + case ISD::INSERT_SUBVECTOR: { + SDValue BaseV = V->getOperand(0); + SDValue SubV = V->getOperand(1); + auto *IdxN = dyn_cast(V->getOperand(2)); + if (!IdxN) + return V; + + int SubSize = SubV.getValueType().getVectorNumElements(); + int Idx = IdxN->getZExtValue(); + bool SubVectorUsed = false; + SmallBitVector SubUsedElements(SubSize, false); + for (int i = 0; i < SubSize; ++i) + if (UsedElements[i + Idx]) { + SubVectorUsed = true; + SubUsedElements[i] = true; + UsedElements[i + Idx] = false; + } + + // Now recurse on both the base and sub vectors. + SDValue SimplifiedSubV = + SubVectorUsed + ? simplifyShuffleOperandRecursively(SubUsedElements, SubV, DAG) + : DAG.getUNDEF(SubV.getValueType()); + SDValue SimplifiedBaseV = simplifyShuffleOperandRecursively(UsedElements, BaseV, DAG); + if (SimplifiedSubV != SubV || SimplifiedBaseV != BaseV) + V = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, + SimplifiedBaseV, SimplifiedSubV, V->getOperand(2)); + return V; + } + } +} + +static SDValue simplifyShuffleOperands(ShuffleVectorSDNode *SVN, SDValue N0, + SDValue N1, SelectionDAG &DAG) { + EVT VT = SVN->getValueType(0); + int NumElts = VT.getVectorNumElements(); + SmallBitVector N0UsedElements(NumElts, false), N1UsedElements(NumElts, false); + for (int M : SVN->getMask()) + if (M >= 0 && M < NumElts) + N0UsedElements[M] = true; + else if (M >= NumElts) + N1UsedElements[M - NumElts] = true; + + SDValue S0 = simplifyShuffleOperandRecursively(N0UsedElements, N0, DAG); + SDValue S1 = simplifyShuffleOperandRecursively(N1UsedElements, N1, DAG); + if (S0 == N0 && S1 == N1) + return SDValue(); + + return DAG.getVectorShuffle(VT, SDLoc(SVN), S0, S1, SVN->getMask()); +} + // Tries to turn a shuffle of two CONCAT_VECTORS into a single concat. 
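simplifyShuffleOperands above computes, from the shuffle mask, which elements of each input the shuffle can actually observe: mask entries below NumElts read from the first operand, entries at or above NumElts read from the second, and negative entries are undef lanes. A small standalone sketch of that bookkeeping, using a hypothetical helper rather than the patch code:

    #include <cassert>
    #include <vector>

    // Given a shuffle mask over two NumElts-wide inputs, mark which elements
    // of each input are actually read. Negative mask entries mean "undef".
    static void collectUsedElements(const std::vector<int> &Mask, int NumElts,
                                    std::vector<bool> &Used0,
                                    std::vector<bool> &Used1) {
      Used0.assign(NumElts, false);
      Used1.assign(NumElts, false);
      for (int M : Mask) {
        if (M >= 0 && M < NumElts)
          Used0[M] = true;                 // element of the first operand
        else if (M >= NumElts)
          Used1[M - NumElts] = true;       // element of the second operand
      }
    }

    int main() {
      // shuffle <4 x T> A, B, <0, 5, -1, 5> reads only A[0] and B[1].
      std::vector<bool> U0, U1;
      collectUsedElements({0, 5, -1, 5}, 4, U0, U1);
      assert(U0[0] && !U0[1] && !U0[2] && !U0[3]);
      assert(!U1[0] && U1[1] && !U1[2] && !U1[3]);
      return 0;
    }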
static SDValue partitionShuffleOfConcats(SDNode *N, SelectionDAG &DAG) { EVT VT = N->getValueType(0); @@ -10665,6 +11033,12 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) { } } + // There are various patterns used to build up a vector from smaller vectors, + // subvectors, or elements. Scan chains of these and replace unused insertions + // or components with undef. + if (SDValue S = simplifyShuffleOperands(SVN, N0, N1, DAG)) + return S; + if (N0.getOpcode() == ISD::CONCAT_VECTORS && Level < AfterLegalizeVectorOps && (N1.getOpcode() == ISD::UNDEF || @@ -10699,7 +11073,15 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) { Idx = OtherSV->getMaskElt(Idx); Mask.push_back(Idx); } - + + // Check if all indices in Mask are Undef. In case, propagate Undef. + bool isUndefMask = true; + for (unsigned i = 0; i != NumElts && isUndefMask; ++i) + isUndefMask &= Mask[i] < 0; + + if (isUndefMask) + return DAG.getUNDEF(VT); + bool CommuteOperands = false; if (N0.getOperand(1).getOpcode() != ISD::UNDEF) { // To be valid, the combine shuffle mask should only reference elements @@ -10738,12 +11120,12 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) { // The combined shuffle must map each index to itself. IsIdentityMask = (unsigned)Mask[i] == i + BaseMaskIndex; } - + if (IsIdentityMask) { if (CommuteOperands) // optimize shuffle(shuffle(x, y), undef) -> y. return OtherSV->getOperand(1); - + // optimize shuffle(shuffle(x, undef), undef) -> x // optimize shuffle(shuffle(x, y), undef) -> x return OtherSV->getOperand(0); @@ -10751,16 +11133,134 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) { // It may still be beneficial to combine the two shuffles if the // resulting shuffle is legal. + if (TLI.isTypeLegal(VT)) { + if (!CommuteOperands) { + if (TLI.isShuffleMaskLegal(Mask, VT)) + // shuffle(shuffle(x, undef, M1), undef, M2) -> shuffle(x, undef, M3). + // shuffle(shuffle(x, y, M1), undef, M2) -> shuffle(x, undef, M3) + return DAG.getVectorShuffle(VT, SDLoc(N), N0->getOperand(0), N1, + &Mask[0]); + } else { + // Compute the commuted shuffle mask. + for (unsigned i = 0; i != NumElts; ++i) { + int idx = Mask[i]; + if (idx < 0) + continue; + else if (idx < (int)NumElts) + Mask[i] = idx + NumElts; + else + Mask[i] = idx - NumElts; + } + + if (TLI.isShuffleMaskLegal(Mask, VT)) + // shuffle(shuffle(x, y, M1), undef, M2) -> shuffle(y, undef, M3) + return DAG.getVectorShuffle(VT, SDLoc(N), N0->getOperand(1), N1, + &Mask[0]); + } + } + } + + // Canonicalize shuffles according to rules: + // shuffle(A, shuffle(A, B)) -> shuffle(shuffle(A,B), A) + // shuffle(B, shuffle(A, B)) -> shuffle(shuffle(A,B), B) + // shuffle(B, shuffle(A, Undef)) -> shuffle(shuffle(A, Undef), B) + if (N1.getOpcode() == ISD::VECTOR_SHUFFLE && N0.getOpcode() != ISD::UNDEF && + N0.getOpcode() != ISD::VECTOR_SHUFFLE && Level < AfterLegalizeDAG && + TLI.isTypeLegal(VT)) { + // The incoming shuffle must be of the same type as the result of the + // current shuffle. + assert(N1->getOperand(0).getValueType() == VT && + "Shuffle types don't match"); + + SDValue SV0 = N1->getOperand(0); + SDValue SV1 = N1->getOperand(1); + bool HasSameOp0 = N0 == SV0; + bool IsSV1Undef = SV1.getOpcode() == ISD::UNDEF; + if (HasSameOp0 || IsSV1Undef || N0 == SV1) + // Commute the operands of this shuffle so that next rule + // will trigger. 
+ return DAG.getCommutedVectorShuffle(*SVN); + } + + // Try to fold according to rules: + // shuffle(shuffle(A, B, M0), B, M1) -> shuffle(A, B, M2) + // shuffle(shuffle(A, B, M0), A, M1) -> shuffle(A, B, M2) + // shuffle(shuffle(A, Undef, M0), B, M1) -> shuffle(A, B, M2) + // shuffle(shuffle(A, Undef, M0), A, M1) -> shuffle(A, Undef, M2) + // Don't try to fold shuffles with illegal type. + if (N0.getOpcode() == ISD::VECTOR_SHUFFLE && Level < AfterLegalizeDAG && + N1.getOpcode() != ISD::UNDEF && TLI.isTypeLegal(VT)) { + ShuffleVectorSDNode *OtherSV = cast(N0); + + // The incoming shuffle must be of the same type as the result of the + // current shuffle. + assert(OtherSV->getOperand(0).getValueType() == VT && + "Shuffle types don't match"); + + SDValue SV0 = OtherSV->getOperand(0); + SDValue SV1 = OtherSV->getOperand(1); + bool HasSameOp0 = N1 == SV0; + bool IsSV1Undef = SV1.getOpcode() == ISD::UNDEF; + if (!HasSameOp0 && !IsSV1Undef && N1 != SV1) + // Early exit. + return SDValue(); + + SmallVector Mask; + // Compute the combined shuffle mask for a shuffle with SV0 as the first + // operand, and SV1 as the second operand. + for (unsigned i = 0; i != NumElts; ++i) { + int Idx = SVN->getMaskElt(i); + if (Idx < 0) { + // Propagate Undef. + Mask.push_back(Idx); + continue; + } + + if (Idx < (int)NumElts) { + Idx = OtherSV->getMaskElt(Idx); + if (IsSV1Undef && Idx >= (int) NumElts) + Idx = -1; // Propagate Undef. + } else + Idx = HasSameOp0 ? Idx - NumElts : Idx; + + Mask.push_back(Idx); + } + + // Check if all indices in Mask are Undef. In case, propagate Undef. + bool isUndefMask = true; + for (unsigned i = 0; i != NumElts && isUndefMask; ++i) + isUndefMask &= Mask[i] < 0; + + if (isUndefMask) + return DAG.getUNDEF(VT); + + // Avoid introducing shuffles with illegal mask. + if (TLI.isShuffleMaskLegal(Mask, VT)) { + if (IsSV1Undef) + // shuffle(shuffle(A, Undef, M0), B, M1) -> shuffle(A, B, M2) + // shuffle(shuffle(A, Undef, M0), A, M1) -> shuffle(A, Undef, M2) + return DAG.getVectorShuffle(VT, SDLoc(N), SV0, N1, &Mask[0]); + return DAG.getVectorShuffle(VT, SDLoc(N), SV0, SV1, &Mask[0]); + } + + // Compute the commuted shuffle mask. + for (unsigned i = 0; i != NumElts; ++i) { + int idx = Mask[i]; + if (idx < 0) + continue; + else if (idx < (int)NumElts) + Mask[i] = idx + NumElts; + else + Mask[i] = idx - NumElts; + } + if (TLI.isShuffleMaskLegal(Mask, VT)) { - if (!CommuteOperands) - // shuffle(shuffle(x, undef, M1), undef, M2) -> shuffle(x, undef, M3). - // shuffle(shuffle(x, y, M1), undef, M2) -> shuffle(x, undef, M3) - return DAG.getVectorShuffle(VT, SDLoc(N), N0->getOperand(0), N1, - &Mask[0]); - - // shuffle(shuffle(x, y, M1), undef, M2) -> shuffle(undef, y, M3) - return DAG.getVectorShuffle(VT, SDLoc(N), N1, N0->getOperand(1), - &Mask[0]); + if (IsSV1Undef) + // shuffle(shuffle(A, Undef, M0), B, M1) -> shuffle(B, A, M2) + return DAG.getVectorShuffle(VT, SDLoc(N), N1, SV0, &Mask[0]); + // shuffle(shuffle(A, B, M0), B, M1) -> shuffle(B, A, M2) + // shuffle(shuffle(A, B, M0), A, M1) -> shuffle(B, A, M2) + return DAG.getVectorShuffle(VT, SDLoc(N), SV1, SV0, &Mask[0]); } } @@ -10794,8 +11294,8 @@ SDValue DAGCombiner::visitINSERT_SUBVECTOR(SDNode *N) { return SDValue(); } -/// XformToShuffleWithZero - Returns a vector_shuffle if it able to transform -/// an AND to a vector_shuffle with the destination vector and a zero vector. +/// Returns a vector_shuffle if it able to transform an AND to a vector_shuffle +/// with the destination vector and a zero vector. /// e.g. 
AND V, <0xffffffff, 0, 0xffffffff, 0>. ==> /// vector_shuffle V, Zero, <0, 4, 2, 4> SDValue DAGCombiner::XformToShuffleWithZero(SDNode *N) { @@ -10817,7 +11317,7 @@ SDValue DAGCombiner::XformToShuffleWithZero(SDNode *N) { if (cast(Elt)->isAllOnesValue()) Indices.push_back(i); else if (cast(Elt)->isNullValue()) - Indices.push_back(NumElts); + Indices.push_back(NumElts+i); else return SDValue(); } @@ -10841,7 +11341,7 @@ SDValue DAGCombiner::XformToShuffleWithZero(SDNode *N) { return SDValue(); } -/// SimplifyVBinOp - Visit a binary vector operation, like ADD. +/// Visit a binary vector operation, like ADD. SDValue DAGCombiner::SimplifyVBinOp(SDNode *N) { assert(N->getValueType(0).isVector() && "SimplifyVBinOp only works on vectors!"); @@ -10896,7 +11396,7 @@ SDValue DAGCombiner::SimplifyVBinOp(SDNode *N) { FoldOp.getOpcode() != ISD::ConstantFP) break; Ops.push_back(FoldOp); - AddToWorkList(FoldOp.getNode()); + AddToWorklist(FoldOp.getNode()); } if (Ops.size() == LHS.getNumOperands()) @@ -10918,7 +11418,7 @@ SDValue DAGCombiner::SimplifyVBinOp(SDNode *N) { SDValue UndefVector = LHS.getOperand(1); SDValue NewBinOp = DAG.getNode(N->getOpcode(), SDLoc(N), VT, LHS.getOperand(0), RHS.getOperand(0)); - AddUsersToWorkList(N); + AddUsersToWorklist(N); return DAG.getVectorShuffle(VT, SDLoc(N), NewBinOp, UndefVector, &SVN0->getMask()[0]); } @@ -10927,7 +11427,7 @@ SDValue DAGCombiner::SimplifyVBinOp(SDNode *N) { return SDValue(); } -/// SimplifyVUnaryOp - Visit a binary vector operation, like FABS/FNEG. +/// Visit a binary vector operation, like FABS/FNEG. SDValue DAGCombiner::SimplifyVUnaryOp(SDNode *N) { assert(N->getValueType(0).isVector() && "SimplifyVUnaryOp only works on vectors!"); @@ -10950,7 +11450,7 @@ SDValue DAGCombiner::SimplifyVUnaryOp(SDNode *N) { FoldOp.getOpcode() != ISD::ConstantFP) break; Ops.push_back(FoldOp); - AddToWorkList(FoldOp.getNode()); + AddToWorklist(FoldOp.getNode()); } if (Ops.size() != N0.getNumOperands()) @@ -10977,9 +11477,9 @@ SDValue DAGCombiner::SimplifySelect(SDLoc DL, SDValue N0, N0.getValueType(), SCC.getOperand(0), SCC.getOperand(1), SCC.getOperand(4)); - AddToWorkList(SETCC.getNode()); - return DAG.getSelect(SDLoc(SCC), SCC.getValueType(), - SCC.getOperand(2), SCC.getOperand(3), SETCC); + AddToWorklist(SETCC.getNode()); + return DAG.getSelect(SDLoc(SCC), SCC.getValueType(), SETCC, + SCC.getOperand(2), SCC.getOperand(3)); } return SCC; @@ -10987,12 +11487,11 @@ SDValue DAGCombiner::SimplifySelect(SDLoc DL, SDValue N0, return SDValue(); } -/// SimplifySelectOps - Given a SELECT or a SELECT_CC node, where LHS and RHS -/// are the two values being selected between, see if we can simplify the -/// select. Callers of this should assume that TheSelect is deleted if this -/// returns true. As such, they should return the appropriate thing (e.g. the -/// node) back to the top-level of the DAG combiner loop to avoid it being -/// looked at. +/// Given a SELECT or a SELECT_CC node, where LHS and RHS are the two values +/// being selected between, see if we can simplify the select. Callers of this +/// should assume that TheSelect is deleted if this returns true. As such, they +/// should return the appropriate thing (e.g. the node) back to the top-level of +/// the DAG combiner loop to avoid it being looked at. 
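XformToShuffleWithZero above rewrites an AND whose constant mask has only all-ones or all-zero lanes as a shuffle of the value with a zero vector. A small sketch of the mask construction after the NumElts+i change, using an illustrative helper rather than the patch code: an all-ones lane keeps element i of V, a zero lane picks element i of the zero vector.

    #include <cassert>
    #include <cstdint>
    #include <vector>

    // Build a shuffle mask from a per-lane AND mask: all-ones lanes select the
    // corresponding element of V (index i), zero lanes select the matching
    // element of the zero vector (index NumElts + i).
    static bool buildShuffleMask(const std::vector<uint32_t> &AndMask,
                                 std::vector<int> &ShuffleMask) {
      int NumElts = (int)AndMask.size();
      ShuffleMask.clear();
      for (int i = 0; i < NumElts; ++i) {
        if (AndMask[i] == 0xffffffffu)
          ShuffleMask.push_back(i);
        else if (AndMask[i] == 0)
          ShuffleMask.push_back(NumElts + i);
        else
          return false;   // mixed lane: the transform does not apply
      }
      return true;
    }

    int main() {
      std::vector<int> M;
      bool OK = buildShuffleMask({0xffffffffu, 0, 0xffffffffu, 0}, M);
      assert(OK && M == (std::vector<int>{0, 5, 2, 7}));
      return 0;
    }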
bool DAGCombiner::SimplifySelectOps(SDNode *TheSelect, SDValue LHS, SDValue RHS) { @@ -11071,22 +11570,27 @@ bool DAGCombiner::SimplifySelectOps(SDNode *TheSelect, SDValue LHS, } SDValue Load; + // It is safe to replace the two loads if they have different alignments, + // but the new load must be the minimum (most restrictive) alignment of the + // inputs. + bool isInvariant = LLD->isInvariant() & RLD->isInvariant(); + unsigned Alignment = std::min(LLD->getAlignment(), RLD->getAlignment()); if (LLD->getExtensionType() == ISD::NON_EXTLOAD) { Load = DAG.getLoad(TheSelect->getValueType(0), SDLoc(TheSelect), - // FIXME: Discards pointer and TBAA info. + // FIXME: Discards pointer and AA info. LLD->getChain(), Addr, MachinePointerInfo(), LLD->isVolatile(), LLD->isNonTemporal(), - LLD->isInvariant(), LLD->getAlignment()); + isInvariant, Alignment); } else { Load = DAG.getExtLoad(LLD->getExtensionType() == ISD::EXTLOAD ? RLD->getExtensionType() : LLD->getExtensionType(), SDLoc(TheSelect), TheSelect->getValueType(0), - // FIXME: Discards pointer and TBAA info. + // FIXME: Discards pointer and AA info. LLD->getChain(), Addr, MachinePointerInfo(), LLD->getMemoryVT(), LLD->isVolatile(), - LLD->isNonTemporal(), LLD->getAlignment()); + LLD->isNonTemporal(), isInvariant, Alignment); } // Users of the select now use the result of the load. @@ -11102,7 +11606,7 @@ bool DAGCombiner::SimplifySelectOps(SDNode *TheSelect, SDValue LHS, return false; } -/// SimplifySelectCC - Simplify an expression of the form (N0 cond N1) ? N2 : N3 +/// Simplify an expression of the form (N0 cond N1) ? N2 : N3 /// where 'cond' is the comparison specified by CC. SDValue DAGCombiner::SimplifySelectCC(SDLoc DL, SDValue N0, SDValue N1, SDValue N2, SDValue N3, @@ -11118,7 +11622,7 @@ SDValue DAGCombiner::SimplifySelectCC(SDLoc DL, SDValue N0, SDValue N1, // Determine if the condition we're dealing with is constant SDValue SCC = SimplifySetCC(getSetCCResultType(N0.getValueType()), N0, N1, CC, DL, false); - if (SCC.getNode()) AddToWorkList(SCC.getNode()); + if (SCC.getNode()) AddToWorklist(SCC.getNode()); ConstantSDNode *SCCC = dyn_cast_or_null(SCC.getNode()); // fold select_cc true, x, y -> x @@ -11186,13 +11690,13 @@ SDValue DAGCombiner::SimplifySelectCC(SDLoc DL, SDValue N0, SDValue N1, SDValue Cond = DAG.getSetCC(DL, getSetCCResultType(N0.getValueType()), N0, N1, CC); - AddToWorkList(Cond.getNode()); + AddToWorklist(Cond.getNode()); SDValue CstOffset = DAG.getSelect(DL, Zero.getValueType(), Cond, One, Zero); - AddToWorkList(CstOffset.getNode()); + AddToWorklist(CstOffset.getNode()); CPIdx = DAG.getNode(ISD::ADD, DL, CPIdx.getValueType(), CPIdx, CstOffset); - AddToWorkList(CPIdx.getNode()); + AddToWorklist(CPIdx.getNode()); return DAG.getLoad(TV->getValueType(0), DL, DAG.getEntryNode(), CPIdx, MachinePointerInfo::getConstantPool(), false, false, false, Alignment); @@ -11217,11 +11721,11 @@ SDValue DAGCombiner::SimplifySelectCC(SDLoc DL, SDValue N0, SDValue N1, getShiftAmountTy(N0.getValueType())); SDValue Shift = DAG.getNode(ISD::SRL, SDLoc(N0), XType, N0, ShCt); - AddToWorkList(Shift.getNode()); + AddToWorklist(Shift.getNode()); if (XType.bitsGT(AType)) { Shift = DAG.getNode(ISD::TRUNCATE, DL, AType, Shift); - AddToWorkList(Shift.getNode()); + AddToWorklist(Shift.getNode()); } return DAG.getNode(ISD::AND, DL, AType, Shift, N2); @@ -11231,11 +11735,11 @@ SDValue DAGCombiner::SimplifySelectCC(SDLoc DL, SDValue N0, SDValue N1, XType, N0, DAG.getConstant(XType.getSizeInBits()-1, getShiftAmountTy(N0.getValueType()))); - 
AddToWorkList(Shift.getNode()); + AddToWorklist(Shift.getNode()); if (XType.bitsGT(AType)) { Shift = DAG.getNode(ISD::TRUNCATE, DL, AType, Shift); - AddToWorkList(Shift.getNode()); + AddToWorklist(Shift.getNode()); } return DAG.getNode(ISD::AND, DL, AType, Shift, N2); @@ -11305,8 +11809,8 @@ SDValue DAGCombiner::SimplifySelectCC(SDLoc DL, SDValue N0, SDValue N1, N2.getValueType(), SCC); } - AddToWorkList(SCC.getNode()); - AddToWorkList(Temp.getNode()); + AddToWorklist(SCC.getNode()); + AddToWorklist(Temp.getNode()); if (N2C->getAPIntValue() == 1) return Temp; @@ -11385,8 +11889,8 @@ SDValue DAGCombiner::SimplifySelectCC(SDLoc DL, SDValue N0, SDValue N1, getShiftAmountTy(N0.getValueType()))); SDValue Add = DAG.getNode(ISD::ADD, SDLoc(N0), XType, N0, Shift); - AddToWorkList(Shift.getNode()); - AddToWorkList(Add.getNode()); + AddToWorklist(Shift.getNode()); + AddToWorklist(Add.getNode()); return DAG.getNode(ISD::XOR, DL, XType, Add, Shift); } } @@ -11394,7 +11898,7 @@ SDValue DAGCombiner::SimplifySelectCC(SDLoc DL, SDValue N0, SDValue N1, return SDValue(); } -/// SimplifySetCC - This is a stub for TargetLowering::SimplifySetCC. +/// This is a stub for TargetLowering::SimplifySetCC. SDValue DAGCombiner::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, ISD::CondCode Cond, SDLoc DL, bool foldBooleans) { @@ -11403,10 +11907,10 @@ SDValue DAGCombiner::SimplifySetCC(EVT VT, SDValue N0, return TLI.SimplifySetCC(VT, N0, N1, Cond, foldBooleans, DagCombineInfo, DL); } -/// BuildSDIVSequence - Given an ISD::SDIV node expressing a divide by constant, -/// return a DAG expression to select that will generate the same value by -/// multiplying by a magic number. See: -/// +/// Given an ISD::SDIV node expressing a divide by constant, return +/// a DAG expression to select that will generate the same value by multiplying +/// by a magic number. +/// Ref: "Hacker's Delight" or "The PowerPC Compiler Writer's Guide". SDValue DAGCombiner::BuildSDIV(SDNode *N) { ConstantSDNode *C = isConstOrConstSplat(N->getOperand(1)); if (!C) @@ -11421,14 +11925,33 @@ SDValue DAGCombiner::BuildSDIV(SDNode *N) { TLI.BuildSDIV(N, C->getAPIntValue(), DAG, LegalOperations, &Built); for (SDNode *N : Built) - AddToWorkList(N); + AddToWorklist(N); return S; } -/// BuildUDIV - Given an ISD::UDIV node expressing a divide by constant, -/// return a DAG expression to select that will generate the same value by -/// multiplying by a magic number. See: -/// +/// Given an ISD::SDIV node expressing a divide by constant power of 2, return a +/// DAG expression that will generate the same value by right shifting. +SDValue DAGCombiner::BuildSDIVPow2(SDNode *N) { + ConstantSDNode *C = isConstOrConstSplat(N->getOperand(1)); + if (!C) + return SDValue(); + + // Avoid division by zero. + if (!C->getAPIntValue()) + return SDValue(); + + std::vector Built; + SDValue S = TLI.BuildSDIVPow2(N, C->getAPIntValue(), DAG, &Built); + + for (SDNode *N : Built) + AddToWorklist(N); + return S; +} + +/// Given an ISD::UDIV node expressing a divide by constant, return a DAG +/// expression that will generate the same value by multiplying by a magic +/// number. +/// Ref: "Hacker's Delight" or "The PowerPC Compiler Writer's Guide". 
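BuildSDIV above and BuildUDIV below replace a divide by a constant with a multiply by a precomputed magic number plus shifts, following Hacker's Delight. As a concrete standalone example, unsigned 32-bit division by 10 can be done as a 64-bit multiply by 0xCCCCCCCD followed by a right shift of 35; this magic/shift pair is the classic one for d = 10, while the TLI hooks compute the pair for arbitrary divisors:

    #include <cassert>
    #include <cstdint>

    // n / 10 for any uint32_t n, via multiply-by-magic instead of a divide:
    // magic = ceil(2^35 / 10) = 0xCCCCCCCD, shift = 35.
    static uint32_t udiv10(uint32_t n) {
      return (uint32_t)(((uint64_t)n * 0xCCCCCCCDull) >> 35);
    }

    int main() {
      for (uint64_t n = 0; n < 1000000; n += 9973)
        assert(udiv10((uint32_t)n) == (uint32_t)n / 10);
      assert(udiv10(0xFFFFFFFFu) == 0xFFFFFFFFu / 10);
      return 0;
    }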
SDValue DAGCombiner::BuildUDIV(SDNode *N) { ConstantSDNode *C = isConstOrConstSplat(N->getOperand(1)); if (!C) @@ -11443,13 +11966,145 @@ SDValue DAGCombiner::BuildUDIV(SDNode *N) { TLI.BuildUDIV(N, C->getAPIntValue(), DAG, LegalOperations, &Built); for (SDNode *N : Built) - AddToWorkList(N); + AddToWorklist(N); return S; } -/// FindBaseOffset - Return true if base is a frame index, which is known not -// to alias with anything but itself. Provides base object and offset as -// results. +SDValue DAGCombiner::BuildReciprocalEstimate(SDValue Op) { + if (Level >= AfterLegalizeDAG) + return SDValue(); + + // Expose the DAG combiner to the target combiner implementations. + TargetLowering::DAGCombinerInfo DCI(DAG, Level, false, this); + + unsigned Iterations = 0; + if (SDValue Est = TLI.getRecipEstimate(Op, DCI, Iterations)) { + if (Iterations) { + // Newton iteration for a function: F(X) is X_{i+1} = X_i - F(X_i)/F'(X_i) + // For the reciprocal, we need to find the zero of the function: + // F(X) = A X - 1 [which has a zero at X = 1/A] + // => + // X_{i+1} = X_i (2 - A X_i) = X_i + X_i (1 - A X_i) [this second form + // does not require additional intermediate precision] + EVT VT = Op.getValueType(); + SDLoc DL(Op); + SDValue FPOne = DAG.getConstantFP(1.0, VT); + + AddToWorklist(Est.getNode()); + + // Newton iterations: Est = Est + Est (1 - Arg * Est) + for (unsigned i = 0; i < Iterations; ++i) { + SDValue NewEst = DAG.getNode(ISD::FMUL, DL, VT, Op, Est); + AddToWorklist(NewEst.getNode()); + + NewEst = DAG.getNode(ISD::FSUB, DL, VT, FPOne, NewEst); + AddToWorklist(NewEst.getNode()); + + NewEst = DAG.getNode(ISD::FMUL, DL, VT, Est, NewEst); + AddToWorklist(NewEst.getNode()); + + Est = DAG.getNode(ISD::FADD, DL, VT, Est, NewEst); + AddToWorklist(Est.getNode()); + } + } + return Est; + } + + return SDValue(); +} + +/// Newton iteration for a function: F(X) is X_{i+1} = X_i - F(X_i)/F'(X_i) +/// For the reciprocal sqrt, we need to find the zero of the function: +/// F(X) = 1/X^2 - A [which has a zero at X = 1/sqrt(A)] +/// => +/// X_{i+1} = X_i (1.5 - A X_i^2 / 2) +/// As a result, we precompute A/2 prior to the iteration loop. +SDValue DAGCombiner::BuildRsqrtNROneConst(SDValue Arg, SDValue Est, + unsigned Iterations) { + EVT VT = Arg.getValueType(); + SDLoc DL(Arg); + SDValue ThreeHalves = DAG.getConstantFP(1.5, VT); + + // We now need 0.5 * Arg which we can write as (1.5 * Arg - Arg) so that + // this entire sequence requires only one FP constant. 
+ SDValue HalfArg = DAG.getNode(ISD::FMUL, DL, VT, ThreeHalves, Arg); + AddToWorklist(HalfArg.getNode()); + + HalfArg = DAG.getNode(ISD::FSUB, DL, VT, HalfArg, Arg); + AddToWorklist(HalfArg.getNode()); + + // Newton iterations: Est = Est * (1.5 - HalfArg * Est * Est) + for (unsigned i = 0; i < Iterations; ++i) { + SDValue NewEst = DAG.getNode(ISD::FMUL, DL, VT, Est, Est); + AddToWorklist(NewEst.getNode()); + + NewEst = DAG.getNode(ISD::FMUL, DL, VT, HalfArg, NewEst); + AddToWorklist(NewEst.getNode()); + + NewEst = DAG.getNode(ISD::FSUB, DL, VT, ThreeHalves, NewEst); + AddToWorklist(NewEst.getNode()); + + Est = DAG.getNode(ISD::FMUL, DL, VT, Est, NewEst); + AddToWorklist(Est.getNode()); + } + return Est; +} + +/// Newton iteration for a function: F(X) is X_{i+1} = X_i - F(X_i)/F'(X_i) +/// For the reciprocal sqrt, we need to find the zero of the function: +/// F(X) = 1/X^2 - A [which has a zero at X = 1/sqrt(A)] +/// => +/// X_{i+1} = (-0.5 * X_i) * (A * X_i * X_i + (-3.0)) +SDValue DAGCombiner::BuildRsqrtNRTwoConst(SDValue Arg, SDValue Est, + unsigned Iterations) { + EVT VT = Arg.getValueType(); + SDLoc DL(Arg); + SDValue MinusThree = DAG.getConstantFP(-3.0, VT); + SDValue MinusHalf = DAG.getConstantFP(-0.5, VT); + + // Newton iterations: Est = -0.5 * Est * (-3.0 + Arg * Est * Est) + for (unsigned i = 0; i < Iterations; ++i) { + SDValue HalfEst = DAG.getNode(ISD::FMUL, DL, VT, Est, MinusHalf); + AddToWorklist(HalfEst.getNode()); + + Est = DAG.getNode(ISD::FMUL, DL, VT, Est, Est); + AddToWorklist(Est.getNode()); + + Est = DAG.getNode(ISD::FMUL, DL, VT, Est, Arg); + AddToWorklist(Est.getNode()); + + Est = DAG.getNode(ISD::FADD, DL, VT, Est, MinusThree); + AddToWorklist(Est.getNode()); + + Est = DAG.getNode(ISD::FMUL, DL, VT, Est, HalfEst); + AddToWorklist(Est.getNode()); + } + return Est; +} + +SDValue DAGCombiner::BuildRsqrtEstimate(SDValue Op) { + if (Level >= AfterLegalizeDAG) + return SDValue(); + + // Expose the DAG combiner to the target combiner implementations. + TargetLowering::DAGCombinerInfo DCI(DAG, Level, false, this); + unsigned Iterations = 0; + bool UseOneConstNR = false; + if (SDValue Est = TLI.getRsqrtEstimate(Op, DCI, Iterations, UseOneConstNR)) { + AddToWorklist(Est.getNode()); + if (Iterations) { + Est = UseOneConstNR ? + BuildRsqrtNROneConst(Op, Est, Iterations) : + BuildRsqrtNRTwoConst(Op, Est, Iterations); + } + return Est; + } + + return SDValue(); +} + +/// Return true if base is a frame index, which is known not to alias with +/// anything but itself. Provides base object and offset as results. static bool FindBaseOffset(SDValue Ptr, SDValue &Base, int64_t &Offset, const GlobalValue *&GV, const void *&CV) { // Assume it is a primitive operation. @@ -11485,8 +12140,7 @@ static bool FindBaseOffset(SDValue Ptr, SDValue &Base, int64_t &Offset, return isa(Base); } -/// isAlias - Return true if there is any possibility that the two addresses -/// overlap. +/// Return true if there is any possibility that the two addresses overlap. bool DAGCombiner::isAlias(LSBaseSDNode *Op0, LSBaseSDNode *Op1) const { // If they are the same then they must be aliases. if (Op0->getBasePtr() == Op1->getBasePtr()) return true; @@ -11545,8 +12199,9 @@ bool DAGCombiner::isAlias(LSBaseSDNode *Op0, LSBaseSDNode *Op1) const { return false; } - bool UseAA = CombinerGlobalAA.getNumOccurrences() > 0 ? CombinerGlobalAA : - TLI.getTargetMachine().getSubtarget().useAA(); + bool UseAA = CombinerGlobalAA.getNumOccurrences() > 0 + ? 
CombinerGlobalAA + : DAG.getSubtarget().useAA(); #ifndef NDEBUG if (CombinerAAOnlyFunc.getNumOccurrences() && CombinerAAOnlyFunc != DAG.getMachineFunction().getName()) @@ -11564,10 +12219,10 @@ bool DAGCombiner::isAlias(LSBaseSDNode *Op0, LSBaseSDNode *Op1) const { AliasAnalysis::AliasResult AAResult = AA.alias(AliasAnalysis::Location(Op0->getMemOperand()->getValue(), Overlap1, - UseTBAA ? Op0->getTBAAInfo() : nullptr), + UseTBAA ? Op0->getAAInfo() : AAMDNodes()), AliasAnalysis::Location(Op1->getMemOperand()->getValue(), Overlap2, - UseTBAA ? Op1->getTBAAInfo() : nullptr)); + UseTBAA ? Op1->getAAInfo() : AAMDNodes())); if (AAResult == AliasAnalysis::NoAlias) return false; } @@ -11576,7 +12231,7 @@ bool DAGCombiner::isAlias(LSBaseSDNode *Op0, LSBaseSDNode *Op1) const { return true; } -/// GatherAllAliases - Walk up chain skipping non-aliasing memory nodes, +/// Walk up chain skipping non-aliasing memory nodes, /// looking for aliasing nodes and adding them to the Aliases vector. void DAGCombiner::GatherAllAliases(SDNode *N, SDValue OriginalChain, SmallVectorImpl &Aliases) { @@ -11612,7 +12267,7 @@ void DAGCombiner::GatherAllAliases(SDNode *N, SDValue OriginalChain, } // Don't bother if we've been before. - if (!Visited.insert(Chain.getNode())) + if (!Visited.insert(Chain.getNode()).second) continue; switch (Chain.getOpcode()) { @@ -11687,10 +12342,9 @@ void DAGCombiner::GatherAllAliases(SDNode *N, SDValue OriginalChain, // like register copies will interfere with trivial cases). SmallVector Worklist; - for (SmallPtrSet::iterator I = Visited.begin(), - IE = Visited.end(); I != IE; ++I) - if (*I != OriginalChain.getNode()) - Worklist.push_back(*I); + for (const SDNode *N : Visited) + if (N != OriginalChain.getNode()) + Worklist.push_back(N); while (!Worklist.empty()) { const SDNode *M = Worklist.pop_back_val(); @@ -11701,7 +12355,8 @@ void DAGCombiner::GatherAllAliases(SDNode *N, SDValue OriginalChain, for (SDNode::use_iterator UI = M->use_begin(), UIE = M->use_end(); UI != UIE; ++UI) - if (UI.getUse().getValueType() == MVT::Other && Visited.insert(*UI)) { + if (UI.getUse().getValueType() == MVT::Other && + Visited.insert(*UI).second) { if (isa(*UI) || isa(*UI)) { // We've not visited this use, and we care about it (it could have an // ordering dependency with the original node). @@ -11717,8 +12372,8 @@ void DAGCombiner::GatherAllAliases(SDNode *N, SDValue OriginalChain, } } -/// FindBetterChain - Walk up chain skipping non-aliasing memory nodes, looking -/// for a better chain (aliasing node.) +/// Walk up chain skipping non-aliasing memory nodes, looking for a better chain +/// (aliasing node.) SDValue DAGCombiner::FindBetterChain(SDNode *N, SDValue OldChain) { SmallVector Aliases; // Ops for replacing token factor. @@ -11737,11 +12392,9 @@ SDValue DAGCombiner::FindBetterChain(SDNode *N, SDValue OldChain) { return DAG.getNode(ISD::TokenFactor, SDLoc(N), MVT::Other, Aliases); } -// SelectionDAG::Combine - This is the entry point for the file. -// +/// This is the entry point for the file. void SelectionDAG::Combine(CombineLevel Level, AliasAnalysis &AA, CodeGenOpt::Level OptLevel) { - /// run - This is the main entry point to this class. - /// + /// This is the main entry point to this class. 
DAGCombiner(*this, AA, OptLevel).Run(Level); } diff --git a/lib/CodeGen/SelectionDAG/FastISel.cpp b/lib/CodeGen/SelectionDAG/FastISel.cpp index 445572a..8facbc2 100644 --- a/lib/CodeGen/SelectionDAG/FastISel.cpp +++ b/lib/CodeGen/SelectionDAG/FastISel.cpp @@ -39,6 +39,7 @@ // //===----------------------------------------------------------------------===// +#include "llvm/CodeGen/Analysis.h" #include "llvm/CodeGen/FastISel.h" #include "llvm/ADT/Optional.h" #include "llvm/ADT/Statistic.h" @@ -64,19 +65,32 @@ #include "llvm/Target/TargetLibraryInfo.h" #include "llvm/Target/TargetLowering.h" #include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetSubtargetInfo.h" using namespace llvm; #define DEBUG_TYPE "isel" STATISTIC(NumFastIselSuccessIndependent, "Number of insts selected by " - "target-independent selector"); + "target-independent selector"); STATISTIC(NumFastIselSuccessTarget, "Number of insts selected by " - "target-specific selector"); + "target-specific selector"); STATISTIC(NumFastIselDead, "Number of dead insts removed on failure"); -/// startNewBlock - Set the current block to which generated machine -/// instructions will be appended, and clear the local CSE map. -/// +void FastISel::ArgListEntry::setAttributes(ImmutableCallSite *CS, + unsigned AttrIdx) { + IsSExt = CS->paramHasAttr(AttrIdx, Attribute::SExt); + IsZExt = CS->paramHasAttr(AttrIdx, Attribute::ZExt); + IsInReg = CS->paramHasAttr(AttrIdx, Attribute::InReg); + IsSRet = CS->paramHasAttr(AttrIdx, Attribute::StructRet); + IsNest = CS->paramHasAttr(AttrIdx, Attribute::Nest); + IsByVal = CS->paramHasAttr(AttrIdx, Attribute::ByVal); + IsInAlloca = CS->paramHasAttr(AttrIdx, Attribute::InAlloca); + IsReturned = CS->paramHasAttr(AttrIdx, Attribute::Returned); + Alignment = CS->getParamAlignment(AttrIdx); +} + +/// Set the current block to which generated machine instructions will be +/// appended, and clear the local CSE map. void FastISel::startNewBlock() { LocalValueMap.clear(); @@ -89,18 +103,19 @@ void FastISel::startNewBlock() { LastLocalValue = EmitStartPt; } -bool FastISel::LowerArguments() { +bool FastISel::lowerArguments() { if (!FuncInfo.CanLowerReturn) // Fallback to SDISel argument lowering code to deal with sret pointer // parameter. return false; - if (!FastLowerArguments()) + if (!fastLowerArguments()) return false; // Enter arguments into ValueMap for uses in non-entry BBs. for (Function::const_arg_iterator I = FuncInfo.Fn->arg_begin(), - E = FuncInfo.Fn->arg_end(); I != E; ++I) { + E = FuncInfo.Fn->arg_end(); + I != E; ++I) { DenseMap::iterator VI = LocalValueMap.find(I); assert(VI != LocalValueMap.end() && "Missed an argument?"); FuncInfo.ValueMap[I] = VI->second; @@ -112,22 +127,30 @@ void FastISel::flushLocalValueMap() { LocalValueMap.clear(); LastLocalValue = EmitStartPt; recomputeInsertPt(); + SavedInsertPt = FuncInfo.InsertPt; } -bool FastISel::hasTrivialKill(const Value *V) const { +bool FastISel::hasTrivialKill(const Value *V) { // Don't consider constants or arguments to have trivial kills. const Instruction *I = dyn_cast(V); if (!I) return false; // No-op casts are trivially coalesced by fast-isel. 
- if (const CastInst *Cast = dyn_cast(I)) + if (const auto *Cast = dyn_cast(I)) if (Cast->isNoopCast(DL.getIntPtrType(Cast->getContext())) && !hasTrivialKill(Cast->getOperand(0))) return false; + // Even the value might have only one use in the LLVM IR, it is possible that + // FastISel might fold the use into another instruction and now there is more + // than one use at the Machine Instruction level. + unsigned Reg = lookUpRegForValue(V); + if (Reg && !MRI.use_empty(Reg)) + return false; + // GEPs with all zero indices are trivially coalesced by fast-isel. - if (const GetElementPtrInst *GEP = dyn_cast(I)) + if (const auto *GEP = dyn_cast(I)) if (GEP->hasAllZeroIndices() && !hasTrivialKill(GEP->getOperand(0))) return false; @@ -160,7 +183,7 @@ unsigned FastISel::getRegForValue(const Value *V) { // Look up the value to see if we already have a register for it. unsigned Reg = lookUpRegForValue(V); - if (Reg != 0) + if (Reg) return Reg; // In bottom-up mode, just create the virtual register which will be used @@ -181,29 +204,24 @@ unsigned FastISel::getRegForValue(const Value *V) { return Reg; } -/// materializeRegForValue - Helper for getRegForValue. This function is -/// called when the value isn't already available in a register and must -/// be materialized with new instructions. -unsigned FastISel::materializeRegForValue(const Value *V, MVT VT) { +unsigned FastISel::materializeConstant(const Value *V, MVT VT) { unsigned Reg = 0; - - if (const ConstantInt *CI = dyn_cast(V)) { + if (const auto *CI = dyn_cast(V)) { if (CI->getValue().getActiveBits() <= 64) - Reg = FastEmit_i(VT, VT, ISD::Constant, CI->getZExtValue()); - } else if (isa(V)) { - Reg = TargetMaterializeAlloca(cast(V)); - } else if (isa(V)) { + Reg = fastEmit_i(VT, VT, ISD::Constant, CI->getZExtValue()); + } else if (isa(V)) + Reg = fastMaterializeAlloca(cast(V)); + else if (isa(V)) // Translate this as an integer zero so that it can be // local-CSE'd with actual integer zeros. - Reg = - getRegForValue(Constant::getNullValue(DL.getIntPtrType(V->getContext()))); - } else if (const ConstantFP *CF = dyn_cast(V)) { - if (CF->isNullValue()) { - Reg = TargetMaterializeFloatZero(CF); - } else { + Reg = getRegForValue( + Constant::getNullValue(DL.getIntPtrType(V->getContext()))); + else if (const auto *CF = dyn_cast(V)) { + if (CF->isNullValue()) + Reg = fastMaterializeFloatZero(CF); + else // Try to emit the constant directly. - Reg = FastEmit_f(VT, VT, ISD::ConstantFP, CF); - } + Reg = fastEmit_f(VT, VT, ISD::ConstantFP, CF); if (!Reg) { // Try to emit the constant by using an integer constant with a cast. 
@@ -213,22 +231,22 @@ unsigned FastISel::materializeRegForValue(const Value *V, MVT VT) { uint64_t x[2]; uint32_t IntBitWidth = IntVT.getSizeInBits(); bool isExact; - (void) Flt.convertToInteger(x, IntBitWidth, /*isSigned=*/true, - APFloat::rmTowardZero, &isExact); + (void)Flt.convertToInteger(x, IntBitWidth, /*isSigned=*/true, + APFloat::rmTowardZero, &isExact); if (isExact) { APInt IntVal(IntBitWidth, x); unsigned IntegerReg = - getRegForValue(ConstantInt::get(V->getContext(), IntVal)); + getRegForValue(ConstantInt::get(V->getContext(), IntVal)); if (IntegerReg != 0) - Reg = FastEmit_r(IntVT.getSimpleVT(), VT, ISD::SINT_TO_FP, - IntegerReg, /*Kill=*/false); + Reg = fastEmit_r(IntVT.getSimpleVT(), VT, ISD::SINT_TO_FP, IntegerReg, + /*Kill=*/false); } } - } else if (const Operator *Op = dyn_cast(V)) { - if (!SelectOperator(Op, Op->getOpcode())) + } else if (const auto *Op = dyn_cast(V)) { + if (!selectOperator(Op, Op->getOpcode())) if (!isa(Op) || - !TargetSelectInstruction(cast(Op))) + !fastSelectInstruction(cast(Op))) return 0; Reg = lookUpRegForValue(Op); } else if (isa(V)) { @@ -236,15 +254,26 @@ unsigned FastISel::materializeRegForValue(const Value *V, MVT VT) { BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(TargetOpcode::IMPLICIT_DEF), Reg); } + return Reg; +} + +/// Helper for getRegForValue. This function is called when the value isn't +/// already available in a register and must be materialized with new +/// instructions. +unsigned FastISel::materializeRegForValue(const Value *V, MVT VT) { + unsigned Reg = 0; + // Give the target-specific code a try first. + if (isa(V)) + Reg = fastMaterializeConstant(cast(V)); - // If target-independent code couldn't handle the value, give target-specific - // code a try. - if (!Reg && isa(V)) - Reg = TargetMaterializeConstant(cast(V)); + // If target-specific code couldn't or didn't want to handle the value, then + // give target-independent code a try. + if (!Reg) + Reg = materializeConstant(V, VT); // Don't cache constant materializations in the general ValueMap. // To do so would require tracking what uses they dominate. - if (Reg != 0) { + if (Reg) { LocalValueMap[V] = Reg; LastLocalValue = MRI.getVRegDef(Reg); } @@ -262,13 +291,7 @@ unsigned FastISel::lookUpRegForValue(const Value *V) { return LocalValueMap[V]; } -/// UpdateValueMap - Update the value map to include the new mapping for this -/// instruction, or insert an extra copy to get the result in a previous -/// determined register. -/// NOTE: This is only necessary because we might select a block that uses -/// a value before we select the block that defines the value. It might be -/// possible to fix this by selecting blocks in reverse postorder. -void FastISel::UpdateValueMap(const Value *I, unsigned Reg, unsigned NumRegs) { +void FastISel::updateValueMap(const Value *I, unsigned Reg, unsigned NumRegs) { if (!isa(I)) { LocalValueMap[I] = Reg; return; @@ -281,7 +304,7 @@ void FastISel::UpdateValueMap(const Value *I, unsigned Reg, unsigned NumRegs) { else if (Reg != AssignedReg) { // Arrange for uses of AssignedReg to be replaced by uses of Reg. 
for (unsigned i = 0; i < NumRegs; i++) - FuncInfo.RegFixups[AssignedReg+i] = Reg+i; + FuncInfo.RegFixups[AssignedReg + i] = Reg + i; AssignedReg = Reg; } @@ -299,13 +322,12 @@ std::pair FastISel::getRegForGEPIndex(const Value *Idx) { MVT PtrVT = TLI.getPointerTy(); EVT IdxVT = EVT::getEVT(Idx->getType(), /*HandleUnknown=*/false); if (IdxVT.bitsLT(PtrVT)) { - IdxN = FastEmit_r(IdxVT.getSimpleVT(), PtrVT, ISD::SIGN_EXTEND, - IdxN, IdxNIsKill); + IdxN = fastEmit_r(IdxVT.getSimpleVT(), PtrVT, ISD::SIGN_EXTEND, IdxN, + IdxNIsKill); IdxNIsKill = true; - } - else if (IdxVT.bitsGT(PtrVT)) { - IdxN = FastEmit_r(IdxVT.getSimpleVT(), PtrVT, ISD::TRUNCATE, - IdxN, IdxNIsKill); + } else if (IdxVT.bitsGT(PtrVT)) { + IdxN = + fastEmit_r(IdxVT.getSimpleVT(), PtrVT, ISD::TRUNCATE, IdxN, IdxNIsKill); IdxNIsKill = true; } return std::pair(IdxN, IdxNIsKill); @@ -327,7 +349,7 @@ void FastISel::recomputeInsertPt() { void FastISel::removeDeadCode(MachineBasicBlock::iterator I, MachineBasicBlock::iterator E) { - assert (I && E && std::distance(I, E) > 0 && "Invalid iterator!"); + assert(I && E && std::distance(I, E) > 0 && "Invalid iterator!"); while (I != E) { MachineInstr *Dead = &*I; ++I; @@ -342,7 +364,7 @@ FastISel::SavePoint FastISel::enterLocalValueArea() { DebugLoc OldDL = DbgLoc; recomputeInsertPt(); DbgLoc = DebugLoc(); - SavePoint SP = { OldInsertPt, OldDL }; + SavePoint SP = {OldInsertPt, OldDL}; return SP; } @@ -355,10 +377,7 @@ void FastISel::leaveLocalValueArea(SavePoint OldInsertPt) { DbgLoc = OldInsertPt.DL; } -/// SelectBinaryOp - Select and emit code for a binary operator instruction, -/// which has an opcode which directly corresponds to the given ISD opcode. -/// -bool FastISel::SelectBinaryOp(const User *I, unsigned ISDOpcode) { +bool FastISel::selectBinaryOp(const User *I, unsigned ISDOpcode) { EVT VT = EVT::getEVT(I->getType(), /*HandleUnknown=*/true); if (VT == MVT::Other || !VT.isSimple()) // Unhandled type. Halt "fast" selection and bail. @@ -371,9 +390,8 @@ bool FastISel::SelectBinaryOp(const User *I, unsigned ISDOpcode) { if (!TLI.isTypeLegal(VT)) { // MVT::i1 is special. Allow AND, OR, or XOR because they // don't require additional zeroing, which makes them easy. - if (VT == MVT::i1 && - (ISDOpcode == ISD::AND || ISDOpcode == ISD::OR || - ISDOpcode == ISD::XOR)) + if (VT == MVT::i1 && (ISDOpcode == ISD::AND || ISDOpcode == ISD::OR || + ISDOpcode == ISD::XOR)) VT = TLI.getTypeToTransformTo(I->getContext(), VT); else return false; @@ -381,38 +399,36 @@ bool FastISel::SelectBinaryOp(const User *I, unsigned ISDOpcode) { // Check if the first operand is a constant, and handle it as "ri". At -O0, // we don't have anything that canonicalizes operand order. - if (ConstantInt *CI = dyn_cast(I->getOperand(0))) + if (const auto *CI = dyn_cast(I->getOperand(0))) if (isa(I) && cast(I)->isCommutative()) { unsigned Op1 = getRegForValue(I->getOperand(1)); - if (Op1 == 0) return false; - + if (!Op1) + return false; bool Op1IsKill = hasTrivialKill(I->getOperand(1)); - unsigned ResultReg = FastEmit_ri_(VT.getSimpleVT(), ISDOpcode, Op1, - Op1IsKill, CI->getZExtValue(), - VT.getSimpleVT()); - if (ResultReg == 0) return false; + unsigned ResultReg = + fastEmit_ri_(VT.getSimpleVT(), ISDOpcode, Op1, Op1IsKill, + CI->getZExtValue(), VT.getSimpleVT()); + if (!ResultReg) + return false; // We successfully emitted code for the given LLVM Instruction. 
- UpdateValueMap(I, ResultReg); + updateValueMap(I, ResultReg); return true; } - unsigned Op0 = getRegForValue(I->getOperand(0)); - if (Op0 == 0) // Unhandled operand. Halt "fast" selection and bail. + if (!Op0) // Unhandled operand. Halt "fast" selection and bail. return false; - bool Op0IsKill = hasTrivialKill(I->getOperand(0)); // Check if the second operand is a constant and handle it appropriately. - if (ConstantInt *CI = dyn_cast(I->getOperand(1))) { + if (const auto *CI = dyn_cast(I->getOperand(1))) { uint64_t Imm = CI->getZExtValue(); // Transform "sdiv exact X, 8" -> "sra X, 3". if (ISDOpcode == ISD::SDIV && isa(I) && - cast(I)->isExact() && - isPowerOf2_64(Imm)) { + cast(I)->isExact() && isPowerOf2_64(Imm)) { Imm = Log2_64(Imm); ISDOpcode = ISD::SRA; } @@ -424,54 +440,49 @@ bool FastISel::SelectBinaryOp(const User *I, unsigned ISDOpcode) { ISDOpcode = ISD::AND; } - unsigned ResultReg = FastEmit_ri_(VT.getSimpleVT(), ISDOpcode, Op0, + unsigned ResultReg = fastEmit_ri_(VT.getSimpleVT(), ISDOpcode, Op0, Op0IsKill, Imm, VT.getSimpleVT()); - if (ResultReg == 0) return false; + if (!ResultReg) + return false; // We successfully emitted code for the given LLVM Instruction. - UpdateValueMap(I, ResultReg); + updateValueMap(I, ResultReg); return true; } // Check if the second operand is a constant float. - if (ConstantFP *CF = dyn_cast(I->getOperand(1))) { - unsigned ResultReg = FastEmit_rf(VT.getSimpleVT(), VT.getSimpleVT(), + if (const auto *CF = dyn_cast(I->getOperand(1))) { + unsigned ResultReg = fastEmit_rf(VT.getSimpleVT(), VT.getSimpleVT(), ISDOpcode, Op0, Op0IsKill, CF); - if (ResultReg != 0) { + if (ResultReg) { // We successfully emitted code for the given LLVM Instruction. - UpdateValueMap(I, ResultReg); + updateValueMap(I, ResultReg); return true; } } unsigned Op1 = getRegForValue(I->getOperand(1)); - if (Op1 == 0) - // Unhandled operand. Halt "fast" selection and bail. + if (!Op1) // Unhandled operand. Halt "fast" selection and bail. return false; - bool Op1IsKill = hasTrivialKill(I->getOperand(1)); // Now we have both operands in registers. Emit the instruction. - unsigned ResultReg = FastEmit_rr(VT.getSimpleVT(), VT.getSimpleVT(), - ISDOpcode, - Op0, Op0IsKill, - Op1, Op1IsKill); - if (ResultReg == 0) + unsigned ResultReg = fastEmit_rr(VT.getSimpleVT(), VT.getSimpleVT(), + ISDOpcode, Op0, Op0IsKill, Op1, Op1IsKill); + if (!ResultReg) // Target-specific code wasn't able to find a machine opcode for // the given ISD opcode and type. Halt "fast" selection and bail. return false; // We successfully emitted code for the given LLVM Instruction. - UpdateValueMap(I, ResultReg); + updateValueMap(I, ResultReg); return true; } -bool FastISel::SelectGetElementPtr(const User *I) { +bool FastISel::selectGetElementPtr(const User *I) { unsigned N = getRegForValue(I->getOperand(0)); - if (N == 0) - // Unhandled operand. Halt "fast" selection and bail. + if (!N) // Unhandled operand. Halt "fast" selection and bail. 
return false; - bool NIsKill = hasTrivialKill(I->getOperand(0)); // Keep a running tab of the total offset to coalesce multiple N = N + Offset @@ -481,18 +492,18 @@ bool FastISel::SelectGetElementPtr(const User *I) { uint64_t MaxOffs = 2048; Type *Ty = I->getOperand(0)->getType(); MVT VT = TLI.getPointerTy(); - for (GetElementPtrInst::const_op_iterator OI = I->op_begin()+1, - E = I->op_end(); OI != E; ++OI) { + for (GetElementPtrInst::const_op_iterator OI = I->op_begin() + 1, + E = I->op_end(); + OI != E; ++OI) { const Value *Idx = *OI; - if (StructType *StTy = dyn_cast(Ty)) { + if (auto *StTy = dyn_cast(Ty)) { unsigned Field = cast(Idx)->getZExtValue(); if (Field) { // N = N + Offset TotalOffs += DL.getStructLayout(StTy)->getElementOffset(Field); if (TotalOffs >= MaxOffs) { - N = FastEmit_ri_(VT, ISD::ADD, N, NIsKill, TotalOffs, VT); - if (N == 0) - // Unhandled operand. Halt "fast" selection and bail. + N = fastEmit_ri_(VT, ISD::ADD, N, NIsKill, TotalOffs, VT); + if (!N) // Unhandled operand. Halt "fast" selection and bail. return false; NIsKill = true; TotalOffs = 0; @@ -503,15 +514,15 @@ bool FastISel::SelectGetElementPtr(const User *I) { Ty = cast(Ty)->getElementType(); // If this is a constant subscript, handle it quickly. - if (const ConstantInt *CI = dyn_cast(Idx)) { - if (CI->isZero()) continue; + if (const auto *CI = dyn_cast(Idx)) { + if (CI->isZero()) + continue; // N = N + Offset TotalOffs += - DL.getTypeAllocSize(Ty)*cast(CI)->getSExtValue(); + DL.getTypeAllocSize(Ty) * cast(CI)->getSExtValue(); if (TotalOffs >= MaxOffs) { - N = FastEmit_ri_(VT, ISD::ADD, N, NIsKill, TotalOffs, VT); - if (N == 0) - // Unhandled operand. Halt "fast" selection and bail. + N = fastEmit_ri_(VT, ISD::ADD, N, NIsKill, TotalOffs, VT); + if (!N) // Unhandled operand. Halt "fast" selection and bail. return false; NIsKill = true; TotalOffs = 0; @@ -519,9 +530,8 @@ bool FastISel::SelectGetElementPtr(const User *I) { continue; } if (TotalOffs) { - N = FastEmit_ri_(VT, ISD::ADD, N, NIsKill, TotalOffs, VT); - if (N == 0) - // Unhandled operand. Halt "fast" selection and bail. + N = fastEmit_ri_(VT, ISD::ADD, N, NIsKill, TotalOffs, VT); + if (!N) // Unhandled operand. Halt "fast" selection and bail. return false; NIsKill = true; TotalOffs = 0; @@ -532,43 +542,37 @@ bool FastISel::SelectGetElementPtr(const User *I) { std::pair Pair = getRegForGEPIndex(Idx); unsigned IdxN = Pair.first; bool IdxNIsKill = Pair.second; - if (IdxN == 0) - // Unhandled operand. Halt "fast" selection and bail. + if (!IdxN) // Unhandled operand. Halt "fast" selection and bail. return false; if (ElementSize != 1) { - IdxN = FastEmit_ri_(VT, ISD::MUL, IdxN, IdxNIsKill, ElementSize, VT); - if (IdxN == 0) - // Unhandled operand. Halt "fast" selection and bail. + IdxN = fastEmit_ri_(VT, ISD::MUL, IdxN, IdxNIsKill, ElementSize, VT); + if (!IdxN) // Unhandled operand. Halt "fast" selection and bail. return false; IdxNIsKill = true; } - N = FastEmit_rr(VT, VT, ISD::ADD, N, NIsKill, IdxN, IdxNIsKill); - if (N == 0) - // Unhandled operand. Halt "fast" selection and bail. + N = fastEmit_rr(VT, VT, ISD::ADD, N, NIsKill, IdxN, IdxNIsKill); + if (!N) // Unhandled operand. Halt "fast" selection and bail. return false; } } if (TotalOffs) { - N = FastEmit_ri_(VT, ISD::ADD, N, NIsKill, TotalOffs, VT); - if (N == 0) - // Unhandled operand. Halt "fast" selection and bail. + N = fastEmit_ri_(VT, ISD::ADD, N, NIsKill, TotalOffs, VT); + if (!N) // Unhandled operand. Halt "fast" selection and bail. 
return false; } // We successfully emitted code for the given LLVM Instruction. - UpdateValueMap(I, N); + updateValueMap(I, N); return true; } -/// \brief Add a stackmap or patchpoint intrinsic call's live variable operands -/// to a stackmap or patchpoint machine instruction. bool FastISel::addStackMapLiveVars(SmallVectorImpl &Ops, const CallInst *CI, unsigned StartIdx) { for (unsigned i = StartIdx, e = CI->getNumArgOperands(); i != e; ++i) { Value *Val = CI->getArgOperand(i); // Check for constants and encode them with a StackMaps::ConstantOp prefix. - if (auto *C = dyn_cast(Val)) { + if (const auto *C = dyn_cast(Val)) { Ops.push_back(MachineOperand::CreateImm(StackMaps::ConstantOp)); Ops.push_back(MachineOperand::CreateImm(C->getSExtValue())); } else if (isa(Val)) { @@ -585,16 +589,15 @@ bool FastISel::addStackMapLiveVars(SmallVectorImpl &Ops, return false; } else { unsigned Reg = getRegForValue(Val); - if (Reg == 0) + if (!Reg) return false; Ops.push_back(MachineOperand::CreateReg(Reg, /*IsDef=*/false)); } } - return true; } -bool FastISel::SelectStackmap(const CallInst *I) { +bool FastISel::selectStackmap(const CallInst *I) { // void @llvm.experimental.stackmap(i64 , i32 , // [live variables...]) assert(I->getCalledFunction()->getReturnType()->isVoidTy() && @@ -621,7 +624,7 @@ bool FastISel::SelectStackmap(const CallInst *I) { assert(isa(I->getOperand(PatchPointOpers::NBytesPos)) && "Expected a constant integer."); const auto *NumBytes = - cast(I->getOperand(PatchPointOpers::NBytesPos)); + cast(I->getOperand(PatchPointOpers::NBytesPos)); Ops.push_back(MachineOperand::CreateImm(NumBytes->getZExtValue())); // Push live variables for the stack map (skipping the first two arguments @@ -637,13 +640,13 @@ bool FastISel::SelectStackmap(const CallInst *I) { const MCPhysReg *ScratchRegs = TLI.getScratchRegisters(CC); for (unsigned i = 0; ScratchRegs[i]; ++i) Ops.push_back(MachineOperand::CreateReg( - ScratchRegs[i], /*IsDef=*/true, /*IsImp=*/true, /*IsKill=*/false, - /*IsDead=*/false, /*IsUndef=*/false, /*IsEarlyClobber=*/true)); + ScratchRegs[i], /*IsDef=*/true, /*IsImp=*/true, /*IsKill=*/false, + /*IsDead=*/false, /*IsUndef=*/false, /*IsEarlyClobber=*/true)); // Issue CALLSEQ_START unsigned AdjStackDown = TII.getCallFrameSetupOpcode(); BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AdjStackDown)) - .addImm(0); + .addImm(0); // Issue STACKMAP. MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, @@ -654,7 +657,8 @@ bool FastISel::SelectStackmap(const CallInst *I) { // Issue CALLSEQ_END unsigned AdjStackUp = TII.getCallFrameDestroyOpcode(); BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AdjStackUp)) - .addImm(0).addImm(0); + .addImm(0) + .addImm(0); // Inform the Frame Information that we have a stackmap in this function. FuncInfo.MF->getFrameInfo()->setHasStackMap(); @@ -662,11 +666,370 @@ bool FastISel::SelectStackmap(const CallInst *I) { return true; } -bool FastISel::SelectCall(const User *I) { +/// \brief Lower an argument list according to the target calling convention. +/// +/// This is a helper for lowering intrinsics that follow a target calling +/// convention or require stack pointer adjustment. Only a subset of the +/// intrinsic's operands need to participate in the calling convention. +bool FastISel::lowerCallOperands(const CallInst *CI, unsigned ArgIdx, + unsigned NumArgs, const Value *Callee, + bool ForceRetVoidTy, CallLoweringInfo &CLI) { + ArgListTy Args; + Args.reserve(NumArgs); + + // Populate the argument list. 
+ // Attributes for args start at offset 1, after the return attribute. + ImmutableCallSite CS(CI); + for (unsigned ArgI = ArgIdx, ArgE = ArgIdx + NumArgs, AttrI = ArgIdx + 1; + ArgI != ArgE; ++ArgI) { + Value *V = CI->getOperand(ArgI); + + assert(!V->getType()->isEmptyTy() && "Empty type passed to intrinsic."); + + ArgListEntry Entry; + Entry.Val = V; + Entry.Ty = V->getType(); + Entry.setAttributes(&CS, AttrI); + Args.push_back(Entry); + } + + Type *RetTy = ForceRetVoidTy ? Type::getVoidTy(CI->getType()->getContext()) + : CI->getType(); + CLI.setCallee(CI->getCallingConv(), RetTy, Callee, std::move(Args), NumArgs); + + return lowerCallTo(CLI); +} + +bool FastISel::selectPatchpoint(const CallInst *I) { + // void|i64 @llvm.experimental.patchpoint.void|i64(i64 , + // i32 , + // i8* , + // i32 , + // [Args...], + // [live variables...]) + CallingConv::ID CC = I->getCallingConv(); + bool IsAnyRegCC = CC == CallingConv::AnyReg; + bool HasDef = !I->getType()->isVoidTy(); + Value *Callee = I->getOperand(PatchPointOpers::TargetPos); + + // Get the real number of arguments participating in the call + assert(isa(I->getOperand(PatchPointOpers::NArgPos)) && + "Expected a constant integer."); + const auto *NumArgsVal = + cast(I->getOperand(PatchPointOpers::NArgPos)); + unsigned NumArgs = NumArgsVal->getZExtValue(); + + // Skip the four meta args: , , , + // This includes all meta-operands up to but not including CC. + unsigned NumMetaOpers = PatchPointOpers::CCPos; + assert(I->getNumArgOperands() >= NumMetaOpers + NumArgs && + "Not enough arguments provided to the patchpoint intrinsic"); + + // For AnyRegCC the arguments are lowered later on manually. + unsigned NumCallArgs = IsAnyRegCC ? 0 : NumArgs; + CallLoweringInfo CLI; + if (!lowerCallOperands(I, NumMetaOpers, NumCallArgs, Callee, IsAnyRegCC, CLI)) + return false; + + assert(CLI.Call && "No call instruction specified."); + + SmallVector Ops; + + // Add an explicit result reg if we use the anyreg calling convention. + if (IsAnyRegCC && HasDef) { + assert(CLI.NumResultRegs == 0 && "Unexpected result register."); + CLI.ResultReg = createResultReg(TLI.getRegClassFor(MVT::i64)); + CLI.NumResultRegs = 1; + Ops.push_back(MachineOperand::CreateReg(CLI.ResultReg, /*IsDef=*/true)); + } + + // Add the and constants. + assert(isa(I->getOperand(PatchPointOpers::IDPos)) && + "Expected a constant integer."); + const auto *ID = cast(I->getOperand(PatchPointOpers::IDPos)); + Ops.push_back(MachineOperand::CreateImm(ID->getZExtValue())); + + assert(isa(I->getOperand(PatchPointOpers::NBytesPos)) && + "Expected a constant integer."); + const auto *NumBytes = + cast(I->getOperand(PatchPointOpers::NBytesPos)); + Ops.push_back(MachineOperand::CreateImm(NumBytes->getZExtValue())); + + // Assume that the callee is a constant address or null pointer. + // FIXME: handle function symbols in the future. + uint64_t CalleeAddr; + if (const auto *C = dyn_cast(Callee)) + CalleeAddr = cast(C->getOperand(0))->getZExtValue(); + else if (const auto *C = dyn_cast(Callee)) { + if (C->getOpcode() == Instruction::IntToPtr) + CalleeAddr = cast(C->getOperand(0))->getZExtValue(); + else + llvm_unreachable("Unsupported ConstantExpr."); + } else if (isa(Callee)) + CalleeAddr = 0; + else + llvm_unreachable("Unsupported callee address."); + + Ops.push_back(MachineOperand::CreateImm(CalleeAddr)); + + // Adjust to account for any arguments that have been passed on + // the stack instead. + unsigned NumCallRegArgs = IsAnyRegCC ? 
NumArgs : CLI.OutRegs.size(); + Ops.push_back(MachineOperand::CreateImm(NumCallRegArgs)); + + // Add the calling convention + Ops.push_back(MachineOperand::CreateImm((unsigned)CC)); + + // Add the arguments we omitted previously. The register allocator should + // place these in any free register. + if (IsAnyRegCC) { + for (unsigned i = NumMetaOpers, e = NumMetaOpers + NumArgs; i != e; ++i) { + unsigned Reg = getRegForValue(I->getArgOperand(i)); + if (!Reg) + return false; + Ops.push_back(MachineOperand::CreateReg(Reg, /*IsDef=*/false)); + } + } + + // Push the arguments from the call instruction. + for (auto Reg : CLI.OutRegs) + Ops.push_back(MachineOperand::CreateReg(Reg, /*IsDef=*/false)); + + // Push live variables for the stack map. + if (!addStackMapLiveVars(Ops, I, NumMetaOpers + NumArgs)) + return false; + + // Push the register mask info. + Ops.push_back(MachineOperand::CreateRegMask(TRI.getCallPreservedMask(CC))); + + // Add scratch registers as implicit def and early clobber. + const MCPhysReg *ScratchRegs = TLI.getScratchRegisters(CC); + for (unsigned i = 0; ScratchRegs[i]; ++i) + Ops.push_back(MachineOperand::CreateReg( + ScratchRegs[i], /*IsDef=*/true, /*IsImp=*/true, /*IsKill=*/false, + /*IsDead=*/false, /*IsUndef=*/false, /*IsEarlyClobber=*/true)); + + // Add implicit defs (return values). + for (auto Reg : CLI.InRegs) + Ops.push_back(MachineOperand::CreateReg(Reg, /*IsDef=*/true, + /*IsImpl=*/true)); + + // Insert the patchpoint instruction before the call generated by the target. + MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, CLI.Call, DbgLoc, + TII.get(TargetOpcode::PATCHPOINT)); + + for (auto &MO : Ops) + MIB.addOperand(MO); + + MIB->setPhysRegsDeadExcept(CLI.InRegs, TRI); + + // Delete the original call instruction. + CLI.Call->eraseFromParent(); + + // Inform the Frame Information that we have a patchpoint in this function. + FuncInfo.MF->getFrameInfo()->setHasPatchPoint(); + + if (CLI.NumResultRegs) + updateValueMap(I, CLI.ResultReg, CLI.NumResultRegs); + return true; +} + +/// Returns an AttributeSet representing the attributes applied to the return +/// value of the given call. +static AttributeSet getReturnAttrs(FastISel::CallLoweringInfo &CLI) { + SmallVector Attrs; + if (CLI.RetSExt) + Attrs.push_back(Attribute::SExt); + if (CLI.RetZExt) + Attrs.push_back(Attribute::ZExt); + if (CLI.IsInReg) + Attrs.push_back(Attribute::InReg); + + return AttributeSet::get(CLI.RetTy->getContext(), AttributeSet::ReturnIndex, + Attrs); +} + +bool FastISel::lowerCallTo(const CallInst *CI, const char *SymName, + unsigned NumArgs) { + ImmutableCallSite CS(CI); + + PointerType *PT = cast(CS.getCalledValue()->getType()); + FunctionType *FTy = cast(PT->getElementType()); + Type *RetTy = FTy->getReturnType(); + + ArgListTy Args; + Args.reserve(NumArgs); + + // Populate the argument list. + // Attributes for args start at offset 1, after the return attribute. + for (unsigned ArgI = 0; ArgI != NumArgs; ++ArgI) { + Value *V = CI->getOperand(ArgI); + + assert(!V->getType()->isEmptyTy() && "Empty type passed to intrinsic."); + + ArgListEntry Entry; + Entry.Val = V; + Entry.Ty = V->getType(); + Entry.setAttributes(&CS, ArgI + 1); + Args.push_back(Entry); + } + + CallLoweringInfo CLI; + CLI.setCallee(RetTy, FTy, SymName, std::move(Args), CS, NumArgs); + + return lowerCallTo(CLI); +} + +bool FastISel::lowerCallTo(CallLoweringInfo &CLI) { + // Handle the incoming return values from the call. 
+ CLI.clearIns(); + SmallVector RetTys; + ComputeValueVTs(TLI, CLI.RetTy, RetTys); + + SmallVector Outs; + GetReturnInfo(CLI.RetTy, getReturnAttrs(CLI), Outs, TLI); + + bool CanLowerReturn = TLI.CanLowerReturn( + CLI.CallConv, *FuncInfo.MF, CLI.IsVarArg, Outs, CLI.RetTy->getContext()); + + // FIXME: sret demotion isn't supported yet - bail out. + if (!CanLowerReturn) + return false; + + for (unsigned I = 0, E = RetTys.size(); I != E; ++I) { + EVT VT = RetTys[I]; + MVT RegisterVT = TLI.getRegisterType(CLI.RetTy->getContext(), VT); + unsigned NumRegs = TLI.getNumRegisters(CLI.RetTy->getContext(), VT); + for (unsigned i = 0; i != NumRegs; ++i) { + ISD::InputArg MyFlags; + MyFlags.VT = RegisterVT; + MyFlags.ArgVT = VT; + MyFlags.Used = CLI.IsReturnValueUsed; + if (CLI.RetSExt) + MyFlags.Flags.setSExt(); + if (CLI.RetZExt) + MyFlags.Flags.setZExt(); + if (CLI.IsInReg) + MyFlags.Flags.setInReg(); + CLI.Ins.push_back(MyFlags); + } + } + + // Handle all of the outgoing arguments. + CLI.clearOuts(); + for (auto &Arg : CLI.getArgs()) { + Type *FinalType = Arg.Ty; + if (Arg.IsByVal) + FinalType = cast(Arg.Ty)->getElementType(); + bool NeedsRegBlock = TLI.functionArgumentNeedsConsecutiveRegisters( + FinalType, CLI.CallConv, CLI.IsVarArg); + + ISD::ArgFlagsTy Flags; + if (Arg.IsZExt) + Flags.setZExt(); + if (Arg.IsSExt) + Flags.setSExt(); + if (Arg.IsInReg) + Flags.setInReg(); + if (Arg.IsSRet) + Flags.setSRet(); + if (Arg.IsByVal) + Flags.setByVal(); + if (Arg.IsInAlloca) { + Flags.setInAlloca(); + // Set the byval flag for CCAssignFn callbacks that don't know about + // inalloca. This way we can know how many bytes we should've allocated + // and how many bytes a callee cleanup function will pop. If we port + // inalloca to more targets, we'll have to add custom inalloca handling in + // the various CC lowering callbacks. + Flags.setByVal(); + } + if (Arg.IsByVal || Arg.IsInAlloca) { + PointerType *Ty = cast(Arg.Ty); + Type *ElementTy = Ty->getElementType(); + unsigned FrameSize = DL.getTypeAllocSize(ElementTy); + // For ByVal, alignment should come from FE. BE will guess if this info is + // not there, but there are cases it cannot get right. + unsigned FrameAlign = Arg.Alignment; + if (!FrameAlign) + FrameAlign = TLI.getByValTypeAlignment(ElementTy); + Flags.setByValSize(FrameSize); + Flags.setByValAlign(FrameAlign); + } + if (Arg.IsNest) + Flags.setNest(); + if (NeedsRegBlock) + Flags.setInConsecutiveRegs(); + unsigned OriginalAlignment = DL.getABITypeAlignment(Arg.Ty); + Flags.setOrigAlign(OriginalAlignment); + + CLI.OutVals.push_back(Arg.Val); + CLI.OutFlags.push_back(Flags); + } + + if (!fastLowerCall(CLI)) + return false; + + // Set all unused physreg defs as dead. + assert(CLI.Call && "No call instruction specified."); + CLI.Call->setPhysRegsDeadExcept(CLI.InRegs, TRI); + + if (CLI.NumResultRegs && CLI.CS) + updateValueMap(CLI.CS->getInstruction(), CLI.ResultReg, CLI.NumResultRegs); + + return true; +} + +bool FastISel::lowerCall(const CallInst *CI) { + ImmutableCallSite CS(CI); + + PointerType *PT = cast(CS.getCalledValue()->getType()); + FunctionType *FuncTy = cast(PT->getElementType()); + Type *RetTy = FuncTy->getReturnType(); + + ArgListTy Args; + ArgListEntry Entry; + Args.reserve(CS.arg_size()); + + for (ImmutableCallSite::arg_iterator i = CS.arg_begin(), e = CS.arg_end(); + i != e; ++i) { + Value *V = *i; + + // Skip empty types + if (V->getType()->isEmptyTy()) + continue; + + Entry.Val = V; + Entry.Ty = V->getType(); + + // Skip the first return-type Attribute to get to params. 
+ Entry.setAttributes(&CS, i - CS.arg_begin() + 1); + Args.push_back(Entry); + } + + // Check if target-independent constraints permit a tail call here. + // Target-dependent constraints are checked within fastLowerCall. + bool IsTailCall = CI->isTailCall(); + if (IsTailCall && !isInTailCallPosition(CS, TM)) + IsTailCall = false; + + CallLoweringInfo CLI; + CLI.setCallee(RetTy, FuncTy, CI->getCalledValue(), std::move(Args), CS) + .setTailCall(IsTailCall); + + return lowerCallTo(CLI); +} + +bool FastISel::selectCall(const User *I) { const CallInst *Call = cast(I); // Handle simple inline asms. if (const InlineAsm *IA = dyn_cast(Call->getCalledValue())) { + // If the inline asm has side effects, then make sure that no local value + // lives across by flushing the local value map. + if (IA->hasSideEffects()) + flushLocalValueMap(); + // Don't attempt to handle constraints. if (!IA->getConstraintString().empty()) return false; @@ -679,34 +1042,46 @@ bool FastISel::SelectCall(const User *I) { BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(TargetOpcode::INLINEASM)) - .addExternalSymbol(IA->getAsmString().c_str()) - .addImm(ExtraInfo); + .addExternalSymbol(IA->getAsmString().c_str()) + .addImm(ExtraInfo); return true; } MachineModuleInfo &MMI = FuncInfo.MF->getMMI(); ComputeUsesVAFloatArgument(*Call, &MMI); - const Function *F = Call->getCalledFunction(); - if (!F) return false; + // Handle intrinsic function calls. + if (const auto *II = dyn_cast(Call)) + return selectIntrinsicCall(II); + + // Usually, it does not make sense to initialize a value, + // make an unrelated function call and use the value, because + // it tends to be spilled on the stack. So, we move the pointer + // to the last local value to the beginning of the block, so that + // all the values which have already been materialized, + // appear after the call. It also makes sense to skip intrinsics + // since they tend to be inlined. + flushLocalValueMap(); - // Handle selected intrinsic function calls. - switch (F->getIntrinsicID()) { - default: break; - // At -O0 we don't care about the lifetime intrinsics. + return lowerCall(Call); +} + +bool FastISel::selectIntrinsicCall(const IntrinsicInst *II) { + switch (II->getIntrinsicID()) { + default: + break; + // At -O0 we don't care about the lifetime intrinsics. case Intrinsic::lifetime_start: case Intrinsic::lifetime_end: - // The donothing intrinsic does, well, nothing. + // The donothing intrinsic does, well, nothing. case Intrinsic::donothing: return true; - case Intrinsic::dbg_declare: { - const DbgDeclareInst *DI = cast(Call); + const DbgDeclareInst *DI = cast(II); DIVariable DIVar(DI->getVariable()); assert((!DIVar || DIVar.isVariable()) && - "Variable in DbgDeclareInst should be either null or a DIVariable."); - if (!DIVar || - !FuncInfo.MF->getMMI().hasDebugInfo()) { + "Variable in DbgDeclareInst should be either null or a DIVariable."); + if (!DIVar || !FuncInfo.MF->getMMI().hasDebugInfo()) { DEBUG(dbgs() << "Dropping debug info for " << *DI << "\n"); return true; } @@ -719,11 +1094,11 @@ bool FastISel::SelectCall(const User *I) { unsigned Offset = 0; Optional Op; - if (const Argument *Arg = dyn_cast(Address)) + if (const auto *Arg = dyn_cast(Address)) // Some arguments' frame index is recorded during argument lowering. 
Offset = FuncInfo.getArgumentFrameIndex(Arg); if (Offset) - Op = MachineOperand::CreateFI(Offset); + Op = MachineOperand::CreateFI(Offset); if (!Op) if (unsigned Reg = lookUpRegForValue(Address)) Op = MachineOperand::CreateReg(Reg, false); @@ -750,13 +1125,14 @@ bool FastISel::SelectCall(const User *I) { Op->setIsDebug(true); BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(TargetOpcode::DBG_VALUE), false, Op->getReg(), 0, - DI->getVariable()); + DI->getVariable(), DI->getExpression()); } else BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(TargetOpcode::DBG_VALUE)) .addOperand(*Op) .addImm(0) - .addMetadata(DI->getVariable()); + .addMetadata(DI->getVariable()) + .addMetadata(DI->getExpression()); } else { // We can't yet handle anything else here because it would require // generating code, thus altering codegen because of debug info. @@ -766,33 +1142,41 @@ bool FastISel::SelectCall(const User *I) { } case Intrinsic::dbg_value: { // This form of DBG_VALUE is target-independent. - const DbgValueInst *DI = cast(Call); + const DbgValueInst *DI = cast(II); const MCInstrDesc &II = TII.get(TargetOpcode::DBG_VALUE); const Value *V = DI->getValue(); if (!V) { // Currently the optimizer can produce this; insert an undef to // help debugging. Probably the optimizer should not do this. BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II) - .addReg(0U).addImm(DI->getOffset()) - .addMetadata(DI->getVariable()); - } else if (const ConstantInt *CI = dyn_cast(V)) { + .addReg(0U) + .addImm(DI->getOffset()) + .addMetadata(DI->getVariable()) + .addMetadata(DI->getExpression()); + } else if (const auto *CI = dyn_cast(V)) { if (CI->getBitWidth() > 64) BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II) - .addCImm(CI).addImm(DI->getOffset()) - .addMetadata(DI->getVariable()); + .addCImm(CI) + .addImm(DI->getOffset()) + .addMetadata(DI->getVariable()) + .addMetadata(DI->getExpression()); else BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II) - .addImm(CI->getZExtValue()).addImm(DI->getOffset()) - .addMetadata(DI->getVariable()); - } else if (const ConstantFP *CF = dyn_cast(V)) { + .addImm(CI->getZExtValue()) + .addImm(DI->getOffset()) + .addMetadata(DI->getVariable()) + .addMetadata(DI->getExpression()); + } else if (const auto *CF = dyn_cast(V)) { BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II) - .addFPImm(CF).addImm(DI->getOffset()) - .addMetadata(DI->getVariable()); + .addFPImm(CF) + .addImm(DI->getOffset()) + .addMetadata(DI->getVariable()) + .addMetadata(DI->getExpression()); } else if (unsigned Reg = lookUpRegForValue(V)) { // FIXME: This does not handle register-indirect values at offset 0. bool IsIndirect = DI->getOffset() != 0; - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, IsIndirect, - Reg, DI->getOffset(), DI->getVariable()); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, IsIndirect, Reg, + DI->getOffset(), DI->getVariable(), DI->getExpression()); } else { // We can't yet handle anything else here because it would require // generating code, thus altering codegen because of debug info. @@ -801,46 +1185,38 @@ bool FastISel::SelectCall(const User *I) { return true; } case Intrinsic::objectsize: { - ConstantInt *CI = cast(Call->getArgOperand(1)); + ConstantInt *CI = cast(II->getArgOperand(1)); unsigned long long Res = CI->isZero() ? 
-1ULL : 0; - Constant *ResCI = ConstantInt::get(Call->getType(), Res); + Constant *ResCI = ConstantInt::get(II->getType(), Res); unsigned ResultReg = getRegForValue(ResCI); - if (ResultReg == 0) + if (!ResultReg) return false; - UpdateValueMap(Call, ResultReg); + updateValueMap(II, ResultReg); return true; } case Intrinsic::expect: { - unsigned ResultReg = getRegForValue(Call->getArgOperand(0)); - if (ResultReg == 0) + unsigned ResultReg = getRegForValue(II->getArgOperand(0)); + if (!ResultReg) return false; - UpdateValueMap(Call, ResultReg); + updateValueMap(II, ResultReg); return true; } case Intrinsic::experimental_stackmap: - return SelectStackmap(Call); + return selectStackmap(II); + case Intrinsic::experimental_patchpoint_void: + case Intrinsic::experimental_patchpoint_i64: + return selectPatchpoint(II); } - // Usually, it does not make sense to initialize a value, - // make an unrelated function call and use the value, because - // it tends to be spilled on the stack. So, we move the pointer - // to the last local value to the beginning of the block, so that - // all the values which have already been materialized, - // appear after the call. It also makes sense to skip intrinsics - // since they tend to be inlined. - if (!isa(Call)) - flushLocalValueMap(); - - // An arbitrary call. Bail. - return false; + return fastLowerIntrinsicCall(II); } -bool FastISel::SelectCast(const User *I, unsigned Opcode) { +bool FastISel::selectCast(const User *I, unsigned Opcode) { EVT SrcVT = TLI.getValueType(I->getOperand(0)->getType()); EVT DstVT = TLI.getValueType(I->getType()); - if (SrcVT == MVT::Other || !SrcVT.isSimple() || - DstVT == MVT::Other || !DstVT.isSimple()) + if (SrcVT == MVT::Other || !SrcVT.isSimple() || DstVT == MVT::Other || + !DstVT.isSimple()) // Unhandled type. Halt "fast" selection and bail. return false; @@ -859,24 +1235,22 @@ bool FastISel::SelectCast(const User *I, unsigned Opcode) { bool InputRegIsKill = hasTrivialKill(I->getOperand(0)); - unsigned ResultReg = FastEmit_r(SrcVT.getSimpleVT(), - DstVT.getSimpleVT(), - Opcode, - InputReg, InputRegIsKill); + unsigned ResultReg = fastEmit_r(SrcVT.getSimpleVT(), DstVT.getSimpleVT(), + Opcode, InputReg, InputRegIsKill); if (!ResultReg) return false; - UpdateValueMap(I, ResultReg); + updateValueMap(I, ResultReg); return true; } -bool FastISel::SelectBitCast(const User *I) { +bool FastISel::selectBitCast(const User *I) { // If the bitcast doesn't change the type, just use the operand value. if (I->getType() == I->getOperand(0)->getType()) { unsigned Reg = getRegForValue(I->getOperand(0)); - if (Reg == 0) + if (!Reg) return false; - UpdateValueMap(I, Reg); + updateValueMap(I, Reg); return true; } @@ -891,17 +1265,15 @@ bool FastISel::SelectBitCast(const User *I) { MVT SrcVT = SrcEVT.getSimpleVT(); MVT DstVT = DstEVT.getSimpleVT(); unsigned Op0 = getRegForValue(I->getOperand(0)); - if (Op0 == 0) - // Unhandled operand. Halt "fast" selection and bail. + if (!Op0) // Unhandled operand. Halt "fast" selection and bail. return false; - bool Op0IsKill = hasTrivialKill(I->getOperand(0)); // First, try to perform the bitcast by inserting a reg-reg copy. unsigned ResultReg = 0; if (SrcVT == DstVT) { - const TargetRegisterClass* SrcClass = TLI.getRegClassFor(SrcVT); - const TargetRegisterClass* DstClass = TLI.getRegClassFor(DstVT); + const TargetRegisterClass *SrcClass = TLI.getRegClassFor(SrcVT); + const TargetRegisterClass *DstClass = TLI.getRegClassFor(DstVT); // Don't attempt a cross-class copy. It will likely fail. 
if (SrcClass == DstClass) { ResultReg = createResultReg(DstClass); @@ -912,28 +1284,27 @@ bool FastISel::SelectBitCast(const User *I) { // If the reg-reg copy failed, select a BITCAST opcode. if (!ResultReg) - ResultReg = FastEmit_r(SrcVT, DstVT, ISD::BITCAST, Op0, Op0IsKill); + ResultReg = fastEmit_r(SrcVT, DstVT, ISD::BITCAST, Op0, Op0IsKill); if (!ResultReg) return false; - UpdateValueMap(I, ResultReg); + updateValueMap(I, ResultReg); return true; } -bool -FastISel::SelectInstruction(const Instruction *I) { +bool FastISel::selectInstruction(const Instruction *I) { // Just before the terminator instruction, insert instructions to // feed PHI nodes in successor blocks. if (isa(I)) - if (!HandlePHINodesInSuccessorBlocks(I->getParent())) + if (!handlePHINodesInSuccessorBlocks(I->getParent())) return false; DbgLoc = I->getDebugLoc(); - MachineBasicBlock::iterator SavedInsertPt = FuncInfo.InsertPt; + SavedInsertPt = FuncInfo.InsertPt; - if (const CallInst *Call = dyn_cast(I)) { + if (const auto *Call = dyn_cast(I)) { const Function *F = Call->getCalledFunction(); LibFunc::Func Func; @@ -951,40 +1322,39 @@ FastISel::SelectInstruction(const Instruction *I) { } // First, try doing target-independent selection. - if (SelectOperator(I, I->getOpcode())) { - ++NumFastIselSuccessIndependent; - DbgLoc = DebugLoc(); - return true; - } - // Remove dead code. However, ignore call instructions since we've flushed - // the local value map and recomputed the insert point. - if (!isa(I)) { + if (!SkipTargetIndependentISel) { + if (selectOperator(I, I->getOpcode())) { + ++NumFastIselSuccessIndependent; + DbgLoc = DebugLoc(); + return true; + } + // Remove dead code. recomputeInsertPt(); if (SavedInsertPt != FuncInfo.InsertPt) removeDeadCode(FuncInfo.InsertPt, SavedInsertPt); + SavedInsertPt = FuncInfo.InsertPt; } - // Next, try calling the target to attempt to handle the instruction. - SavedInsertPt = FuncInfo.InsertPt; - if (TargetSelectInstruction(I)) { + if (fastSelectInstruction(I)) { ++NumFastIselSuccessTarget; DbgLoc = DebugLoc(); return true; } - // Check for dead code and remove as necessary. + // Remove dead code. recomputeInsertPt(); if (SavedInsertPt != FuncInfo.InsertPt) removeDeadCode(FuncInfo.InsertPt, SavedInsertPt); DbgLoc = DebugLoc(); + // Undo phi node updates, because they will be added again by SelectionDAG. + if (isa(I)) + FuncInfo.PHINodesToUpdate.resize(FuncInfo.OrigNumPHINodesToUpdate); return false; } -/// FastEmitBranch - Emit an unconditional branch to the given block, -/// unless it is the immediate (fall-through) successor, and update -/// the CFG. -void -FastISel::FastEmitBranch(MachineBasicBlock *MSucc, DebugLoc DbgLoc) { +/// Emit an unconditional branch to the given block, unless it is the immediate +/// (fall-through) successor, and update the CFG. +void FastISel::fastEmitBranch(MachineBasicBlock *MSucc, DebugLoc DbgLoc) { if (FuncInfo.MBB->getBasicBlock()->size() > 1 && FuncInfo.MBB->isLayoutSuccessor(MSucc)) { // For more accurate line information if this is the only instruction @@ -1002,54 +1372,51 @@ FastISel::FastEmitBranch(MachineBasicBlock *MSucc, DebugLoc DbgLoc) { FuncInfo.MBB->addSuccessor(MSucc, BranchWeight); } -/// SelectFNeg - Emit an FNeg operation. -/// -bool -FastISel::SelectFNeg(const User *I) { +/// Emit an FNeg operation. 
+bool FastISel::selectFNeg(const User *I) { unsigned OpReg = getRegForValue(BinaryOperator::getFNegArgument(I)); - if (OpReg == 0) return false; - + if (!OpReg) + return false; bool OpRegIsKill = hasTrivialKill(I); // If the target has ISD::FNEG, use it. EVT VT = TLI.getValueType(I->getType()); - unsigned ResultReg = FastEmit_r(VT.getSimpleVT(), VT.getSimpleVT(), - ISD::FNEG, OpReg, OpRegIsKill); - if (ResultReg != 0) { - UpdateValueMap(I, ResultReg); + unsigned ResultReg = fastEmit_r(VT.getSimpleVT(), VT.getSimpleVT(), ISD::FNEG, + OpReg, OpRegIsKill); + if (ResultReg) { + updateValueMap(I, ResultReg); return true; } // Bitcast the value to integer, twiddle the sign bit with xor, // and then bitcast it back to floating-point. - if (VT.getSizeInBits() > 64) return false; + if (VT.getSizeInBits() > 64) + return false; EVT IntVT = EVT::getIntegerVT(I->getContext(), VT.getSizeInBits()); if (!TLI.isTypeLegal(IntVT)) return false; - unsigned IntReg = FastEmit_r(VT.getSimpleVT(), IntVT.getSimpleVT(), + unsigned IntReg = fastEmit_r(VT.getSimpleVT(), IntVT.getSimpleVT(), ISD::BITCAST, OpReg, OpRegIsKill); - if (IntReg == 0) + if (!IntReg) return false; - unsigned IntResultReg = FastEmit_ri_(IntVT.getSimpleVT(), ISD::XOR, - IntReg, /*Kill=*/true, - UINT64_C(1) << (VT.getSizeInBits()-1), - IntVT.getSimpleVT()); - if (IntResultReg == 0) + unsigned IntResultReg = fastEmit_ri_( + IntVT.getSimpleVT(), ISD::XOR, IntReg, /*IsKill=*/true, + UINT64_C(1) << (VT.getSizeInBits() - 1), IntVT.getSimpleVT()); + if (!IntResultReg) return false; - ResultReg = FastEmit_r(IntVT.getSimpleVT(), VT.getSimpleVT(), - ISD::BITCAST, IntResultReg, /*Kill=*/true); - if (ResultReg == 0) + ResultReg = fastEmit_r(IntVT.getSimpleVT(), VT.getSimpleVT(), ISD::BITCAST, + IntResultReg, /*IsKill=*/true); + if (!ResultReg) return false; - UpdateValueMap(I, ResultReg); + updateValueMap(I, ResultReg); return true; } -bool -FastISel::SelectExtractValue(const User *U) { +bool FastISel::selectExtractValue(const User *U) { const ExtractValueInst *EVI = dyn_cast(U); if (!EVI) return false; @@ -1085,55 +1452,54 @@ FastISel::SelectExtractValue(const User *U) { for (unsigned i = 0; i < VTIndex; i++) ResultReg += TLI.getNumRegisters(FuncInfo.Fn->getContext(), AggValueVTs[i]); - UpdateValueMap(EVI, ResultReg); + updateValueMap(EVI, ResultReg); return true; } -bool -FastISel::SelectOperator(const User *I, unsigned Opcode) { +bool FastISel::selectOperator(const User *I, unsigned Opcode) { switch (Opcode) { case Instruction::Add: - return SelectBinaryOp(I, ISD::ADD); + return selectBinaryOp(I, ISD::ADD); case Instruction::FAdd: - return SelectBinaryOp(I, ISD::FADD); + return selectBinaryOp(I, ISD::FADD); case Instruction::Sub: - return SelectBinaryOp(I, ISD::SUB); + return selectBinaryOp(I, ISD::SUB); case Instruction::FSub: // FNeg is currently represented in LLVM IR as a special case of FSub. 
if (BinaryOperator::isFNeg(I)) - return SelectFNeg(I); - return SelectBinaryOp(I, ISD::FSUB); + return selectFNeg(I); + return selectBinaryOp(I, ISD::FSUB); case Instruction::Mul: - return SelectBinaryOp(I, ISD::MUL); + return selectBinaryOp(I, ISD::MUL); case Instruction::FMul: - return SelectBinaryOp(I, ISD::FMUL); + return selectBinaryOp(I, ISD::FMUL); case Instruction::SDiv: - return SelectBinaryOp(I, ISD::SDIV); + return selectBinaryOp(I, ISD::SDIV); case Instruction::UDiv: - return SelectBinaryOp(I, ISD::UDIV); + return selectBinaryOp(I, ISD::UDIV); case Instruction::FDiv: - return SelectBinaryOp(I, ISD::FDIV); + return selectBinaryOp(I, ISD::FDIV); case Instruction::SRem: - return SelectBinaryOp(I, ISD::SREM); + return selectBinaryOp(I, ISD::SREM); case Instruction::URem: - return SelectBinaryOp(I, ISD::UREM); + return selectBinaryOp(I, ISD::UREM); case Instruction::FRem: - return SelectBinaryOp(I, ISD::FREM); + return selectBinaryOp(I, ISD::FREM); case Instruction::Shl: - return SelectBinaryOp(I, ISD::SHL); + return selectBinaryOp(I, ISD::SHL); case Instruction::LShr: - return SelectBinaryOp(I, ISD::SRL); + return selectBinaryOp(I, ISD::SRL); case Instruction::AShr: - return SelectBinaryOp(I, ISD::SRA); + return selectBinaryOp(I, ISD::SRA); case Instruction::And: - return SelectBinaryOp(I, ISD::AND); + return selectBinaryOp(I, ISD::AND); case Instruction::Or: - return SelectBinaryOp(I, ISD::OR); + return selectBinaryOp(I, ISD::OR); case Instruction::Xor: - return SelectBinaryOp(I, ISD::XOR); + return selectBinaryOp(I, ISD::XOR); case Instruction::GetElementPtr: - return SelectGetElementPtr(I); + return selectGetElementPtr(I); case Instruction::Br: { const BranchInst *BI = cast(I); @@ -1141,7 +1507,7 @@ FastISel::SelectOperator(const User *I, unsigned Opcode) { if (BI->isUnconditional()) { const BasicBlock *LLVMSucc = BI->getSuccessor(0); MachineBasicBlock *MSucc = FuncInfo.MBBMap[LLVMSucc]; - FastEmitBranch(MSucc, BI->getDebugLoc()); + fastEmitBranch(MSucc, BI->getDebugLoc()); return true; } @@ -1152,7 +1518,7 @@ FastISel::SelectOperator(const User *I, unsigned Opcode) { case Instruction::Unreachable: if (TM.Options.TrapUnreachable) - return FastEmit_(MVT::Other, MVT::Other, ISD::TRAP) != 0; + return fastEmit_(MVT::Other, MVT::Other, ISD::TRAP) != 0; else return true; @@ -1165,38 +1531,39 @@ FastISel::SelectOperator(const User *I, unsigned Opcode) { return false; case Instruction::Call: - return SelectCall(I); + return selectCall(I); case Instruction::BitCast: - return SelectBitCast(I); + return selectBitCast(I); case Instruction::FPToSI: - return SelectCast(I, ISD::FP_TO_SINT); + return selectCast(I, ISD::FP_TO_SINT); case Instruction::ZExt: - return SelectCast(I, ISD::ZERO_EXTEND); + return selectCast(I, ISD::ZERO_EXTEND); case Instruction::SExt: - return SelectCast(I, ISD::SIGN_EXTEND); + return selectCast(I, ISD::SIGN_EXTEND); case Instruction::Trunc: - return SelectCast(I, ISD::TRUNCATE); + return selectCast(I, ISD::TRUNCATE); case Instruction::SIToFP: - return SelectCast(I, ISD::SINT_TO_FP); + return selectCast(I, ISD::SINT_TO_FP); case Instruction::IntToPtr: // Deliberate fall-through. 
case Instruction::PtrToInt: { EVT SrcVT = TLI.getValueType(I->getOperand(0)->getType()); EVT DstVT = TLI.getValueType(I->getType()); if (DstVT.bitsGT(SrcVT)) - return SelectCast(I, ISD::ZERO_EXTEND); + return selectCast(I, ISD::ZERO_EXTEND); if (DstVT.bitsLT(SrcVT)) - return SelectCast(I, ISD::TRUNCATE); + return selectCast(I, ISD::TRUNCATE); unsigned Reg = getRegForValue(I->getOperand(0)); - if (Reg == 0) return false; - UpdateValueMap(I, Reg); + if (!Reg) + return false; + updateValueMap(I, Reg); return true; } case Instruction::ExtractValue: - return SelectExtractValue(I); + return selectExtractValue(I); case Instruction::PHI: llvm_unreachable("FastISel shouldn't visit PHI nodes!"); @@ -1207,83 +1574,72 @@ FastISel::SelectOperator(const User *I, unsigned Opcode) { } } -FastISel::FastISel(FunctionLoweringInfo &funcInfo, - const TargetLibraryInfo *libInfo) - : FuncInfo(funcInfo), - MF(funcInfo.MF), - MRI(FuncInfo.MF->getRegInfo()), - MFI(*FuncInfo.MF->getFrameInfo()), - MCP(*FuncInfo.MF->getConstantPool()), - TM(FuncInfo.MF->getTarget()), - DL(*TM.getDataLayout()), - TII(*TM.getInstrInfo()), - TLI(*TM.getTargetLowering()), - TRI(*TM.getRegisterInfo()), - LibInfo(libInfo) { -} +FastISel::FastISel(FunctionLoweringInfo &FuncInfo, + const TargetLibraryInfo *LibInfo, + bool SkipTargetIndependentISel) + : FuncInfo(FuncInfo), MF(FuncInfo.MF), MRI(FuncInfo.MF->getRegInfo()), + MFI(*FuncInfo.MF->getFrameInfo()), MCP(*FuncInfo.MF->getConstantPool()), + TM(FuncInfo.MF->getTarget()), DL(*MF->getSubtarget().getDataLayout()), + TII(*MF->getSubtarget().getInstrInfo()), + TLI(*MF->getSubtarget().getTargetLowering()), + TRI(*MF->getSubtarget().getRegisterInfo()), LibInfo(LibInfo), + SkipTargetIndependentISel(SkipTargetIndependentISel) {} FastISel::~FastISel() {} -bool FastISel::FastLowerArguments() { +bool FastISel::fastLowerArguments() { return false; } + +bool FastISel::fastLowerCall(CallLoweringInfo & /*CLI*/) { return false; } + +bool FastISel::fastLowerIntrinsicCall(const IntrinsicInst * /*II*/) { return false; } -unsigned FastISel::FastEmit_(MVT, MVT, - unsigned) { - return 0; -} +unsigned FastISel::fastEmit_(MVT, MVT, unsigned) { return 0; } -unsigned FastISel::FastEmit_r(MVT, MVT, - unsigned, - unsigned /*Op0*/, bool /*Op0IsKill*/) { +unsigned FastISel::fastEmit_r(MVT, MVT, unsigned, unsigned /*Op0*/, + bool /*Op0IsKill*/) { return 0; } -unsigned FastISel::FastEmit_rr(MVT, MVT, - unsigned, - unsigned /*Op0*/, bool /*Op0IsKill*/, - unsigned /*Op1*/, bool /*Op1IsKill*/) { +unsigned FastISel::fastEmit_rr(MVT, MVT, unsigned, unsigned /*Op0*/, + bool /*Op0IsKill*/, unsigned /*Op1*/, + bool /*Op1IsKill*/) { return 0; } -unsigned FastISel::FastEmit_i(MVT, MVT, unsigned, uint64_t /*Imm*/) { +unsigned FastISel::fastEmit_i(MVT, MVT, unsigned, uint64_t /*Imm*/) { return 0; } -unsigned FastISel::FastEmit_f(MVT, MVT, - unsigned, const ConstantFP * /*FPImm*/) { +unsigned FastISel::fastEmit_f(MVT, MVT, unsigned, + const ConstantFP * /*FPImm*/) { return 0; } -unsigned FastISel::FastEmit_ri(MVT, MVT, - unsigned, - unsigned /*Op0*/, bool /*Op0IsKill*/, - uint64_t /*Imm*/) { +unsigned FastISel::fastEmit_ri(MVT, MVT, unsigned, unsigned /*Op0*/, + bool /*Op0IsKill*/, uint64_t /*Imm*/) { return 0; } -unsigned FastISel::FastEmit_rf(MVT, MVT, - unsigned, - unsigned /*Op0*/, bool /*Op0IsKill*/, +unsigned FastISel::fastEmit_rf(MVT, MVT, unsigned, unsigned /*Op0*/, + bool /*Op0IsKill*/, const ConstantFP * /*FPImm*/) { return 0; } -unsigned FastISel::FastEmit_rri(MVT, MVT, - unsigned, - unsigned /*Op0*/, bool /*Op0IsKill*/, 
- unsigned /*Op1*/, bool /*Op1IsKill*/, - uint64_t /*Imm*/) { +unsigned FastISel::fastEmit_rri(MVT, MVT, unsigned, unsigned /*Op0*/, + bool /*Op0IsKill*/, unsigned /*Op1*/, + bool /*Op1IsKill*/, uint64_t /*Imm*/) { return 0; } -/// FastEmit_ri_ - This method is a wrapper of FastEmit_ri. It first tries -/// to emit an instruction with an immediate operand using FastEmit_ri. +/// This method is a wrapper of fastEmit_ri. It first tries to emit an +/// instruction with an immediate operand using fastEmit_ri. /// If that fails, it materializes the immediate into a register and try -/// FastEmit_rr instead. -unsigned FastISel::FastEmit_ri_(MVT VT, unsigned Opcode, - unsigned Op0, bool Op0IsKill, - uint64_t Imm, MVT ImmType) { +/// fastEmit_rr instead. +unsigned FastISel::fastEmit_ri_(MVT VT, unsigned Opcode, unsigned Op0, + bool Op0IsKill, uint64_t Imm, MVT ImmType) { // If this is a multiply by a power of two, emit this as a shift left. if (Opcode == ISD::MUL && isPowerOf2_64(Imm)) { Opcode = ISD::SHL; @@ -1301,30 +1657,29 @@ unsigned FastISel::FastEmit_ri_(MVT VT, unsigned Opcode, return 0; // First check if immediate type is legal. If not, we can't use the ri form. - unsigned ResultReg = FastEmit_ri(VT, VT, Opcode, Op0, Op0IsKill, Imm); - if (ResultReg != 0) + unsigned ResultReg = fastEmit_ri(VT, VT, Opcode, Op0, Op0IsKill, Imm); + if (ResultReg) return ResultReg; - unsigned MaterialReg = FastEmit_i(ImmType, ImmType, ISD::Constant, Imm); - if (MaterialReg == 0) { + unsigned MaterialReg = fastEmit_i(ImmType, ImmType, ISD::Constant, Imm); + if (!MaterialReg) { // This is a bit ugly/slow, but failing here means falling out of // fast-isel, which would be very slow. - IntegerType *ITy = IntegerType::get(FuncInfo.Fn->getContext(), - VT.getSizeInBits()); + IntegerType *ITy = + IntegerType::get(FuncInfo.Fn->getContext(), VT.getSizeInBits()); MaterialReg = getRegForValue(ConstantInt::get(ITy, Imm)); - assert (MaterialReg != 0 && "Unable to materialize imm."); - if (MaterialReg == 0) return 0; + if (!MaterialReg) + return 0; } - return FastEmit_rr(VT, VT, Opcode, - Op0, Op0IsKill, - MaterialReg, /*Kill=*/true); + return fastEmit_rr(VT, VT, Opcode, Op0, Op0IsKill, MaterialReg, + /*IsKill=*/true); } -unsigned FastISel::createResultReg(const TargetRegisterClass* RC) { +unsigned FastISel::createResultReg(const TargetRegisterClass *RC) { return MRI.createVirtualRegister(RC); } -unsigned FastISel::constrainOperandRegClass(const MCInstrDesc &II, - unsigned Op, unsigned OpNum) { +unsigned FastISel::constrainOperandRegClass(const MCInstrDesc &II, unsigned Op, + unsigned OpNum) { if (TargetRegisterInfo::isVirtualRegister(Op)) { const TargetRegisterClass *RegClass = TII.getRegClass(II, OpNum, &TRI, *FuncInfo.MF); @@ -1340,8 +1695,8 @@ unsigned FastISel::constrainOperandRegClass(const MCInstrDesc &II, return Op; } -unsigned FastISel::FastEmitInst_(unsigned MachineInstOpcode, - const TargetRegisterClass* RC) { +unsigned FastISel::fastEmitInst_(unsigned MachineInstOpcode, + const TargetRegisterClass *RC) { unsigned ResultReg = createResultReg(RC); const MCInstrDesc &II = TII.get(MachineInstOpcode); @@ -1349,9 +1704,9 @@ unsigned FastISel::FastEmitInst_(unsigned MachineInstOpcode, return ResultReg; } -unsigned FastISel::FastEmitInst_r(unsigned MachineInstOpcode, - const TargetRegisterClass *RC, - unsigned Op0, bool Op0IsKill) { +unsigned FastISel::fastEmitInst_r(unsigned MachineInstOpcode, + const TargetRegisterClass *RC, unsigned Op0, + bool Op0IsKill) { const MCInstrDesc &II = TII.get(MachineInstOpcode); unsigned 
ResultReg = createResultReg(RC); @@ -1359,10 +1714,10 @@ unsigned FastISel::FastEmitInst_r(unsigned MachineInstOpcode, if (II.getNumDefs() >= 1) BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg) - .addReg(Op0, Op0IsKill * RegState::Kill); + .addReg(Op0, getKillRegState(Op0IsKill)); else { BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II) - .addReg(Op0, Op0IsKill * RegState::Kill); + .addReg(Op0, getKillRegState(Op0IsKill)); BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(TargetOpcode::COPY), ResultReg).addReg(II.ImplicitDefs[0]); } @@ -1370,10 +1725,10 @@ unsigned FastISel::FastEmitInst_r(unsigned MachineInstOpcode, return ResultReg; } -unsigned FastISel::FastEmitInst_rr(unsigned MachineInstOpcode, - const TargetRegisterClass *RC, - unsigned Op0, bool Op0IsKill, - unsigned Op1, bool Op1IsKill) { +unsigned FastISel::fastEmitInst_rr(unsigned MachineInstOpcode, + const TargetRegisterClass *RC, unsigned Op0, + bool Op0IsKill, unsigned Op1, + bool Op1IsKill) { const MCInstrDesc &II = TII.get(MachineInstOpcode); unsigned ResultReg = createResultReg(RC); @@ -1382,23 +1737,23 @@ unsigned FastISel::FastEmitInst_rr(unsigned MachineInstOpcode, if (II.getNumDefs() >= 1) BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg) - .addReg(Op0, Op0IsKill * RegState::Kill) - .addReg(Op1, Op1IsKill * RegState::Kill); + .addReg(Op0, getKillRegState(Op0IsKill)) + .addReg(Op1, getKillRegState(Op1IsKill)); else { BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II) - .addReg(Op0, Op0IsKill * RegState::Kill) - .addReg(Op1, Op1IsKill * RegState::Kill); + .addReg(Op0, getKillRegState(Op0IsKill)) + .addReg(Op1, getKillRegState(Op1IsKill)); BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(TargetOpcode::COPY), ResultReg).addReg(II.ImplicitDefs[0]); } return ResultReg; } -unsigned FastISel::FastEmitInst_rrr(unsigned MachineInstOpcode, - const TargetRegisterClass *RC, - unsigned Op0, bool Op0IsKill, - unsigned Op1, bool Op1IsKill, - unsigned Op2, bool Op2IsKill) { +unsigned FastISel::fastEmitInst_rrr(unsigned MachineInstOpcode, + const TargetRegisterClass *RC, unsigned Op0, + bool Op0IsKill, unsigned Op1, + bool Op1IsKill, unsigned Op2, + bool Op2IsKill) { const MCInstrDesc &II = TII.get(MachineInstOpcode); unsigned ResultReg = createResultReg(RC); @@ -1408,48 +1763,46 @@ unsigned FastISel::FastEmitInst_rrr(unsigned MachineInstOpcode, if (II.getNumDefs() >= 1) BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg) - .addReg(Op0, Op0IsKill * RegState::Kill) - .addReg(Op1, Op1IsKill * RegState::Kill) - .addReg(Op2, Op2IsKill * RegState::Kill); + .addReg(Op0, getKillRegState(Op0IsKill)) + .addReg(Op1, getKillRegState(Op1IsKill)) + .addReg(Op2, getKillRegState(Op2IsKill)); else { BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II) - .addReg(Op0, Op0IsKill * RegState::Kill) - .addReg(Op1, Op1IsKill * RegState::Kill) - .addReg(Op2, Op2IsKill * RegState::Kill); + .addReg(Op0, getKillRegState(Op0IsKill)) + .addReg(Op1, getKillRegState(Op1IsKill)) + .addReg(Op2, getKillRegState(Op2IsKill)); BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(TargetOpcode::COPY), ResultReg).addReg(II.ImplicitDefs[0]); } return ResultReg; } -unsigned FastISel::FastEmitInst_ri(unsigned MachineInstOpcode, - const TargetRegisterClass *RC, - unsigned Op0, bool Op0IsKill, - uint64_t Imm) { +unsigned FastISel::fastEmitInst_ri(unsigned MachineInstOpcode, + const TargetRegisterClass *RC, unsigned Op0, + bool Op0IsKill, uint64_t Imm) { const MCInstrDesc &II = TII.get(MachineInstOpcode); 
unsigned ResultReg = createResultReg(RC); - RC = TII.getRegClass(II, II.getNumDefs(), &TRI, *FuncInfo.MF); - MRI.constrainRegClass(Op0, RC); + Op0 = constrainOperandRegClass(II, Op0, II.getNumDefs()); if (II.getNumDefs() >= 1) BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg) - .addReg(Op0, Op0IsKill * RegState::Kill) - .addImm(Imm); + .addReg(Op0, getKillRegState(Op0IsKill)) + .addImm(Imm); else { BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II) - .addReg(Op0, Op0IsKill * RegState::Kill) - .addImm(Imm); + .addReg(Op0, getKillRegState(Op0IsKill)) + .addImm(Imm); BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(TargetOpcode::COPY), ResultReg).addReg(II.ImplicitDefs[0]); } return ResultReg; } -unsigned FastISel::FastEmitInst_rii(unsigned MachineInstOpcode, - const TargetRegisterClass *RC, - unsigned Op0, bool Op0IsKill, - uint64_t Imm1, uint64_t Imm2) { +unsigned FastISel::fastEmitInst_rii(unsigned MachineInstOpcode, + const TargetRegisterClass *RC, unsigned Op0, + bool Op0IsKill, uint64_t Imm1, + uint64_t Imm2) { const MCInstrDesc &II = TII.get(MachineInstOpcode); unsigned ResultReg = createResultReg(RC); @@ -1457,24 +1810,23 @@ unsigned FastISel::FastEmitInst_rii(unsigned MachineInstOpcode, if (II.getNumDefs() >= 1) BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg) - .addReg(Op0, Op0IsKill * RegState::Kill) - .addImm(Imm1) - .addImm(Imm2); + .addReg(Op0, getKillRegState(Op0IsKill)) + .addImm(Imm1) + .addImm(Imm2); else { BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II) - .addReg(Op0, Op0IsKill * RegState::Kill) - .addImm(Imm1) - .addImm(Imm2); + .addReg(Op0, getKillRegState(Op0IsKill)) + .addImm(Imm1) + .addImm(Imm2); BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(TargetOpcode::COPY), ResultReg).addReg(II.ImplicitDefs[0]); } return ResultReg; } -unsigned FastISel::FastEmitInst_rf(unsigned MachineInstOpcode, - const TargetRegisterClass *RC, - unsigned Op0, bool Op0IsKill, - const ConstantFP *FPImm) { +unsigned FastISel::fastEmitInst_rf(unsigned MachineInstOpcode, + const TargetRegisterClass *RC, unsigned Op0, + bool Op0IsKill, const ConstantFP *FPImm) { const MCInstrDesc &II = TII.get(MachineInstOpcode); unsigned ResultReg = createResultReg(RC); @@ -1482,23 +1834,22 @@ unsigned FastISel::FastEmitInst_rf(unsigned MachineInstOpcode, if (II.getNumDefs() >= 1) BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg) - .addReg(Op0, Op0IsKill * RegState::Kill) - .addFPImm(FPImm); + .addReg(Op0, getKillRegState(Op0IsKill)) + .addFPImm(FPImm); else { BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II) - .addReg(Op0, Op0IsKill * RegState::Kill) - .addFPImm(FPImm); + .addReg(Op0, getKillRegState(Op0IsKill)) + .addFPImm(FPImm); BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(TargetOpcode::COPY), ResultReg).addReg(II.ImplicitDefs[0]); } return ResultReg; } -unsigned FastISel::FastEmitInst_rri(unsigned MachineInstOpcode, - const TargetRegisterClass *RC, - unsigned Op0, bool Op0IsKill, - unsigned Op1, bool Op1IsKill, - uint64_t Imm) { +unsigned FastISel::fastEmitInst_rri(unsigned MachineInstOpcode, + const TargetRegisterClass *RC, unsigned Op0, + bool Op0IsKill, unsigned Op1, + bool Op1IsKill, uint64_t Imm) { const MCInstrDesc &II = TII.get(MachineInstOpcode); unsigned ResultReg = createResultReg(RC); @@ -1507,25 +1858,25 @@ unsigned FastISel::FastEmitInst_rri(unsigned MachineInstOpcode, if (II.getNumDefs() >= 1) BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg) - .addReg(Op0, Op0IsKill * RegState::Kill) - 
.addReg(Op1, Op1IsKill * RegState::Kill) - .addImm(Imm); + .addReg(Op0, getKillRegState(Op0IsKill)) + .addReg(Op1, getKillRegState(Op1IsKill)) + .addImm(Imm); else { BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II) - .addReg(Op0, Op0IsKill * RegState::Kill) - .addReg(Op1, Op1IsKill * RegState::Kill) - .addImm(Imm); + .addReg(Op0, getKillRegState(Op0IsKill)) + .addReg(Op1, getKillRegState(Op1IsKill)) + .addImm(Imm); BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(TargetOpcode::COPY), ResultReg).addReg(II.ImplicitDefs[0]); } return ResultReg; } -unsigned FastISel::FastEmitInst_rrii(unsigned MachineInstOpcode, +unsigned FastISel::fastEmitInst_rrii(unsigned MachineInstOpcode, const TargetRegisterClass *RC, - unsigned Op0, bool Op0IsKill, - unsigned Op1, bool Op1IsKill, - uint64_t Imm1, uint64_t Imm2) { + unsigned Op0, bool Op0IsKill, unsigned Op1, + bool Op1IsKill, uint64_t Imm1, + uint64_t Imm2) { const MCInstrDesc &II = TII.get(MachineInstOpcode); unsigned ResultReg = createResultReg(RC); @@ -1534,28 +1885,30 @@ unsigned FastISel::FastEmitInst_rrii(unsigned MachineInstOpcode, if (II.getNumDefs() >= 1) BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg) - .addReg(Op0, Op0IsKill * RegState::Kill) - .addReg(Op1, Op1IsKill * RegState::Kill) - .addImm(Imm1).addImm(Imm2); + .addReg(Op0, getKillRegState(Op0IsKill)) + .addReg(Op1, getKillRegState(Op1IsKill)) + .addImm(Imm1) + .addImm(Imm2); else { BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II) - .addReg(Op0, Op0IsKill * RegState::Kill) - .addReg(Op1, Op1IsKill * RegState::Kill) - .addImm(Imm1).addImm(Imm2); + .addReg(Op0, getKillRegState(Op0IsKill)) + .addReg(Op1, getKillRegState(Op1IsKill)) + .addImm(Imm1) + .addImm(Imm2); BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(TargetOpcode::COPY), ResultReg).addReg(II.ImplicitDefs[0]); } return ResultReg; } -unsigned FastISel::FastEmitInst_i(unsigned MachineInstOpcode, - const TargetRegisterClass *RC, - uint64_t Imm) { +unsigned FastISel::fastEmitInst_i(unsigned MachineInstOpcode, + const TargetRegisterClass *RC, uint64_t Imm) { unsigned ResultReg = createResultReg(RC); const MCInstrDesc &II = TII.get(MachineInstOpcode); if (II.getNumDefs() >= 1) - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg).addImm(Imm); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg) + .addImm(Imm); else { BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II).addImm(Imm); BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, @@ -1564,41 +1917,41 @@ unsigned FastISel::FastEmitInst_i(unsigned MachineInstOpcode, return ResultReg; } -unsigned FastISel::FastEmitInst_ii(unsigned MachineInstOpcode, - const TargetRegisterClass *RC, - uint64_t Imm1, uint64_t Imm2) { +unsigned FastISel::fastEmitInst_ii(unsigned MachineInstOpcode, + const TargetRegisterClass *RC, uint64_t Imm1, + uint64_t Imm2) { unsigned ResultReg = createResultReg(RC); const MCInstrDesc &II = TII.get(MachineInstOpcode); if (II.getNumDefs() >= 1) BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg) - .addImm(Imm1).addImm(Imm2); + .addImm(Imm1) + .addImm(Imm2); else { - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II).addImm(Imm1).addImm(Imm2); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II).addImm(Imm1) + .addImm(Imm2); BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(TargetOpcode::COPY), ResultReg).addReg(II.ImplicitDefs[0]); } return ResultReg; } -unsigned FastISel::FastEmitInst_extractsubreg(MVT RetVT, - unsigned Op0, bool Op0IsKill, - uint32_t Idx) { +unsigned 
FastISel::fastEmitInst_extractsubreg(MVT RetVT, unsigned Op0, + bool Op0IsKill, uint32_t Idx) { unsigned ResultReg = createResultReg(TLI.getRegClassFor(RetVT)); assert(TargetRegisterInfo::isVirtualRegister(Op0) && "Cannot yet extract from physregs"); const TargetRegisterClass *RC = MRI.getRegClass(Op0); MRI.constrainRegClass(Op0, TRI.getSubClassWithSubReg(RC, Idx)); - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, - DbgLoc, TII.get(TargetOpcode::COPY), ResultReg) - .addReg(Op0, getKillRegState(Op0IsKill), Idx); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(TargetOpcode::COPY), + ResultReg).addReg(Op0, getKillRegState(Op0IsKill), Idx); return ResultReg; } -/// FastEmitZExtFromI1 - Emit MachineInstrs to compute the value of Op -/// with all but the least significant bit set to zero. -unsigned FastISel::FastEmitZExtFromI1(MVT VT, unsigned Op0, bool Op0IsKill) { - return FastEmit_ri(VT, VT, ISD::AND, Op0, Op0IsKill, 1); +/// Emit MachineInstrs to compute the value of Op with all but the least +/// significant bit set to zero. +unsigned FastISel::fastEmitZExtFromI1(MVT VT, unsigned Op0, bool Op0IsKill) { + return fastEmit_ri(VT, VT, ISD::AND, Op0, Op0IsKill, 1); } /// HandlePHINodesInSuccessorBlocks - Handle PHI nodes in successor blocks. @@ -1607,22 +1960,24 @@ unsigned FastISel::FastEmitZExtFromI1(MVT VT, unsigned Op0, bool Op0IsKill) { /// nodes as input. We cannot just directly add them, because expansion /// might result in multiple MBB's for one BB. As such, the start of the /// BB might correspond to a different MBB than the end. -bool FastISel::HandlePHINodesInSuccessorBlocks(const BasicBlock *LLVMBB) { +bool FastISel::handlePHINodesInSuccessorBlocks(const BasicBlock *LLVMBB) { const TerminatorInst *TI = LLVMBB->getTerminator(); SmallPtrSet SuccsHandled; - unsigned OrigNumPHINodesToUpdate = FuncInfo.PHINodesToUpdate.size(); + FuncInfo.OrigNumPHINodesToUpdate = FuncInfo.PHINodesToUpdate.size(); // Check successor nodes' PHI nodes that expect a constant to be available // from this block. for (unsigned succ = 0, e = TI->getNumSuccessors(); succ != e; ++succ) { const BasicBlock *SuccBB = TI->getSuccessor(succ); - if (!isa(SuccBB->begin())) continue; + if (!isa(SuccBB->begin())) + continue; MachineBasicBlock *SuccMBB = FuncInfo.MBBMap[SuccBB]; // If this terminator has multiple identical successors (common for // switches), only handle each succ once. - if (!SuccsHandled.insert(SuccMBB)) continue; + if (!SuccsHandled.insert(SuccMBB).second) + continue; MachineBasicBlock::iterator MBBI = SuccMBB->begin(); @@ -1630,10 +1985,11 @@ bool FastISel::HandlePHINodesInSuccessorBlocks(const BasicBlock *LLVMBB) { // nodes and Machine PHI nodes, but the incoming operands have not been // emitted yet. for (BasicBlock::const_iterator I = SuccBB->begin(); - const PHINode *PN = dyn_cast(I); ++I) { + const auto *PN = dyn_cast(I); ++I) { // Ignore dead phi's. - if (PN->use_empty()) continue; + if (PN->use_empty()) + continue; // Only handle legal types. Two interesting things to note here. First, // by bailing out early, we may leave behind some dead instructions, @@ -1644,10 +2000,8 @@ bool FastISel::HandlePHINodesInSuccessorBlocks(const BasicBlock *LLVMBB) { EVT VT = TLI.getValueType(PN->getType(), /*AllowUnknown=*/true); if (VT == MVT::Other || !TLI.isTypeLegal(VT)) { // Handle integer promotions, though, because they're common and easy. 
- if (VT == MVT::i1 || VT == MVT::i8 || VT == MVT::i16) - VT = TLI.getTypeToTransformTo(LLVMBB->getContext(), VT); - else { - FuncInfo.PHINodesToUpdate.resize(OrigNumPHINodesToUpdate); + if (!(VT == MVT::i1 || VT == MVT::i8 || VT == MVT::i16)) { + FuncInfo.PHINodesToUpdate.resize(FuncInfo.OrigNumPHINodesToUpdate); return false; } } @@ -1657,12 +2011,12 @@ bool FastISel::HandlePHINodesInSuccessorBlocks(const BasicBlock *LLVMBB) { // Set the DebugLoc for the copy. Prefer the location of the operand // if there is one; use the location of the PHI otherwise. DbgLoc = PN->getDebugLoc(); - if (const Instruction *Inst = dyn_cast(PHIOp)) + if (const auto *Inst = dyn_cast(PHIOp)) DbgLoc = Inst->getDebugLoc(); unsigned Reg = getRegForValue(PHIOp); - if (Reg == 0) { - FuncInfo.PHINodesToUpdate.resize(OrigNumPHINodesToUpdate); + if (!Reg) { + FuncInfo.PHINodesToUpdate.resize(FuncInfo.OrigNumPHINodesToUpdate); return false; } FuncInfo.PHINodesToUpdate.push_back(std::make_pair(MBBI++, Reg)); @@ -1675,17 +2029,17 @@ bool FastISel::HandlePHINodesInSuccessorBlocks(const BasicBlock *LLVMBB) { bool FastISel::tryToFoldLoad(const LoadInst *LI, const Instruction *FoldInst) { assert(LI->hasOneUse() && - "tryToFoldLoad expected a LoadInst with a single use"); + "tryToFoldLoad expected a LoadInst with a single use"); // We know that the load has a single use, but don't know what it is. If it // isn't one of the folded instructions, then we can't succeed here. Handle // this by scanning the single-use users of the load until we get to FoldInst. - unsigned MaxUsers = 6; // Don't scan down huge single-use chains of instrs. + unsigned MaxUsers = 6; // Don't scan down huge single-use chains of instrs. const Instruction *TheUser = LI->user_back(); - while (TheUser != FoldInst && // Scan up until we find FoldInst. + while (TheUser != FoldInst && // Scan up until we find FoldInst. // Stay in the right block. TheUser->getParent() == FoldInst->getParent() && - --MaxUsers) { // Don't scan too far. + --MaxUsers) { // Don't scan too far. // If there are multiple or no uses of this instruction, then bail out. if (!TheUser->hasOneUse()) return false; @@ -1707,7 +2061,7 @@ bool FastISel::tryToFoldLoad(const LoadInst *LI, const Instruction *FoldInst) { // then there actually was no reference to it. Perhaps the load is referenced // by a dead instruction. unsigned LoadReg = getRegForValue(LI); - if (LoadReg == 0) + if (!LoadReg) return false; // We can't fold if this vreg has no uses or more than one use. Multiple uses @@ -1765,19 +2119,20 @@ FastISel::createMachineMemOperandFor(const Instruction *I) const { Flags = MachineMemOperand::MOStore; Ptr = SI->getPointerOperand(); ValTy = SI->getValueOperand()->getType(); - } else { + } else return nullptr; - } - bool IsNonTemporal = I->getMetadata("nontemporal") != nullptr; - bool IsInvariant = I->getMetadata("invariant.load") != nullptr; - const MDNode *TBAAInfo = I->getMetadata(LLVMContext::MD_tbaa); + bool IsNonTemporal = I->getMetadata(LLVMContext::MD_nontemporal) != nullptr; + bool IsInvariant = I->getMetadata(LLVMContext::MD_invariant_load) != nullptr; const MDNode *Ranges = I->getMetadata(LLVMContext::MD_range); - if (Alignment == 0) // Ensure that codegen never sees alignment 0. + AAMDNodes AAInfo; + I->getAAMetadata(AAInfo); + + if (Alignment == 0) // Ensure that codegen never sees alignment 0. 
Alignment = DL.getABITypeAlignment(ValTy); - unsigned Size = TM.getDataLayout()->getTypeStoreSize(ValTy); + unsigned Size = DL.getTypeStoreSize(ValTy); if (IsVolatile) Flags |= MachineMemOperand::MOVolatile; @@ -1787,5 +2142,45 @@ FastISel::createMachineMemOperandFor(const Instruction *I) const { Flags |= MachineMemOperand::MOInvariant; return FuncInfo.MF->getMachineMemOperand(MachinePointerInfo(Ptr), Flags, Size, - Alignment, TBAAInfo, Ranges); + Alignment, AAInfo, Ranges); +} + +CmpInst::Predicate FastISel::optimizeCmpPredicate(const CmpInst *CI) const { + // If both operands are the same, then try to optimize or fold the cmp. + CmpInst::Predicate Predicate = CI->getPredicate(); + if (CI->getOperand(0) != CI->getOperand(1)) + return Predicate; + + switch (Predicate) { + default: llvm_unreachable("Invalid predicate!"); + case CmpInst::FCMP_FALSE: Predicate = CmpInst::FCMP_FALSE; break; + case CmpInst::FCMP_OEQ: Predicate = CmpInst::FCMP_ORD; break; + case CmpInst::FCMP_OGT: Predicate = CmpInst::FCMP_FALSE; break; + case CmpInst::FCMP_OGE: Predicate = CmpInst::FCMP_ORD; break; + case CmpInst::FCMP_OLT: Predicate = CmpInst::FCMP_FALSE; break; + case CmpInst::FCMP_OLE: Predicate = CmpInst::FCMP_ORD; break; + case CmpInst::FCMP_ONE: Predicate = CmpInst::FCMP_FALSE; break; + case CmpInst::FCMP_ORD: Predicate = CmpInst::FCMP_ORD; break; + case CmpInst::FCMP_UNO: Predicate = CmpInst::FCMP_UNO; break; + case CmpInst::FCMP_UEQ: Predicate = CmpInst::FCMP_TRUE; break; + case CmpInst::FCMP_UGT: Predicate = CmpInst::FCMP_UNO; break; + case CmpInst::FCMP_UGE: Predicate = CmpInst::FCMP_TRUE; break; + case CmpInst::FCMP_ULT: Predicate = CmpInst::FCMP_UNO; break; + case CmpInst::FCMP_ULE: Predicate = CmpInst::FCMP_TRUE; break; + case CmpInst::FCMP_UNE: Predicate = CmpInst::FCMP_UNO; break; + case CmpInst::FCMP_TRUE: Predicate = CmpInst::FCMP_TRUE; break; + + case CmpInst::ICMP_EQ: Predicate = CmpInst::FCMP_TRUE; break; + case CmpInst::ICMP_NE: Predicate = CmpInst::FCMP_FALSE; break; + case CmpInst::ICMP_UGT: Predicate = CmpInst::FCMP_FALSE; break; + case CmpInst::ICMP_UGE: Predicate = CmpInst::FCMP_TRUE; break; + case CmpInst::ICMP_ULT: Predicate = CmpInst::FCMP_FALSE; break; + case CmpInst::ICMP_ULE: Predicate = CmpInst::FCMP_TRUE; break; + case CmpInst::ICMP_SGT: Predicate = CmpInst::FCMP_FALSE; break; + case CmpInst::ICMP_SGE: Predicate = CmpInst::FCMP_TRUE; break; + case CmpInst::ICMP_SLT: Predicate = CmpInst::FCMP_FALSE; break; + case CmpInst::ICMP_SLE: Predicate = CmpInst::FCMP_TRUE; break; + } + + return Predicate; } diff --git a/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp b/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp index ae124e8..86b9542 100644 --- a/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp +++ b/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp @@ -36,6 +36,7 @@ #include "llvm/Target/TargetLowering.h" #include "llvm/Target/TargetOptions.h" #include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/Target/TargetSubtargetInfo.h" #include using namespace llvm; @@ -55,58 +56,71 @@ static bool isUsedOutsideOfDefiningBlock(const Instruction *I) { return false; } +static ISD::NodeType getPreferredExtendForValue(const Value *V) { + // For the users of the source value being used for compare instruction, if + // the number of signed predicate is greater than unsigned predicate, we + // prefer to use SIGN_EXTEND. 
+ // + // With this optimization, we would be able to reduce some redundant sign or + // zero extension instruction, and eventually more machine CSE opportunities + // can be exposed. + ISD::NodeType ExtendKind = ISD::ANY_EXTEND; + unsigned NumOfSigned = 0, NumOfUnsigned = 0; + for (const User *U : V->users()) { + if (const auto *CI = dyn_cast(U)) { + NumOfSigned += CI->isSigned(); + NumOfUnsigned += CI->isUnsigned(); + } + } + if (NumOfSigned > NumOfUnsigned) + ExtendKind = ISD::SIGN_EXTEND; + + return ExtendKind; +} + void FunctionLoweringInfo::set(const Function &fn, MachineFunction &mf, SelectionDAG *DAG) { - const TargetLowering *TLI = TM.getTargetLowering(); - Fn = &fn; MF = &mf; + TLI = MF->getSubtarget().getTargetLowering(); RegInfo = &MF->getRegInfo(); // Check whether the function can return without sret-demotion. SmallVector Outs; GetReturnInfo(Fn->getReturnType(), Fn->getAttributes(), Outs, *TLI); CanLowerReturn = TLI->CanLowerReturn(Fn->getCallingConv(), *MF, - Fn->isVarArg(), - Outs, Fn->getContext()); + Fn->isVarArg(), Outs, Fn->getContext()); // Initialize the mapping of values to registers. This is only set up for // instruction values that are used outside of the block that defines // them. Function::const_iterator BB = Fn->begin(), EB = Fn->end(); - for (BasicBlock::const_iterator I = BB->begin(), E = BB->end(); I != E; ++I) - if (const AllocaInst *AI = dyn_cast(I)) { - // Don't fold inalloca allocas or other dynamic allocas into the initial - // stack frame allocation, even if they are in the entry block. - if (!AI->isStaticAlloca()) - continue; - - if (const ConstantInt *CUI = dyn_cast(AI->getArraySize())) { - Type *Ty = AI->getAllocatedType(); - uint64_t TySize = TLI->getDataLayout()->getTypeAllocSize(Ty); - unsigned Align = - std::max((unsigned)TLI->getDataLayout()->getPrefTypeAlignment(Ty), - AI->getAlignment()); - - TySize *= CUI->getZExtValue(); // Get total allocated size. - if (TySize == 0) TySize = 1; // Don't create zero-sized stack objects. - - StaticAllocaMap[AI] = - MF->getFrameInfo()->CreateStackObject(TySize, Align, false, AI); - } - } - for (; BB != EB; ++BB) for (BasicBlock::const_iterator I = BB->begin(), E = BB->end(); I != E; ++I) { - // Look for dynamic allocas. if (const AllocaInst *AI = dyn_cast(I)) { - if (!AI->isStaticAlloca()) { + // Static allocas can be folded into the initial stack frame adjustment. + if (AI->isStaticAlloca()) { + const ConstantInt *CUI = cast(AI->getArraySize()); + Type *Ty = AI->getAllocatedType(); + uint64_t TySize = TLI->getDataLayout()->getTypeAllocSize(Ty); + unsigned Align = + std::max((unsigned)TLI->getDataLayout()->getPrefTypeAlignment(Ty), + AI->getAlignment()); + + TySize *= CUI->getZExtValue(); // Get total allocated size. + if (TySize == 0) TySize = 1; // Don't create zero-sized stack objects. + + StaticAllocaMap[AI] = + MF->getFrameInfo()->CreateStackObject(TySize, Align, false, AI); + + } else { unsigned Align = std::max( (unsigned)TLI->getDataLayout()->getPrefTypeAlignment( AI->getAllocatedType()), AI->getAlignment()); - unsigned StackAlign = TM.getFrameLowering()->getStackAlignment(); + unsigned StackAlign = + MF->getSubtarget().getFrameLowering()->getStackAlignment(); if (Align <= StackAlign) Align = 0; // Inform the Frame Information that we have variable-sized objects. @@ -126,9 +140,9 @@ void FunctionLoweringInfo::set(const Function &fn, MachineFunction &mf, if (Op.Type == InlineAsm::isClobber) { // Clobbers don't have SDValue operands, hence SDValue(). 
TLI->ComputeConstraintToUse(Op, SDValue(), DAG); - std::pair PhysReg = - TLI->getRegForInlineAsmConstraint(Op.ConstraintCode, - Op.ConstraintVT); + std::pair PhysReg = + TLI->getRegForInlineAsmConstraint(Op.ConstraintCode, + Op.ConstraintVT); if (PhysReg.first == SP) MF->getFrameInfo()->setHasInlineAsmWithSPAdjust(true); } @@ -136,6 +150,21 @@ void FunctionLoweringInfo::set(const Function &fn, MachineFunction &mf, } } + // Look for calls to the @llvm.va_start intrinsic. We can omit some + // prologue boilerplate for variadic functions that don't examine their + // arguments. + if (const auto *II = dyn_cast(I)) { + if (II->getIntrinsicID() == Intrinsic::vastart) + MF->getFrameInfo()->setHasVAStart(true); + } + + // If we have a musttail call in a variadic funciton, we need to ensure we + // forward implicit register parameters. + if (const auto *CI = dyn_cast(I)) { + if (CI->isMustTailCall() && Fn->isVarArg()) + MF->getFrameInfo()->setHasMustTailInVarArgFunc(true); + } + // Mark values used outside their block as exported, by allocating // a virtual register for them. if (isUsedOutsideOfDefiningBlock(I)) @@ -166,13 +195,16 @@ void FunctionLoweringInfo::set(const Function &fn, MachineFunction &mf, StaticAllocaMap.find(AI); if (SI != StaticAllocaMap.end()) { // Check for VLAs. int FI = SI->second; - MMI.setVariableDbgInfo(DI->getVariable(), + MMI.setVariableDbgInfo(DI->getVariable(), DI->getExpression(), FI, DI->getDebugLoc()); } } } } } + + // Decide the preferred extend type for a value. + PreferredExtendType[I] = getPreferredExtendForValue(I); } // Create an initial MachineBasicBlock for each LLVM BasicBlock in F. This @@ -208,7 +240,7 @@ void FunctionLoweringInfo::set(const Function &fn, MachineFunction &mf, for (unsigned vti = 0, vte = ValueVTs.size(); vti != vte; ++vti) { EVT VT = ValueVTs[vti]; unsigned NumRegisters = TLI->getNumRegisters(Fn->getContext(), VT); - const TargetInstrInfo *TII = MF->getTarget().getInstrInfo(); + const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo(); for (unsigned i = 0; i != NumRegisters; ++i) BuildMI(MBB, DL, TII->get(TargetOpcode::PHI), PHIReg + i); PHIReg += NumRegisters; @@ -241,12 +273,13 @@ void FunctionLoweringInfo::clear() { ArgDbgValues.clear(); ByValArgFrameIndexMap.clear(); RegFixups.clear(); + PreferredExtendType.clear(); } /// CreateReg - Allocate a single virtual register for the given type. unsigned FunctionLoweringInfo::CreateReg(MVT VT) { - return RegInfo-> - createVirtualRegister(TM.getTargetLowering()->getRegClassFor(VT)); + return RegInfo->createVirtualRegister( + MF->getSubtarget().getTargetLowering()->getRegClassFor(VT)); } /// CreateRegs - Allocate the appropriate number of virtual registers of @@ -257,7 +290,7 @@ unsigned FunctionLoweringInfo::CreateReg(MVT VT) { /// will assign registers for each member or element. /// unsigned FunctionLoweringInfo::CreateRegs(Type *Ty) { - const TargetLowering *TLI = TM.getTargetLowering(); + const TargetLowering *TLI = MF->getSubtarget().getTargetLowering(); SmallVector ValueVTs; ComputeValueVTs(*TLI, Ty, ValueVTs); @@ -306,8 +339,6 @@ void FunctionLoweringInfo::ComputePHILiveOutRegInfo(const PHINode *PN) { if (!Ty->isIntegerTy() || Ty->isVectorTy()) return; - const TargetLowering *TLI = TM.getTargetLowering(); - SmallVector ValueVTs; ComputeValueVTs(*TLI, Ty, ValueVTs); assert(ValueVTs.size() == 1 && @@ -452,7 +483,7 @@ void llvm::AddCatchInfo(const CallInst &I, MachineModuleInfo *MMI, // Gather all the type infos for this landing pad and pass them along to // MachineModuleInfo. 
- std::vector TyInfo; + std::vector TyInfo; unsigned N = I.getNumArgOperands(); for (unsigned i = N - 1; i > 1; --i) { @@ -510,14 +541,14 @@ void llvm::AddLandingPadInfo(const LandingPadInst &I, MachineModuleInfo &MMI, Value *Val = I.getClause(i - 1); if (I.isCatch(i - 1)) { MMI.addCatchTypeInfo(MBB, - dyn_cast(Val->stripPointerCasts())); + dyn_cast(Val->stripPointerCasts())); } else { // Add filters in a list. Constant *CVal = cast(Val); - SmallVector FilterList; + SmallVector FilterList; for (User::op_iterator II = CVal->op_begin(), IE = CVal->op_end(); II != IE; ++II) - FilterList.push_back(cast((*II)->stripPointerCasts())); + FilterList.push_back(cast((*II)->stripPointerCasts())); MMI.addFilterTypeInfo(MBB, FilterList); } diff --git a/lib/CodeGen/SelectionDAG/InstrEmitter.cpp b/lib/CodeGen/SelectionDAG/InstrEmitter.cpp index 7c124b8..a65f33e 100644 --- a/lib/CodeGen/SelectionDAG/InstrEmitter.cpp +++ b/lib/CodeGen/SelectionDAG/InstrEmitter.cpp @@ -27,7 +27,7 @@ #include "llvm/Support/MathExtras.h" #include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetLowering.h" -#include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetSubtargetInfo.h" using namespace llvm; #define DEBUG_TYPE "instr-emitter" @@ -265,12 +265,16 @@ void InstrEmitter::CreateVirtualRegisters(SDNode *Node, MIB.addReg(VRBase, RegState::Define); } - SDValue Op(Node, i); - if (IsClone) - VRBaseMap.erase(Op); - bool isNew = VRBaseMap.insert(std::make_pair(Op, VRBase)).second; - (void)isNew; // Silence compiler warning. - assert(isNew && "Node emitted out of order - early"); + // If this def corresponds to a result of the SDNode insert the VRBase into + // the lookup map. + if (i < NumResults) { + SDValue Op(Node, i); + if (IsClone) + VRBaseMap.erase(Op); + bool isNew = VRBaseMap.insert(std::make_pair(Op, VRBase)).second; + (void)isNew; // Silence compiler warning. + assert(isNew && "Node emitted out of order - early"); + } } } @@ -402,10 +406,10 @@ void InstrEmitter::AddOperand(MachineInstrBuilder &MIB, Type *Type = CP->getType(); // MachineConstantPool wants an explicit alignment. if (Align == 0) { - Align = TM->getDataLayout()->getPrefTypeAlignment(Type); + Align = MF->getSubtarget().getDataLayout()->getPrefTypeAlignment(Type); if (Align == 0) { // Alignment of vector types. FIXME! - Align = TM->getDataLayout()->getTypeAllocSize(Type); + Align = MF->getSubtarget().getDataLayout()->getTypeAllocSize(Type); } } @@ -643,14 +647,18 @@ MachineInstr * InstrEmitter::EmitDbgValue(SDDbgValue *SD, DenseMap &VRBaseMap) { uint64_t Offset = SD->getOffset(); - MDNode* MDPtr = SD->getMDPtr(); + MDNode *Var = SD->getVariable(); + MDNode *Expr = SD->getExpression(); DebugLoc DL = SD->getDebugLoc(); if (SD->getKind() == SDDbgValue::FRAMEIX) { // Stack address; this needs to be lowered in target-dependent fashion. // EmitTargetCodeForFrameDebugValue is responsible for allocation. return BuildMI(*MF, DL, TII->get(TargetOpcode::DBG_VALUE)) - .addFrameIndex(SD->getFrameIx()).addImm(Offset).addMetadata(MDPtr); + .addFrameIndex(SD->getFrameIx()) + .addImm(Offset) + .addMetadata(Var) + .addMetadata(Expr); } // Otherwise, we're going to create an instruction here. 
const MCInstrDesc &II = TII->get(TargetOpcode::DBG_VALUE); @@ -696,7 +704,8 @@ InstrEmitter::EmitDbgValue(SDDbgValue *SD, MIB.addReg(0U, RegState::Debug); } - MIB.addMetadata(MDPtr); + MIB.addMetadata(Var); + MIB.addMetadata(Expr); return &*MIB; } @@ -859,9 +868,7 @@ EmitMachineNode(SDNode *Node, bool IsClone, bool IsCloned, MIB->setPhysRegsDeadExcept(UsedRegs, *TRI); // Run post-isel target hook to adjust this instruction if needed. -#ifdef NDEBUG if (II.hasPostISelHook()) -#endif TLI->AdjustInstrPostInstrSelection(MIB, Node); } @@ -1013,11 +1020,8 @@ EmitSpecialNode(SDNode *Node, bool IsClone, bool IsCloned, /// at the given position in the given block. InstrEmitter::InstrEmitter(MachineBasicBlock *mbb, MachineBasicBlock::iterator insertpos) - : MF(mbb->getParent()), - MRI(&MF->getRegInfo()), - TM(&MF->getTarget()), - TII(TM->getInstrInfo()), - TRI(TM->getRegisterInfo()), - TLI(TM->getTargetLowering()), - MBB(mbb), InsertPos(insertpos) { -} + : MF(mbb->getParent()), MRI(&MF->getRegInfo()), + TII(MF->getSubtarget().getInstrInfo()), + TRI(MF->getSubtarget().getRegisterInfo()), + TLI(MF->getSubtarget().getTargetLowering()), MBB(mbb), + InsertPos(insertpos) {} diff --git a/lib/CodeGen/SelectionDAG/InstrEmitter.h b/lib/CodeGen/SelectionDAG/InstrEmitter.h index 920dda8..7b86f7d 100644 --- a/lib/CodeGen/SelectionDAG/InstrEmitter.h +++ b/lib/CodeGen/SelectionDAG/InstrEmitter.h @@ -13,8 +13,8 @@ // //===----------------------------------------------------------------------===// -#ifndef INSTREMITTER_H -#define INSTREMITTER_H +#ifndef LLVM_LIB_CODEGEN_SELECTIONDAG_INSTREMITTER_H +#define LLVM_LIB_CODEGEN_SELECTIONDAG_INSTREMITTER_H #include "llvm/ADT/DenseMap.h" #include "llvm/CodeGen/MachineBasicBlock.h" @@ -29,7 +29,6 @@ class SDDbgValue; class InstrEmitter { MachineFunction *MF; MachineRegisterInfo *MRI; - const TargetMachine *TM; const TargetInstrInfo *TII; const TargetRegisterInfo *TRI; const TargetLowering *TLI; diff --git a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp index c0e8c8c..5d17a5f 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp @@ -12,6 +12,7 @@ //===----------------------------------------------------------------------===// #include "llvm/CodeGen/SelectionDAG.h" +#include "llvm/ADT/SetVector.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallSet.h" #include "llvm/ADT/SmallVector.h" @@ -33,8 +34,11 @@ #include "llvm/Target/TargetFrameLowering.h" #include "llvm/Target/TargetLowering.h" #include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetSubtargetInfo.h" using namespace llvm; +#define DEBUG_TYPE "legalizedag" + //===----------------------------------------------------------------------===// /// SelectionDAGLegalize - This takes an arbitrary SelectionDAG as input and /// hacks on it until the target machine can handle it. This involves @@ -48,16 +52,17 @@ using namespace llvm; /// will attempt merge setcc and brc instructions into brcc's. /// namespace { -class SelectionDAGLegalize : public SelectionDAG::DAGUpdateListener { +class SelectionDAGLegalize { const TargetMachine &TM; const TargetLowering &TLI; SelectionDAG &DAG; - /// LegalizePosition - The iterator for walking through the node list. - SelectionDAG::allnodes_iterator LegalizePosition; + /// \brief The set of nodes which have already been legalized. We hold a + /// reference to it in order to update as necessary on node deletion. 
+ SmallPtrSetImpl &LegalizedNodes; - /// LegalizedNodes - The set of nodes which have already been legalized. - SmallPtrSet LegalizedNodes; + /// \brief A set of all the nodes updated during legalization. + SmallSetVector *UpdatedNodes; EVT getSetCCResultType(EVT VT) const { return TLI.getSetCCResultType(*DAG.getContext(), VT); @@ -66,14 +71,16 @@ class SelectionDAGLegalize : public SelectionDAG::DAGUpdateListener { // Libcall insertion helpers. public: - explicit SelectionDAGLegalize(SelectionDAG &DAG); + SelectionDAGLegalize(SelectionDAG &DAG, + SmallPtrSetImpl &LegalizedNodes, + SmallSetVector *UpdatedNodes = nullptr) + : TM(DAG.getTarget()), TLI(DAG.getTargetLoweringInfo()), DAG(DAG), + LegalizedNodes(LegalizedNodes), UpdatedNodes(UpdatedNodes) {} - void LegalizeDAG(); - -private: - /// LegalizeOp - Legalizes the given operation. + /// \brief Legalizes the given operation. void LegalizeOp(SDNode *Node); +private: SDValue OptimizeFloatStore(StoreSDNode *ST); void LegalizeLoadOps(SDNode *Node); @@ -145,37 +152,49 @@ private: void ExpandNode(SDNode *Node); void PromoteNode(SDNode *Node); - void ForgetNode(SDNode *N) { - LegalizedNodes.erase(N); - if (LegalizePosition == SelectionDAG::allnodes_iterator(N)) - ++LegalizePosition; - } - public: - // DAGUpdateListener implementation. - void NodeDeleted(SDNode *N, SDNode *E) override { - ForgetNode(N); - } - void NodeUpdated(SDNode *N) override {} - // Node replacement helpers void ReplacedNode(SDNode *N) { - if (N->use_empty()) { - DAG.RemoveDeadNode(N); - } else { - ForgetNode(N); - } + LegalizedNodes.erase(N); + if (UpdatedNodes) + UpdatedNodes->insert(N); } void ReplaceNode(SDNode *Old, SDNode *New) { + DEBUG(dbgs() << " ... replacing: "; Old->dump(&DAG); + dbgs() << " with: "; New->dump(&DAG)); + + assert(Old->getNumValues() == New->getNumValues() && + "Replacing one node with another that produces a different number " + "of values!"); DAG.ReplaceAllUsesWith(Old, New); + for (unsigned i = 0, e = Old->getNumValues(); i != e; ++i) + DAG.TransferDbgValues(SDValue(Old, i), SDValue(New, i)); + if (UpdatedNodes) + UpdatedNodes->insert(New); ReplacedNode(Old); } void ReplaceNode(SDValue Old, SDValue New) { + DEBUG(dbgs() << " ... replacing: "; Old->dump(&DAG); + dbgs() << " with: "; New->dump(&DAG)); + DAG.ReplaceAllUsesWith(Old, New); + DAG.TransferDbgValues(Old, New); + if (UpdatedNodes) + UpdatedNodes->insert(New.getNode()); ReplacedNode(Old.getNode()); } void ReplaceNode(SDNode *Old, const SDValue *New) { + DEBUG(dbgs() << " ... replacing: "; Old->dump(&DAG)); + DAG.ReplaceAllUsesWith(Old, New); + for (unsigned i = 0, e = Old->getNumValues(); i != e; ++i) { + DEBUG(dbgs() << (i == 0 ? " with: " + : " and: "); + New[i]->dump(&DAG)); + DAG.TransferDbgValues(SDValue(Old, i), New[i]); + if (UpdatedNodes) + UpdatedNodes->insert(New[i].getNode()); + } ReplacedNode(Old); } }; @@ -213,40 +232,6 @@ SelectionDAGLegalize::ShuffleWithNarrowerEltType(EVT NVT, EVT VT, SDLoc dl, return DAG.getVectorShuffle(NVT, dl, N1, N2, &NewMask[0]); } -SelectionDAGLegalize::SelectionDAGLegalize(SelectionDAG &dag) - : SelectionDAG::DAGUpdateListener(dag), - TM(dag.getTarget()), TLI(dag.getTargetLoweringInfo()), - DAG(dag) { -} - -void SelectionDAGLegalize::LegalizeDAG() { - DAG.AssignTopologicalOrder(); - - // Visit all the nodes. We start in topological order, so that we see - // nodes with their original operands intact. Legalization can produce - // new nodes which may themselves need to be legalized. Iterate until all - // nodes have been legalized. 
- for (;;) { - bool AnyLegalized = false; - for (LegalizePosition = DAG.allnodes_end(); - LegalizePosition != DAG.allnodes_begin(); ) { - --LegalizePosition; - - SDNode *N = LegalizePosition; - if (LegalizedNodes.insert(N)) { - AnyLegalized = true; - LegalizeOp(N); - } - } - if (!AnyLegalized) - break; - - } - - // Remove dead nodes now. - DAG.RemoveDeadNodes(); -} - /// ExpandConstantFP - Expands the ConstantFP node to an integer constant or /// a load from the constant pool. SDValue @@ -270,7 +255,7 @@ SelectionDAGLegalize::ExpandConstantFP(ConstantFPSDNode *CFP, bool UseCP) { EVT OrigVT = VT; EVT SVT = VT; - while (SVT != MVT::f32) { + while (SVT != MVT::f32 && SVT != MVT::f16) { SVT = (MVT::SimpleValueType)(SVT.getSimpleVT().SimpleTy - 1); if (ConstantFPSDNode::isValueValidForType(SVT, CFP->getValueAPF()) && // Only do this if the target has a native EXTLOAD instruction from @@ -291,7 +276,7 @@ SelectionDAGLegalize::ExpandConstantFP(ConstantFPSDNode *CFP, bool UseCP) { DAG.getExtLoad(ISD::EXTLOAD, dl, OrigVT, DAG.getEntryNode(), CPIdx, MachinePointerInfo::getConstantPool(), - VT, false, false, Alignment); + VT, false, false, false, Alignment); return Result; } SDValue Result = @@ -377,7 +362,7 @@ static void ExpandUnalignedStore(StoreSDNode *ST, SelectionDAG &DAG, // Load from the stack slot. SDValue Load = DAG.getExtLoad(ISD::EXTLOAD, dl, RegVT, Store, StackPtr, MachinePointerInfo(), - MemVT, false, false, 0); + MemVT, false, false, false, 0); Stores.push_back(DAG.getTruncStore(Load.getValue(1), dl, Load, Ptr, ST->getPointerInfo() @@ -385,7 +370,7 @@ static void ExpandUnalignedStore(StoreSDNode *ST, SelectionDAG &DAG, MemVT, ST->isVolatile(), ST->isNonTemporal(), MinAlign(ST->getAlignment(), Offset), - ST->getTBAAInfo())); + ST->getAAInfo())); // The order of the stores doesn't matter - say it with a TokenFactor. SDValue Result = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Stores); DAGLegalize->ReplaceNode(SDValue(ST, 0), Result); @@ -417,7 +402,7 @@ static void ExpandUnalignedStore(StoreSDNode *ST, SelectionDAG &DAG, Store2 = DAG.getTruncStore(Chain, dl, TLI.isLittleEndian()?Hi:Lo, Ptr, ST->getPointerInfo().getWithOffset(IncrementSize), NewStoredVT, ST->isVolatile(), ST->isNonTemporal(), - Alignment, ST->getTBAAInfo()); + Alignment, ST->getAAInfo()); SDValue Result = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Store1, Store2); @@ -476,7 +461,7 @@ ExpandUnalignedLoad(LoadSDNode *LD, SelectionDAG &DAG, LD->isVolatile(), LD->isNonTemporal(), LD->isInvariant(), MinAlign(LD->getAlignment(), Offset), - LD->getTBAAInfo()); + LD->getAAInfo()); // Follow the load with a store to the stack slot. Remember the store. Stores.push_back(DAG.getStore(Load.getValue(1), dl, Load, StackPtr, MachinePointerInfo(), false, false, 0)); @@ -494,8 +479,9 @@ ExpandUnalignedLoad(LoadSDNode *LD, SelectionDAG &DAG, LD->getPointerInfo().getWithOffset(Offset), MemVT, LD->isVolatile(), LD->isNonTemporal(), + LD->isInvariant(), MinAlign(LD->getAlignment(), Offset), - LD->getTBAAInfo()); + LD->getAAInfo()); // Follow the load with a store to the stack slot. Remember the store. // On big-endian machines this requires a truncating store to ensure // that the bits end up in the right place. @@ -508,7 +494,8 @@ ExpandUnalignedLoad(LoadSDNode *LD, SelectionDAG &DAG, // Finally, perform the original load only redirected to the stack slot. 
Load = DAG.getExtLoad(LD->getExtensionType(), dl, VT, TF, StackBase, - MachinePointerInfo(), LoadedVT, false, false, 0); + MachinePointerInfo(), LoadedVT, false,false, false, + 0); // Callers expect a MERGE_VALUES node. ValResult = Load; @@ -538,25 +525,27 @@ ExpandUnalignedLoad(LoadSDNode *LD, SelectionDAG &DAG, if (TLI.isLittleEndian()) { Lo = DAG.getExtLoad(ISD::ZEXTLOAD, dl, VT, Chain, Ptr, LD->getPointerInfo(), NewLoadedVT, LD->isVolatile(), - LD->isNonTemporal(), Alignment, LD->getTBAAInfo()); + LD->isNonTemporal(), LD->isInvariant(), Alignment, + LD->getAAInfo()); Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, DAG.getConstant(IncrementSize, Ptr.getValueType())); Hi = DAG.getExtLoad(HiExtType, dl, VT, Chain, Ptr, LD->getPointerInfo().getWithOffset(IncrementSize), NewLoadedVT, LD->isVolatile(), - LD->isNonTemporal(), MinAlign(Alignment, IncrementSize), - LD->getTBAAInfo()); + LD->isNonTemporal(),LD->isInvariant(), + MinAlign(Alignment, IncrementSize), LD->getAAInfo()); } else { Hi = DAG.getExtLoad(HiExtType, dl, VT, Chain, Ptr, LD->getPointerInfo(), NewLoadedVT, LD->isVolatile(), - LD->isNonTemporal(), Alignment, LD->getTBAAInfo()); + LD->isNonTemporal(), LD->isInvariant(), Alignment, + LD->getAAInfo()); Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, DAG.getConstant(IncrementSize, Ptr.getValueType())); Lo = DAG.getExtLoad(ISD::ZEXTLOAD, dl, VT, Chain, Ptr, LD->getPointerInfo().getWithOffset(IncrementSize), NewLoadedVT, LD->isVolatile(), - LD->isNonTemporal(), MinAlign(Alignment, IncrementSize), - LD->getTBAAInfo()); + LD->isNonTemporal(), LD->isInvariant(), + MinAlign(Alignment, IncrementSize), LD->getAAInfo()); } // aggregate the two parts @@ -659,7 +648,7 @@ SDValue SelectionDAGLegalize::OptimizeFloatStore(StoreSDNode* ST) { unsigned Alignment = ST->getAlignment(); bool isVolatile = ST->isVolatile(); bool isNonTemporal = ST->isNonTemporal(); - const MDNode *TBAAInfo = ST->getTBAAInfo(); + AAMDNodes AAInfo = ST->getAAInfo(); SDLoc dl(ST); if (ConstantFPSDNode *CFP = dyn_cast(ST->getValue())) { if (CFP->getValueType(0) == MVT::f32 && @@ -668,7 +657,7 @@ SDValue SelectionDAGLegalize::OptimizeFloatStore(StoreSDNode* ST) { bitcastToAPInt().zextOrTrunc(32), MVT::i32); return DAG.getStore(Chain, dl, Con, Ptr, ST->getPointerInfo(), - isVolatile, isNonTemporal, Alignment, TBAAInfo); + isVolatile, isNonTemporal, Alignment, AAInfo); } if (CFP->getValueType(0) == MVT::f64) { @@ -677,7 +666,7 @@ SDValue SelectionDAGLegalize::OptimizeFloatStore(StoreSDNode* ST) { SDValue Con = DAG.getConstant(CFP->getValueAPF().bitcastToAPInt(). 
zextOrTrunc(64), MVT::i64); return DAG.getStore(Chain, dl, Con, Ptr, ST->getPointerInfo(), - isVolatile, isNonTemporal, Alignment, TBAAInfo); + isVolatile, isNonTemporal, Alignment, AAInfo); } if (TLI.isTypeLegal(MVT::i32) && !ST->isVolatile()) { @@ -690,13 +679,13 @@ SDValue SelectionDAGLegalize::OptimizeFloatStore(StoreSDNode* ST) { if (TLI.isBigEndian()) std::swap(Lo, Hi); Lo = DAG.getStore(Chain, dl, Lo, Ptr, ST->getPointerInfo(), isVolatile, - isNonTemporal, Alignment, TBAAInfo); + isNonTemporal, Alignment, AAInfo); Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, DAG.getConstant(4, Ptr.getValueType())); Hi = DAG.getStore(Chain, dl, Hi, Ptr, ST->getPointerInfo().getWithOffset(4), isVolatile, isNonTemporal, MinAlign(Alignment, 4U), - TBAAInfo); + AAInfo); return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo, Hi); } @@ -714,7 +703,7 @@ void SelectionDAGLegalize::LegalizeStoreOps(SDNode *Node) { unsigned Alignment = ST->getAlignment(); bool isVolatile = ST->isVolatile(); bool isNonTemporal = ST->isNonTemporal(); - const MDNode *TBAAInfo = ST->getTBAAInfo(); + AAMDNodes AAInfo = ST->getAAInfo(); if (!ST->isTruncatingStore()) { if (SDNode *OptStore = OptimizeFloatStore(ST).getNode()) { @@ -731,10 +720,11 @@ void SelectionDAGLegalize::LegalizeStoreOps(SDNode *Node) { // If this is an unaligned store and the target doesn't support it, // expand it. unsigned AS = ST->getAddressSpace(); - if (!TLI.allowsUnalignedMemoryAccesses(ST->getMemoryVT(), AS)) { + unsigned Align = ST->getAlignment(); + if (!TLI.allowsMisalignedMemoryAccesses(ST->getMemoryVT(), AS, Align)) { Type *Ty = ST->getMemoryVT().getTypeForEVT(*DAG.getContext()); unsigned ABIAlignment= TLI.getDataLayout()->getABITypeAlignment(Ty); - if (ST->getAlignment() < ABIAlignment) + if (Align < ABIAlignment) ExpandUnalignedStore(cast(Node), DAG, TLI, this); } @@ -754,7 +744,7 @@ void SelectionDAGLegalize::LegalizeStoreOps(SDNode *Node) { SDValue Result = DAG.getStore(Chain, dl, Value, Ptr, ST->getPointerInfo(), isVolatile, - isNonTemporal, Alignment, TBAAInfo); + isNonTemporal, Alignment, AAInfo); ReplaceNode(SDValue(Node, 0), Result); break; } @@ -777,7 +767,7 @@ void SelectionDAGLegalize::LegalizeStoreOps(SDNode *Node) { SDValue Result = DAG.getTruncStore(Chain, dl, Value, Ptr, ST->getPointerInfo(), NVT, isVolatile, isNonTemporal, Alignment, - TBAAInfo); + AAInfo); ReplaceNode(SDValue(Node, 0), Result); } else if (StWidth & (StWidth - 1)) { // If not storing a power-of-2 number of bits, expand as two stores. @@ -799,7 +789,7 @@ void SelectionDAGLegalize::LegalizeStoreOps(SDNode *Node) { Lo = DAG.getTruncStore(Chain, dl, Value, Ptr, ST->getPointerInfo(), RoundVT, isVolatile, isNonTemporal, Alignment, - TBAAInfo); + AAInfo); // Store the remaining ExtraWidth bits. IncrementSize = RoundWidth / 8; @@ -811,7 +801,7 @@ void SelectionDAGLegalize::LegalizeStoreOps(SDNode *Node) { Hi = DAG.getTruncStore(Chain, dl, Hi, Ptr, ST->getPointerInfo().getWithOffset(IncrementSize), ExtraVT, isVolatile, isNonTemporal, - MinAlign(Alignment, IncrementSize), TBAAInfo); + MinAlign(Alignment, IncrementSize), AAInfo); } else { // Big endian - avoid unaligned stores. // TRUNCSTORE:i24 X -> TRUNCSTORE:i16 (srl X, 8), TRUNCSTORE@+2:i8 X @@ -821,7 +811,7 @@ void SelectionDAGLegalize::LegalizeStoreOps(SDNode *Node) { TLI.getShiftAmountTy(Value.getValueType()))); Hi = DAG.getTruncStore(Chain, dl, Hi, Ptr, ST->getPointerInfo(), RoundVT, isVolatile, isNonTemporal, Alignment, - TBAAInfo); + AAInfo); // Store the remaining ExtraWidth bits. 
IncrementSize = RoundWidth / 8; @@ -830,7 +820,7 @@ void SelectionDAGLegalize::LegalizeStoreOps(SDNode *Node) { Lo = DAG.getTruncStore(Chain, dl, Value, Ptr, ST->getPointerInfo().getWithOffset(IncrementSize), ExtraVT, isVolatile, isNonTemporal, - MinAlign(Alignment, IncrementSize), TBAAInfo); + MinAlign(Alignment, IncrementSize), AAInfo); } // The order of the stores doesn't matter. @@ -842,12 +832,13 @@ void SelectionDAGLegalize::LegalizeStoreOps(SDNode *Node) { default: llvm_unreachable("This action is not supported yet!"); case TargetLowering::Legal: { unsigned AS = ST->getAddressSpace(); + unsigned Align = ST->getAlignment(); // If this is an unaligned store and the target doesn't support it, // expand it. - if (!TLI.allowsUnalignedMemoryAccesses(ST->getMemoryVT(), AS)) { + if (!TLI.allowsMisalignedMemoryAccesses(ST->getMemoryVT(), AS, Align)) { Type *Ty = ST->getMemoryVT().getTypeForEVT(*DAG.getContext()); unsigned ABIAlignment= TLI.getDataLayout()->getABITypeAlignment(Ty); - if (ST->getAlignment() < ABIAlignment) + if (Align < ABIAlignment) ExpandUnalignedStore(cast(Node), DAG, TLI, this); } break; @@ -868,7 +859,7 @@ void SelectionDAGLegalize::LegalizeStoreOps(SDNode *Node) { Value = DAG.getNode(ISD::TRUNCATE, dl, StVT, Value); SDValue Result = DAG.getStore(Chain, dl, Value, Ptr, ST->getPointerInfo(), - isVolatile, isNonTemporal, Alignment, TBAAInfo); + isVolatile, isNonTemporal, Alignment, AAInfo); ReplaceNode(SDValue(Node, 0), Result); break; } @@ -893,13 +884,14 @@ void SelectionDAGLegalize::LegalizeLoadOps(SDNode *Node) { default: llvm_unreachable("This action is not supported yet!"); case TargetLowering::Legal: { unsigned AS = LD->getAddressSpace(); + unsigned Align = LD->getAlignment(); // If this is an unaligned load and the target doesn't support it, // expand it. - if (!TLI.allowsUnalignedMemoryAccesses(LD->getMemoryVT(), AS)) { + if (!TLI.allowsMisalignedMemoryAccesses(LD->getMemoryVT(), AS, Align)) { Type *Ty = LD->getMemoryVT().getTypeForEVT(*DAG.getContext()); unsigned ABIAlignment = TLI.getDataLayout()->getABITypeAlignment(Ty); - if (LD->getAlignment() < ABIAlignment){ + if (Align < ABIAlignment){ ExpandUnalignedLoad(cast(Node), DAG, TLI, RVal, RChain); } } @@ -928,6 +920,10 @@ void SelectionDAGLegalize::LegalizeLoadOps(SDNode *Node) { assert(RVal.getNode() != Node && "Load must be completely replaced"); DAG.ReplaceAllUsesOfValueWith(SDValue(Node, 0), RVal); DAG.ReplaceAllUsesOfValueWith(SDValue(Node, 1), RChain); + if (UpdatedNodes) { + UpdatedNodes->insert(RVal.getNode()); + UpdatedNodes->insert(RChain.getNode()); + } ReplacedNode(Node); } return; @@ -938,7 +934,8 @@ void SelectionDAGLegalize::LegalizeLoadOps(SDNode *Node) { unsigned Alignment = LD->getAlignment(); bool isVolatile = LD->isVolatile(); bool isNonTemporal = LD->isNonTemporal(); - const MDNode *TBAAInfo = LD->getTBAAInfo(); + bool isInvariant = LD->isInvariant(); + AAMDNodes AAInfo = LD->getAAInfo(); if (SrcWidth != SrcVT.getStoreSizeInBits() && // Some targets pretend to have an i1 loading operation, and actually @@ -965,7 +962,8 @@ void SelectionDAGLegalize::LegalizeLoadOps(SDNode *Node) { SDValue Result = DAG.getExtLoad(NewExtType, dl, Node->getValueType(0), Chain, Ptr, LD->getPointerInfo(), - NVT, isVolatile, isNonTemporal, Alignment, TBAAInfo); + NVT, isVolatile, isNonTemporal, isInvariant, Alignment, + AAInfo); Ch = Result.getValue(1); // The chain. 
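The RoundWidth/ExtraWidth hunks above split a store or load whose width is not a power of two into two accesses: a "round" part of the largest legal power-of-two width, and an "extra" part placed IncrementSize bytes further on. A standalone sketch of that arithmetic for the i24 case mentioned in the comments (plain C++, made-up names, not the DAG code; the little-endian branch is shown):

#include <cassert>
#include <cstdint>

// Store a 24-bit value as a 16-bit "round" part plus an 8-bit "extra" part,
// mirroring the little-endian branch of the two-access expansion:
//   [Ptr + 0] <- low RoundWidth bits, [Ptr + IncrementSize] <- remaining bits.
void storeI24LittleEndian(uint8_t *Ptr, uint32_t Value) {
  const unsigned StWidth = 24;
  const unsigned RoundWidth = 16;                 // largest power of 2 below StWidth
  const unsigned IncrementSize = RoundWidth / 8;  // == 2: byte offset of the 2nd store

  // First store: the low RoundWidth bits, in little-endian byte order.
  Ptr[0] = static_cast<uint8_t>(Value);
  Ptr[1] = static_cast<uint8_t>(Value >> 8);
  // Second store: the remaining ExtraWidth (= StWidth - RoundWidth) bits,
  // taken from Value >> RoundWidth, IncrementSize bytes further on.
  Ptr[IncrementSize] = static_cast<uint8_t>(Value >> RoundWidth);
  (void)StWidth;
}

int main() {
  uint8_t Buf[3] = {0, 0, 0};
  storeI24LittleEndian(Buf, 0xABCDEF);
  assert(Buf[0] == 0xEF && Buf[1] == 0xCD && Buf[2] == 0xAB);
}

The load-side expansion in the surrounding hunks is the symmetric operation: load the round part, load the extra part at the same offset, and recombine with a shift and OR.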
@@ -1002,7 +1000,7 @@ void SelectionDAGLegalize::LegalizeLoadOps(SDNode *Node) { Lo = DAG.getExtLoad(ISD::ZEXTLOAD, dl, Node->getValueType(0), Chain, Ptr, LD->getPointerInfo(), RoundVT, isVolatile, - isNonTemporal, Alignment, TBAAInfo); + isNonTemporal, isInvariant, Alignment, AAInfo); // Load the remaining ExtraWidth bits. IncrementSize = RoundWidth / 8; @@ -1010,8 +1008,8 @@ void SelectionDAGLegalize::LegalizeLoadOps(SDNode *Node) { DAG.getConstant(IncrementSize, Ptr.getValueType())); Hi = DAG.getExtLoad(ExtType, dl, Node->getValueType(0), Chain, Ptr, LD->getPointerInfo().getWithOffset(IncrementSize), - ExtraVT, isVolatile, isNonTemporal, - MinAlign(Alignment, IncrementSize), TBAAInfo); + ExtraVT, isVolatile, isNonTemporal, isInvariant, + MinAlign(Alignment, IncrementSize), AAInfo); // Build a factor node to remember that this load is independent of // the other one. @@ -1031,7 +1029,7 @@ void SelectionDAGLegalize::LegalizeLoadOps(SDNode *Node) { // Load the top RoundWidth bits. Hi = DAG.getExtLoad(ExtType, dl, Node->getValueType(0), Chain, Ptr, LD->getPointerInfo(), RoundVT, isVolatile, - isNonTemporal, Alignment, TBAAInfo); + isNonTemporal, isInvariant, Alignment, AAInfo); // Load the remaining ExtraWidth bits. IncrementSize = RoundWidth / 8; @@ -1040,8 +1038,8 @@ void SelectionDAGLegalize::LegalizeLoadOps(SDNode *Node) { Lo = DAG.getExtLoad(ISD::ZEXTLOAD, dl, Node->getValueType(0), Chain, Ptr, LD->getPointerInfo().getWithOffset(IncrementSize), - ExtraVT, isVolatile, isNonTemporal, - MinAlign(Alignment, IncrementSize), TBAAInfo); + ExtraVT, isVolatile, isNonTemporal, isInvariant, + MinAlign(Alignment, IncrementSize), AAInfo); // Build a factor node to remember that this load is independent of // the other one. @@ -1080,12 +1078,13 @@ void SelectionDAGLegalize::LegalizeLoadOps(SDNode *Node) { // it, expand it. EVT MemVT = LD->getMemoryVT(); unsigned AS = LD->getAddressSpace(); - if (!TLI.allowsUnalignedMemoryAccesses(MemVT, AS)) { + unsigned Align = LD->getAlignment(); + if (!TLI.allowsMisalignedMemoryAccesses(MemVT, AS, Align)) { Type *Ty = LD->getMemoryVT().getTypeForEVT(*DAG.getContext()); unsigned ABIAlignment = TLI.getDataLayout()->getABITypeAlignment(Ty); - if (LD->getAlignment() < ABIAlignment){ + if (Align < ABIAlignment){ ExpandUnalignedLoad(cast(Node), DAG, TLI, Value, Chain); } @@ -1148,6 +1147,10 @@ void SelectionDAGLegalize::LegalizeLoadOps(SDNode *Node) { assert(Value.getNode() != Node && "Load must be completely replaced"); DAG.ReplaceAllUsesOfValueWith(SDValue(Node, 0), Value); DAG.ReplaceAllUsesOfValueWith(SDValue(Node, 1), Chain); + if (UpdatedNodes) { + UpdatedNodes->insert(Value.getNode()); + UpdatedNodes->insert(Chain.getNode()); + } ReplacedNode(Node); } } @@ -1155,6 +1158,8 @@ void SelectionDAGLegalize::LegalizeLoadOps(SDNode *Node) { /// LegalizeOp - Return a legal replacement for the given operation, with /// all legal operands. void SelectionDAGLegalize::LegalizeOp(SDNode *Node) { + DEBUG(dbgs() << "\nLegalizing: "; Node->dump(&DAG)); + if (Node->getOpcode() == ISD::TargetConstant) // Allow illegal target nodes. 
return; @@ -1186,6 +1191,7 @@ void SelectionDAGLegalize::LegalizeOp(SDNode *Node) { if (Action != TargetLowering::Promote) Action = TLI.getOperationAction(Node->getOpcode(), MVT::Other); break; + case ISD::FP_TO_FP16: case ISD::SINT_TO_FP: case ISD::UINT_TO_FP: case ISD::EXTRACT_VECTOR_ELT: @@ -1334,10 +1340,7 @@ void SelectionDAGLegalize::LegalizeOp(SDNode *Node) { } if (NewNode != Node) { - DAG.ReplaceAllUsesWith(Node, NewNode); - for (unsigned i = 0, e = Node->getNumValues(); i != e; ++i) - DAG.TransferDbgValues(SDValue(Node, i), SDValue(NewNode, i)); - ReplacedNode(Node); + ReplaceNode(Node, NewNode); Node = NewNode; } switch (Action) { @@ -1348,19 +1351,19 @@ void SelectionDAGLegalize::LegalizeOp(SDNode *Node) { // a complete mess. SDValue Res = TLI.LowerOperation(SDValue(Node, 0), DAG); if (Res.getNode()) { - SmallVector ResultVals; - for (unsigned i = 0, e = Node->getNumValues(); i != e; ++i) { - if (e == 1) - ResultVals.push_back(Res); - else - ResultVals.push_back(Res.getValue(i)); - } - if (Res.getNode() != Node || Res.getResNo() != 0) { - DAG.ReplaceAllUsesWith(Node, ResultVals.data()); - for (unsigned i = 0, e = Node->getNumValues(); i != e; ++i) - DAG.TransferDbgValues(SDValue(Node, i), ResultVals[i]); - ReplacedNode(Node); + if (!(Res.getNode() != Node || Res.getResNo() != 0)) + return; + + if (Node->getNumValues() == 1) { + // We can just directly replace this node with the lowered value. + ReplaceNode(SDValue(Node, 0), Res); + return; } + + SmallVector ResultVals; + for (unsigned i = 0, e = Node->getNumValues(); i != e; ++i) + ResultVals.push_back(Res.getValue(i)); + ReplaceNode(Node, ResultVals.data()); return; } } @@ -1448,7 +1451,7 @@ SDValue SelectionDAGLegalize::ExpandExtractFromVectorThroughStack(SDValue Op) { return DAG.getExtLoad(ISD::EXTLOAD, dl, Op.getValueType(), Ch, StackPtr, MachinePointerInfo(), Vec.getValueType().getVectorElementType(), - false, false, 0); + false, false, false, 0); } SDValue SelectionDAGLegalize::ExpandInsertToVectorThroughStack(SDValue Op) { @@ -1483,7 +1486,7 @@ SDValue SelectionDAGLegalize::ExpandInsertToVectorThroughStack(SDValue Op) { StackPtr); // Store the subvector. - Ch = DAG.getStore(DAG.getEntryNode(), dl, Part, SubStackPtr, + Ch = DAG.getStore(Ch, dl, Part, SubStackPtr, MachinePointerInfo(), false, false, 0); // Finally, load the updated vector. 
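In the ExpandInsertToVectorThroughStack hunk just above, the subvector store is now chained off Ch (the store of the whole vector) instead of the entry node, presumably so the overwriting store cannot be reordered before the spill it depends on. A standalone sketch of the underlying spill/overwrite/reload idea in plain C++ (made-up names, not the DAG code):

#include <array>
#include <cassert>
#include <cstddef>

// Spill the vector to a temporary, overwrite one lane in memory, reload it.
template <typename T, std::size_t N>
std::array<T, N> insertElementViaMemory(const std::array<T, N> &Vec, T Elt,
                                        std::size_t Idx) {
  T Slot[N];                                      // stands in for the stack slot
  for (std::size_t I = 0; I != N; ++I)
    Slot[I] = Vec[I];                             // store the whole vector
  Slot[Idx] = Elt;                                // store the element/subvector
  std::array<T, N> Out;
  for (std::size_t I = 0; I != N; ++I)
    Out[I] = Slot[I];                             // finally, load the updated vector
  return Out;
}

int main() {
  std::array<int, 4> V{1, 2, 3, 4};
  auto R = insertElementViaMemory(V, 9, 2);
  assert(R[0] == 1 && R[2] == 9 && R[3] == 4);
}

The overwrite only gives the right result because it happens after the full spill and before the reload; at the DAG level that ordering is exactly what chaining the second store off Ch enforces.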
@@ -1623,7 +1626,8 @@ void SelectionDAGLegalize::ExpandDYNAMIC_STACKALLOC(SDNode* Node, SDValue SP = DAG.getCopyFromReg(Chain, dl, SPReg, VT); Chain = SP.getValue(1); unsigned Align = cast(Tmp3)->getZExtValue(); - unsigned StackAlign = TM.getFrameLowering()->getStackAlignment(); + unsigned StackAlign = + DAG.getSubtarget().getFrameLowering()->getStackAlignment(); Tmp1 = DAG.getNode(ISD::SUB, dl, VT, SP, Size); // Value if (Align > StackAlign) Tmp1 = DAG.getNode(ISD::AND, dl, VT, Tmp1, @@ -1797,7 +1801,7 @@ SDValue SelectionDAGLegalize::EmitStackConvert(SDValue SrcOp, assert(SlotSize < DestSize && "Unknown extension!"); return DAG.getExtLoad(ISD::EXTLOAD, dl, DestVT, Store, FIPtr, - PtrInfo, SlotVT, false, false, DestAlign); + PtrInfo, SlotVT, false, false, false, DestAlign); } SDValue SelectionDAGLegalize::ExpandSCALAR_TO_VECTOR(SDNode *Node) { @@ -1877,7 +1881,8 @@ ExpandBVWithShuffles(SDNode *Node, SelectionDAG &DAG, ShuffleVec.data()); else if (!TLI.isShuffleMaskLegal(ShuffleVec, VT)) return false; - NewIntermedVals.push_back(std::make_pair(Shuffle, FinalIndices)); + NewIntermedVals.push_back( + std::make_pair(Shuffle, std::move(FinalIndices))); } // If we had an odd number of defined values, then append the last @@ -2580,7 +2585,7 @@ SDValue SelectionDAGLegalize::ExpandLegalINT_TO_FP(bool isSigned, SDValue Load = DAG.getExtLoad(ISD::EXTLOAD, dl, DestVT, DAG.getEntryNode(), CPIdx, MachinePointerInfo::getConstantPool(), - MVT::f32, false, false, Alignment); + MVT::f32, false, false, false, Alignment); HandleSDNode Handle(Load); LegalizeOp(Load.getNode()); FudgeInReg = Handle.getValue(); @@ -2782,7 +2787,7 @@ SDValue SelectionDAGLegalize::ExpandBitCount(unsigned Opc, SDValue Op, // x = x | (x >>32); // for 64-bit input // return popcount(~x); // - // but see also: http://www.hackersdelight.org/HDcode/nlz.cc + // Ref: "Hacker's Delight" by Henry Warren EVT VT = Op.getValueType(); EVT ShVT = TLI.getShiftAmountTy(VT); unsigned len = VT.getSizeInBits(); @@ -2801,7 +2806,7 @@ SDValue SelectionDAGLegalize::ExpandBitCount(unsigned Opc, SDValue Op, // for now, we use: { return popcount(~x & (x - 1)); } // unless the target has ctlz but not ctpop, in which case we use: // { return 32 - nlz(~x & (x-1)); } - // see also http://www.hackersdelight.org/HDcode/ntz.cc + // Ref: "Hacker's Delight" by Henry Warren EVT VT = Op.getValueType(); SDValue Tmp3 = DAG.getNode(ISD::AND, dl, VT, DAG.getNOT(dl, Op, VT), @@ -3153,65 +3158,10 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) { Node->getOperand(0), Node->getValueType(0), dl); Results.push_back(Tmp1); break; - case ISD::FP_TO_SINT: { - EVT VT = Node->getOperand(0).getValueType(); - EVT NVT = Node->getValueType(0); - - // FIXME: Only f32 to i64 conversions are supported. 
- if (VT != MVT::f32 || NVT != MVT::i64) - break; - - // Expand f32 -> i64 conversion - // This algorithm comes from compiler-rt's implementation of fixsfdi: - // https://github.com/llvm-mirror/compiler-rt/blob/master/lib/builtins/fixsfdi.c - EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), - VT.getSizeInBits()); - SDValue ExponentMask = DAG.getConstant(0x7F800000, IntVT); - SDValue ExponentLoBit = DAG.getConstant(23, IntVT); - SDValue Bias = DAG.getConstant(127, IntVT); - SDValue SignMask = DAG.getConstant(APInt::getSignBit(VT.getSizeInBits()), - IntVT); - SDValue SignLowBit = DAG.getConstant(VT.getSizeInBits() - 1, IntVT); - SDValue MantissaMask = DAG.getConstant(0x007FFFFF, IntVT); - - SDValue Bits = DAG.getNode(ISD::BITCAST, dl, IntVT, Node->getOperand(0)); - - SDValue ExponentBits = DAG.getNode(ISD::SRL, dl, IntVT, - DAG.getNode(ISD::AND, dl, IntVT, Bits, ExponentMask), - DAG.getZExtOrTrunc(ExponentLoBit, dl, TLI.getShiftAmountTy(IntVT))); - SDValue Exponent = DAG.getNode(ISD::SUB, dl, IntVT, ExponentBits, Bias); - - SDValue Sign = DAG.getNode(ISD::SRA, dl, IntVT, - DAG.getNode(ISD::AND, dl, IntVT, Bits, SignMask), - DAG.getZExtOrTrunc(SignLowBit, dl, TLI.getShiftAmountTy(IntVT))); - Sign = DAG.getSExtOrTrunc(Sign, dl, NVT); - - SDValue R = DAG.getNode(ISD::OR, dl, IntVT, - DAG.getNode(ISD::AND, dl, IntVT, Bits, MantissaMask), - DAG.getConstant(0x00800000, IntVT)); - - R = DAG.getZExtOrTrunc(R, dl, NVT); - - - R = DAG.getSelectCC(dl, Exponent, ExponentLoBit, - DAG.getNode(ISD::SHL, dl, NVT, R, - DAG.getZExtOrTrunc( - DAG.getNode(ISD::SUB, dl, IntVT, Exponent, ExponentLoBit), - dl, TLI.getShiftAmountTy(IntVT))), - DAG.getNode(ISD::SRL, dl, NVT, R, - DAG.getZExtOrTrunc( - DAG.getNode(ISD::SUB, dl, IntVT, ExponentLoBit, Exponent), - dl, TLI.getShiftAmountTy(IntVT))), - ISD::SETGT); - - SDValue Ret = DAG.getNode(ISD::SUB, dl, NVT, - DAG.getNode(ISD::XOR, dl, NVT, R, Sign), - Sign); - - Results.push_back(DAG.getSelectCC(dl, Exponent, DAG.getConstant(0, IntVT), - DAG.getConstant(0, NVT), Ret, ISD::SETLT)); + case ISD::FP_TO_SINT: + if (TLI.expandFP_TO_SINT(Node, Tmp1, DAG)) + Results.push_back(Tmp1); break; - } case ISD::FP_TO_UINT: { SDValue True, False; EVT VT = Node->getOperand(0).getValueType(); @@ -3450,6 +3400,16 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) { Results.push_back(Tmp1); break; } + case ISD::FMINNUM: + Results.push_back(ExpandFPLibCall(Node, RTLIB::FMIN_F32, RTLIB::FMIN_F64, + RTLIB::FMIN_F80, RTLIB::FMIN_F128, + RTLIB::FMIN_PPCF128)); + break; + case ISD::FMAXNUM: + Results.push_back(ExpandFPLibCall(Node, RTLIB::FMAX_F32, RTLIB::FMAX_F64, + RTLIB::FMAX_F80, RTLIB::FMAX_F128, + RTLIB::FMAX_PPCF128)); + break; case ISD::FSQRT: Results.push_back(ExpandFPLibCall(Node, RTLIB::SQRT_F32, RTLIB::SQRT_F64, RTLIB::SQRT_F80, RTLIB::SQRT_F128, @@ -3568,12 +3528,38 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) { RTLIB::FMA_F80, RTLIB::FMA_F128, RTLIB::FMA_PPCF128)); break; - case ISD::FP16_TO_FP32: - Results.push_back(ExpandLibCall(RTLIB::FPEXT_F16_F32, Node, false)); + case ISD::FADD: + Results.push_back(ExpandFPLibCall(Node, RTLIB::ADD_F32, RTLIB::ADD_F64, + RTLIB::ADD_F80, RTLIB::ADD_F128, + RTLIB::ADD_PPCF128)); + break; + case ISD::FMUL: + Results.push_back(ExpandFPLibCall(Node, RTLIB::MUL_F32, RTLIB::MUL_F64, + RTLIB::MUL_F80, RTLIB::MUL_F128, + RTLIB::MUL_PPCF128)); break; - case ISD::FP32_TO_FP16: - Results.push_back(ExpandLibCall(RTLIB::FPROUND_F32_F16, Node, false)); + case ISD::FP16_TO_FP: { + if (Node->getValueType(0) == MVT::f32) { + 
Results.push_back(ExpandLibCall(RTLIB::FPEXT_F16_F32, Node, false)); + break; + } + + // We can extend to types bigger than f32 in two steps without changing the + // result. Since "f16 -> f32" is much more commonly available, give CodeGen + // the option of emitting that before resorting to a libcall. + SDValue Res = + DAG.getNode(ISD::FP16_TO_FP, dl, MVT::f32, Node->getOperand(0)); + Results.push_back( + DAG.getNode(ISD::FP_EXTEND, dl, Node->getValueType(0), Res)); break; + } + case ISD::FP_TO_FP16: { + RTLIB::Libcall LC = + RTLIB::getFPROUND(Node->getOperand(0).getValueType(), MVT::f16); + assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unable to expand fp_to_fp16"); + Results.push_back(ExpandLibCall(LC, Node, false)); + break; + } case ISD::ConstantFP: { ConstantFPSDNode *CFP = cast(Node); // Check to see if this FP immediate is already legal. @@ -3584,12 +3570,16 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) { } case ISD::FSUB: { EVT VT = Node->getValueType(0); - assert(TLI.isOperationLegalOrCustom(ISD::FADD, VT) && - TLI.isOperationLegalOrCustom(ISD::FNEG, VT) && - "Don't know how to expand this FP subtraction!"); - Tmp1 = DAG.getNode(ISD::FNEG, dl, VT, Node->getOperand(1)); - Tmp1 = DAG.getNode(ISD::FADD, dl, VT, Node->getOperand(0), Tmp1); - Results.push_back(Tmp1); + if (TLI.isOperationLegalOrCustom(ISD::FADD, VT) && + TLI.isOperationLegalOrCustom(ISD::FNEG, VT)) { + Tmp1 = DAG.getNode(ISD::FNEG, dl, VT, Node->getOperand(1)); + Tmp1 = DAG.getNode(ISD::FADD, dl, VT, Node->getOperand(0), Tmp1); + Results.push_back(Tmp1); + } else { + Results.push_back(ExpandFPLibCall(Node, RTLIB::SUB_F32, RTLIB::SUB_F64, + RTLIB::SUB_F80, RTLIB::SUB_F128, + RTLIB::SUB_PPCF128)); + } break; } case ISD::SUB: { @@ -3844,9 +3834,11 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) { TopHalf = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, VT, Ret, DAG.getIntPtrConstant(1)); // Ret is a node with an illegal type. Because such things are not - // generally permitted during this phase of legalization, delete the - // node. The above EXTRACT_ELEMENT nodes should have been folded. - DAG.DeleteNode(Ret.getNode()); + // generally permitted during this phase of legalization, make sure the + // node has no more uses. The above EXTRACT_ELEMENT nodes should have been + // folded. + assert(Ret->use_empty() && + "Unexpected uses of illegally type from expanded lib call."); } if (isSigned) { @@ -3907,7 +3899,7 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) { EVT MemVT = EVT::getIntegerVT(*DAG.getContext(), EntrySize * 8); SDValue LD = DAG.getExtLoad(ISD::SEXTLOAD, dl, PTy, Chain, Addr, MachinePointerInfo::getJumpTable(), MemVT, - false, false, 0); + false, false, false, 0); Addr = LD; if (TM.getRelocationModel() == Reloc::PIC_) { // For PIC, the sequence is: @@ -4217,6 +4209,10 @@ void SelectionDAGLegalize::PromoteNode(SDNode *Node) { // use the new one. DAG.ReplaceAllUsesOfValueWith(SDValue(Node, 0), Tmp2); DAG.ReplaceAllUsesOfValueWith(SDValue(Node, 1), Chain); + if (UpdatedNodes) { + UpdatedNodes->insert(Tmp2.getNode()); + UpdatedNodes->insert(Chain.getNode()); + } ReplacedNode(Node); break; } @@ -4293,6 +4289,9 @@ void SelectionDAGLegalize::PromoteNode(SDNode *Node) { Tmp1, Tmp2, Node->getOperand(2))); break; } + case ISD::FADD: + case ISD::FSUB: + case ISD::FMUL: case ISD::FDIV: case ISD::FREM: case ISD::FPOW: { @@ -4323,7 +4322,55 @@ void SelectionDAGLegalize::PromoteNode(SDNode *Node) { // SelectionDAG::Legalize - This is the entry point for the file. 
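The new FP16_TO_FP expansion above widens in two steps because the only libcall available converts f16 to f32, and extending the f32 result further is exact, so nothing is lost by the detour. A scalar sketch of the same reasoning, where halfToFloat is a stand-in for whatever f16 -> f32 conversion (instruction or libcall) the target provides and is not part of the patch:

#include <cstdint>

float halfToFloat(uint16_t H);        // assumed helper: f16 -> f32

// Every f16 value is exactly representable as f32, and every f32 value
// exactly as f64, so the two-step extension matches what a direct
// f16 -> f64 conversion would produce.
double extendHalfToDouble(uint16_t H) {
  float F = halfToFloat(H);           // the only step that may need a libcall
  return static_cast<double>(F);      // plain FP_EXTEND, always lossless
}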
// void SelectionDAG::Legalize() { - /// run - This is the main entry point to this class. - /// - SelectionDAGLegalize(*this).LegalizeDAG(); + AssignTopologicalOrder(); + + SmallPtrSet LegalizedNodes; + SelectionDAGLegalize Legalizer(*this, LegalizedNodes); + + // Visit all the nodes. We start in topological order, so that we see + // nodes with their original operands intact. Legalization can produce + // new nodes which may themselves need to be legalized. Iterate until all + // nodes have been legalized. + for (;;) { + bool AnyLegalized = false; + for (auto NI = allnodes_end(); NI != allnodes_begin();) { + --NI; + + SDNode *N = NI; + if (N->use_empty() && N != getRoot().getNode()) { + ++NI; + DeleteNode(N); + continue; + } + + if (LegalizedNodes.insert(N).second) { + AnyLegalized = true; + Legalizer.LegalizeOp(N); + + if (N->use_empty() && N != getRoot().getNode()) { + ++NI; + DeleteNode(N); + } + } + } + if (!AnyLegalized) + break; + + } + + // Remove dead nodes now. + RemoveDeadNodes(); +} + +bool SelectionDAG::LegalizeOp(SDNode *N, + SmallSetVector &UpdatedNodes) { + SmallPtrSet LegalizedNodes; + SelectionDAGLegalize Legalizer(*this, LegalizedNodes, &UpdatedNodes); + + // Directly insert the node in question, and legalize it. This will recurse + // as needed through operands. + LegalizedNodes.insert(N); + Legalizer.LegalizeOp(N); + + return LegalizedNodes.count(N); } diff --git a/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp index 6b8fec6..4591e79 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp @@ -68,6 +68,8 @@ void DAGTypeLegalizer::SoftenFloatResult(SDNode *N, unsigned ResNo) { case ISD::EXTRACT_VECTOR_ELT: R = SoftenFloatRes_EXTRACT_VECTOR_ELT(N); break; case ISD::FABS: R = SoftenFloatRes_FABS(N); break; + case ISD::FMINNUM: R = SoftenFloatRes_FMINNUM(N); break; + case ISD::FMAXNUM: R = SoftenFloatRes_FMAXNUM(N); break; case ISD::FADD: R = SoftenFloatRes_FADD(N); break; case ISD::FCEIL: R = SoftenFloatRes_FCEIL(N); break; case ISD::FCOPYSIGN: R = SoftenFloatRes_FCOPYSIGN(N); break; @@ -85,7 +87,7 @@ void DAGTypeLegalizer::SoftenFloatResult(SDNode *N, unsigned ResNo) { case ISD::FNEG: R = SoftenFloatRes_FNEG(N); break; case ISD::FP_EXTEND: R = SoftenFloatRes_FP_EXTEND(N); break; case ISD::FP_ROUND: R = SoftenFloatRes_FP_ROUND(N); break; - case ISD::FP16_TO_FP32:R = SoftenFloatRes_FP16_TO_FP32(N); break; + case ISD::FP16_TO_FP: R = SoftenFloatRes_FP16_TO_FP(N); break; case ISD::FPOW: R = SoftenFloatRes_FPOW(N); break; case ISD::FPOWI: R = SoftenFloatRes_FPOWI(N); break; case ISD::FREM: R = SoftenFloatRes_FREM(N); break; @@ -153,6 +155,32 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FABS(SDNode *N) { return DAG.getNode(ISD::AND, SDLoc(N), NVT, Op, Mask); } +SDValue DAGTypeLegalizer::SoftenFloatRes_FMINNUM(SDNode *N) { + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); + SDValue Ops[2] = { GetSoftenedFloat(N->getOperand(0)), + GetSoftenedFloat(N->getOperand(1)) }; + return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0), + RTLIB::FMIN_F32, + RTLIB::FMIN_F64, + RTLIB::FMIN_F80, + RTLIB::FMIN_F128, + RTLIB::FMIN_PPCF128), + NVT, Ops, 2, false, SDLoc(N)).first; +} + +SDValue DAGTypeLegalizer::SoftenFloatRes_FMAXNUM(SDNode *N) { + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); + SDValue Ops[2] = { GetSoftenedFloat(N->getOperand(0)), + GetSoftenedFloat(N->getOperand(1)) }; + return TLI.makeLibCall(DAG, 
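The rewritten SelectionDAG::Legalize above is a fixpoint loop: sweep the nodes of the (topologically ordered) list, legalize anything not yet visited, drop nodes that became dead, and repeat until a whole sweep legalizes nothing new. The control structure in miniature over a generic worklist, with placeholder types rather than the SelectionDAG API (assumption for illustration):

#include <cstddef>
#include <unordered_set>
#include <vector>

struct Node;                 // stand-in for SDNode
void legalizeOne(Node *N);   // stand-in: may append new nodes to the list

void legalizeAll(std::vector<Node *> &Nodes) {
  std::unordered_set<Node *> Legalized;
  for (;;) {
    bool AnyLegalized = false;
    // Index-based loop: legalizeOne may grow Nodes while we iterate.
    for (std::size_t I = 0; I != Nodes.size(); ++I) {
      Node *N = Nodes[I];
      if (Legalized.insert(N).second) {   // first visit only
        AnyLegalized = true;
        legalizeOne(N);
      }
    }
    if (!AnyLegalized)
      break;                              // fixpoint: everything seen
  }
}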
GetFPLibCall(N->getValueType(0), + RTLIB::FMAX_F32, + RTLIB::FMAX_F64, + RTLIB::FMAX_F80, + RTLIB::FMAX_F128, + RTLIB::FMAX_PPCF128), + NVT, Ops, 2, false, SDLoc(N)).first; +} + SDValue DAGTypeLegalizer::SoftenFloatRes_FADD(SDNode *N) { EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); SDValue Ops[2] = { GetSoftenedFloat(N->getOperand(0)), @@ -373,23 +401,48 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FNEG(SDNode *N) { SDValue DAGTypeLegalizer::SoftenFloatRes_FP_EXTEND(SDNode *N) { EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); SDValue Op = N->getOperand(0); + + // There's only a libcall for f16 -> f32, so proceed in two stages. Also, it's + // entirely possible for both f16 and f32 to be legal, so use the fully + // hard-float FP_EXTEND rather than FP16_TO_FP. + if (Op.getValueType() == MVT::f16 && N->getValueType(0) != MVT::f32) { + Op = DAG.getNode(ISD::FP_EXTEND, SDLoc(N), MVT::f32, Op); + if (getTypeAction(MVT::f32) == TargetLowering::TypeSoftenFloat) + SoftenFloatResult(Op.getNode(), 0); + } + RTLIB::Libcall LC = RTLIB::getFPEXT(Op.getValueType(), N->getValueType(0)); + if (getTypeAction(Op.getValueType()) == TargetLowering::TypeSoftenFloat) + Op = GetSoftenedFloat(Op); assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported FP_EXTEND!"); return TLI.makeLibCall(DAG, LC, NVT, &Op, 1, false, SDLoc(N)).first; } // FIXME: Should we just use 'normal' FP_EXTEND / FP_TRUNC instead of special // nodes? -SDValue DAGTypeLegalizer::SoftenFloatRes_FP16_TO_FP32(SDNode *N) { - EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); +SDValue DAGTypeLegalizer::SoftenFloatRes_FP16_TO_FP(SDNode *N) { + EVT MidVT = TLI.getTypeToTransformTo(*DAG.getContext(), MVT::f32); SDValue Op = N->getOperand(0); - return TLI.makeLibCall(DAG, RTLIB::FPEXT_F16_F32, NVT, &Op, 1, false, - SDLoc(N)).first; + SDValue Res32 = TLI.makeLibCall(DAG, RTLIB::FPEXT_F16_F32, MidVT, &Op, 1, + false, SDLoc(N)).first; + if (N->getValueType(0) == MVT::f32) + return Res32; + + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); + RTLIB::Libcall LC = RTLIB::getFPEXT(MVT::f32, N->getValueType(0)); + assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported FP_EXTEND!"); + return TLI.makeLibCall(DAG, LC, NVT, &Res32, 1, false, SDLoc(N)).first; } SDValue DAGTypeLegalizer::SoftenFloatRes_FP_ROUND(SDNode *N) { EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); SDValue Op = N->getOperand(0); + if (N->getValueType(0) == MVT::f16) { + // Semi-soften first, to FP_TO_FP16, so that targets which support f16 as a + // storage-only type get a chance to select things. 
+ return DAG.getNode(ISD::FP_TO_FP16, SDLoc(N), NVT, Op); + } + RTLIB::Libcall LC = RTLIB::getFPROUND(Op.getValueType(), N->getValueType(0)); assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported FP_ROUND!"); return TLI.makeLibCall(DAG, LC, NVT, &Op, 1, false, SDLoc(N)).first; @@ -498,6 +551,9 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FSUB(SDNode *N) { SDValue DAGTypeLegalizer::SoftenFloatRes_FTRUNC(SDNode *N) { EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); + if (N->getValueType(0) == MVT::f16) + return DAG.getNode(ISD::FP_TO_FP16, SDLoc(N), NVT, N->getOperand(0)); + SDValue Op = GetSoftenedFloat(N->getOperand(0)); return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0), RTLIB::TRUNC_F32, @@ -520,7 +576,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_LOAD(SDNode *N) { NVT, dl, L->getChain(), L->getBasePtr(), L->getOffset(), L->getPointerInfo(), NVT, L->isVolatile(), L->isNonTemporal(), false, L->getAlignment(), - L->getTBAAInfo()); + L->getAAInfo()); // Legalized the chain result - switch anything that used the old chain to // use the new one. ReplaceValueWith(SDValue(N, 1), NewL.getValue(1)); @@ -533,7 +589,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_LOAD(SDNode *N) { L->getBasePtr(), L->getOffset(), L->getPointerInfo(), L->getMemoryVT(), L->isVolatile(), L->isNonTemporal(), false, L->getAlignment(), - L->getTBAAInfo()); + L->getAAInfo()); // Legalized the chain result - switch anything that used the old chain to // use the new one. ReplaceValueWith(SDValue(N, 1), NewL.getValue(1)); @@ -625,10 +681,11 @@ bool DAGTypeLegalizer::SoftenFloatOperand(SDNode *N, unsigned OpNo) { case ISD::BITCAST: Res = SoftenFloatOp_BITCAST(N); break; case ISD::BR_CC: Res = SoftenFloatOp_BR_CC(N); break; + case ISD::FP_EXTEND: Res = SoftenFloatOp_FP_EXTEND(N); break; + case ISD::FP_TO_FP16: // Same as FP_ROUND for softening purposes case ISD::FP_ROUND: Res = SoftenFloatOp_FP_ROUND(N); break; case ISD::FP_TO_SINT: Res = SoftenFloatOp_FP_TO_SINT(N); break; case ISD::FP_TO_UINT: Res = SoftenFloatOp_FP_TO_UINT(N); break; - case ISD::FP32_TO_FP16:Res = SoftenFloatOp_FP32_TO_FP16(N); break; case ISD::SELECT_CC: Res = SoftenFloatOp_SELECT_CC(N); break; case ISD::SETCC: Res = SoftenFloatOp_SETCC(N); break; case ISD::STORE: Res = SoftenFloatOp_STORE(N, OpNo); break; @@ -654,11 +711,32 @@ SDValue DAGTypeLegalizer::SoftenFloatOp_BITCAST(SDNode *N) { GetSoftenedFloat(N->getOperand(0))); } +SDValue DAGTypeLegalizer::SoftenFloatOp_FP_EXTEND(SDNode *N) { + // If we get here, the result must be legal but the source illegal. + EVT SVT = N->getOperand(0).getValueType(); + EVT RVT = N->getValueType(0); + SDValue Op = GetSoftenedFloat(N->getOperand(0)); + + if (SVT == MVT::f16) + return DAG.getNode(ISD::FP16_TO_FP, SDLoc(N), RVT, Op); + + RTLIB::Libcall LC = RTLIB::getFPEXT(SVT, RVT); + assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported FP_EXTEND libcall"); + + return TLI.makeLibCall(DAG, LC, RVT, &Op, 1, false, SDLoc(N)).first; +} + + SDValue DAGTypeLegalizer::SoftenFloatOp_FP_ROUND(SDNode *N) { + // We actually deal with the partially-softened FP_TO_FP16 node too, which + // returns an i16 so doesn't meet the constraints necessary for FP_ROUND. + assert(N->getOpcode() == ISD::FP_ROUND || N->getOpcode() == ISD::FP_TO_FP16); + EVT SVT = N->getOperand(0).getValueType(); EVT RVT = N->getValueType(0); + EVT FloatRVT = N->getOpcode() == ISD::FP_TO_FP16 ? 
MVT::f16 : RVT; - RTLIB::Libcall LC = RTLIB::getFPROUND(SVT, RVT); + RTLIB::Libcall LC = RTLIB::getFPROUND(SVT, FloatRVT); assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported FP_ROUND libcall"); SDValue Op = GetSoftenedFloat(N->getOperand(0)); @@ -704,13 +782,6 @@ SDValue DAGTypeLegalizer::SoftenFloatOp_FP_TO_UINT(SDNode *N) { return TLI.makeLibCall(DAG, LC, RVT, &Op, 1, false, SDLoc(N)).first; } -SDValue DAGTypeLegalizer::SoftenFloatOp_FP32_TO_FP16(SDNode *N) { - EVT RVT = N->getValueType(0); - RTLIB::Libcall LC = RTLIB::FPROUND_F32_F16; - SDValue Op = GetSoftenedFloat(N->getOperand(0)); - return TLI.makeLibCall(DAG, LC, RVT, &Op, 1, false, SDLoc(N)).first; -} - SDValue DAGTypeLegalizer::SoftenFloatOp_SELECT_CC(SDNode *N) { SDValue NewLHS = N->getOperand(0), NewRHS = N->getOperand(1); ISD::CondCode CCCode = cast(N->getOperand(4))->get(); @@ -813,6 +884,8 @@ void DAGTypeLegalizer::ExpandFloatResult(SDNode *N, unsigned ResNo) { case ISD::ConstantFP: ExpandFloatRes_ConstantFP(N, Lo, Hi); break; case ISD::FABS: ExpandFloatRes_FABS(N, Lo, Hi); break; + case ISD::FMINNUM: ExpandFloatRes_FMINNUM(N, Lo, Hi); break; + case ISD::FMAXNUM: ExpandFloatRes_FMAXNUM(N, Lo, Hi); break; case ISD::FADD: ExpandFloatRes_FADD(N, Lo, Hi); break; case ISD::FCEIL: ExpandFloatRes_FCEIL(N, Lo, Hi); break; case ISD::FCOPYSIGN: ExpandFloatRes_FCOPYSIGN(N, Lo, Hi); break; @@ -876,6 +949,26 @@ void DAGTypeLegalizer::ExpandFloatRes_FABS(SDNode *N, SDValue &Lo, ISD::SETEQ); } +void DAGTypeLegalizer::ExpandFloatRes_FMINNUM(SDNode *N, SDValue &Lo, + SDValue &Hi) { + SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0), + RTLIB::FMIN_F32, RTLIB::FMIN_F64, + RTLIB::FMIN_F80, RTLIB::FMIN_F128, + RTLIB::FMIN_PPCF128), + N, false); + GetPairElements(Call, Lo, Hi); +} + +void DAGTypeLegalizer::ExpandFloatRes_FMAXNUM(SDNode *N, SDValue &Lo, + SDValue &Hi) { + SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0), + RTLIB::FMAX_F32, RTLIB::FMAX_F64, + RTLIB::FMAX_F80, RTLIB::FMAX_F128, + RTLIB::FMAX_PPCF128), + N, false); + GetPairElements(Call, Lo, Hi); +} + void DAGTypeLegalizer::ExpandFloatRes_FADD(SDNode *N, SDValue &Lo, SDValue &Hi) { SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0), diff --git a/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp index cffb0a1..b73bb0a 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp @@ -99,7 +99,7 @@ void DAGTypeLegalizer::PromoteIntegerResult(SDNode *N, unsigned ResNo) { case ISD::FP_TO_SINT: case ISD::FP_TO_UINT: Res = PromoteIntRes_FP_TO_XINT(N); break; - case ISD::FP32_TO_FP16:Res = PromoteIntRes_FP32_TO_FP16(N); break; + case ISD::FP_TO_FP16: Res = PromoteIntRes_FP_TO_FP16(N); break; case ISD::AND: case ISD::OR: @@ -225,10 +225,9 @@ SDValue DAGTypeLegalizer::PromoteIntRes_AtomicCmpSwap(AtomicSDNode *N, N->getOpcode(), SDLoc(N), N->getMemoryVT(), VTs, N->getChain(), N->getBasePtr(), Op2, Op3, N->getMemOperand(), N->getSuccessOrdering(), N->getFailureOrdering(), N->getSynchScope()); - // Legalized the chain result - switch anything that used the old chain to - // use the new one. - unsigned ChainOp = N->getNumValues() - 1; - ReplaceValueWith(SDValue(N, ChainOp), Res.getValue(ChainOp)); + // Update the use to N with the newly created Res. 
+ for (unsigned i = 1, NumResults = N->getNumValues(); i < NumResults; ++i) + ReplaceValueWith(SDValue(N, i), Res.getValue(i)); return Res; } @@ -402,7 +401,7 @@ SDValue DAGTypeLegalizer::PromoteIntRes_FP_TO_XINT(SDNode *N) { DAG.getValueType(N->getValueType(0).getScalarType())); } -SDValue DAGTypeLegalizer::PromoteIntRes_FP32_TO_FP16(SDNode *N) { +SDValue DAGTypeLegalizer::PromoteIntRes_FP_TO_FP16(SDNode *N) { EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); SDLoc dl(N); @@ -827,7 +826,7 @@ bool DAGTypeLegalizer::PromoteIntegerOperand(SDNode *N, unsigned OpNo) { case ISD::STORE: Res = PromoteIntOp_STORE(cast(N), OpNo); break; case ISD::TRUNCATE: Res = PromoteIntOp_TRUNCATE(N); break; - case ISD::FP16_TO_FP32: + case ISD::FP16_TO_FP: case ISD::UINT_TO_FP: Res = PromoteIntOp_UINT_TO_FP(N); break; case ISD::ZERO_EXTEND: Res = PromoteIntOp_ZERO_EXTEND(N); break; @@ -863,7 +862,26 @@ void DAGTypeLegalizer::PromoteSetCCOperands(SDValue &NewLHS,SDValue &NewRHS, switch (CCCode) { default: llvm_unreachable("Unknown integer comparison!"); case ISD::SETEQ: - case ISD::SETNE: + case ISD::SETNE: { + SDValue OpL = GetPromotedInteger(NewLHS); + SDValue OpR = GetPromotedInteger(NewRHS); + + // We would prefer to promote the comparison operand with sign extension, + // if we find the operand is actually to truncate an AssertSext. With this + // optimization, we can avoid inserting real truncate instruction, which + // is redudant eventually. + if (OpL->getOpcode() == ISD::AssertSext && + cast(OpL->getOperand(1))->getVT() == NewLHS.getValueType() && + OpR->getOpcode() == ISD::AssertSext && + cast(OpR->getOperand(1))->getVT() == NewRHS.getValueType()) { + NewLHS = OpL; + NewRHS = OpR; + } else { + NewLHS = ZExtPromotedInteger(NewLHS); + NewRHS = ZExtPromotedInteger(NewRHS); + } + break; + } case ISD::SETUGE: case ISD::SETUGT: case ISD::SETULE: @@ -947,7 +965,7 @@ SDValue DAGTypeLegalizer::PromoteIntOp_BUILD_VECTOR(SDNode *N) { EVT VecVT = N->getValueType(0); unsigned NumElts = VecVT.getVectorNumElements(); assert(!((NumElts & 1) && (!TLI.isTypeLegal(VecVT))) && - "Legal vector of one illegal element?"); + "Legal vector of one illegal element?"); // Promote the inserted value. The type does not need to match the // vector element type. Check that any extra bits introduced will be @@ -1861,7 +1879,7 @@ void DAGTypeLegalizer::ExpandIntRes_LOAD(LoadSDNode *N, bool isVolatile = N->isVolatile(); bool isNonTemporal = N->isNonTemporal(); bool isInvariant = N->isInvariant(); - const MDNode *TBAAInfo = N->getTBAAInfo(); + AAMDNodes AAInfo = N->getAAInfo(); SDLoc dl(N); assert(NVT.isByteSized() && "Expanded type not byte sized!"); @@ -1870,7 +1888,8 @@ void DAGTypeLegalizer::ExpandIntRes_LOAD(LoadSDNode *N, EVT MemVT = N->getMemoryVT(); Lo = DAG.getExtLoad(ExtType, dl, NVT, Ch, Ptr, N->getPointerInfo(), - MemVT, isVolatile, isNonTemporal, Alignment, TBAAInfo); + MemVT, isVolatile, isNonTemporal, isInvariant, + Alignment, AAInfo); // Remember the chain. Ch = Lo.getValue(1); @@ -1893,7 +1912,7 @@ void DAGTypeLegalizer::ExpandIntRes_LOAD(LoadSDNode *N, // Little-endian - low bits are at low addresses. 
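The new SETEQ/SETNE case above keeps the sign-extended operands when both are AssertSext of the original width: sign extension preserves equality and inequality of the narrow values, so the truncate/zero-extend pair that promotion would otherwise insert is redundant. A scalar illustration of why that is safe, and of the one pitfall (mixing extension kinds):

#include <cstdint>

// Two int8_t values are equal iff their int32_t sign extensions are.
bool equalNarrow(int8_t A, int8_t B) {
  int32_t WA = A;            // implicit sign extension (the AssertSext role)
  int32_t WB = B;
  return WA == WB;           // same truth value as (A == B)
}

// Zero-extending both operands would also work, but mixing the two does
// not: (int8_t)-1 zero-extends to 255 yet sign-extends to -1.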
Lo = DAG.getLoad(NVT, dl, Ch, Ptr, N->getPointerInfo(), isVolatile, isNonTemporal, isInvariant, Alignment, - TBAAInfo); + AAInfo); unsigned ExcessBits = N->getMemoryVT().getSizeInBits() - NVT.getSizeInBits(); @@ -1905,8 +1924,8 @@ void DAGTypeLegalizer::ExpandIntRes_LOAD(LoadSDNode *N, DAG.getConstant(IncrementSize, Ptr.getValueType())); Hi = DAG.getExtLoad(ExtType, dl, NVT, Ch, Ptr, N->getPointerInfo().getWithOffset(IncrementSize), NEVT, - isVolatile, isNonTemporal, - MinAlign(Alignment, IncrementSize), TBAAInfo); + isVolatile, isNonTemporal, isInvariant, + MinAlign(Alignment, IncrementSize), AAInfo); // Build a factor node to remember that this load is independent of the // other one. @@ -1924,7 +1943,8 @@ void DAGTypeLegalizer::ExpandIntRes_LOAD(LoadSDNode *N, Hi = DAG.getExtLoad(ExtType, dl, NVT, Ch, Ptr, N->getPointerInfo(), EVT::getIntegerVT(*DAG.getContext(), MemVT.getSizeInBits() - ExcessBits), - isVolatile, isNonTemporal, Alignment, TBAAInfo); + isVolatile, isNonTemporal, isInvariant, Alignment, + AAInfo); // Increment the pointer to the other half. Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, @@ -1933,8 +1953,8 @@ void DAGTypeLegalizer::ExpandIntRes_LOAD(LoadSDNode *N, Lo = DAG.getExtLoad(ISD::ZEXTLOAD, dl, NVT, Ch, Ptr, N->getPointerInfo().getWithOffset(IncrementSize), EVT::getIntegerVT(*DAG.getContext(), ExcessBits), - isVolatile, isNonTemporal, - MinAlign(Alignment, IncrementSize), TBAAInfo); + isVolatile, isNonTemporal, isInvariant, + MinAlign(Alignment, IncrementSize), AAInfo); // Build a factor node to remember that this load is independent of the // other one. @@ -2711,7 +2731,7 @@ SDValue DAGTypeLegalizer::ExpandIntOp_STORE(StoreSDNode *N, unsigned OpNo) { unsigned Alignment = N->getAlignment(); bool isVolatile = N->isVolatile(); bool isNonTemporal = N->isNonTemporal(); - const MDNode *TBAAInfo = N->getTBAAInfo(); + AAMDNodes AAInfo = N->getAAInfo(); SDLoc dl(N); SDValue Lo, Hi; @@ -2721,7 +2741,7 @@ SDValue DAGTypeLegalizer::ExpandIntOp_STORE(StoreSDNode *N, unsigned OpNo) { GetExpandedInteger(N->getValue(), Lo, Hi); return DAG.getTruncStore(Ch, dl, Lo, Ptr, N->getPointerInfo(), N->getMemoryVT(), isVolatile, isNonTemporal, - Alignment, TBAAInfo); + Alignment, AAInfo); } if (TLI.isLittleEndian()) { @@ -2729,7 +2749,7 @@ SDValue DAGTypeLegalizer::ExpandIntOp_STORE(StoreSDNode *N, unsigned OpNo) { GetExpandedInteger(N->getValue(), Lo, Hi); Lo = DAG.getStore(Ch, dl, Lo, Ptr, N->getPointerInfo(), - isVolatile, isNonTemporal, Alignment, TBAAInfo); + isVolatile, isNonTemporal, Alignment, AAInfo); unsigned ExcessBits = N->getMemoryVT().getSizeInBits() - NVT.getSizeInBits(); @@ -2742,7 +2762,7 @@ SDValue DAGTypeLegalizer::ExpandIntOp_STORE(StoreSDNode *N, unsigned OpNo) { Hi = DAG.getTruncStore(Ch, dl, Hi, Ptr, N->getPointerInfo().getWithOffset(IncrementSize), NEVT, isVolatile, isNonTemporal, - MinAlign(Alignment, IncrementSize), TBAAInfo); + MinAlign(Alignment, IncrementSize), AAInfo); return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo, Hi); } @@ -2770,7 +2790,7 @@ SDValue DAGTypeLegalizer::ExpandIntOp_STORE(StoreSDNode *N, unsigned OpNo) { // Store both the high bits and maybe some of the low bits. Hi = DAG.getTruncStore(Ch, dl, Hi, Ptr, N->getPointerInfo(), - HiVT, isVolatile, isNonTemporal, Alignment, TBAAInfo); + HiVT, isVolatile, isNonTemporal, Alignment, AAInfo); // Increment the pointer to the other half. 
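The split loads and stores above give the second half MinAlign(Alignment, IncrementSize): after bumping the pointer by the low half's size, only the alignment implied by both the original alignment and the offset can still be assumed. A sketch of that helper's arithmetic, reproduced from its documented behaviour (treat the exact formula as an assumption):

#include <cstdint>

// Smallest alignment guaranteed at (Ptr + Offset) when Ptr is Align-aligned:
// the lowest set bit of (Align | Offset).
uint64_t minAlign(uint64_t Align, uint64_t Offset) {
  uint64_t Bits = Align | Offset;
  return Bits & (~Bits + 1);   // isolate the lowest set bit
}

// Example: a 16-byte aligned 128-bit value expanded into two 64-bit halves.
// The low half keeps alignment 16; the high half sits at +8, and
// minAlign(16, 8) == 8.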
Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, @@ -2780,7 +2800,7 @@ SDValue DAGTypeLegalizer::ExpandIntOp_STORE(StoreSDNode *N, unsigned OpNo) { N->getPointerInfo().getWithOffset(IncrementSize), EVT::getIntegerVT(*DAG.getContext(), ExcessBits), isVolatile, isNonTemporal, - MinAlign(Alignment, IncrementSize), TBAAInfo); + MinAlign(Alignment, IncrementSize), AAInfo); return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo, Hi); } @@ -2855,7 +2875,7 @@ SDValue DAGTypeLegalizer::ExpandIntOp_UINT_TO_FP(SDNode *N) { FudgePtr, MachinePointerInfo::getConstantPool(), MVT::f32, - false, false, Alignment); + false, false, false, Alignment); return DAG.getNode(ISD::FADD, dl, DstVT, SignedConv, Fudge); } diff --git a/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/lib/CodeGen/SelectionDAG/LegalizeTypes.h index d0ca6f8..30f412b 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeTypes.h +++ b/lib/CodeGen/SelectionDAG/LegalizeTypes.h @@ -13,8 +13,8 @@ // //===----------------------------------------------------------------------===// -#ifndef SELECTIONDAG_LEGALIZETYPES_H -#define SELECTIONDAG_LEGALIZETYPES_H +#ifndef LLVM_LIB_CODEGEN_SELECTIONDAG_LEGALIZETYPES_H +#define LLVM_LIB_CODEGEN_SELECTIONDAG_LEGALIZETYPES_H #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/DenseSet.h" @@ -122,8 +122,8 @@ public: explicit DAGTypeLegalizer(SelectionDAG &dag) : TLI(dag.getTargetLoweringInfo()), DAG(dag), ValueTypeActions(TLI.getValueTypeActions()) { - assert(MVT::LAST_VALUETYPE <= MVT::MAX_ALLOWED_VALUETYPE && - "Too many value types for ValueTypeActions to hold!"); + static_assert(MVT::LAST_VALUETYPE <= MVT::MAX_ALLOWED_VALUETYPE, + "Too many value types for ValueTypeActions to hold!"); } /// run - This is the main entry point for the type legalizer. This does a @@ -237,7 +237,7 @@ private: SDValue PromoteIntRes_CTTZ(SDNode *N); SDValue PromoteIntRes_EXTRACT_VECTOR_ELT(SDNode *N); SDValue PromoteIntRes_FP_TO_XINT(SDNode *N); - SDValue PromoteIntRes_FP32_TO_FP16(SDNode *N); + SDValue PromoteIntRes_FP_TO_FP16(SDNode *N); SDValue PromoteIntRes_INT_EXTEND(SDNode *N); SDValue PromoteIntRes_LOAD(LoadSDNode *N); SDValue PromoteIntRes_Overflow(SDNode *N); @@ -387,6 +387,8 @@ private: SDValue SoftenFloatRes_ConstantFP(ConstantFPSDNode *N); SDValue SoftenFloatRes_EXTRACT_VECTOR_ELT(SDNode *N); SDValue SoftenFloatRes_FABS(SDNode *N); + SDValue SoftenFloatRes_FMINNUM(SDNode *N); + SDValue SoftenFloatRes_FMAXNUM(SDNode *N); SDValue SoftenFloatRes_FADD(SDNode *N); SDValue SoftenFloatRes_FCEIL(SDNode *N); SDValue SoftenFloatRes_FCOPYSIGN(SDNode *N); @@ -403,7 +405,7 @@ private: SDValue SoftenFloatRes_FNEARBYINT(SDNode *N); SDValue SoftenFloatRes_FNEG(SDNode *N); SDValue SoftenFloatRes_FP_EXTEND(SDNode *N); - SDValue SoftenFloatRes_FP16_TO_FP32(SDNode *N); + SDValue SoftenFloatRes_FP16_TO_FP(SDNode *N); SDValue SoftenFloatRes_FP_ROUND(SDNode *N); SDValue SoftenFloatRes_FPOW(SDNode *N); SDValue SoftenFloatRes_FPOWI(SDNode *N); @@ -425,10 +427,10 @@ private: bool SoftenFloatOperand(SDNode *N, unsigned OpNo); SDValue SoftenFloatOp_BITCAST(SDNode *N); SDValue SoftenFloatOp_BR_CC(SDNode *N); + SDValue SoftenFloatOp_FP_EXTEND(SDNode *N); SDValue SoftenFloatOp_FP_ROUND(SDNode *N); SDValue SoftenFloatOp_FP_TO_SINT(SDNode *N); SDValue SoftenFloatOp_FP_TO_UINT(SDNode *N); - SDValue SoftenFloatOp_FP32_TO_FP16(SDNode *N); SDValue SoftenFloatOp_SELECT_CC(SDNode *N); SDValue SoftenFloatOp_SETCC(SDNode *N); SDValue SoftenFloatOp_STORE(SDNode *N, unsigned OpNo); @@ -450,6 +452,8 @@ private: void ExpandFloatResult(SDNode *N, unsigned ResNo); void 
ExpandFloatRes_ConstantFP(SDNode *N, SDValue &Lo, SDValue &Hi); void ExpandFloatRes_FABS (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandFloatRes_FMINNUM (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandFloatRes_FMAXNUM (SDNode *N, SDValue &Lo, SDValue &Hi); void ExpandFloatRes_FADD (SDNode *N, SDValue &Lo, SDValue &Hi); void ExpandFloatRes_FCEIL (SDNode *N, SDValue &Lo, SDValue &Hi); void ExpandFloatRes_FCOPYSIGN (SDNode *N, SDValue &Lo, SDValue &Hi); diff --git a/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp b/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp index 7e2f7b6..38829b6 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp @@ -256,13 +256,13 @@ void DAGTypeLegalizer::ExpandRes_NormalLoad(SDNode *N, SDValue &Lo, bool isVolatile = LD->isVolatile(); bool isNonTemporal = LD->isNonTemporal(); bool isInvariant = LD->isInvariant(); - const MDNode *TBAAInfo = LD->getTBAAInfo(); + AAMDNodes AAInfo = LD->getAAInfo(); assert(NVT.isByteSized() && "Expanded type not byte sized!"); Lo = DAG.getLoad(NVT, dl, Chain, Ptr, LD->getPointerInfo(), isVolatile, isNonTemporal, isInvariant, Alignment, - TBAAInfo); + AAInfo); // Increment the pointer to the other half. unsigned IncrementSize = NVT.getSizeInBits() / 8; @@ -271,7 +271,7 @@ void DAGTypeLegalizer::ExpandRes_NormalLoad(SDNode *N, SDValue &Lo, Hi = DAG.getLoad(NVT, dl, Chain, Ptr, LD->getPointerInfo().getWithOffset(IncrementSize), isVolatile, isNonTemporal, isInvariant, - MinAlign(Alignment, IncrementSize), TBAAInfo); + MinAlign(Alignment, IncrementSize), AAInfo); // Build a factor node to remember that this load is independent of the // other one. @@ -470,7 +470,7 @@ SDValue DAGTypeLegalizer::ExpandOp_NormalStore(SDNode *N, unsigned OpNo) { unsigned Alignment = St->getAlignment(); bool isVolatile = St->isVolatile(); bool isNonTemporal = St->isNonTemporal(); - const MDNode *TBAAInfo = St->getTBAAInfo(); + AAMDNodes AAInfo = St->getAAInfo(); assert(NVT.isByteSized() && "Expanded type not byte sized!"); unsigned IncrementSize = NVT.getSizeInBits() / 8; @@ -482,14 +482,14 @@ SDValue DAGTypeLegalizer::ExpandOp_NormalStore(SDNode *N, unsigned OpNo) { std::swap(Lo, Hi); Lo = DAG.getStore(Chain, dl, Lo, Ptr, St->getPointerInfo(), - isVolatile, isNonTemporal, Alignment, TBAAInfo); + isVolatile, isNonTemporal, Alignment, AAInfo); Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, DAG.getConstant(IncrementSize, Ptr.getValueType())); Hi = DAG.getStore(Chain, dl, Hi, Ptr, St->getPointerInfo().getWithOffset(IncrementSize), isVolatile, isNonTemporal, - MinAlign(Alignment, IncrementSize), TBAAInfo); + MinAlign(Alignment, IncrementSize), AAInfo); return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo, Hi); } diff --git a/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp b/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp index 507e7ff..b5af7b7 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp @@ -199,12 +199,29 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) { if (Op.getOpcode() == ISD::LOAD) { LoadSDNode *LD = cast(Op.getNode()); ISD::LoadExtType ExtType = LD->getExtensionType(); - if (LD->getMemoryVT().isVector() && ExtType != ISD::NON_EXTLOAD) { - if (TLI.isLoadExtLegal(LD->getExtensionType(), LD->getMemoryVT())) + if (LD->getMemoryVT().isVector() && ExtType != ISD::NON_EXTLOAD) + switch (TLI.getLoadExtAction(LD->getExtensionType(), LD->getMemoryVT())) { + default: llvm_unreachable("This action is not supported 
yet!"); + case TargetLowering::Legal: return TranslateLegalizeResults(Op, Result); - Changed = true; - return LegalizeOp(ExpandLoad(Op)); - } + case TargetLowering::Custom: + if (SDValue Lowered = TLI.LowerOperation(Result, DAG)) { + Changed = true; + if (Lowered->getNumValues() != Op->getNumValues()) { + // This expanded to something other than the load. Assume the + // lowering code took care of any chain values, and just handle the + // returned value. + assert(Result.getValue(1).use_empty() && + "There are still live users of the old chain!"); + return LegalizeOp(Lowered); + } else { + return TranslateLegalizeResults(Op, Lowered); + } + } + case TargetLowering::Expand: + Changed = true; + return LegalizeOp(ExpandLoad(Op)); + } } else if (Op.getOpcode() == ISD::STORE) { StoreSDNode *ST = cast(Op.getNode()); EVT StVT = ST->getMemoryVT(); @@ -273,6 +290,8 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) { case ISD::FP_TO_UINT: case ISD::FNEG: case ISD::FABS: + case ISD::FMINNUM: + case ISD::FMAXNUM: case ISD::FCOPYSIGN: case ISD::FSQRT: case ISD::FSIN: @@ -353,9 +372,11 @@ SDValue VectorLegalizer::Promote(SDValue Op) { return PromoteFP_TO_INT(Op, Op->getOpcode() == ISD::FP_TO_SINT); } - // The rest of the time, vector "promotion" is basically just bitcasting and - // doing the operation in a different type. For example, x86 promotes - // ISD::AND on v2i32 to v1i64. + // There are currently two cases of vector promotion: + // 1) Bitcasting a vector of integers to a different type to a vector of the + // same overall length. For example, x86 promotes ISD::AND on v2i32 to v1i64. + // 2) Extending a vector of floats to a vector of the same number oflarger + // floats. For example, AArch64 promotes ISD::FADD on v4f16 to v4f32. MVT VT = Op.getSimpleValueType(); assert(Op.getNode()->getNumValues() == 1 && "Can't promote a vector with multiple results!"); @@ -365,14 +386,23 @@ SDValue VectorLegalizer::Promote(SDValue Op) { for (unsigned j = 0; j != Op.getNumOperands(); ++j) { if (Op.getOperand(j).getValueType().isVector()) - Operands[j] = DAG.getNode(ISD::BITCAST, dl, NVT, Op.getOperand(j)); + if (Op.getOperand(j) + .getValueType() + .getVectorElementType() + .isFloatingPoint()) + Operands[j] = DAG.getNode(ISD::FP_EXTEND, dl, NVT, Op.getOperand(j)); + else + Operands[j] = DAG.getNode(ISD::BITCAST, dl, NVT, Op.getOperand(j)); else Operands[j] = Op.getOperand(j); } Op = DAG.getNode(Op.getOpcode(), dl, NVT, Operands); - - return DAG.getNode(ISD::BITCAST, dl, VT, Op); + if (VT.isFloatingPoint() || + (VT.isVector() && VT.getVectorElementType().isFloatingPoint())) + return DAG.getNode(ISD::FP_ROUND, dl, VT, Op, DAG.getIntPtrConstant(0)); + else + return DAG.getNode(ISD::BITCAST, dl, VT, Op); } SDValue VectorLegalizer::PromoteINT_TO_FP(SDValue Op) { @@ -480,7 +510,7 @@ SDValue VectorLegalizer::ExpandLoad(SDValue Op) { LD->getPointerInfo().getWithOffset(Offset), LD->isVolatile(), LD->isNonTemporal(), LD->isInvariant(), LD->getAlignment(), - LD->getTBAAInfo()); + LD->getAAInfo()); } else { EVT LoadVT = WideVT; while (RemainingBytes < LoadBytes) { @@ -490,8 +520,8 @@ SDValue VectorLegalizer::ExpandLoad(SDValue Op) { ScalarLoad = DAG.getExtLoad(ISD::EXTLOAD, dl, WideVT, Chain, BasePTR, LD->getPointerInfo().getWithOffset(Offset), LoadVT, LD->isVolatile(), - LD->isNonTemporal(), LD->getAlignment(), - LD->getTBAAInfo()); + LD->isNonTemporal(), LD->isInvariant(), + LD->getAlignment(), LD->getAAInfo()); } RemainingBytes -= LoadBytes; @@ -561,8 +591,8 @@ SDValue VectorLegalizer::ExpandLoad(SDValue Op) { 
Op.getNode()->getValueType(0).getScalarType(), Chain, BasePTR, LD->getPointerInfo().getWithOffset(Idx * Stride), SrcVT.getScalarType(), - LD->isVolatile(), LD->isNonTemporal(), - LD->getAlignment(), LD->getTBAAInfo()); + LD->isVolatile(), LD->isNonTemporal(), LD->isInvariant(), + LD->getAlignment(), LD->getAAInfo()); BasePTR = DAG.getNode(ISD::ADD, dl, BasePTR.getValueType(), BasePTR, DAG.getConstant(Stride, BasePTR.getValueType())); @@ -593,7 +623,7 @@ SDValue VectorLegalizer::ExpandStore(SDValue Op) { unsigned Alignment = ST->getAlignment(); bool isVolatile = ST->isVolatile(); bool isNonTemporal = ST->isNonTemporal(); - const MDNode *TBAAInfo = ST->getTBAAInfo(); + AAMDNodes AAInfo = ST->getAAInfo(); unsigned NumElem = StVT.getVectorNumElements(); // The type of the data we want to save @@ -621,7 +651,7 @@ SDValue VectorLegalizer::ExpandStore(SDValue Op) { // This scalar TruncStore may be illegal, but we legalize it later. SDValue Store = DAG.getTruncStore(Chain, dl, Ex, BasePTR, ST->getPointerInfo().getWithOffset(Idx*Stride), MemSclVT, - isVolatile, isNonTemporal, Alignment, TBAAInfo); + isVolatile, isNonTemporal, Alignment, AAInfo); BasePTR = DAG.getNode(ISD::ADD, dl, BasePTR.getValueType(), BasePTR, DAG.getConstant(Stride, BasePTR.getValueType())); diff --git a/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp index 71240fc..27f63d2 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp @@ -106,6 +106,9 @@ void DAGTypeLegalizer::ScalarizeVectorResult(SDNode *N, unsigned ResNo) { case ISD::FCOPYSIGN: case ISD::FDIV: case ISD::FMUL: + case ISD::FMINNUM: + case ISD::FMAXNUM: + case ISD::FPOW: case ISD::FREM: case ISD::FSUB: @@ -223,7 +226,7 @@ SDValue DAGTypeLegalizer::ScalarizeVecRes_LOAD(LoadSDNode *N) { N->getMemoryVT().getVectorElementType(), N->isVolatile(), N->isNonTemporal(), N->isInvariant(), N->getOriginalAlignment(), - N->getTBAAInfo()); + N->getAAInfo()); // Legalized the chain result - switch anything that used the old chain to // use the new one. @@ -234,7 +237,23 @@ SDValue DAGTypeLegalizer::ScalarizeVecRes_LOAD(LoadSDNode *N) { SDValue DAGTypeLegalizer::ScalarizeVecRes_UnaryOp(SDNode *N) { // Get the dest type - it doesn't always match the input type, e.g. int_to_fp. EVT DestVT = N->getValueType(0).getVectorElementType(); - SDValue Op = GetScalarizedVector(N->getOperand(0)); + SDValue Op = N->getOperand(0); + EVT OpVT = Op.getValueType(); + SDLoc DL(N); + // The result needs scalarizing, but it's not a given that the source does. + // This is a workaround for targets where it's impossible to scalarize the + // result of a conversion, because the source type is legal. + // For instance, this happens on AArch64: v1i1 is illegal but v1i{8,16,32} + // are widened to v8i8, v4i16, and v2i32, which is legal, because v1i64 is + // legal and was not scalarized. 
+ // See the similar logic in ScalarizeVecRes_VSETCC + if (getTypeAction(OpVT) == TargetLowering::TypeScalarizeVector) { + Op = GetScalarizedVector(Op); + } else { + EVT VT = OpVT.getVectorElementType(); + Op = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, Op, + DAG.getConstant(0, TLI.getVectorIdxTy())); + } return DAG.getNode(N->getOpcode(), SDLoc(N), DestVT, Op); } @@ -408,6 +427,10 @@ bool DAGTypeLegalizer::ScalarizeVectorOperand(SDNode *N, unsigned OpNo) { case ISD::ZERO_EXTEND: case ISD::SIGN_EXTEND: case ISD::TRUNCATE: + case ISD::FP_TO_SINT: + case ISD::FP_TO_UINT: + case ISD::SINT_TO_FP: + case ISD::UINT_TO_FP: Res = ScalarizeVecOp_UnaryOp(N); break; case ISD::CONCAT_VECTORS: @@ -451,11 +474,11 @@ SDValue DAGTypeLegalizer::ScalarizeVecOp_BITCAST(SDNode *N) { N->getValueType(0), Elt); } -/// ScalarizeVecOp_EXTEND - If the value to extend is a vector that needs -/// to be scalarized, it must be <1 x ty>. Extend the element instead. +/// ScalarizeVecOp_UnaryOp - If the input is a vector that needs to be +/// scalarized, it must be <1 x ty>. Do the operation on the element instead. SDValue DAGTypeLegalizer::ScalarizeVecOp_UnaryOp(SDNode *N) { assert(N->getValueType(0).getVectorNumElements() == 1 && - "Unexected vector type!"); + "Unexpected vector type!"); SDValue Elt = GetScalarizedVector(N->getOperand(0)); SDValue Op = DAG.getNode(N->getOpcode(), SDLoc(N), N->getValueType(0).getScalarType(), Elt); @@ -509,12 +532,12 @@ SDValue DAGTypeLegalizer::ScalarizeVecOp_STORE(StoreSDNode *N, unsigned OpNo){ N->getBasePtr(), N->getPointerInfo(), N->getMemoryVT().getVectorElementType(), N->isVolatile(), N->isNonTemporal(), - N->getAlignment(), N->getTBAAInfo()); + N->getAlignment(), N->getAAInfo()); return DAG.getStore(N->getChain(), dl, GetScalarizedVector(N->getOperand(1)), N->getBasePtr(), N->getPointerInfo(), N->isVolatile(), N->isNonTemporal(), - N->getOriginalAlignment(), N->getTBAAInfo()); + N->getOriginalAlignment(), N->getAAInfo()); } /// ScalarizeVecOp_FP_ROUND - If the value to round is a vector that needs @@ -627,6 +650,8 @@ void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) { case ISD::FCOPYSIGN: case ISD::FSUB: case ISD::FMUL: + case ISD::FMINNUM: + case ISD::FMAXNUM: case ISD::SDIV: case ISD::UDIV: case ISD::FDIV: @@ -868,6 +893,10 @@ void DAGTypeLegalizer::SplitVecRes_INSERT_VECTOR_ELT(SDNode *N, SDValue &Lo, return; } + // See if the target wants to custom expand this node. + if (CustomLowerNode(N, N->getValueType(0), true)) + return; + // Spill the vector to the stack. 
EVT VecVT = Vec.getValueType(); EVT EltVT = VecVT.getVectorElementType(); @@ -923,14 +952,14 @@ void DAGTypeLegalizer::SplitVecRes_LOAD(LoadSDNode *LD, SDValue &Lo, bool isVolatile = LD->isVolatile(); bool isNonTemporal = LD->isNonTemporal(); bool isInvariant = LD->isInvariant(); - const MDNode *TBAAInfo = LD->getTBAAInfo(); + AAMDNodes AAInfo = LD->getAAInfo(); EVT LoMemVT, HiMemVT; std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemoryVT); Lo = DAG.getLoad(ISD::UNINDEXED, ExtType, LoVT, dl, Ch, Ptr, Offset, LD->getPointerInfo(), LoMemVT, isVolatile, isNonTemporal, - isInvariant, Alignment, TBAAInfo); + isInvariant, Alignment, AAInfo); unsigned IncrementSize = LoMemVT.getSizeInBits()/8; Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, @@ -938,7 +967,7 @@ void DAGTypeLegalizer::SplitVecRes_LOAD(LoadSDNode *LD, SDValue &Lo, Hi = DAG.getLoad(ISD::UNINDEXED, ExtType, HiVT, dl, Ch, Ptr, Offset, LD->getPointerInfo().getWithOffset(IncrementSize), HiMemVT, isVolatile, isNonTemporal, isInvariant, Alignment, - TBAAInfo); + AAInfo); // Build a factor node to remember that this load is independent of the // other one. @@ -1349,6 +1378,10 @@ SDValue DAGTypeLegalizer::SplitVecOp_EXTRACT_VECTOR_ELT(SDNode *N) { Idx.getValueType())), 0); } + // See if the target wants to custom expand this node. + if (CustomLowerNode(N, N->getValueType(0), true)) + return SDValue(); + // Store the vector to the stack. EVT EltVT = VecVT.getVectorElementType(); SDLoc dl(N); @@ -1359,7 +1392,7 @@ SDValue DAGTypeLegalizer::SplitVecOp_EXTRACT_VECTOR_ELT(SDNode *N) { // Load back the required element. StackPtr = GetVectorElementPointer(StackPtr, EltVT, Idx); return DAG.getExtLoad(ISD::EXTLOAD, dl, N->getValueType(0), Store, StackPtr, - MachinePointerInfo(), EltVT, false, false, 0); + MachinePointerInfo(), EltVT, false, false, false, 0); } SDValue DAGTypeLegalizer::SplitVecOp_STORE(StoreSDNode *N, unsigned OpNo) { @@ -1374,7 +1407,7 @@ SDValue DAGTypeLegalizer::SplitVecOp_STORE(StoreSDNode *N, unsigned OpNo) { unsigned Alignment = N->getOriginalAlignment(); bool isVol = N->isVolatile(); bool isNT = N->isNonTemporal(); - const MDNode *TBAAInfo = N->getTBAAInfo(); + AAMDNodes AAInfo = N->getAAInfo(); SDValue Lo, Hi; GetSplitVector(N->getOperand(1), Lo, Hi); @@ -1385,10 +1418,10 @@ SDValue DAGTypeLegalizer::SplitVecOp_STORE(StoreSDNode *N, unsigned OpNo) { if (isTruncating) Lo = DAG.getTruncStore(Ch, DL, Lo, Ptr, N->getPointerInfo(), - LoMemVT, isVol, isNT, Alignment, TBAAInfo); + LoMemVT, isVol, isNT, Alignment, AAInfo); else Lo = DAG.getStore(Ch, DL, Lo, Ptr, N->getPointerInfo(), - isVol, isNT, Alignment, TBAAInfo); + isVol, isNT, Alignment, AAInfo); // Increment the pointer to the other half. 
Ptr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr, @@ -1397,11 +1430,11 @@ SDValue DAGTypeLegalizer::SplitVecOp_STORE(StoreSDNode *N, unsigned OpNo) { if (isTruncating) Hi = DAG.getTruncStore(Ch, DL, Hi, Ptr, N->getPointerInfo().getWithOffset(IncrementSize), - HiMemVT, isVol, isNT, Alignment, TBAAInfo); + HiMemVT, isVol, isNT, Alignment, AAInfo); else Hi = DAG.getStore(Ch, DL, Hi, Ptr, N->getPointerInfo().getWithOffset(IncrementSize), - isVol, isNT, Alignment, TBAAInfo); + isVol, isNT, Alignment, AAInfo); return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Lo, Hi); } @@ -1575,6 +1608,8 @@ void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) { case ISD::OR: case ISD::SUB: case ISD::XOR: + case ISD::FMINNUM: + case ISD::FMAXNUM: Res = WidenVecRes_Binary(N); break; @@ -2737,7 +2772,7 @@ SDValue DAGTypeLegalizer::GenWidenVectorLoads(SmallVectorImpl &LdChain, bool isVolatile = LD->isVolatile(); bool isNonTemporal = LD->isNonTemporal(); bool isInvariant = LD->isInvariant(); - const MDNode *TBAAInfo = LD->getTBAAInfo(); + AAMDNodes AAInfo = LD->getAAInfo(); int LdWidth = LdVT.getSizeInBits(); int WidthDiff = WidenWidth - LdWidth; // Difference @@ -2748,7 +2783,7 @@ SDValue DAGTypeLegalizer::GenWidenVectorLoads(SmallVectorImpl &LdChain, int NewVTWidth = NewVT.getSizeInBits(); SDValue LdOp = DAG.getLoad(NewVT, dl, Chain, BasePtr, LD->getPointerInfo(), isVolatile, isNonTemporal, isInvariant, Align, - TBAAInfo); + AAInfo); LdChain.push_back(LdOp.getValue(1)); // Check if we can load the element with one instruction @@ -2793,7 +2828,7 @@ SDValue DAGTypeLegalizer::GenWidenVectorLoads(SmallVectorImpl &LdChain, L = DAG.getLoad(NewVT, dl, Chain, BasePtr, LD->getPointerInfo().getWithOffset(Offset), isVolatile, isNonTemporal, isInvariant, MinAlign(Align, Increment), - TBAAInfo); + AAInfo); LdChain.push_back(L.getValue(1)); if (L->getValueType(0).isVector()) { SmallVector Loads; @@ -2809,7 +2844,7 @@ SDValue DAGTypeLegalizer::GenWidenVectorLoads(SmallVectorImpl &LdChain, L = DAG.getLoad(NewVT, dl, Chain, BasePtr, LD->getPointerInfo().getWithOffset(Offset), isVolatile, isNonTemporal, isInvariant, MinAlign(Align, Increment), - TBAAInfo); + AAInfo); LdChain.push_back(L.getValue(1)); } @@ -2889,7 +2924,8 @@ DAGTypeLegalizer::GenWidenVectorExtLoads(SmallVectorImpl &LdChain, unsigned Align = LD->getAlignment(); bool isVolatile = LD->isVolatile(); bool isNonTemporal = LD->isNonTemporal(); - const MDNode *TBAAInfo = LD->getTBAAInfo(); + bool isInvariant = LD->isInvariant(); + AAMDNodes AAInfo = LD->getAAInfo(); EVT EltVT = WidenVT.getVectorElementType(); EVT LdEltVT = LdVT.getVectorElementType(); @@ -2901,7 +2937,8 @@ DAGTypeLegalizer::GenWidenVectorExtLoads(SmallVectorImpl &LdChain, unsigned Increment = LdEltVT.getSizeInBits() / 8; Ops[0] = DAG.getExtLoad(ExtType, dl, EltVT, Chain, BasePtr, LD->getPointerInfo(), - LdEltVT, isVolatile, isNonTemporal, Align, TBAAInfo); + LdEltVT, isVolatile, isNonTemporal, isInvariant, + Align, AAInfo); LdChain.push_back(Ops[0].getValue(1)); unsigned i = 0, Offset = Increment; for (i=1; i < NumElts; ++i, Offset += Increment) { @@ -2911,7 +2948,8 @@ DAGTypeLegalizer::GenWidenVectorExtLoads(SmallVectorImpl &LdChain, BasePtr.getValueType())); Ops[i] = DAG.getExtLoad(ExtType, dl, EltVT, Chain, NewBasePtr, LD->getPointerInfo().getWithOffset(Offset), LdEltVT, - isVolatile, isNonTemporal, Align, TBAAInfo); + isVolatile, isNonTemporal, isInvariant, Align, + AAInfo); LdChain.push_back(Ops[i].getValue(1)); } @@ -2934,7 +2972,7 @@ void 
DAGTypeLegalizer::GenWidenVectorStores(SmallVectorImpl &StChain, unsigned Align = ST->getAlignment(); bool isVolatile = ST->isVolatile(); bool isNonTemporal = ST->isNonTemporal(); - const MDNode *TBAAInfo = ST->getTBAAInfo(); + AAMDNodes AAInfo = ST->getAAInfo(); SDValue ValOp = GetWidenedVector(ST->getValue()); SDLoc dl(ST); @@ -2961,7 +2999,7 @@ void DAGTypeLegalizer::GenWidenVectorStores(SmallVectorImpl &StChain, StChain.push_back(DAG.getStore(Chain, dl, EOp, BasePtr, ST->getPointerInfo().getWithOffset(Offset), isVolatile, isNonTemporal, - MinAlign(Align, Offset), TBAAInfo)); + MinAlign(Align, Offset), AAInfo)); StWidth -= NewVTWidth; Offset += Increment; Idx += NumVTElts; @@ -2981,7 +3019,7 @@ void DAGTypeLegalizer::GenWidenVectorStores(SmallVectorImpl &StChain, StChain.push_back(DAG.getStore(Chain, dl, EOp, BasePtr, ST->getPointerInfo().getWithOffset(Offset), isVolatile, isNonTemporal, - MinAlign(Align, Offset), TBAAInfo)); + MinAlign(Align, Offset), AAInfo)); StWidth -= NewVTWidth; Offset += Increment; BasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr, @@ -3003,7 +3041,7 @@ DAGTypeLegalizer::GenWidenVectorTruncStores(SmallVectorImpl &StChain, unsigned Align = ST->getAlignment(); bool isVolatile = ST->isVolatile(); bool isNonTemporal = ST->isNonTemporal(); - const MDNode *TBAAInfo = ST->getTBAAInfo(); + AAMDNodes AAInfo = ST->getAAInfo(); SDValue ValOp = GetWidenedVector(ST->getValue()); SDLoc dl(ST); @@ -3027,7 +3065,7 @@ DAGTypeLegalizer::GenWidenVectorTruncStores(SmallVectorImpl &StChain, StChain.push_back(DAG.getTruncStore(Chain, dl, EOp, BasePtr, ST->getPointerInfo(), StEltVT, isVolatile, isNonTemporal, Align, - TBAAInfo)); + AAInfo)); unsigned Offset = Increment; for (unsigned i=1; i < NumElts; ++i, Offset += Increment) { SDValue NewBasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), @@ -3038,7 +3076,7 @@ DAGTypeLegalizer::GenWidenVectorTruncStores(SmallVectorImpl &StChain, StChain.push_back(DAG.getTruncStore(Chain, dl, EOp, NewBasePtr, ST->getPointerInfo().getWithOffset(Offset), StEltVT, isVolatile, isNonTemporal, - MinAlign(Align, Offset), TBAAInfo)); + MinAlign(Align, Offset), AAInfo)); } } diff --git a/lib/CodeGen/SelectionDAG/ResourcePriorityQueue.cpp b/lib/CodeGen/SelectionDAG/ResourcePriorityQueue.cpp index 624003f..db38b76 100644 --- a/lib/CodeGen/SelectionDAG/ResourcePriorityQueue.cpp +++ b/lib/CodeGen/SelectionDAG/ResourcePriorityQueue.cpp @@ -27,6 +27,7 @@ #include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetLowering.h" #include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetSubtargetInfo.h" using namespace llvm; @@ -40,32 +41,29 @@ static cl::opt RegPressureThreshold( "dfa-sched-reg-pressure-threshold", cl::Hidden, cl::ZeroOrMore, cl::init(5), cl::desc("Track reg pressure and switch priority to in-depth")); +ResourcePriorityQueue::ResourcePriorityQueue(SelectionDAGISel *IS) + : Picker(this), InstrItins(IS->MF->getSubtarget().getInstrItineraryData()) { + const TargetSubtargetInfo &STI = IS->MF->getSubtarget(); + TRI = STI.getRegisterInfo(); + TLI = IS->TLI; + TII = STI.getInstrInfo(); + ResourcesModel = TII->CreateTargetScheduleState(STI); + // This hard requirement could be relaxed, but for now + // do not let it procede. 
+ assert(ResourcesModel && "Unimplemented CreateTargetScheduleState."); + + unsigned NumRC = TRI->getNumRegClasses(); + RegLimit.resize(NumRC); + RegPressure.resize(NumRC); + std::fill(RegLimit.begin(), RegLimit.end(), 0); + std::fill(RegPressure.begin(), RegPressure.end(), 0); + for (TargetRegisterInfo::regclass_iterator I = TRI->regclass_begin(), + E = TRI->regclass_end(); + I != E; ++I) + RegLimit[(*I)->getID()] = TRI->getRegPressureLimit(*I, *IS->MF); -ResourcePriorityQueue::ResourcePriorityQueue(SelectionDAGISel *IS) : - Picker(this), - InstrItins(IS->getTargetLowering()->getTargetMachine().getInstrItineraryData()) -{ - TII = IS->getTargetLowering()->getTargetMachine().getInstrInfo(); - TRI = IS->getTargetLowering()->getTargetMachine().getRegisterInfo(); - TLI = IS->getTargetLowering(); - - const TargetMachine &tm = (*IS->MF).getTarget(); - ResourcesModel = tm.getInstrInfo()->CreateTargetScheduleState(&tm,nullptr); - // This hard requirement could be relaxed, but for now - // do not let it procede. - assert (ResourcesModel && "Unimplemented CreateTargetScheduleState."); - - unsigned NumRC = TRI->getNumRegClasses(); - RegLimit.resize(NumRC); - RegPressure.resize(NumRC); - std::fill(RegLimit.begin(), RegLimit.end(), 0); - std::fill(RegPressure.begin(), RegPressure.end(), 0); - for (TargetRegisterInfo::regclass_iterator I = TRI->regclass_begin(), - E = TRI->regclass_end(); I != E; ++I) - RegLimit[(*I)->getID()] = TRI->getRegPressureLimit(*I, *IS->MF); - - ParallelLiveRanges = 0; - HorizontalVerticalBalance = 0; + ParallelLiveRanges = 0; + HorizontalVerticalBalance = 0; } unsigned @@ -319,7 +317,7 @@ void ResourcePriorityQueue::reserveResources(SUnit *SU) { // If packet is now full, reset the state so in the next cycle // we start fresh. - if (Packet.size() >= InstrItins->SchedModel->IssueWidth) { + if (Packet.size() >= InstrItins->SchedModel.IssueWidth) { ResourcesModel->clearResources(); Packet.clear(); } diff --git a/lib/CodeGen/SelectionDAG/SDNodeDbgValue.h b/lib/CodeGen/SelectionDAG/SDNodeDbgValue.h index ee54292..bce69d7 100644 --- a/lib/CodeGen/SelectionDAG/SDNodeDbgValue.h +++ b/lib/CodeGen/SelectionDAG/SDNodeDbgValue.h @@ -11,8 +11,8 @@ // //===----------------------------------------------------------------------===// -#ifndef LLVM_CODEGEN_SDNODEDBGVALUE_H -#define LLVM_CODEGEN_SDNODEDBGVALUE_H +#ifndef LLVM_LIB_CODEGEN_SELECTIONDAG_SDNODEDBGVALUE_H +#define LLVM_LIB_CODEGEN_SELECTIONDAG_SDNODEDBGVALUE_H #include "llvm/ADT/SmallVector.h" #include "llvm/IR/DebugLoc.h" @@ -44,7 +44,8 @@ private: const Value *Const; // valid for constants unsigned FrameIx; // valid for stack objects } u; - MDNode *mdPtr; + MDNode *Var; + MDNode *Expr; bool IsIndirect; uint64_t Offset; DebugLoc DL; @@ -52,69 +53,72 @@ private: bool Invalid; public: // Constructor for non-constants. - SDDbgValue(MDNode *mdP, SDNode *N, unsigned R, - bool indir, uint64_t off, DebugLoc dl, - unsigned O) : mdPtr(mdP), IsIndirect(indir), - Offset(off), DL(dl), Order(O), - Invalid(false) { + SDDbgValue(MDNode *Var, MDNode *Expr, SDNode *N, unsigned R, bool indir, + uint64_t off, DebugLoc dl, unsigned O) + : Var(Var), Expr(Expr), IsIndirect(indir), Offset(off), DL(dl), Order(O), + Invalid(false) { kind = SDNODE; u.s.Node = N; u.s.ResNo = R; } // Constructor for constants. 
- SDDbgValue(MDNode *mdP, const Value *C, uint64_t off, DebugLoc dl, - unsigned O) : - mdPtr(mdP), IsIndirect(false), Offset(off), DL(dl), Order(O), - Invalid(false) { + SDDbgValue(MDNode *Var, MDNode *Expr, const Value *C, uint64_t off, + DebugLoc dl, unsigned O) + : Var(Var), Expr(Expr), IsIndirect(false), Offset(off), DL(dl), Order(O), + Invalid(false) { kind = CONST; u.Const = C; } // Constructor for frame indices. - SDDbgValue(MDNode *mdP, unsigned FI, uint64_t off, DebugLoc dl, unsigned O) : - mdPtr(mdP), IsIndirect(false), Offset(off), DL(dl), Order(O), - Invalid(false) { + SDDbgValue(MDNode *Var, MDNode *Expr, unsigned FI, uint64_t off, DebugLoc dl, + unsigned O) + : Var(Var), Expr(Expr), IsIndirect(false), Offset(off), DL(dl), Order(O), + Invalid(false) { kind = FRAMEIX; u.FrameIx = FI; } // Returns the kind. - DbgValueKind getKind() { return kind; } + DbgValueKind getKind() const { return kind; } - // Returns the MDNode pointer. - MDNode *getMDPtr() { return mdPtr; } + // Returns the MDNode pointer for the variable. + MDNode *getVariable() const { return Var; } + + // Returns the MDNode pointer for the expression. + MDNode *getExpression() const { return Expr; } // Returns the SDNode* for a register ref - SDNode *getSDNode() { assert (kind==SDNODE); return u.s.Node; } + SDNode *getSDNode() const { assert (kind==SDNODE); return u.s.Node; } // Returns the ResNo for a register ref - unsigned getResNo() { assert (kind==SDNODE); return u.s.ResNo; } + unsigned getResNo() const { assert (kind==SDNODE); return u.s.ResNo; } // Returns the Value* for a constant - const Value *getConst() { assert (kind==CONST); return u.Const; } + const Value *getConst() const { assert (kind==CONST); return u.Const; } // Returns the FrameIx for a stack object - unsigned getFrameIx() { assert (kind==FRAMEIX); return u.FrameIx; } + unsigned getFrameIx() const { assert (kind==FRAMEIX); return u.FrameIx; } // Returns whether this is an indirect value. - bool isIndirect() { return IsIndirect; } + bool isIndirect() const { return IsIndirect; } // Returns the offset. - uint64_t getOffset() { return Offset; } + uint64_t getOffset() const { return Offset; } // Returns the DebugLoc. - DebugLoc getDebugLoc() { return DL; } + DebugLoc getDebugLoc() const { return DL; } // Returns the SDNodeOrder. This is the order of the preceding node in the // input. - unsigned getOrder() { return Order; } + unsigned getOrder() const { return Order; } // setIsInvalidated / isInvalidated - Setter / getter of the "Invalidated" // property. A SDDbgValue is invalid if the SDNode that produces the value is // deleted. 
void setIsInvalidated() { Invalid = true; } - bool isInvalidated() { return Invalid; } + bool isInvalidated() const { return Invalid; } }; } // end llvm namespace diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp index 4d8c2c7..61a3fd7 100644 --- a/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp +++ b/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp @@ -221,7 +221,7 @@ SUnit *ScheduleDAGFast::CopyAndMoveSuccessors(SUnit *SU) { SUnit *NewSU; bool TryUnfold = false; for (unsigned i = 0, e = N->getNumValues(); i != e; ++i) { - EVT VT = N->getValueType(i); + MVT VT = N->getSimpleValueType(i); if (VT == MVT::Glue) return nullptr; else if (VT == MVT::Other) @@ -229,7 +229,7 @@ SUnit *ScheduleDAGFast::CopyAndMoveSuccessors(SUnit *SU) { } for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) { const SDValue &Op = N->getOperand(i); - EVT VT = Op.getNode()->getValueType(Op.getResNo()); + MVT VT = Op.getNode()->getSimpleValueType(Op.getResNo()); if (VT == MVT::Glue) return nullptr; } @@ -431,17 +431,23 @@ void ScheduleDAGFast::InsertCopiesAndMoveSuccs(SUnit *SU, unsigned Reg, /// getPhysicalRegisterVT - Returns the ValueType of the physical register /// definition of the specified node. /// FIXME: Move to SelectionDAG? -static EVT getPhysicalRegisterVT(SDNode *N, unsigned Reg, +static MVT getPhysicalRegisterVT(SDNode *N, unsigned Reg, const TargetInstrInfo *TII) { - const MCInstrDesc &MCID = TII->get(N->getMachineOpcode()); - assert(MCID.ImplicitDefs && "Physical reg def must be in implicit def list!"); - unsigned NumRes = MCID.getNumDefs(); - for (const uint16_t *ImpDef = MCID.getImplicitDefs(); *ImpDef; ++ImpDef) { - if (Reg == *ImpDef) - break; - ++NumRes; + unsigned NumRes; + if (N->getOpcode() == ISD::CopyFromReg) { + // CopyFromReg has: "chain, Val, glue" so operand 1 gives the type. 
+ NumRes = 1; + } else { + const MCInstrDesc &MCID = TII->get(N->getMachineOpcode()); + assert(MCID.ImplicitDefs && "Physical reg def must be in implicit def list!"); + NumRes = MCID.getNumDefs(); + for (const uint16_t *ImpDef = MCID.getImplicitDefs(); *ImpDef; ++ImpDef) { + if (Reg == *ImpDef) + break; + ++NumRes; + } } - return N->getValueType(NumRes); + return N->getSimpleValueType(NumRes); } /// CheckForLiveRegDef - Return true and update live register vector if the @@ -454,7 +460,7 @@ static bool CheckForLiveRegDef(SUnit *SU, unsigned Reg, bool Added = false; for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI) { if (LiveRegDefs[*AI] && LiveRegDefs[*AI] != SU) { - if (RegAdded.insert(*AI)) { + if (RegAdded.insert(*AI).second) { LRegs.push_back(*AI); Added = true; } @@ -572,7 +578,7 @@ void ScheduleDAGFast::ListScheduleBottomUp() { assert(LRegs.size() == 1 && "Can't handle this yet!"); unsigned Reg = LRegs[0]; SUnit *LRDef = LiveRegDefs[Reg]; - EVT VT = getPhysicalRegisterVT(LRDef->getNode(), Reg, TII); + MVT VT = getPhysicalRegisterVT(LRDef->getNode(), Reg, TII); const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg, VT); const TargetRegisterClass *DestRC = TRI->getCrossCopyRegClass(RC); diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp index 13cfae7..8b54e656 100644 --- a/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp +++ b/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp @@ -30,8 +30,8 @@ #include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetLowering.h" -#include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/Target/TargetSubtargetInfo.h" #include using namespace llvm; @@ -166,12 +166,11 @@ public: NeedLatency(needlatency), AvailableQueue(availqueue), CurCycle(0), Topo(SUnits, nullptr) { - const TargetMachine &tm = mf.getTarget(); + const TargetSubtargetInfo &STI = mf.getSubtarget(); if (DisableSchedCycles || !NeedLatency) HazardRec = new ScheduleHazardRecognizer(); else - HazardRec = tm.getInstrInfo()->CreateTargetHazardRecognizer( - tm.getSubtargetImpl(), this); + HazardRec = STI.getInstrInfo()->CreateTargetHazardRecognizer(&STI, this); } ~ScheduleDAGRRList() { @@ -946,7 +945,7 @@ SUnit *ScheduleDAGRRList::CopyAndMoveSuccessors(SUnit *SU) { SUnit *NewSU; bool TryUnfold = false; for (unsigned i = 0, e = N->getNumValues(); i != e; ++i) { - EVT VT = N->getValueType(i); + MVT VT = N->getSimpleValueType(i); if (VT == MVT::Glue) return nullptr; else if (VT == MVT::Other) @@ -954,7 +953,7 @@ SUnit *ScheduleDAGRRList::CopyAndMoveSuccessors(SUnit *SU) { } for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) { const SDValue &Op = N->getOperand(i); - EVT VT = Op.getNode()->getValueType(Op.getResNo()); + MVT VT = Op.getNode()->getSimpleValueType(Op.getResNo()); if (VT == MVT::Glue) return nullptr; } @@ -1189,17 +1188,23 @@ void ScheduleDAGRRList::InsertCopiesAndMoveSuccs(SUnit *SU, unsigned Reg, /// getPhysicalRegisterVT - Returns the ValueType of the physical register /// definition of the specified node. /// FIXME: Move to SelectionDAG? 
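The RegAdded.insert(*AI).second changes above follow an API update in which set insertion reports, via the bool half of the returned pair, whether the element was newly added, matching std::set::insert. The idiom in plain C++ (std::set used purely for illustration):

#include <set>
#include <vector>

// Record an interfering register only the first time it is seen.
void addOnce(std::set<unsigned> &RegAdded, std::vector<unsigned> &LRegs,
             unsigned Reg) {
  if (RegAdded.insert(Reg).second)   // true only on first insertion
    LRegs.push_back(Reg);
}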
-static EVT getPhysicalRegisterVT(SDNode *N, unsigned Reg, +static MVT getPhysicalRegisterVT(SDNode *N, unsigned Reg, const TargetInstrInfo *TII) { - const MCInstrDesc &MCID = TII->get(N->getMachineOpcode()); - assert(MCID.ImplicitDefs && "Physical reg def must be in implicit def list!"); - unsigned NumRes = MCID.getNumDefs(); - for (const uint16_t *ImpDef = MCID.getImplicitDefs(); *ImpDef; ++ImpDef) { - if (Reg == *ImpDef) - break; - ++NumRes; + unsigned NumRes; + if (N->getOpcode() == ISD::CopyFromReg) { + // CopyFromReg has: "chain, Val, glue" so operand 1 gives the type. + NumRes = 1; + } else { + const MCInstrDesc &MCID = TII->get(N->getMachineOpcode()); + assert(MCID.ImplicitDefs && "Physical reg def must be in implicit def list!"); + NumRes = MCID.getNumDefs(); + for (const uint16_t *ImpDef = MCID.getImplicitDefs(); *ImpDef; ++ImpDef) { + if (Reg == *ImpDef) + break; + ++NumRes; + } } - return N->getValueType(NumRes); + return N->getSimpleValueType(NumRes); } /// CheckForLiveRegDef - Return true and update live register vector if the @@ -1218,7 +1223,7 @@ static void CheckForLiveRegDef(SUnit *SU, unsigned Reg, if (LiveRegDefs[*AliasI] == SU) continue; // Add Reg to the set of interfering live regs. - if (RegAdded.insert(*AliasI)) { + if (RegAdded.insert(*AliasI).second) { LRegs.push_back(*AliasI); } } @@ -1235,7 +1240,7 @@ static void CheckForLiveRegDefMasked(SUnit *SU, const uint32_t *RegMask, if (!LiveRegDefs[i]) continue; if (LiveRegDefs[i] == SU) continue; if (!MachineOperand::clobbersPhysReg(RegMask, i)) continue; - if (RegAdded.insert(i)) + if (RegAdded.insert(i).second) LRegs.push_back(i); } } @@ -1310,7 +1315,8 @@ DelayForLiveRegsBottomUp(SUnit *SU, SmallVectorImpl &LRegs) { SDNode *Gen = LiveRegGens[CallResource]->getNode(); while (SDNode *Glued = Gen->getGluedNode()) Gen = Glued; - if (!IsChainDependent(Gen, Node, 0, TII) && RegAdded.insert(CallResource)) + if (!IsChainDependent(Gen, Node, 0, TII) && + RegAdded.insert(CallResource).second) LRegs.push_back(CallResource); } } @@ -1373,7 +1379,7 @@ SUnit *ScheduleDAGRRList::PickNodeToScheduleBottomUp() { Interferences.push_back(CurSU); } else { - assert(CurSU->isPending && "Intereferences are pending"); + assert(CurSU->isPending && "Interferences are pending"); // Update the interference with current live regs. 
LRegsPair.first->second = LRegs; } @@ -1439,7 +1445,7 @@ SUnit *ScheduleDAGRRList::PickNodeToScheduleBottomUp() { assert(LRegs.size() == 1 && "Can't handle this yet!"); unsigned Reg = LRegs[0]; SUnit *LRDef = LiveRegDefs[Reg]; - EVT VT = getPhysicalRegisterVT(LRDef->getNode(), Reg, TII); + MVT VT = getPhysicalRegisterVT(LRDef->getNode(), Reg, TII); const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg, VT); const TargetRegisterClass *DestRC = TRI->getCrossCopyRegClass(RC); @@ -1930,8 +1936,8 @@ void RegReductionPQBase::dumpRegPressure() const { unsigned Id = RC->getID(); unsigned RP = RegPressure[Id]; if (!RP) continue; - DEBUG(dbgs() << RC->getName() << ": " << RP << " / " << RegLimit[Id] - << '\n'); + DEBUG(dbgs() << TRI->getRegClassName(RC) << ": " << RP << " / " + << RegLimit[Id] << '\n'); } #endif } @@ -2754,7 +2760,7 @@ static bool canClobberPhysRegDefs(const SUnit *SuccSU, const SUnit *SU, if (!SUImpDefs && !SURegMask) continue; for (unsigned i = NumDefs, e = N->getNumValues(); i != e; ++i) { - EVT VT = N->getValueType(i); + MVT VT = N->getSimpleValueType(i); if (VT == MVT::Glue || VT == MVT::Other) continue; if (!N->hasAnyUseOfValue(i)) @@ -2977,9 +2983,9 @@ void RegReductionPQBase::AddPseudoTwoAddrDeps() { llvm::ScheduleDAGSDNodes * llvm::createBURRListDAGScheduler(SelectionDAGISel *IS, CodeGenOpt::Level OptLevel) { - const TargetMachine &TM = IS->TM; - const TargetInstrInfo *TII = TM.getInstrInfo(); - const TargetRegisterInfo *TRI = TM.getRegisterInfo(); + const TargetSubtargetInfo &STI = IS->MF->getSubtarget(); + const TargetInstrInfo *TII = STI.getInstrInfo(); + const TargetRegisterInfo *TRI = STI.getRegisterInfo(); BURegReductionPriorityQueue *PQ = new BURegReductionPriorityQueue(*IS->MF, false, false, TII, TRI, nullptr); @@ -2991,9 +2997,9 @@ llvm::createBURRListDAGScheduler(SelectionDAGISel *IS, llvm::ScheduleDAGSDNodes * llvm::createSourceListDAGScheduler(SelectionDAGISel *IS, CodeGenOpt::Level OptLevel) { - const TargetMachine &TM = IS->TM; - const TargetInstrInfo *TII = TM.getInstrInfo(); - const TargetRegisterInfo *TRI = TM.getRegisterInfo(); + const TargetSubtargetInfo &STI = IS->MF->getSubtarget(); + const TargetInstrInfo *TII = STI.getInstrInfo(); + const TargetRegisterInfo *TRI = STI.getRegisterInfo(); SrcRegReductionPriorityQueue *PQ = new SrcRegReductionPriorityQueue(*IS->MF, false, true, TII, TRI, nullptr); @@ -3005,10 +3011,10 @@ llvm::createSourceListDAGScheduler(SelectionDAGISel *IS, llvm::ScheduleDAGSDNodes * llvm::createHybridListDAGScheduler(SelectionDAGISel *IS, CodeGenOpt::Level OptLevel) { - const TargetMachine &TM = IS->TM; - const TargetInstrInfo *TII = TM.getInstrInfo(); - const TargetRegisterInfo *TRI = TM.getRegisterInfo(); - const TargetLowering *TLI = IS->getTargetLowering(); + const TargetSubtargetInfo &STI = IS->MF->getSubtarget(); + const TargetInstrInfo *TII = STI.getInstrInfo(); + const TargetRegisterInfo *TRI = STI.getRegisterInfo(); + const TargetLowering *TLI = IS->TLI; HybridBURRPriorityQueue *PQ = new HybridBURRPriorityQueue(*IS->MF, true, false, TII, TRI, TLI); @@ -3021,10 +3027,10 @@ llvm::createHybridListDAGScheduler(SelectionDAGISel *IS, llvm::ScheduleDAGSDNodes * llvm::createILPListDAGScheduler(SelectionDAGISel *IS, CodeGenOpt::Level OptLevel) { - const TargetMachine &TM = IS->TM; - const TargetInstrInfo *TII = TM.getInstrInfo(); - const TargetRegisterInfo *TRI = TM.getRegisterInfo(); - const TargetLowering *TLI = IS->getTargetLowering(); + const TargetSubtargetInfo &STI = IS->MF->getSubtarget(); + const TargetInstrInfo 
*TII = STI.getInstrInfo(); + const TargetRegisterInfo *TRI = STI.getRegisterInfo(); + const TargetLowering *TLI = IS->TLI; ILPBURRPriorityQueue *PQ = new ILPBURRPriorityQueue(*IS->MF, true, false, TII, TRI, TLI); diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp index de910b7..8b9f618 100644 --- a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp +++ b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp @@ -29,7 +29,6 @@ #include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetLowering.h" -#include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetRegisterInfo.h" #include "llvm/Target/TargetSubtargetInfo.h" using namespace llvm; @@ -38,17 +37,17 @@ using namespace llvm; STATISTIC(LoadsClustered, "Number of loads clustered together"); -// This allows latency based scheduler to notice high latency instructions -// without a target itinerary. The choise if number here has more to do with -// balancing scheduler heursitics than with the actual machine latency. +// This allows the latency-based scheduler to notice high latency instructions +// without a target itinerary. The choice of number here has more to do with +// balancing scheduler heuristics than with the actual machine latency. static cl::opt HighLatencyCycles( "sched-high-latency-cycles", cl::Hidden, cl::init(10), cl::desc("Roughly estimate the number of cycles that 'long latency'" "instructions take for targets with no itinerary")); ScheduleDAGSDNodes::ScheduleDAGSDNodes(MachineFunction &mf) - : ScheduleDAG(mf), BB(nullptr), DAG(nullptr), - InstrItins(mf.getTarget().getInstrItineraryData()) {} + : ScheduleDAG(mf), BB(nullptr), DAG(nullptr), + InstrItins(mf.getSubtarget().getInstrItineraryData()) {} /// Run - perform scheduling. 
/// @@ -120,15 +119,20 @@ static void CheckForPhysRegDependency(SDNode *Def, SDNode *User, unsigned Op, return; unsigned ResNo = User->getOperand(2).getResNo(); - if (Def->isMachineOpcode()) { + if (Def->getOpcode() == ISD::CopyFromReg && + cast(Def->getOperand(1))->getReg() == Reg) { + PhysReg = Reg; + } else if (Def->isMachineOpcode()) { const MCInstrDesc &II = TII->get(Def->getMachineOpcode()); if (ResNo >= II.getNumDefs() && - II.ImplicitDefs[ResNo - II.getNumDefs()] == Reg) { + II.ImplicitDefs[ResNo - II.getNumDefs()] == Reg) PhysReg = Reg; - const TargetRegisterClass *RC = - TRI->getMinimalPhysRegClass(Reg, Def->getValueType(ResNo)); - Cost = RC->getCopyCost(); - } + } + + if (PhysReg != 0) { + const TargetRegisterClass *RC = + TRI->getMinimalPhysRegClass(Reg, Def->getSimpleValueType(ResNo)); + Cost = RC->getCopyCost(); } } @@ -136,7 +140,7 @@ static void CheckForPhysRegDependency(SDNode *Def, SDNode *User, unsigned Op, static void CloneNodeWithValues(SDNode *N, SelectionDAG *DAG, SmallVectorImpl &VTs, SDValue ExtraOper = SDValue()) { - SmallVector Ops; + SmallVector Ops; for (unsigned I = 0, E = N->getNumOperands(); I != E; ++I) Ops.push_back(N->getOperand(I)); @@ -226,7 +230,7 @@ void ScheduleDAGSDNodes::ClusterNeighboringLoads(SDNode *Node) { for (SDNode::use_iterator I = Chain->use_begin(), E = Chain->use_end(); I != E && UseCount < 100; ++I, ++UseCount) { SDNode *User = *I; - if (User == Node || !Visited.insert(User)) + if (User == Node || !Visited.insert(User).second) continue; int64_t Offset1, Offset2; if (!TII->areLoadsFromSameBasePtr(Base, User, Offset1, Offset2) || @@ -339,7 +343,7 @@ void ScheduleDAGSDNodes::BuildSchedUnits() { // Add all operands to the worklist unless they've already been added. for (unsigned i = 0, e = NI->getNumOperands(); i != e; ++i) - if (Visited.insert(NI->getOperand(i).getNode())) + if (Visited.insert(NI->getOperand(i).getNode()).second) Worklist.push_back(NI->getOperand(i).getNode()); if (isPassiveNode(NI)) // Leaf node, e.g. a TargetImmediate. @@ -425,7 +429,7 @@ void ScheduleDAGSDNodes::BuildSchedUnits() { } void ScheduleDAGSDNodes::AddSchedEdges() { - const TargetSubtargetInfo &ST = TM.getSubtarget(); + const TargetSubtargetInfo &ST = MF.getSubtarget(); // Check to see if the scheduler cares about latencies. bool UnitLatencies = forceUnitLatencies(); @@ -733,7 +737,7 @@ ProcessSourceNode(SDNode *N, SelectionDAG *DAG, InstrEmitter &Emitter, SmallVectorImpl > &Orders, SmallSet &Seen) { unsigned Order = N->getIROrder(); - if (!Order || !Seen.insert(Order)) { + if (!Order || !Seen.insert(Order).second) { // Process any valid SDDbgValues even if node does not have any order // assigned. 
ProcessSDDbgValues(N, DAG, Emitter, Orders, VRBaseMap, 0); diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h index 39ebadf..2cd1f4b 100644 --- a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h +++ b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h @@ -12,8 +12,8 @@ // //===----------------------------------------------------------------------===// -#ifndef SCHEDULEDAGSDNODES_H -#define SCHEDULEDAGSDNODES_H +#ifndef LLVM_LIB_CODEGEN_SELECTIONDAG_SCHEDULEDAGSDNODES_H +#define LLVM_LIB_CODEGEN_SELECTIONDAG_SCHEDULEDAGSDNODES_H #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/ScheduleDAG.h" diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGVLIW.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGVLIW.cpp index 4589b0c..418b58e 100644 --- a/lib/CodeGen/SelectionDAG/ScheduleDAGVLIW.cpp +++ b/lib/CodeGen/SelectionDAG/ScheduleDAGVLIW.cpp @@ -31,6 +31,7 @@ #include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/Target/TargetSubtargetInfo.h" #include using namespace llvm; @@ -71,10 +72,8 @@ public: AliasAnalysis *aa, SchedulingPriorityQueue *availqueue) : ScheduleDAGSDNodes(mf), AvailableQueue(availqueue), AA(aa) { - - const TargetMachine &tm = mf.getTarget(); - HazardRec = tm.getInstrInfo()->CreateTargetHazardRecognizer( - tm.getSubtargetImpl(), this); + const TargetSubtargetInfo &STI = mf.getSubtarget(); + HazardRec = STI.getInstrInfo()->CreateTargetHazardRecognizer(&STI, this); } ~ScheduleDAGVLIW() { diff --git a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp index daff1f2..7961e66 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -46,6 +46,7 @@ #include "llvm/Target/TargetOptions.h" #include "llvm/Target/TargetRegisterInfo.h" #include "llvm/Target/TargetSelectionDAGInfo.h" +#include "llvm/Target/TargetSubtargetInfo.h" #include #include @@ -95,7 +96,7 @@ bool ConstantFPSDNode::isValueValidForType(EVT VT, /// BUILD_VECTOR where all of the elements are ~0 or undef. bool ISD::isBuildVectorAllOnes(const SDNode *N) { // Look through a bit convert. - if (N->getOpcode() == ISD::BITCAST) + while (N->getOpcode() == ISD::BITCAST) N = N->getOperand(0).getNode(); if (N->getOpcode() != ISD::BUILD_VECTOR) return false; @@ -143,7 +144,7 @@ bool ISD::isBuildVectorAllOnes(const SDNode *N) { /// BUILD_VECTOR where all of the elements are 0 or undef. bool ISD::isBuildVectorAllZeros(const SDNode *N) { // Look through a bit convert. - if (N->getOpcode() == ISD::BITCAST) + while (N->getOpcode() == ISD::BITCAST) N = N->getOperand(0).getNode(); if (N->getOpcode() != ISD::BUILD_VECTOR) return false; @@ -686,6 +687,15 @@ void SelectionDAG::DeleteNodeNotInCSEMaps(SDNode *N) { DeallocateNode(N); } +void SDDbgInfo::erase(const SDNode *Node) { + DbgValMapType::iterator I = DbgValMap.find(Node); + if (I == DbgValMap.end()) + return; + for (auto &Val: I->second) + Val->setIsInvalidated(); + DbgValMap.erase(I); +} + void SelectionDAG::DeallocateNode(SDNode *N) { if (N->OperandsNeedDelete) delete[] N->OperandList; @@ -696,10 +706,60 @@ void SelectionDAG::DeallocateNode(SDNode *N) { NodeAllocator.Deallocate(AllNodes.remove(N)); - // If any of the SDDbgValue nodes refer to this SDNode, invalidate them. 
- ArrayRef DbgVals = DbgInfo->getSDDbgValues(N); - for (unsigned i = 0, e = DbgVals.size(); i != e; ++i) - DbgVals[i]->setIsInvalidated(); + // If any of the SDDbgValue nodes refer to this SDNode, invalidate + // them and forget about that node. + DbgInfo->erase(N); +} + +#ifndef NDEBUG +/// VerifySDNode - Sanity check the given SDNode. Aborts if it is invalid. +static void VerifySDNode(SDNode *N) { + switch (N->getOpcode()) { + default: + break; + case ISD::BUILD_PAIR: { + EVT VT = N->getValueType(0); + assert(N->getNumValues() == 1 && "Too many results!"); + assert(!VT.isVector() && (VT.isInteger() || VT.isFloatingPoint()) && + "Wrong return type!"); + assert(N->getNumOperands() == 2 && "Wrong number of operands!"); + assert(N->getOperand(0).getValueType() == N->getOperand(1).getValueType() && + "Mismatched operand types!"); + assert(N->getOperand(0).getValueType().isInteger() == VT.isInteger() && + "Wrong operand type!"); + assert(VT.getSizeInBits() == 2 * N->getOperand(0).getValueSizeInBits() && + "Wrong return type size"); + break; + } + case ISD::BUILD_VECTOR: { + assert(N->getNumValues() == 1 && "Too many results!"); + assert(N->getValueType(0).isVector() && "Wrong return type!"); + assert(N->getNumOperands() == N->getValueType(0).getVectorNumElements() && + "Wrong number of operands!"); + EVT EltVT = N->getValueType(0).getVectorElementType(); + for (SDNode::op_iterator I = N->op_begin(), E = N->op_end(); I != E; ++I) { + assert((I->getValueType() == EltVT || + (EltVT.isInteger() && I->getValueType().isInteger() && + EltVT.bitsLE(I->getValueType()))) && + "Wrong operand type!"); + assert(I->getValueType() == N->getOperand(0).getValueType() && + "Operands must all have the same type"); + } + break; + } + } +} +#endif // NDEBUG + +/// \brief Insert a newly allocated node into the DAG. +/// +/// Handles insertion into the all nodes list and CSE map, as well as +/// verification and other common operations when a new node is allocated. +void SelectionDAG::InsertNode(SDNode *N) { + AllNodes.push_back(N); +#ifndef NDEBUG + VerifySDNode(N); +#endif } /// RemoveNodeFromCSEMaps - Take the specified node out of the CSE map that @@ -839,83 +899,6 @@ SDNode *SelectionDAG::FindModifiedNodeSlot(SDNode *N, ArrayRef Ops, return Node; } -#ifndef NDEBUG -/// VerifyNodeCommon - Sanity check the given node. Aborts if it is invalid. 
-static void VerifyNodeCommon(SDNode *N) { - switch (N->getOpcode()) { - default: - break; - case ISD::BUILD_PAIR: { - EVT VT = N->getValueType(0); - assert(N->getNumValues() == 1 && "Too many results!"); - assert(!VT.isVector() && (VT.isInteger() || VT.isFloatingPoint()) && - "Wrong return type!"); - assert(N->getNumOperands() == 2 && "Wrong number of operands!"); - assert(N->getOperand(0).getValueType() == N->getOperand(1).getValueType() && - "Mismatched operand types!"); - assert(N->getOperand(0).getValueType().isInteger() == VT.isInteger() && - "Wrong operand type!"); - assert(VT.getSizeInBits() == 2 * N->getOperand(0).getValueSizeInBits() && - "Wrong return type size"); - break; - } - case ISD::BUILD_VECTOR: { - assert(N->getNumValues() == 1 && "Too many results!"); - assert(N->getValueType(0).isVector() && "Wrong return type!"); - assert(N->getNumOperands() == N->getValueType(0).getVectorNumElements() && - "Wrong number of operands!"); - EVT EltVT = N->getValueType(0).getVectorElementType(); - for (SDNode::op_iterator I = N->op_begin(), E = N->op_end(); I != E; ++I) { - assert((I->getValueType() == EltVT || - (EltVT.isInteger() && I->getValueType().isInteger() && - EltVT.bitsLE(I->getValueType()))) && - "Wrong operand type!"); - assert(I->getValueType() == N->getOperand(0).getValueType() && - "Operands must all have the same type"); - } - break; - } - } -} - -/// VerifySDNode - Sanity check the given SDNode. Aborts if it is invalid. -static void VerifySDNode(SDNode *N) { - // The SDNode allocators cannot be used to allocate nodes with fields that are - // not present in an SDNode! - assert(!isa(N) && "Bad MemSDNode!"); - assert(!isa(N) && "Bad ShuffleVectorSDNode!"); - assert(!isa(N) && "Bad ConstantSDNode!"); - assert(!isa(N) && "Bad ConstantFPSDNode!"); - assert(!isa(N) && "Bad GlobalAddressSDNode!"); - assert(!isa(N) && "Bad FrameIndexSDNode!"); - assert(!isa(N) && "Bad JumpTableSDNode!"); - assert(!isa(N) && "Bad ConstantPoolSDNode!"); - assert(!isa(N) && "Bad BasicBlockSDNode!"); - assert(!isa(N) && "Bad SrcValueSDNode!"); - assert(!isa(N) && "Bad MDNodeSDNode!"); - assert(!isa(N) && "Bad RegisterSDNode!"); - assert(!isa(N) && "Bad BlockAddressSDNode!"); - assert(!isa(N) && "Bad EHLabelSDNode!"); - assert(!isa(N) && "Bad ExternalSymbolSDNode!"); - assert(!isa(N) && "Bad CondCodeSDNode!"); - assert(!isa(N) && "Bad CvtRndSatSDNode!"); - assert(!isa(N) && "Bad VTSDNode!"); - assert(!isa(N) && "Bad MachineSDNode!"); - - VerifyNodeCommon(N); -} - -/// VerifyMachineNode - Sanity check the given MachineNode. Aborts if it is -/// invalid. -static void VerifyMachineNode(SDNode *N) { - // The MachineNode allocators cannot be used to allocate nodes with fields - // that are not present in a MachineNode! - // Currently there are no such nodes. - - VerifyNodeCommon(N); -} -#endif // NDEBUG - /// getEVTAlignment - Compute the default alignment value for the /// given type. /// @@ -924,22 +907,23 @@ unsigned SelectionDAG::getEVTAlignment(EVT VT) const { PointerType::get(Type::getInt8Ty(*getContext()), 0) : VT.getTypeForEVT(*getContext()); - return TM.getTargetLowering()->getDataLayout()->getABITypeAlignment(Ty); + return TLI->getDataLayout()->getABITypeAlignment(Ty); } // EntryNode could meaningfully have debug info if we can find it... 
SelectionDAG::SelectionDAG(const TargetMachine &tm, CodeGenOpt::Level OL) - : TM(tm), TSI(*tm.getSelectionDAGInfo()), TLI(nullptr), OptLevel(OL), - EntryNode(ISD::EntryToken, 0, DebugLoc(), getVTList(MVT::Other)), - Root(getEntryNode()), NewNodesMustHaveLegalTypes(false), - UpdateListeners(nullptr) { + : TM(tm), TSI(nullptr), TLI(nullptr), OptLevel(OL), + EntryNode(ISD::EntryToken, 0, DebugLoc(), getVTList(MVT::Other)), + Root(getEntryNode()), NewNodesMustHaveLegalTypes(false), + UpdateListeners(nullptr) { AllNodes.push_back(&EntryNode); DbgInfo = new SDDbgInfo(); } -void SelectionDAG::init(MachineFunction &mf, const TargetLowering *tli) { +void SelectionDAG::init(MachineFunction &mf) { MF = &mf; - TLI = tli; + TLI = getSubtarget().getTargetLowering(); + TSI = getSubtarget().getSelectionDAGInfo(); Context = &mf.getFunction()->getContext(); } @@ -1108,8 +1092,6 @@ SDValue SelectionDAG::getConstant(const ConstantInt &Val, EVT VT, bool isT, EVT EltVT = VT.getScalarType(); const ConstantInt *Elt = &Val; - const TargetLowering *TLI = TM.getTargetLowering(); - // In some cases the vector type is legal but the element type is illegal and // needs to be promoted, for example v8i8 on ARM. In this case, promote the // inserted value (the type does not need to match the vector element type). @@ -1185,7 +1167,7 @@ SDValue SelectionDAG::getConstant(const ConstantInt &Val, EVT VT, bool isT, if (!N) { N = new (NodeAllocator) ConstantSDNode(isT, isO, Elt, EltVT); CSEMap.InsertNode(N, IP); - AllNodes.push_back(N); + InsertNode(N); } SDValue Result(N, 0); @@ -1198,7 +1180,7 @@ SDValue SelectionDAG::getConstant(const ConstantInt &Val, EVT VT, bool isT, } SDValue SelectionDAG::getIntPtrConstant(uint64_t Val, bool isTarget) { - return getConstant(Val, TM.getTargetLowering()->getPointerTy(), isTarget); + return getConstant(Val, TLI->getPointerTy(), isTarget); } @@ -1227,7 +1209,7 @@ SDValue SelectionDAG::getConstantFP(const ConstantFP& V, EVT VT, bool isTarget){ if (!N) { N = new (NodeAllocator) ConstantFPSDNode(isTarget, &V, EltVT); CSEMap.InsertNode(N, IP); - AllNodes.push_back(N); + InsertNode(N); } SDValue Result(N, 0); @@ -1263,7 +1245,6 @@ SDValue SelectionDAG::getGlobalAddress(const GlobalValue *GV, SDLoc DL, unsigned char TargetFlags) { assert((TargetFlags == 0 || isTargetGA) && "Cannot set target flags on target-independent globals"); - const TargetLowering *TLI = TM.getTargetLowering(); // Truncate (with sign-extension) the offset value to the pointer size. 
unsigned BitWidth = TLI->getPointerTypeSizeInBits(GV->getType()); @@ -1290,7 +1271,7 @@ SDValue SelectionDAG::getGlobalAddress(const GlobalValue *GV, SDLoc DL, DL.getDebugLoc(), GV, VT, Offset, TargetFlags); CSEMap.InsertNode(N, IP); - AllNodes.push_back(N); + InsertNode(N); return SDValue(N, 0); } @@ -1305,7 +1286,7 @@ SDValue SelectionDAG::getFrameIndex(int FI, EVT VT, bool isTarget) { SDNode *N = new (NodeAllocator) FrameIndexSDNode(FI, VT, isTarget); CSEMap.InsertNode(N, IP); - AllNodes.push_back(N); + InsertNode(N); return SDValue(N, 0); } @@ -1325,7 +1306,7 @@ SDValue SelectionDAG::getJumpTable(int JTI, EVT VT, bool isTarget, SDNode *N = new (NodeAllocator) JumpTableSDNode(JTI, VT, isTarget, TargetFlags); CSEMap.InsertNode(N, IP); - AllNodes.push_back(N); + InsertNode(N); return SDValue(N, 0); } @@ -1336,8 +1317,7 @@ SDValue SelectionDAG::getConstantPool(const Constant *C, EVT VT, assert((TargetFlags == 0 || isTarget) && "Cannot set target flags on target-independent globals"); if (Alignment == 0) - Alignment = - TM.getTargetLowering()->getDataLayout()->getPrefTypeAlignment(C->getType()); + Alignment = TLI->getDataLayout()->getPrefTypeAlignment(C->getType()); unsigned Opc = isTarget ? ISD::TargetConstantPool : ISD::ConstantPool; FoldingSetNodeID ID; AddNodeIDNode(ID, Opc, getVTList(VT), None); @@ -1352,7 +1332,7 @@ SDValue SelectionDAG::getConstantPool(const Constant *C, EVT VT, SDNode *N = new (NodeAllocator) ConstantPoolSDNode(isTarget, C, VT, Offset, Alignment, TargetFlags); CSEMap.InsertNode(N, IP); - AllNodes.push_back(N); + InsertNode(N); return SDValue(N, 0); } @@ -1364,8 +1344,7 @@ SDValue SelectionDAG::getConstantPool(MachineConstantPoolValue *C, EVT VT, assert((TargetFlags == 0 || isTarget) && "Cannot set target flags on target-independent globals"); if (Alignment == 0) - Alignment = - TM.getTargetLowering()->getDataLayout()->getPrefTypeAlignment(C->getType()); + Alignment = TLI->getDataLayout()->getPrefTypeAlignment(C->getType()); unsigned Opc = isTarget ? 
ISD::TargetConstantPool : ISD::ConstantPool; FoldingSetNodeID ID; AddNodeIDNode(ID, Opc, getVTList(VT), None); @@ -1380,7 +1359,7 @@ SDValue SelectionDAG::getConstantPool(MachineConstantPoolValue *C, EVT VT, SDNode *N = new (NodeAllocator) ConstantPoolSDNode(isTarget, C, VT, Offset, Alignment, TargetFlags); CSEMap.InsertNode(N, IP); - AllNodes.push_back(N); + InsertNode(N); return SDValue(N, 0); } @@ -1398,7 +1377,7 @@ SDValue SelectionDAG::getTargetIndex(int Index, EVT VT, int64_t Offset, SDNode *N = new (NodeAllocator) TargetIndexSDNode(Index, VT, Offset, TargetFlags); CSEMap.InsertNode(N, IP); - AllNodes.push_back(N); + InsertNode(N); return SDValue(N, 0); } @@ -1412,7 +1391,7 @@ SDValue SelectionDAG::getBasicBlock(MachineBasicBlock *MBB) { SDNode *N = new (NodeAllocator) BasicBlockSDNode(MBB); CSEMap.InsertNode(N, IP); - AllNodes.push_back(N); + InsertNode(N); return SDValue(N, 0); } @@ -1426,7 +1405,7 @@ SDValue SelectionDAG::getValueType(EVT VT) { if (N) return SDValue(N, 0); N = new (NodeAllocator) VTSDNode(VT); - AllNodes.push_back(N); + InsertNode(N); return SDValue(N, 0); } @@ -1434,7 +1413,7 @@ SDValue SelectionDAG::getExternalSymbol(const char *Sym, EVT VT) { SDNode *&N = ExternalSymbols[Sym]; if (N) return SDValue(N, 0); N = new (NodeAllocator) ExternalSymbolSDNode(false, Sym, 0, VT); - AllNodes.push_back(N); + InsertNode(N); return SDValue(N, 0); } @@ -1445,7 +1424,7 @@ SDValue SelectionDAG::getTargetExternalSymbol(const char *Sym, EVT VT, TargetFlags)]; if (N) return SDValue(N, 0); N = new (NodeAllocator) ExternalSymbolSDNode(true, Sym, TargetFlags, VT); - AllNodes.push_back(N); + InsertNode(N); return SDValue(N, 0); } @@ -1456,7 +1435,7 @@ SDValue SelectionDAG::getCondCode(ISD::CondCode Cond) { if (!CondCodeNodes[Cond]) { CondCodeSDNode *N = new (NodeAllocator) CondCodeSDNode(Cond); CondCodeNodes[Cond] = N; - AllNodes.push_back(N); + InsertNode(N); } return SDValue(CondCodeNodes[Cond], 0); @@ -1594,10 +1573,31 @@ SDValue SelectionDAG::getVectorShuffle(EVT VT, SDLoc dl, SDValue N1, dl.getDebugLoc(), N1, N2, MaskAlloc); CSEMap.InsertNode(N, IP); - AllNodes.push_back(N); + InsertNode(N); return SDValue(N, 0); } +SDValue SelectionDAG::getCommutedVectorShuffle(const ShuffleVectorSDNode &SV) { + MVT VT = SV.getSimpleValueType(0); + unsigned NumElems = VT.getVectorNumElements(); + SmallVector MaskVec; + + for (unsigned i = 0; i != NumElems; ++i) { + int Idx = SV.getMaskElt(i); + if (Idx >= 0) { + if (Idx < (int)NumElems) + Idx += NumElems; + else + Idx -= NumElems; + } + MaskVec.push_back(Idx); + } + + SDValue Op0 = SV.getOperand(0); + SDValue Op1 = SV.getOperand(1); + return getVectorShuffle(VT, SDLoc(&SV), Op1, Op0, &MaskVec[0]); +} + SDValue SelectionDAG::getConvertRndSat(EVT VT, SDLoc dl, SDValue Val, SDValue DTy, SDValue STy, SDValue Rnd, SDValue Sat, @@ -1619,7 +1619,7 @@ SDValue SelectionDAG::getConvertRndSat(EVT VT, SDLoc dl, dl.getDebugLoc(), Ops, Code); CSEMap.InsertNode(N, IP); - AllNodes.push_back(N); + InsertNode(N); return SDValue(N, 0); } @@ -1633,7 +1633,7 @@ SDValue SelectionDAG::getRegister(unsigned RegNo, EVT VT) { SDNode *N = new (NodeAllocator) RegisterSDNode(RegNo, VT); CSEMap.InsertNode(N, IP); - AllNodes.push_back(N); + InsertNode(N); return SDValue(N, 0); } @@ -1647,7 +1647,7 @@ SDValue SelectionDAG::getRegisterMask(const uint32_t *RegMask) { SDNode *N = new (NodeAllocator) RegisterMaskSDNode(RegMask); CSEMap.InsertNode(N, IP); - AllNodes.push_back(N); + InsertNode(N); return SDValue(N, 0); } @@ -1663,7 +1663,7 @@ SDValue SelectionDAG::getEHLabel(SDLoc dl, 
SDValue Root, MCSymbol *Label) { SDNode *N = new (NodeAllocator) EHLabelSDNode(dl.getIROrder(), dl.getDebugLoc(), Root, Label); CSEMap.InsertNode(N, IP); - AllNodes.push_back(N); + InsertNode(N); return SDValue(N, 0); } @@ -1686,7 +1686,7 @@ SDValue SelectionDAG::getBlockAddress(const BlockAddress *BA, EVT VT, SDNode *N = new (NodeAllocator) BlockAddressSDNode(Opc, VT, BA, Offset, TargetFlags); CSEMap.InsertNode(N, IP); - AllNodes.push_back(N); + InsertNode(N); return SDValue(N, 0); } @@ -1704,7 +1704,7 @@ SDValue SelectionDAG::getSrcValue(const Value *V) { SDNode *N = new (NodeAllocator) SrcValueSDNode(V); CSEMap.InsertNode(N, IP); - AllNodes.push_back(N); + InsertNode(N); return SDValue(N, 0); } @@ -1720,7 +1720,7 @@ SDValue SelectionDAG::getMDNode(const MDNode *MD) { SDNode *N = new (NodeAllocator) MDNodeSDNode(MD); CSEMap.InsertNode(N, IP); - AllNodes.push_back(N); + InsertNode(N); return SDValue(N, 0); } @@ -1741,7 +1741,7 @@ SDValue SelectionDAG::getAddrSpaceCast(SDLoc dl, EVT VT, SDValue Ptr, dl.getDebugLoc(), VT, Ptr, SrcAS, DestAS); CSEMap.InsertNode(N, IP); - AllNodes.push_back(N); + InsertNode(N); return SDValue(N, 0); } @@ -1749,7 +1749,7 @@ SDValue SelectionDAG::getAddrSpaceCast(SDLoc dl, EVT VT, SDValue Ptr, /// the target's desired shift amount type. SDValue SelectionDAG::getShiftAmountOperand(EVT LHSTy, SDValue Op) { EVT OpTy = Op.getValueType(); - EVT ShTy = TM.getTargetLowering()->getShiftAmountTy(LHSTy); + EVT ShTy = TLI->getShiftAmountTy(LHSTy); if (OpTy == ShTy || OpTy.isVector()) return Op; ISD::NodeType Opcode = OpTy.bitsGT(ShTy) ? ISD::TRUNCATE : ISD::ZERO_EXTEND; @@ -1762,7 +1762,6 @@ SDValue SelectionDAG::CreateStackTemporary(EVT VT, unsigned minAlign) { MachineFrameInfo *FrameInfo = getMachineFunction().getFrameInfo(); unsigned ByteSize = VT.getStoreSize(); Type *Ty = VT.getTypeForEVT(*getContext()); - const TargetLowering *TLI = TM.getTargetLowering(); unsigned StackAlign = std::max((unsigned)TLI->getDataLayout()->getPrefTypeAlignment(Ty), minAlign); @@ -1777,7 +1776,6 @@ SDValue SelectionDAG::CreateStackTemporary(EVT VT1, EVT VT2) { VT2.getStoreSizeInBits())/8; Type *Ty1 = VT1.getTypeForEVT(*getContext()); Type *Ty2 = VT2.getTypeForEVT(*getContext()); - const TargetLowering *TLI = TM.getTargetLowering(); const DataLayout *TD = TLI->getDataLayout(); unsigned Align = std::max(TD->getPrefTypeAlignment(Ty1), TD->getPrefTypeAlignment(Ty2)); @@ -1796,7 +1794,6 @@ SDValue SelectionDAG::FoldSetCC(EVT VT, SDValue N1, case ISD::SETFALSE2: return getConstant(0, VT); case ISD::SETTRUE: case ISD::SETTRUE2: { - const TargetLowering *TLI = TM.getTargetLowering(); TargetLowering::BooleanContent Cnt = TLI->getBooleanContents(N1->getValueType(0)); return getConstant( @@ -1885,7 +1882,7 @@ SDValue SelectionDAG::FoldSetCC(EVT VT, SDValue N1, // Ensure that the constant occurs on the RHS. ISD::CondCode SwappedCond = ISD::getSetCCSwappedOperands(Cond); MVT CompVT = N1.getValueType().getSimpleVT(); - if (!TM.getTargetLowering()->isCondCodeLegal(SwappedCond, CompVT)) + if (!TLI->isCondCodeLegal(SwappedCond, CompVT)) return SDValue(); return getSetCC(dl, VT, N2, N1, SwappedCond); @@ -1921,7 +1918,6 @@ bool SelectionDAG::MaskedValueIsZero(SDValue Op, const APInt &Mask, /// them in the KnownZero/KnownOne bitsets. 
void SelectionDAG::computeKnownBits(SDValue Op, APInt &KnownZero, APInt &KnownOne, unsigned Depth) const { - const TargetLowering *TLI = TM.getTargetLowering(); unsigned BitWidth = Op.getValueType().getScalarType().getSizeInBits(); KnownZero = KnownOne = APInt(BitWidth, 0); // Don't know anything. @@ -2357,7 +2353,6 @@ void SelectionDAG::computeKnownBits(SDValue Op, APInt &KnownZero, /// information. For example, immediately after an "SRA X, 2", we know that /// the top 3 bits are all equal to each other, so we return 3. unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, unsigned Depth) const{ - const TargetLowering *TLI = TM.getTargetLowering(); EVT VT = Op.getValueType(); assert(VT.isInteger() && "Invalid VT!"); unsigned VTBits = VT.getScalarType().getSizeInBits(); @@ -2655,10 +2650,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, EVT VT) { DL.getDebugLoc(), getVTList(VT)); CSEMap.InsertNode(N, IP); - AllNodes.push_back(N); -#ifndef NDEBUG - VerifySDNode(N); -#endif + InsertNode(N); return SDValue(N, 0); } @@ -2753,7 +2745,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, case ISD::FP_TO_UINT: { integerPart x[2]; bool ignored; - assert(integerPartWidth >= 64); + static_assert(integerPartWidth >= 64, "APFloat parts too small!"); // FIXME need to be more flexible about rounding mode. APFloat::opStatus s = V.convertToInteger(x, VT.getSizeInBits(), Opcode==ISD::FP_TO_SINT, @@ -2772,6 +2764,31 @@ SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, } } + // Constant fold unary operations with a vector integer operand. + if (BuildVectorSDNode *BV = dyn_cast(Operand.getNode())) { + if (BV->isConstant()) { + switch (Opcode) { + default: + // FIXME: Entirely reasonable to perform folding of other unary + // operations here as the need arises. + break; + case ISD::UINT_TO_FP: + case ISD::SINT_TO_FP: { + SmallVector Ops; + for (int i = 0, e = VT.getVectorNumElements(); i != e; ++i) { + SDValue OpN = BV->getOperand(i); + // Let the above scalar folding handle the conversion of each + // element. + OpN = getNode(ISD::SINT_TO_FP, DL, VT.getVectorElementType(), + OpN); + Ops.push_back(OpN); + } + return getNode(ISD::BUILD_VECTOR, DL, VT, Ops); + } + } + } + } + unsigned OpOpcode = Operand.getNode()->getOpcode(); switch (Opcode) { case ISD::TokenFactor: @@ -2931,10 +2948,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, DL.getDebugLoc(), VTs, Operand); } - AllNodes.push_back(N); -#ifndef NDEBUG - VerifySDNode(N); -#endif + InsertNode(N); return SDValue(N, 0); } @@ -3375,6 +3389,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, EVT VT, SDValue N1, } // Constant fold FP operations. 
+ bool HasFPExceptions = TLI->hasFloatingPointExceptions(); ConstantFPSDNode *N1CFP = dyn_cast(N1.getNode()); ConstantFPSDNode *N2CFP = dyn_cast(N2.getNode()); if (N1CFP) { @@ -3388,28 +3403,32 @@ SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, EVT VT, SDValue N1, switch (Opcode) { case ISD::FADD: s = V1.add(V2, APFloat::rmNearestTiesToEven); - if (s != APFloat::opInvalidOp) + if (!HasFPExceptions || s != APFloat::opInvalidOp) return getConstantFP(V1, VT); break; case ISD::FSUB: s = V1.subtract(V2, APFloat::rmNearestTiesToEven); - if (s!=APFloat::opInvalidOp) + if (!HasFPExceptions || s!=APFloat::opInvalidOp) return getConstantFP(V1, VT); break; case ISD::FMUL: s = V1.multiply(V2, APFloat::rmNearestTiesToEven); - if (s!=APFloat::opInvalidOp) + if (!HasFPExceptions || s!=APFloat::opInvalidOp) return getConstantFP(V1, VT); break; case ISD::FDIV: s = V1.divide(V2, APFloat::rmNearestTiesToEven); - if (s!=APFloat::opInvalidOp && s!=APFloat::opDivByZero) + if (!HasFPExceptions || (s!=APFloat::opInvalidOp && + s!=APFloat::opDivByZero)) { return getConstantFP(V1, VT); + } break; case ISD::FREM : s = V1.mod(V2, APFloat::rmNearestTiesToEven); - if (s!=APFloat::opInvalidOp && s!=APFloat::opDivByZero) + if (!HasFPExceptions || (s!=APFloat::opInvalidOp && + s!=APFloat::opDivByZero)) { return getConstantFP(V1, VT); + } break; case ISD::FCOPYSIGN: V1.copySign(V2); @@ -3526,10 +3545,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, EVT VT, SDValue N1, N = GetBinarySDNode(Opcode, DL, VTs, N1, N2, nuw, nsw, exact); } - AllNodes.push_back(N); -#ifndef NDEBUG - VerifySDNode(N); -#endif + InsertNode(N); return SDValue(N, 0); } @@ -3633,10 +3649,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, EVT VT, DL.getDebugLoc(), VTs, N1, N2, N3); } - AllNodes.push_back(N); -#ifndef NDEBUG - VerifySDNode(N); -#endif + InsertNode(N); return SDValue(N, 0); } @@ -3802,7 +3815,7 @@ static bool FindOptimalMemOpLowering(std::vector &MemOps, if (VT == MVT::Other) { unsigned AS = 0; if (DstAlign >= TLI.getDataLayout()->getPointerPrefAlignment(AS) || - TLI.allowsUnalignedMemoryAccesses(VT, AS)) { + TLI.allowsMisalignedMemoryAccesses(VT, AS, DstAlign)) { VT = TLI.getPointerTy(); } else { switch (DstAlign & 7) { @@ -3862,7 +3875,7 @@ static bool FindOptimalMemOpLowering(std::vector &MemOps, unsigned AS = 0; if (NumMemOps && AllowOverlap && VTSize >= 8 && NewVTSize < Size && - TLI.allowsUnalignedMemoryAccesses(VT, AS, &Fast) && Fast) + TLI.allowsMisalignedMemoryAccesses(VT, AS, DstAlign, &Fast) && Fast) VTSize = Size; else { VT = NewVT; @@ -3926,7 +3939,7 @@ static SDValue getMemcpyLoadsAndStores(SelectionDAG &DAG, SDLoc dl, // Don't promote to an alignment that would require dynamic stack // realignment. 
- const TargetRegisterInfo *TRI = MF.getTarget().getRegisterInfo(); + const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo(); if (!TRI->needsStackRealignment(MF)) while (NewAlign > Align && TLI.getDataLayout()->exceedsNaturalStackAlignment(NewAlign)) @@ -3982,7 +3995,7 @@ static SDValue getMemcpyLoadsAndStores(SelectionDAG &DAG, SDLoc dl, Value = DAG.getExtLoad(ISD::EXTLOAD, dl, NVT, Chain, getMemBasePlusOffset(Src, SrcOff, dl, DAG), SrcPtrInfo.getWithOffset(SrcOff), VT, isVol, false, - MinAlign(SrcAlign, SrcOff)); + false, MinAlign(SrcAlign, SrcOff)); Store = DAG.getTruncStore(Chain, dl, Value, getMemBasePlusOffset(Dst, DstOff, dl, DAG), DstPtrInfo.getWithOffset(DstOff), VT, isVol, @@ -4202,9 +4215,8 @@ SDValue SelectionDAG::getMemcpy(SDValue Chain, SDLoc dl, SDValue Dst, // Then check to see if we should lower the memcpy with target-specific // code. If the target chooses to do this, this is the next best. SDValue Result = - TSI.EmitTargetCodeForMemcpy(*this, dl, Chain, Dst, Src, Size, Align, - isVol, AlwaysInline, - DstPtrInfo, SrcPtrInfo); + TSI->EmitTargetCodeForMemcpy(*this, dl, Chain, Dst, Src, Size, Align, + isVol, AlwaysInline, DstPtrInfo, SrcPtrInfo); if (Result.getNode()) return Result; @@ -4223,8 +4235,6 @@ SDValue SelectionDAG::getMemcpy(SDValue Chain, SDLoc dl, SDValue Dst, // beyond the given memory regions. But fixing this isn't easy, and most // people don't care. - const TargetLowering *TLI = TM.getTargetLowering(); - // Emit a library call. TargetLowering::ArgListTy Args; TargetLowering::ArgListEntry Entry; @@ -4270,17 +4280,14 @@ SDValue SelectionDAG::getMemmove(SDValue Chain, SDLoc dl, SDValue Dst, // Then check to see if we should lower the memmove with target-specific // code. If the target chooses to do this, this is the next best. - SDValue Result = - TSI.EmitTargetCodeForMemmove(*this, dl, Chain, Dst, Src, Size, Align, isVol, - DstPtrInfo, SrcPtrInfo); + SDValue Result = TSI->EmitTargetCodeForMemmove( + *this, dl, Chain, Dst, Src, Size, Align, isVol, DstPtrInfo, SrcPtrInfo); if (Result.getNode()) return Result; // FIXME: If the memmove is volatile, lowering it to plain libc memmove may // not be safe. See memcpy above for more details. - const TargetLowering *TLI = TM.getTargetLowering(); - // Emit a library call. TargetLowering::ArgListTy Args; TargetLowering::ArgListEntry Entry; @@ -4325,31 +4332,22 @@ SDValue SelectionDAG::getMemset(SDValue Chain, SDLoc dl, SDValue Dst, // Then check to see if we should lower the memset with target-specific // code. If the target chooses to do this, this is the next best. - SDValue Result = - TSI.EmitTargetCodeForMemset(*this, dl, Chain, Dst, Src, Size, Align, isVol, - DstPtrInfo); + SDValue Result = TSI->EmitTargetCodeForMemset(*this, dl, Chain, Dst, Src, + Size, Align, isVol, DstPtrInfo); if (Result.getNode()) return Result; // Emit a library call. - const TargetLowering *TLI = TM.getTargetLowering(); Type *IntPtrTy = TLI->getDataLayout()->getIntPtrType(*getContext()); TargetLowering::ArgListTy Args; TargetLowering::ArgListEntry Entry; Entry.Node = Dst; Entry.Ty = IntPtrTy; Args.push_back(Entry); - // Extend or truncate the argument to be an i32 value for the call. 
- if (Src.getValueType().bitsGT(MVT::i32)) - Src = getNode(ISD::TRUNCATE, dl, MVT::i32, Src); - else - Src = getNode(ISD::ZERO_EXTEND, dl, MVT::i32, Src); Entry.Node = Src; - Entry.Ty = Type::getInt32Ty(*getContext()); - Entry.isSExt = true; + Entry.Ty = Src.getValueType().getTypeForEVT(*getContext()); Args.push_back(Entry); Entry.Node = Size; Entry.Ty = IntPtrTy; - Entry.isSExt = false; Args.push_back(Entry); // FIXME: pass in SDLoc @@ -4396,7 +4394,7 @@ SDValue SelectionDAG::getAtomic(unsigned Opcode, SDLoc dl, EVT MemVT, SuccessOrdering, FailureOrdering, SynchScope); CSEMap.InsertNode(N, IP); - AllNodes.push_back(N); + InsertNode(N); return SDValue(N, 0); } @@ -4541,7 +4539,7 @@ SelectionDAG::getMemIntrinsicNode(unsigned Opcode, SDLoc dl, SDVTList VTList, ArrayRef Ops, EVT MemVT, MachinePointerInfo PtrInfo, unsigned Align, bool Vol, - bool ReadMem, bool WriteMem) { + bool ReadMem, bool WriteMem, unsigned Size) { if (Align == 0) // Ensure that codegen never sees alignment 0 Align = getEVTAlignment(MemVT); @@ -4553,8 +4551,10 @@ SelectionDAG::getMemIntrinsicNode(unsigned Opcode, SDLoc dl, SDVTList VTList, Flags |= MachineMemOperand::MOLoad; if (Vol) Flags |= MachineMemOperand::MOVolatile; + if (!Size) + Size = MemVT.getStoreSize(); MachineMemOperand *MMO = - MF.getMachineMemOperand(PtrInfo, Flags, MemVT.getStoreSize(), Align); + MF.getMachineMemOperand(PtrInfo, Flags, Size, Align); return getMemIntrinsicNode(Opcode, dl, VTList, Ops, MemVT, MMO); } @@ -4593,7 +4593,7 @@ SelectionDAG::getMemIntrinsicNode(unsigned Opcode, SDLoc dl, SDVTList VTList, dl.getDebugLoc(), VTList, Ops, MemVT, MMO); } - AllNodes.push_back(N); + InsertNode(N); return SDValue(N, 0); } @@ -4637,7 +4637,7 @@ SelectionDAG::getLoad(ISD::MemIndexedMode AM, ISD::LoadExtType ExtType, SDValue Ptr, SDValue Offset, MachinePointerInfo PtrInfo, EVT MemVT, bool isVolatile, bool isNonTemporal, bool isInvariant, - unsigned Alignment, const MDNode *TBAAInfo, + unsigned Alignment, const AAMDNodes &AAInfo, const MDNode *Ranges) { assert(Chain.getValueType() == MVT::Other && "Invalid chain type"); @@ -4660,7 +4660,7 @@ SelectionDAG::getLoad(ISD::MemIndexedMode AM, ISD::LoadExtType ExtType, MachineFunction &MF = getMachineFunction(); MachineMemOperand *MMO = MF.getMachineMemOperand(PtrInfo, Flags, MemVT.getStoreSize(), Alignment, - TBAAInfo, Ranges); + AAInfo, Ranges); return getLoad(AM, ExtType, VT, dl, Chain, Ptr, Offset, MemVT, MMO); } @@ -4709,7 +4709,7 @@ SelectionDAG::getLoad(ISD::MemIndexedMode AM, ISD::LoadExtType ExtType, dl.getDebugLoc(), VTs, AM, ExtType, MemVT, MMO); CSEMap.InsertNode(N, IP); - AllNodes.push_back(N); + InsertNode(N); return SDValue(N, 0); } @@ -4718,12 +4718,12 @@ SDValue SelectionDAG::getLoad(EVT VT, SDLoc dl, MachinePointerInfo PtrInfo, bool isVolatile, bool isNonTemporal, bool isInvariant, unsigned Alignment, - const MDNode *TBAAInfo, + const AAMDNodes &AAInfo, const MDNode *Ranges) { SDValue Undef = getUNDEF(Ptr.getValueType()); return getLoad(ISD::UNINDEXED, ISD::NON_EXTLOAD, VT, dl, Chain, Ptr, Undef, PtrInfo, VT, isVolatile, isNonTemporal, isInvariant, Alignment, - TBAAInfo, Ranges); + AAInfo, Ranges); } SDValue SelectionDAG::getLoad(EVT VT, SDLoc dl, @@ -4738,11 +4738,12 @@ SDValue SelectionDAG::getExtLoad(ISD::LoadExtType ExtType, SDLoc dl, EVT VT, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, EVT MemVT, bool isVolatile, bool isNonTemporal, - unsigned Alignment, const MDNode *TBAAInfo) { + bool isInvariant, unsigned Alignment, + const AAMDNodes &AAInfo) { SDValue Undef = 
getUNDEF(Ptr.getValueType()); return getLoad(ISD::UNINDEXED, ExtType, VT, dl, Chain, Ptr, Undef, - PtrInfo, MemVT, isVolatile, isNonTemporal, false, Alignment, - TBAAInfo); + PtrInfo, MemVT, isVolatile, isNonTemporal, isInvariant, + Alignment, AAInfo); } @@ -4769,7 +4770,7 @@ SelectionDAG::getIndexedLoad(SDValue OrigLoad, SDLoc dl, SDValue Base, SDValue SelectionDAG::getStore(SDValue Chain, SDLoc dl, SDValue Val, SDValue Ptr, MachinePointerInfo PtrInfo, bool isVolatile, bool isNonTemporal, - unsigned Alignment, const MDNode *TBAAInfo) { + unsigned Alignment, const AAMDNodes &AAInfo) { assert(Chain.getValueType() == MVT::Other && "Invalid chain type"); if (Alignment == 0) // Ensure that codegen never sees alignment 0 @@ -4788,7 +4789,7 @@ SDValue SelectionDAG::getStore(SDValue Chain, SDLoc dl, SDValue Val, MachineMemOperand *MMO = MF.getMachineMemOperand(PtrInfo, Flags, Val.getValueType().getStoreSize(), Alignment, - TBAAInfo); + AAInfo); return getStore(Chain, dl, Val, Ptr, MMO); } @@ -4816,7 +4817,7 @@ SDValue SelectionDAG::getStore(SDValue Chain, SDLoc dl, SDValue Val, dl.getDebugLoc(), VTs, ISD::UNINDEXED, false, VT, MMO); CSEMap.InsertNode(N, IP); - AllNodes.push_back(N); + InsertNode(N); return SDValue(N, 0); } @@ -4824,7 +4825,7 @@ SDValue SelectionDAG::getTruncStore(SDValue Chain, SDLoc dl, SDValue Val, SDValue Ptr, MachinePointerInfo PtrInfo, EVT SVT,bool isVolatile, bool isNonTemporal, unsigned Alignment, - const MDNode *TBAAInfo) { + const AAMDNodes &AAInfo) { assert(Chain.getValueType() == MVT::Other && "Invalid chain type"); if (Alignment == 0) // Ensure that codegen never sees alignment 0 @@ -4842,7 +4843,7 @@ SDValue SelectionDAG::getTruncStore(SDValue Chain, SDLoc dl, SDValue Val, MachineFunction &MF = getMachineFunction(); MachineMemOperand *MMO = MF.getMachineMemOperand(PtrInfo, Flags, SVT.getStoreSize(), Alignment, - TBAAInfo); + AAInfo); return getTruncStore(Chain, dl, Val, Ptr, SVT, MMO); } @@ -4885,7 +4886,7 @@ SDValue SelectionDAG::getTruncStore(SDValue Chain, SDLoc dl, SDValue Val, dl.getDebugLoc(), VTs, ISD::UNINDEXED, true, SVT, MMO); CSEMap.InsertNode(N, IP); - AllNodes.push_back(N); + InsertNode(N); return SDValue(N, 0); } @@ -4912,7 +4913,7 @@ SelectionDAG::getIndexedStore(SDValue OrigStore, SDLoc dl, SDValue Base, ST->getMemoryVT(), ST->getMemOperand()); CSEMap.InsertNode(N, IP); - AllNodes.push_back(N); + InsertNode(N); return SDValue(N, 0); } @@ -4991,10 +4992,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, EVT VT, VTs, Ops); } - AllNodes.push_back(N); -#ifndef NDEBUG - VerifySDNode(N); -#endif + InsertNode(N); return SDValue(N, 0); } @@ -5074,15 +5072,12 @@ SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, SDVTList VTList, VTList, Ops); } } - AllNodes.push_back(N); -#ifndef NDEBUG - VerifySDNode(N); -#endif + InsertNode(N); return SDValue(N, 0); } SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, SDVTList VTList) { - return getNode(Opcode, DL, VTList, ArrayRef()); + return getNode(Opcode, DL, VTList, None); } SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, SDVTList VTList, @@ -5464,6 +5459,10 @@ SDNode *SelectionDAG::UpdadeSDLocOnMergedSDNode(SDNode *N, SDLoc OLoc) { /// node, and because it doesn't require CSE recalculation for any of /// the node's users. /// +/// However, note that MorphNodeTo recursively deletes dead nodes from the DAG. +/// As a consequence it isn't appropriate to use from within the DAG combiner or +/// the legalizer which maintain worklists that would need to be updated when +/// deleting things. 
SDNode *SelectionDAG::MorphNodeTo(SDNode *N, unsigned Opc, SDVTList VTs, ArrayRef Ops) { unsigned NumOps = Ops.size(); @@ -5530,10 +5529,9 @@ SDNode *SelectionDAG::MorphNodeTo(SDNode *N, unsigned Opc, // new operands. if (!DeadNodeSet.empty()) { SmallVector DeadNodes; - for (SmallPtrSet::iterator I = DeadNodeSet.begin(), - E = DeadNodeSet.end(); I != E; ++I) - if ((*I)->use_empty()) - DeadNodes.push_back(*I); + for (SDNode *N : DeadNodeSet) + if (N->use_empty()) + DeadNodes.push_back(N); RemoveDeadNodes(DeadNodes); } @@ -5702,10 +5700,7 @@ SelectionDAG::getMachineNode(unsigned Opcode, SDLoc DL, SDVTList VTs, if (DoCSE) CSEMap.InsertNode(N, IP); - AllNodes.push_back(N); -#ifndef NDEBUG - VerifyMachineNode(N); -#endif + InsertNode(N); return N; } @@ -5751,26 +5746,24 @@ SDNode *SelectionDAG::getNodeIfExists(unsigned Opcode, SDVTList VTList, /// getDbgValue - Creates a SDDbgValue node. /// /// SDNode -SDDbgValue * -SelectionDAG::getDbgValue(MDNode *MDPtr, SDNode *N, unsigned R, - bool IsIndirect, uint64_t Off, - DebugLoc DL, unsigned O) { - return new (Allocator) SDDbgValue(MDPtr, N, R, IsIndirect, Off, DL, O); +SDDbgValue *SelectionDAG::getDbgValue(MDNode *Var, MDNode *Expr, SDNode *N, + unsigned R, bool IsIndirect, uint64_t Off, + DebugLoc DL, unsigned O) { + return new (Allocator) SDDbgValue(Var, Expr, N, R, IsIndirect, Off, DL, O); } /// Constant -SDDbgValue * -SelectionDAG::getConstantDbgValue(MDNode *MDPtr, const Value *C, - uint64_t Off, - DebugLoc DL, unsigned O) { - return new (Allocator) SDDbgValue(MDPtr, C, Off, DL, O); +SDDbgValue *SelectionDAG::getConstantDbgValue(MDNode *Var, MDNode *Expr, + const Value *C, uint64_t Off, + DebugLoc DL, unsigned O) { + return new (Allocator) SDDbgValue(Var, Expr, C, Off, DL, O); } /// FrameIndex -SDDbgValue * -SelectionDAG::getFrameIndexDbgValue(MDNode *MDPtr, unsigned FI, uint64_t Off, - DebugLoc DL, unsigned O) { - return new (Allocator) SDDbgValue(MDPtr, FI, Off, DL, O); +SDDbgValue *SelectionDAG::getFrameIndexDbgValue(MDNode *Var, MDNode *Expr, + unsigned FI, uint64_t Off, + DebugLoc DL, unsigned O) { + return new (Allocator) SDDbgValue(Var, Expr, FI, Off, DL, O); } namespace { @@ -6159,9 +6152,11 @@ unsigned SelectionDAG::AssignTopologicalOrder() { /// AddDbgValue - Add a dbg_value SDNode. If SD is non-null that means the /// value is produced by SD. void SelectionDAG::AddDbgValue(SDDbgValue *DB, SDNode *SD, bool isParameter) { - DbgInfo->add(DB, SD, isParameter); - if (SD) + if (SD) { + assert(DbgInfo->getSDDbgValues(SD).empty() || SD->getHasDebugValue()); SD->setHasDebugValue(true); + } + DbgInfo->add(DB, SD, isParameter); } /// TransferDbgValues - Transfer SDDbgValues. 
@@ -6176,10 +6171,10 @@ void SelectionDAG::TransferDbgValues(SDValue From, SDValue To) { I != E; ++I) { SDDbgValue *Dbg = *I; if (Dbg->getKind() == SDDbgValue::SDNODE) { - SDDbgValue *Clone = getDbgValue(Dbg->getMDPtr(), ToNode, To.getResNo(), - Dbg->isIndirect(), - Dbg->getOffset(), Dbg->getDebugLoc(), - Dbg->getOrder()); + SDDbgValue *Clone = + getDbgValue(Dbg->getVariable(), Dbg->getExpression(), ToNode, + To.getResNo(), Dbg->isIndirect(), Dbg->getOffset(), + Dbg->getDebugLoc(), Dbg->getOrder()); ClonedDVs.push_back(Clone); } } @@ -6217,7 +6212,10 @@ MemSDNode::MemSDNode(unsigned Opc, unsigned Order, DebugLoc dl, SDVTList VTs, assert(isVolatile() == MMO->isVolatile() && "Volatile encoding error!"); assert(isNonTemporal() == MMO->isNonTemporal() && "Non-temporal encoding error!"); - assert(memvt.getStoreSize() == MMO->getSize() && "Size mismatch!"); + // We check here that the size of the memory operand fits within the size of + // the MMO. This is because the MMO might indicate only a possible address + // range instead of specifying the affected memory addresses precisely. + assert(memvt.getStoreSize() <= MMO->getSize() && "Size mismatch!"); } MemSDNode::MemSDNode(unsigned Opc, unsigned Order, DebugLoc dl, SDVTList VTs, @@ -6227,7 +6225,7 @@ MemSDNode::MemSDNode(unsigned Opc, unsigned Order, DebugLoc dl, SDVTList VTs, SubclassData = encodeMemSDNodeFlags(0, ISD::UNINDEXED, MMO->isVolatile(), MMO->isNonTemporal(), MMO->isInvariant()); assert(isVolatile() == MMO->isVolatile() && "Volatile encoding error!"); - assert(memvt.getStoreSize() == MMO->getSize() && "Size mismatch!"); + assert(memvt.getStoreSize() <= MMO->getSize() && "Size mismatch!"); } /// Profile - Gather unique data for the node. @@ -6371,7 +6369,7 @@ bool SDNode::hasPredecessor(const SDNode *N) const { bool SDNode::hasPredecessorHelper(const SDNode *N, - SmallPtrSet &Visited, + SmallPtrSetImpl &Visited, SmallVectorImpl &Worklist) const { if (Visited.empty()) { Worklist.push_back(this); @@ -6387,7 +6385,7 @@ SDNode::hasPredecessorHelper(const SDNode *N, const SDNode *M = Worklist.pop_back_val(); for (unsigned i = 0, e = M->getNumOperands(); i != e; ++i) { SDNode *Op = M->getOperand(i).getNode(); - if (Visited.insert(Op)) + if (Visited.insert(Op).second) Worklist.push_back(Op); if (Op == N) return true; @@ -6427,7 +6425,6 @@ SDValue SelectionDAG::UnrollVectorOp(SDNode *N, unsigned ResNE) { EVT OperandVT = Operand.getValueType(); if (OperandVT.isVector()) { // A vector operand; extract a single element. - const TargetLowering *TLI = TM.getTargetLowering(); EVT OperandEltVT = OperandVT.getVectorElementType(); Operands[j] = getNode(ISD::EXTRACT_VECTOR_ELT, dl, OperandEltVT, @@ -6507,7 +6504,6 @@ bool SelectionDAG::isConsecutiveLoad(LoadSDNode *LD, LoadSDNode *Base, const GlobalValue *GV2 = nullptr; int64_t Offset1 = 0; int64_t Offset2 = 0; - const TargetLowering *TLI = TM.getTargetLowering(); bool isGA1 = TLI->isGAPlusOffset(Loc.getNode(), GV1, Offset1); bool isGA2 = TLI->isGAPlusOffset(BaseLoc.getNode(), GV2, Offset2); if (isGA1 && isGA2 && GV1 == GV2) @@ -6522,7 +6518,6 @@ unsigned SelectionDAG::InferPtrAlignment(SDValue Ptr) const { // If this is a GlobalAddress + cst, return the alignment. 
const GlobalValue *GV; int64_t GVOffset = 0; - const TargetLowering *TLI = TM.getTargetLowering(); if (TLI->isGAPlusOffset(Ptr.getNode(), GV, GVOffset)) { unsigned PtrWidth = TLI->getPointerTypeSizeInBits(GV->getType()); APInt KnownZero(PtrWidth, 0), KnownOne(PtrWidth, 0); @@ -6749,8 +6744,8 @@ bool ShuffleVectorSDNode::isSplatMask(const int *Mask, EVT VT) { #ifndef NDEBUG static void checkForCyclesHelper(const SDNode *N, - SmallPtrSet &Visited, - SmallPtrSet &Checked, + SmallPtrSetImpl &Visited, + SmallPtrSetImpl &Checked, const llvm::SelectionDAG *DAG) { // If this node has already been checked, don't check it again. if (Checked.count(N)) @@ -6758,7 +6753,7 @@ static void checkForCyclesHelper(const SDNode *N, // If a node has already been visited on this depth-first walk, reject it as // a cycle. - if (!Visited.insert(N)) { + if (!Visited.insert(N).second) { errs() << "Detected cycle in SelectionDAG\n"; dbgs() << "Offending node:\n"; N->dumprFull(DAG); dbgs() << "\n"; diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp index 28d8e98..8f582f1 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -58,6 +58,7 @@ #include "llvm/Target/TargetLowering.h" #include "llvm/Target/TargetOptions.h" #include "llvm/Target/TargetSelectionDAGInfo.h" +#include "llvm/Target/TargetSubtargetInfo.h" #include using namespace llvm; @@ -645,8 +646,10 @@ namespace { /// specified value into the registers specified by this object. This uses /// Chain/Flag as the input and updates them for the output Chain/Flag. /// If the Flag pointer is NULL, no flag is used. - void getCopyToRegs(SDValue Val, SelectionDAG &DAG, SDLoc dl, - SDValue &Chain, SDValue *Flag, const Value *V) const; + void + getCopyToRegs(SDValue Val, SelectionDAG &DAG, SDLoc dl, SDValue &Chain, + SDValue *Flag, const Value *V, + ISD::NodeType PreferredExtendType = ISD::ANY_EXTEND) const; /// AddInlineAsmOperands - Add this value to the specified inlineasm node /// operand list. This adds the code marker, matching input operand index @@ -761,9 +764,10 @@ SDValue RegsForValue::getCopyFromRegs(SelectionDAG &DAG, /// Chain/Flag as the input and updates them for the output Chain/Flag. /// If the Flag pointer is NULL, no flag is used. void RegsForValue::getCopyToRegs(SDValue Val, SelectionDAG &DAG, SDLoc dl, - SDValue &Chain, SDValue *Flag, - const Value *V) const { + SDValue &Chain, SDValue *Flag, const Value *V, + ISD::NodeType PreferredExtendType) const { const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + ISD::NodeType ExtendKind = PreferredExtendType; // Get the list of the values's legal parts. unsigned NumRegs = Regs.size(); @@ -772,8 +776,9 @@ void RegsForValue::getCopyToRegs(SDValue Val, SelectionDAG &DAG, SDLoc dl, EVT ValueVT = ValueVTs[Value]; unsigned NumParts = TLI.getNumRegisters(*DAG.getContext(), ValueVT); MVT RegisterVT = RegVTs[Value]; - ISD::NodeType ExtendKind = - TLI.isZExtFree(Val, RegisterVT)? 
ISD::ZERO_EXTEND: ISD::ANY_EXTEND; + + if (ExtendKind == ISD::ANY_EXTEND && TLI.isZExtFree(Val, RegisterVT)) + ExtendKind = ISD::ZERO_EXTEND; getCopyToParts(DAG, dl, Val.getValue(Val.getResNo() + Value), &Parts[Part], NumParts, RegisterVT, V, ExtendKind); @@ -860,7 +865,7 @@ void SelectionDAGBuilder::init(GCFunctionInfo *gfi, AliasAnalysis &aa, AA = &aa; GFI = gfi; LibInfo = li; - DL = DAG.getTarget().getDataLayout(); + DL = DAG.getSubtarget().getDataLayout(); Context = DAG.getContext(); LPadToCallSiteMap.clear(); } @@ -988,15 +993,16 @@ void SelectionDAGBuilder::resolveDanglingDebugInfo(const Value *V, DebugLoc dl = DDI.getdl(); unsigned DbgSDNodeOrder = DDI.getSDNodeOrder(); MDNode *Variable = DI->getVariable(); + MDNode *Expr = DI->getExpression(); uint64_t Offset = DI->getOffset(); // A dbg.value for an alloca is always indirect. bool IsIndirect = isa(V) || Offset != 0; SDDbgValue *SDV; if (Val.getNode()) { - if (!EmitFuncArgumentDbgValue(V, Variable, Offset, IsIndirect, Val)) { - SDV = DAG.getDbgValue(Variable, Val.getNode(), - Val.getResNo(), IsIndirect, - Offset, dl, DbgSDNodeOrder); + if (!EmitFuncArgumentDbgValue(V, Variable, Expr, Offset, IsIndirect, + Val)) { + SDV = DAG.getDbgValue(Variable, Expr, Val.getNode(), Val.getResNo(), + IsIndirect, Offset, dl, DbgSDNodeOrder); DAG.AddDbgValue(SDV, Val.getNode(), false); } } else @@ -1018,8 +1024,8 @@ SDValue SelectionDAGBuilder::getValue(const Value *V) { DenseMap::iterator It = FuncInfo.ValueMap.find(V); if (It != FuncInfo.ValueMap.end()) { unsigned InReg = It->second; - RegsForValue RFV(*DAG.getContext(), *TM.getTargetLowering(), - InReg, V->getType()); + RegsForValue RFV(*DAG.getContext(), DAG.getTargetLoweringInfo(), InReg, + V->getType()); SDValue Chain = DAG.getEntryNode(); N = RFV.getCopyFromRegs(DAG, FuncInfo, getCurSDLoc(), Chain, nullptr, V); resolveDanglingDebugInfo(V, N); @@ -1050,10 +1056,10 @@ SDValue SelectionDAGBuilder::getNonRegisterValue(const Value *V) { /// getValueImpl - Helper function for getValue and getNonRegisterValue. /// Create an SDValue for the given value. 
SDValue SelectionDAGBuilder::getValueImpl(const Value *V) { - const TargetLowering *TLI = TM.getTargetLowering(); + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); if (const Constant *C = dyn_cast(V)) { - EVT VT = TLI->getValueType(V->getType(), true); + EVT VT = TLI.getValueType(V->getType(), true); if (const ConstantInt *CI = dyn_cast(C)) return DAG.getConstant(*CI, VT); @@ -1063,7 +1069,7 @@ SDValue SelectionDAGBuilder::getValueImpl(const Value *V) { if (isa(C)) { unsigned AS = V->getType()->getPointerAddressSpace(); - return DAG.getConstant(0, TLI->getPointerTy(AS)); + return DAG.getConstant(0, TLI.getPointerTy(AS)); } if (const ConstantFP *CFP = dyn_cast(C)) @@ -1117,7 +1123,7 @@ SDValue SelectionDAGBuilder::getValueImpl(const Value *V) { "Unknown struct or array constant!"); SmallVector ValueVTs; - ComputeValueVTs(*TLI, C->getType(), ValueVTs); + ComputeValueVTs(TLI, C->getType(), ValueVTs); unsigned NumElts = ValueVTs.size(); if (NumElts == 0) return SDValue(); // empty struct @@ -1149,7 +1155,7 @@ SDValue SelectionDAGBuilder::getValueImpl(const Value *V) { Ops.push_back(getValue(CV->getOperand(i))); } else { assert(isa(C) && "Unknown vector constant!"); - EVT EltVT = TLI->getValueType(VecTy->getElementType()); + EVT EltVT = TLI.getValueType(VecTy->getElementType()); SDValue Op; if (EltVT.isFloatingPoint()) @@ -1169,13 +1175,13 @@ SDValue SelectionDAGBuilder::getValueImpl(const Value *V) { DenseMap::iterator SI = FuncInfo.StaticAllocaMap.find(AI); if (SI != FuncInfo.StaticAllocaMap.end()) - return DAG.getFrameIndex(SI->second, TLI->getPointerTy()); + return DAG.getFrameIndex(SI->second, TLI.getPointerTy()); } // If this is an instruction which fast-isel has deferred, select it now. if (const Instruction *Inst = dyn_cast(V)) { unsigned InReg = FuncInfo.InitializeRegForValue(Inst); - RegsForValue RFV(*DAG.getContext(), *TLI, InReg, Inst->getType()); + RegsForValue RFV(*DAG.getContext(), TLI, InReg, Inst->getType()); SDValue Chain = DAG.getEntryNode(); return RFV.getCopyFromRegs(DAG, FuncInfo, getCurSDLoc(), Chain, nullptr, V); } @@ -1184,7 +1190,7 @@ SDValue SelectionDAGBuilder::getValueImpl(const Value *V) { } void SelectionDAGBuilder::visitRet(const ReturnInst &I) { - const TargetLowering *TLI = TM.getTargetLowering(); + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); SDValue Chain = getControlRoot(); SmallVector Outs; SmallVector OutVals; @@ -1197,7 +1203,7 @@ void SelectionDAGBuilder::visitRet(const ReturnInst &I) { // Leave Outs empty so that LowerReturn won't try to load return // registers the usual way. 
SmallVector PtrValueVTs; - ComputeValueVTs(*TLI, PointerType::getUnqual(F->getReturnType()), + ComputeValueVTs(TLI, PointerType::getUnqual(F->getReturnType()), PtrValueVTs); SDValue RetPtr = DAG.getRegister(DemoteReg, PtrValueVTs[0]); @@ -1205,7 +1211,7 @@ void SelectionDAGBuilder::visitRet(const ReturnInst &I) { SmallVector ValueVTs; SmallVector Offsets; - ComputeValueVTs(*TLI, I.getOperand(0)->getType(), ValueVTs, &Offsets); + ComputeValueVTs(TLI, I.getOperand(0)->getType(), ValueVTs, &Offsets); unsigned NumValues = ValueVTs.size(); SmallVector Chains(NumValues); @@ -1224,7 +1230,7 @@ void SelectionDAGBuilder::visitRet(const ReturnInst &I) { MVT::Other, Chains); } else if (I.getNumOperands() != 0) { SmallVector ValueVTs; - ComputeValueVTs(*TLI, I.getOperand(0)->getType(), ValueVTs); + ComputeValueVTs(TLI, I.getOperand(0)->getType(), ValueVTs); unsigned NumValues = ValueVTs.size(); if (NumValues) { SDValue RetOp = getValue(I.getOperand(0)); @@ -1242,10 +1248,10 @@ void SelectionDAGBuilder::visitRet(const ReturnInst &I) { ExtendKind = ISD::ZERO_EXTEND; if (ExtendKind != ISD::ANY_EXTEND && VT.isInteger()) - VT = TLI->getTypeForExtArgOrReturn(VT.getSimpleVT(), ExtendKind); + VT = TLI.getTypeForExtArgOrReturn(*DAG.getContext(), VT, ExtendKind); - unsigned NumParts = TLI->getNumRegisters(*DAG.getContext(), VT); - MVT PartVT = TLI->getRegisterType(*DAG.getContext(), VT); + unsigned NumParts = TLI.getNumRegisters(*DAG.getContext(), VT); + MVT PartVT = TLI.getRegisterType(*DAG.getContext(), VT); SmallVector Parts(NumParts); getCopyToParts(DAG, getCurSDLoc(), SDValue(RetOp.getNode(), RetOp.getResNo() + j), @@ -1275,9 +1281,8 @@ void SelectionDAGBuilder::visitRet(const ReturnInst &I) { bool isVarArg = DAG.getMachineFunction().getFunction()->isVarArg(); CallingConv::ID CallConv = DAG.getMachineFunction().getFunction()->getCallingConv(); - Chain = TM.getTargetLowering()->LowerReturn(Chain, CallConv, isVarArg, - Outs, OutVals, getCurSDLoc(), - DAG); + Chain = DAG.getTargetLoweringInfo().LowerReturn( + Chain, CallConv, isVarArg, Outs, OutVals, getCurSDLoc(), DAG); // Verify that the target's LowerReturn behaved as expected. assert(Chain.getNode() && Chain.getValueType() == MVT::Other && @@ -1601,10 +1606,9 @@ void SelectionDAGBuilder::visitBr(const BranchInst &I) { // jle foo // if (const BinaryOperator *BOp = dyn_cast(CondVal)) { - if (!TM.getTargetLowering()->isJumpExpensive() && - BOp->hasOneUse() && - (BOp->getOpcode() == Instruction::And || - BOp->getOpcode() == Instruction::Or)) { + if (!DAG.getTargetLoweringInfo().isJumpExpensive() && + BOp->hasOneUse() && (BOp->getOpcode() == Instruction::And || + BOp->getOpcode() == Instruction::Or)) { FindMergedConditions(BOp, Succ0MBB, Succ1MBB, BrMBB, BrMBB, BOp->getOpcode(), getEdgeWeight(BrMBB, Succ0MBB), getEdgeWeight(BrMBB, Succ1MBB)); @@ -1724,7 +1728,7 @@ void SelectionDAGBuilder::visitSwitchCase(CaseBlock &CB, void SelectionDAGBuilder::visitJumpTable(JumpTable &JT) { // Emit the code for the jump table assert(JT.Reg != -1U && "Should lower JT Header first!"); - EVT PTy = TM.getTargetLowering()->getPointerTy(); + EVT PTy = DAG.getTargetLoweringInfo().getPointerTy(); SDValue Index = DAG.getCopyFromReg(getControlRoot(), getCurSDLoc(), JT.Reg, PTy); SDValue Table = DAG.getJumpTable(JT.JTI, PTy); @@ -1752,10 +1756,10 @@ void SelectionDAGBuilder::visitJumpTableHeader(JumpTable &JT, // can be used as an index into the jump table in a subsequent basic block. 
// This value may be smaller or larger than the target's pointer type, and // therefore require extension or truncating. - const TargetLowering *TLI = TM.getTargetLowering(); - SwitchOp = DAG.getZExtOrTrunc(Sub, getCurSDLoc(), TLI->getPointerTy()); + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + SwitchOp = DAG.getZExtOrTrunc(Sub, getCurSDLoc(), TLI.getPointerTy()); - unsigned JumpTableReg = FuncInfo.CreateReg(TLI->getPointerTy()); + unsigned JumpTableReg = FuncInfo.CreateReg(TLI.getPointerTy()); SDValue CopyTo = DAG.getCopyToReg(getControlRoot(), getCurSDLoc(), JumpTableReg, SwitchOp); JT.Reg = JumpTableReg; @@ -1763,12 +1767,10 @@ void SelectionDAGBuilder::visitJumpTableHeader(JumpTable &JT, // Emit the range check for the jump table, and branch to the default block // for the switch statement if the value being switched on exceeds the largest // case in the switch. - SDValue CMP = DAG.getSetCC(getCurSDLoc(), - TLI->getSetCCResultType(*DAG.getContext(), - Sub.getValueType()), - Sub, - DAG.getConstant(JTH.Last - JTH.First,VT), - ISD::SETUGT); + SDValue CMP = + DAG.getSetCC(getCurSDLoc(), TLI.getSetCCResultType(*DAG.getContext(), + Sub.getValueType()), + Sub, DAG.getConstant(JTH.Last - JTH.First, VT), ISD::SETUGT); // Set NextBlock to be the MBB immediately after the current one, if any. // This is used to avoid emitting unnecessary branches to the next block. @@ -1799,8 +1801,8 @@ void SelectionDAGBuilder::visitSPDescriptorParent(StackProtectorDescriptor &SPD, MachineBasicBlock *ParentBB) { // First create the loads to the guard/stack slot for the comparison. - const TargetLowering *TLI = TM.getTargetLowering(); - EVT PtrTy = TLI->getPointerTy(); + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + EVT PtrTy = TLI.getPointerTy(); MachineFrameInfo *MFI = ParentBB->getParent()->getFrameInfo(); int FI = MFI->getStackProtectorIndex(); @@ -1810,10 +1812,22 @@ void SelectionDAGBuilder::visitSPDescriptorParent(StackProtectorDescriptor &SPD, SDValue StackSlotPtr = DAG.getFrameIndex(FI, PtrTy); unsigned Align = - TLI->getDataLayout()->getPrefTypeAlignment(IRGuard->getType()); - SDValue Guard = DAG.getLoad(PtrTy, getCurSDLoc(), DAG.getEntryNode(), - GuardPtr, MachinePointerInfo(IRGuard, 0), - true, false, false, Align); + TLI.getDataLayout()->getPrefTypeAlignment(IRGuard->getType()); + + SDValue Guard; + + // If GuardReg is set and useLoadStackGuardNode returns true, retrieve the + // guard value from the virtual register holding the value. Otherwise, emit a + // volatile load to retrieve the stack guard value. 
+ unsigned GuardReg = SPD.getGuardReg(); + + if (GuardReg && TLI.useLoadStackGuardNode()) + Guard = DAG.getCopyFromReg(DAG.getEntryNode(), getCurSDLoc(), GuardReg, + PtrTy); + else + Guard = DAG.getLoad(PtrTy, getCurSDLoc(), DAG.getEntryNode(), + GuardPtr, MachinePointerInfo(IRGuard, 0), + true, false, false, Align); SDValue StackSlot = DAG.getLoad(PtrTy, getCurSDLoc(), DAG.getEntryNode(), StackSlotPtr, @@ -1824,11 +1838,10 @@ void SelectionDAGBuilder::visitSPDescriptorParent(StackProtectorDescriptor &SPD, EVT VT = Guard.getValueType(); SDValue Sub = DAG.getNode(ISD::SUB, getCurSDLoc(), VT, Guard, StackSlot); - SDValue Cmp = DAG.getSetCC(getCurSDLoc(), - TLI->getSetCCResultType(*DAG.getContext(), - Sub.getValueType()), - Sub, DAG.getConstant(0, VT), - ISD::SETNE); + SDValue Cmp = + DAG.getSetCC(getCurSDLoc(), TLI.getSetCCResultType(*DAG.getContext(), + Sub.getValueType()), + Sub, DAG.getConstant(0, VT), ISD::SETNE); // If the sub is not 0, then we know the guard/stackslot do not equal, so // branch to failure MBB. @@ -1853,10 +1866,10 @@ void SelectionDAGBuilder::visitSPDescriptorParent(StackProtectorDescriptor &SPD, /// StackProtectorDescriptor. void SelectionDAGBuilder::visitSPDescriptorFailure(StackProtectorDescriptor &SPD) { - const TargetLowering *TLI = TM.getTargetLowering(); - SDValue Chain = TLI->makeLibCall(DAG, RTLIB::STACKPROTECTOR_CHECK_FAIL, - MVT::isVoid, nullptr, 0, false, - getCurSDLoc(), false, false).second; + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + SDValue Chain = + TLI.makeLibCall(DAG, RTLIB::STACKPROTECTOR_CHECK_FAIL, MVT::isVoid, + nullptr, 0, false, getCurSDLoc(), false, false).second; DAG.setRoot(Chain); } @@ -1871,16 +1884,15 @@ void SelectionDAGBuilder::visitBitTestHeader(BitTestBlock &B, DAG.getConstant(B.First, VT)); // Check range - const TargetLowering *TLI = TM.getTargetLowering(); - SDValue RangeCmp = DAG.getSetCC(getCurSDLoc(), - TLI->getSetCCResultType(*DAG.getContext(), + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + SDValue RangeCmp = + DAG.getSetCC(getCurSDLoc(), TLI.getSetCCResultType(*DAG.getContext(), Sub.getValueType()), - Sub, DAG.getConstant(B.Range, VT), - ISD::SETUGT); + Sub, DAG.getConstant(B.Range, VT), ISD::SETUGT); // Determine the type of the test operands. bool UsePtrType = false; - if (!TLI->isTypeLegal(VT)) + if (!TLI.isTypeLegal(VT)) UsePtrType = true; else { for (unsigned i = 0, e = B.Cases.size(); i != e; ++i) @@ -1892,7 +1904,7 @@ void SelectionDAGBuilder::visitBitTestHeader(BitTestBlock &B, } } if (UsePtrType) { - VT = TLI->getPointerTy(); + VT = TLI.getPointerTy(); Sub = DAG.getZExtOrTrunc(Sub, getCurSDLoc(), VT); } @@ -1936,22 +1948,18 @@ void SelectionDAGBuilder::visitBitTestCase(BitTestBlock &BB, Reg, VT); SDValue Cmp; unsigned PopCount = CountPopulation_64(B.Mask); - const TargetLowering *TLI = TM.getTargetLowering(); + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); if (PopCount == 1) { // Testing for a single bit; just compare the shift count with what it // would need to be to shift a 1 bit in that position. - Cmp = DAG.getSetCC(getCurSDLoc(), - TLI->getSetCCResultType(*DAG.getContext(), VT), - ShiftOp, - DAG.getConstant(countTrailingZeros(B.Mask), VT), - ISD::SETEQ); + Cmp = DAG.getSetCC( + getCurSDLoc(), TLI.getSetCCResultType(*DAG.getContext(), VT), ShiftOp, + DAG.getConstant(countTrailingZeros(B.Mask), VT), ISD::SETEQ); } else if (PopCount == BB.Range) { // There is only one zero bit in the range, test for it directly. 
- Cmp = DAG.getSetCC(getCurSDLoc(), - TLI->getSetCCResultType(*DAG.getContext(), VT), - ShiftOp, - DAG.getConstant(CountTrailingOnes_64(B.Mask), VT), - ISD::SETNE); + Cmp = DAG.getSetCC( + getCurSDLoc(), TLI.getSetCCResultType(*DAG.getContext(), VT), ShiftOp, + DAG.getConstant(CountTrailingOnes_64(B.Mask), VT), ISD::SETNE); } else { // Make desired shift SDValue SwitchVal = DAG.getNode(ISD::SHL, getCurSDLoc(), VT, @@ -1961,9 +1969,8 @@ void SelectionDAGBuilder::visitBitTestCase(BitTestBlock &BB, SDValue AndOp = DAG.getNode(ISD::AND, getCurSDLoc(), VT, SwitchVal, DAG.getConstant(B.Mask, VT)); Cmp = DAG.getSetCC(getCurSDLoc(), - TLI->getSetCCResultType(*DAG.getContext(), VT), - AndOp, DAG.getConstant(0, VT), - ISD::SETNE); + TLI.getSetCCResultType(*DAG.getContext(), VT), AndOp, + DAG.getConstant(0, VT), ISD::SETNE); } // The branch weight from SwitchBB to B.TargetBB is B.ExtraWeight. @@ -2001,8 +2008,17 @@ void SelectionDAGBuilder::visitInvoke(const InvokeInst &I) { if (isa(Callee)) visitInlineAsm(&I); else if (Fn && Fn->isIntrinsic()) { - assert(Fn->getIntrinsicID() == Intrinsic::donothing); - // Ignore invokes to @llvm.donothing: jump directly to the next BB. + switch (Fn->getIntrinsicID()) { + default: + llvm_unreachable("Cannot invoke this intrinsic"); + case Intrinsic::donothing: + // Ignore invokes to @llvm.donothing: jump directly to the next BB. + break; + case Intrinsic::experimental_patchpoint_void: + case Intrinsic::experimental_patchpoint_i64: + visitPatchpoint(&I, LandingPad); + break; + } } else LowerCallTo(&I, getValue(Callee), false, LandingPad); @@ -2034,26 +2050,26 @@ void SelectionDAGBuilder::visitLandingPad(const LandingPadInst &LP) { // If there aren't registers to copy the values into (e.g., during SjLj // exceptions), then don't bother to create these DAG nodes. - const TargetLowering *TLI = TM.getTargetLowering(); - if (TLI->getExceptionPointerRegister() == 0 && - TLI->getExceptionSelectorRegister() == 0) + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + if (TLI.getExceptionPointerRegister() == 0 && + TLI.getExceptionSelectorRegister() == 0) return; SmallVector ValueVTs; - ComputeValueVTs(*TLI, LP.getType(), ValueVTs); + ComputeValueVTs(TLI, LP.getType(), ValueVTs); assert(ValueVTs.size() == 2 && "Only two-valued landingpads are supported"); // Get the two live-in registers as SDValues. The physregs have already been // copied into virtual registers. SDValue Ops[2]; Ops[0] = DAG.getZExtOrTrunc( - DAG.getCopyFromReg(DAG.getEntryNode(), getCurSDLoc(), - FuncInfo.ExceptionPointerVirtReg, TLI->getPointerTy()), - getCurSDLoc(), ValueVTs[0]); + DAG.getCopyFromReg(DAG.getEntryNode(), getCurSDLoc(), + FuncInfo.ExceptionPointerVirtReg, TLI.getPointerTy()), + getCurSDLoc(), ValueVTs[0]); Ops[1] = DAG.getZExtOrTrunc( - DAG.getCopyFromReg(DAG.getEntryNode(), getCurSDLoc(), - FuncInfo.ExceptionSelectorVirtReg, TLI->getPointerTy()), - getCurSDLoc(), ValueVTs[1]); + DAG.getCopyFromReg(DAG.getEntryNode(), getCurSDLoc(), + FuncInfo.ExceptionSelectorVirtReg, TLI.getPointerTy()), + getCurSDLoc(), ValueVTs[1]); // Merge into one. 
SDValue Res = DAG.getNode(ISD::MERGE_VALUES, getCurSDLoc(), @@ -2218,9 +2234,8 @@ bool SelectionDAGBuilder::handleSmallSwitchRange(CaseRec& CR, } static inline bool areJTsAllowed(const TargetLowering &TLI) { - return TLI.supportJumpTables() && - (TLI.isOperationLegalOrCustom(ISD::BR_JT, MVT::Other) || - TLI.isOperationLegalOrCustom(ISD::BRIND, MVT::Other)); + return TLI.isOperationLegalOrCustom(ISD::BR_JT, MVT::Other) || + TLI.isOperationLegalOrCustom(ISD::BRIND, MVT::Other); } static APInt ComputeRange(const APInt &First, const APInt &Last) { @@ -2245,8 +2260,8 @@ bool SelectionDAGBuilder::handleJTSwitchCase(CaseRec &CR, for (CaseItr I = CR.Range.first, E = CR.Range.second; I != E; ++I) TSize += I->size(); - const TargetLowering *TLI = TM.getTargetLowering(); - if (!areJTsAllowed(*TLI) || TSize.ult(TLI->getMinimumJumpTableEntries())) + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + if (!areJTsAllowed(TLI) || TSize.ult(TLI.getMinimumJumpTableEntries())) return false; APInt Range = ComputeRange(First, Last); @@ -2327,7 +2342,7 @@ bool SelectionDAGBuilder::handleJTSwitchCase(CaseRec &CR, } // Create a jump table index for this jump table. - unsigned JTEncoding = TLI->getJumpTableEncoding(); + unsigned JTEncoding = TLI.getJumpTableEncoding(); unsigned JTI = CurMF->getOrCreateJumpTableInfo(JTEncoding) ->createJumpTableIndex(DestBBs); @@ -2347,7 +2362,6 @@ bool SelectionDAGBuilder::handleJTSwitchCase(CaseRec &CR, bool SelectionDAGBuilder::handleBTSplitSwitchCase(CaseRec& CR, CaseRecVector& WorkList, const Value* SV, - MachineBasicBlock* Default, MachineBasicBlock* SwitchBB) { // Get the MachineFunction which holds the current MBB. This is used when // inserting any additional MBBs necessary to represent the switch. @@ -2413,8 +2427,8 @@ bool SelectionDAGBuilder::handleBTSplitSwitchCase(CaseRec& CR, RSize -= J->size(); } - const TargetLowering *TLI = TM.getTargetLowering(); - if (areJTsAllowed(*TLI)) { + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + if (areJTsAllowed(TLI)) { // If our case is dense we *really* should handle it earlier! assert((FMetric > 0) && "Should handle dense range earlier!"); } else { @@ -2484,8 +2498,8 @@ bool SelectionDAGBuilder::handleBitTestsSwitchCase(CaseRec& CR, const Value* SV, MachineBasicBlock* Default, MachineBasicBlock* SwitchBB) { - const TargetLowering *TLI = TM.getTargetLowering(); - EVT PTy = TLI->getPointerTy(); + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + EVT PTy = TLI.getPointerTy(); unsigned IntPtrBits = PTy.getSizeInBits(); Case& FrontCase = *CR.Range.first; @@ -2496,7 +2510,7 @@ bool SelectionDAGBuilder::handleBitTestsSwitchCase(CaseRec& CR, MachineFunction *CurMF = FuncInfo.MF; // If target does not have legal shift left, do not emit bit tests at all. 
- if (!TLI->isOperationLegal(ISD::SHL, PTy)) + if (!TLI.isOperationLegal(ISD::SHL, PTy)) return false; size_t numCmps = 0; @@ -2601,21 +2615,19 @@ bool SelectionDAGBuilder::handleBitTestsSwitchCase(CaseRec& CR, BitTestBlock BTB(lowBound, cmpRange, SV, -1U, MVT::Other, (CR.CaseBB == SwitchBB), - CR.CaseBB, Default, BTC); + CR.CaseBB, Default, std::move(BTC)); if (CR.CaseBB == SwitchBB) visitBitTestHeader(BTB, SwitchBB); - BitTestCases.push_back(BTB); + BitTestCases.push_back(std::move(BTB)); return true; } /// Clusterify - Transform simple list of Cases into list of CaseRange's -size_t SelectionDAGBuilder::Clusterify(CaseVector& Cases, - const SwitchInst& SI) { - size_t numCmps = 0; - +void SelectionDAGBuilder::Clusterify(CaseVector& Cases, + const SwitchInst& SI) { BranchProbabilityInfo *BPI = FuncInfo.BPI; // Start with "simple" cases for (SwitchInst::ConstCaseIt i = SI.case_begin(), e = SI.case_end(); @@ -2653,13 +2665,15 @@ size_t SelectionDAGBuilder::Clusterify(CaseVector& Cases, } } - for (CaseItr I=Cases.begin(), E=Cases.end(); I!=E; ++I, ++numCmps) { - if (I->Low != I->High) - // A range counts double, since it requires two compares. - ++numCmps; - } + DEBUG({ + size_t numCmps = 0; + for (auto &I : Cases) + // A range counts double, since it requires two compares. + numCmps += I.Low != I.High ? 2 : 1; - return numCmps; + dbgs() << "Clusterify finished. Total clusters: " << Cases.size() + << ". Total compares: " << numCmps << '\n'; + }); } void SelectionDAGBuilder::UpdateSplitBlock(MachineBasicBlock *First, @@ -2701,10 +2715,7 @@ void SelectionDAGBuilder::visitSwitch(const SwitchInst &SI) { // representing each one, and sort the vector so that we can efficiently // create a binary search tree from them. CaseVector Cases; - size_t numCmps = Clusterify(Cases, SI); - DEBUG(dbgs() << "Clusterify finished. Total clusters: " << Cases.size() - << ". Total compares: " << numCmps << '\n'); - (void)numCmps; + Clusterify(Cases, SI); // Get the Value to be switched on and default basic blocks, which will be // inserted into CaseBlock records, representing basic blocks in the binary @@ -2738,7 +2749,7 @@ void SelectionDAGBuilder::visitSwitch(const SwitchInst &SI) { // Emit binary tree. We need to pick a pivot, and push left and right ranges // onto the worklist. Leafs are handled via handleSmallSwitchRange() call. - handleBTSplitSwitchCase(CR, WorkList, SV, Default, SwitchMBB); + handleBTSplitSwitchCase(CR, WorkList, SV, SwitchMBB); } } @@ -2749,7 +2760,7 @@ void SelectionDAGBuilder::visitIndirectBr(const IndirectBrInst &I) { SmallSet Done; for (unsigned i = 0, e = I.getNumSuccessors(); i != e; ++i) { BasicBlock *BB = I.getSuccessor(i); - bool Inserted = Done.insert(BB); + bool Inserted = Done.insert(BB).second; if (!Inserted) continue; @@ -2806,7 +2817,8 @@ void SelectionDAGBuilder::visitShift(const User &I, unsigned Opcode) { SDValue Op1 = getValue(I.getOperand(0)); SDValue Op2 = getValue(I.getOperand(1)); - EVT ShiftTy = TM.getTargetLowering()->getShiftAmountTy(Op2.getValueType()); + EVT ShiftTy = + DAG.getTargetLoweringInfo().getShiftAmountTy(Op2.getValueType()); // Coerce the shift amount to the right type if we can. 
if (!I.getType()->isVectorTy() && Op2.getValueType() != ShiftTy) { @@ -2861,8 +2873,8 @@ void SelectionDAGBuilder::visitSDiv(const User &I) { if (isa(&I) && cast(&I)->isExact() && !isa(Op1) && isa(Op2) && !cast(Op2)->isNullValue()) - setValue(&I, TM.getTargetLowering()->BuildExactSDIV(Op1, Op2, - getCurSDLoc(), DAG)); + setValue(&I, DAG.getTargetLoweringInfo() + .BuildExactSDIV(Op1, Op2, getCurSDLoc(), DAG)); else setValue(&I, DAG.getNode(ISD::SDIV, getCurSDLoc(), Op1.getValueType(), Op1, Op2)); @@ -2878,7 +2890,7 @@ void SelectionDAGBuilder::visitICmp(const User &I) { SDValue Op2 = getValue(I.getOperand(1)); ISD::CondCode Opcode = getICmpCondCode(predicate); - EVT DestVT = TM.getTargetLowering()->getValueType(I.getType()); + EVT DestVT = DAG.getTargetLoweringInfo().getValueType(I.getType()); setValue(&I, DAG.getSetCC(getCurSDLoc(), DestVT, Op1, Op2, Opcode)); } @@ -2893,13 +2905,13 @@ void SelectionDAGBuilder::visitFCmp(const User &I) { ISD::CondCode Condition = getFCmpCondCode(predicate); if (TM.Options.NoNaNsFPMath) Condition = getFCmpCodeWithoutNaN(Condition); - EVT DestVT = TM.getTargetLowering()->getValueType(I.getType()); + EVT DestVT = DAG.getTargetLoweringInfo().getValueType(I.getType()); setValue(&I, DAG.getSetCC(getCurSDLoc(), DestVT, Op1, Op2, Condition)); } void SelectionDAGBuilder::visitSelect(const User &I) { SmallVector ValueVTs; - ComputeValueVTs(*TM.getTargetLowering(), I.getType(), ValueVTs); + ComputeValueVTs(DAG.getTargetLoweringInfo(), I.getType(), ValueVTs); unsigned NumValues = ValueVTs.size(); if (NumValues == 0) return; @@ -2926,7 +2938,7 @@ void SelectionDAGBuilder::visitSelect(const User &I) { void SelectionDAGBuilder::visitTrunc(const User &I) { // TruncInst cannot be a no-op cast because sizeof(src) > sizeof(dest). SDValue N = getValue(I.getOperand(0)); - EVT DestVT = TM.getTargetLowering()->getValueType(I.getType()); + EVT DestVT = DAG.getTargetLoweringInfo().getValueType(I.getType()); setValue(&I, DAG.getNode(ISD::TRUNCATE, getCurSDLoc(), DestVT, N)); } @@ -2934,7 +2946,7 @@ void SelectionDAGBuilder::visitZExt(const User &I) { // ZExt cannot be a no-op cast because sizeof(src) < sizeof(dest). // ZExt also can't be a cast to bool for same reason. So, nothing much to do SDValue N = getValue(I.getOperand(0)); - EVT DestVT = TM.getTargetLowering()->getValueType(I.getType()); + EVT DestVT = DAG.getTargetLoweringInfo().getValueType(I.getType()); setValue(&I, DAG.getNode(ISD::ZERO_EXTEND, getCurSDLoc(), DestVT, N)); } @@ -2942,52 +2954,51 @@ void SelectionDAGBuilder::visitSExt(const User &I) { // SExt cannot be a no-op cast because sizeof(src) < sizeof(dest). // SExt also can't be a cast to bool for same reason. 
So, nothing much to do SDValue N = getValue(I.getOperand(0)); - EVT DestVT = TM.getTargetLowering()->getValueType(I.getType()); + EVT DestVT = DAG.getTargetLoweringInfo().getValueType(I.getType()); setValue(&I, DAG.getNode(ISD::SIGN_EXTEND, getCurSDLoc(), DestVT, N)); } void SelectionDAGBuilder::visitFPTrunc(const User &I) { // FPTrunc is never a no-op cast, no need to check SDValue N = getValue(I.getOperand(0)); - const TargetLowering *TLI = TM.getTargetLowering(); - EVT DestVT = TLI->getValueType(I.getType()); - setValue(&I, DAG.getNode(ISD::FP_ROUND, getCurSDLoc(), - DestVT, N, - DAG.getTargetConstant(0, TLI->getPointerTy()))); + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + EVT DestVT = TLI.getValueType(I.getType()); + setValue(&I, DAG.getNode(ISD::FP_ROUND, getCurSDLoc(), DestVT, N, + DAG.getTargetConstant(0, TLI.getPointerTy()))); } void SelectionDAGBuilder::visitFPExt(const User &I) { // FPExt is never a no-op cast, no need to check SDValue N = getValue(I.getOperand(0)); - EVT DestVT = TM.getTargetLowering()->getValueType(I.getType()); + EVT DestVT = DAG.getTargetLoweringInfo().getValueType(I.getType()); setValue(&I, DAG.getNode(ISD::FP_EXTEND, getCurSDLoc(), DestVT, N)); } void SelectionDAGBuilder::visitFPToUI(const User &I) { // FPToUI is never a no-op cast, no need to check SDValue N = getValue(I.getOperand(0)); - EVT DestVT = TM.getTargetLowering()->getValueType(I.getType()); + EVT DestVT = DAG.getTargetLoweringInfo().getValueType(I.getType()); setValue(&I, DAG.getNode(ISD::FP_TO_UINT, getCurSDLoc(), DestVT, N)); } void SelectionDAGBuilder::visitFPToSI(const User &I) { // FPToSI is never a no-op cast, no need to check SDValue N = getValue(I.getOperand(0)); - EVT DestVT = TM.getTargetLowering()->getValueType(I.getType()); + EVT DestVT = DAG.getTargetLoweringInfo().getValueType(I.getType()); setValue(&I, DAG.getNode(ISD::FP_TO_SINT, getCurSDLoc(), DestVT, N)); } void SelectionDAGBuilder::visitUIToFP(const User &I) { // UIToFP is never a no-op cast, no need to check SDValue N = getValue(I.getOperand(0)); - EVT DestVT = TM.getTargetLowering()->getValueType(I.getType()); + EVT DestVT = DAG.getTargetLoweringInfo().getValueType(I.getType()); setValue(&I, DAG.getNode(ISD::UINT_TO_FP, getCurSDLoc(), DestVT, N)); } void SelectionDAGBuilder::visitSIToFP(const User &I) { // SIToFP is never a no-op cast, no need to check SDValue N = getValue(I.getOperand(0)); - EVT DestVT = TM.getTargetLowering()->getValueType(I.getType()); + EVT DestVT = DAG.getTargetLoweringInfo().getValueType(I.getType()); setValue(&I, DAG.getNode(ISD::SINT_TO_FP, getCurSDLoc(), DestVT, N)); } @@ -2995,7 +3006,7 @@ void SelectionDAGBuilder::visitPtrToInt(const User &I) { // What to do depends on the size of the integer and the size of the pointer. // We can either truncate, zero extend, or no-op, accordingly. SDValue N = getValue(I.getOperand(0)); - EVT DestVT = TM.getTargetLowering()->getValueType(I.getType()); + EVT DestVT = DAG.getTargetLoweringInfo().getValueType(I.getType()); setValue(&I, DAG.getZExtOrTrunc(N, getCurSDLoc(), DestVT)); } @@ -3003,13 +3014,13 @@ void SelectionDAGBuilder::visitIntToPtr(const User &I) { // What to do depends on the size of the integer and the size of the pointer. // We can either truncate, zero extend, or no-op, accordingly. 
SDValue N = getValue(I.getOperand(0)); - EVT DestVT = TM.getTargetLowering()->getValueType(I.getType()); + EVT DestVT = DAG.getTargetLoweringInfo().getValueType(I.getType()); setValue(&I, DAG.getZExtOrTrunc(N, getCurSDLoc(), DestVT)); } void SelectionDAGBuilder::visitBitCast(const User &I) { SDValue N = getValue(I.getOperand(0)); - EVT DestVT = TM.getTargetLowering()->getValueType(I.getType()); + EVT DestVT = DAG.getTargetLoweringInfo().getValueType(I.getType()); // BitCast assures us that source and destination are the same size so this is // either a BITCAST or a no-op. @@ -3031,7 +3042,7 @@ void SelectionDAGBuilder::visitAddrSpaceCast(const User &I) { const TargetLowering &TLI = DAG.getTargetLoweringInfo(); const Value *SV = I.getOperand(0); SDValue N = getValue(SV); - EVT DestVT = TM.getTargetLowering()->getValueType(I.getType()); + EVT DestVT = TLI.getValueType(I.getType()); unsigned SrcAS = SV->getType()->getPointerAddressSpace(); unsigned DestAS = I.getType()->getPointerAddressSpace(); @@ -3049,8 +3060,7 @@ void SelectionDAGBuilder::visitInsertElement(const User &I) { SDValue InIdx = DAG.getSExtOrTrunc(getValue(I.getOperand(2)), getCurSDLoc(), TLI.getVectorIdxTy()); setValue(&I, DAG.getNode(ISD::INSERT_VECTOR_ELT, getCurSDLoc(), - TM.getTargetLowering()->getValueType(I.getType()), - InVec, InVal, InIdx)); + TLI.getValueType(I.getType()), InVec, InVal, InIdx)); } void SelectionDAGBuilder::visitExtractElement(const User &I) { @@ -3059,8 +3069,7 @@ void SelectionDAGBuilder::visitExtractElement(const User &I) { SDValue InIdx = DAG.getSExtOrTrunc(getValue(I.getOperand(1)), getCurSDLoc(), TLI.getVectorIdxTy()); setValue(&I, DAG.getNode(ISD::EXTRACT_VECTOR_ELT, getCurSDLoc(), - TM.getTargetLowering()->getValueType(I.getType()), - InVec, InIdx)); + TLI.getValueType(I.getType()), InVec, InIdx)); } // Utility for visitShuffleVector - Return true if every element in Mask, @@ -3082,8 +3091,8 @@ void SelectionDAGBuilder::visitShuffleVector(const User &I) { ShuffleVectorInst::getShuffleMask(cast(I.getOperand(2)), Mask); unsigned MaskNumElts = Mask.size(); - const TargetLowering *TLI = TM.getTargetLowering(); - EVT VT = TLI->getValueType(I.getType()); + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + EVT VT = TLI.getValueType(I.getType()); EVT SrcVT = Src1.getValueType(); unsigned SrcNumElts = SrcVT.getVectorNumElements(); @@ -3202,9 +3211,9 @@ void SelectionDAGBuilder::visitShuffleVector(const User &I) { if (RangeUse[Input] == 0) Src = DAG.getUNDEF(VT); else - Src = DAG.getNode(ISD::EXTRACT_SUBVECTOR, getCurSDLoc(), VT, - Src, DAG.getConstant(StartIdx[Input], - TLI->getVectorIdxTy())); + Src = DAG.getNode( + ISD::EXTRACT_SUBVECTOR, getCurSDLoc(), VT, Src, + DAG.getConstant(StartIdx[Input], TLI.getVectorIdxTy())); } // Calculate new mask. @@ -3230,7 +3239,7 @@ void SelectionDAGBuilder::visitShuffleVector(const User &I) { // replacing the shuffle with extract and build vector. // to insert and build vector. 
EVT EltVT = VT.getVectorElementType(); - EVT IdxVT = TLI->getVectorIdxTy(); + EVT IdxVT = TLI.getVectorIdxTy(); SmallVector Ops; for (unsigned i = 0; i != MaskNumElts; ++i) { int Idx = Mask[i]; @@ -3262,16 +3271,22 @@ void SelectionDAGBuilder::visitInsertValue(const InsertValueInst &I) { unsigned LinearIndex = ComputeLinearIndex(AggTy, I.getIndices()); - const TargetLowering *TLI = TM.getTargetLowering(); + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); SmallVector AggValueVTs; - ComputeValueVTs(*TLI, AggTy, AggValueVTs); + ComputeValueVTs(TLI, AggTy, AggValueVTs); SmallVector ValValueVTs; - ComputeValueVTs(*TLI, ValTy, ValValueVTs); + ComputeValueVTs(TLI, ValTy, ValValueVTs); unsigned NumAggValues = AggValueVTs.size(); unsigned NumValValues = ValValueVTs.size(); SmallVector Values(NumAggValues); + // Ignore an insertvalue that produces an empty object + if (!NumAggValues) { + setValue(&I, DAG.getUNDEF(MVT(MVT::Other))); + return; + } + SDValue Agg = getValue(Op0); unsigned i = 0; // Copy the beginning value(s) from the original aggregate. @@ -3302,9 +3317,9 @@ void SelectionDAGBuilder::visitExtractValue(const ExtractValueInst &I) { unsigned LinearIndex = ComputeLinearIndex(AggTy, I.getIndices()); - const TargetLowering *TLI = TM.getTargetLowering(); + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); SmallVector ValValueVTs; - ComputeValueVTs(*TLI, ValTy, ValValueVTs); + ComputeValueVTs(TLI, ValTy, ValValueVTs); unsigned NumValValues = ValValueVTs.size(); @@ -3353,13 +3368,13 @@ void SelectionDAGBuilder::visitGetElementPtr(const User &I) { Ty = cast(Ty)->getElementType(); // If this is a constant subscript, handle it quickly. - const TargetLowering *TLI = TM.getTargetLowering(); + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); if (const ConstantInt *CI = dyn_cast(Idx)) { if (CI->isZero()) continue; uint64_t Offs = DL->getTypeAllocSize(Ty)*cast(CI)->getSExtValue(); SDValue OffsVal; - EVT PTy = TLI->getPointerTy(AS); + EVT PTy = TLI.getPointerTy(AS); unsigned PtrBits = PTy.getSizeInBits(); if (PtrBits < 64) OffsVal = DAG.getNode(ISD::TRUNCATE, getCurSDLoc(), PTy, @@ -3373,8 +3388,8 @@ void SelectionDAGBuilder::visitGetElementPtr(const User &I) { } // N = N + Idx * ElementSize; - APInt ElementSize = APInt(TLI->getPointerSizeInBits(AS), - DL->getTypeAllocSize(Ty)); + APInt ElementSize = + APInt(TLI.getPointerSizeInBits(AS), DL->getTypeAllocSize(Ty)); SDValue IdxN = getValue(Idx); // If the index is smaller or larger than intptr_t, truncate or extend @@ -3411,15 +3426,15 @@ void SelectionDAGBuilder::visitAlloca(const AllocaInst &I) { return; // getValue will auto-populate this. Type *Ty = I.getAllocatedType(); - const TargetLowering *TLI = TM.getTargetLowering(); - uint64_t TySize = TLI->getDataLayout()->getTypeAllocSize(Ty); + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + uint64_t TySize = TLI.getDataLayout()->getTypeAllocSize(Ty); unsigned Align = - std::max((unsigned)TLI->getDataLayout()->getPrefTypeAlignment(Ty), - I.getAlignment()); + std::max((unsigned)TLI.getDataLayout()->getPrefTypeAlignment(Ty), + I.getAlignment()); SDValue AllocSize = getValue(I.getArraySize()); - EVT IntPtr = TLI->getPointerTy(); + EVT IntPtr = TLI.getPointerTy(); if (AllocSize.getValueType() != IntPtr) AllocSize = DAG.getZExtOrTrunc(AllocSize, getCurSDLoc(), IntPtr); @@ -3430,7 +3445,8 @@ void SelectionDAGBuilder::visitAlloca(const AllocaInst &I) { // Handle alignment. If the requested alignment is less than or equal to // the stack alignment, ignore it. 
If the size is greater than or equal to // the stack alignment, we note this in the DYNAMIC_STACKALLOC node. - unsigned StackAlign = TM.getFrameLowering()->getStackAlignment(); + unsigned StackAlign = + DAG.getSubtarget().getFrameLowering()->getStackAlignment(); if (Align <= StackAlign) Align = 0; @@ -3464,15 +3480,18 @@ void SelectionDAGBuilder::visitLoad(const LoadInst &I) { Type *Ty = I.getType(); bool isVolatile = I.isVolatile(); - bool isNonTemporal = I.getMetadata("nontemporal") != nullptr; - bool isInvariant = I.getMetadata("invariant.load") != nullptr; + bool isNonTemporal = I.getMetadata(LLVMContext::MD_nontemporal) != nullptr; + bool isInvariant = I.getMetadata(LLVMContext::MD_invariant_load) != nullptr; unsigned Alignment = I.getAlignment(); - const MDNode *TBAAInfo = I.getMetadata(LLVMContext::MD_tbaa); + + AAMDNodes AAInfo; + I.getAAMetadata(AAInfo); const MDNode *Ranges = I.getMetadata(LLVMContext::MD_range); + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); SmallVector ValueVTs; SmallVector Offsets; - ComputeValueVTs(*TM.getTargetLowering(), Ty, ValueVTs, &Offsets); + ComputeValueVTs(TLI, Ty, ValueVTs, &Offsets); unsigned NumValues = ValueVTs.size(); if (NumValues == 0) return; @@ -3483,7 +3502,7 @@ void SelectionDAGBuilder::visitLoad(const LoadInst &I) { // Serialize volatile loads with other side effects. Root = getRoot(); else if (AA->pointsToConstantMemory( - AliasAnalysis::Location(SV, AA->getTypeStoreSize(Ty), TBAAInfo))) { + AliasAnalysis::Location(SV, AA->getTypeStoreSize(Ty), AAInfo))) { // Do not serialize (non-volatile) loads of constant memory with anything. Root = DAG.getEntryNode(); ConstantMemory = true; @@ -3492,9 +3511,8 @@ void SelectionDAGBuilder::visitLoad(const LoadInst &I) { Root = DAG.getRoot(); } - const TargetLowering *TLI = TM.getTargetLowering(); if (isVolatile) - Root = TLI->prepareVolatileOrAtomicLoad(Root, getCurSDLoc(), DAG); + Root = TLI.prepareVolatileOrAtomicLoad(Root, getCurSDLoc(), DAG); SmallVector Values(NumValues); SmallVector Chains(std::min(unsigned(MaxParallelChains), @@ -3520,7 +3538,7 @@ void SelectionDAGBuilder::visitLoad(const LoadInst &I) { DAG.getConstant(Offsets[i], PtrVT)); SDValue L = DAG.getLoad(ValueVTs[i], getCurSDLoc(), Root, A, MachinePointerInfo(SV, Offsets[i]), isVolatile, - isNonTemporal, isInvariant, Alignment, TBAAInfo, + isNonTemporal, isInvariant, Alignment, AAInfo, Ranges); Values[i] = L; @@ -3549,7 +3567,8 @@ void SelectionDAGBuilder::visitStore(const StoreInst &I) { SmallVector ValueVTs; SmallVector Offsets; - ComputeValueVTs(*TM.getTargetLowering(), SrcV->getType(), ValueVTs, &Offsets); + ComputeValueVTs(DAG.getTargetLoweringInfo(), SrcV->getType(), + ValueVTs, &Offsets); unsigned NumValues = ValueVTs.size(); if (NumValues == 0) return; @@ -3565,9 +3584,11 @@ void SelectionDAGBuilder::visitStore(const StoreInst &I) { NumValues)); EVT PtrVT = Ptr.getValueType(); bool isVolatile = I.isVolatile(); - bool isNonTemporal = I.getMetadata("nontemporal") != nullptr; + bool isNonTemporal = I.getMetadata(LLVMContext::MD_nontemporal) != nullptr; unsigned Alignment = I.getAlignment(); - const MDNode *TBAAInfo = I.getMetadata(LLVMContext::MD_tbaa); + + AAMDNodes AAInfo; + I.getAAMetadata(AAInfo); unsigned ChainI = 0; for (unsigned i = 0; i != NumValues; ++i, ++ChainI) { @@ -3583,7 +3604,7 @@ void SelectionDAGBuilder::visitStore(const StoreInst &I) { SDValue St = DAG.getStore(Root, getCurSDLoc(), SDValue(Src.getNode(), Src.getResNo() + i), Add, MachinePointerInfo(PtrV, Offsets[i]), - isVolatile, isNonTemporal, 
Alignment, TBAAInfo); + isVolatile, isNonTemporal, Alignment, AAInfo); Chains[ChainI] = St; } @@ -3592,30 +3613,6 @@ void SelectionDAGBuilder::visitStore(const StoreInst &I) { DAG.setRoot(StoreNode); } -static SDValue InsertFenceForAtomic(SDValue Chain, AtomicOrdering Order, - SynchronizationScope Scope, - bool Before, SDLoc dl, - SelectionDAG &DAG, - const TargetLowering &TLI) { - // Fence, if necessary - if (Before) { - if (Order == AcquireRelease || Order == SequentiallyConsistent) - Order = Release; - else if (Order == Acquire || Order == Monotonic || Order == Unordered) - return Chain; - } else { - if (Order == AcquireRelease) - Order = Acquire; - else if (Order == Release || Order == Monotonic || Order == Unordered) - return Chain; - } - SDValue Ops[3]; - Ops[0] = Chain; - Ops[1] = DAG.getConstant(Order, TLI.getPointerTy()); - Ops[2] = DAG.getConstant(Scope, TLI.getPointerTy()); - return DAG.getNode(ISD::ATOMIC_FENCE, dl, MVT::Other, Ops); -} - void SelectionDAGBuilder::visitAtomicCmpXchg(const AtomicCmpXchgInst &I) { SDLoc dl = getCurSDLoc(); AtomicOrdering SuccessOrder = I.getSuccessOrdering(); @@ -3624,27 +3621,16 @@ void SelectionDAGBuilder::visitAtomicCmpXchg(const AtomicCmpXchgInst &I) { SDValue InChain = getRoot(); - const TargetLowering *TLI = TM.getTargetLowering(); - if (TLI->getInsertFencesForAtomic()) - InChain = InsertFenceForAtomic(InChain, SuccessOrder, Scope, true, dl, - DAG, *TLI); - MVT MemVT = getValue(I.getCompareOperand()).getSimpleValueType(); SDVTList VTs = DAG.getVTList(MemVT, MVT::i1, MVT::Other); SDValue L = DAG.getAtomicCmpSwap( ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS, dl, MemVT, VTs, InChain, getValue(I.getPointerOperand()), getValue(I.getCompareOperand()), getValue(I.getNewValOperand()), MachinePointerInfo(I.getPointerOperand()), - 0 /* Alignment */, - TLI->getInsertFencesForAtomic() ? Monotonic : SuccessOrder, - TLI->getInsertFencesForAtomic() ? Monotonic : FailureOrder, Scope); + /*Alignment=*/ 0, SuccessOrder, FailureOrder, Scope); SDValue OutChain = L.getValue(2); - if (TLI->getInsertFencesForAtomic()) - OutChain = InsertFenceForAtomic(OutChain, SuccessOrder, Scope, false, dl, - DAG, *TLI); - setValue(&I, L); DAG.setRoot(OutChain); } @@ -3671,38 +3657,28 @@ void SelectionDAGBuilder::visitAtomicRMW(const AtomicRMWInst &I) { SDValue InChain = getRoot(); - const TargetLowering *TLI = TM.getTargetLowering(); - if (TLI->getInsertFencesForAtomic()) - InChain = InsertFenceForAtomic(InChain, Order, Scope, true, dl, - DAG, *TLI); - SDValue L = DAG.getAtomic(NT, dl, getValue(I.getValOperand()).getSimpleValueType(), InChain, getValue(I.getPointerOperand()), getValue(I.getValOperand()), - I.getPointerOperand(), 0 /* Alignment */, - TLI->getInsertFencesForAtomic() ? 
Monotonic : Order, - Scope); + I.getPointerOperand(), + /* Alignment=*/ 0, Order, Scope); SDValue OutChain = L.getValue(1); - if (TLI->getInsertFencesForAtomic()) - OutChain = InsertFenceForAtomic(OutChain, Order, Scope, false, dl, - DAG, *TLI); - setValue(&I, L); DAG.setRoot(OutChain); } void SelectionDAGBuilder::visitFence(const FenceInst &I) { SDLoc dl = getCurSDLoc(); - const TargetLowering *TLI = TM.getTargetLowering(); + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); SDValue Ops[3]; Ops[0] = getRoot(); - Ops[1] = DAG.getConstant(I.getOrdering(), TLI->getPointerTy()); - Ops[2] = DAG.getConstant(I.getSynchScope(), TLI->getPointerTy()); + Ops[1] = DAG.getConstant(I.getOrdering(), TLI.getPointerTy()); + Ops[2] = DAG.getConstant(I.getSynchScope(), TLI.getPointerTy()); DAG.setRoot(DAG.getNode(ISD::ATOMIC_FENCE, dl, MVT::Other, Ops)); } @@ -3713,8 +3689,8 @@ void SelectionDAGBuilder::visitAtomicLoad(const LoadInst &I) { SDValue InChain = getRoot(); - const TargetLowering *TLI = TM.getTargetLowering(); - EVT VT = TLI->getValueType(I.getType()); + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + EVT VT = TLI.getValueType(I.getType()); if (I.getAlignment() < VT.getSizeInBits() / 8) report_fatal_error("Cannot generate unaligned atomic load"); @@ -3728,19 +3704,14 @@ void SelectionDAGBuilder::visitAtomicLoad(const LoadInst &I) { I.getAlignment() ? I.getAlignment() : DAG.getEVTAlignment(VT)); - InChain = TLI->prepareVolatileOrAtomicLoad(InChain, dl, DAG); + InChain = TLI.prepareVolatileOrAtomicLoad(InChain, dl, DAG); SDValue L = DAG.getAtomic(ISD::ATOMIC_LOAD, dl, VT, VT, InChain, getValue(I.getPointerOperand()), MMO, - TLI->getInsertFencesForAtomic() ? Monotonic : Order, - Scope); + Order, Scope); SDValue OutChain = L.getValue(1); - if (TLI->getInsertFencesForAtomic()) - OutChain = InsertFenceForAtomic(OutChain, Order, Scope, false, dl, - DAG, *TLI); - setValue(&I, L); DAG.setRoot(OutChain); } @@ -3753,28 +3724,19 @@ void SelectionDAGBuilder::visitAtomicStore(const StoreInst &I) { SDValue InChain = getRoot(); - const TargetLowering *TLI = TM.getTargetLowering(); - EVT VT = TLI->getValueType(I.getValueOperand()->getType()); + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + EVT VT = TLI.getValueType(I.getValueOperand()->getType()); if (I.getAlignment() < VT.getSizeInBits() / 8) report_fatal_error("Cannot generate unaligned atomic store"); - if (TLI->getInsertFencesForAtomic()) - InChain = InsertFenceForAtomic(InChain, Order, Scope, true, dl, - DAG, *TLI); - SDValue OutChain = DAG.getAtomic(ISD::ATOMIC_STORE, dl, VT, InChain, getValue(I.getPointerOperand()), getValue(I.getValueOperand()), I.getPointerOperand(), I.getAlignment(), - TLI->getInsertFencesForAtomic() ? Monotonic : Order, - Scope); - - if (TLI->getInsertFencesForAtomic()) - OutChain = InsertFenceForAtomic(OutChain, Order, Scope, false, dl, - DAG, *TLI); + Order, Scope); DAG.setRoot(OutChain); } @@ -3799,13 +3761,13 @@ void SelectionDAGBuilder::visitTargetIntrinsic(const CallInst &I, // Info is set by getTgtMemInstrinsic TargetLowering::IntrinsicInfo Info; - const TargetLowering *TLI = TM.getTargetLowering(); - bool IsTgtIntrinsic = TLI->getTgtMemIntrinsic(Info, I, Intrinsic); + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + bool IsTgtIntrinsic = TLI.getTgtMemIntrinsic(Info, I, Intrinsic); // Add the intrinsic ID as an integer operand if it's not a target intrinsic. 
if (!IsTgtIntrinsic || Info.opc == ISD::INTRINSIC_VOID || Info.opc == ISD::INTRINSIC_W_CHAIN) - Ops.push_back(DAG.getTargetConstant(Intrinsic, TLI->getPointerTy())); + Ops.push_back(DAG.getTargetConstant(Intrinsic, TLI.getPointerTy())); // Add all operands of the call to the operand list. for (unsigned i = 0, e = I.getNumArgOperands(); i != e; ++i) { @@ -3814,7 +3776,7 @@ void SelectionDAGBuilder::visitTargetIntrinsic(const CallInst &I, } SmallVector ValueVTs; - ComputeValueVTs(*TLI, I.getType(), ValueVTs); + ComputeValueVTs(TLI, I.getType(), ValueVTs); if (HasChain) ValueVTs.push_back(MVT::Other); @@ -3829,7 +3791,7 @@ void SelectionDAGBuilder::visitTargetIntrinsic(const CallInst &I, VTs, Ops, Info.memVT, MachinePointerInfo(Info.ptrVal, Info.offset), Info.align, Info.vol, - Info.readMem, Info.writeMem); + Info.readMem, Info.writeMem, Info.size); } else if (!HasChain) { Result = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, getCurSDLoc(), VTs, Ops); } else if (!I.getType()->isVoidTy()) { @@ -3848,7 +3810,7 @@ void SelectionDAGBuilder::visitTargetIntrinsic(const CallInst &I, if (!I.getType()->isVoidTy()) { if (VectorType *PTy = dyn_cast(I.getType())) { - EVT VT = TLI->getValueType(PTy); + EVT VT = TLI.getValueType(PTy); Result = DAG.getNode(ISD::BITCAST, getCurSDLoc(), VT, Result); } @@ -4555,16 +4517,17 @@ static unsigned getTruncatedArgReg(const SDValue &N) { /// EmitFuncArgumentDbgValue - If the DbgValueInst is a dbg_value of a function /// argument, create the corresponding DBG_VALUE machine instruction for it now. /// At the end of instruction selection, they will be inserted to the entry BB. -bool -SelectionDAGBuilder::EmitFuncArgumentDbgValue(const Value *V, MDNode *Variable, - int64_t Offset, bool IsIndirect, - const SDValue &N) { +bool SelectionDAGBuilder::EmitFuncArgumentDbgValue(const Value *V, + MDNode *Variable, + MDNode *Expr, int64_t Offset, + bool IsIndirect, + const SDValue &N) { const Argument *Arg = dyn_cast(V); if (!Arg) return false; MachineFunction &MF = DAG.getMachineFunction(); - const TargetInstrInfo *TII = DAG.getTarget().getInstrInfo(); + const TargetInstrInfo *TII = DAG.getSubtarget().getInstrInfo(); // Ignore inlined function arguments here. DIVariable DV(Variable); @@ -4610,14 +4573,16 @@ SelectionDAGBuilder::EmitFuncArgumentDbgValue(const Value *V, MDNode *Variable, return false; if (Op->isReg()) - FuncInfo.ArgDbgValues.push_back(BuildMI(MF, getCurDebugLoc(), - TII->get(TargetOpcode::DBG_VALUE), - IsIndirect, - Op->getReg(), Offset, Variable)); + FuncInfo.ArgDbgValues.push_back( + BuildMI(MF, getCurDebugLoc(), TII->get(TargetOpcode::DBG_VALUE), + IsIndirect, Op->getReg(), Offset, Variable, Expr)); else FuncInfo.ArgDbgValues.push_back( - BuildMI(MF, getCurDebugLoc(), TII->get(TargetOpcode::DBG_VALUE)) - .addOperand(*Op).addImm(Offset).addMetadata(Variable)); + BuildMI(MF, getCurDebugLoc(), TII->get(TargetOpcode::DBG_VALUE)) + .addOperand(*Op) + .addImm(Offset) + .addMetadata(Variable) + .addMetadata(Expr)); return true; } @@ -4635,7 +4600,7 @@ SelectionDAGBuilder::EmitFuncArgumentDbgValue(const Value *V, MDNode *Variable, /// otherwise lower it and return null. 
const char * SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { - const TargetLowering *TLI = TM.getTargetLowering(); + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); SDLoc sdl = getCurSDLoc(); DebugLoc dl = getCurDebugLoc(); SDValue Res; @@ -4649,17 +4614,17 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { case Intrinsic::vaend: visitVAEnd(I); return nullptr; case Intrinsic::vacopy: visitVACopy(I); return nullptr; case Intrinsic::returnaddress: - setValue(&I, DAG.getNode(ISD::RETURNADDR, sdl, TLI->getPointerTy(), + setValue(&I, DAG.getNode(ISD::RETURNADDR, sdl, TLI.getPointerTy(), getValue(I.getArgOperand(0)))); return nullptr; case Intrinsic::frameaddress: - setValue(&I, DAG.getNode(ISD::FRAMEADDR, sdl, TLI->getPointerTy(), + setValue(&I, DAG.getNode(ISD::FRAMEADDR, sdl, TLI.getPointerTy(), getValue(I.getArgOperand(0)))); return nullptr; case Intrinsic::read_register: { Value *Reg = I.getArgOperand(0); SDValue RegName = DAG.getMDNode(cast(Reg)); - EVT VT = TM.getTargetLowering()->getValueType(I.getType()); + EVT VT = TLI.getValueType(I.getType()); setValue(&I, DAG.getNode(ISD::READ_REGISTER, sdl, VT, RegName)); return nullptr; } @@ -4673,9 +4638,9 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { return nullptr; } case Intrinsic::setjmp: - return &"_setjmp"[!TLI->usesUnderscoreSetJmp()]; + return &"_setjmp"[!TLI.usesUnderscoreSetJmp()]; case Intrinsic::longjmp: - return &"_longjmp"[!TLI->usesUnderscoreLongJmp()]; + return &"_longjmp"[!TLI.usesUnderscoreLongJmp()]; case Intrinsic::memcpy: { // Assert for address < 256 since we support only user defined address // spaces. @@ -4736,6 +4701,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { case Intrinsic::dbg_declare: { const DbgDeclareInst &DI = cast(I); MDNode *Variable = DI.getVariable(); + MDNode *Expression = DI.getExpression(); const Value *Address = DI.getAddress(); DIVariable DIVar(Variable); assert((!DIVar || DIVar.isVariable()) && @@ -4771,16 +4737,16 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { FrameIndexSDNode *FINode = dyn_cast(N.getNode()); if (FINode) // Byval parameter. We have a frame index at this point. - SDV = DAG.getFrameIndexDbgValue(Variable, FINode->getIndex(), - 0, dl, SDNodeOrder); + SDV = DAG.getFrameIndexDbgValue( + Variable, Expression, FINode->getIndex(), 0, dl, SDNodeOrder); else { // Address is an argument, so try to emit its dbg value using // virtual register info from the FuncInfo.ValueMap. - EmitFuncArgumentDbgValue(Address, Variable, 0, false, N); + EmitFuncArgumentDbgValue(Address, Variable, Expression, 0, false, N); return nullptr; } } else if (AI) - SDV = DAG.getDbgValue(Variable, N.getNode(), N.getResNo(), + SDV = DAG.getDbgValue(Variable, Expression, N.getNode(), N.getResNo(), true, 0, dl, SDNodeOrder); else { // Can't do anything with other non-AI cases yet. @@ -4793,7 +4759,8 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { } else { // If Address is an argument then try to emit its dbg value using // virtual register info from the FuncInfo.ValueMap. - if (!EmitFuncArgumentDbgValue(Address, Variable, 0, false, N)) { + if (!EmitFuncArgumentDbgValue(Address, Variable, Expression, 0, false, + N)) { // If variable is pinned by a alloca in dominating bb then // use StaticAllocaMap. 
if (const AllocaInst *AI = dyn_cast(Address)) { @@ -4801,7 +4768,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { DenseMap::iterator SI = FuncInfo.StaticAllocaMap.find(AI); if (SI != FuncInfo.StaticAllocaMap.end()) { - SDV = DAG.getFrameIndexDbgValue(Variable, SI->second, + SDV = DAG.getFrameIndexDbgValue(Variable, Expression, SI->second, 0, dl, SDNodeOrder); DAG.AddDbgValue(SDV, nullptr, false); return nullptr; @@ -4822,6 +4789,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { return nullptr; MDNode *Variable = DI.getVariable(); + MDNode *Expression = DI.getExpression(); uint64_t Offset = DI.getOffset(); const Value *V = DI.getValue(); if (!V) @@ -4829,7 +4797,8 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { SDDbgValue *SDV; if (isa(V) || isa(V) || isa(V)) { - SDV = DAG.getConstantDbgValue(Variable, V, Offset, dl, SDNodeOrder); + SDV = DAG.getConstantDbgValue(Variable, Expression, V, Offset, dl, + SDNodeOrder); DAG.AddDbgValue(SDV, nullptr, false); } else { // Do not use getValue() in here; we don't want to generate code at @@ -4841,10 +4810,10 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { if (N.getNode()) { // A dbg.value for an alloca is always indirect. bool IsIndirect = isa(V) || Offset != 0; - if (!EmitFuncArgumentDbgValue(V, Variable, Offset, IsIndirect, N)) { - SDV = DAG.getDbgValue(Variable, N.getNode(), - N.getResNo(), IsIndirect, - Offset, dl, SDNodeOrder); + if (!EmitFuncArgumentDbgValue(V, Variable, Expression, Offset, + IsIndirect, N)) { + SDV = DAG.getDbgValue(Variable, Expression, N.getNode(), N.getResNo(), + IsIndirect, Offset, dl, SDNodeOrder); DAG.AddDbgValue(SDV, N.getNode(), false); } } else if (!V->use_empty() ) { @@ -4878,7 +4847,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { case Intrinsic::eh_typeid_for: { // Find the type id for the given typeinfo. 
- GlobalVariable *GV = ExtractTypeInfo(I.getArgOperand(0)); + GlobalValue *GV = ExtractTypeInfo(I.getArgOperand(0)); unsigned TypeID = DAG.getMachineFunction().getMMI().getTypeIDFor(GV); Res = DAG.getConstant(TypeID, MVT::i32); setValue(&I, Res); @@ -4899,15 +4868,14 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { return nullptr; case Intrinsic::eh_dwarf_cfa: { SDValue CfaArg = DAG.getSExtOrTrunc(getValue(I.getArgOperand(0)), sdl, - TLI->getPointerTy()); + TLI.getPointerTy()); SDValue Offset = DAG.getNode(ISD::ADD, sdl, CfaArg.getValueType(), DAG.getNode(ISD::FRAME_TO_ARGS_OFFSET, sdl, CfaArg.getValueType()), CfaArg); - SDValue FA = DAG.getNode(ISD::FRAMEADDR, sdl, - TLI->getPointerTy(), - DAG.getConstant(0, TLI->getPointerTy())); + SDValue FA = DAG.getNode(ISD::FRAMEADDR, sdl, TLI.getPointerTy(), + DAG.getConstant(0, TLI.getPointerTy())); setValue(&I, DAG.getNode(ISD::ADD, sdl, FA.getValueType(), FA, Offset)); return nullptr; @@ -4997,7 +4965,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { ShOps[0] = ShAmt; ShOps[1] = DAG.getConstant(0, MVT::i32); ShAmt = DAG.getNode(ISD::BUILD_VECTOR, sdl, ShAmtVT, ShOps); - EVT DestVT = TLI->getValueType(I.getType()); + EVT DestVT = TLI.getValueType(I.getType()); ShAmt = DAG.getNode(ISD::BITCAST, sdl, DestVT, ShAmt); Res = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, sdl, DestVT, DAG.getConstant(NewIntrinsic, MVT::i32), @@ -5009,14 +4977,14 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { case Intrinsic::x86_avx_vinsertf128_ps_256: case Intrinsic::x86_avx_vinsertf128_si_256: case Intrinsic::x86_avx2_vinserti128: { - EVT DestVT = TLI->getValueType(I.getType()); - EVT ElVT = TLI->getValueType(I.getArgOperand(1)->getType()); + EVT DestVT = TLI.getValueType(I.getType()); + EVT ElVT = TLI.getValueType(I.getArgOperand(1)->getType()); uint64_t Idx = (cast(I.getArgOperand(2))->getZExtValue() & 1) * ElVT.getVectorNumElements(); - Res = DAG.getNode(ISD::INSERT_SUBVECTOR, sdl, DestVT, - getValue(I.getArgOperand(0)), - getValue(I.getArgOperand(1)), - DAG.getConstant(Idx, TLI->getVectorIdxTy())); + Res = + DAG.getNode(ISD::INSERT_SUBVECTOR, sdl, DestVT, + getValue(I.getArgOperand(0)), getValue(I.getArgOperand(1)), + DAG.getConstant(Idx, TLI.getVectorIdxTy())); setValue(&I, Res); return nullptr; } @@ -5024,12 +4992,12 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { case Intrinsic::x86_avx_vextractf128_ps_256: case Intrinsic::x86_avx_vextractf128_si_256: case Intrinsic::x86_avx2_vextracti128: { - EVT DestVT = TLI->getValueType(I.getType()); + EVT DestVT = TLI.getValueType(I.getType()); uint64_t Idx = (cast(I.getArgOperand(1))->getZExtValue() & 1) * DestVT.getVectorNumElements(); Res = DAG.getNode(ISD::EXTRACT_SUBVECTOR, sdl, DestVT, getValue(I.getArgOperand(0)), - DAG.getConstant(Idx, TLI->getVectorIdxTy())); + DAG.getConstant(Idx, TLI.getVectorIdxTy())); setValue(&I, Res); return nullptr; } @@ -5055,7 +5023,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { case Intrinsic::convertus: Code = ISD::CVT_US; break; case Intrinsic::convertuu: Code = ISD::CVT_UU; break; } - EVT DestVT = TLI->getValueType(I.getType()); + EVT DestVT = TLI.getValueType(I.getType()); const Value *Op1 = I.getArgOperand(0); Res = DAG.getConvertRndSat(DestVT, sdl, getValue(Op1), DAG.getValueType(DestVT), @@ -5071,23 +5039,23 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { 
getValue(I.getArgOperand(1)), DAG)); return nullptr; case Intrinsic::log: - setValue(&I, expandLog(sdl, getValue(I.getArgOperand(0)), DAG, *TLI)); + setValue(&I, expandLog(sdl, getValue(I.getArgOperand(0)), DAG, TLI)); return nullptr; case Intrinsic::log2: - setValue(&I, expandLog2(sdl, getValue(I.getArgOperand(0)), DAG, *TLI)); + setValue(&I, expandLog2(sdl, getValue(I.getArgOperand(0)), DAG, TLI)); return nullptr; case Intrinsic::log10: - setValue(&I, expandLog10(sdl, getValue(I.getArgOperand(0)), DAG, *TLI)); + setValue(&I, expandLog10(sdl, getValue(I.getArgOperand(0)), DAG, TLI)); return nullptr; case Intrinsic::exp: - setValue(&I, expandExp(sdl, getValue(I.getArgOperand(0)), DAG, *TLI)); + setValue(&I, expandExp(sdl, getValue(I.getArgOperand(0)), DAG, TLI)); return nullptr; case Intrinsic::exp2: - setValue(&I, expandExp2(sdl, getValue(I.getArgOperand(0)), DAG, *TLI)); + setValue(&I, expandExp2(sdl, getValue(I.getArgOperand(0)), DAG, TLI)); return nullptr; case Intrinsic::pow: setValue(&I, expandPow(sdl, getValue(I.getArgOperand(0)), - getValue(I.getArgOperand(1)), DAG, *TLI)); + getValue(I.getArgOperand(1)), DAG, TLI)); return nullptr; case Intrinsic::sqrt: case Intrinsic::fabs: @@ -5119,6 +5087,18 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { getValue(I.getArgOperand(0)))); return nullptr; } + case Intrinsic::minnum: + setValue(&I, DAG.getNode(ISD::FMINNUM, sdl, + getValue(I.getArgOperand(0)).getValueType(), + getValue(I.getArgOperand(0)), + getValue(I.getArgOperand(1)))); + return nullptr; + case Intrinsic::maxnum: + setValue(&I, DAG.getNode(ISD::FMAXNUM, sdl, + getValue(I.getArgOperand(0)).getValueType(), + getValue(I.getArgOperand(0)), + getValue(I.getArgOperand(1)))); + return nullptr; case Intrinsic::copysign: setValue(&I, DAG.getNode(ISD::FCOPYSIGN, sdl, getValue(I.getArgOperand(0)).getValueType(), @@ -5133,9 +5113,9 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { getValue(I.getArgOperand(2)))); return nullptr; case Intrinsic::fmuladd: { - EVT VT = TLI->getValueType(I.getType()); + EVT VT = TLI.getValueType(I.getType()); if (TM.Options.AllowFPOpFusion != FPOpFusion::Strict && - TLI->isFMAFasterThanFMulAndFAdd(VT)) { + TLI.isFMAFasterThanFMulAndFAdd(VT)) { setValue(&I, DAG.getNode(ISD::FMA, sdl, getValue(I.getArgOperand(0)).getValueType(), getValue(I.getArgOperand(0)), @@ -5155,12 +5135,16 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { return nullptr; } case Intrinsic::convert_to_fp16: - setValue(&I, DAG.getNode(ISD::FP32_TO_FP16, sdl, - MVT::i16, getValue(I.getArgOperand(0)))); + setValue(&I, DAG.getNode(ISD::BITCAST, sdl, MVT::i16, + DAG.getNode(ISD::FP_ROUND, sdl, MVT::f16, + getValue(I.getArgOperand(0)), + DAG.getTargetConstant(0, MVT::i32)))); return nullptr; case Intrinsic::convert_from_fp16: - setValue(&I, DAG.getNode(ISD::FP16_TO_FP32, sdl, - MVT::f32, getValue(I.getArgOperand(0)))); + setValue(&I, + DAG.getNode(ISD::FP_EXTEND, sdl, TLI.getValueType(I.getType()), + DAG.getNode(ISD::BITCAST, sdl, MVT::f16, + getValue(I.getArgOperand(0))))); return nullptr; case Intrinsic::pcmarker: { SDValue Tmp = getValue(I.getArgOperand(0)); @@ -5205,7 +5189,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { case Intrinsic::stacksave: { SDValue Op = getRoot(); Res = DAG.getNode(ISD::STACKSAVE, sdl, - DAG.getVTList(TLI->getPointerTy(), MVT::Other), Op); + DAG.getVTList(TLI.getPointerTy(), MVT::Other), Op); setValue(&I, Res); 
     DAG.setRoot(Res.getValue(1));
     return nullptr;
@@ -5219,9 +5203,44 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
     // Emit code into the DAG to store the stack guard onto the stack.
     MachineFunction &MF = DAG.getMachineFunction();
     MachineFrameInfo *MFI = MF.getFrameInfo();
-    EVT PtrTy = TLI->getPointerTy();
+    EVT PtrTy = TLI.getPointerTy();
+    SDValue Src, Chain = getRoot();
+    const Value *Ptr = cast<LoadInst>(I.getArgOperand(0))->getPointerOperand();
+    const GlobalVariable *GV = dyn_cast<GlobalVariable>(Ptr);
+
+    // See if Ptr is a bitcast. If it is, look through it and see if we can get
+    // global variable __stack_chk_guard.
+    if (!GV)
+      if (const Operator *BC = dyn_cast<Operator>(Ptr))
+        if (BC->getOpcode() == Instruction::BitCast)
+          GV = dyn_cast<GlobalVariable>(BC->getOperand(0));
+
+    if (GV && TLI.useLoadStackGuardNode()) {
+      // Emit a LOAD_STACK_GUARD node.
+      MachineSDNode *Node = DAG.getMachineNode(TargetOpcode::LOAD_STACK_GUARD,
+                                               sdl, PtrTy, Chain);
+      MachinePointerInfo MPInfo(GV);
+      MachineInstr::mmo_iterator MemRefs = MF.allocateMemRefsArray(1);
+      unsigned Flags = MachineMemOperand::MOLoad |
+                       MachineMemOperand::MOInvariant;
+      *MemRefs = MF.getMachineMemOperand(MPInfo, Flags,
+                                         PtrTy.getSizeInBits() / 8,
+                                         DAG.getEVTAlignment(PtrTy));
+      Node->setMemRefs(MemRefs, MemRefs + 1);
+
+      // Copy the guard value to a virtual register so that it can be
+      // retrieved in the epilogue.
+      Src = SDValue(Node, 0);
+      const TargetRegisterClass *RC =
+          TLI.getRegClassFor(Src.getSimpleValueType());
+      unsigned Reg = MF.getRegInfo().createVirtualRegister(RC);
+
+      SPDescriptor.setGuardReg(Reg);
+      Chain = DAG.getCopyToReg(Chain, sdl, Reg, Src);
+    } else {
+      Src = getValue(I.getArgOperand(0));  // The guard's value.
+    }
 
-    SDValue Src = getValue(I.getArgOperand(0));  // The guard's value.
     AllocaInst *Slot = cast<AllocaInst>(I.getArgOperand(1));
     int FI = FuncInfo.StaticAllocaMap[Slot];
@@ -5230,7 +5249,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
     SDValue FIN = DAG.getFrameIndex(FI, PtrTy);
 
     // Store the stack protector onto the stack.
- Res = DAG.getStore(getRoot(), sdl, Src, FIN, + Res = DAG.getStore(Chain, sdl, Src, FIN, MachinePointerInfo::getFixedStack(FI), true, false, 0); setValue(&I, Res); @@ -5259,8 +5278,9 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { // Drop the intrinsic, but forward the value setValue(&I, getValue(I.getOperand(0))); return nullptr; + case Intrinsic::assume: case Intrinsic::var_annotation: - // Discard annotate attributes + // Discard annotate attributes and assumptions return nullptr; case Intrinsic::init_trampoline: { @@ -5281,7 +5301,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { } case Intrinsic::adjust_trampoline: { setValue(&I, DAG.getNode(ISD::ADJUST_TRAMPOLINE, sdl, - TLI->getPointerTy(), + TLI.getPointerTy(), getValue(I.getArgOperand(0)))); return nullptr; } @@ -5321,10 +5341,10 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { TargetLowering::CallLoweringInfo CLI(DAG); CLI.setDebugLoc(sdl).setChain(getRoot()) .setCallee(CallingConv::C, I.getType(), - DAG.getExternalSymbol(TrapFuncName.data(), TLI->getPointerTy()), + DAG.getExternalSymbol(TrapFuncName.data(), TLI.getPointerTy()), std::move(Args), 0); - std::pair Result = TLI->LowerCallTo(CLI); + std::pair Result = TLI.LowerCallTo(CLI); DAG.setRoot(Result.second); return nullptr; } @@ -5388,11 +5408,17 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { if (!LifetimeObject) continue; - int FI = FuncInfo.StaticAllocaMap[LifetimeObject]; + // First check that the Alloca is static, otherwise it won't have a + // valid frame index. + auto SI = FuncInfo.StaticAllocaMap.find(LifetimeObject); + if (SI == FuncInfo.StaticAllocaMap.end()) + return nullptr; + + int FI = SI->second; SDValue Ops[2]; Ops[0] = getRoot(); - Ops[1] = DAG.getFrameIndex(FI, TLI->getPointerTy(), true); + Ops[1] = DAG.getFrameIndex(FI, TLI.getPointerTy(), true); unsigned Opcode = (IsStart ? ISD::LIFETIME_START : ISD::LIFETIME_END); Res = DAG.getNode(Opcode, sdl, MVT::Other, Ops); @@ -5402,7 +5428,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { } case Intrinsic::invariant_start: // Discard region information. - setValue(&I, DAG.getUNDEF(TLI->getPointerTy())); + setValue(&I, DAG.getUNDEF(TLI.getPointerTy())); return nullptr; case Intrinsic::invariant_end: // Discard region information. 
@@ -5420,7 +5446,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { return nullptr; } case Intrinsic::clear_cache: - return TLI->getClearCacheBuiltinName(); + return TLI.getClearCacheBuiltinName(); case Intrinsic::donothing: // ignore return nullptr; @@ -5430,42 +5456,18 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { } case Intrinsic::experimental_patchpoint_void: case Intrinsic::experimental_patchpoint_i64: { - visitPatchpoint(I); + visitPatchpoint(&I); return nullptr; } } } -void SelectionDAGBuilder::LowerCallTo(ImmutableCallSite CS, SDValue Callee, - bool isTailCall, - MachineBasicBlock *LandingPad) { - const TargetLowering *TLI = TM.getTargetLowering(); - PointerType *PT = cast(CS.getCalledValue()->getType()); - FunctionType *FTy = cast(PT->getElementType()); - Type *RetTy = FTy->getReturnType(); +std::pair +SelectionDAGBuilder::lowerInvokable(TargetLowering::CallLoweringInfo &CLI, + MachineBasicBlock *LandingPad) { MachineModuleInfo &MMI = DAG.getMachineFunction().getMMI(); MCSymbol *BeginLabel = nullptr; - TargetLowering::ArgListTy Args; - TargetLowering::ArgListEntry Entry; - Args.reserve(CS.arg_size()); - - for (ImmutableCallSite::arg_iterator i = CS.arg_begin(), e = CS.arg_end(); - i != e; ++i) { - const Value *V = *i; - - // Skip empty types - if (V->getType()->isEmptyTy()) - continue; - - SDValue ArgNode = getValue(V); - Entry.Node = ArgNode; Entry.Ty = V->getType(); - - // Skip the first return-type Attribute to get to params. - Entry.setAttributes(&CS, i - CS.arg_begin() + 1); - Args.push_back(Entry); - } - if (LandingPad) { // Insert a label before the invoke call to mark the try range. This can be // used to detect deletion of the invoke via the MachineModuleInfo. @@ -5486,24 +5488,17 @@ void SelectionDAGBuilder::LowerCallTo(ImmutableCallSite CS, SDValue Callee, // this call might not return. (void)getRoot(); DAG.setRoot(DAG.getEHLabel(getCurSDLoc(), getControlRoot(), BeginLabel)); - } - // Check if target-independent constraints permit a tail call here. - // Target-dependent constraints are checked within TLI->LowerCallTo. - if (isTailCall && !isInTailCallPosition(CS, DAG)) - isTailCall = false; + CLI.setChain(getRoot()); + } - TargetLowering::CallLoweringInfo CLI(DAG); - CLI.setDebugLoc(getCurSDLoc()).setChain(getRoot()) - .setCallee(RetTy, FTy, Callee, std::move(Args), CS).setTailCall(isTailCall); + const TargetLowering *TLI = TM.getSubtargetImpl()->getTargetLowering(); + std::pair Result = TLI->LowerCallTo(CLI); - std::pair Result = TLI->LowerCallTo(CLI); - assert((isTailCall || Result.second.getNode()) && + assert((CLI.IsTailCall || Result.second.getNode()) && "Non-null chain expected with non-tail call!"); assert((Result.second.getNode() || !Result.first.getNode()) && "Null value expected with tail call!"); - if (Result.first.getNode()) - setValue(CS.getInstruction(), Result.first); if (!Result.second.getNode()) { // As a special case, a null chain means that a tail call has been emitted @@ -5526,6 +5521,50 @@ void SelectionDAGBuilder::LowerCallTo(ImmutableCallSite CS, SDValue Callee, // Inform MachineModuleInfo of range. 
MMI.addInvoke(LandingPad, BeginLabel, EndLabel); } + + return Result; +} + +void SelectionDAGBuilder::LowerCallTo(ImmutableCallSite CS, SDValue Callee, + bool isTailCall, + MachineBasicBlock *LandingPad) { + PointerType *PT = cast(CS.getCalledValue()->getType()); + FunctionType *FTy = cast(PT->getElementType()); + Type *RetTy = FTy->getReturnType(); + + TargetLowering::ArgListTy Args; + TargetLowering::ArgListEntry Entry; + Args.reserve(CS.arg_size()); + + for (ImmutableCallSite::arg_iterator i = CS.arg_begin(), e = CS.arg_end(); + i != e; ++i) { + const Value *V = *i; + + // Skip empty types + if (V->getType()->isEmptyTy()) + continue; + + SDValue ArgNode = getValue(V); + Entry.Node = ArgNode; Entry.Ty = V->getType(); + + // Skip the first return-type Attribute to get to params. + Entry.setAttributes(&CS, i - CS.arg_begin() + 1); + Args.push_back(Entry); + } + + // Check if target-independent constraints permit a tail call here. + // Target-dependent constraints are checked within TLI->LowerCallTo. + if (isTailCall && !isInTailCallPosition(CS, DAG.getTarget())) + isTailCall = false; + + TargetLowering::CallLoweringInfo CLI(DAG); + CLI.setDebugLoc(getCurSDLoc()).setChain(getRoot()) + .setCallee(RetTy, FTy, Callee, std::move(Args), CS) + .setTailCall(isTailCall); + std::pair Result = lowerInvokable(CLI, LandingPad); + + if (Result.first.getNode()) + setValue(CS.getInstruction(), Result.first); } /// IsOnlyUsedInZeroEqualityComparison - Return true if it only matters that the @@ -5591,7 +5630,7 @@ static SDValue getMemCmpLoad(const Value *PtrVal, MVT LoadVT, void SelectionDAGBuilder::processIntegerCallValue(const Instruction &I, SDValue Value, bool IsSigned) { - EVT VT = TM.getTargetLowering()->getValueType(I.getType(), true); + EVT VT = DAG.getTargetLoweringInfo().getValueType(I.getType(), true); if (IsSigned) Value = DAG.getSExtOrTrunc(Value, getCurSDLoc(), VT); else @@ -5616,7 +5655,7 @@ bool SelectionDAGBuilder::visitMemCmpCall(const CallInst &I) { const Value *Size = I.getArgOperand(2); const ConstantInt *CSize = dyn_cast(Size); if (CSize && CSize->getZExtValue() == 0) { - EVT CallVT = TM.getTargetLowering()->getValueType(I.getType(), true); + EVT CallVT = DAG.getTargetLoweringInfo().getValueType(I.getType(), true); setValue(&I, DAG.getConstant(0, CallVT)); return true; } @@ -5673,15 +5712,16 @@ bool SelectionDAGBuilder::visitMemCmpCall(const CallInst &I) { // Require that we can find a legal MVT, and only do this if the target // supports unaligned loads of that type. Expanding into byte loads would // bloat the code. - const TargetLowering *TLI = TM.getTargetLowering(); + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); if (ActuallyDoIt && CSize->getZExtValue() > 4) { unsigned DstAS = LHS->getType()->getPointerAddressSpace(); unsigned SrcAS = RHS->getType()->getPointerAddressSpace(); // TODO: Handle 5 byte compare as 4-byte + 1 byte. // TODO: Handle 8 byte compare on x86-32 as two 32-bit loads. - if (!TLI->isTypeLegal(LoadVT) || - !TLI->allowsUnalignedMemoryAccesses(LoadVT, SrcAS) || - !TLI->allowsUnalignedMemoryAccesses(LoadVT, DstAS)) + // TODO: Check alignment of src and dest ptrs. 
+ if (!TLI.isTypeLegal(LoadVT) || + !TLI.allowsMisalignedMemoryAccesses(LoadVT, SrcAS) || + !TLI.allowsMisalignedMemoryAccesses(LoadVT, DstAS)) ActuallyDoIt = false; } @@ -5859,6 +5899,26 @@ bool SelectionDAGBuilder::visitUnaryFloatCall(const CallInst &I, return true; } +/// visitBinaryFloatCall - If a call instruction is a binary floating-point +/// operation (as expected), translate it to an SDNode with the specified opcode +/// and return true. +bool SelectionDAGBuilder::visitBinaryFloatCall(const CallInst &I, + unsigned Opcode) { + // Sanity check that it really is a binary floating-point call. + if (I.getNumArgOperands() != 2 || + !I.getArgOperand(0)->getType()->isFloatingPointTy() || + I.getType() != I.getArgOperand(0)->getType() || + I.getType() != I.getArgOperand(1)->getType() || + !I.onlyReadsMemory()) + return false; + + SDValue Tmp0 = getValue(I.getArgOperand(0)); + SDValue Tmp1 = getValue(I.getArgOperand(1)); + EVT VT = Tmp0.getValueType(); + setValue(&I, DAG.getNode(Opcode, getCurSDLoc(), VT, Tmp0, Tmp1)); + return true; +} + void SelectionDAGBuilder::visitCall(const CallInst &I) { // Handle inline assembly differently. if (isa(I.getCalledValue())) { @@ -5915,6 +5975,18 @@ void SelectionDAGBuilder::visitCall(const CallInst &I) { if (visitUnaryFloatCall(I, ISD::FABS)) return; break; + case LibFunc::fmin: + case LibFunc::fminf: + case LibFunc::fminl: + if (visitBinaryFloatCall(I, ISD::FMINNUM)) + return; + break; + case LibFunc::fmax: + case LibFunc::fmaxf: + case LibFunc::fmaxl: + if (visitBinaryFloatCall(I, ISD::FMAXNUM)) + return; + break; case LibFunc::sin: case LibFunc::sinf: case LibFunc::sinl: @@ -6021,7 +6093,7 @@ void SelectionDAGBuilder::visitCall(const CallInst &I) { Callee = getValue(I.getCalledValue()); else Callee = DAG.getExternalSymbol(RenameFn, - TM.getTargetLowering()->getPointerTy()); + DAG.getTargetLoweringInfo().getPointerTy()); // Check if we can potentially perform a tail call. More detailed checking is // be done within LowerCallTo, after more information about the call is known. @@ -6216,9 +6288,9 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { /// ConstraintOperands - Information about all of the constraints. SDISelAsmOperandInfoVector ConstraintOperands; - const TargetLowering *TLI = TM.getTargetLowering(); + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); TargetLowering::AsmOperandInfoVector - TargetConstraints = TLI->ParseConstraints(CS); + TargetConstraints = TLI.ParseConstraints(CS); bool hasMemory = false; @@ -6243,10 +6315,10 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { // corresponding argument. assert(!CS.getType()->isVoidTy() && "Bad inline asm!"); if (StructType *STy = dyn_cast(CS.getType())) { - OpVT = TLI->getSimpleValueType(STy->getElementType(ResNo)); + OpVT = TLI.getSimpleValueType(STy->getElementType(ResNo)); } else { assert(ResNo == 0 && "Asm only has one result!"); - OpVT = TLI->getSimpleValueType(CS.getType()); + OpVT = TLI.getSimpleValueType(CS.getType()); } ++ResNo; break; @@ -6267,8 +6339,8 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { OpInfo.CallOperand = getValue(OpInfo.CallOperandVal); } - OpVT = OpInfo.getCallOperandValEVT(*DAG.getContext(), *TLI, DL). 
- getSimpleVT(); + OpVT = + OpInfo.getCallOperandValEVT(*DAG.getContext(), TLI, DL).getSimpleVT(); } OpInfo.ConstraintVT = OpVT; @@ -6279,7 +6351,7 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { else { for (unsigned j = 0, ee = OpInfo.Codes.size(); j != ee; ++j) { TargetLowering::ConstraintType - CType = TLI->getConstraintType(OpInfo.Codes[j]); + CType = TLI.getConstraintType(OpInfo.Codes[j]); if (CType == TargetLowering::C_Memory) { hasMemory = true; break; @@ -6311,10 +6383,10 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { if (OpInfo.ConstraintVT != Input.ConstraintVT) { std::pair MatchRC = - TLI->getRegForInlineAsmConstraint(OpInfo.ConstraintCode, + TLI.getRegForInlineAsmConstraint(OpInfo.ConstraintCode, OpInfo.ConstraintVT); std::pair InputRC = - TLI->getRegForInlineAsmConstraint(Input.ConstraintCode, + TLI.getRegForInlineAsmConstraint(Input.ConstraintCode, Input.ConstraintVT); if ((OpInfo.ConstraintVT.isInteger() != Input.ConstraintVT.isInteger()) || @@ -6328,7 +6400,7 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { } // Compute the constraint code and ConstraintType to use. - TLI->ComputeConstraintToUse(OpInfo, OpInfo.CallOperand, &DAG); + TLI.ComputeConstraintToUse(OpInfo, OpInfo.CallOperand, &DAG); if (OpInfo.ConstraintType == TargetLowering::C_Memory && OpInfo.Type == InlineAsm::isClobber) @@ -6356,16 +6428,16 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { if (isa(OpVal) || isa(OpVal) || isa(OpVal) || isa(OpVal)) { OpInfo.CallOperand = DAG.getConstantPool(cast(OpVal), - TLI->getPointerTy()); + TLI.getPointerTy()); } else { // Otherwise, create a stack slot and emit a store to it before the // asm. Type *Ty = OpVal->getType(); - uint64_t TySize = TLI->getDataLayout()->getTypeAllocSize(Ty); - unsigned Align = TLI->getDataLayout()->getPrefTypeAlignment(Ty); + uint64_t TySize = TLI.getDataLayout()->getTypeAllocSize(Ty); + unsigned Align = TLI.getDataLayout()->getPrefTypeAlignment(Ty); MachineFunction &MF = DAG.getMachineFunction(); int SSFI = MF.getFrameInfo()->CreateStackObject(TySize, Align, false); - SDValue StackSlot = DAG.getFrameIndex(SSFI, TLI->getPointerTy()); + SDValue StackSlot = DAG.getFrameIndex(SSFI, TLI.getPointerTy()); Chain = DAG.getStore(Chain, getCurSDLoc(), OpInfo.CallOperand, StackSlot, MachinePointerInfo::getFixedStack(SSFI), @@ -6383,7 +6455,7 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { // If this constraint is for a specific register, allocate it before // anything else. if (OpInfo.ConstraintType == TargetLowering::C_Register) - GetRegistersForValue(DAG, *TLI, getCurSDLoc(), OpInfo); + GetRegistersForValue(DAG, TLI, getCurSDLoc(), OpInfo); } // Second pass - Loop over all of the operands, assigning virtual or physregs @@ -6394,7 +6466,7 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { // C_Register operands have already been allocated, Other/Memory don't need // to be. if (OpInfo.ConstraintType == TargetLowering::C_RegisterClass) - GetRegistersForValue(DAG, *TLI, getCurSDLoc(), OpInfo); + GetRegistersForValue(DAG, TLI, getCurSDLoc(), OpInfo); } // AsmNodeOperands - The operands for the ISD::INLINEASM node. 
@@ -6402,7 +6474,7 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { AsmNodeOperands.push_back(SDValue()); // reserve space for input chain AsmNodeOperands.push_back( DAG.getTargetExternalSymbol(IA->getAsmString().c_str(), - TLI->getPointerTy())); + TLI.getPointerTy())); // If we have a !srcloc metadata node associated with it, we want to attach // this to the ultimately generated inline asm machineinstr. To do this, we @@ -6425,7 +6497,7 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { TargetLowering::AsmOperandInfo &OpInfo = TargetConstraints[i]; // Compute the constraint code and ConstraintType to use. - TLI->ComputeConstraintToUse(OpInfo, SDValue()); + TLI.ComputeConstraintToUse(OpInfo, SDValue()); // Ideally, we would only check against memory constraints. However, the // meaning of an other constraint can be target-specific and we can't easily @@ -6443,7 +6515,7 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { } AsmNodeOperands.push_back(DAG.getTargetConstant(ExtraInfo, - TLI->getPointerTy())); + TLI.getPointerTy())); // Loop over all of the inputs, copying the operand values into the // appropriate registers and processing the output regs. @@ -6465,7 +6537,7 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { // Add information to the INLINEASM node to know about this output. unsigned OpFlags = InlineAsm::getFlagWord(InlineAsm::Kind_Mem, 1); AsmNodeOperands.push_back(DAG.getTargetConstant(OpFlags, - TLI->getPointerTy())); + TLI.getPointerTy())); AsmNodeOperands.push_back(OpInfo.CallOperand); break; } @@ -6545,7 +6617,7 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { MachineRegisterInfo &RegInfo = DAG.getMachineFunction().getRegInfo(); for (unsigned i = 0, e = InlineAsm::getNumOperandRegisters(OpFlag); i != e; ++i) { - if (const TargetRegisterClass *RC = TLI->getRegClassFor(RegVT)) + if (const TargetRegisterClass *RC = TLI.getRegClassFor(RegVT)) MatchedRegs.Regs.push_back(RegInfo.createVirtualRegister(RC)); else { LLVMContext &Ctx = *DAG.getContext(); @@ -6572,7 +6644,7 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { OpFlag = InlineAsm::getFlagWordForMatchingOp(OpFlag, OpInfo.getMatchedOperand()); AsmNodeOperands.push_back(DAG.getTargetConstant(OpFlag, - TLI->getPointerTy())); + TLI.getPointerTy())); AsmNodeOperands.push_back(AsmNodeOperands[CurOp+1]); break; } @@ -6584,7 +6656,7 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { if (OpInfo.ConstraintType == TargetLowering::C_Other) { std::vector Ops; - TLI->LowerAsmOperandForConstraint(InOperandVal, OpInfo.ConstraintCode, + TLI.LowerAsmOperandForConstraint(InOperandVal, OpInfo.ConstraintCode, Ops, DAG); if (Ops.empty()) { LLVMContext &Ctx = *DAG.getContext(); @@ -6598,20 +6670,20 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { unsigned ResOpType = InlineAsm::getFlagWord(InlineAsm::Kind_Imm, Ops.size()); AsmNodeOperands.push_back(DAG.getTargetConstant(ResOpType, - TLI->getPointerTy())); + TLI.getPointerTy())); AsmNodeOperands.insert(AsmNodeOperands.end(), Ops.begin(), Ops.end()); break; } if (OpInfo.ConstraintType == TargetLowering::C_Memory) { assert(OpInfo.isIndirect && "Operand must be indirect to be a mem!"); - assert(InOperandVal.getValueType() == TLI->getPointerTy() && + assert(InOperandVal.getValueType() == TLI.getPointerTy() && "Memory operands expect pointer values"); // Add information to the INLINEASM node to know about this input. 
unsigned ResOpType = InlineAsm::getFlagWord(InlineAsm::Kind_Mem, 1); AsmNodeOperands.push_back(DAG.getTargetConstant(ResOpType, - TLI->getPointerTy())); + TLI.getPointerTy())); AsmNodeOperands.push_back(InOperandVal); break; } @@ -6674,7 +6746,7 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { // FIXME: Why don't we do this for inline asms with MRVs? if (CS.getType()->isSingleValueType() && CS.getType()->isSized()) { - EVT ResultType = TLI->getValueType(CS.getType()); + EVT ResultType = TLI.getValueType(CS.getType()); // If any of the results of the inline asm is a vector, it may have the // wrong width/num elts. This can happen for register classes that can @@ -6739,9 +6811,9 @@ void SelectionDAGBuilder::visitVAStart(const CallInst &I) { } void SelectionDAGBuilder::visitVAArg(const VAArgInst &I) { - const TargetLowering *TLI = TM.getTargetLowering(); - const DataLayout &DL = *TLI->getDataLayout(); - SDValue V = DAG.getVAArg(TLI->getValueType(I.getType()), getCurSDLoc(), + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + const DataLayout &DL = *TLI.getDataLayout(); + SDValue V = DAG.getVAArg(TLI.getValueType(I.getType()), getCurSDLoc(), getRoot(), getValue(I.getOperand(0)), DAG.getSrcValue(I.getOperand(0)), DL.getABITypeAlignment(I.getType())); @@ -6773,18 +6845,18 @@ void SelectionDAGBuilder::visitVACopy(const CallInst &I) { /// convention or require stack pointer adjustment. Only a subset of the /// intrinsic's operands need to participate in the calling convention. std::pair -SelectionDAGBuilder::LowerCallOperands(const CallInst &CI, unsigned ArgIdx, +SelectionDAGBuilder::lowerCallOperands(ImmutableCallSite CS, unsigned ArgIdx, unsigned NumArgs, SDValue Callee, - bool useVoidTy) { + bool UseVoidTy, + MachineBasicBlock *LandingPad) { TargetLowering::ArgListTy Args; Args.reserve(NumArgs); // Populate the argument list. // Attributes for args start at offset 1, after the return attribute. - ImmutableCallSite CS(&CI); for (unsigned ArgI = ArgIdx, ArgE = ArgIdx + NumArgs, AttrI = ArgIdx + 1; ArgI != ArgE; ++ArgI) { - const Value *V = CI.getOperand(ArgI); + const Value *V = CS->getOperand(ArgI); assert(!V->getType()->isEmptyTy() && "Empty type passed to intrinsic."); @@ -6795,14 +6867,13 @@ SelectionDAGBuilder::LowerCallOperands(const CallInst &CI, unsigned ArgIdx, Args.push_back(Entry); } - Type *retTy = useVoidTy ? Type::getVoidTy(*DAG.getContext()) : CI.getType(); + Type *retTy = UseVoidTy ? Type::getVoidTy(*DAG.getContext()) : CS->getType(); TargetLowering::CallLoweringInfo CLI(DAG); CLI.setDebugLoc(getCurSDLoc()).setChain(getRoot()) - .setCallee(CI.getCallingConv(), retTy, Callee, std::move(Args), NumArgs) - .setDiscardResult(!CI.use_empty()); + .setCallee(CS.getCallingConv(), retTy, Callee, std::move(Args), NumArgs) + .setDiscardResult(CS->use_empty()); - const TargetLowering *TLI = TM.getTargetLowering(); - return TLI->LowerCallTo(CLI); + return lowerInvokable(CLI, LandingPad); } /// \brief Add a stack map intrinsic call's live variable operands to a stackmap @@ -6822,11 +6893,11 @@ SelectionDAGBuilder::LowerCallOperands(const CallInst &CI, unsigned ArgIdx, /// assumption made by the llvm.gcroot intrinsic). If the alloca's location were /// only available in a register, then the runtime would need to trap when /// execution reaches the StackMap in order to read the alloca's location. 
-static void addStackMapLiveVars(const CallInst &CI, unsigned StartIdx, +static void addStackMapLiveVars(ImmutableCallSite CS, unsigned StartIdx, SmallVectorImpl &Ops, SelectionDAGBuilder &Builder) { - for (unsigned i = StartIdx, e = CI.getNumArgOperands(); i != e; ++i) { - SDValue OpVal = Builder.getValue(CI.getArgOperand(i)); + for (unsigned i = StartIdx, e = CS.arg_size(); i != e; ++i) { + SDValue OpVal = Builder.getValue(CS.getArgument(i)); if (ConstantSDNode *C = dyn_cast(OpVal)) { Ops.push_back( Builder.DAG.getTargetConstant(StackMaps::ConstantOp, MVT::i64)); @@ -6877,7 +6948,7 @@ void SelectionDAGBuilder::visitStackmap(const CallInst &CI) { cast(NBytesVal)->getZExtValue(), MVT::i32)); // Push live variables for the stack map. - addStackMapLiveVars(CI, 2, Ops, *this); + addStackMapLiveVars(&CI, 2, Ops, *this); // We are not pushing any register mask info here on the operands list, // because the stackmap doesn't clobber anything. @@ -6904,7 +6975,8 @@ void SelectionDAGBuilder::visitStackmap(const CallInst &CI) { } /// \brief Lower llvm.experimental.patchpoint directly to its target opcode. -void SelectionDAGBuilder::visitPatchpoint(const CallInst &CI) { +void SelectionDAGBuilder::visitPatchpoint(ImmutableCallSite CS, + MachineBasicBlock *LandingPad) { // void|i64 @llvm.experimental.patchpoint.void|i64(i64 , // i32 , // i8* , @@ -6912,32 +6984,29 @@ void SelectionDAGBuilder::visitPatchpoint(const CallInst &CI) { // [Args...], // [live variables...]) - CallingConv::ID CC = CI.getCallingConv(); - bool isAnyRegCC = CC == CallingConv::AnyReg; - bool hasDef = !CI.getType()->isVoidTy(); - SDValue Callee = getValue(CI.getOperand(2)); // + CallingConv::ID CC = CS.getCallingConv(); + bool IsAnyRegCC = CC == CallingConv::AnyReg; + bool HasDef = !CS->getType()->isVoidTy(); + SDValue Callee = getValue(CS->getOperand(2)); // // Get the real number of arguments participating in the call - SDValue NArgVal = getValue(CI.getArgOperand(PatchPointOpers::NArgPos)); + SDValue NArgVal = getValue(CS.getArgument(PatchPointOpers::NArgPos)); unsigned NumArgs = cast(NArgVal)->getZExtValue(); // Skip the four meta args: , , , // Intrinsics include all meta-operands up to but not including CC. unsigned NumMetaOpers = PatchPointOpers::CCPos; - assert(CI.getNumArgOperands() >= NumMetaOpers + NumArgs && + assert(CS.arg_size() >= NumMetaOpers + NumArgs && "Not enough arguments provided to the patchpoint intrinsic"); // For AnyRegCC the arguments are lowered later on manually. - unsigned NumCallArgs = isAnyRegCC ? 0 : NumArgs; + unsigned NumCallArgs = IsAnyRegCC ? 0 : NumArgs; std::pair Result = - LowerCallOperands(CI, NumMetaOpers, NumCallArgs, Callee, isAnyRegCC); - - // Set the root to the target-lowered call chain. - SDValue Chain = Result.second; - DAG.setRoot(Chain); + lowerCallOperands(CS, NumMetaOpers, NumCallArgs, Callee, IsAnyRegCC, + LandingPad); - SDNode *CallEnd = Chain.getNode(); - if (hasDef && (CallEnd->getOpcode() == ISD::CopyFromReg)) + SDNode *CallEnd = Result.second.getNode(); + if (HasDef && (CallEnd->getOpcode() == ISD::CopyFromReg)) CallEnd = CallEnd->getOperand(0).getNode(); /// Get a call instruction from the call sequence chain. @@ -6945,16 +7014,16 @@ void SelectionDAGBuilder::visitPatchpoint(const CallInst &CI) { assert(CallEnd->getOpcode() == ISD::CALLSEQ_END && "Expected a callseq node."); SDNode *Call = CallEnd->getOperand(0).getNode(); - bool hasGlue = Call->getGluedNode(); + bool HasGlue = Call->getGluedNode(); // Replace the target specific call node with the patchable intrinsic. 
SmallVector Ops; // Add the and constants. - SDValue IDVal = getValue(CI.getOperand(PatchPointOpers::IDPos)); + SDValue IDVal = getValue(CS->getOperand(PatchPointOpers::IDPos)); Ops.push_back(DAG.getTargetConstant( cast(IDVal)->getZExtValue(), MVT::i64)); - SDValue NBytesVal = getValue(CI.getOperand(PatchPointOpers::NBytesPos)); + SDValue NBytesVal = getValue(CS->getOperand(PatchPointOpers::NBytesPos)); Ops.push_back(DAG.getTargetConstant( cast(NBytesVal)->getZExtValue(), MVT::i32)); @@ -6967,8 +7036,8 @@ void SelectionDAGBuilder::visitPatchpoint(const CallInst &CI) { // Adjust to account for any arguments that have been passed on the // stack instead. // Call Node: Chain, Target, {Args}, RegMask, [Glue] - unsigned NumCallRegArgs = Call->getNumOperands() - (hasGlue ? 4 : 3); - NumCallRegArgs = isAnyRegCC ? NumArgs : NumCallRegArgs; + unsigned NumCallRegArgs = Call->getNumOperands() - (HasGlue ? 4 : 3); + NumCallRegArgs = IsAnyRegCC ? NumArgs : NumCallRegArgs; Ops.push_back(DAG.getTargetConstant(NumCallRegArgs, MVT::i32)); // Add the calling convention @@ -6976,20 +7045,20 @@ void SelectionDAGBuilder::visitPatchpoint(const CallInst &CI) { // Add the arguments we omitted previously. The register allocator should // place these in any free register. - if (isAnyRegCC) + if (IsAnyRegCC) for (unsigned i = NumMetaOpers, e = NumMetaOpers + NumArgs; i != e; ++i) - Ops.push_back(getValue(CI.getArgOperand(i))); + Ops.push_back(getValue(CS.getArgument(i))); // Push the arguments from the call instruction up to the register mask. - SDNode::op_iterator e = hasGlue ? Call->op_end()-2 : Call->op_end()-1; + SDNode::op_iterator e = HasGlue ? Call->op_end()-2 : Call->op_end()-1; for (SDNode::op_iterator i = Call->op_begin()+2; i != e; ++i) Ops.push_back(*i); // Push live variables for the stack map. - addStackMapLiveVars(CI, NumMetaOpers + NumArgs, Ops, *this); + addStackMapLiveVars(CS, NumMetaOpers + NumArgs, Ops, *this); // Push the register mask info. - if (hasGlue) + if (HasGlue) Ops.push_back(*(Call->op_end()-2)); else Ops.push_back(*(Call->op_end()-1)); @@ -6999,15 +7068,15 @@ void SelectionDAGBuilder::visitPatchpoint(const CallInst &CI) { Ops.push_back(*(Call->op_begin())); // Push the glue flag (last operand). - if (hasGlue) + if (HasGlue) Ops.push_back(*(Call->op_end()-1)); SDVTList NodeTys; - if (isAnyRegCC && hasDef) { + if (IsAnyRegCC && HasDef) { // Create the return types based on the intrinsic definition const TargetLowering &TLI = DAG.getTargetLoweringInfo(); SmallVector ValueVTs; - ComputeValueVTs(TLI, CI.getType(), ValueVTs); + ComputeValueVTs(TLI, CS->getType(), ValueVTs); assert(ValueVTs.size() == 1 && "Expected only one return value type."); // There is always a chain and a glue type at the end @@ -7022,18 +7091,18 @@ void SelectionDAGBuilder::visitPatchpoint(const CallInst &CI) { getCurSDLoc(), NodeTys, Ops); // Update the NodeMap. - if (hasDef) { - if (isAnyRegCC) - setValue(&CI, SDValue(MN, 0)); + if (HasDef) { + if (IsAnyRegCC) + setValue(CS.getInstruction(), SDValue(MN, 0)); else - setValue(&CI, Result.first); + setValue(CS.getInstruction(), Result.first); } // Fixup the consumers of the intrinsic. The chain and glue may be used in the // call sequence. Furthermore the location of the chain and glue can change // when the AnyReg calling convention is used and the intrinsic returns a // value. 
- if (isAnyRegCC && hasDef) { + if (IsAnyRegCC && HasDef) { SDValue From[] = {SDValue(Call, 0), SDValue(Call, 1)}; SDValue To[] = {SDValue(MN, 1), SDValue(MN, 2)}; DAG.ReplaceAllUsesOfValuesWith(From, To, 2); @@ -7182,8 +7251,11 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const { } if (Args[i].isNest) Flags.setNest(); - if (NeedsRegBlock) + if (NeedsRegBlock) { Flags.setInConsecutiveRegs(); + if (Value == NumValues - 1) + Flags.setInConsecutiveRegsLast(); + } Flags.setOrigAlign(OriginalAlignment); MVT PartVT = getRegisterType(CLI.RetTy->getContext(), VT); @@ -7229,10 +7301,6 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const { else if (j != 0) MyFlags.Flags.setOrigAlign(1); - // Only mark the end at the last register of the last value. - if (NeedsRegBlock && Value == NumValues - 1 && j == NumParts - 1) - MyFlags.Flags.setInConsecutiveRegsLast(); - CLI.Outs.push_back(MyFlags); CLI.OutVals.push_back(Parts[j]); } @@ -7345,10 +7413,15 @@ SelectionDAGBuilder::CopyValueToVirtualRegister(const Value *V, unsigned Reg) { "Copy from a reg to the same reg!"); assert(!TargetRegisterInfo::isPhysicalRegister(Reg) && "Is a physreg"); - const TargetLowering *TLI = TM.getTargetLowering(); - RegsForValue RFV(V->getContext(), *TLI, Reg, V->getType()); + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + RegsForValue RFV(V->getContext(), TLI, Reg, V->getType()); SDValue Chain = DAG.getEntryNode(); - RFV.getCopyToRegs(Op, DAG, getCurSDLoc(), Chain, nullptr, V); + + ISD::NodeType ExtendType = (FuncInfo.PreferredExtendType.find(V) == + FuncInfo.PreferredExtendType.end()) + ? ISD::ANY_EXTEND + : FuncInfo.PreferredExtendType[V]; + RFV.getCopyToRegs(Op, DAG, getCurSDLoc(), Chain, nullptr, V, ExtendType); PendingExports.push_back(Chain); } @@ -7374,15 +7447,13 @@ static bool isOnlyUsedInEntryBlock(const Argument *A, bool FastISel) { void SelectionDAGISel::LowerArguments(const Function &F) { SelectionDAG &DAG = SDB->DAG; SDLoc dl = SDB->getCurSDLoc(); - const TargetLowering *TLI = getTargetLowering(); const DataLayout *DL = TLI->getDataLayout(); SmallVector Ins; if (!FuncInfo->CanLowerReturn) { // Put in an sret pointer parameter before all the other parameters. SmallVector ValueVTs; - ComputeValueVTs(*getTargetLowering(), - PointerType::getUnqual(F.getReturnType()), ValueVTs); + ComputeValueVTs(*TLI, PointerType::getUnqual(F.getReturnType()), ValueVTs); // NOTE: Assuming that a pointer will never break down to more than one VT // or one register. @@ -7447,8 +7518,11 @@ void SelectionDAGISel::LowerArguments(const Function &F) { } if (F.getAttributes().hasAttribute(Idx, Attribute::Nest)) Flags.setNest(); - if (NeedsRegBlock) + if (NeedsRegBlock) { Flags.setInConsecutiveRegs(); + if (Value == NumValues - 1) + Flags.setInConsecutiveRegsLast(); + } Flags.setOrigAlign(OriginalAlignment); MVT RegisterVT = TLI->getRegisterType(*CurDAG->getContext(), VT); @@ -7461,11 +7535,6 @@ void SelectionDAGISel::LowerArguments(const Function &F) { // if it isn't first piece, alignment must be 1 else if (i > 0) MyFlags.Flags.setOrigAlign(1); - - // Only mark the end at the last register of the last value. - if (NeedsRegBlock && Value == NumValues - 1 && i == NumRegs - 1) - MyFlags.Flags.setInConsecutiveRegsLast(); - Ins.push_back(MyFlags); } PartBase += VT.getStoreSize(); @@ -7474,9 +7543,8 @@ void SelectionDAGISel::LowerArguments(const Function &F) { // Call the target to set up the argument values. 
SmallVector InVals; - SDValue NewRoot = TLI->LowerFormalArguments(DAG.getRoot(), F.getCallingConv(), - F.isVarArg(), Ins, - dl, DAG, InVals); + SDValue NewRoot = TLI->LowerFormalArguments( + DAG.getRoot(), F.getCallingConv(), F.isVarArg(), Ins, dl, DAG, InVals); // Verify that the target's LowerFormalArguments behaved as expected. assert(NewRoot.getNode() && NewRoot.getValueType() == MVT::Other && @@ -7513,8 +7581,8 @@ void SelectionDAGISel::LowerArguments(const Function &F) { MachineRegisterInfo& RegInfo = MF.getRegInfo(); unsigned SRetReg = RegInfo.createVirtualRegister(TLI->getRegClassFor(RegVT)); FuncInfo->DemoteRegister = SRetReg; - NewRoot = SDB->DAG.getCopyToReg(NewRoot, SDB->getCurSDLoc(), - SRetReg, ArgValue); + NewRoot = + SDB->DAG.getCopyToReg(NewRoot, SDB->getCurSDLoc(), SRetReg, ArgValue); DAG.setRoot(NewRoot); // i indexes lowered arguments. Bump it past the hidden sret argument. @@ -7629,7 +7697,8 @@ SelectionDAGBuilder::HandlePHINodesInSuccessorBlocks(const BasicBlock *LLVMBB) { // If this terminator has multiple identical successors (common for // switches), only handle each succ once. - if (!SuccsHandled.insert(SuccMBB)) continue; + if (!SuccsHandled.insert(SuccMBB).second) + continue; MachineBasicBlock::iterator MBBI = SuccMBB->begin(); @@ -7672,11 +7741,11 @@ SelectionDAGBuilder::HandlePHINodesInSuccessorBlocks(const BasicBlock *LLVMBB) { // Remember that this register needs to added to the machine PHI node as // the input for this MBB. SmallVector ValueVTs; - const TargetLowering *TLI = TM.getTargetLowering(); - ComputeValueVTs(*TLI, PN->getType(), ValueVTs); + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + ComputeValueVTs(TLI, PN->getType(), ValueVTs); for (unsigned vti = 0, vte = ValueVTs.size(); vti != vte; ++vti) { EVT VT = ValueVTs[vti]; - unsigned NumRegisters = TLI->getNumRegisters(*DAG.getContext(), VT); + unsigned NumRegisters = TLI.getNumRegisters(*DAG.getContext(), VT); for (unsigned i = 0, e = NumRegisters; i != e; ++i) FuncInfo.PHINodesToUpdate.push_back(std::make_pair(MBBI++, Reg+i)); Reg += NumRegisters; diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h index 84679f9..f74e652 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h +++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h @@ -11,8 +11,8 @@ // //===----------------------------------------------------------------------===// -#ifndef SELECTIONDAGBUILDER_H -#define SELECTIONDAGBUILDER_H +#ifndef LLVM_LIB_CODEGEN_SELECTIONDAG_SELECTIONDAGBUILDER_H +#define LLVM_LIB_CODEGEN_SELECTIONDAG_SELECTIONDAGBUILDER_H #include "llvm/ADT/APInt.h" #include "llvm/ADT/DenseMap.h" @@ -21,6 +21,7 @@ #include "llvm/IR/CallSite.h" #include "llvm/IR/Constants.h" #include "llvm/Support/ErrorHandling.h" +#include "llvm/Target/TargetLowering.h" #include namespace llvm { @@ -200,7 +201,7 @@ private: } }; - size_t Clusterify(CaseVector &Cases, const SwitchInst &SI); + void Clusterify(CaseVector &Cases, const SwitchInst &SI); /// CaseBlock - This structure is used to communicate between /// SelectionDAGBuilder and SDISel for the code generation of additional basic @@ -276,9 +277,9 @@ private: BitTestBlock(APInt F, APInt R, const Value* SV, unsigned Rg, MVT RgVT, bool E, MachineBasicBlock* P, MachineBasicBlock* D, - const BitTestInfo& C): + BitTestInfo C): First(F), Range(R), SValue(SV), Reg(Rg), RegVT(RgVT), Emitted(E), - Parent(P), Default(D), Cases(C) { } + Parent(P), Default(D), Cases(std::move(C)) { } APInt First; APInt Range; const Value 
*SValue; @@ -397,7 +398,8 @@ private: class StackProtectorDescriptor { public: StackProtectorDescriptor() : ParentMBB(nullptr), SuccessMBB(nullptr), - FailureMBB(nullptr), Guard(nullptr) { } + FailureMBB(nullptr), Guard(nullptr), + GuardReg(0) { } ~StackProtectorDescriptor() { } /// Returns true if all fields of the stack protector descriptor are @@ -455,6 +457,9 @@ private: MachineBasicBlock *getFailureMBB() { return FailureMBB; } const Value *getGuard() { return Guard; } + unsigned getGuardReg() const { return GuardReg; } + void setGuardReg(unsigned R) { GuardReg = R; } + private: /// The basic block for which we are generating the stack protector. /// @@ -477,6 +482,9 @@ private: /// stack protector stack slot. const Value *Guard; + /// The virtual register holding the stack guard value. + unsigned GuardReg; + /// Add a successor machine basic block to ParentMBB. If the successor mbb /// has not been created yet (i.e. if SuccMBB = 0), then the machine basic /// block will be created. @@ -626,17 +634,23 @@ public: void LowerCallTo(ImmutableCallSite CS, SDValue Callee, bool IsTailCall, MachineBasicBlock *LandingPad = nullptr); - std::pair LowerCallOperands(const CallInst &CI, - unsigned ArgIdx, - unsigned NumArgs, - SDValue Callee, - bool useVoidTy = false); + std::pair lowerCallOperands( + ImmutableCallSite CS, + unsigned ArgIdx, + unsigned NumArgs, + SDValue Callee, + bool UseVoidTy = false, + MachineBasicBlock *LandingPad = nullptr); /// UpdateSplitBlock - When an MBB was split during scheduling, update the /// references that need to refer to the last resulting block. void UpdateSplitBlock(MachineBasicBlock *First, MachineBasicBlock *Last); private: + std::pair lowerInvokable( + TargetLowering::CallLoweringInfo &CLI, + MachineBasicBlock *LandingPad); + // Terminator instructions. void visitRet(const ReturnInst &I); void visitBr(const BranchInst &I); @@ -658,7 +672,6 @@ private: bool handleBTSplitSwitchCase(CaseRec& CR, CaseRecVector& WorkList, const Value* SV, - MachineBasicBlock* Default, MachineBasicBlock *SwitchBB); bool handleBitTestsSwitchCase(CaseRec& CR, CaseRecVector& WorkList, @@ -755,6 +768,7 @@ private: bool visitStrLenCall(const CallInst &I); bool visitStrNLenCall(const CallInst &I); bool visitUnaryFloatCall(const CallInst &I, unsigned Opcode); + bool visitBinaryFloatCall(const CallInst &I, unsigned Opcode); void visitAtomicLoad(const LoadInst &I); void visitAtomicStore(const StoreInst &I); @@ -767,7 +781,8 @@ private: void visitVAEnd(const CallInst &I); void visitVACopy(const CallInst &I); void visitStackmap(const CallInst &I); - void visitPatchpoint(const CallInst &I); + void visitPatchpoint(ImmutableCallSite CS, + MachineBasicBlock *LandingPad = nullptr); void visitUserOp1(const Instruction &I) { llvm_unreachable("UserOp1 should not exist at instruction selection time!"); @@ -784,7 +799,7 @@ private: /// EmitFuncArgumentDbgValue - If V is an function argument then create /// corresponding DBG_VALUE machine instruction for it now. At the end of /// instruction selection, they will be inserted to the entry BB. 
- bool EmitFuncArgumentDbgValue(const Value *V, MDNode *Variable, + bool EmitFuncArgumentDbgValue(const Value *V, MDNode *Variable, MDNode *Expr, int64_t Offset, bool IsIndirect, const SDValue &N); }; diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp index b3a452f..c9f6cff 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp @@ -27,6 +27,7 @@ #include "llvm/Target/TargetIntrinsicInfo.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/Target/TargetSubtargetInfo.h" using namespace llvm; std::string SDNode::getOperationName(const SelectionDAG *G) const { @@ -36,7 +37,7 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const { return "<>"; if (isMachineOpcode()) { if (G) - if (const TargetInstrInfo *TII = G->getTarget().getInstrInfo()) + if (const TargetInstrInfo *TII = G->getSubtarget().getInstrInfo()) if (getMachineOpcode() < TII->getNumOpcodes()) return TII->getName(getMachineOpcode()); return "<>"; @@ -140,6 +141,8 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const { // Unary operators case ISD::FABS: return "fabs"; + case ISD::FMINNUM: return "fminnum"; + case ISD::FMAXNUM: return "fmaxnum"; case ISD::FNEG: return "fneg"; case ISD::FSQRT: return "fsqrt"; case ISD::FSIN: return "fsin"; @@ -236,8 +239,8 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const { case ISD::FP_TO_UINT: return "fp_to_uint"; case ISD::BITCAST: return "bitcast"; case ISD::ADDRSPACECAST: return "addrspacecast"; - case ISD::FP16_TO_FP32: return "fp16_to_fp32"; - case ISD::FP32_TO_FP16: return "fp32_to_fp16"; + case ISD::FP16_TO_FP: return "fp16_to_fp"; + case ISD::FP_TO_FP16: return "fp_to_fp16"; case ISD::CONVERT_RNDSAT: { switch (cast(this)->getCvtCode()) { @@ -433,7 +436,8 @@ void SDNode::print_details(raw_ostream &OS, const SelectionDAG *G) const { OS << LBB->getName() << " "; OS << (const void*)BBDN->getBasicBlock() << ">"; } else if (const RegisterSDNode *R = dyn_cast(this)) { - OS << ' ' << PrintReg(R->getReg(), G ? G->getTarget().getRegisterInfo() :nullptr); + OS << ' ' << PrintReg(R->getReg(), + G ? G->getSubtarget().getRegisterInfo() : nullptr); } else if (const ExternalSymbolSDNode *ES = dyn_cast(this)) { OS << "'" << ES->getSymbol() << "'"; @@ -565,7 +569,7 @@ void SDNode::printr(raw_ostream &OS, const SelectionDAG *G) const { typedef SmallPtrSet VisitedSDNodeSet; static void DumpNodesr(raw_ostream &OS, const SDNode *N, unsigned indent, const SelectionDAG *G, VisitedSDNodeSet &once) { - if (!once.insert(N)) // If we've been here before, return now. + if (!once.insert(N).second) // If we've been here before, return now. return; // Dump the current SDNode, but don't end the line yet. diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp index 57e22e2..79109b7 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp @@ -284,8 +284,8 @@ namespace llvm { /// for the target. 
ScheduleDAGSDNodes* createDefaultScheduler(SelectionDAGISel *IS, CodeGenOpt::Level OptLevel) { - const TargetLowering *TLI = IS->getTargetLowering(); - const TargetSubtargetInfo &ST = IS->TM.getSubtarget(); + const TargetLowering *TLI = IS->TLI; + const TargetSubtargetInfo &ST = IS->MF->getSubtarget(); if (OptLevel == CodeGenOpt::None || ST.useMachineScheduler() || TLI->getSchedulingPreference() == Sched::Source) @@ -336,7 +336,7 @@ void TargetLowering::AdjustInstrPostInstrSelection(MachineInstr *MI, SelectionDAGISel::SelectionDAGISel(TargetMachine &tm, CodeGenOpt::Level OL) : MachineFunctionPass(ID), TM(tm), - FuncInfo(new FunctionLoweringInfo(TM)), + FuncInfo(new FunctionLoweringInfo()), CurDAG(new SelectionDAG(tm, OL)), SDB(new SelectionDAGBuilder(*CurDAG, *FuncInfo, OL)), GFI(), @@ -411,32 +411,32 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) { "-fast-isel-abort requires -fast-isel"); const Function &Fn = *mf.getFunction(); - const TargetInstrInfo &TII = *TM.getInstrInfo(); - const TargetRegisterInfo &TRI = *TM.getRegisterInfo(); - const TargetLowering *TLI = TM.getTargetLowering(); - MF = &mf; - RegInfo = &MF->getRegInfo(); - AA = &getAnalysis(); - LibInfo = &getAnalysis(); - GFI = Fn.hasGC() ? &getAnalysis().getFunctionInfo(Fn) : nullptr; - - TargetSubtargetInfo &ST = - const_cast(TM.getSubtarget()); - ST.resetSubtargetFeatures(MF); - TM.resetTargetOptions(MF); + // Reset the target options before resetting the optimization + // level below. + // FIXME: This is a horrible hack and should be processed via + // codegen looking at the optimization level explicitly when + // it wants to look at it. + TM.resetTargetOptions(Fn); // Reset OptLevel to None for optnone functions. CodeGenOpt::Level NewOptLevel = OptLevel; if (Fn.hasFnAttribute(Attribute::OptimizeNone)) NewOptLevel = CodeGenOpt::None; OptLevelChanger OLC(*this, NewOptLevel); + TII = MF->getSubtarget().getInstrInfo(); + TLI = MF->getSubtarget().getTargetLowering(); + RegInfo = &MF->getRegInfo(); + AA = &getAnalysis(); + LibInfo = &getAnalysis(); + GFI = Fn.hasGC() ? &getAnalysis().getFunctionInfo(Fn) : nullptr; + DEBUG(dbgs() << "\n\n\n=== " << Fn.getName() << "\n"); SplitCriticalSideEffectEdges(const_cast(Fn), this); - CurDAG->init(*MF, TLI); + CurDAG->init(*MF); FuncInfo->set(Fn, *MF, CurDAG); if (UseMBPI && OptLevel != CodeGenOpt::None) @@ -454,7 +454,8 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) { // copied into vregs, emit the copies into the top of the block before // emitting the code for the block. MachineBasicBlock *EntryMBB = MF->begin(); - RegInfo->EmitLiveInCopies(EntryMBB, TRI, TII); + const TargetRegisterInfo &TRI = *MF->getSubtarget().getRegisterInfo(); + RegInfo->EmitLiveInCopies(EntryMBB, TRI, *TII); DenseMap LiveInMap; if (!FuncInfo->ArgDbgValues.empty()) @@ -489,15 +490,14 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) { "- add if needed"); MachineInstr *Def = RegInfo->getVRegDef(LDI->second); MachineBasicBlock::iterator InsertPos = Def; - const MDNode *Variable = - MI->getOperand(MI->getNumOperands()-1).getMetadata(); + const MDNode *Variable = MI->getDebugVariable(); + const MDNode *Expr = MI->getDebugExpression(); bool IsIndirect = MI->isIndirectDebugValue(); unsigned Offset = IsIndirect ? MI->getOperand(1).getImm() : 0; // Def is never a terminator here, so it is ok to increment InsertPos. 
BuildMI(*EntryMBB, ++InsertPos, MI->getDebugLoc(), - TII.get(TargetOpcode::DBG_VALUE), - IsIndirect, - LDI->second, Offset, Variable); + TII->get(TargetOpcode::DBG_VALUE), IsIndirect, LDI->second, Offset, + Variable, Expr); // If this vreg is directly copied into an exported register then // that COPY instructions also need DBG_VALUE, if it is the only @@ -516,11 +516,9 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) { } if (CopyUseMI) { MachineInstr *NewMI = - BuildMI(*MF, CopyUseMI->getDebugLoc(), - TII.get(TargetOpcode::DBG_VALUE), - IsIndirect, - CopyUseMI->getOperand(0).getReg(), - Offset, Variable); + BuildMI(*MF, CopyUseMI->getDebugLoc(), + TII->get(TargetOpcode::DBG_VALUE), IsIndirect, + CopyUseMI->getOperand(0).getReg(), Offset, Variable, Expr); MachineBasicBlock::iterator Pos = CopyUseMI; EntryMBB->insertAfter(Pos, NewMI); } @@ -534,7 +532,7 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) { break; for (const auto &MI : MBB) { - const MCInstrDesc &MCID = TM.getInstrInfo()->get(MI.getOpcode()); + const MCInstrDesc &MCID = TII->get(MI.getOpcode()); if ((MCID.isCall() && !MCID.isReturn()) || MI.isStackAligningInlineAsm()) { MFI->setHasCalls(true); @@ -617,7 +615,7 @@ void SelectionDAGISel::ComputeLiveOutVRegInfo() { SDNode *N = Worklist.pop_back_val(); // If we've already seen this node, ignore it. - if (!VisitedNodes.insert(N)) + if (!VisitedNodes.insert(N).second) continue; // Otherwise, add all chain operands to the worklist. @@ -901,12 +899,11 @@ void SelectionDAGISel::PrepareEHLandingPad() { // Assign the call site to the landing pad's begin label. MF->getMMI().setCallSiteLandingPad(Label, SDB->LPadToCallSiteMap[MBB]); - const MCInstrDesc &II = TM.getInstrInfo()->get(TargetOpcode::EH_LABEL); + const MCInstrDesc &II = TII->get(TargetOpcode::EH_LABEL); BuildMI(*MBB, FuncInfo->InsertPt, SDB->getCurDebugLoc(), II) .addSym(Label); // Mark exception register as live in. - const TargetLowering *TLI = getTargetLowering(); const TargetRegisterClass *PtrRC = TLI->getRegClassFor(TLI->getPointerTy()); if (unsigned Reg = TLI->getExceptionPointerRegister()) FuncInfo->ExceptionPointerVirtReg = MBB->addLiveIn(Reg, PtrRC); @@ -1042,7 +1039,7 @@ void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) { // Initialize the Fast-ISel state, if needed. FastISel *FastIS = nullptr; if (TM.Options.EnableFastISel) - FastIS = getTargetLowering()->createFastISel(*FuncInfo, LibInfo); + FastIS = TLI->createFastISel(*FuncInfo, LibInfo); // Iterate over all basic blocks in the function. ReversePostOrderTraversal RPOT(&Fn); @@ -1096,7 +1093,7 @@ void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) { ++NumEntryBlocks; // Lower any arguments needed in this block if this is the entry block. - if (!FastIS->LowerArguments()) { + if (!FastIS->lowerArguments()) { // Fast isel failed to lower these arguments ++NumFastIselFailLowerArguments; if (EnableFastISelAbortArgs) @@ -1134,7 +1131,7 @@ void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) { FastIS->recomputeInsertPt(); // Try to select the instruction with FastISel. - if (FastIS->SelectInstruction(Inst)) { + if (FastIS->selectInstruction(Inst)) { --NumFastIselRemaining; ++NumFastIselSuccess; // If fast isel succeeded, skip over all the folded instructions, and @@ -1729,7 +1726,7 @@ static SDNode *findGlueUse(SDNode *N) { /// This function recursively traverses up the operand chain, ignoring /// certain nodes. 
static bool findNonImmUse(SDNode *Use, SDNode* Def, SDNode *ImmedUse, - SDNode *Root, SmallPtrSet &Visited, + SDNode *Root, SmallPtrSetImpl &Visited, bool IgnoreChains) { // The NodeID's are given uniques ID's where a node ID is guaranteed to be // greater than all of its (recursive) operands. If we scan to a point where @@ -1744,7 +1741,7 @@ static bool findNonImmUse(SDNode *Use, SDNode* Def, SDNode *ImmedUse, // Don't revisit nodes if we already scanned it and didn't fail, we know we // won't fail if we scan it again. - if (!Visited.insert(Use)) + if (!Visited.insert(Use).second) return false; for (unsigned i = 0, e = Use->getNumOperands(); i != e; ++i) { @@ -1861,8 +1858,8 @@ SDNode SDLoc dl(Op); MDNodeSDNode *MD = dyn_cast(Op->getOperand(0)); const MDString *RegStr = dyn_cast(MD->getMD()->getOperand(0)); - unsigned Reg = getTargetLowering()->getRegisterByName( - RegStr->getString().data(), Op->getValueType(0)); + unsigned Reg = + TLI->getRegisterByName(RegStr->getString().data(), Op->getValueType(0)); SDValue New = CurDAG->getCopyFromReg( CurDAG->getEntryNode(), dl, Reg, Op->getValueType(0)); New->setNodeId(-1); @@ -1874,8 +1871,8 @@ SDNode SDLoc dl(Op); MDNodeSDNode *MD = dyn_cast(Op->getOperand(1)); const MDString *RegStr = dyn_cast(MD->getMD()->getOperand(0)); - unsigned Reg = getTargetLowering()->getRegisterByName( - RegStr->getString().data(), Op->getOperand(2).getValueType()); + unsigned Reg = TLI->getRegisterByName(RegStr->getString().data(), + Op->getOperand(2).getValueType()); SDValue New = CurDAG->getCopyToReg( CurDAG->getEntryNode(), dl, Reg, Op->getOperand(2)); New->setNodeId(-1); @@ -2375,7 +2372,7 @@ static unsigned IsPredicateKnownToFail(const unsigned char *Table, Result = !::CheckOpcode(Table, Index, N.getNode()); return Index; case SelectionDAGISel::OPC_CheckType: - Result = !::CheckType(Table, Index, N, SDISel.getTargetLowering()); + Result = !::CheckType(Table, Index, N, SDISel.TLI); return Index; case SelectionDAGISel::OPC_CheckChild0Type: case SelectionDAGISel::OPC_CheckChild1Type: @@ -2385,14 +2382,15 @@ static unsigned IsPredicateKnownToFail(const unsigned char *Table, case SelectionDAGISel::OPC_CheckChild5Type: case SelectionDAGISel::OPC_CheckChild6Type: case SelectionDAGISel::OPC_CheckChild7Type: - Result = !::CheckChildType(Table, Index, N, SDISel.getTargetLowering(), - Table[Index-1] - SelectionDAGISel::OPC_CheckChild0Type); + Result = !::CheckChildType(Table, Index, N, SDISel.TLI, + Table[Index - 1] - + SelectionDAGISel::OPC_CheckChild0Type); return Index; case SelectionDAGISel::OPC_CheckCondCode: Result = !::CheckCondCode(Table, Index, N); return Index; case SelectionDAGISel::OPC_CheckValueType: - Result = !::CheckValueType(Table, Index, N, SDISel.getTargetLowering()); + Result = !::CheckValueType(Table, Index, N, SDISel.TLI); return Index; case SelectionDAGISel::OPC_CheckInteger: Result = !::CheckInteger(Table, Index, N); @@ -2436,6 +2434,42 @@ struct MatchScope { bool HasChainNodesMatched, HasGlueResultNodesMatched; }; +/// \\brief A DAG update listener to keep the matching state +/// (i.e. RecordedNodes and MatchScope) uptodate if the target is allowed to +/// change the DAG while matching. X86 addressing mode matcher is an example +/// for this. 
+class MatchStateUpdater : public SelectionDAG::DAGUpdateListener
+{
+  SmallVectorImpl<std::pair<SDValue, SDNode *> > &RecordedNodes;
+  SmallVectorImpl<MatchScope> &MatchScopes;
+public:
+  MatchStateUpdater(SelectionDAG &DAG,
+                    SmallVectorImpl<std::pair<SDValue, SDNode *> > &RN,
+                    SmallVectorImpl<MatchScope> &MS) :
+    SelectionDAG::DAGUpdateListener(DAG),
+    RecordedNodes(RN), MatchScopes(MS) { }
+
+  void NodeDeleted(SDNode *N, SDNode *E) {
+    // Some early-returns here to avoid the search if we deleted the node or
+    // if the update comes from MorphNodeTo (MorphNodeTo is the last thing we
+    // do, so it's unnecessary to update matching state at that point).
+    // Neither of these can occur currently because we only install this
+    // update listener during matching a complex patterns.
+    if (!E || E->isMachineOpcode())
+      return;
+    // Performing linear search here does not matter because we almost never
+    // run this code. You'd have to have a CSE during complex pattern
+    // matching.
+    for (auto &I : RecordedNodes)
+      if (I.first.getNode() == N)
+        I.first.setNode(E);
+
+    for (auto &I : MatchScopes)
+      for (auto &J : I.NodeStack)
+        if (J.getNode() == N)
+          J.setNode(E);
+  }
+};
 }
 
 SDNode *SelectionDAGISel::
@@ -2449,8 +2483,6 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable,
   case ISD::BasicBlock:
   case ISD::Register:
   case ISD::RegisterMask:
-  //case ISD::VALUETYPE:
-  //case ISD::CONDCODE:
   case ISD::HANDLENODE:
   case ISD::MDNODE_SDNODE:
   case ISD::TargetConstant:
@@ -2692,6 +2724,14 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable,
       unsigned CPNum = MatcherTable[MatcherIndex++];
       unsigned RecNo = MatcherTable[MatcherIndex++];
       assert(RecNo < RecordedNodes.size() && "Invalid CheckComplexPat");
+
+      // If target can modify DAG during matching, keep the matching state
+      // consistent.
+      std::unique_ptr<MatchStateUpdater> MSU;
+      if (ComplexPatternFuncMutatesDAG())
+        MSU.reset(new MatchStateUpdater(*CurDAG, RecordedNodes,
+                                        MatchScopes));
+
       if (!CheckComplexPattern(NodeToMatch, RecordedNodes[RecNo].second,
                                RecordedNodes[RecNo].first, CPNum,
                                RecordedNodes))
@@ -2703,7 +2743,7 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable,
       continue;
 
     case OPC_CheckType:
-      if (!::CheckType(MatcherTable, MatcherIndex, N, getTargetLowering()))
+      if (!::CheckType(MatcherTable, MatcherIndex, N, TLI))
        break;
      continue;
 
@@ -2751,7 +2791,7 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable,
         MVT CaseVT = (MVT::SimpleValueType)MatcherTable[MatcherIndex++];
         if (CaseVT == MVT::iPTR)
-          CaseVT = getTargetLowering()->getPointerTy();
+          CaseVT = TLI->getPointerTy();
 
         // If the VT matches, then we will execute this case.
if (CurNodeVT == CaseVT) @@ -2773,7 +2813,7 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable, case OPC_CheckChild2Type: case OPC_CheckChild3Type: case OPC_CheckChild4Type: case OPC_CheckChild5Type: case OPC_CheckChild6Type: case OPC_CheckChild7Type: - if (!::CheckChildType(MatcherTable, MatcherIndex, N, getTargetLowering(), + if (!::CheckChildType(MatcherTable, MatcherIndex, N, TLI, Opcode-OPC_CheckChild0Type)) break; continue; @@ -2781,7 +2821,7 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable, if (!::CheckCondCode(MatcherTable, MatcherIndex, N)) break; continue; case OPC_CheckValueType: - if (!::CheckValueType(MatcherTable, MatcherIndex, N, getTargetLowering())) + if (!::CheckValueType(MatcherTable, MatcherIndex, N, TLI)) break; continue; case OPC_CheckInteger: @@ -2980,7 +3020,8 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable, for (unsigned i = 0; i != NumVTs; ++i) { MVT::SimpleValueType VT = (MVT::SimpleValueType)MatcherTable[MatcherIndex++]; - if (VT == MVT::iPTR) VT = getTargetLowering()->getPointerTy().SimpleTy; + if (VT == MVT::iPTR) + VT = TLI->getPointerTy().SimpleTy; VTs.push_back(VT); } @@ -3076,7 +3117,7 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable, if (EmitNodeInfo & OPFL_MemRefs) { // Only attach load or store memory operands if the generated // instruction may load or store. - const MCInstrDesc &MCID = TM.getInstrInfo()->get(TargetOpc); + const MCInstrDesc &MCID = TII->get(TargetOpc); bool mayLoad = MCID.mayLoad(); bool mayStore = MCID.mayStore(); diff --git a/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/lib/CodeGen/SelectionDAG/TargetLowering.cpp index 42372a2..9aef5ed 100644 --- a/lib/CodeGen/SelectionDAG/TargetLowering.cpp +++ b/lib/CodeGen/SelectionDAG/TargetLowering.cpp @@ -31,13 +31,13 @@ #include "llvm/Target/TargetLoweringObjectFile.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/Target/TargetSubtargetInfo.h" #include using namespace llvm; -/// NOTE: The constructor takes ownership of TLOF. -TargetLowering::TargetLowering(const TargetMachine &tm, - const TargetLoweringObjectFile *tlof) - : TargetLoweringBase(tm, tlof) {} +/// NOTE: The TargetMachine owns TLOF. +TargetLowering::TargetLowering(const TargetMachine &tm) + : TargetLoweringBase(tm) {} const char *TargetLowering::getTargetNodeName(unsigned Opcode) const { return nullptr; @@ -2177,7 +2177,8 @@ getRegForInlineAsmConstraint(const std::string &Constraint, std::make_pair(0u, static_cast(nullptr)); // Figure out which register class contains this reg. - const TargetRegisterInfo *RI = getTargetMachine().getRegisterInfo(); + const TargetRegisterInfo *RI = + getTargetMachine().getSubtargetImpl()->getRegisterInfo(); for (TargetRegisterInfo::regclass_iterator RCI = RI->regclass_begin(), E = RI->regclass_end(); RCI != E; ++RCI) { const TargetRegisterClass *RC = *RCI; @@ -2239,14 +2240,11 @@ TargetLowering::AsmOperandInfoVector TargetLowering::ParseConstraints( // Do a prepass over the constraints, canonicalizing them, and building up the // ConstraintOperands list. - InlineAsm::ConstraintInfoVector - ConstraintInfos = IA->ParseConstraints(); - unsigned ArgNo = 0; // ArgNo - The argument of the CallInst. unsigned ResNo = 0; // ResNo - The result number of the next output. 
- for (unsigned i = 0, e = ConstraintInfos.size(); i != e; ++i) { - ConstraintOperands.push_back(AsmOperandInfo(ConstraintInfos[i])); + for (InlineAsm::ConstraintInfo &CI : IA->ParseConstraints()) { + ConstraintOperands.emplace_back(std::move(CI)); AsmOperandInfo &OpInfo = ConstraintOperands.back(); // Update multiple alternative constraint count. @@ -2325,7 +2323,7 @@ TargetLowering::AsmOperandInfoVector TargetLowering::ParseConstraints( } // If we have multiple alternative constraints, select the best alternative. - if (ConstraintInfos.size()) { + if (ConstraintOperands.size()) { if (maCount) { unsigned bestMAIndex = 0; int bestWeight = -1; @@ -2641,11 +2639,13 @@ SDValue TargetLowering::BuildExactSDIV(SDValue Op1, SDValue Op2, SDLoc dl, /// \brief Given an ISD::SDIV node expressing a divide by constant, /// return a DAG expression to select that will generate the same value by -/// multiplying by a magic number. See: -/// +/// multiplying by a magic number. +/// Ref: "Hacker's Delight" or "The PowerPC Compiler Writer's Guide". SDValue TargetLowering::BuildSDIV(SDNode *N, const APInt &Divisor, SelectionDAG &DAG, bool IsAfterLegalization, std::vector *Created) const { + assert(Created && "No vector to hold sdiv ops."); + EVT VT = N->getValueType(0); SDLoc dl(N); @@ -2673,38 +2673,36 @@ SDValue TargetLowering::BuildSDIV(SDNode *N, const APInt &Divisor, // If d > 0 and m < 0, add the numerator if (Divisor.isStrictlyPositive() && magics.m.isNegative()) { Q = DAG.getNode(ISD::ADD, dl, VT, Q, N->getOperand(0)); - if (Created) - Created->push_back(Q.getNode()); + Created->push_back(Q.getNode()); } // If d < 0 and m > 0, subtract the numerator. if (Divisor.isNegative() && magics.m.isStrictlyPositive()) { Q = DAG.getNode(ISD::SUB, dl, VT, Q, N->getOperand(0)); - if (Created) - Created->push_back(Q.getNode()); + Created->push_back(Q.getNode()); } // Shift right algebraic if shift value is nonzero if (magics.s > 0) { Q = DAG.getNode(ISD::SRA, dl, VT, Q, DAG.getConstant(magics.s, getShiftAmountTy(Q.getValueType()))); - if (Created) - Created->push_back(Q.getNode()); + Created->push_back(Q.getNode()); } // Extract the sign bit and add it to the quotient SDValue T = DAG.getNode(ISD::SRL, dl, VT, Q, DAG.getConstant(VT.getScalarSizeInBits() - 1, getShiftAmountTy(Q.getValueType()))); - if (Created) - Created->push_back(T.getNode()); + Created->push_back(T.getNode()); return DAG.getNode(ISD::ADD, dl, VT, Q, T); } /// \brief Given an ISD::UDIV node expressing a divide by constant, /// return a DAG expression to select that will generate the same value by -/// multiplying by a magic number. See: -/// +/// multiplying by a magic number. +/// Ref: "Hacker's Delight" or "The PowerPC Compiler Writer's Guide". SDValue TargetLowering::BuildUDIV(SDNode *N, const APInt &Divisor, SelectionDAG &DAG, bool IsAfterLegalization, std::vector *Created) const { + assert(Created && "No vector to hold udiv ops."); + EVT VT = N->getValueType(0); SDLoc dl(N); @@ -2725,8 +2723,7 @@ SDValue TargetLowering::BuildUDIV(SDNode *N, const APInt &Divisor, unsigned Shift = Divisor.countTrailingZeros(); Q = DAG.getNode(ISD::SRL, dl, VT, Q, DAG.getConstant(Shift, getShiftAmountTy(Q.getValueType()))); - if (Created) - Created->push_back(Q.getNode()); + Created->push_back(Q.getNode()); // Get magic number for the shifted divisor. 
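// BuildSDIV/BuildUDIV above emit a multiply-high plus shifts instead of a
// divide. A standalone illustration of the trick for unsigned division by 3:
// 0xAAAAAAAB is ceil(2^33 / 3), so a 64-bit multiply followed by a 33-bit
// shift reproduces N / 3 for every 32-bit N (the constant here is an
// illustrative example, not taken from the patch).
#include <cassert>
#include <cstdint>

static uint32_t udiv3(uint32_t N) {
  return (uint32_t)(((uint64_t)N * 0xAAAAAAABu) >> 33); // MULHU + SRL
}

int main() {
  for (uint32_t N : {0u, 1u, 2u, 3u, 1000u, 0xFFFFFFFFu})
    assert(udiv3(N) == N / 3);
}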
magics = Divisor.lshr(Shift).magicu(Shift); @@ -2744,8 +2741,8 @@ SDValue TargetLowering::BuildUDIV(SDNode *N, const APInt &Divisor, DAG.getConstant(magics.m, VT)).getNode(), 1); else return SDValue(); // No mulhu or equvialent - if (Created) - Created->push_back(Q.getNode()); + + Created->push_back(Q.getNode()); if (magics.a == 0) { assert(magics.s < Divisor.getBitWidth() && @@ -2754,15 +2751,12 @@ SDValue TargetLowering::BuildUDIV(SDNode *N, const APInt &Divisor, DAG.getConstant(magics.s, getShiftAmountTy(Q.getValueType()))); } else { SDValue NPQ = DAG.getNode(ISD::SUB, dl, VT, N->getOperand(0), Q); - if (Created) - Created->push_back(NPQ.getNode()); + Created->push_back(NPQ.getNode()); NPQ = DAG.getNode(ISD::SRL, dl, VT, NPQ, DAG.getConstant(1, getShiftAmountTy(NPQ.getValueType()))); - if (Created) - Created->push_back(NPQ.getNode()); + Created->push_back(NPQ.getNode()); NPQ = DAG.getNode(ISD::ADD, dl, VT, NPQ, Q); - if (Created) - Created->push_back(NPQ.getNode()); + Created->push_back(NPQ.getNode()); return DAG.getNode(ISD::SRL, dl, VT, NPQ, DAG.getConstant(magics.s-1, getShiftAmountTy(NPQ.getValueType()))); } @@ -2785,7 +2779,7 @@ verifyReturnAddressArgumentIsConstant(SDValue Op, SelectionDAG &DAG) const { bool TargetLowering::expandMUL(SDNode *N, SDValue &Lo, SDValue &Hi, EVT HiLoVT, SelectionDAG &DAG, SDValue LL, SDValue LH, - SDValue RL, SDValue RH) const { + SDValue RL, SDValue RH) const { EVT VT = N->getValueType(0); SDLoc dl(N); @@ -2818,8 +2812,8 @@ bool TargetLowering::expandMUL(SDNode *N, SDValue &Lo, SDValue &Hi, EVT HiLoVT, // The inputs are both zero-extended. if (HasUMUL_LOHI) { // We can emit a umul_lohi. - Lo = DAG.getNode(ISD::UMUL_LOHI, dl, - DAG.getVTList(HiLoVT, HiLoVT), LL, RL); + Lo = DAG.getNode(ISD::UMUL_LOHI, dl, DAG.getVTList(HiLoVT, HiLoVT), LL, + RL); Hi = SDValue(Lo.getNode(), 1); return true; } @@ -2834,8 +2828,8 @@ bool TargetLowering::expandMUL(SDNode *N, SDValue &Lo, SDValue &Hi, EVT HiLoVT, // The input values are both sign-extended. if (HasSMUL_LOHI) { // We can emit a smul_lohi. - Lo = DAG.getNode(ISD::SMUL_LOHI, dl, - DAG.getVTList(HiLoVT, HiLoVT), LL, RL); + Lo = DAG.getNode(ISD::SMUL_LOHI, dl, DAG.getVTList(HiLoVT, HiLoVT), LL, + RL); Hi = SDValue(Lo.getNode(), 1); return true; } @@ -2885,3 +2879,65 @@ bool TargetLowering::expandMUL(SDNode *N, SDValue &Lo, SDValue &Hi, EVT HiLoVT, } return false; } + +bool TargetLowering::expandFP_TO_SINT(SDNode *Node, SDValue &Result, + SelectionDAG &DAG) const { + EVT VT = Node->getOperand(0).getValueType(); + EVT NVT = Node->getValueType(0); + SDLoc dl(SDValue(Node, 0)); + + // FIXME: Only f32 to i64 conversions are supported. 
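// The expansion below mirrors compiler-rt's fixsfdi. An assumed-equivalent
// scalar rendering of the same bit manipulation, included only for
// readability (behaviour for out-of-range inputs is undefined, just like the
// conversion itself):
#include <cassert>
#include <cstdint>
#include <cstring>

static int64_t fixsfdi(float A) {
  uint32_t Bits;
  std::memcpy(&Bits, &A, sizeof(Bits));              // ISD::BITCAST
  int32_t Exponent = (int32_t)((Bits & 0x7F800000) >> 23) - 127;
  int64_t Sign = (Bits & 0x80000000) ? -1 : 0;       // SRA of the sign bit
  int64_t R = (Bits & 0x007FFFFF) | 0x00800000;      // implicit leading one
  if (Exponent < 0)                                  // |A| < 1 truncates to 0
    return 0;
  R = Exponent > 23 ? R << (Exponent - 23) : R >> (23 - Exponent);
  return (R ^ Sign) - Sign;                          // apply the sign
}

int main() {
  assert(fixsfdi(1.0f) == 1);
  assert(fixsfdi(0.5f) == 0);
  assert(fixsfdi(-2.5f) == -2);
}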
+ if (VT != MVT::f32 || NVT != MVT::i64) + return false; + + // Expand f32 -> i64 conversion + // This algorithm comes from compiler-rt's implementation of fixsfdi: + // https://github.com/llvm-mirror/compiler-rt/blob/master/lib/builtins/fixsfdi.c + EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), + VT.getSizeInBits()); + SDValue ExponentMask = DAG.getConstant(0x7F800000, IntVT); + SDValue ExponentLoBit = DAG.getConstant(23, IntVT); + SDValue Bias = DAG.getConstant(127, IntVT); + SDValue SignMask = DAG.getConstant(APInt::getSignBit(VT.getSizeInBits()), + IntVT); + SDValue SignLowBit = DAG.getConstant(VT.getSizeInBits() - 1, IntVT); + SDValue MantissaMask = DAG.getConstant(0x007FFFFF, IntVT); + + SDValue Bits = DAG.getNode(ISD::BITCAST, dl, IntVT, Node->getOperand(0)); + + SDValue ExponentBits = DAG.getNode(ISD::SRL, dl, IntVT, + DAG.getNode(ISD::AND, dl, IntVT, Bits, ExponentMask), + DAG.getZExtOrTrunc(ExponentLoBit, dl, getShiftAmountTy(IntVT))); + SDValue Exponent = DAG.getNode(ISD::SUB, dl, IntVT, ExponentBits, Bias); + + SDValue Sign = DAG.getNode(ISD::SRA, dl, IntVT, + DAG.getNode(ISD::AND, dl, IntVT, Bits, SignMask), + DAG.getZExtOrTrunc(SignLowBit, dl, getShiftAmountTy(IntVT))); + Sign = DAG.getSExtOrTrunc(Sign, dl, NVT); + + SDValue R = DAG.getNode(ISD::OR, dl, IntVT, + DAG.getNode(ISD::AND, dl, IntVT, Bits, MantissaMask), + DAG.getConstant(0x00800000, IntVT)); + + R = DAG.getZExtOrTrunc(R, dl, NVT); + + + R = DAG.getSelectCC(dl, Exponent, ExponentLoBit, + DAG.getNode(ISD::SHL, dl, NVT, R, + DAG.getZExtOrTrunc( + DAG.getNode(ISD::SUB, dl, IntVT, Exponent, ExponentLoBit), + dl, getShiftAmountTy(IntVT))), + DAG.getNode(ISD::SRL, dl, NVT, R, + DAG.getZExtOrTrunc( + DAG.getNode(ISD::SUB, dl, IntVT, ExponentLoBit, Exponent), + dl, getShiftAmountTy(IntVT))), + ISD::SETGT); + + SDValue Ret = DAG.getNode(ISD::SUB, dl, NVT, + DAG.getNode(ISD::XOR, dl, NVT, R, Sign), + Sign); + + Result = DAG.getSelectCC(dl, Exponent, DAG.getConstant(0, IntVT), + DAG.getConstant(0, NVT), Ret, ISD::SETLT); + return true; +} diff --git a/lib/CodeGen/ShadowStackGC.cpp b/lib/CodeGen/ShadowStackGC.cpp index f7c64da..0be00f0 100644 --- a/lib/CodeGen/ShadowStackGC.cpp +++ b/lib/CodeGen/ShadowStackGC.cpp @@ -144,7 +144,7 @@ namespace { LLVMContext &C = F.getContext(); BasicBlock *CleanupBB = BasicBlock::Create(C, CleanupBBName, &F); Type *ExnTy = StructType::get(Type::getInt8PtrTy(C), - Type::getInt32Ty(C), NULL); + Type::getInt32Ty(C), nullptr); Constant *PersFn = F.getParent()-> getOrInsertFunction("__gcc_personality_v0", diff --git a/lib/CodeGen/SjLjEHPrepare.cpp b/lib/CodeGen/SjLjEHPrepare.cpp index d2f3955..7fd8107 100644 --- a/lib/CodeGen/SjLjEHPrepare.cpp +++ b/lib/CodeGen/SjLjEHPrepare.cpp @@ -31,6 +31,7 @@ #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetLowering.h" +#include "llvm/Target/TargetSubtargetInfo.h" #include "llvm/Transforms/Scalar.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" #include "llvm/Transforms/Utils/Local.h" @@ -98,7 +99,7 @@ bool SjLjEHPrepare::doInitialization(Module &M) { VoidPtrTy, // __personality VoidPtrTy, // __lsda ArrayType::get(VoidPtrTy, 5), // __jbuf - NULL); + nullptr); RegisterFn = M.getOrInsertFunction( "_Unwind_SjLj_Register", Type::getVoidTy(M.getContext()), PointerType::getUnqual(FunctionContextTy), (Type *)nullptr); @@ -138,8 +139,8 @@ void SjLjEHPrepare::insertCallSiteStore(Instruction *I, int Number) { /// MarkBlocksLiveIn - Insert BB and all of its predescessors into LiveBBs until /// we reach blocks we've 
already seen. static void MarkBlocksLiveIn(BasicBlock *BB, - SmallPtrSet &LiveBBs) { - if (!LiveBBs.insert(BB)) + SmallPtrSetImpl &LiveBBs) { + if (!LiveBBs.insert(BB).second) return; // already been here. for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI) @@ -190,7 +191,7 @@ Value *SjLjEHPrepare::setupFunctionContext(Function &F, // Create an alloca for the incoming jump buffer ptr and the new jump buffer // that needs to be restored on all exits from the function. This is an alloca // because the value needs to be added to the global context list. - const TargetLowering *TLI = TM->getTargetLowering(); + const TargetLowering *TLI = TM->getSubtargetImpl()->getTargetLowering(); unsigned Align = TLI->getDataLayout()->getPrefTypeAlignment(FunctionContextTy); FuncCtx = new AllocaInst(FunctionContextTy, nullptr, Align, "fn_context", @@ -249,34 +250,16 @@ void SjLjEHPrepare::lowerIncomingArguments(Function &F) { ++AI) { Type *Ty = AI->getType(); - // Aggregate types can't be cast, but are legal argument types, so we have - // to handle them differently. We use an extract/insert pair as a - // lightweight method to achieve the same goal. - if (isa(Ty) || isa(Ty)) { - Instruction *EI = ExtractValueInst::Create(AI, 0, "", AfterAllocaInsPt); - Instruction *NI = InsertValueInst::Create(AI, EI, 0); - NI->insertAfter(EI); - AI->replaceAllUsesWith(NI); - - // Set the operand of the instructions back to the AllocaInst. - EI->setOperand(0, AI); - NI->setOperand(0, AI); - } else { - // This is always a no-op cast because we're casting AI to AI->getType() - // so src and destination types are identical. BitCast is the only - // possibility. - CastInst *NC = new BitCastInst(AI, AI->getType(), AI->getName() + ".tmp", - AfterAllocaInsPt); - AI->replaceAllUsesWith(NC); - - // Set the operand of the cast instruction back to the AllocaInst. - // Normally it's forbidden to replace a CastInst's operand because it - // could cause the opcode to reflect an illegal conversion. However, we're - // replacing it here with the same value it was constructed with. We do - // this because the above replaceAllUsesWith() clobbered the operand, but - // we want this one to remain. - NC->setOperand(0, AI); - } + // Use 'select i8 true, %arg, undef' to simulate a 'no-op' instruction. + Value *TrueValue = ConstantInt::getTrue(F.getContext()); + Value *UndefValue = UndefValue::get(Ty); + Instruction *SI = SelectInst::Create(TrueValue, AI, UndefValue, + AI->getName() + ".tmp", + AfterAllocaInsPt); + AI->replaceAllUsesWith(SI); + + // Reset the operand, because it was clobbered by the RAUW above. + SI->setOperand(1, AI); } } @@ -368,10 +351,8 @@ void SjLjEHPrepare::lowerAcrossUnwindEdges(Function &F, continue; // Demote the PHIs to the stack. - for (SmallPtrSet::iterator I = PHIsToDemote.begin(), - E = PHIsToDemote.end(); - I != E; ++I) - DemotePHIToStack(*I); + for (PHINode *PN : PHIsToDemote) + DemotePHIToStack(PN); // Move the landingpad instruction back to the top of the landing pad block. 
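// Several hunks in this patch (findNonImmUse, MarkBlocksLiveIn, the
// StackProtector PHI walk) switch from 'if (!Set.insert(X))' to
// 'if (!Set.insert(X).second)': insert() now reports success through a
// std::pair<iterator, bool>. A standalone reminder of the idiom, using
// std::unordered_set as a stand-in for SmallPtrSet:
#include <cassert>
#include <unordered_set>

int main() {
  std::unordered_set<int> Visited;
  assert(Visited.insert(42).second);   // first visit: inserted
  assert(!Visited.insert(42).second);  // already seen: no insertion
}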
LPI->moveBefore(UnwindBlock->begin()); diff --git a/lib/CodeGen/SpillPlacement.cpp b/lib/CodeGen/SpillPlacement.cpp index 24e94d1..97a5424 100644 --- a/lib/CodeGen/SpillPlacement.cpp +++ b/lib/CodeGen/SpillPlacement.cpp @@ -37,6 +37,7 @@ #include "llvm/CodeGen/Passes.h" #include "llvm/Support/Debug.h" #include "llvm/Support/Format.h" +#include "llvm/Support/ManagedStatic.h" using namespace llvm; @@ -60,27 +61,6 @@ void SpillPlacement::getAnalysisUsage(AnalysisUsage &AU) const { MachineFunctionPass::getAnalysisUsage(AU); } -namespace { -static BlockFrequency Threshold; -} - -/// Decision threshold. A node gets the output value 0 if the weighted sum of -/// its inputs falls in the open interval (-Threshold;Threshold). -static BlockFrequency getThreshold() { return Threshold; } - -/// \brief Set the threshold for a given entry frequency. -/// -/// Set the threshold relative to \c Entry. Since the threshold is used as a -/// bound on the open interval (-Threshold;Threshold), 1 is the minimum -/// threshold. -static void setThreshold(const BlockFrequency &Entry) { - // Apparently 2 is a good threshold when Entry==2^14, but we need to scale - // it. Divide by 2^13, rounding as appropriate. - uint64_t Freq = Entry.getFrequency(); - uint64_t Scaled = (Freq >> 13) + bool(Freq & (1 << 12)); - Threshold = std::max(UINT64_C(1), Scaled); -} - /// Node - Each edge bundle corresponds to a Hopfield node. /// /// The node contains precomputed frequency data that only depends on the CFG, @@ -126,9 +106,9 @@ struct SpillPlacement::Node { /// clear - Reset per-query data, but preserve frequencies that only depend on // the CFG. - void clear() { + void clear(const BlockFrequency &Threshold) { BiasN = BiasP = Value = 0; - SumLinkWeights = getThreshold(); + SumLinkWeights = Threshold; Links.clear(); } @@ -166,7 +146,7 @@ struct SpillPlacement::Node { /// update - Recompute Value from Bias and Links. Return true when node /// preference changes. - bool update(const Node nodes[]) { + bool update(const Node nodes[], const BlockFrequency &Threshold) { // Compute the weighted sum of inputs. BlockFrequency SumN = BiasN; BlockFrequency SumP = BiasP; @@ -186,9 +166,9 @@ struct SpillPlacement::Node { // 2. It helps tame rounding errors when the links nominally sum to 0. // bool Before = preferReg(); - if (SumN >= SumP + getThreshold()) + if (SumN >= SumP + Threshold) Value = -1; - else if (SumP >= SumN + getThreshold()) + else if (SumP >= SumN + Threshold) Value = 1; else Value = 0; @@ -227,7 +207,7 @@ void SpillPlacement::activate(unsigned n) { if (ActiveNodes->test(n)) return; ActiveNodes->set(n); - nodes[n].clear(); + nodes[n].clear(Threshold); // Very large bundles usually come from big switches, indirect branches, // landing pads, or loops with many 'continue' statements. It is difficult to @@ -244,6 +224,18 @@ void SpillPlacement::activate(unsigned n) { } } +/// \brief Set the threshold for a given entry frequency. +/// +/// Set the threshold relative to \c Entry. Since the threshold is used as a +/// bound on the open interval (-Threshold;Threshold), 1 is the minimum +/// threshold. +void SpillPlacement::setThreshold(const BlockFrequency &Entry) { + // Apparently 2 is a good threshold when Entry==2^14, but we need to scale + // it. Divide by 2^13, rounding as appropriate. + uint64_t Freq = Entry.getFrequency(); + uint64_t Scaled = (Freq >> 13) + bool(Freq & (1 << 12)); + Threshold = std::max(UINT64_C(1), Scaled); +} /// addConstraints - Compute node biases and weights from a set of constraints. 
/// Set a bit in NodeMask for each active node. @@ -310,7 +302,7 @@ bool SpillPlacement::scanActiveBundles() { Linked.clear(); RecentPositive.clear(); for (int n = ActiveNodes->find_first(); n>=0; n = ActiveNodes->find_next(n)) { - nodes[n].update(nodes); + nodes[n].update(nodes, Threshold); // A node that must spill, or a node without any links is not going to // change its value ever again, so exclude it from iterations. if (nodes[n].mustSpill()) @@ -330,7 +322,7 @@ void SpillPlacement::iterate() { // First update the recently positive nodes. They have likely received new // negative bias that will turn them off. while (!RecentPositive.empty()) - nodes[RecentPositive.pop_back_val()].update(nodes); + nodes[RecentPositive.pop_back_val()].update(nodes, Threshold); if (Linked.empty()) return; @@ -349,7 +341,7 @@ void SpillPlacement::iterate() { iteration == 0 ? Linked.rbegin() : std::next(Linked.rbegin()), E = Linked.rend(); I != E; ++I) { unsigned n = *I; - if (nodes[n].update(nodes)) { + if (nodes[n].update(nodes, Threshold)) { Changed = true; if (nodes[n].preferReg()) RecentPositive.push_back(n); @@ -363,7 +355,7 @@ void SpillPlacement::iterate() { for (SmallVectorImpl::const_iterator I = std::next(Linked.begin()), E = Linked.end(); I != E; ++I) { unsigned n = *I; - if (nodes[n].update(nodes)) { + if (nodes[n].update(nodes, Threshold)) { Changed = true; if (nodes[n].preferReg()) RecentPositive.push_back(n); diff --git a/lib/CodeGen/SpillPlacement.h b/lib/CodeGen/SpillPlacement.h index 43fc7f5..622361e 100644 --- a/lib/CodeGen/SpillPlacement.h +++ b/lib/CodeGen/SpillPlacement.h @@ -24,8 +24,8 @@ // //===----------------------------------------------------------------------===// -#ifndef LLVM_CODEGEN_SPILLPLACEMENT_H -#define LLVM_CODEGEN_SPILLPLACEMENT_H +#ifndef LLVM_LIB_CODEGEN_SPILLPLACEMENT_H +#define LLVM_LIB_CODEGEN_SPILLPLACEMENT_H #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/SmallVector.h" @@ -40,7 +40,7 @@ class MachineBasicBlock; class MachineLoopInfo; class MachineBlockFrequencyInfo; -class SpillPlacement : public MachineFunctionPass { +class SpillPlacement : public MachineFunctionPass { struct Node; const MachineFunction *MF; const EdgeBundles *bundles; @@ -60,7 +60,11 @@ class SpillPlacement : public MachineFunctionPass { SmallVector RecentPositive; // Block frequencies are computed once. Indexed by block number. - SmallVector BlockFrequencies; + SmallVector BlockFrequencies; + + /// Decision threshold. A node gets the output value 0 if the weighted sum of + /// its inputs falls in the open interval (-Threshold;Threshold). + BlockFrequency Threshold; public: static char ID; // Pass identification, replacement for typeid. @@ -152,6 +156,7 @@ private: void releaseMemory() override; void activate(unsigned); + void setThreshold(const BlockFrequency &Entry); }; } // end namespace llvm diff --git a/lib/CodeGen/Spiller.cpp b/lib/CodeGen/Spiller.cpp deleted file mode 100644 index 0649448..0000000 --- a/lib/CodeGen/Spiller.cpp +++ /dev/null @@ -1,184 +0,0 @@ -//===-- llvm/CodeGen/Spiller.cpp - Spiller -------------------------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. 
-// -//===----------------------------------------------------------------------===// - -#include "Spiller.h" -#include "llvm/CodeGen/LiveIntervalAnalysis.h" -#include "llvm/CodeGen/LiveRangeEdit.h" -#include "llvm/CodeGen/LiveStackAnalysis.h" -#include "llvm/CodeGen/MachineFrameInfo.h" -#include "llvm/CodeGen/MachineFunction.h" -#include "llvm/CodeGen/MachineInstrBuilder.h" -#include "llvm/CodeGen/MachineLoopInfo.h" -#include "llvm/CodeGen/MachineRegisterInfo.h" -#include "llvm/CodeGen/VirtRegMap.h" -#include "llvm/Support/CommandLine.h" -#include "llvm/Support/Debug.h" -#include "llvm/Support/ErrorHandling.h" -#include "llvm/Support/raw_ostream.h" -#include "llvm/Target/TargetInstrInfo.h" -#include "llvm/Target/TargetMachine.h" - -using namespace llvm; - -#define DEBUG_TYPE "spiller" - -namespace { - enum SpillerName { trivial, inline_ }; -} - -static cl::opt -spillerOpt("spiller", - cl::desc("Spiller to use: (default: standard)"), - cl::Prefix, - cl::values(clEnumVal(trivial, "trivial spiller"), - clEnumValN(inline_, "inline", "inline spiller"), - clEnumValEnd), - cl::init(trivial)); - -// Spiller virtual destructor implementation. -Spiller::~Spiller() {} - -namespace { - -/// Utility class for spillers. -class SpillerBase : public Spiller { -protected: - MachineFunctionPass *pass; - MachineFunction *mf; - VirtRegMap *vrm; - LiveIntervals *lis; - MachineFrameInfo *mfi; - MachineRegisterInfo *mri; - const TargetInstrInfo *tii; - const TargetRegisterInfo *tri; - - /// Construct a spiller base. - SpillerBase(MachineFunctionPass &pass, MachineFunction &mf, VirtRegMap &vrm) - : pass(&pass), mf(&mf), vrm(&vrm) - { - lis = &pass.getAnalysis(); - mfi = mf.getFrameInfo(); - mri = &mf.getRegInfo(); - tii = mf.getTarget().getInstrInfo(); - tri = mf.getTarget().getRegisterInfo(); - } - - /// Add spill ranges for every use/def of the live interval, inserting loads - /// immediately before each use, and stores after each def. No folding or - /// remat is attempted. - void trivialSpillEverywhere(LiveRangeEdit& LRE) { - LiveInterval* li = &LRE.getParent(); - - DEBUG(dbgs() << "Spilling everywhere " << *li << "\n"); - - assert(li->weight != llvm::huge_valf && - "Attempting to spill already spilled value."); - - assert(!TargetRegisterInfo::isStackSlot(li->reg) && - "Trying to spill a stack slot."); - - DEBUG(dbgs() << "Trivial spill everywhere of reg" << li->reg << "\n"); - - const TargetRegisterClass *trc = mri->getRegClass(li->reg); - unsigned ss = vrm->assignVirt2StackSlot(li->reg); - - // Iterate over reg uses/defs. - for (MachineRegisterInfo::reg_instr_iterator - regItr = mri->reg_instr_begin(li->reg); - regItr != mri->reg_instr_end();) { - - // Grab the use/def instr. - MachineInstr *mi = &*regItr; - - DEBUG(dbgs() << " Processing " << *mi); - - // Step regItr to the next use/def instr. - ++regItr; - - // Collect uses & defs for this instr. - SmallVector indices; - bool hasUse = false; - bool hasDef = false; - for (unsigned i = 0; i != mi->getNumOperands(); ++i) { - MachineOperand &op = mi->getOperand(i); - if (!op.isReg() || op.getReg() != li->reg) - continue; - hasUse |= mi->getOperand(i).isUse(); - hasDef |= mi->getOperand(i).isDef(); - indices.push_back(i); - } - - // Create a new virtual register for the load and/or store. - unsigned NewVReg = LRE.create(); - - // Update the reg operands & kill flags. 
- for (unsigned i = 0; i < indices.size(); ++i) { - unsigned mopIdx = indices[i]; - MachineOperand &mop = mi->getOperand(mopIdx); - mop.setReg(NewVReg); - if (mop.isUse() && !mi->isRegTiedToDefOperand(mopIdx)) { - mop.setIsKill(true); - } - } - assert(hasUse || hasDef); - - // Insert reload if necessary. - MachineBasicBlock::iterator miItr(mi); - if (hasUse) { - MachineInstrSpan MIS(miItr); - - tii->loadRegFromStackSlot(*mi->getParent(), miItr, NewVReg, ss, trc, - tri); - lis->InsertMachineInstrRangeInMaps(MIS.begin(), miItr); - } - - // Insert store if necessary. - if (hasDef) { - MachineInstrSpan MIS(miItr); - - tii->storeRegToStackSlot(*mi->getParent(), std::next(miItr), NewVReg, - true, ss, trc, tri); - lis->InsertMachineInstrRangeInMaps(std::next(miItr), MIS.end()); - } - } - } -}; - -} // end anonymous namespace - -namespace { - -/// Spills any live range using the spill-everywhere method with no attempt at -/// folding. -class TrivialSpiller : public SpillerBase { -public: - - TrivialSpiller(MachineFunctionPass &pass, MachineFunction &mf, - VirtRegMap &vrm) - : SpillerBase(pass, mf, vrm) {} - - void spill(LiveRangeEdit &LRE) override { - // Ignore spillIs - we don't use it. - trivialSpillEverywhere(LRE); - } -}; - -} // end anonymous namespace - -void Spiller::anchor() { } - -llvm::Spiller* llvm::createSpiller(MachineFunctionPass &pass, - MachineFunction &mf, - VirtRegMap &vrm) { - switch (spillerOpt) { - case trivial: return new TrivialSpiller(pass, mf, vrm); - case inline_: return createInlineSpiller(pass, mf, vrm); - } - llvm_unreachable("Invalid spiller optimization"); -} diff --git a/lib/CodeGen/Spiller.h b/lib/CodeGen/Spiller.h index b7d5bea..08f99ec 100644 --- a/lib/CodeGen/Spiller.h +++ b/lib/CodeGen/Spiller.h @@ -7,8 +7,8 @@ // //===----------------------------------------------------------------------===// -#ifndef LLVM_CODEGEN_SPILLER_H -#define LLVM_CODEGEN_SPILLER_H +#ifndef LLVM_LIB_CODEGEN_SPILLER_H +#define LLVM_LIB_CODEGEN_SPILLER_H namespace llvm { @@ -31,11 +31,6 @@ namespace llvm { }; - /// Create and return a spiller object, as specified on the command line. - Spiller* createSpiller(MachineFunctionPass &pass, - MachineFunction &mf, - VirtRegMap &vrm); - /// Create and return a spiller that will insert spill code directly instead /// of deferring though VirtRegMap. Spiller *createInlineSpiller(MachineFunctionPass &pass, diff --git a/lib/CodeGen/SplitKit.cpp b/lib/CodeGen/SplitKit.cpp index 7d4f568..ea7b914 100644 --- a/lib/CodeGen/SplitKit.cpp +++ b/lib/CodeGen/SplitKit.cpp @@ -40,16 +40,11 @@ STATISTIC(NumRepairs, "Number of invalid live ranges repaired"); // Split Analysis //===----------------------------------------------------------------------===// -SplitAnalysis::SplitAnalysis(const VirtRegMap &vrm, - const LiveIntervals &lis, +SplitAnalysis::SplitAnalysis(const VirtRegMap &vrm, const LiveIntervals &lis, const MachineLoopInfo &mli) - : MF(vrm.getMachineFunction()), - VRM(vrm), - LIS(lis), - Loops(mli), - TII(*MF.getTarget().getInstrInfo()), - CurLI(nullptr), - LastSplitPoint(MF.getNumBlockIDs()) {} + : MF(vrm.getMachineFunction()), VRM(vrm), LIS(lis), Loops(mli), + TII(*MF.getSubtarget().getInstrInfo()), CurLI(nullptr), + LastSplitPoint(MF.getNumBlockIDs()) {} void SplitAnalysis::clear() { UseSlots.clear(); @@ -321,22 +316,14 @@ void SplitAnalysis::analyze(const LiveInterval *li) { //===----------------------------------------------------------------------===// /// Create a new SplitEditor for editing the LiveInterval analyzed by SA. 
-SplitEditor::SplitEditor(SplitAnalysis &sa, - LiveIntervals &lis, - VirtRegMap &vrm, +SplitEditor::SplitEditor(SplitAnalysis &sa, LiveIntervals &lis, VirtRegMap &vrm, MachineDominatorTree &mdt, MachineBlockFrequencyInfo &mbfi) - : SA(sa), LIS(lis), VRM(vrm), - MRI(vrm.getMachineFunction().getRegInfo()), - MDT(mdt), - TII(*vrm.getMachineFunction().getTarget().getInstrInfo()), - TRI(*vrm.getMachineFunction().getTarget().getRegisterInfo()), - MBFI(mbfi), - Edit(nullptr), - OpenIdx(0), - SpillMode(SM_Partition), - RegAssign(Allocator) -{} + : SA(sa), LIS(lis), VRM(vrm), MRI(vrm.getMachineFunction().getRegInfo()), + MDT(mdt), TII(*vrm.getMachineFunction().getSubtarget().getInstrInfo()), + TRI(*vrm.getMachineFunction().getSubtarget().getRegisterInfo()), + MBFI(mbfi), Edit(nullptr), OpenIdx(0), SpillMode(SM_Partition), + RegAssign(Allocator) {} void SplitEditor::reset(LiveRangeEdit &LRE, ComplementSpillMode SM) { Edit = &LRE; diff --git a/lib/CodeGen/SplitKit.h b/lib/CodeGen/SplitKit.h index 7048ee3..2e60c14 100644 --- a/lib/CodeGen/SplitKit.h +++ b/lib/CodeGen/SplitKit.h @@ -12,8 +12,8 @@ // //===----------------------------------------------------------------------===// -#ifndef LLVM_CODEGEN_SPLITKIT_H -#define LLVM_CODEGEN_SPLITKIT_H +#ifndef LLVM_LIB_CODEGEN_SPLITKIT_H +#define LLVM_LIB_CODEGEN_SPLITKIT_H #include "LiveRangeCalc.h" #include "llvm/ADT/ArrayRef.h" diff --git a/lib/CodeGen/StackColoring.cpp b/lib/CodeGen/StackColoring.cpp index 370430c..dcf1b44 100644 --- a/lib/CodeGen/StackColoring.cpp +++ b/lib/CodeGen/StackColoring.cpp @@ -228,7 +228,7 @@ void StackColoring::dump() const { unsigned StackColoring::collectMarkers(unsigned NumSlot) { unsigned MarkersFound = 0; // Scan the function to find all lifetime markers. - // NOTE: We use the a reverse-post-order iteration to ensure that we obtain a + // NOTE: We use a reverse-post-order iteration to ensure that we obtain a // deterministic numbering, and because we'll need a post-order iteration // later for solving the liveness dataflow problem. for (MachineBasicBlock *MBB : depth_first(MF)) { diff --git a/lib/CodeGen/StackMapLivenessAnalysis.cpp b/lib/CodeGen/StackMapLivenessAnalysis.cpp index 3ba502f..c2ee87a 100644 --- a/lib/CodeGen/StackMapLivenessAnalysis.cpp +++ b/lib/CodeGen/StackMapLivenessAnalysis.cpp @@ -21,7 +21,7 @@ #include "llvm/CodeGen/StackMapLivenessAnalysis.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" - +#include "llvm/Target/TargetSubtargetInfo.h" using namespace llvm; @@ -67,7 +67,7 @@ bool StackMapLiveness::runOnMachineFunction(MachineFunction &_MF) { DEBUG(dbgs() << "********** COMPUTING STACKMAP LIVENESS: " << _MF.getName() << " **********\n"); MF = &_MF; - TRI = MF->getTarget().getRegisterInfo(); + TRI = MF->getSubtarget().getRegisterInfo(); ++NumStackMapFuncVisited; // Skip this function if there are no patchpoints to process. 
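// A recurring change in this patch: per-target info is now reached through
// the subtarget (MF.getSubtarget().getRegisterInfo(),
// TM.getSubtargetImpl()->getDataLayout(), ...) rather than directly off the
// TargetMachine. A schematic mock of that delegation (stand-in types only,
// not LLVM's headers):
struct TargetRegisterInfo {};

struct TargetSubtargetInfo {
  TargetRegisterInfo TRI;
  const TargetRegisterInfo *getRegisterInfo() const { return &TRI; }
};

struct MachineFunction {
  TargetSubtargetInfo STI;
  const TargetSubtargetInfo &getSubtarget() const { return STI; }
};

// The new accessor pattern used throughout the patch.
static const TargetRegisterInfo *lookupTRI(const MachineFunction &MF) {
  return MF.getSubtarget().getRegisterInfo();
}

int main() {
  MachineFunction MF;
  return lookupTRI(MF) ? 0 : 1;
}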
diff --git a/lib/CodeGen/StackMaps.cpp b/lib/CodeGen/StackMaps.cpp index 1473fc1..d3791c3 100644 --- a/lib/CodeGen/StackMaps.cpp +++ b/lib/CodeGen/StackMaps.cpp @@ -24,6 +24,7 @@ #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetOpcodes.h" #include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/Target/TargetSubtargetInfo.h" #include using namespace llvm; @@ -83,7 +84,8 @@ StackMaps::parseOperand(MachineInstr::const_mop_iterator MOI, switch (MOI->getImm()) { default: llvm_unreachable("Unrecognized operand type."); case StackMaps::DirectMemRefOp: { - unsigned Size = AP.TM.getDataLayout()->getPointerSizeInBits(); + unsigned Size = + AP.TM.getSubtargetImpl()->getDataLayout()->getPointerSizeInBits(); assert((Size % 8) == 0 && "Need pointer size in bytes."); Size /= 8; unsigned Reg = (++MOI)->getReg(); @@ -122,7 +124,8 @@ StackMaps::parseOperand(MachineInstr::const_mop_iterator MOI, assert(TargetRegisterInfo::isPhysicalRegister(MOI->getReg()) && "Virtreg operands should have been rewritten before now."); const TargetRegisterClass *RC = - AP.TM.getRegisterInfo()->getMinimalPhysRegClass(MOI->getReg()); + AP.TM.getSubtargetImpl()->getRegisterInfo()->getMinimalPhysRegClass( + MOI->getReg()); assert(!MOI->getSubReg() && "Physical subreg still around."); Locs.push_back( Location(Location::Register, RC->getSize(), MOI->getReg(), 0)); @@ -158,7 +161,7 @@ StackMaps::createLiveOutReg(unsigned Reg, const TargetRegisterInfo *TRI) const { StackMaps::LiveOutVec StackMaps::parseRegisterLiveOutMask(const uint32_t *Mask) const { assert(Mask && "No register mask specified"); - const TargetRegisterInfo *TRI = AP.TM.getRegisterInfo(); + const TargetRegisterInfo *TRI = AP.TM.getSubtargetImpl()->getRegisterInfo(); LiveOutVec LiveOuts; // Create a LiveOutReg for each bit that is set in the register mask. @@ -217,9 +220,18 @@ void StackMaps::recordStackMapOpers(const MachineInstr &MI, uint64_t ID, I != E; ++I) { // Constants are encoded as sign-extended integers. // -1 is directly encoded as .long 0xFFFFFFFF with no constant pool. - if (I->LocType == Location::Constant && - ((I->Offset + (int64_t(1)<<31)) >> 32) != 0) { + if (I->LocType == Location::Constant && !isInt<32>(I->Offset)) { I->LocType = Location::ConstantIndex; + // ConstPool is intentionally a MapVector of 'uint64_t's (as + // opposed to 'int64_t's). We should never be in a situation + // where we have to insert either the tombstone or the empty + // keys into a map, and for a DenseMap these are + // (uint64_t)0 and (uint64_t)-1. They can be and are + // represented using 32 bit integers. + + assert((uint64_t)I->Offset != DenseMapInfo::getEmptyKey() && + (uint64_t)I->Offset != DenseMapInfo::getTombstoneKey() && + "empty and tombstone keys should fit in 32 bits!"); auto Result = ConstPool.insert(std::make_pair(I->Offset, I->Offset)); I->Offset = Result.first - ConstPool.begin(); } @@ -232,12 +244,16 @@ void StackMaps::recordStackMapOpers(const MachineInstr &MI, uint64_t ID, MCSymbolRefExpr::Create(AP.CurrentFnSym, OutContext), OutContext); - CSInfos.push_back(CallsiteInfo(CSOffsetExpr, ID, Locations, LiveOuts)); + CSInfos.emplace_back(CSOffsetExpr, ID, std::move(Locations), + std::move(LiveOuts)); // Record the stack size of the current function. 
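// recordStackMapOpers above replaces the hand-rolled range test
// '((Offset + (int64_t(1) << 31)) >> 32) != 0' with !isInt<32>(Offset).
// A standalone check that the two forms agree (isInt32 below is an
// assumed-equivalent stand-in for llvm::isInt<32>):
#include <cassert>
#include <cstdint>

static bool isInt32(int64_t V) {
  return V >= INT32_MIN && V <= INT32_MAX;
}

int main() {
  for (int64_t V : {int64_t(0), int64_t(INT32_MAX), int64_t(INT32_MIN),
                    int64_t(INT32_MAX) + 1, int64_t(INT32_MIN) - 1}) {
    bool OldOutOfRange = ((V + (int64_t(1) << 31)) >> 32) != 0;
    assert(OldOutOfRange == !isInt32(V));
  }
}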
const MachineFrameInfo *MFI = AP.MF->getFrameInfo(); + const TargetRegisterInfo *RegInfo = AP.MF->getSubtarget().getRegisterInfo(); + const bool DynamicFrameSize = MFI->hasVarSizedObjects() || + RegInfo->needsStackRealignment(*(AP.MF)); FnStackSize[AP.CurrentFnSym] = - MFI->hasVarSizedObjects() ? UINT64_MAX : MFI->getStackSize(); + DynamicFrameSize ? UINT64_MAX : MFI->getStackSize(); } void StackMaps::recordStackMap(const MachineInstr &MI) { @@ -485,7 +501,7 @@ void StackMaps::serializeToStackMapSection() { MCContext &OutContext = AP.OutStreamer.getContext(); MCStreamer &OS = AP.OutStreamer; - const TargetRegisterInfo *TRI = AP.TM.getRegisterInfo(); + const TargetRegisterInfo *TRI = AP.TM.getSubtargetImpl()->getRegisterInfo(); // Create the section. const MCSection *StackMapSection = diff --git a/lib/CodeGen/StackProtector.cpp b/lib/CodeGen/StackProtector.cpp index accfe7b..45f97ac 100644 --- a/lib/CodeGen/StackProtector.cpp +++ b/lib/CodeGen/StackProtector.cpp @@ -33,6 +33,7 @@ #include "llvm/IR/Intrinsics.h" #include "llvm/IR/Module.h" #include "llvm/Support/CommandLine.h" +#include "llvm/Target/TargetSubtargetInfo.h" #include using namespace llvm; @@ -85,7 +86,7 @@ bool StackProtector::runOnFunction(Function &Fn) { DominatorTreeWrapperPass *DTWP = getAnalysisIfAvailable(); DT = DTWP ? &DTWP->getDomTree() : nullptr; - TLI = TM->getTargetLowering(); + TLI = TM->getSubtargetImpl()->getTargetLowering(); Attribute Attr = Fn.getAttributes().getAttribute( AttributeSet::FunctionIndex, "stack-protector-buffer-size"); @@ -168,7 +169,7 @@ bool StackProtector::HasAddressTaken(const Instruction *AI) { } else if (const PHINode *PN = dyn_cast(U)) { // Keep track of what PHI nodes we have already visited to ensure // they are only visited once. - if (VisitedPHIs.insert(PN)) + if (VisitedPHIs.insert(PN).second) if (HasAddressTaken(PN)) return true; } else if (const GetElementPtrInst *GEP = dyn_cast(U)) { @@ -479,12 +480,12 @@ BasicBlock *StackProtector::CreateFailBB() { if (Trip.getOS() == llvm::Triple::OpenBSD) { Constant *StackChkFail = M->getOrInsertFunction( "__stack_smash_handler", Type::getVoidTy(Context), - Type::getInt8PtrTy(Context), NULL); + Type::getInt8PtrTy(Context), nullptr); B.CreateCall(StackChkFail, B.CreateGlobalStringPtr(F->getName(), "SSH")); } else { Constant *StackChkFail = M->getOrInsertFunction( - "__stack_chk_fail", Type::getVoidTy(Context), NULL); + "__stack_chk_fail", Type::getVoidTy(Context), nullptr); B.CreateCall(StackChkFail); } B.CreateUnreachable(); diff --git a/lib/CodeGen/StackSlotColoring.cpp b/lib/CodeGen/StackSlotColoring.cpp index 791168f..cc72e5e 100644 --- a/lib/CodeGen/StackSlotColoring.cpp +++ b/lib/CodeGen/StackSlotColoring.cpp @@ -28,7 +28,7 @@ #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetInstrInfo.h" -#include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetSubtargetInfo.h" #include using namespace llvm; @@ -422,7 +422,7 @@ bool StackSlotColoring::runOnMachineFunction(MachineFunction &MF) { }); MFI = MF.getFrameInfo(); - TII = MF.getTarget().getInstrInfo(); + TII = MF.getSubtarget().getInstrInfo(); LS = &getAnalysis(); MBFI = &getAnalysis(); diff --git a/lib/CodeGen/TailDuplication.cpp b/lib/CodeGen/TailDuplication.cpp index 723a629..4377236 100644 --- a/lib/CodeGen/TailDuplication.cpp +++ b/lib/CodeGen/TailDuplication.cpp @@ -31,6 +31,7 @@ #include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetRegisterInfo.h" +#include 
"llvm/Target/TargetSubtargetInfo.h" using namespace llvm; #define DEBUG_TYPE "tailduplication" @@ -135,8 +136,8 @@ bool TailDuplicatePass::runOnMachineFunction(MachineFunction &MF) { if (skipOptnoneFunction(*MF.getFunction())) return false; - TII = MF.getTarget().getInstrInfo(); - TRI = MF.getTarget().getRegisterInfo(); + TII = MF.getSubtarget().getInstrInfo(); + TRI = MF.getSubtarget().getRegisterInfo(); MRI = &MF.getRegInfo(); MMI = getAnalysisIfAvailable(); MBPI = &getAnalysis(); @@ -798,11 +799,9 @@ TailDuplicatePass::TailDuplicate(MachineBasicBlock *TailBB, RS->enterBasicBlock(PredBB); if (!PredBB->empty()) RS->forward(std::prev(PredBB->end())); - BitVector RegsLiveAtExit(TRI->getNumRegs()); - RS->getRegsUsed(RegsLiveAtExit, false); for (MachineBasicBlock::livein_iterator I = TailBB->livein_begin(), E = TailBB->livein_end(); I != E; ++I) { - if (!RegsLiveAtExit[*I]) + if (!RS->isRegUsed(*I, false)) // If a register is previously livein to the tail but it's not live // at the end of predecessor BB, then it should be added to its // livein list. diff --git a/lib/CodeGen/TargetFrameLoweringImpl.cpp b/lib/CodeGen/TargetFrameLoweringImpl.cpp index 883e9d1..1557d10 100644 --- a/lib/CodeGen/TargetFrameLoweringImpl.cpp +++ b/lib/CodeGen/TargetFrameLoweringImpl.cpp @@ -14,8 +14,8 @@ #include "llvm/Target/TargetFrameLowering.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunction.h" -#include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/Target/TargetSubtargetInfo.h" #include using namespace llvm; @@ -26,7 +26,7 @@ TargetFrameLowering::~TargetFrameLowering() { /// the stack frame of the specified index. This is the default implementation /// which is overridden for some targets. int TargetFrameLowering::getFrameIndexOffset(const MachineFunction &MF, - int FI) const { + int FI) const { const MachineFrameInfo *MFI = MF.getFrameInfo(); return MFI->getObjectOffset(FI) + MFI->getStackSize() - getOffsetOfLocalArea() + MFI->getOffsetAdjustment(); @@ -34,7 +34,7 @@ int TargetFrameLowering::getFrameIndexOffset(const MachineFunction &MF, int TargetFrameLowering::getFrameIndexReference(const MachineFunction &MF, int FI, unsigned &FrameReg) const { - const TargetRegisterInfo *RI = MF.getTarget().getRegisterInfo(); + const TargetRegisterInfo *RI = MF.getSubtarget().getRegisterInfo(); // By default, assume all frame indices are referenced via whatever // getFrameRegister() says. The target can override this if it's doing diff --git a/lib/CodeGen/TargetInstrInfo.cpp b/lib/CodeGen/TargetInstrInfo.cpp index 83966bd0..ab45f89 100644 --- a/lib/CodeGen/TargetInstrInfo.cpp +++ b/lib/CodeGen/TargetInstrInfo.cpp @@ -290,13 +290,15 @@ bool TargetInstrInfo::getStackSlotRange(const TargetRegisterClass *RC, Offset = 0; return true; } - unsigned BitSize = TM->getRegisterInfo()->getSubRegIdxSize(SubIdx); + unsigned BitSize = + TM->getSubtargetImpl()->getRegisterInfo()->getSubRegIdxSize(SubIdx); // Convert bit size to byte size to be consistent with // MCRegisterClass::getSize(). 
if (BitSize % 8) return false; - int BitOffset = TM->getRegisterInfo()->getSubRegIdxOffset(SubIdx); + int BitOffset = + TM->getSubtargetImpl()->getRegisterInfo()->getSubRegIdxOffset(SubIdx); if (BitOffset < 0 || BitOffset % 8) return false; @@ -305,7 +307,7 @@ bool TargetInstrInfo::getStackSlotRange(const TargetRegisterClass *RC, assert(RC->getSize() >= (Offset + Size) && "bad subregister range"); - if (!TM->getDataLayout()->isLittleEndian()) { + if (!TM->getSubtargetImpl()->getDataLayout()->isLittleEndian()) { Offset = RC->getSize() - (Offset + Size); } return true; @@ -370,6 +372,10 @@ static const TargetRegisterClass *canFoldCopy(const MachineInstr *MI, return nullptr; } +void TargetInstrInfo::getNoopForMachoTarget(MCInst &NopInst) const { + llvm_unreachable("Not a MachO target"); +} + bool TargetInstrInfo:: canFoldMemoryOperand(const MachineInstr *MI, const SmallVectorImpl &Ops) const { @@ -498,7 +504,7 @@ TargetInstrInfo::foldMemoryOperand(MachineBasicBlock::iterator MI, const MachineOperand &MO = MI->getOperand(1-Ops[0]); MachineBasicBlock::iterator Pos = MI; - const TargetRegisterInfo *TRI = MF.getTarget().getRegisterInfo(); + const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo(); if (Flags == MachineMemOperand::MOStore) storeRegToStackSlot(*MBB, Pos, MO.getReg(), MO.isKill(), FI, RC, TRI); @@ -562,8 +568,6 @@ isReallyTriviallyReMaterializableGeneric(const MachineInstr *MI, AliasAnalysis *AA) const { const MachineFunction &MF = *MI->getParent()->getParent(); const MachineRegisterInfo &MRI = MF.getRegInfo(); - const TargetMachine &TM = MF.getTarget(); - const TargetInstrInfo &TII = *TM.getInstrInfo(); // Remat clients assume operand 0 is the defined register. if (!MI->getNumOperands() || !MI->getOperand(0).isReg()) @@ -582,7 +586,7 @@ isReallyTriviallyReMaterializableGeneric(const MachineInstr *MI, // redundant with subsequent checks, but it's target-independent, // simple, and a common case. int FrameIdx = 0; - if (TII.isLoadFromStackSlot(MI, FrameIdx) && + if (isLoadFromStackSlot(MI, FrameIdx) && MF.getFrameInfo()->isImmutableObjectIndex(FrameIdx)) return true; @@ -655,8 +659,8 @@ bool TargetInstrInfo::isSchedulingBoundary(const MachineInstr *MI, // saves compile time, because it doesn't require every single // stack slot reference to depend on the instruction that does the // modification. - const TargetLowering &TLI = *MF.getTarget().getTargetLowering(); - const TargetRegisterInfo *TRI = MF.getTarget().getRegisterInfo(); + const TargetLowering &TLI = *MF.getSubtarget().getTargetLowering(); + const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo(); if (MI->modifiesRegister(TLI.getStackPointerRegisterToSaveRestore(), TRI)) return true; @@ -746,14 +750,14 @@ TargetInstrInfo::getNumMicroOps(const InstrItineraryData *ItinData, } /// Return the default expected latency for a def based on it's opcode. 
-unsigned TargetInstrInfo::defaultDefLatency(const MCSchedModel *SchedModel, +unsigned TargetInstrInfo::defaultDefLatency(const MCSchedModel &SchedModel, const MachineInstr *DefMI) const { if (DefMI->isTransient()) return 0; if (DefMI->mayLoad()) - return SchedModel->LoadLatency; + return SchedModel.LoadLatency; if (isHighLatencyDef(DefMI->getOpcode())) - return SchedModel->HighLatency; + return SchedModel.HighLatency; return 1; } @@ -852,3 +856,77 @@ computeOperandLatency(const InstrItineraryData *ItinData, defaultDefLatency(ItinData->SchedModel, DefMI)); return InstrLatency; } + +bool TargetInstrInfo::getRegSequenceInputs( + const MachineInstr &MI, unsigned DefIdx, + SmallVectorImpl &InputRegs) const { + assert((MI.isRegSequence() || + MI.isRegSequenceLike()) && "Instruction do not have the proper type"); + + if (!MI.isRegSequence()) + return getRegSequenceLikeInputs(MI, DefIdx, InputRegs); + + // We are looking at: + // Def = REG_SEQUENCE v0, sub0, v1, sub1, ... + assert(DefIdx == 0 && "REG_SEQUENCE only has one def"); + for (unsigned OpIdx = 1, EndOpIdx = MI.getNumOperands(); OpIdx != EndOpIdx; + OpIdx += 2) { + const MachineOperand &MOReg = MI.getOperand(OpIdx); + const MachineOperand &MOSubIdx = MI.getOperand(OpIdx + 1); + assert(MOSubIdx.isImm() && + "One of the subindex of the reg_sequence is not an immediate"); + // Record Reg:SubReg, SubIdx. + InputRegs.push_back(RegSubRegPairAndIdx(MOReg.getReg(), MOReg.getSubReg(), + (unsigned)MOSubIdx.getImm())); + } + return true; +} + +bool TargetInstrInfo::getExtractSubregInputs( + const MachineInstr &MI, unsigned DefIdx, + RegSubRegPairAndIdx &InputReg) const { + assert((MI.isExtractSubreg() || + MI.isExtractSubregLike()) && "Instruction do not have the proper type"); + + if (!MI.isExtractSubreg()) + return getExtractSubregLikeInputs(MI, DefIdx, InputReg); + + // We are looking at: + // Def = EXTRACT_SUBREG v0.sub1, sub0. + assert(DefIdx == 0 && "EXTRACT_SUBREG only has one def"); + const MachineOperand &MOReg = MI.getOperand(1); + const MachineOperand &MOSubIdx = MI.getOperand(2); + assert(MOSubIdx.isImm() && + "The subindex of the extract_subreg is not an immediate"); + + InputReg.Reg = MOReg.getReg(); + InputReg.SubReg = MOReg.getSubReg(); + InputReg.SubIdx = (unsigned)MOSubIdx.getImm(); + return true; +} + +bool TargetInstrInfo::getInsertSubregInputs( + const MachineInstr &MI, unsigned DefIdx, + RegSubRegPair &BaseReg, RegSubRegPairAndIdx &InsertedReg) const { + assert((MI.isInsertSubreg() || + MI.isInsertSubregLike()) && "Instruction do not have the proper type"); + + if (!MI.isInsertSubreg()) + return getInsertSubregLikeInputs(MI, DefIdx, BaseReg, InsertedReg); + + // We are looking at: + // Def = INSERT_SEQUENCE v0, v1, sub0. 
+ assert(DefIdx == 0 && "INSERT_SUBREG only has one def"); + const MachineOperand &MOBaseReg = MI.getOperand(1); + const MachineOperand &MOInsertedReg = MI.getOperand(2); + const MachineOperand &MOSubIdx = MI.getOperand(3); + assert(MOSubIdx.isImm() && + "One of the subindex of the reg_sequence is not an immediate"); + BaseReg.Reg = MOBaseReg.getReg(); + BaseReg.SubReg = MOBaseReg.getSubReg(); + + InsertedReg.Reg = MOInsertedReg.getReg(); + InsertedReg.SubReg = MOInsertedReg.getSubReg(); + InsertedReg.SubIdx = (unsigned)MOSubIdx.getImm(); + return true; +} diff --git a/lib/CodeGen/TargetLoweringBase.cpp b/lib/CodeGen/TargetLoweringBase.cpp index c574fd4..e833fd3 100644 --- a/lib/CodeGen/TargetLoweringBase.cpp +++ b/lib/CodeGen/TargetLoweringBase.cpp @@ -34,6 +34,7 @@ #include "llvm/Target/TargetLoweringObjectFile.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/Target/TargetSubtargetInfo.h" #include using namespace llvm; @@ -205,6 +206,16 @@ static void InitLibcallNames(const char **Names, const Triple &TT) { Names[RTLIB::FLOOR_F80] = "floorl"; Names[RTLIB::FLOOR_F128] = "floorl"; Names[RTLIB::FLOOR_PPCF128] = "floorl"; + Names[RTLIB::FMIN_F32] = "fminf"; + Names[RTLIB::FMIN_F64] = "fmin"; + Names[RTLIB::FMIN_F80] = "fminl"; + Names[RTLIB::FMIN_F128] = "fminl"; + Names[RTLIB::FMIN_PPCF128] = "fminl"; + Names[RTLIB::FMAX_F32] = "fmaxf"; + Names[RTLIB::FMAX_F64] = "fmax"; + Names[RTLIB::FMAX_F80] = "fmaxl"; + Names[RTLIB::FMAX_F128] = "fmaxl"; + Names[RTLIB::FMAX_PPCF128] = "fmaxl"; Names[RTLIB::ROUND_F32] = "roundf"; Names[RTLIB::ROUND_F64] = "round"; Names[RTLIB::ROUND_F80] = "roundl"; @@ -220,6 +231,10 @@ static void InitLibcallNames(const char **Names, const Triple &TT) { Names[RTLIB::FPEXT_F32_F64] = "__extendsfdf2"; Names[RTLIB::FPEXT_F16_F32] = "__gnu_h2f_ieee"; Names[RTLIB::FPROUND_F32_F16] = "__gnu_f2h_ieee"; + Names[RTLIB::FPROUND_F64_F16] = "__truncdfhf2"; + Names[RTLIB::FPROUND_F80_F16] = "__truncxfhf2"; + Names[RTLIB::FPROUND_F128_F16] = "__trunctfhf2"; + Names[RTLIB::FPROUND_PPCF128_F16] = "__trunctfhf2"; Names[RTLIB::FPROUND_F64_F32] = "__truncdfsf2"; Names[RTLIB::FPROUND_F80_F32] = "__truncxfsf2"; Names[RTLIB::FPROUND_F128_F32] = "__trunctfsf2"; @@ -418,7 +433,10 @@ static void InitLibcallCallingConvs(CallingConv::ID *CCs) { /// getFPEXT - Return the FPEXT_*_* value for the given types, or /// UNKNOWN_LIBCALL if there is none. RTLIB::Libcall RTLIB::getFPEXT(EVT OpVT, EVT RetVT) { - if (OpVT == MVT::f32) { + if (OpVT == MVT::f16) { + if (RetVT == MVT::f32) + return FPEXT_F16_F32; + } else if (OpVT == MVT::f32) { if (RetVT == MVT::f64) return FPEXT_F32_F64; if (RetVT == MVT::f128) @@ -434,7 +452,18 @@ RTLIB::Libcall RTLIB::getFPEXT(EVT OpVT, EVT RetVT) { /// getFPROUND - Return the FPROUND_*_* value for the given types, or /// UNKNOWN_LIBCALL if there is none. RTLIB::Libcall RTLIB::getFPROUND(EVT OpVT, EVT RetVT) { - if (RetVT == MVT::f32) { + if (RetVT == MVT::f16) { + if (OpVT == MVT::f32) + return FPROUND_F32_F16; + if (OpVT == MVT::f64) + return FPROUND_F64_F16; + if (OpVT == MVT::f80) + return FPROUND_F80_F16; + if (OpVT == MVT::f128) + return FPROUND_F128_F16; + if (OpVT == MVT::ppcf128) + return FPROUND_PPCF128_F16; + } else if (RetVT == MVT::f32) { if (OpVT == MVT::f64) return FPROUND_F64_F32; if (OpVT == MVT::f80) @@ -665,10 +694,9 @@ static void InitCmpLibcallCCs(ISD::CondCode *CCs) { CCs[RTLIB::O_F128] = ISD::SETEQ; } -/// NOTE: The constructor takes ownership of TLOF. 
-TargetLoweringBase::TargetLoweringBase(const TargetMachine &tm, - const TargetLoweringObjectFile *tlof) - : TM(tm), DL(TM.getDataLayout()), TLOF(*tlof) { +/// NOTE: The TargetMachine owns TLOF. +TargetLoweringBase::TargetLoweringBase(const TargetMachine &tm) + : TM(tm), DL(TM.getSubtargetImpl()->getDataLayout()) { initActions(); // Perform these initializations only once. @@ -682,10 +710,11 @@ TargetLoweringBase::TargetLoweringBase(const TargetMachine &tm, HasMultipleConditionRegisters = false; HasExtractBitsInsn = false; IntDivIsCheap = false; - Pow2DivIsCheap = false; + Pow2SDivIsCheap = false; JumpIsExpensive = false; PredictableSelectIsExpensive = false; MaskAndBranchFoldingIsLegal = false; + HasFloatingPointExceptions = true; StackPointerRegisterToSaveRestore = 0; ExceptionPointerRegister = 0; ExceptionSelectorRegister = 0; @@ -700,7 +729,6 @@ TargetLoweringBase::TargetLoweringBase(const TargetMachine &tm, PrefLoopAlignment = 0; MinStackArgumentAlignment = 1; InsertFencesForAtomic = false; - SupportJumpTables = true; MinimumJumpTableEntries = 4; InitLibcallNames(LibcallRoutineNames, Triple(TM.getTargetTriple())); @@ -708,10 +736,6 @@ TargetLoweringBase::TargetLoweringBase(const TargetMachine &tm, InitLibcallCallingConvs(LibcallCallingConvs); } -TargetLoweringBase::~TargetLoweringBase() { - delete &TLOF; -} - void TargetLoweringBase::initActions() { // All operations default to being supported. memset(OpActions, 0, sizeof(OpActions)); @@ -738,6 +762,8 @@ void TargetLoweringBase::initActions() { // These operations default to expand. setOperationAction(ISD::FGETSIGN, (MVT::SimpleValueType)VT, Expand); setOperationAction(ISD::CONCAT_VECTORS, (MVT::SimpleValueType)VT, Expand); + setOperationAction(ISD::FMINNUM, (MVT::SimpleValueType)VT, Expand); + setOperationAction(ISD::FMAXNUM, (MVT::SimpleValueType)VT, Expand); // These library functions default to expand. 
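// The new FMINNUM/FMAXNUM nodes default to Expand and otherwise lower to the
// C99 fmin*/fmax* libcalls registered above. A quick standalone reminder of
// those libcalls' NaN behaviour (when exactly one operand is a NaN, the
// numeric operand is returned):
#include <cassert>
#include <cmath>

int main() {
  const double QNaN = std::nan("");
  assert(std::fmin(1.0, 2.0) == 1.0);
  assert(std::fmax(1.0, 2.0) == 2.0);
  assert(std::fmin(QNaN, 2.0) == 2.0); // the NaN operand is ignored
  assert(std::fmax(1.0, QNaN) == 1.0);
}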
setOperationAction(ISD::FROUND, (MVT::SimpleValueType)VT, Expand); @@ -774,6 +800,8 @@ void TargetLoweringBase::initActions() { setOperationAction(ISD::FEXP , MVT::f16, Expand); setOperationAction(ISD::FEXP2, MVT::f16, Expand); setOperationAction(ISD::FFLOOR, MVT::f16, Expand); + setOperationAction(ISD::FMINNUM, MVT::f16, Expand); + setOperationAction(ISD::FMAXNUM, MVT::f16, Expand); setOperationAction(ISD::FNEARBYINT, MVT::f16, Expand); setOperationAction(ISD::FCEIL, MVT::f16, Expand); setOperationAction(ISD::FRINT, MVT::f16, Expand); @@ -785,6 +813,8 @@ void TargetLoweringBase::initActions() { setOperationAction(ISD::FEXP , MVT::f32, Expand); setOperationAction(ISD::FEXP2, MVT::f32, Expand); setOperationAction(ISD::FFLOOR, MVT::f32, Expand); + setOperationAction(ISD::FMINNUM, MVT::f32, Expand); + setOperationAction(ISD::FMAXNUM, MVT::f32, Expand); setOperationAction(ISD::FNEARBYINT, MVT::f32, Expand); setOperationAction(ISD::FCEIL, MVT::f32, Expand); setOperationAction(ISD::FRINT, MVT::f32, Expand); @@ -796,6 +826,8 @@ void TargetLoweringBase::initActions() { setOperationAction(ISD::FEXP , MVT::f64, Expand); setOperationAction(ISD::FEXP2, MVT::f64, Expand); setOperationAction(ISD::FFLOOR, MVT::f64, Expand); + setOperationAction(ISD::FMINNUM, MVT::f64, Expand); + setOperationAction(ISD::FMAXNUM, MVT::f64, Expand); setOperationAction(ISD::FNEARBYINT, MVT::f64, Expand); setOperationAction(ISD::FCEIL, MVT::f64, Expand); setOperationAction(ISD::FRINT, MVT::f64, Expand); @@ -807,6 +839,8 @@ void TargetLoweringBase::initActions() { setOperationAction(ISD::FEXP , MVT::f128, Expand); setOperationAction(ISD::FEXP2, MVT::f128, Expand); setOperationAction(ISD::FFLOOR, MVT::f128, Expand); + setOperationAction(ISD::FMINNUM, MVT::f128, Expand); + setOperationAction(ISD::FMAXNUM, MVT::f128, Expand); setOperationAction(ISD::FNEARBYINT, MVT::f128, Expand); setOperationAction(ISD::FCEIL, MVT::f128, Expand); setOperationAction(ISD::FRINT, MVT::f128, Expand); @@ -958,11 +992,10 @@ TargetLoweringBase::emitPatchPoint(MachineInstr *MI, // Add a new memory operand for this FI. const MachineFrameInfo &MFI = *MF.getFrameInfo(); assert(MFI.getObjectOffset(FI) != -1); - MachineMemOperand *MMO = - MF.getMachineMemOperand(MachinePointerInfo::getFixedStack(FI), - MachineMemOperand::MOLoad, - TM.getDataLayout()->getPointerSize(), - MFI.getObjectAlignment(FI)); + MachineMemOperand *MMO = MF.getMachineMemOperand( + MachinePointerInfo::getFixedStack(FI), MachineMemOperand::MOLoad, + TM.getSubtargetImpl()->getDataLayout()->getPointerSize(), + MFI.getObjectAlignment(FI)); MIB->addMemOperand(MF, MMO); // Replace the instruction and update the operand index. @@ -978,7 +1011,8 @@ TargetLoweringBase::emitPatchPoint(MachineInstr *MI, /// of the register class for the specified type and its associated "cost". std::pair TargetLoweringBase::findRepresentativeClass(MVT VT) const { - const TargetRegisterInfo *TRI = getTargetMachine().getRegisterInfo(); + const TargetRegisterInfo *TRI = + getTargetMachine().getSubtargetImpl()->getRegisterInfo(); const TargetRegisterClass *RC = RegClassForVT[VT.SimpleTy]; if (!RC) return std::make_pair(RC, 0); @@ -1005,8 +1039,8 @@ TargetLoweringBase::findRepresentativeClass(MVT VT) const { /// computeRegisterProperties - Once all of the register classes are added, /// this allows us to compute derived properties we expose. 
void TargetLoweringBase::computeRegisterProperties() { - assert(MVT::LAST_VALUETYPE <= MVT::MAX_ALLOWED_VALUETYPE && - "Too many value types for ValueTypeActions to hold!"); + static_assert(MVT::LAST_VALUETYPE <= MVT::MAX_ALLOWED_VALUETYPE, + "Too many value types for ValueTypeActions to hold!"); // Everything defaults to needing one register. for (unsigned i = 0; i != MVT::LAST_VALUETYPE; ++i) { @@ -1089,6 +1123,13 @@ void TargetLoweringBase::computeRegisterProperties() { } } + if (!isTypeLegal(MVT::f16)) { + NumRegistersForVT[MVT::f16] = NumRegistersForVT[MVT::i16]; + RegisterTypeForVT[MVT::f16] = RegisterTypeForVT[MVT::i16]; + TransformToType[MVT::f16] = MVT::i16; + ValueTypeActions.setTypeAction(MVT::f16, TypeSoftenFloat); + } + // Loop over all of the vector value types to see which need transformations. for (unsigned i = MVT::FIRST_VECTOR_VALUETYPE; i <= (unsigned)MVT::LAST_VECTOR_VALUETYPE; ++i) { @@ -1154,8 +1195,12 @@ void TargetLoweringBase::computeRegisterProperties() { TransformToType[i] = MVT::Other; if (PreferredAction == TypeScalarizeVector) ValueTypeActions.setTypeAction(VT, TypeScalarizeVector); - else + else if (PreferredAction == TypeSplitVector) ValueTypeActions.setTypeAction(VT, TypeSplitVector); + else + // Set type action according to the number of elements. + ValueTypeActions.setTypeAction(VT, NElts == 1 ? TypeScalarizeVector + : TypeSplitVector); } else { TransformToType[i] = NVT; ValueTypeActions.setTypeAction(VT, TypeWidenVector); diff --git a/lib/CodeGen/TargetLoweringObjectFileImpl.cpp b/lib/CodeGen/TargetLoweringObjectFileImpl.cpp index 03f4a51..efd15e1 100644 --- a/lib/CodeGen/TargetLoweringObjectFileImpl.cpp +++ b/lib/CodeGen/TargetLoweringObjectFileImpl.cpp @@ -37,6 +37,7 @@ #include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetLowering.h" #include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetSubtargetInfo.h" using namespace llvm; using namespace dwarf; @@ -72,9 +73,10 @@ void TargetLoweringObjectFileELF::emitPersonalityValue(MCStreamer &Streamer, Flags, SectionKind::getDataRel(), 0, Label->getName()); - unsigned Size = TM.getDataLayout()->getPointerSize(); + unsigned Size = TM.getSubtargetImpl()->getDataLayout()->getPointerSize(); Streamer.SwitchSection(Sec); - Streamer.EmitValueToAlignment(TM.getDataLayout()->getPointerABIAlignment()); + Streamer.EmitValueToAlignment( + TM.getSubtargetImpl()->getDataLayout()->getPointerABIAlignment()); Streamer.EmitSymbolAttribute(Label, MCSA_ELF_TypeObject); const MCExpr *E = MCConstantExpr::Create(Size, getContext()); Streamer.EmitELFSize(Label, E); @@ -287,7 +289,8 @@ SelectSectionForGlobal(const GlobalValue *GV, SectionKind Kind, // FIXME: this is getting the alignment of the character, not the // alignment of the global! unsigned Align = - TM.getDataLayout()->getPreferredAlignment(cast(GV)); + TM.getSubtargetImpl()->getDataLayout()->getPreferredAlignment( + cast(GV)); const char *SizeSpec = ".rodata.str1."; if (Kind.isMergeable2ByteCString()) @@ -338,8 +341,9 @@ SelectSectionForGlobal(const GlobalValue *GV, SectionKind Kind, /// getSectionForConstant - Given a mergeable constant with the /// specified size and relocation information, return a section that it /// should be placed in. 
-const MCSection *TargetLoweringObjectFileELF:: -getSectionForConstant(SectionKind Kind) const { +const MCSection * +TargetLoweringObjectFileELF::getSectionForConstant(SectionKind Kind, + const Constant *C) const { if (Kind.isMergeableConst4() && MergeableConst4Section) return MergeableConst4Section; if (Kind.isMergeableConst8() && MergeableConst8Section) @@ -354,44 +358,59 @@ getSectionForConstant(SectionKind Kind) const { return DataRelROSection; } -const MCSection *TargetLoweringObjectFileELF::getStaticCtorSection( - unsigned Priority, const MCSymbol *KeySym) const { - // The default scheme is .ctor / .dtor, so we have to invert the priority - // numbering. - if (Priority == 65535) - return StaticCtorSection; +static const MCSectionELF *getStaticStructorSection(MCContext &Ctx, + bool UseInitArray, + bool IsCtor, + unsigned Priority, + const MCSymbol *KeySym) { + std::string Name; + unsigned Type; + unsigned Flags = ELF::SHF_ALLOC | ELF::SHF_WRITE; + SectionKind Kind = SectionKind::getDataRel(); + StringRef COMDAT = KeySym ? KeySym->getName() : ""; + + if (KeySym) + Flags |= ELF::SHF_GROUP; if (UseInitArray) { - std::string Name = std::string(".init_array.") + utostr(Priority); - return getContext().getELFSection(Name, ELF::SHT_INIT_ARRAY, - ELF::SHF_ALLOC | ELF::SHF_WRITE, - SectionKind::getDataRel()); + if (IsCtor) { + Type = ELF::SHT_INIT_ARRAY; + Name = ".init_array"; + } else { + Type = ELF::SHT_FINI_ARRAY; + Name = ".fini_array"; + } + if (Priority != 65535) { + Name += '.'; + Name += utostr(Priority); + } } else { - std::string Name = std::string(".ctors.") + utostr(65535 - Priority); - return getContext().getELFSection(Name, ELF::SHT_PROGBITS, - ELF::SHF_ALLOC |ELF::SHF_WRITE, - SectionKind::getDataRel()); + // The default scheme is .ctor / .dtor, so we have to invert the priority + // numbering. + if (IsCtor) + Name = ".ctors"; + else + Name = ".dtors"; + if (Priority != 65535) { + Name += '.'; + Name += utostr(65535 - Priority); + } + Type = ELF::SHT_PROGBITS; } + + return Ctx.getELFSection(Name, Type, Flags, Kind, 0, COMDAT); } -const MCSection *TargetLoweringObjectFileELF::getStaticDtorSection( +const MCSection *TargetLoweringObjectFileELF::getStaticCtorSection( unsigned Priority, const MCSymbol *KeySym) const { - // The default scheme is .ctor / .dtor, so we have to invert the priority - // numbering. 
- if (Priority == 65535) - return StaticDtorSection; + return getStaticStructorSection(getContext(), UseInitArray, true, Priority, + KeySym); +} - if (UseInitArray) { - std::string Name = std::string(".fini_array.") + utostr(Priority); - return getContext().getELFSection(Name, ELF::SHT_FINI_ARRAY, - ELF::SHF_ALLOC | ELF::SHF_WRITE, - SectionKind::getDataRel()); - } else { - std::string Name = std::string(".dtors.") + utostr(65535 - Priority); - return getContext().getELFSection(Name, ELF::SHT_PROGBITS, - ELF::SHF_ALLOC |ELF::SHF_WRITE, - SectionKind::getDataRel()); - } +const MCSection *TargetLoweringObjectFileELF::getStaticDtorSection( + unsigned Priority, const MCSymbol *KeySym) const { + return getStaticStructorSection(getContext(), UseInitArray, false, Priority, + KeySym); } void @@ -565,10 +584,29 @@ bool TargetLoweringObjectFileMachO::isSectionAtomizableBySymbols( if (SMO.getKind().isMergeable1ByteCString()) return false; + if (SMO.getSegmentName() == "__TEXT" && + SMO.getSectionName() == "__objc_classname" && + SMO.getType() == MachO::S_CSTRING_LITERALS) + return false; + + if (SMO.getSegmentName() == "__TEXT" && + SMO.getSectionName() == "__objc_methname" && + SMO.getType() == MachO::S_CSTRING_LITERALS) + return false; + + if (SMO.getSegmentName() == "__TEXT" && + SMO.getSectionName() == "__objc_methtype" && + SMO.getType() == MachO::S_CSTRING_LITERALS) + return false; + if (SMO.getSegmentName() == "__DATA" && SMO.getSectionName() == "__cfstring") return false; + // no_dead_strip sections are not atomized in practice. + if (SMO.hasAttribute(MachO::S_ATTR_NO_DEAD_STRIP)) + return false; + switch (SMO.getType()) { default: return true; @@ -610,17 +648,21 @@ SelectSectionForGlobal(const GlobalValue *GV, SectionKind Kind, // FIXME: Alignment check should be handled by section classifier. if (Kind.isMergeable1ByteCString() && - TM.getDataLayout()->getPreferredAlignment(cast(GV)) < 32) + TM.getSubtargetImpl()->getDataLayout()->getPreferredAlignment( + cast(GV)) < 32) return CStringSection; // Do not put 16-bit arrays in the UString section if they have an // externally visible label, this runs into issues with certain linker // versions. if (Kind.isMergeable2ByteCString() && !GV->hasExternalLinkage() && - TM.getDataLayout()->getPreferredAlignment(cast(GV)) < 32) + TM.getSubtargetImpl()->getDataLayout()->getPreferredAlignment( + cast(GV)) < 32) return UStringSection; - if (Kind.isMergeableConst()) { + // With MachO only variables whose corresponding symbol starts with 'l' or + // 'L' can be merged, so we only try merging GVs with private linkage. + if (GV->hasPrivateLinkage() && Kind.isMergeableConst()) { if (Kind.isMergeableConst4()) return FourByteConstantSection; if (Kind.isMergeableConst8()) @@ -654,7 +696,8 @@ SelectSectionForGlobal(const GlobalValue *GV, SectionKind Kind, } const MCSection * -TargetLoweringObjectFileMachO::getSectionForConstant(SectionKind Kind) const { +TargetLoweringObjectFileMachO::getSectionForConstant(SectionKind Kind, + const Constant *C) const { // If this constant requires a relocation, we have to put it in the data // segment, not in the text segment. 
if (Kind.isDataRel() || Kind.isReadOnlyWithRel()) @@ -737,7 +780,7 @@ getCOFFSectionFlags(SectionKind K) { COFF::IMAGE_SCN_MEM_EXECUTE | COFF::IMAGE_SCN_MEM_READ | COFF::IMAGE_SCN_CNT_CODE; - else if (K.isBSS ()) + else if (K.isBSS()) Flags |= COFF::IMAGE_SCN_CNT_UNINITIALIZED_DATA | COFF::IMAGE_SCN_MEM_READ | @@ -747,7 +790,7 @@ getCOFFSectionFlags(SectionKind K) { COFF::IMAGE_SCN_CNT_INITIALIZED_DATA | COFF::IMAGE_SCN_MEM_READ | COFF::IMAGE_SCN_MEM_WRITE; - else if (K.isReadOnly()) + else if (K.isReadOnly() || K.isReadOnlyWithRel()) Flags |= COFF::IMAGE_SCN_CNT_INITIALIZED_DATA | COFF::IMAGE_SCN_MEM_READ; @@ -772,7 +815,7 @@ static const GlobalValue *getComdatGVForCOFF(const GlobalValue *GV) { if (ComdatGV->getComdat() != C) report_fatal_error("Associative COMDAT symbol '" + ComdatGVName + - "' is not a key for it's COMDAT."); + "' is not a key for its COMDAT."); return ComdatGV; } @@ -841,9 +884,9 @@ static const char *getCOFFSectionNameForUniqueGlobal(SectionKind Kind) { return ".bss"; if (Kind.isThreadLocal()) return ".tls$"; - if (Kind.isWriteable()) - return ".data"; - return ".rdata"; + if (Kind.isReadOnly() || Kind.isReadOnlyWithRel()) + return ".rdata"; + return ".data"; } @@ -891,7 +934,7 @@ SelectSectionForGlobal(const GlobalValue *GV, SectionKind Kind, if (Kind.isThreadLocal()) return TLSDataSection; - if (Kind.isReadOnly()) + if (Kind.isReadOnly() || Kind.isReadOnlyWithRel()) return ReadOnlySection; // Note: we claim that common symbols are put in BSSSection, but they are @@ -958,29 +1001,14 @@ emitModuleFlags(MCStreamer &Streamer, } } -static const MCSection *getAssociativeCOFFSection(MCContext &Ctx, - const MCSection *Sec, - const MCSymbol *KeySym) { - // Return the normal section if we don't have to be associative. - if (!KeySym) - return Sec; - - // Make an associative section with the same name and kind as the normal - // section. - const MCSectionCOFF *SecCOFF = cast(Sec); - unsigned Characteristics = - SecCOFF->getCharacteristics() | COFF::IMAGE_SCN_LNK_COMDAT; - return Ctx.getCOFFSection(SecCOFF->getSectionName(), Characteristics, - SecCOFF->getKind(), KeySym->getName(), - COFF::IMAGE_COMDAT_SELECT_ASSOCIATIVE); -} - const MCSection *TargetLoweringObjectFileCOFF::getStaticCtorSection( unsigned Priority, const MCSymbol *KeySym) const { - return getAssociativeCOFFSection(getContext(), StaticCtorSection, KeySym); + return getContext().getAssociativeCOFFSection( + cast(StaticCtorSection), KeySym); } const MCSection *TargetLoweringObjectFileCOFF::getStaticDtorSection( unsigned Priority, const MCSymbol *KeySym) const { - return getAssociativeCOFFSection(getContext(), StaticDtorSection, KeySym); + return getContext().getAssociativeCOFFSection( + cast(StaticDtorSection), KeySym); } diff --git a/lib/CodeGen/TargetOptionsImpl.cpp b/lib/CodeGen/TargetOptionsImpl.cpp index 3ca2017..618d903 100644 --- a/lib/CodeGen/TargetOptionsImpl.cpp +++ b/lib/CodeGen/TargetOptionsImpl.cpp @@ -51,3 +51,10 @@ bool TargetOptions::HonorSignDependentRoundingFPMath() const { StringRef TargetOptions::getTrapFunctionName() const { return TrapFuncName; } + +/// getCFIFuncName - If this returns a non-empty string, then it is the name of +/// the function that gets called on CFI violations in CFI non-enforcing mode +/// (!TargetOptions::CFIEnforcing). 
+StringRef TargetOptions::getCFIFuncName() const { + return CFIFuncName; +} diff --git a/lib/CodeGen/TargetRegisterInfo.cpp b/lib/CodeGen/TargetRegisterInfo.cpp index a3a4fb3..61a66b6 100644 --- a/lib/CodeGen/TargetRegisterInfo.cpp +++ b/lib/CodeGen/TargetRegisterInfo.cpp @@ -16,6 +16,7 @@ #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/VirtRegMap.h" +#include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" using namespace llvm; @@ -108,7 +109,7 @@ TargetRegisterInfo::getAllocatableClass(const TargetRegisterClass *RC) const { /// register of the given type, picking the most sub register class of /// the right type that contains this physreg. const TargetRegisterClass * -TargetRegisterInfo::getMinimalPhysRegClass(unsigned reg, EVT VT) const { +TargetRegisterInfo::getMinimalPhysRegClass(unsigned reg, MVT VT) const { assert(isPhysicalRegister(reg) && "reg must be a physical register"); // Pick the most sub register class of the right type that contains @@ -293,3 +294,11 @@ TargetRegisterInfo::getRegAllocationHints(unsigned VirtReg, // All clear, tell the register allocator to prefer this register. Hints.push_back(Phys); } + +#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) +void +TargetRegisterInfo::dumpReg(unsigned Reg, unsigned SubRegIndex, + const TargetRegisterInfo *TRI) { + dbgs() << PrintReg(Reg, TRI, SubRegIndex) << "\n"; +} +#endif diff --git a/lib/CodeGen/TargetSchedule.cpp b/lib/CodeGen/TargetSchedule.cpp index b0f2ca6..ef2dab1 100644 --- a/lib/CodeGen/TargetSchedule.cpp +++ b/lib/CodeGen/TargetSchedule.cpp @@ -16,7 +16,6 @@ #include "llvm/Support/CommandLine.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetInstrInfo.h" -#include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetRegisterInfo.h" #include "llvm/Target/TargetSubtargetInfo.h" @@ -157,7 +156,7 @@ unsigned TargetSchedModel::computeOperandLatency( const MachineInstr *UseMI, unsigned UseOperIdx) const { if (!hasInstrSchedModel() && !hasInstrItineraries()) - return TII->defaultDefLatency(&SchedModel, DefMI); + return TII->defaultDefLatency(SchedModel, DefMI); if (hasInstrItineraries()) { int OperLatency = 0; @@ -181,7 +180,7 @@ unsigned TargetSchedModel::computeOperandLatency( // applicable to the InstrItins model. InstrSchedModel should model all // special cases without TII hooks. InstrLatency = std::max(InstrLatency, - TII->defaultDefLatency(&SchedModel, DefMI)); + TII->defaultDefLatency(SchedModel, DefMI)); return InstrLatency; } // hasInstrSchedModel() @@ -222,7 +221,29 @@ unsigned TargetSchedModel::computeOperandLatency( // FIXME: Automatically giving all implicit defs defaultDefLatency is // undesirable. We should only do it for defs that are known to the MC // desc like flags. Truly implicit defs should get 1 cycle latency. - return DefMI->isTransient() ? 0 : TII->defaultDefLatency(&SchedModel, DefMI); + return DefMI->isTransient() ? 0 : TII->defaultDefLatency(SchedModel, DefMI); +} + +unsigned TargetSchedModel::computeInstrLatency(unsigned Opcode) const { + assert(hasInstrSchedModel() && "Only call this function with a SchedModel"); + + unsigned SCIdx = TII->get(Opcode).getSchedClass(); + const MCSchedClassDesc *SCDesc = SchedModel.getSchedClassDesc(SCIdx); + unsigned Latency = 0; + + if (SCDesc->isValid() && !SCDesc->isVariant()) { + for (unsigned DefIdx = 0, DefEnd = SCDesc->NumWriteLatencyEntries; + DefIdx != DefEnd; ++DefIdx) { + // Lookup the definition's write latency in SubtargetInfo. 
+ const MCWriteLatencyEntry *WLEntry = + STI->getWriteLatencyEntry(SCDesc, DefIdx); + Latency = std::max(Latency, capLatency(WLEntry->Cycles)); + } + return Latency; + } + + assert(Latency && "No MI sched latency"); + return 0; } unsigned @@ -248,7 +269,7 @@ TargetSchedModel::computeInstrLatency(const MachineInstr *MI, return Latency; } } - return TII->defaultDefLatency(&SchedModel, MI); + return TII->defaultDefLatency(SchedModel, MI); } unsigned TargetSchedModel:: @@ -268,7 +289,7 @@ computeOutputLatency(const MachineInstr *DefMI, unsigned DefOperIdx, // for predicated defs. unsigned Reg = DefMI->getOperand(DefOperIdx).getReg(); const MachineFunction &MF = *DefMI->getParent()->getParent(); - const TargetRegisterInfo *TRI = MF.getTarget().getRegisterInfo(); + const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo(); if (!DepMI->readsRegister(Reg, TRI) && TII->isPredicated(DepMI)) return computeInstrLatency(DefMI); diff --git a/lib/CodeGen/TwoAddressInstructionPass.cpp b/lib/CodeGen/TwoAddressInstructionPass.cpp index f42d47b..e218a83 100644 --- a/lib/CodeGen/TwoAddressInstructionPass.cpp +++ b/lib/CodeGen/TwoAddressInstructionPass.cpp @@ -48,6 +48,7 @@ #include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/Target/TargetSubtargetInfo.h" using namespace llvm; #define DEBUG_TYPE "twoaddrinstr" @@ -544,10 +545,21 @@ isProfitableToCommute(unsigned regA, unsigned regB, unsigned regC, if (ToRegA) { unsigned FromRegB = getMappedReg(regB, SrcRegMap); unsigned FromRegC = getMappedReg(regC, SrcRegMap); - bool BComp = !FromRegB || regsAreCompatible(FromRegB, ToRegA, TRI); - bool CComp = !FromRegC || regsAreCompatible(FromRegC, ToRegA, TRI); - if (BComp != CComp) - return !BComp && CComp; + bool CompB = FromRegB && regsAreCompatible(FromRegB, ToRegA, TRI); + bool CompC = FromRegC && regsAreCompatible(FromRegC, ToRegA, TRI); + + // Compute if any of the following are true: + // -RegB is not tied to a register and RegC is compatible with RegA. + // -RegB is tied to the wrong physical register, but RegC is. + // -RegB is tied to the wrong physical register, and RegC isn't tied. + if ((!FromRegB && CompC) || (FromRegB && !CompB && (!FromRegC || CompC))) + return true; + // Don't compute if any of the following are true: + // -RegC is not tied to a register and RegB is compatible with RegA. + // -RegC is tied to the wrong physical register, but RegB is. + // -RegC is tied to the wrong physical register, and RegB isn't tied. 
+ if ((!FromRegC && CompB) || (FromRegC && !CompC && (!FromRegB || CompB))) + return false; } // If there is a use of regC between its last def (could be livein) and this @@ -666,7 +678,7 @@ TwoAddressInstructionPass::scanUses(unsigned DstReg) { unsigned Reg = DstReg; while (MachineInstr *UseMI = findOnlyInterestingUse(Reg, MBB, MRI, TII,IsCopy, NewReg, IsDstPhys)) { - if (IsCopy && !Processed.insert(UseMI)) + if (IsCopy && !Processed.insert(UseMI).second) break; DenseMap::iterator DI = DistanceMap.find(UseMI); @@ -1503,9 +1515,9 @@ bool TwoAddressInstructionPass::runOnMachineFunction(MachineFunction &Func) { MF = &Func; const TargetMachine &TM = MF->getTarget(); MRI = &MF->getRegInfo(); - TII = TM.getInstrInfo(); - TRI = TM.getRegisterInfo(); - InstrItins = TM.getInstrItineraryData(); + TII = TM.getSubtargetImpl()->getInstrInfo(); + TRI = TM.getSubtargetImpl()->getRegisterInfo(); + InstrItins = TM.getSubtargetImpl()->getInstrItineraryData(); LV = getAnalysisIfAvailable(); LIS = getAnalysisIfAvailable(); AA = &getAnalysis(); diff --git a/lib/CodeGen/UnreachableBlockElim.cpp b/lib/CodeGen/UnreachableBlockElim.cpp index 2e22082..7824f92 100644 --- a/lib/CodeGen/UnreachableBlockElim.cpp +++ b/lib/CodeGen/UnreachableBlockElim.cpp @@ -64,9 +64,8 @@ bool UnreachableBlockElim::runOnFunction(Function &F) { SmallPtrSet Reachable; // Mark all reachable blocks. - for (df_ext_iterator > I = - df_ext_begin(&F, Reachable), E = df_ext_end(&F, Reachable); I != E; ++I) - /* Mark all reachable blocks */; + for (BasicBlock *BB : depth_first_ext(&F, Reachable)) + (void)BB/* Mark all reachable blocks */; // Loop over all dead blocks, remembering them and deleting all instructions // in them. @@ -125,10 +124,8 @@ bool UnreachableMachineBlockElim::runOnMachineFunction(MachineFunction &F) { MachineLoopInfo *MLI = getAnalysisIfAvailable(); // Mark all reachable blocks. - for (df_ext_iterator > - I = df_ext_begin(&F, Reachable), E = df_ext_end(&F, Reachable); - I != E; ++I) - /* Mark all reachable blocks */; + for (MachineBasicBlock *BB : depth_first_ext(&F, Reachable)) + (void)BB/* Mark all reachable blocks */; // Loop over all dead blocks, remembering them and deleting all instructions // in them. 
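An illustrative aside, not part of the patch: the UnreachableBlockElim hunks just above replace the explicit df_ext_iterator loops with the depth_first_ext range helper, where the SmallPtrSet passed as the second argument doubles as the DFS visited set, so simply walking the range records every reachable block. A minimal standalone sketch of that idiom (assuming the llvm/ADT and llvm/IR headers as they exist at this revision) might look like:

    // Sketch only; not part of this patch. Shows the depth_first_ext idiom
    // the UnreachableBlockElim hunks switch to: the set given as the second
    // argument is the external visited set, so the traversal itself marks
    // every block reachable from the entry block.
    #include "llvm/ADT/DepthFirstIterator.h"
    #include "llvm/ADT/SmallPtrSet.h"
    #include "llvm/IR/BasicBlock.h"
    #include "llvm/IR/CFG.h"        // GraphTraits<Function*>
    #include "llvm/IR/Function.h"

    using namespace llvm;

    static void markReachable(Function &F,
                              SmallPtrSet<BasicBlock *, 8> &Reachable) {
      // Each block the DFS visits is inserted into Reachable; the loop body
      // is intentionally empty, and (void)BB silences unused-variable
      // warnings, mirroring the pass itself.
      for (BasicBlock *BB : depth_first_ext(&F, Reachable))
        (void)BB;
    }

Any block not present in Reachable afterwards is unreachable and can be deleted, which is exactly what the pass does in the loop that follows the traversal.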
diff --git a/lib/CodeGen/VirtRegMap.cpp b/lib/CodeGen/VirtRegMap.cpp index 704736f..0d17d43 100644 --- a/lib/CodeGen/VirtRegMap.cpp +++ b/lib/CodeGen/VirtRegMap.cpp @@ -36,6 +36,7 @@ #include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/Target/TargetSubtargetInfo.h" #include using namespace llvm; @@ -54,8 +55,8 @@ INITIALIZE_PASS(VirtRegMap, "virtregmap", "Virtual Register Map", false, false) bool VirtRegMap::runOnMachineFunction(MachineFunction &mf) { MRI = &mf.getRegInfo(); - TII = mf.getTarget().getInstrInfo(); - TRI = mf.getTarget().getRegisterInfo(); + TII = mf.getSubtarget().getInstrInfo(); + TRI = mf.getSubtarget().getRegisterInfo(); MF = &mf; Virt2PhysMap.clear(); @@ -123,7 +124,7 @@ void VirtRegMap::print(raw_ostream &OS, const Module*) const { if (Virt2PhysMap[Reg] != (unsigned)VirtRegMap::NO_PHYS_REG) { OS << '[' << PrintReg(Reg, TRI) << " -> " << PrintReg(Virt2PhysMap[Reg], TRI) << "] " - << MRI->getRegClass(Reg)->getName() << "\n"; + << TRI->getRegClassName(MRI->getRegClass(Reg)) << "\n"; } } @@ -131,7 +132,7 @@ void VirtRegMap::print(raw_ostream &OS, const Module*) const { unsigned Reg = TargetRegisterInfo::index2VirtReg(i); if (Virt2StackSlotMap[Reg] != VirtRegMap::NO_STACK_SLOT) { OS << '[' << PrintReg(Reg, TRI) << " -> fi#" << Virt2StackSlotMap[Reg] - << "] " << MRI->getRegClass(Reg)->getName() << "\n"; + << "] " << TRI->getRegClassName(MRI->getRegClass(Reg)) << "\n"; } } OS << '\n'; @@ -205,8 +206,8 @@ void VirtRegRewriter::getAnalysisUsage(AnalysisUsage &AU) const { bool VirtRegRewriter::runOnMachineFunction(MachineFunction &fn) { MF = &fn; TM = &MF->getTarget(); - TRI = TM->getRegisterInfo(); - TII = TM->getInstrInfo(); + TRI = MF->getSubtarget().getRegisterInfo(); + TII = MF->getSubtarget().getInstrInfo(); MRI = &MF->getRegInfo(); Indexes = &getAnalysis(); LIS = &getAnalysis(); diff --git a/lib/DebugInfo/Android.mk b/lib/DebugInfo/Android.mk index 12dfb3b..e777e9c 100644 --- a/lib/DebugInfo/Android.mk +++ b/lib/DebugInfo/Android.mk @@ -3,6 +3,7 @@ LOCAL_PATH:= $(call my-dir) debuginfo_SRC_FILES := \ DIContext.cpp \ DWARFAbbreviationDeclaration.cpp \ + DWARFAcceleratorTable.cpp \ DWARFCompileUnit.cpp \ DWARFContext.cpp \ DWARFDebugAbbrev.cpp \ diff --git a/lib/DebugInfo/CMakeLists.txt b/lib/DebugInfo/CMakeLists.txt index 61a3fb0..81fc84d 100644 --- a/lib/DebugInfo/CMakeLists.txt +++ b/lib/DebugInfo/CMakeLists.txt @@ -1,6 +1,7 @@ add_llvm_library(LLVMDebugInfo DIContext.cpp DWARFAbbreviationDeclaration.cpp + DWARFAcceleratorTable.cpp DWARFCompileUnit.cpp DWARFContext.cpp DWARFDebugAbbrev.cpp diff --git a/lib/DebugInfo/DIContext.cpp b/lib/DebugInfo/DIContext.cpp index 49a4409..01aecf8 100644 --- a/lib/DebugInfo/DIContext.cpp +++ b/lib/DebugInfo/DIContext.cpp @@ -13,6 +13,6 @@ using namespace llvm; DIContext::~DIContext() {} -DIContext *DIContext::getDWARFContext(object::ObjectFile *Obj) { +DIContext *DIContext::getDWARFContext(const object::ObjectFile &Obj) { return new DWARFContextInMemory(Obj); } diff --git a/lib/DebugInfo/DWARFAbbreviationDeclaration.h b/lib/DebugInfo/DWARFAbbreviationDeclaration.h index b86b9ec..bb05c30 100644 --- a/lib/DebugInfo/DWARFAbbreviationDeclaration.h +++ b/lib/DebugInfo/DWARFAbbreviationDeclaration.h @@ -7,8 +7,8 @@ // //===----------------------------------------------------------------------===// -#ifndef LLVM_DEBUGINFO_DWARFABBREVIATIONDECLARATION_H -#define LLVM_DEBUGINFO_DWARFABBREVIATIONDECLARATION_H +#ifndef 
LLVM_LIB_DEBUGINFO_DWARFABBREVIATIONDECLARATION_H +#define LLVM_LIB_DEBUGINFO_DWARFABBREVIATIONDECLARATION_H #include "llvm/ADT/SmallVector.h" #include "llvm/Support/DataExtractor.h" diff --git a/lib/DebugInfo/DWARFAcceleratorTable.cpp b/lib/DebugInfo/DWARFAcceleratorTable.cpp new file mode 100644 index 0000000..703274d --- /dev/null +++ b/lib/DebugInfo/DWARFAcceleratorTable.cpp @@ -0,0 +1,133 @@ +//===--- DWARFAcceleratorTable.cpp ----------------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "DWARFAcceleratorTable.h" + +#include "llvm/Support/Dwarf.h" +#include "llvm/Support/Format.h" +#include "llvm/Support/raw_ostream.h" + +namespace llvm { + +bool DWARFAcceleratorTable::extract() { + uint32_t Offset = 0; + + // Check that we can at least read the header. + if (!AccelSection.isValidOffset(offsetof(Header, HeaderDataLength)+4)) + return false; + + Hdr.Magic = AccelSection.getU32(&Offset); + Hdr.Version = AccelSection.getU16(&Offset); + Hdr.HashFunction = AccelSection.getU16(&Offset); + Hdr.NumBuckets = AccelSection.getU32(&Offset); + Hdr.NumHashes = AccelSection.getU32(&Offset); + Hdr.HeaderDataLength = AccelSection.getU32(&Offset); + + // Check that we can read all the hashes and offsets from the + // section (see SourceLevelDebugging.rst for the structure of the index). + if (!AccelSection.isValidOffset(sizeof(Hdr) + Hdr.HeaderDataLength + + Hdr.NumBuckets*4 + Hdr.NumHashes*8)) + return false; + + HdrData.DIEOffsetBase = AccelSection.getU32(&Offset); + uint32_t NumAtoms = AccelSection.getU32(&Offset); + + for (unsigned i = 0; i < NumAtoms; ++i) { + uint16_t AtomType = AccelSection.getU16(&Offset); + uint16_t AtomForm = AccelSection.getU16(&Offset); + HdrData.Atoms.push_back(std::make_pair(AtomType, AtomForm)); + } + + return true; +} + +void DWARFAcceleratorTable::dump(raw_ostream &OS) const { + // Dump the header. + OS << "Magic = " << format("0x%08x", Hdr.Magic) << '\n' + << "Version = " << format("0x%04x", Hdr.Version) << '\n' + << "Hash function = " << format("0x%08x", Hdr.HashFunction) << '\n' + << "Bucket count = " << Hdr.NumBuckets << '\n' + << "Hashes count = " << Hdr.NumHashes << '\n' + << "HeaderData length = " << Hdr.HeaderDataLength << '\n' + << "DIE offset base = " << HdrData.DIEOffsetBase << '\n' + << "Number of atoms = " << HdrData.Atoms.size() << '\n'; + + unsigned i = 0; + SmallVector AtomForms; + for (const auto &Atom: HdrData.Atoms) { + OS << format("Atom[%d] Type: ", i++); + if (const char *TypeString = dwarf::AtomTypeString(Atom.first)) + OS << TypeString; + else + OS << format("DW_ATOM_Unknown_0x%x", Atom.first); + OS << " Form: "; + if (const char *FormString = dwarf::FormEncodingString(Atom.second)) + OS << FormString; + else + OS << format("DW_FORM_Unknown_0x%x", Atom.second); + OS << '\n'; + AtomForms.push_back(DWARFFormValue(Atom.second)); + } + + // Now go through the actual tables and dump them. 
+ uint32_t Offset = sizeof(Hdr) + Hdr.HeaderDataLength; + unsigned HashesBase = Offset + Hdr.NumBuckets * 4; + unsigned OffsetsBase = HashesBase + Hdr.NumHashes * 4; + + for (unsigned Bucket = 0; Bucket < Hdr.NumBuckets; ++Bucket) { + unsigned Index = AccelSection.getU32(&Offset); + + OS << format("Bucket[%d]\n", Bucket); + if (Index == UINT32_MAX) { + OS << " EMPTY\n"; + continue; + } + + for (unsigned HashIdx = Index; HashIdx < Hdr.NumHashes; ++HashIdx) { + unsigned HashOffset = HashesBase + HashIdx*4; + unsigned OffsetsOffset = OffsetsBase + HashIdx*4; + uint32_t Hash = AccelSection.getU32(&HashOffset); + + if (Hash % Hdr.NumBuckets != Bucket) + break; + + unsigned DataOffset = AccelSection.getU32(&OffsetsOffset); + OS << format(" Hash = 0x%08x Offset = 0x%08x\n", Hash, DataOffset); + if (!AccelSection.isValidOffset(DataOffset)) { + OS << " Invalid section offset\n"; + continue; + } + while (AccelSection.isValidOffsetForDataOfSize(DataOffset, 4)) { + unsigned StringOffset = AccelSection.getU32(&DataOffset); + RelocAddrMap::const_iterator Reloc = Relocs.find(DataOffset-4); + if (Reloc != Relocs.end()) + StringOffset += Reloc->second.second; + if (!StringOffset) + break; + OS << format(" Name: %08x \"%s\"\n", StringOffset, + StringSection.getCStr(&StringOffset)); + unsigned NumData = AccelSection.getU32(&DataOffset); + for (unsigned Data = 0; Data < NumData; ++Data) { + OS << format(" Data[%d] => ", Data); + unsigned i = 0; + for (auto &Atom : AtomForms) { + OS << format("{Atom[%d]: ", i++); + if (Atom.extractValue(AccelSection, &DataOffset, nullptr)) + Atom.dump(OS, nullptr); + else + OS << "Error extracting the value"; + OS << "} "; + } + OS << '\n'; + } + } + } + } +} +} diff --git a/lib/DebugInfo/DWARFAcceleratorTable.h b/lib/DebugInfo/DWARFAcceleratorTable.h new file mode 100644 index 0000000..7dc9591 --- /dev/null +++ b/lib/DebugInfo/DWARFAcceleratorTable.h @@ -0,0 +1,51 @@ +//===--- DWARFAcceleratorTable.h --------------------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +#include "DWARFRelocMap.h" + +#include "llvm/ADT/SmallVector.h" +#include "llvm/DebugInfo/DWARFFormValue.h" + +#include + +namespace llvm { + +class DWARFAcceleratorTable { + + struct Header { + uint32_t Magic; + uint16_t Version; + uint16_t HashFunction; + uint32_t NumBuckets; + uint32_t NumHashes; + uint32_t HeaderDataLength; + }; + + struct HeaderData { + typedef uint16_t AtomType; + typedef uint16_t Form; + uint32_t DIEOffsetBase; + SmallVector, 3> Atoms; + }; + + struct Header Hdr; + struct HeaderData HdrData; + DataExtractor AccelSection; + DataExtractor StringSection; + const RelocAddrMap& Relocs; +public: + DWARFAcceleratorTable(DataExtractor AccelSection, DataExtractor StringSection, + const RelocAddrMap &Relocs) + : AccelSection(AccelSection), StringSection(StringSection), Relocs(Relocs) {} + + bool extract(); + void dump(raw_ostream &OS) const; +}; + +} diff --git a/lib/DebugInfo/DWARFCompileUnit.h b/lib/DebugInfo/DWARFCompileUnit.h index 2ed188e..b3190b18 100644 --- a/lib/DebugInfo/DWARFCompileUnit.h +++ b/lib/DebugInfo/DWARFCompileUnit.h @@ -7,8 +7,8 @@ // //===----------------------------------------------------------------------===// -#ifndef LLVM_DEBUGINFO_DWARFCOMPILEUNIT_H -#define LLVM_DEBUGINFO_DWARFCOMPILEUNIT_H +#ifndef LLVM_LIB_DEBUGINFO_DWARFCOMPILEUNIT_H +#define LLVM_LIB_DEBUGINFO_DWARFCOMPILEUNIT_H #include "DWARFUnit.h" @@ -16,10 +16,11 @@ namespace llvm { class DWARFCompileUnit : public DWARFUnit { public: - DWARFCompileUnit(const DWARFDebugAbbrev *DA, StringRef IS, StringRef RS, - StringRef SS, StringRef SOS, StringRef AOS, - const RelocAddrMap *M, bool LE) - : DWARFUnit(DA, IS, RS, SS, SOS, AOS, M, LE) {} + DWARFCompileUnit(DWARFContext &Context, const DWARFSection &Section, + const DWARFDebugAbbrev *DA, StringRef RS, StringRef SS, + StringRef SOS, StringRef AOS, bool LE, + const DWARFUnitSectionBase &UnitSection) + : DWARFUnit(Context, Section, DA, RS, SS, SOS, AOS, LE, UnitSection) {} void dump(raw_ostream &OS); // VTable anchor. ~DWARFCompileUnit() override; diff --git a/lib/DebugInfo/DWARFContext.cpp b/lib/DebugInfo/DWARFContext.cpp index 3961905..9a2c7cc 100644 --- a/lib/DebugInfo/DWARFContext.cpp +++ b/lib/DebugInfo/DWARFContext.cpp @@ -9,6 +9,7 @@ #include "DWARFContext.h" #include "DWARFDebugArangeSet.h" +#include "DWARFAcceleratorTable.h" #include "llvm/ADT/SmallString.h" #include "llvm/ADT/StringSwitch.h" @@ -59,6 +60,18 @@ static void dumpPubSection(raw_ostream &OS, StringRef Name, StringRef Data, } } +static void dumpAccelSection(raw_ostream &OS, StringRef Name, + const DWARFSection& Section, StringRef StringSection, + bool LittleEndian) { + DataExtractor AccelSection(Section.Data, LittleEndian, 0); + DataExtractor StrData(StringSection, LittleEndian, 0); + OS << "\n." 
<< Name << " contents:\n"; + DWARFAcceleratorTable Accel(AccelSection, StrData, Section.Relocs); + if (!Accel.extract()) + return; + Accel.dump(OS); +} + void DWARFContext::dump(raw_ostream &OS, DIDumpType DumpType) { if (DumpType == DIDT_All || DumpType == DIDT_Abbrev) { OS << ".debug_abbrev contents:\n"; @@ -86,15 +99,17 @@ void DWARFContext::dump(raw_ostream &OS, DIDumpType DumpType) { if ((DumpType == DIDT_All || DumpType == DIDT_Types) && getNumTypeUnits()) { OS << "\n.debug_types contents:\n"; - for (const auto &TU : type_units()) - TU->dump(OS); + for (const auto &TUS : type_unit_sections()) + for (const auto &TU : TUS) + TU->dump(OS); } if ((DumpType == DIDT_All || DumpType == DIDT_TypesDwo) && getNumDWOTypeUnits()) { OS << "\n.debug_types.dwo contents:\n"; - for (const auto &DWOTU : dwo_type_units()) - DWOTU->dump(OS); + for (const auto &DWOTUS : dwo_type_unit_sections()) + for (const auto &DWOTU : DWOTUS) + DWOTU->dump(OS); } if (DumpType == DIDT_All || DumpType == DIDT_Loc) { @@ -216,6 +231,22 @@ void DWARFContext::dump(raw_ostream &OS, DIDumpType DumpType) { OS << format("%8.8x\n", strOffsetExt.getU32(&offset)); } } + + if (DumpType == DIDT_All || DumpType == DIDT_AppleNames) + dumpAccelSection(OS, "apple_names", getAppleNamesSection(), + getStringSection(), isLittleEndian()); + + if (DumpType == DIDT_All || DumpType == DIDT_AppleTypes) + dumpAccelSection(OS, "apple_types", getAppleTypesSection(), + getStringSection(), isLittleEndian()); + + if (DumpType == DIDT_All || DumpType == DIDT_AppleNamespaces) + dumpAccelSection(OS, "apple_namespaces", getAppleNamespacesSection(), + getStringSection(), isLittleEndian()); + + if (DumpType == DIDT_All || DumpType == DIDT_AppleObjC) + dumpAccelSection(OS, "apple_objc", getAppleObjCSection(), + getStringSection(), isLittleEndian()); } const DWARFDebugAbbrev *DWARFContext::getDebugAbbrev() { @@ -291,7 +322,7 @@ const DWARFDebugFrame *DWARFContext::getDebugFrame() { } const DWARFLineTable * -DWARFContext::getLineTableForCompileUnit(DWARFCompileUnit *cu) { +DWARFContext::getLineTableForUnit(DWARFUnit *cu) { if (!Line) Line.reset(new DWARFDebugLine(&getLineSection().Relocs)); @@ -312,110 +343,34 @@ DWARFContext::getLineTableForCompileUnit(DWARFCompileUnit *cu) { } void DWARFContext::parseCompileUnits() { - if (!CUs.empty()) - return; - uint32_t offset = 0; - const DataExtractor &DIData = DataExtractor(getInfoSection().Data, - isLittleEndian(), 0); - while (DIData.isValidOffset(offset)) { - std::unique_ptr CU(new DWARFCompileUnit( - getDebugAbbrev(), getInfoSection().Data, getRangeSection(), - getStringSection(), StringRef(), getAddrSection(), - &getInfoSection().Relocs, isLittleEndian())); - if (!CU->extract(DIData, &offset)) { - break; - } - CUs.push_back(std::move(CU)); - offset = CUs.back()->getNextUnitOffset(); - } + CUs.parse(*this, getInfoSection()); } void DWARFContext::parseTypeUnits() { if (!TUs.empty()) return; for (const auto &I : getTypesSections()) { - uint32_t offset = 0; - const DataExtractor &DIData = - DataExtractor(I.second.Data, isLittleEndian(), 0); - while (DIData.isValidOffset(offset)) { - std::unique_ptr TU( - new DWARFTypeUnit(getDebugAbbrev(), I.second.Data, getRangeSection(), - getStringSection(), StringRef(), getAddrSection(), - &I.second.Relocs, isLittleEndian())); - if (!TU->extract(DIData, &offset)) - break; - TUs.push_back(std::move(TU)); - offset = TUs.back()->getNextUnitOffset(); - } + TUs.push_back(DWARFUnitSection()); + TUs.back().parse(*this, I.second); } } void DWARFContext::parseDWOCompileUnits() { - if 
(!DWOCUs.empty()) - return; - uint32_t offset = 0; - const DataExtractor &DIData = - DataExtractor(getInfoDWOSection().Data, isLittleEndian(), 0); - while (DIData.isValidOffset(offset)) { - std::unique_ptr DWOCU(new DWARFCompileUnit( - getDebugAbbrevDWO(), getInfoDWOSection().Data, getRangeDWOSection(), - getStringDWOSection(), getStringOffsetDWOSection(), getAddrSection(), - &getInfoDWOSection().Relocs, isLittleEndian())); - if (!DWOCU->extract(DIData, &offset)) { - break; - } - DWOCUs.push_back(std::move(DWOCU)); - offset = DWOCUs.back()->getNextUnitOffset(); - } + DWOCUs.parseDWO(*this, getInfoDWOSection()); } void DWARFContext::parseDWOTypeUnits() { if (!DWOTUs.empty()) return; for (const auto &I : getTypesDWOSections()) { - uint32_t offset = 0; - const DataExtractor &DIData = - DataExtractor(I.second.Data, isLittleEndian(), 0); - while (DIData.isValidOffset(offset)) { - std::unique_ptr TU(new DWARFTypeUnit( - getDebugAbbrevDWO(), I.second.Data, getRangeDWOSection(), - getStringDWOSection(), getStringOffsetDWOSection(), getAddrSection(), - &I.second.Relocs, isLittleEndian())); - if (!TU->extract(DIData, &offset)) - break; - DWOTUs.push_back(std::move(TU)); - offset = DWOTUs.back()->getNextUnitOffset(); - } + DWOTUs.push_back(DWARFUnitSection()); + DWOTUs.back().parseDWO(*this, I.second); } } -namespace { - struct OffsetComparator { - - bool operator()(const std::unique_ptr &LHS, - const std::unique_ptr &RHS) const { - return LHS->getOffset() < RHS->getOffset(); - } - bool operator()(const std::unique_ptr &LHS, - uint32_t RHS) const { - return LHS->getOffset() < RHS; - } - bool operator()(uint32_t LHS, - const std::unique_ptr &RHS) const { - return LHS < RHS->getOffset(); - } - }; -} - DWARFCompileUnit *DWARFContext::getCompileUnitForOffset(uint32_t Offset) { parseCompileUnits(); - - std::unique_ptr *CU = - std::lower_bound(CUs.begin(), CUs.end(), Offset, OffsetComparator()); - if (CU != CUs.end()) { - return CU->get(); - } - return nullptr; + return CUs.getUnitForOffset(Offset); } DWARFCompileUnit *DWARFContext::getCompileUnitForAddress(uint64_t Address) { @@ -425,47 +380,6 @@ DWARFCompileUnit *DWARFContext::getCompileUnitForAddress(uint64_t Address) { return getCompileUnitForOffset(CUOffset); } -static bool getFileNameForCompileUnit(DWARFCompileUnit *CU, - const DWARFLineTable *LineTable, - uint64_t FileIndex, FileLineInfoKind Kind, - std::string &FileName) { - if (!CU || !LineTable || Kind == FileLineInfoKind::None || - !LineTable->getFileNameByIndex(FileIndex, Kind, FileName)) - return false; - if (Kind == FileLineInfoKind::AbsoluteFilePath && - sys::path::is_relative(FileName)) { - // We may still need to append compilation directory of compile unit. - SmallString<16> AbsolutePath; - if (const char *CompilationDir = CU->getCompilationDir()) { - sys::path::append(AbsolutePath, CompilationDir); - } - sys::path::append(AbsolutePath, FileName); - FileName = AbsolutePath.str(); - } - return true; -} - -static bool getFileLineInfoForCompileUnit(DWARFCompileUnit *CU, - const DWARFLineTable *LineTable, - uint64_t Address, - FileLineInfoKind Kind, - DILineInfo &Result) { - if (!CU || !LineTable) - return false; - // Get the index of row we're looking for in the line table. - uint32_t RowIndex = LineTable->lookupAddress(Address); - if (RowIndex == -1U) - return false; - // Take file number and line/column from the row. 
- const DWARFDebugLine::Row &Row = LineTable->Rows[RowIndex]; - if (!getFileNameForCompileUnit(CU, LineTable, Row.File, Kind, - Result.FileName)) - return false; - Result.Line = Row.Line; - Result.Column = Row.Column; - return true; -} - static bool getFunctionNameForAddress(DWARFCompileUnit *CU, uint64_t Address, FunctionNameKind Kind, std::string &FunctionName) { @@ -496,8 +410,9 @@ DILineInfo DWARFContext::getLineInfoForAddress(uint64_t Address, return Result; getFunctionNameForAddress(CU, Address, Spec.FNKind, Result.FunctionName); if (Spec.FLIKind != FileLineInfoKind::None) { - const DWARFLineTable *LineTable = getLineTableForCompileUnit(CU); - getFileLineInfoForCompileUnit(CU, LineTable, Address, Spec.FLIKind, Result); + if (const DWARFLineTable *LineTable = getLineTableForUnit(CU)) + LineTable->getFileLineInfoForAddress(Address, CU->getCompilationDir(), + Spec.FLIKind, Result); } return Result; } @@ -522,7 +437,7 @@ DWARFContext::getLineInfoForAddressRange(uint64_t Address, uint64_t Size, return Lines; } - const DWARFLineTable *LineTable = getLineTableForCompileUnit(CU); + const DWARFLineTable *LineTable = getLineTableForUnit(CU); // Get the index of row we're looking for in the line table. std::vector RowVector; @@ -533,8 +448,8 @@ DWARFContext::getLineInfoForAddressRange(uint64_t Address, uint64_t Size, // Take file number and line/column from the row. const DWARFDebugLine::Row &Row = LineTable->Rows[RowIndex]; DILineInfo Result; - getFileNameForCompileUnit(CU, LineTable, Row.File, Spec.FLIKind, - Result.FileName); + LineTable->getFileNameByIndex(Row.File, CU->getCompilationDir(), + Spec.FLIKind, Result.FileName); Result.FunctionName = FunctionName; Result.Line = Row.Line; Result.Column = Row.Column; @@ -561,11 +476,11 @@ DWARFContext::getInliningInfoForAddress(uint64_t Address, // try to at least get file/line info from symbol table. if (Spec.FLIKind != FileLineInfoKind::None) { DILineInfo Frame; - LineTable = getLineTableForCompileUnit(CU); - if (getFileLineInfoForCompileUnit(CU, LineTable, Address, Spec.FLIKind, - Frame)) { + LineTable = getLineTableForUnit(CU); + if (LineTable && + LineTable->getFileLineInfoForAddress(Address, CU->getCompilationDir(), + Spec.FLIKind, Frame)) InliningInfo.addFrame(Frame); - } } return InliningInfo; } @@ -582,15 +497,17 @@ DWARFContext::getInliningInfoForAddress(uint64_t Address, if (i == 0) { // For the topmost frame, initialize the line table of this // compile unit and fetch file/line info from it. - LineTable = getLineTableForCompileUnit(CU); + LineTable = getLineTableForUnit(CU); // For the topmost routine, get file/line info from line table. - getFileLineInfoForCompileUnit(CU, LineTable, Address, Spec.FLIKind, - Frame); + if (LineTable) + LineTable->getFileLineInfoForAddress(Address, CU->getCompilationDir(), + Spec.FLIKind, Frame); } else { // Otherwise, use call file, call line and call column from // previous DIE in inlined chain. 
- getFileNameForCompileUnit(CU, LineTable, CallFile, Spec.FLIKind, - Frame.FileName); + if (LineTable) + LineTable->getFileNameByIndex(CallFile, CU->getCompilationDir(), + Spec.FLIKind, Frame.FileName); Frame.Line = CallLine; Frame.Column = CallColumn; } @@ -621,12 +538,19 @@ static bool consumeCompressedDebugSectionHeader(StringRef &data, return true; } -DWARFContextInMemory::DWARFContextInMemory(object::ObjectFile *Obj) - : IsLittleEndian(Obj->isLittleEndian()), - AddressSize(Obj->getBytesInAddress()) { - for (const SectionRef &Section : Obj->sections()) { +DWARFContextInMemory::DWARFContextInMemory(const object::ObjectFile &Obj) + : IsLittleEndian(Obj.isLittleEndian()), + AddressSize(Obj.getBytesInAddress()) { + for (const SectionRef &Section : Obj.sections()) { StringRef name; Section.getName(name); + // Skip BSS and Virtual sections, they aren't interesting. + bool IsBSS = Section.isBSS(); + if (IsBSS) + continue; + bool IsVirtual = Section.isVirtual(); + if (IsVirtual) + continue; StringRef data; Section.getContents(data); @@ -670,6 +594,11 @@ DWARFContextInMemory::DWARFContextInMemory(object::ObjectFile *Obj) .Case("debug_str.dwo", &StringDWOSection) .Case("debug_str_offsets.dwo", &StringOffsetDWOSection) .Case("debug_addr", &AddrSection) + .Case("apple_names", &AppleNamesSection.Data) + .Case("apple_types", &AppleTypesSection.Data) + .Case("apple_namespaces", &AppleNamespacesSection.Data) + .Case("apple_namespac", &AppleNamespacesSection.Data) + .Case("apple_objc", &AppleObjCSection.Data) // Any more debug info sections go here. .Default(nullptr); if (SectionData) { @@ -687,7 +616,7 @@ DWARFContextInMemory::DWARFContextInMemory(object::ObjectFile *Obj) } section_iterator RelocatedSection = Section.getRelocatedSection(); - if (RelocatedSection == Obj->section_end()) + if (RelocatedSection == Obj.section_end()) continue; StringRef RelSecName; @@ -702,6 +631,11 @@ DWARFContextInMemory::DWARFContextInMemory(object::ObjectFile *Obj) .Case("debug_loc", &LocSection.Relocs) .Case("debug_info.dwo", &InfoDWOSection.Relocs) .Case("debug_line", &LineSection.Relocs) + .Case("apple_names", &AppleNamesSection.Relocs) + .Case("apple_types", &AppleTypesSection.Relocs) + .Case("apple_namespaces", &AppleNamespacesSection.Relocs) + .Case("apple_namespac", &AppleNamespacesSection.Relocs) + .Case("apple_objc", &AppleObjCSection.Relocs) .Default(nullptr); if (!Map) { // Find debug_types relocs by section rather than name as there are @@ -715,23 +649,19 @@ DWARFContextInMemory::DWARFContextInMemory(object::ObjectFile *Obj) } if (Section.relocation_begin() != Section.relocation_end()) { - uint64_t SectionSize; - RelocatedSection->getSize(SectionSize); + uint64_t SectionSize = RelocatedSection->getSize(); for (const RelocationRef &Reloc : Section.relocations()) { uint64_t Address; Reloc.getOffset(Address); uint64_t Type; Reloc.getType(Type); uint64_t SymAddr = 0; - // ELF relocations may need the symbol address - if (Obj->isELF()) { - object::symbol_iterator Sym = Reloc.getSymbol(); + object::symbol_iterator Sym = Reloc.getSymbol(); + if (Sym != Obj.symbol_end()) Sym->getAddress(SymAddr); - } - object::RelocVisitor V(Obj->getFileFormatName()); - // The section address is always 0 for debug sections. 
- object::RelocToApply R(V.visit(Type, Reloc, 0, SymAddr)); + object::RelocVisitor V(Obj); + object::RelocToApply R(V.visit(Type, Reloc, SymAddr)); if (V.error()) { SmallString<32> Name; std::error_code ec(Reloc.getTypeName(Name)); diff --git a/lib/DebugInfo/DWARFContext.h b/lib/DebugInfo/DWARFContext.h index 6d1ae92..dd3fcc7 100644 --- a/lib/DebugInfo/DWARFContext.h +++ b/lib/DebugInfo/DWARFContext.h @@ -7,8 +7,8 @@ // //===----------------------------------------------------------------------===/ -#ifndef LLVM_DEBUGINFO_DWARFCONTEXT_H -#define LLVM_DEBUGINFO_DWARFCONTEXT_H +#ifndef LLVM_LIB_DEBUGINFO_DWARFCONTEXT_H +#define LLVM_LIB_DEBUGINFO_DWARFCONTEXT_H #include "DWARFCompileUnit.h" #include "DWARFDebugAranges.h" @@ -16,10 +16,12 @@ #include "DWARFDebugLine.h" #include "DWARFDebugLoc.h" #include "DWARFDebugRangeList.h" +#include "DWARFSection.h" #include "DWARFTypeUnit.h" #include "llvm/ADT/MapVector.h" #include "llvm/ADT/SmallVector.h" #include "llvm/DebugInfo/DIContext.h" +#include namespace llvm { @@ -28,19 +30,17 @@ namespace llvm { /// information parsing. The actual data is supplied through pure virtual /// methods that a concrete implementation provides. class DWARFContext : public DIContext { - typedef SmallVector, 1> CUVector; - typedef SmallVector, 1> TUVector; - CUVector CUs; - TUVector TUs; + DWARFUnitSection CUs; + std::vector> TUs; std::unique_ptr Abbrev; std::unique_ptr Loc; std::unique_ptr Aranges; std::unique_ptr Line; std::unique_ptr DebugFrame; - CUVector DWOCUs; - TUVector DWOTUs; + DWARFUnitSection DWOCUs; + std::vector> DWOTUs; std::unique_ptr AbbrevDWO; std::unique_ptr LocDWO; @@ -64,11 +64,6 @@ class DWARFContext : public DIContext { void parseDWOTypeUnits(); public: - struct Section { - StringRef Data; - RelocAddrMap Relocs; - }; - DWARFContext() : DIContext(CK_DWARF) {} static bool classof(const DIContext *DICtx) { @@ -77,8 +72,9 @@ public: void dump(raw_ostream &OS, DIDumpType DumpType = DIDT_All) override; - typedef iterator_range cu_iterator_range; - typedef iterator_range tu_iterator_range; + typedef DWARFUnitSection::iterator_range cu_iterator_range; + typedef DWARFUnitSection::iterator_range tu_iterator_range; + typedef iterator_range>::iterator> tu_section_iterator_range; /// Get compile units in this context. cu_iterator_range compile_units() { @@ -87,9 +83,9 @@ public: } /// Get type units in this context. - tu_iterator_range type_units() { + tu_section_iterator_range type_unit_sections() { parseTypeUnits(); - return tu_iterator_range(TUs.begin(), TUs.end()); + return tu_section_iterator_range(TUs.begin(), TUs.end()); } /// Get compile units in the DWO context. @@ -99,9 +95,9 @@ public: } /// Get type units in the DWO context. - tu_iterator_range dwo_type_units() { + tu_section_iterator_range dwo_type_unit_sections() { parseDWOTypeUnits(); - return tu_iterator_range(DWOTUs.begin(), DWOTUs.end()); + return tu_section_iterator_range(DWOTUs.begin(), DWOTUs.end()); } /// Get the number of compile units in this context. @@ -159,8 +155,7 @@ public: const DWARFDebugFrame *getDebugFrame(); /// Get a pointer to a parsed line table corresponding to a compile unit. 
- const DWARFDebugLine::LineTable * - getLineTableForCompileUnit(DWARFCompileUnit *cu); + const DWARFDebugLine::LineTable *getLineTableForUnit(DWARFUnit *cu); DILineInfo getLineInfoForAddress(uint64_t Address, DILineInfoSpecifier Specifier = DILineInfoSpecifier()) override; @@ -171,17 +166,15 @@ public: virtual bool isLittleEndian() const = 0; virtual uint8_t getAddressSize() const = 0; - virtual const Section &getInfoSection() = 0; - typedef MapVector > TypeSectionMap; + virtual const DWARFSection &getInfoSection() = 0; + typedef MapVector> TypeSectionMap; virtual const TypeSectionMap &getTypesSections() = 0; virtual StringRef getAbbrevSection() = 0; - virtual const Section &getLocSection() = 0; - virtual const Section &getLocDWOSection() = 0; + virtual const DWARFSection &getLocSection() = 0; virtual StringRef getARangeSection() = 0; virtual StringRef getDebugFrameSection() = 0; - virtual const Section &getLineSection() = 0; - virtual const Section &getLineDWOSection() = 0; + virtual const DWARFSection &getLineSection() = 0; virtual StringRef getStringSection() = 0; virtual StringRef getRangeSection() = 0; virtual StringRef getPubNamesSection() = 0; @@ -190,13 +183,19 @@ public: virtual StringRef getGnuPubTypesSection() = 0; // Sections for DWARF5 split dwarf proposal. - virtual const Section &getInfoDWOSection() = 0; + virtual const DWARFSection &getInfoDWOSection() = 0; virtual const TypeSectionMap &getTypesDWOSections() = 0; virtual StringRef getAbbrevDWOSection() = 0; + virtual const DWARFSection &getLineDWOSection() = 0; + virtual const DWARFSection &getLocDWOSection() = 0; virtual StringRef getStringDWOSection() = 0; virtual StringRef getStringOffsetDWOSection() = 0; virtual StringRef getRangeDWOSection() = 0; virtual StringRef getAddrSection() = 0; + virtual const DWARFSection& getAppleNamesSection() = 0; + virtual const DWARFSection& getAppleTypesSection() = 0; + virtual const DWARFSection& getAppleNamespacesSection() = 0; + virtual const DWARFSection& getAppleObjCSection() = 0; static bool isSupportedVersion(unsigned version) { return version == 2 || version == 3 || version == 4; @@ -217,15 +216,13 @@ class DWARFContextInMemory : public DWARFContext { virtual void anchor(); bool IsLittleEndian; uint8_t AddressSize; - Section InfoSection; + DWARFSection InfoSection; TypeSectionMap TypesSections; StringRef AbbrevSection; - Section LocSection; - Section LocDWOSection; + DWARFSection LocSection; StringRef ARangeSection; StringRef DebugFrameSection; - Section LineSection; - Section LineDWOSection; + DWARFSection LineSection; StringRef StringSection; StringRef RangeSection; StringRef PubNamesSection; @@ -234,42 +231,52 @@ class DWARFContextInMemory : public DWARFContext { StringRef GnuPubTypesSection; // Sections for DWARF5 split dwarf proposal. 
- Section InfoDWOSection; + DWARFSection InfoDWOSection; TypeSectionMap TypesDWOSections; StringRef AbbrevDWOSection; + DWARFSection LineDWOSection; + DWARFSection LocDWOSection; StringRef StringDWOSection; StringRef StringOffsetDWOSection; StringRef RangeDWOSection; StringRef AddrSection; + DWARFSection AppleNamesSection; + DWARFSection AppleTypesSection; + DWARFSection AppleNamespacesSection; + DWARFSection AppleObjCSection; SmallVector, 4> UncompressedSections; public: - DWARFContextInMemory(object::ObjectFile *); + DWARFContextInMemory(const object::ObjectFile &Obj); bool isLittleEndian() const override { return IsLittleEndian; } uint8_t getAddressSize() const override { return AddressSize; } - const Section &getInfoSection() override { return InfoSection; } + const DWARFSection &getInfoSection() override { return InfoSection; } const TypeSectionMap &getTypesSections() override { return TypesSections; } StringRef getAbbrevSection() override { return AbbrevSection; } - const Section &getLocSection() override { return LocSection; } - const Section &getLocDWOSection() override { return LocDWOSection; } + const DWARFSection &getLocSection() override { return LocSection; } StringRef getARangeSection() override { return ARangeSection; } StringRef getDebugFrameSection() override { return DebugFrameSection; } - const Section &getLineSection() override { return LineSection; } - const Section &getLineDWOSection() override { return LineDWOSection; } + const DWARFSection &getLineSection() override { return LineSection; } StringRef getStringSection() override { return StringSection; } StringRef getRangeSection() override { return RangeSection; } StringRef getPubNamesSection() override { return PubNamesSection; } StringRef getPubTypesSection() override { return PubTypesSection; } StringRef getGnuPubNamesSection() override { return GnuPubNamesSection; } StringRef getGnuPubTypesSection() override { return GnuPubTypesSection; } + const DWARFSection& getAppleNamesSection() override { return AppleNamesSection; } + const DWARFSection& getAppleTypesSection() override { return AppleTypesSection; } + const DWARFSection& getAppleNamespacesSection() override { return AppleNamespacesSection; } + const DWARFSection& getAppleObjCSection() override { return AppleObjCSection; } // Sections for DWARF5 split dwarf proposal. 
- const Section &getInfoDWOSection() override { return InfoDWOSection; } + const DWARFSection &getInfoDWOSection() override { return InfoDWOSection; } const TypeSectionMap &getTypesDWOSections() override { return TypesDWOSections; } StringRef getAbbrevDWOSection() override { return AbbrevDWOSection; } + const DWARFSection &getLineDWOSection() override { return LineDWOSection; } + const DWARFSection &getLocDWOSection() override { return LocDWOSection; } StringRef getStringDWOSection() override { return StringDWOSection; } StringRef getStringOffsetDWOSection() override { return StringOffsetDWOSection; diff --git a/lib/DebugInfo/DWARFDebugAbbrev.cpp b/lib/DebugInfo/DWARFDebugAbbrev.cpp index 8426bf9..c1a088e 100644 --- a/lib/DebugInfo/DWARFDebugAbbrev.cpp +++ b/lib/DebugInfo/DWARFDebugAbbrev.cpp @@ -30,7 +30,6 @@ bool DWARFAbbreviationDeclarationSet::extract(DataExtractor Data, DWARFAbbreviationDeclaration AbbrDecl; uint32_t PrevAbbrCode = 0; while (AbbrDecl.extract(Data, OffsetPtr)) { - Decls.push_back(AbbrDecl); if (FirstAbbrCode == 0) { FirstAbbrCode = AbbrDecl.getCode(); } else { @@ -40,6 +39,7 @@ bool DWARFAbbreviationDeclarationSet::extract(DataExtractor Data, } } PrevAbbrCode = AbbrDecl.getCode(); + Decls.push_back(std::move(AbbrDecl)); } return BeginOffset != *OffsetPtr; } @@ -82,7 +82,7 @@ void DWARFDebugAbbrev::extract(DataExtractor Data) { uint32_t CUAbbrOffset = Offset; if (!AbbrDecls.extract(Data, &Offset)) break; - AbbrDeclSets[CUAbbrOffset] = AbbrDecls; + AbbrDeclSets[CUAbbrOffset] = std::move(AbbrDecls); } } diff --git a/lib/DebugInfo/DWARFDebugAbbrev.h b/lib/DebugInfo/DWARFDebugAbbrev.h index 3a9adba..4b3b814 100644 --- a/lib/DebugInfo/DWARFDebugAbbrev.h +++ b/lib/DebugInfo/DWARFDebugAbbrev.h @@ -7,8 +7,8 @@ // //===----------------------------------------------------------------------===// -#ifndef LLVM_DEBUGINFO_DWARFDEBUGABBREV_H -#define LLVM_DEBUGINFO_DWARFDEBUGABBREV_H +#ifndef LLVM_LIB_DEBUGINFO_DWARFDEBUGABBREV_H +#define LLVM_LIB_DEBUGINFO_DWARFDEBUGABBREV_H #include "DWARFAbbreviationDeclaration.h" #include diff --git a/lib/DebugInfo/DWARFDebugArangeSet.h b/lib/DebugInfo/DWARFDebugArangeSet.h index d6c2d8b..837a8e6 100644 --- a/lib/DebugInfo/DWARFDebugArangeSet.h +++ b/lib/DebugInfo/DWARFDebugArangeSet.h @@ -7,8 +7,8 @@ // //===----------------------------------------------------------------------===// -#ifndef LLVM_DEBUGINFO_DWARFDEBUGARANGESET_H -#define LLVM_DEBUGINFO_DWARFDEBUGARANGESET_H +#ifndef LLVM_LIB_DEBUGINFO_DWARFDEBUGARANGESET_H +#define LLVM_LIB_DEBUGINFO_DWARFDEBUGARANGESET_H #include "llvm/ADT/iterator_range.h" #include "llvm/Support/DataExtractor.h" diff --git a/lib/DebugInfo/DWARFDebugAranges.h b/lib/DebugInfo/DWARFDebugAranges.h index a9f37fe..791f010 100644 --- a/lib/DebugInfo/DWARFDebugAranges.h +++ b/lib/DebugInfo/DWARFDebugAranges.h @@ -7,8 +7,8 @@ // //===----------------------------------------------------------------------===// -#ifndef LLVM_DEBUGINFO_DWARFDEBUGARANGES_H -#define LLVM_DEBUGINFO_DWARFDEBUGARANGES_H +#ifndef LLVM_LIB_DEBUGINFO_DWARFDEBUGARANGES_H +#define LLVM_LIB_DEBUGINFO_DWARFDEBUGARANGES_H #include "llvm/ADT/DenseSet.h" #include "llvm/Support/DataExtractor.h" diff --git a/lib/DebugInfo/DWARFDebugFrame.cpp b/lib/DebugInfo/DWARFDebugFrame.cpp index a33548e..dfa7e82 100644 --- a/lib/DebugInfo/DWARFDebugFrame.cpp +++ b/lib/DebugInfo/DWARFDebugFrame.cpp @@ -202,7 +202,8 @@ public: SmallString<8> Augmentation, uint64_t CodeAlignmentFactor, int64_t DataAlignmentFactor, uint64_t ReturnAddressRegister) : FrameEntry(FK_CIE, Offset, 
Length), Version(Version), - Augmentation(Augmentation), CodeAlignmentFactor(CodeAlignmentFactor), + Augmentation(std::move(Augmentation)), + CodeAlignmentFactor(CodeAlignmentFactor), DataAlignmentFactor(DataAlignmentFactor), ReturnAddressRegister(ReturnAddressRegister) {} diff --git a/lib/DebugInfo/DWARFDebugFrame.h b/lib/DebugInfo/DWARFDebugFrame.h index bd4ef45..be925cb 100644 --- a/lib/DebugInfo/DWARFDebugFrame.h +++ b/lib/DebugInfo/DWARFDebugFrame.h @@ -7,8 +7,8 @@ // //===----------------------------------------------------------------------===// -#ifndef LLVM_DEBUGINFO_DWARFDEBUGFRAME_H -#define LLVM_DEBUGINFO_DWARFDEBUGFRAME_H +#ifndef LLVM_LIB_DEBUGINFO_DWARFDEBUGFRAME_H +#define LLVM_LIB_DEBUGINFO_DWARFDEBUGFRAME_H #include "llvm/Support/DataExtractor.h" #include "llvm/Support/raw_ostream.h" diff --git a/lib/DebugInfo/DWARFDebugInfoEntry.cpp b/lib/DebugInfo/DWARFDebugInfoEntry.cpp index 2e7a54a..583e700 100644 --- a/lib/DebugInfo/DWARFDebugInfoEntry.cpp +++ b/lib/DebugInfo/DWARFDebugInfoEntry.cpp @@ -12,15 +12,26 @@ #include "DWARFContext.h" #include "DWARFDebugAbbrev.h" #include "llvm/DebugInfo/DWARFFormValue.h" +#include "llvm/Support/DataTypes.h" #include "llvm/Support/Debug.h" #include "llvm/Support/Dwarf.h" #include "llvm/Support/Format.h" #include "llvm/Support/raw_ostream.h" using namespace llvm; using namespace dwarf; -typedef DILineInfoSpecifier::FunctionNameKind FunctionNameKind; -void DWARFDebugInfoEntryMinimal::dump(raw_ostream &OS, const DWARFUnit *u, +// Small helper to extract a DIE pointed by a reference +// attribute. It looks up the Unit containing the DIE and calls +// DIE.extractFast with the right unit. Returns new unit on success, +// nullptr otherwise. +static const DWARFUnit *findUnitAndExtractFast(DWARFDebugInfoEntryMinimal &DIE, + const DWARFUnit *Unit, + uint32_t *Offset) { + Unit = Unit->getUnitSection().getUnitForOffset(*Offset); + return (Unit && DIE.extractFast(Unit, Offset)) ? 
Unit : nullptr; +} + +void DWARFDebugInfoEntryMinimal::dump(raw_ostream &OS, DWARFUnit *u, unsigned recurseDepth, unsigned indent) const { DataExtractor debug_info_data = u->getDebugInfoExtractor(); @@ -62,12 +73,42 @@ void DWARFDebugInfoEntryMinimal::dump(raw_ostream &OS, const DWARFUnit *u, } } +static void dumpApplePropertyAttribute(raw_ostream &OS, uint64_t Val) { + OS << " ("; + do { + uint64_t Bit = 1ULL << countTrailingZeros(Val); + if (const char *PropName = ApplePropertyString(Bit)) + OS << PropName; + else + OS << format("DW_APPLE_PROPERTY_0x%" PRIx64, Bit); + if (!(Val ^= Bit)) + break; + OS << ", "; + } while (true); + OS << ")"; +} + +static void dumpRanges(raw_ostream &OS, const DWARFAddressRangesVector& Ranges, + unsigned AddressSize, unsigned Indent) { + if (Ranges.empty()) + return; + + for (const auto &Range: Ranges) { + OS << '\n'; + OS.indent(Indent); + OS << format("[0x%0*" PRIx64 " - 0x%0*" PRIx64 ")", + AddressSize*2, Range.first, + AddressSize*2, Range.second); + } +} + void DWARFDebugInfoEntryMinimal::dumpAttribute(raw_ostream &OS, - const DWARFUnit *u, + DWARFUnit *u, uint32_t *offset_ptr, uint16_t attr, uint16_t form, unsigned indent) const { - OS << " "; + const char BaseIndent[] = " "; + OS << BaseIndent; OS.indent(indent+2); const char *attrString = AttributeString(attr); if (attrString) @@ -86,7 +127,48 @@ void DWARFDebugInfoEntryMinimal::dumpAttribute(raw_ostream &OS, return; OS << "\t("; - formValue.dump(OS, u); + + const char *Name = nullptr; + std::string File; + if (attr == DW_AT_decl_file || attr == DW_AT_call_file) { + if (const auto *LT = u->getContext().getLineTableForUnit(u)) + if (LT->getFileNameByIndex( + formValue.getAsUnsignedConstant().getValue(), + u->getCompilationDir(), + DILineInfoSpecifier::FileLineInfoKind::AbsoluteFilePath, File)) { + File = '"' + File + '"'; + Name = File.c_str(); + } + } else if (Optional Val = formValue.getAsUnsignedConstant()) + Name = AttributeValueString(attr, *Val); + + if (Name) { + OS << Name; + } else if (attr == DW_AT_decl_line || attr == DW_AT_call_line) { + OS << *formValue.getAsUnsignedConstant(); + } else { + formValue.dump(OS, u); + } + + // We have dumped the attribute raw value. For some attributes + // having both the raw value and the pretty-printed value is + // interesting. These attributes are handled below. + if ((attr == DW_AT_specification || attr == DW_AT_abstract_origin) && + // The signature references aren't handled. 
+ formValue.getForm() != DW_FORM_ref_sig8) { + uint32_t Ref = formValue.getAsReference(u).getValue(); + DWARFDebugInfoEntryMinimal DIE; + if (const DWARFUnit *RefU = findUnitAndExtractFast(DIE, u, &Ref)) + if (const char *Ref = DIE.getName(RefU, DINameKind::LinkageName)) + OS << " \"" << Ref << '\"'; + } else if (attr == DW_AT_APPLE_property_attribute) { + if (Optional OptVal = formValue.getAsUnsignedConstant()) + dumpApplePropertyAttribute(OS, *OptVal); + } else if (attr == DW_AT_ranges) { + dumpRanges(OS, getAddressRanges(u), u->getAddressByteSize(), + sizeof(BaseIndent)+indent+4); + } + OS << ")\n"; } @@ -284,11 +366,19 @@ bool DWARFDebugInfoEntryMinimal::addressRangeContainsAddress( const char * DWARFDebugInfoEntryMinimal::getSubroutineName(const DWARFUnit *U, - FunctionNameKind Kind) const { - if (!isSubroutineDIE() || Kind == FunctionNameKind::None) + DINameKind Kind) const { + if (!isSubroutineDIE()) + return nullptr; + return getName(U, Kind); +} + +const char * +DWARFDebugInfoEntryMinimal::getName(const DWARFUnit *U, + DINameKind Kind) const { + if (Kind == DINameKind::None) return nullptr; // Try to get mangled name only if it was asked for. - if (Kind == FunctionNameKind::LinkageName) { + if (Kind == DINameKind::LinkageName) { if (const char *name = getAttributeValueAsString(U, DW_AT_MIPS_linkage_name, nullptr)) return name; @@ -303,8 +393,8 @@ DWARFDebugInfoEntryMinimal::getSubroutineName(const DWARFUnit *U, getAttributeValueAsReference(U, DW_AT_specification, -1U); if (spec_ref != -1U) { DWARFDebugInfoEntryMinimal spec_die; - if (spec_die.extractFast(U, &spec_ref)) { - if (const char *name = spec_die.getSubroutineName(U, Kind)) + if (const DWARFUnit *RefU = findUnitAndExtractFast(spec_die, U, &spec_ref)) { + if (const char *name = spec_die.getName(RefU, Kind)) return name; } } @@ -313,8 +403,9 @@ DWARFDebugInfoEntryMinimal::getSubroutineName(const DWARFUnit *U, getAttributeValueAsReference(U, DW_AT_abstract_origin, -1U); if (abs_origin_ref != -1U) { DWARFDebugInfoEntryMinimal abs_origin_die; - if (abs_origin_die.extractFast(U, &abs_origin_ref)) { - if (const char *name = abs_origin_die.getSubroutineName(U, Kind)) + if (const DWARFUnit *RefU = findUnitAndExtractFast(abs_origin_die, U, + &abs_origin_ref)) { + if (const char *name = abs_origin_die.getName(RefU, Kind)) return name; } } diff --git a/lib/DebugInfo/DWARFDebugInfoEntry.h b/lib/DebugInfo/DWARFDebugInfoEntry.h index cc58eb6..7e7efb9 100644 --- a/lib/DebugInfo/DWARFDebugInfoEntry.h +++ b/lib/DebugInfo/DWARFDebugInfoEntry.h @@ -7,8 +7,8 @@ // //===----------------------------------------------------------------------===// -#ifndef LLVM_DEBUGINFO_DWARFDEBUGINFOENTRY_H -#define LLVM_DEBUGINFO_DWARFDEBUGINFOENTRY_H +#ifndef LLVM_LIB_DEBUGINFO_DWARFDEBUGINFOENTRY_H +#define LLVM_LIB_DEBUGINFO_DWARFDEBUGINFOENTRY_H #include "DWARFAbbreviationDeclaration.h" #include "DWARFDebugRangeList.h" @@ -38,9 +38,9 @@ public: DWARFDebugInfoEntryMinimal() : Offset(0), SiblingIdx(0), AbbrevDecl(nullptr) {} - void dump(raw_ostream &OS, const DWARFUnit *u, unsigned recurseDepth, + void dump(raw_ostream &OS, DWARFUnit *u, unsigned recurseDepth, unsigned indent = 0) const; - void dumpAttribute(raw_ostream &OS, const DWARFUnit *u, uint32_t *offset_ptr, + void dumpAttribute(raw_ostream &OS, DWARFUnit *u, uint32_t *offset_ptr, uint16_t attr, uint16_t form, unsigned indent = 0) const; /// Extracts a debug info entry, which is a child of a given unit, @@ -125,9 +125,12 @@ public: /// returns its mangled name (or short name, if mangled is missing). 
  /// This name may be fetched from specification or abstract origin
  /// for this subprogram. Returns null if no name is found.
-  const char *
-  getSubroutineName(const DWARFUnit *U,
-                    DILineInfoSpecifier::FunctionNameKind Kind) const;
+  const char *getSubroutineName(const DWARFUnit *U, DINameKind Kind) const;
+
+  /// Return the DIE name resolving DW_AT_specification or
+  /// DW_AT_abstract_origin references if necessary.
+  /// Returns null if no name is found.
+  const char *getName(const DWARFUnit *U, DINameKind Kind) const;
 
   /// Retrieves values of DW_AT_call_file, DW_AT_call_line and
   /// DW_AT_call_column from DIE (or zeroes if they are missing).
diff --git a/lib/DebugInfo/DWARFDebugLine.cpp b/lib/DebugInfo/DWARFDebugLine.cpp
index ce87635..a6ee461 100644
--- a/lib/DebugInfo/DWARFDebugLine.cpp
+++ b/lib/DebugInfo/DWARFDebugLine.cpp
@@ -644,6 +644,7 @@ bool DWARFDebugLine::LineTable::lookupAddressRange(
 
 bool
 DWARFDebugLine::LineTable::getFileNameByIndex(uint64_t FileIndex,
+                                              const char *CompDir,
                                               FileLineInfoKind Kind,
                                               std::string &Result) const {
   if (FileIndex == 0 || FileIndex > Prologue.FileNames.size() ||
@@ -656,15 +657,42 @@ DWARFDebugLine::LineTable::getFileNameByIndex(uint64_t FileIndex,
     Result = FileName;
     return true;
   }
+
   SmallString<16> FilePath;
   uint64_t IncludeDirIndex = Entry.DirIdx;
+  const char *IncludeDir = "";
   // Be defensive about the contents of Entry.
   if (IncludeDirIndex > 0 &&
-      IncludeDirIndex <= Prologue.IncludeDirectories.size()) {
-    const char *IncludeDir = Prologue.IncludeDirectories[IncludeDirIndex - 1];
-    sys::path::append(FilePath, IncludeDir);
-  }
-  sys::path::append(FilePath, FileName);
+      IncludeDirIndex <= Prologue.IncludeDirectories.size())
+    IncludeDir = Prologue.IncludeDirectories[IncludeDirIndex - 1];
+
+  // We may still need to append compilation directory of compile unit.
+  // We know that FileName is not absolute, the only way to have an
+  // absolute path at this point would be if IncludeDir is absolute.
+  if (CompDir && Kind == FileLineInfoKind::AbsoluteFilePath &&
+      sys::path::is_relative(IncludeDir))
+    sys::path::append(FilePath, CompDir);
+
+  // sys::path::append skips empty strings.
+  sys::path::append(FilePath, IncludeDir, FileName);
   Result = FilePath.str();
   return true;
 }
+
+bool
+DWARFDebugLine::LineTable::getFileLineInfoForAddress(uint64_t Address,
+                                                     const char *CompDir,
+                                                     FileLineInfoKind Kind,
+                                                     DILineInfo &Result) const {
+  // Get the index of row we're looking for in the line table.
+  uint32_t RowIndex = lookupAddress(Address);
+  if (RowIndex == -1U)
+    return false;
+  // Take file number and line/column from the row.
+  const auto &Row = Rows[RowIndex];
+  if (!getFileNameByIndex(Row.File, CompDir, Kind, Result.FileName))
+    return false;
+  Result.Line = Row.Line;
+  Result.Column = Row.Column;
+  return true;
+}
diff --git a/lib/DebugInfo/DWARFDebugLine.h b/lib/DebugInfo/DWARFDebugLine.h
index c7b7ec2..7a6f1bd 100644
--- a/lib/DebugInfo/DWARFDebugLine.h
+++ b/lib/DebugInfo/DWARFDebugLine.h
@@ -7,8 +7,8 @@
 //
 //===----------------------------------------------------------------------===//
 
-#ifndef LLVM_DEBUGINFO_DWARFDEBUGLINE_H
-#define LLVM_DEBUGINFO_DWARFDEBUGLINE_H
+#ifndef LLVM_LIB_DEBUGINFO_DWARFDEBUGLINE_H
+#define LLVM_LIB_DEBUGINFO_DWARFDEBUGLINE_H
 
 #include "DWARFRelocMap.h"
 #include "llvm/DebugInfo/DIContext.h"
@@ -179,10 +179,16 @@ public:
 
     // Extracts filename by its index in filename table in prologue.
    // Returns true on success.
- bool getFileNameByIndex(uint64_t FileIndex, + bool getFileNameByIndex(uint64_t FileIndex, const char *CompDir, DILineInfoSpecifier::FileLineInfoKind Kind, std::string &Result) const; + // Fills the Result argument with the file and line information + // corresponding to Address. Returns true on success. + bool getFileLineInfoForAddress(uint64_t Address, const char *CompDir, + DILineInfoSpecifier::FileLineInfoKind Kind, + DILineInfo &Result) const; + void dump(raw_ostream &OS) const; void clear(); diff --git a/lib/DebugInfo/DWARFDebugLoc.h b/lib/DebugInfo/DWARFDebugLoc.h index 663acbb4..50110b3 100644 --- a/lib/DebugInfo/DWARFDebugLoc.h +++ b/lib/DebugInfo/DWARFDebugLoc.h @@ -7,8 +7,8 @@ // //===----------------------------------------------------------------------===// -#ifndef LLVM_DEBUGINFO_DWARFDEBUGLOC_H -#define LLVM_DEBUGINFO_DWARFDEBUGLOC_H +#ifndef LLVM_LIB_DEBUGINFO_DWARFDEBUGLOC_H +#define LLVM_LIB_DEBUGINFO_DWARFDEBUGLOC_H #include "DWARFRelocMap.h" #include "llvm/ADT/SmallVector.h" diff --git a/lib/DebugInfo/DWARFDebugRangeList.h b/lib/DebugInfo/DWARFDebugRangeList.h index 587b550..4ee3bda 100644 --- a/lib/DebugInfo/DWARFDebugRangeList.h +++ b/lib/DebugInfo/DWARFDebugRangeList.h @@ -7,8 +7,8 @@ // //===----------------------------------------------------------------------===// -#ifndef LLVM_DEBUGINFO_DWARFDEBUGRANGELIST_H -#define LLVM_DEBUGINFO_DWARFDEBUGRANGELIST_H +#ifndef LLVM_LIB_DEBUGINFO_DWARFDEBUGRANGELIST_H +#define LLVM_LIB_DEBUGINFO_DWARFDEBUGRANGELIST_H #include "llvm/Support/DataExtractor.h" #include diff --git a/lib/DebugInfo/DWARFFormValue.cpp b/lib/DebugInfo/DWARFFormValue.cpp index 8d0f966..69b9771 100644 --- a/lib/DebugInfo/DWARFFormValue.cpp +++ b/lib/DebugInfo/DWARFFormValue.cpp @@ -139,6 +139,8 @@ bool DWARFFormValue::extractValue(DataExtractor data, uint32_t *offset_ptr, switch (Form) { case DW_FORM_addr: case DW_FORM_ref_addr: { + if (!cu) + return false; uint16_t AddrSize = (Form == DW_FORM_addr) ? cu->getAddressByteSize() @@ -179,8 +181,10 @@ bool DWARFFormValue::extractValue(DataExtractor data, uint32_t *offset_ptr, break; case DW_FORM_data4: case DW_FORM_ref4: { - RelocAddrMap::const_iterator AI = cu->getRelocMap()->find(*offset_ptr); Value.uval = data.getU32(offset_ptr); + if (!cu) + break; + RelocAddrMap::const_iterator AI = cu->getRelocMap()->find(*offset_ptr-4); if (AI != cu->getRelocMap()->end()) Value.uval += AI->second.second; break; @@ -193,13 +197,12 @@ bool DWARFFormValue::extractValue(DataExtractor data, uint32_t *offset_ptr, Value.sval = data.getSLEB128(offset_ptr); break; case DW_FORM_strp: { - RelocAddrMap::const_iterator AI - = cu->getRelocMap()->find(*offset_ptr); - if (AI != cu->getRelocMap()->end()) { - const std::pair &R = AI->second; - Value.uval = data.getU32(offset_ptr) + R.second; - } else - Value.uval = data.getU32(offset_ptr); + Value.uval = data.getU32(offset_ptr); + if (!cu) + break; + RelocAddrMap::const_iterator AI = cu->getRelocMap()->find(*offset_ptr-4); + if (AI != cu->getRelocMap()->end()) + Value.uval += AI->second.second; break; } case DW_FORM_udata: @@ -215,13 +218,12 @@ bool DWARFFormValue::extractValue(DataExtractor data, uint32_t *offset_ptr, break; case DW_FORM_sec_offset: { // FIXME: This is 64-bit for DWARF64. 
-    RelocAddrMap::const_iterator AI
-      = cu->getRelocMap()->find(*offset_ptr);
-    if (AI != cu->getRelocMap()->end()) {
-      const std::pair<uint8_t, int64_t> &R = AI->second;
-      Value.uval = data.getU32(offset_ptr) + R.second;
-    } else
-      Value.uval = data.getU32(offset_ptr);
+    Value.uval = data.getU32(offset_ptr);
+    if (!cu)
+      break;
+    RelocAddrMap::const_iterator AI = cu->getRelocMap()->find(*offset_ptr-4);
+    if (AI != cu->getRelocMap()->end())
+      Value.uval += AI->second.second;
     break;
   }
   case DW_FORM_flag_present:
@@ -360,8 +362,6 @@ DWARFFormValue::skipValue(uint16_t form, DataExtractor debug_info_data,
 
 void
 DWARFFormValue::dump(raw_ostream &OS, const DWARFUnit *cu) const {
-  DataExtractor debug_str_data(cu->getStringSection(), true, 0);
-  DataExtractor debug_str_offset_data(cu->getStringOffsetSection(), true, 0);
   uint64_t uvalue = Value.uval;
   bool cu_relative_offset = false;
 
@@ -543,7 +543,15 @@ Optional<uint64_t> DWARFFormValue::getAsSectionOffset() const {
 }
 
 Optional<uint64_t> DWARFFormValue::getAsUnsignedConstant() const {
-  if (!isFormClass(FC_Constant) || Form == DW_FORM_sdata)
+  if ((!isFormClass(FC_Constant) && !isFormClass(FC_Flag))
+      || Form == DW_FORM_sdata)
     return None;
   return Value.uval;
 }
+
+Optional<ArrayRef<uint8_t>> DWARFFormValue::getAsBlock() const {
+  if (!isFormClass(FC_Block) && !isFormClass(FC_Exprloc))
+    return None;
+  return ArrayRef<uint8_t>(Value.data, Value.uval);
+}
+
diff --git a/lib/DebugInfo/DWARFRelocMap.h b/lib/DebugInfo/DWARFRelocMap.h
index 6929e36..d7fe303 100644
--- a/lib/DebugInfo/DWARFRelocMap.h
+++ b/lib/DebugInfo/DWARFRelocMap.h
@@ -7,8 +7,8 @@
 //
 //===----------------------------------------------------------------------===//
 
-#ifndef LLVM_DEBUGINFO_DWARFRELOCMAP_H
-#define LLVM_DEBUGINFO_DWARFRELOCMAP_H
+#ifndef LLVM_LIB_DEBUGINFO_DWARFRELOCMAP_H
+#define LLVM_LIB_DEBUGINFO_DWARFRELOCMAP_H
 
 #include "llvm/ADT/DenseMap.h"
 
@@ -18,5 +18,5 @@
 typedef DenseMap<uint64_t, std::pair<uint8_t, int64_t> > RelocAddrMap;
 
 } // namespace llvm
 
-#endif // LLVM_DEBUGINFO_DWARFRELOCMAP_H
+#endif
diff --git a/lib/DebugInfo/DWARFSection.h b/lib/DebugInfo/DWARFSection.h
new file mode 100644
index 0000000..3aaf0ff
--- /dev/null
+++ b/lib/DebugInfo/DWARFSection.h
@@ -0,0 +1,24 @@
+//===-- DWARFSection.h ------------------------------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_DEBUGINFO_DWARFSECTION_H +#define LLVM_LIB_DEBUGINFO_DWARFSECTION_H + +#include "DWARFRelocMap.h" + +namespace llvm { + +struct DWARFSection { + StringRef Data; + RelocAddrMap Relocs; +}; + +} + +#endif diff --git a/lib/DebugInfo/DWARFTypeUnit.h b/lib/DebugInfo/DWARFTypeUnit.h index cf773b8..7471b5a 100644 --- a/lib/DebugInfo/DWARFTypeUnit.h +++ b/lib/DebugInfo/DWARFTypeUnit.h @@ -7,8 +7,8 @@ // //===----------------------------------------------------------------------===// -#ifndef LLVM_DEBUGINFO_DWARFTYPEUNIT_H -#define LLVM_DEBUGINFO_DWARFTYPEUNIT_H +#ifndef LLVM_LIB_DEBUGINFO_DWARFTYPEUNIT_H +#define LLVM_LIB_DEBUGINFO_DWARFTYPEUNIT_H #include "DWARFUnit.h" @@ -19,10 +19,11 @@ private: uint64_t TypeHash; uint32_t TypeOffset; public: - DWARFTypeUnit(const DWARFDebugAbbrev *DA, StringRef IS, StringRef RS, - StringRef SS, StringRef SOS, StringRef AOS, - const RelocAddrMap *M, bool LE) - : DWARFUnit(DA, IS, RS, SS, SOS, AOS, M, LE) {} + DWARFTypeUnit(DWARFContext &Context, const DWARFSection &Section, + const DWARFDebugAbbrev *DA, StringRef RS, StringRef SS, + StringRef SOS, StringRef AOS, bool LE, + const DWARFUnitSectionBase &UnitSection) + : DWARFUnit(Context, Section, DA, RS, SS, SOS, AOS, LE, UnitSection) {} uint32_t getHeaderSize() const override { return DWARFUnit::getHeaderSize() + 12; } diff --git a/lib/DebugInfo/DWARFUnit.cpp b/lib/DebugInfo/DWARFUnit.cpp index 39d0a0f..82c4529 100644 --- a/lib/DebugInfo/DWARFUnit.cpp +++ b/lib/DebugInfo/DWARFUnit.cpp @@ -17,12 +17,26 @@ using namespace llvm; using namespace dwarf; -DWARFUnit::DWARFUnit(const DWARFDebugAbbrev *DA, StringRef IS, StringRef RS, - StringRef SS, StringRef SOS, StringRef AOS, - const RelocAddrMap *M, bool LE) - : Abbrev(DA), InfoSection(IS), RangeSection(RS), StringSection(SS), - StringOffsetSection(SOS), AddrOffsetSection(AOS), RelocMap(M), - isLittleEndian(LE) { +void DWARFUnitSectionBase::parse(DWARFContext &C, const DWARFSection &Section) { + parseImpl(C, Section, C.getDebugAbbrev(), C.getRangeSection(), + C.getStringSection(), StringRef(), C.getAddrSection(), + C.isLittleEndian()); +} + +void DWARFUnitSectionBase::parseDWO(DWARFContext &C, + const DWARFSection &DWOSection) { + parseImpl(C, DWOSection, C.getDebugAbbrevDWO(), C.getRangeDWOSection(), + C.getStringDWOSection(), C.getStringOffsetDWOSection(), + C.getAddrSection(), C.isLittleEndian()); +} + +DWARFUnit::DWARFUnit(DWARFContext &DC, const DWARFSection &Section, + const DWARFDebugAbbrev *DA, StringRef RS, StringRef SS, + StringRef SOS, StringRef AOS, bool LE, + const DWARFUnitSectionBase &UnitSection) + : Context(DC), InfoSection(Section), Abbrev(DA), RangeSection(RS), + StringSection(SS), StringOffsetSection(SOS), AddrOffsetSection(AOS), + isLittleEndian(LE), UnitSection(UnitSection) { clear(); } @@ -235,10 +249,14 @@ size_t DWARFUnit::extractDIEsIfNeeded(bool CUDieOnly) { return DieArray.size(); } -DWARFUnit::DWOHolder::DWOHolder(object::ObjectFile *DWOFile) - : DWOFile(DWOFile), - DWOContext(cast(DIContext::getDWARFContext(DWOFile))), - DWOU(nullptr) { +DWARFUnit::DWOHolder::DWOHolder(StringRef DWOPath) + : DWOFile(), DWOContext(), DWOU(nullptr) { + auto Obj = object::ObjectFile::createObjectFile(DWOPath); + if (!Obj) + return; + DWOFile = std::move(Obj.get()); + DWOContext.reset( + cast(DIContext::getDWARFContext(*DWOFile.getBinary()))); if (DWOContext->getNumDWOCompileUnits() > 0) DWOU = DWOContext->getDWOCompileUnitAtIndex(0); } @@ -260,12 +278,7 
@@ bool DWARFUnit::parseDWO() { sys::path::append(AbsolutePath, CompilationDir); } sys::path::append(AbsolutePath, DWOFileName); - ErrorOr DWOFile = - object::ObjectFile::createObjectFile(AbsolutePath); - if (!DWOFile) - return false; - // Reset DWOHolder. - DWO.reset(new DWOHolder(DWOFile.get())); + DWO = llvm::make_unique(AbsolutePath); DWARFUnit *DWOCU = DWO->getUnit(); // Verify that compile unit in .dwo file is valid. if (!DWOCU || DWOCU->getDWOId() != getDWOId()) { diff --git a/lib/DebugInfo/DWARFUnit.h b/lib/DebugInfo/DWARFUnit.h index 471da36..786f00f 100644 --- a/lib/DebugInfo/DWARFUnit.h +++ b/lib/DebugInfo/DWARFUnit.h @@ -7,13 +7,14 @@ // //===----------------------------------------------------------------------===// -#ifndef LLVM_DEBUGINFO_DWARFUNIT_H -#define LLVM_DEBUGINFO_DWARFUNIT_H +#ifndef LLVM_LIB_DEBUGINFO_DWARFUNIT_H +#define LLVM_LIB_DEBUGINFO_DWARFUNIT_H #include "DWARFDebugAbbrev.h" #include "DWARFDebugInfoEntry.h" #include "DWARFDebugRangeList.h" #include "DWARFRelocMap.h" +#include "DWARFSection.h" #include namespace llvm { @@ -22,21 +23,96 @@ namespace object { class ObjectFile; } +class DWARFContext; class DWARFDebugAbbrev; +class DWARFUnit; class StringRef; class raw_ostream; +/// Base class for all DWARFUnitSection classes. This provides the +/// functionality common to all unit types. +class DWARFUnitSectionBase { +public: + /// Returns the Unit that contains the given section offset in the + /// same section this Unit originated from. + virtual DWARFUnit *getUnitForOffset(uint32_t Offset) const = 0; + + void parse(DWARFContext &C, const DWARFSection &Section); + void parseDWO(DWARFContext &C, const DWARFSection &DWOSection); + +protected: + virtual void parseImpl(DWARFContext &Context, const DWARFSection &Section, + const DWARFDebugAbbrev *DA, StringRef RS, StringRef SS, + StringRef SOS, StringRef AOS, bool isLittleEndian) = 0; + + ~DWARFUnitSectionBase() {} +}; + +/// Concrete instance of DWARFUnitSection, specialized for one Unit type. +template +class DWARFUnitSection final : public SmallVector, 1>, + public DWARFUnitSectionBase { + + struct UnitOffsetComparator { + bool operator()(uint32_t LHS, + const std::unique_ptr &RHS) const { + return LHS < RHS->getNextUnitOffset(); + } + }; + + bool Parsed; + +public: + DWARFUnitSection() : Parsed(false) {} + DWARFUnitSection(DWARFUnitSection &&DUS) : + SmallVector, 1>(std::move(DUS)), Parsed(DUS.Parsed) {} + + typedef llvm::SmallVectorImpl> UnitVector; + typedef typename UnitVector::iterator iterator; + typedef llvm::iterator_range iterator_range; + + UnitType *getUnitForOffset(uint32_t Offset) const override { + auto *CU = std::upper_bound(this->begin(), this->end(), Offset, + UnitOffsetComparator()); + if (CU != this->end()) + return CU->get(); + return nullptr; + } + +private: + void parseImpl(DWARFContext &Context, const DWARFSection &Section, + const DWARFDebugAbbrev *DA, StringRef RS, StringRef SS, + StringRef SOS, StringRef AOS, bool LE) override { + if (Parsed) + return; + DataExtractor Data(Section.Data, LE, 0); + uint32_t Offset = 0; + while (Data.isValidOffset(Offset)) { + auto U = llvm::make_unique(Context, Section, DA, RS, SS, SOS, + AOS, LE, *this); + if (!U->extract(Data, &Offset)) + break; + this->push_back(std::move(U)); + Offset = this->back()->getNextUnitOffset(); + } + Parsed = true; + } +}; + class DWARFUnit { + DWARFContext &Context; + // Section containing this DWARFUnit. 
+ const DWARFSection &InfoSection; + const DWARFDebugAbbrev *Abbrev; - StringRef InfoSection; StringRef RangeSection; uint32_t RangeSectionBase; StringRef StringSection; StringRef StringOffsetSection; StringRef AddrOffsetSection; uint32_t AddrOffsetSectionBase; - const RelocAddrMap *RelocMap; bool isLittleEndian; + const DWARFUnitSectionBase &UnitSection; uint32_t Offset; uint32_t Length; @@ -48,11 +124,11 @@ class DWARFUnit { std::vector DieArray; class DWOHolder { - std::unique_ptr DWOFile; + object::OwningBinary DWOFile; std::unique_ptr DWOContext; DWARFUnit *DWOU; public: - DWOHolder(object::ObjectFile *DWOFile); + DWOHolder(StringRef DWOPath); DWARFUnit *getUnit() const { return DWOU; } }; std::unique_ptr DWO; @@ -63,12 +139,15 @@ protected: virtual uint32_t getHeaderSize() const { return 11; } public: - DWARFUnit(const DWARFDebugAbbrev *DA, StringRef IS, StringRef RS, - StringRef SS, StringRef SOS, StringRef AOS, const RelocAddrMap *M, - bool LE); + DWARFUnit(DWARFContext &Context, const DWARFSection &Section, + const DWARFDebugAbbrev *DA, StringRef RS, StringRef SS, + StringRef SOS, StringRef AOS, bool LE, + const DWARFUnitSectionBase &UnitSection); virtual ~DWARFUnit(); + DWARFContext& getContext() const { return Context; } + StringRef getStringSection() const { return StringSection; } StringRef getStringOffsetSection() const { return StringOffsetSection; } void setAddrOffsetSection(StringRef AOS, uint32_t Base) { @@ -85,13 +164,13 @@ public: bool getStringOffsetSectionItem(uint32_t Index, uint32_t &Result) const; DataExtractor getDebugInfoExtractor() const { - return DataExtractor(InfoSection, isLittleEndian, AddrSize); + return DataExtractor(InfoSection.Data, isLittleEndian, AddrSize); } DataExtractor getStringExtractor() const { return DataExtractor(StringSection, false, 0); } - const RelocAddrMap *getRelocMap() const { return RelocMap; } + const RelocAddrMap *getRelocMap() const { return &InfoSection.Relocs; } bool extract(DataExtractor debug_info, uint32_t* offset_ptr); @@ -131,6 +210,9 @@ public: /// chain is valid as long as parsed compile unit DIEs are not cleared. DWARFDebugInfoEntryInlinedChain getInlinedChainForAddress(uint64_t Address); + /// getUnitSection - Return the DWARFUnitSection containing this unit. + const DWARFUnitSectionBase &getUnitSection() const { return UnitSection; } + private: /// Size in bytes of the .debug_info data associated with this compile unit. 
size_t getDebugInfoSize() const { return Length + 4 - getHeaderSize(); } diff --git a/lib/ExecutionEngine/CMakeLists.txt b/lib/ExecutionEngine/CMakeLists.txt index 3102c7b..fae5bb9 100644 --- a/lib/ExecutionEngine/CMakeLists.txt +++ b/lib/ExecutionEngine/CMakeLists.txt @@ -3,12 +3,12 @@ add_llvm_library(LLVMExecutionEngine ExecutionEngine.cpp ExecutionEngineBindings.cpp + JITEventListener.cpp RTDyldMemoryManager.cpp TargetSelect.cpp ) add_subdirectory(Interpreter) -add_subdirectory(JIT) add_subdirectory(MCJIT) add_subdirectory(RuntimeDyld) diff --git a/lib/ExecutionEngine/ExecutionEngine.cpp b/lib/ExecutionEngine/ExecutionEngine.cpp index b0e985d..5a6d656 100644 --- a/lib/ExecutionEngine/ExecutionEngine.cpp +++ b/lib/ExecutionEngine/ExecutionEngine.cpp @@ -16,7 +16,7 @@ #include "llvm/ADT/SmallString.h" #include "llvm/ADT/Statistic.h" #include "llvm/ExecutionEngine/GenericValue.h" -#include "llvm/ExecutionEngine/JITMemoryManager.h" +#include "llvm/ExecutionEngine/ObjectBuffer.h" #include "llvm/ExecutionEngine/ObjectCache.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DataLayout.h" @@ -24,6 +24,7 @@ #include "llvm/IR/Module.h" #include "llvm/IR/Operator.h" #include "llvm/IR/ValueHandle.h" +#include "llvm/Object/Archive.h" #include "llvm/Object/ObjectFile.h" #include "llvm/Support/Debug.h" #include "llvm/Support/DynamicLibrary.h" @@ -47,22 +48,13 @@ void ObjectCache::anchor() {} void ObjectBuffer::anchor() {} void ObjectBufferStream::anchor() {} -ExecutionEngine *(*ExecutionEngine::JITCtor)( - Module *M, - std::string *ErrorStr, - JITMemoryManager *JMM, - bool GVsWithCode, - TargetMachine *TM) = nullptr; ExecutionEngine *(*ExecutionEngine::MCJITCtor)( - Module *M, - std::string *ErrorStr, - RTDyldMemoryManager *MCJMM, - bool GVsWithCode, - TargetMachine *TM) = nullptr; -ExecutionEngine *(*ExecutionEngine::InterpCtor)(Module *M, + std::unique_ptr M, std::string *ErrorStr, + RTDyldMemoryManager *MCJMM, std::unique_ptr TM) = nullptr; +ExecutionEngine *(*ExecutionEngine::InterpCtor)(std::unique_ptr M, std::string *ErrorStr) =nullptr; -ExecutionEngine::ExecutionEngine(Module *M) +ExecutionEngine::ExecutionEngine(std::unique_ptr M) : EEState(*this), LazyFunctionCreator(nullptr) { CompilingLazily = false; @@ -77,14 +69,12 @@ ExecutionEngine::ExecutionEngine(Module *M) VerifyModules = false; #endif - Modules.push_back(M); assert(M && "Module is null?"); + Modules.push_back(std::move(M)); } ExecutionEngine::~ExecutionEngine() { clearAllGlobalMappings(); - for (unsigned i = 0, e = Modules.size(); i != e; ++i) - delete Modules[i]; } namespace { @@ -101,8 +91,8 @@ public: Type *ElTy = GV->getType()->getElementType(); size_t GVSize = (size_t)TD.getTypeAllocSize(ElTy); void *RawMemory = ::operator new( - DataLayout::RoundUpAlignment(sizeof(GVMemoryBlock), - TD.getPreferredAlignment(GV)) + RoundUpToAlignment(sizeof(GVMemoryBlock), + TD.getPreferredAlignment(GV)) + GVSize); new(RawMemory) GVMemoryBlock(GV); return static_cast(RawMemory) + sizeof(GVMemoryBlock); @@ -126,11 +116,20 @@ void ExecutionEngine::addObjectFile(std::unique_ptr O) { llvm_unreachable("ExecutionEngine subclass doesn't implement addObjectFile."); } +void +ExecutionEngine::addObjectFile(object::OwningBinary O) { + llvm_unreachable("ExecutionEngine subclass doesn't implement addObjectFile."); +} + +void ExecutionEngine::addArchive(object::OwningBinary A) { + llvm_unreachable("ExecutionEngine subclass doesn't implement addArchive."); +} + bool ExecutionEngine::removeModule(Module *M) { - for(SmallVectorImpl::iterator I = 
Modules.begin(), - E = Modules.end(); I != E; ++I) { - Module *Found = *I; + for (auto I = Modules.begin(), E = Modules.end(); I != E; ++I) { + Module *Found = I->get(); if (Found == M) { + I->release(); Modules.erase(I); clearGlobalMappingsFromModule(M); return true; @@ -254,19 +253,9 @@ const GlobalValue *ExecutionEngine::getGlobalValueAtAddress(void *Addr) { namespace { class ArgvArray { - char *Array; - std::vector Values; + std::unique_ptr Array; + std::vector> Values; public: - ArgvArray() : Array(nullptr) {} - ~ArgvArray() { clear(); } - void clear() { - delete[] Array; - Array = nullptr; - for (size_t I = 0, E = Values.size(); I != E; ++I) { - delete[] Values[I]; - } - Values.clear(); - } /// Turn a vector of strings into a nice argv style array of pointers to null /// terminated strings. void *reset(LLVMContext &C, ExecutionEngine *EE, @@ -275,38 +264,39 @@ public: } // anonymous namespace void *ArgvArray::reset(LLVMContext &C, ExecutionEngine *EE, const std::vector &InputArgv) { - clear(); // Free the old contents. + Values.clear(); // Free the old contents. + Values.reserve(InputArgv.size()); unsigned PtrSize = EE->getDataLayout()->getPointerSize(); - Array = new char[(InputArgv.size()+1)*PtrSize]; + Array = make_unique((InputArgv.size()+1)*PtrSize); - DEBUG(dbgs() << "JIT: ARGV = " << (void*)Array << "\n"); + DEBUG(dbgs() << "JIT: ARGV = " << (void*)Array.get() << "\n"); Type *SBytePtr = Type::getInt8PtrTy(C); for (unsigned i = 0; i != InputArgv.size(); ++i) { unsigned Size = InputArgv[i].size()+1; - char *Dest = new char[Size]; - Values.push_back(Dest); - DEBUG(dbgs() << "JIT: ARGV[" << i << "] = " << (void*)Dest << "\n"); + auto Dest = make_unique(Size); + DEBUG(dbgs() << "JIT: ARGV[" << i << "] = " << (void*)Dest.get() << "\n"); - std::copy(InputArgv[i].begin(), InputArgv[i].end(), Dest); + std::copy(InputArgv[i].begin(), InputArgv[i].end(), Dest.get()); Dest[Size-1] = 0; // Endian safe: Array[i] = (PointerTy)Dest; - EE->StoreValueToMemory(PTOGV(Dest), (GenericValue*)(Array+i*PtrSize), - SBytePtr); + EE->StoreValueToMemory(PTOGV(Dest.get()), + (GenericValue*)(&Array[i*PtrSize]), SBytePtr); + Values.push_back(std::move(Dest)); } // Null terminate it EE->StoreValueToMemory(PTOGV(nullptr), - (GenericValue*)(Array+InputArgv.size()*PtrSize), + (GenericValue*)(&Array[InputArgv.size()*PtrSize]), SBytePtr); - return Array; + return Array.get(); } -void ExecutionEngine::runStaticConstructorsDestructors(Module *module, +void ExecutionEngine::runStaticConstructorsDestructors(Module &module, bool isDtors) { const char *Name = isDtors ? "llvm.global_dtors" : "llvm.global_ctors"; - GlobalVariable *GV = module->getNamedGlobal(Name); + GlobalVariable *GV = module.getNamedGlobal(Name); // If this global has internal linkage, or if it has a use, then it must be // an old-style (llvmgcc3) static ctor with __main linked in and in use. If @@ -344,8 +334,8 @@ void ExecutionEngine::runStaticConstructorsDestructors(Module *module, void ExecutionEngine::runStaticConstructorsDestructors(bool isDtors) { // Execute global ctors/dtors for each module in the program. 
- for (unsigned i = 0, e = Modules.size(); i != e; ++i) - runStaticConstructorsDestructors(Modules[i], isDtors); + for (std::unique_ptr &M : Modules) + runStaticConstructorsDestructors(*M, isDtors); } #ifndef NDEBUG @@ -406,68 +396,14 @@ int ExecutionEngine::runFunctionAsMain(Function *Fn, return runFunction(Fn, GVArgs).IntVal.getZExtValue(); } -ExecutionEngine *ExecutionEngine::create(Module *M, - bool ForceInterpreter, - std::string *ErrorStr, - CodeGenOpt::Level OptLevel, - bool GVsWithCode) { - - EngineBuilder EB = - EngineBuilder(M) - .setEngineKind(ForceInterpreter ? EngineKind::Interpreter - : EngineKind::Either) - .setErrorStr(ErrorStr) - .setOptLevel(OptLevel) - .setAllocateGVsWithCode(GVsWithCode); - - return EB.create(); -} - -/// createJIT - This is the factory method for creating a JIT for the current -/// machine, it does not fall back to the interpreter. This takes ownership -/// of the module. -ExecutionEngine *ExecutionEngine::createJIT(Module *M, - std::string *ErrorStr, - JITMemoryManager *JMM, - CodeGenOpt::Level OL, - bool GVsWithCode, - Reloc::Model RM, - CodeModel::Model CMM) { - if (!ExecutionEngine::JITCtor) { - if (ErrorStr) - *ErrorStr = "JIT has not been linked in."; - return nullptr; - } - - // Use the defaults for extra parameters. Users can use EngineBuilder to - // set them. - EngineBuilder EB(M); - EB.setEngineKind(EngineKind::JIT); - EB.setErrorStr(ErrorStr); - EB.setRelocationModel(RM); - EB.setCodeModel(CMM); - EB.setAllocateGVsWithCode(GVsWithCode); - EB.setOptLevel(OL); - EB.setJITMemoryManager(JMM); - - // TODO: permit custom TargetOptions here - TargetMachine *TM = EB.selectTarget(); - if (!TM || (ErrorStr && ErrorStr->length() > 0)) return nullptr; - - return ExecutionEngine::JITCtor(M, ErrorStr, JMM, GVsWithCode, TM); -} - void EngineBuilder::InitEngine() { WhichEngine = EngineKind::Either; ErrorStr = nullptr; OptLevel = CodeGenOpt::Default; MCJMM = nullptr; - JMM = nullptr; Options = TargetOptions(); - AllocateGVsWithCode = false; RelocModel = Reloc::Default; CMModel = CodeModel::JITDefault; - UseMCJIT = false; // IR module verification is enabled by default in debug builds, and disabled // by default in release builds. @@ -485,13 +421,11 @@ ExecutionEngine *EngineBuilder::create(TargetMachine *TM) { // to the function tells DynamicLibrary to load the program, not a library. if (sys::DynamicLibrary::LoadLibraryPermanently(nullptr, ErrorStr)) return nullptr; - - assert(!(JMM && MCJMM)); // If the user specified a memory manager but didn't specify which engine to // create, we assume they only want the JIT, and we fail if they only want // the interpreter. - if (JMM || MCJMM) { + if (MCJMM) { if (WhichEngine & EngineKind::JIT) WhichEngine = EngineKind::JIT; else { @@ -500,14 +434,6 @@ ExecutionEngine *EngineBuilder::create(TargetMachine *TM) { return nullptr; } } - - if (MCJMM && ! UseMCJIT) { - if (ErrorStr) - *ErrorStr = - "Cannot create a legacy JIT with a runtime dyld memory " - "manager."; - return nullptr; - } // Unless the interpreter was explicitly selected or the JIT is not linked, // try making a JIT. @@ -520,13 +446,9 @@ ExecutionEngine *EngineBuilder::create(TargetMachine *TM) { } ExecutionEngine *EE = nullptr; - if (UseMCJIT && ExecutionEngine::MCJITCtor) - EE = ExecutionEngine::MCJITCtor(M, ErrorStr, MCJMM ? 
MCJMM : JMM, - AllocateGVsWithCode, TheTM.release()); - else if (ExecutionEngine::JITCtor) - EE = ExecutionEngine::JITCtor(M, ErrorStr, JMM, - AllocateGVsWithCode, TheTM.release()); - + if (ExecutionEngine::MCJITCtor) + EE = ExecutionEngine::MCJITCtor(std::move(M), ErrorStr, MCJMM, + std::move(TheTM)); if (EE) { EE->setVerifyModules(VerifyModules); return EE; @@ -537,14 +459,13 @@ ExecutionEngine *EngineBuilder::create(TargetMachine *TM) { // an interpreter instead. if (WhichEngine & EngineKind::Interpreter) { if (ExecutionEngine::InterpCtor) - return ExecutionEngine::InterpCtor(M, ErrorStr); + return ExecutionEngine::InterpCtor(std::move(M), ErrorStr); if (ErrorStr) *ErrorStr = "Interpreter has not been linked in."; return nullptr; } - if ((WhichEngine & EngineKind::JIT) && !ExecutionEngine::JITCtor && - !ExecutionEngine::MCJITCtor) { + if ((WhichEngine & EngineKind::JIT) && !ExecutionEngine::MCJITCtor) { if (ErrorStr) *ErrorStr = "JIT has not been linked in."; } @@ -890,9 +811,6 @@ GenericValue ExecutionEngine::getConstantValue(const Constant *C) { Result = PTOGV(getPointerToFunctionOrStub(const_cast(F))); else if (const GlobalVariable *GV = dyn_cast(C)) Result = PTOGV(getOrEmitGlobalVariable(const_cast(GV))); - else if (const BlockAddress *BA = dyn_cast(C)) - Result = PTOGV(getPointerToBasicBlock(const_cast( - BA->getBasicBlock()))); else llvm_unreachable("Unknown constant pointer type!"); break; diff --git a/lib/ExecutionEngine/ExecutionEngineBindings.cpp b/lib/ExecutionEngine/ExecutionEngineBindings.cpp index 6ff1e7a..58271df 100644 --- a/lib/ExecutionEngine/ExecutionEngineBindings.cpp +++ b/lib/ExecutionEngine/ExecutionEngineBindings.cpp @@ -27,14 +27,6 @@ using namespace llvm; // Wrapping the C bindings types. DEFINE_SIMPLE_CONVERSION_FUNCTIONS(GenericValue, LLVMGenericValueRef) -inline TargetLibraryInfo *unwrap(LLVMTargetLibraryInfoRef P) { - return reinterpret_cast(P); -} - -inline LLVMTargetLibraryInfoRef wrap(const TargetLibraryInfo *P) { - TargetLibraryInfo *X = const_cast(P); - return reinterpret_cast(X); -} inline LLVMTargetMachineRef wrap(const TargetMachine *P) { return @@ -110,7 +102,7 @@ LLVMBool LLVMCreateExecutionEngineForModule(LLVMExecutionEngineRef *OutEE, LLVMModuleRef M, char **OutError) { std::string Error; - EngineBuilder builder(unwrap(M)); + EngineBuilder builder(std::unique_ptr(unwrap(M))); builder.setEngineKind(EngineKind::Either) .setErrorStr(&Error); if (ExecutionEngine *EE = builder.create()){ @@ -125,7 +117,7 @@ LLVMBool LLVMCreateInterpreterForModule(LLVMExecutionEngineRef *OutInterp, LLVMModuleRef M, char **OutError) { std::string Error; - EngineBuilder builder(unwrap(M)); + EngineBuilder builder(std::unique_ptr(unwrap(M))); builder.setEngineKind(EngineKind::Interpreter) .setErrorStr(&Error); if (ExecutionEngine *Interp = builder.create()) { @@ -141,7 +133,7 @@ LLVMBool LLVMCreateJITCompilerForModule(LLVMExecutionEngineRef *OutJIT, unsigned OptLevel, char **OutError) { std::string Error; - EngineBuilder builder(unwrap(M)); + EngineBuilder builder(std::unique_ptr(unwrap(M))); builder.setEngineKind(EngineKind::JIT) .setErrorStr(&Error) .setOptLevel((CodeGenOpt::Level)OptLevel); @@ -189,10 +181,9 @@ LLVMBool LLVMCreateMCJITCompilerForModule( targetOptions.EnableFastISel = options.EnableFastISel; std::string Error; - EngineBuilder builder(unwrap(M)); + EngineBuilder builder(std::unique_ptr(unwrap(M))); builder.setEngineKind(EngineKind::JIT) .setErrorStr(&Error) - .setUseMCJIT(true) .setOptLevel((CodeGenOpt::Level)options.OptLevel) 
.setCodeModel(unwrap(options.CodeModel)) .setTargetOptions(targetOptions); @@ -275,11 +266,10 @@ LLVMGenericValueRef LLVMRunFunction(LLVMExecutionEngineRef EE, LLVMValueRef F, } void LLVMFreeMachineCodeForFunction(LLVMExecutionEngineRef EE, LLVMValueRef F) { - unwrap(EE)->freeMachineCodeForFunction(unwrap(F)); } void LLVMAddModule(LLVMExecutionEngineRef EE, LLVMModuleRef M){ - unwrap(EE)->addModule(unwrap(M)); + unwrap(EE)->addModule(std::unique_ptr(unwrap(M))); } void LLVMAddModuleProvider(LLVMExecutionEngineRef EE, LLVMModuleProviderRef MP){ @@ -314,7 +304,7 @@ LLVMBool LLVMFindFunction(LLVMExecutionEngineRef EE, const char *Name, void *LLVMRecompileAndRelinkFunction(LLVMExecutionEngineRef EE, LLVMValueRef Fn) { - return unwrap(EE)->recompileAndRelinkFunction(unwrap(Fn)); + return nullptr; } LLVMTargetDataRef LLVMGetExecutionEngineTargetData(LLVMExecutionEngineRef EE) { diff --git a/lib/ExecutionEngine/IntelJITEvents/IntelJITEventListener.cpp b/lib/ExecutionEngine/IntelJITEvents/IntelJITEventListener.cpp index 4e22a8b..b23ca88 100644 --- a/lib/ExecutionEngine/IntelJITEvents/IntelJITEventListener.cpp +++ b/lib/ExecutionEngine/IntelJITEvents/IntelJITEventListener.cpp @@ -57,29 +57,11 @@ public: ~IntelJITEventListener() { } - virtual void NotifyFunctionEmitted(const Function &F, - void *FnStart, size_t FnSize, - const EmittedFunctionDetails &Details); - - virtual void NotifyFreeingMachineCode(void *OldPtr); - virtual void NotifyObjectEmitted(const ObjectImage &Obj); virtual void NotifyFreeingObject(const ObjectImage &Obj); }; -static LineNumberInfo LineStartToIntelJITFormat( - uintptr_t StartAddress, - uintptr_t Address, - DebugLoc Loc) { - LineNumberInfo Result; - - Result.Offset = Address - StartAddress; - Result.LineNumber = Loc.getLine(); - - return Result; -} - static LineNumberInfo DILineInfoToIntelJITFormat(uintptr_t StartAddress, uintptr_t Address, DILineInfo Line) { @@ -113,84 +95,10 @@ static iJIT_Method_Load FunctionDescToIntelJITFormat( return Result; } -// Adds the just-emitted function to the symbol table. -void IntelJITEventListener::NotifyFunctionEmitted( - const Function &F, void *FnStart, size_t FnSize, - const EmittedFunctionDetails &Details) { - iJIT_Method_Load FunctionMessage = FunctionDescToIntelJITFormat(*Wrapper, - F.getName().data(), - reinterpret_cast(FnStart), - FnSize); - - std::vector LineInfo; - - if (!Details.LineStarts.empty()) { - // Now convert the line number information from the address/DebugLoc - // format in Details to the offset/lineno in Intel JIT API format. 
- - LineInfo.reserve(Details.LineStarts.size() + 1); - - DebugLoc FirstLoc = Details.LineStarts[0].Loc; - assert(!FirstLoc.isUnknown() - && "LineStarts should not contain unknown DebugLocs"); - - MDNode *FirstLocScope = FirstLoc.getScope(F.getContext()); - DISubprogram FunctionDI = getDISubprogram(FirstLocScope); - if (FunctionDI.Verify()) { - FunctionMessage.source_file_name = const_cast( - Filenames.getFullPath(FirstLocScope)); - - LineNumberInfo FirstLine; - FirstLine.Offset = 0; - FirstLine.LineNumber = FunctionDI.getLineNumber(); - LineInfo.push_back(FirstLine); - } - - for (std::vector::const_iterator I = - Details.LineStarts.begin(), E = Details.LineStarts.end(); - I != E; ++I) { - // This implementation ignores the DebugLoc filename because the Intel - // JIT API does not support multiple source files associated with a single - // JIT function - LineInfo.push_back(LineStartToIntelJITFormat( - reinterpret_cast(FnStart), - I->Address, - I->Loc)); - - // If we have no file name yet for the function, use the filename from - // the first instruction that has one - if (FunctionMessage.source_file_name == 0) { - MDNode *scope = I->Loc.getScope( - Details.MF->getFunction()->getContext()); - FunctionMessage.source_file_name = const_cast( - Filenames.getFullPath(scope)); - } - } - - FunctionMessage.line_number_size = LineInfo.size(); - FunctionMessage.line_number_table = &*LineInfo.begin(); - } else { - FunctionMessage.line_number_size = 0; - FunctionMessage.line_number_table = 0; - } - - Wrapper->iJIT_NotifyEvent(iJVM_EVENT_TYPE_METHOD_LOAD_FINISHED, - &FunctionMessage); - MethodIDs[FnStart] = FunctionMessage.method_id; -} - -void IntelJITEventListener::NotifyFreeingMachineCode(void *FnStart) { - MethodIDMap::iterator I = MethodIDs.find(FnStart); - if (I != MethodIDs.end()) { - Wrapper->iJIT_NotifyEvent(iJVM_EVENT_TYPE_METHOD_UNLOAD_START, &I->second); - MethodIDs.erase(I); - } -} - void IntelJITEventListener::NotifyObjectEmitted(const ObjectImage &Obj) { // Get the address of the object image for use as a unique identifier const void* ObjData = Obj.getData().data(); - DIContext* Context = DIContext::getDWARFContext(Obj.getObjectFile()); + DIContext* Context = DIContext::getDWARFContext(*Obj.getObjectFile()); MethodAddressVector Functions; // Use symbol info to iterate functions in the object. 
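The hunk above drops the old function-emission callbacks from the Intel JIT listener and keeps only the object-emission path, which reads line information through DWARF; note that DIContext::getDWARFContext now takes the object file by reference. A minimal sketch of that call pattern, assuming a loaded ObjectFile `Obj` with DWARF debug info and an address `Addr` inside it (the helper name and both parameters are illustrative, not part of the patch):

    // Illustrative sketch only, not part of this patch. Assumes Obj carries
    // DWARF debug info and Addr points into one of its functions.
    #include "llvm/DebugInfo/DIContext.h"
    #include "llvm/Object/ObjectFile.h"
    #include "llvm/Support/raw_ostream.h"
    #include <cstdint>
    #include <memory>

    static void printLineInfo(llvm::object::ObjectFile &Obj, uint64_t Addr) {
      // After this change the context is built from an ObjectFile reference.
      std::unique_ptr<llvm::DIContext> Ctx(llvm::DIContext::getDWARFContext(Obj));
      // Resolve file and line for the address with the default specifier.
      llvm::DILineInfo Info = Ctx->getLineInfoForAddress(Addr);
      llvm::outs() << Info.FileName << ':' << Info.Line << '\n';
    }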
diff --git a/lib/ExecutionEngine/IntelJITEvents/LLVMBuild.txt b/lib/ExecutionEngine/IntelJITEvents/LLVMBuild.txt index 9c06fda..e36493e 100644 --- a/lib/ExecutionEngine/IntelJITEvents/LLVMBuild.txt +++ b/lib/ExecutionEngine/IntelJITEvents/LLVMBuild.txt @@ -21,3 +21,4 @@ type = OptionalLibrary name = IntelJITEvents parent = ExecutionEngine +required_libraries = Core DebugInfo Support diff --git a/lib/ExecutionEngine/Interpreter/CMakeLists.txt b/lib/ExecutionEngine/Interpreter/CMakeLists.txt index 74df8f0..1aac3ac 100644 --- a/lib/ExecutionEngine/Interpreter/CMakeLists.txt +++ b/lib/ExecutionEngine/Interpreter/CMakeLists.txt @@ -13,7 +13,7 @@ add_llvm_library(LLVMInterpreter ) if( LLVM_ENABLE_FFI ) - target_link_libraries( LLVMInterpreter ${FFI_LIBRARY_PATH} ) + target_link_libraries( LLVMInterpreter ${cmake_2_8_12_PRIVATE} ${FFI_LIBRARY_PATH} ) endif() add_dependencies(LLVMInterpreter intrinsics_gen) diff --git a/lib/ExecutionEngine/Interpreter/ExternalFunctions.cpp b/lib/ExecutionEngine/Interpreter/ExternalFunctions.cpp index 671bbee..b022101 100644 --- a/lib/ExecutionEngine/Interpreter/ExternalFunctions.cpp +++ b/lib/ExecutionEngine/Interpreter/ExternalFunctions.cpp @@ -28,6 +28,7 @@ #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/ManagedStatic.h" #include "llvm/Support/Mutex.h" +#include "llvm/Support/UniqueLock.h" #include #include #include @@ -51,7 +52,7 @@ static ManagedStatic FunctionsLock; typedef GenericValue (*ExFunc)(FunctionType *, const std::vector &); static ManagedStatic > ExportedFunctions; -static std::map FuncNames; +static ManagedStatic > FuncNames; #ifdef USE_LIBFFI typedef void (*RawFunc)(); @@ -97,9 +98,9 @@ static ExFunc lookupFunction(const Function *F) { ExtName += "_" + F->getName().str(); sys::ScopedLock Writer(*FunctionsLock); - ExFunc FnPtr = FuncNames[ExtName]; + ExFunc FnPtr = (*FuncNames)[ExtName]; if (!FnPtr) - FnPtr = FuncNames["lle_X_" + F->getName().str()]; + FnPtr = (*FuncNames)["lle_X_" + F->getName().str()]; if (!FnPtr) // Try calling a generic function... if it exists... FnPtr = (ExFunc)(intptr_t) sys::DynamicLibrary::SearchForAddressOfSymbol("lle_X_" + @@ -248,14 +249,14 @@ GenericValue Interpreter::callExternalFunction(Function *F, const std::vector &ArgVals) { TheInterpreter = this; - FunctionsLock->acquire(); + unique_lock Guard(*FunctionsLock); // Do a lookup to see if the function is in our cache... this should just be a // deferred annotation! std::map::iterator FI = ExportedFunctions->find(F); if (ExFunc Fn = (FI == ExportedFunctions->end()) ? 
lookupFunction(F) : FI->second) { - FunctionsLock->release(); + Guard.unlock(); return Fn(F->getFunctionType(), ArgVals); } @@ -273,7 +274,7 @@ GenericValue Interpreter::callExternalFunction(Function *F, RawFn = RF->second; } - FunctionsLock->release(); + Guard.unlock(); GenericValue Result; if (RawFn != 0 && ffiInvoke(RawFn, F, ArgVals, getDataLayout(), Result)) @@ -497,15 +498,15 @@ static GenericValue lle_X_memcpy(FunctionType *FT, void Interpreter::initializeExternalFunctions() { sys::ScopedLock Writer(*FunctionsLock); - FuncNames["lle_X_atexit"] = lle_X_atexit; - FuncNames["lle_X_exit"] = lle_X_exit; - FuncNames["lle_X_abort"] = lle_X_abort; - - FuncNames["lle_X_printf"] = lle_X_printf; - FuncNames["lle_X_sprintf"] = lle_X_sprintf; - FuncNames["lle_X_sscanf"] = lle_X_sscanf; - FuncNames["lle_X_scanf"] = lle_X_scanf; - FuncNames["lle_X_fprintf"] = lle_X_fprintf; - FuncNames["lle_X_memset"] = lle_X_memset; - FuncNames["lle_X_memcpy"] = lle_X_memcpy; + (*FuncNames)["lle_X_atexit"] = lle_X_atexit; + (*FuncNames)["lle_X_exit"] = lle_X_exit; + (*FuncNames)["lle_X_abort"] = lle_X_abort; + + (*FuncNames)["lle_X_printf"] = lle_X_printf; + (*FuncNames)["lle_X_sprintf"] = lle_X_sprintf; + (*FuncNames)["lle_X_sscanf"] = lle_X_sscanf; + (*FuncNames)["lle_X_scanf"] = lle_X_scanf; + (*FuncNames)["lle_X_fprintf"] = lle_X_fprintf; + (*FuncNames)["lle_X_memset"] = lle_X_memset; + (*FuncNames)["lle_X_memcpy"] = lle_X_memcpy; } diff --git a/lib/ExecutionEngine/Interpreter/Interpreter.cpp b/lib/ExecutionEngine/Interpreter/Interpreter.cpp index 814efcc..8562981 100644 --- a/lib/ExecutionEngine/Interpreter/Interpreter.cpp +++ b/lib/ExecutionEngine/Interpreter/Interpreter.cpp @@ -30,9 +30,10 @@ static struct RegisterInterp { extern "C" void LLVMLinkInInterpreter() { } -/// create - Create a new interpreter object. This can never fail. +/// Create a new interpreter object. /// -ExecutionEngine *Interpreter::create(Module *M, std::string* ErrStr) { +ExecutionEngine *Interpreter::create(std::unique_ptr M, + std::string *ErrStr) { // Tell this Module to materialize everything and release the GVMaterializer. if (std::error_code EC = M->materializeAllPermanently()) { if (ErrStr) @@ -41,15 +42,15 @@ ExecutionEngine *Interpreter::create(Module *M, std::string* ErrStr) { return nullptr; } - return new Interpreter(M); + return new Interpreter(std::move(M)); } //===----------------------------------------------------------------------===// // Interpreter ctor - Initialize stuff // -Interpreter::Interpreter(Module *M) - : ExecutionEngine(M), TD(M) { - +Interpreter::Interpreter(std::unique_ptr M) + : ExecutionEngine(std::move(M)), TD(Modules.back().get()) { + memset(&ExitValue.Untyped, 0, sizeof(ExitValue.Untyped)); setDataLayout(&TD); // Initialize the "backend" diff --git a/lib/ExecutionEngine/Interpreter/Interpreter.h b/lib/ExecutionEngine/Interpreter/Interpreter.h index 2145cde..2be9c59 100644 --- a/lib/ExecutionEngine/Interpreter/Interpreter.h +++ b/lib/ExecutionEngine/Interpreter/Interpreter.h @@ -11,8 +11,8 @@ // //===----------------------------------------------------------------------===// -#ifndef LLI_INTERPRETER_H -#define LLI_INTERPRETER_H +#ifndef LLVM_LIB_EXECUTIONENGINE_INTERPRETER_INTERPRETER_H +#define LLVM_LIB_EXECUTIONENGINE_INTERPRETER_INTERPRETER_H #include "llvm/ExecutionEngine/ExecutionEngine.h" #include "llvm/ExecutionEngine/GenericValue.h" @@ -37,29 +37,24 @@ typedef generic_gep_type_iterator gep_type_iterator; // stack, which causes the dtor to be run, which frees all the alloca'd memory. 
// class AllocaHolder { - friend class AllocaHolderHandle; - std::vector Allocations; - unsigned RefCnt; + std::vector Allocations; + public: - AllocaHolder() : RefCnt(0) {} - void add(void *mem) { Allocations.push_back(mem); } - ~AllocaHolder() { - for (unsigned i = 0; i < Allocations.size(); ++i) - free(Allocations[i]); + AllocaHolder() {} + + // Make this type move-only. Define explicit move special members for MSVC. + AllocaHolder(AllocaHolder &&RHS) : Allocations(std::move(RHS.Allocations)) {} + AllocaHolder &operator=(AllocaHolder &&RHS) { + Allocations = std::move(RHS.Allocations); + return *this; } -}; -// AllocaHolderHandle gives AllocaHolder value semantics so we can stick it into -// a vector... -// -class AllocaHolderHandle { - AllocaHolder *H; -public: - AllocaHolderHandle() : H(new AllocaHolder()) { H->RefCnt++; } - AllocaHolderHandle(const AllocaHolderHandle &AH) : H(AH.H) { H->RefCnt++; } - ~AllocaHolderHandle() { if (--H->RefCnt == 0) delete H; } + ~AllocaHolder() { + for (void *Allocation : Allocations) + free(Allocation); + } - void add(void *mem) { H->add(mem); } + void add(void *Mem) { Allocations.push_back(Mem); } }; typedef std::vector ValuePlaneTy; @@ -71,11 +66,29 @@ struct ExecutionContext { Function *CurFunction;// The currently executing function BasicBlock *CurBB; // The currently executing BB BasicBlock::iterator CurInst; // The next instruction to execute - std::map Values; // LLVM values used in this invocation - std::vector VarArgs; // Values passed through an ellipsis CallSite Caller; // Holds the call that called subframes. // NULL if main func or debugger invoked fn - AllocaHolderHandle Allocas; // Track memory allocated by alloca + std::map Values; // LLVM values used in this invocation + std::vector VarArgs; // Values passed through an ellipsis + AllocaHolder Allocas; // Track memory allocated by alloca + + ExecutionContext() : CurFunction(nullptr), CurBB(nullptr), CurInst(nullptr) {} + + ExecutionContext(ExecutionContext &&O) + : CurFunction(O.CurFunction), CurBB(O.CurBB), CurInst(O.CurInst), + Caller(O.Caller), Values(std::move(O.Values)), + VarArgs(std::move(O.VarArgs)), Allocas(std::move(O.Allocas)) {} + + ExecutionContext &operator=(ExecutionContext &&O) { + CurFunction = O.CurFunction; + CurBB = O.CurBB; + CurInst = O.CurInst; + Caller = O.Caller; + Values = std::move(O.Values); + VarArgs = std::move(O.VarArgs); + Allocas = std::move(O.Allocas); + return *this; + } }; // Interpreter - This class represents the entirety of the interpreter. @@ -94,7 +107,7 @@ class Interpreter : public ExecutionEngine, public InstVisitor { std::vector AtExitHandlers; public: - explicit Interpreter(Module *M); + explicit Interpreter(std::unique_ptr M); ~Interpreter(); /// runAtExitHandlers - Run any functions registered by the program's calls to @@ -105,33 +118,23 @@ public: static void Register() { InterpCtor = create; } - - /// create - Create an interpreter ExecutionEngine. This can never fail. + + /// Create an interpreter ExecutionEngine. /// - static ExecutionEngine *create(Module *M, std::string *ErrorStr = nullptr); + static ExecutionEngine *create(std::unique_ptr M, + std::string *ErrorStr = nullptr); /// run - Start execution with the specified function and arguments. /// GenericValue runFunction(Function *F, const std::vector &ArgValues) override; - void *getPointerToNamedFunction(const std::string &Name, + void *getPointerToNamedFunction(StringRef Name, bool AbortOnFailure = true) override { // FIXME: not implemented. 
return nullptr; } - /// recompileAndRelinkFunction - For the interpreter, functions are always - /// up-to-date. - /// - void *recompileAndRelinkFunction(Function *F) override { - return getPointerToFunction(F); - } - - /// freeMachineCodeForFunction - The interpreter does not generate any code. - /// - void freeMachineCodeForFunction(Function *F) override { } - // Methods used to execute code: // Place a call on the stack void callFunction(Function *F, const std::vector &ArgVals); @@ -213,7 +216,6 @@ private: // Helper functions void SwitchToNewBasicBlock(BasicBlock *Dest, ExecutionContext &SF); void *getPointerToFunction(Function *F) override { return (void*)F; } - void *getPointerToBasicBlock(BasicBlock *BB) override { return (void*)BB; } void initializeExecutionEngine() { } void initializeExternalFunctions(); diff --git a/lib/ExecutionEngine/JIT/Android.mk b/lib/ExecutionEngine/JIT/Android.mk deleted file mode 100644 index 0466ba0..0000000 --- a/lib/ExecutionEngine/JIT/Android.mk +++ /dev/null @@ -1,17 +0,0 @@ -LOCAL_PATH:= $(call my-dir) - -# For the host -# ===================================================== -include $(CLEAR_VARS) - -LOCAL_SRC_FILES := \ - JIT.cpp \ - JITEmitter.cpp \ - JITMemoryManager.cpp - -LOCAL_MODULE:= libLLVMJIT - -LOCAL_MODULE_TAGS := optional - -include $(LLVM_HOST_BUILD_MK) -include $(BUILD_HOST_STATIC_LIBRARY) diff --git a/lib/ExecutionEngine/JIT/CMakeLists.txt b/lib/ExecutionEngine/JIT/CMakeLists.txt deleted file mode 100644 index e16baed..0000000 --- a/lib/ExecutionEngine/JIT/CMakeLists.txt +++ /dev/null @@ -1,8 +0,0 @@ -# TODO: Support other architectures. See Makefile. -add_definitions(-DENABLE_X86_JIT) - -add_llvm_library(LLVMJIT - JIT.cpp - JITEmitter.cpp - JITMemoryManager.cpp - ) diff --git a/lib/ExecutionEngine/JIT/JIT.cpp b/lib/ExecutionEngine/JIT/JIT.cpp deleted file mode 100644 index 83ec978..0000000 --- a/lib/ExecutionEngine/JIT/JIT.cpp +++ /dev/null @@ -1,695 +0,0 @@ -//===-- JIT.cpp - LLVM Just in Time Compiler ------------------------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This tool implements a just-in-time compiler for LLVM, allowing direct -// execution of LLVM bitcode in an efficient manner. -// -//===----------------------------------------------------------------------===// - -#include "JIT.h" -#include "llvm/ADT/SmallPtrSet.h" -#include "llvm/CodeGen/JITCodeEmitter.h" -#include "llvm/CodeGen/MachineCodeInfo.h" -#include "llvm/Config/config.h" -#include "llvm/ExecutionEngine/GenericValue.h" -#include "llvm/ExecutionEngine/JITEventListener.h" -#include "llvm/ExecutionEngine/JITMemoryManager.h" -#include "llvm/IR/Constants.h" -#include "llvm/IR/DataLayout.h" -#include "llvm/IR/DerivedTypes.h" -#include "llvm/IR/Function.h" -#include "llvm/IR/GlobalVariable.h" -#include "llvm/IR/Instructions.h" -#include "llvm/IR/Module.h" -#include "llvm/Support/Dwarf.h" -#include "llvm/Support/DynamicLibrary.h" -#include "llvm/Support/ErrorHandling.h" -#include "llvm/Support/ManagedStatic.h" -#include "llvm/Support/MutexGuard.h" -#include "llvm/Target/TargetJITInfo.h" -#include "llvm/Target/TargetMachine.h" - -using namespace llvm; - -#ifdef __APPLE__ -// Apple gcc defaults to -fuse-cxa-atexit (i.e. calls __cxa_atexit instead -// of atexit). It passes the address of linker generated symbol __dso_handle -// to the function. 
-// This configuration change happened at version 5330. -# include -# if defined(MAC_OS_X_VERSION_10_4) && \ - ((MAC_OS_X_VERSION_MIN_REQUIRED > MAC_OS_X_VERSION_10_4) || \ - (MAC_OS_X_VERSION_MIN_REQUIRED == MAC_OS_X_VERSION_10_4 && \ - __APPLE_CC__ >= 5330)) -# ifndef HAVE___DSO_HANDLE -# define HAVE___DSO_HANDLE 1 -# endif -# endif -#endif - -#if HAVE___DSO_HANDLE -extern void *__dso_handle __attribute__ ((__visibility__ ("hidden"))); -#endif - -namespace { - -static struct RegisterJIT { - RegisterJIT() { JIT::Register(); } -} JITRegistrator; - -} - -extern "C" void LLVMLinkInJIT() { -} - -/// createJIT - This is the factory method for creating a JIT for the current -/// machine, it does not fall back to the interpreter. This takes ownership -/// of the module. -ExecutionEngine *JIT::createJIT(Module *M, - std::string *ErrorStr, - JITMemoryManager *JMM, - bool GVsWithCode, - TargetMachine *TM) { - // Try to register the program as a source of symbols to resolve against. - // - // FIXME: Don't do this here. - sys::DynamicLibrary::LoadLibraryPermanently(nullptr, nullptr); - - // If the target supports JIT code generation, create the JIT. - if (TargetJITInfo *TJ = TM->getJITInfo()) { - return new JIT(M, *TM, *TJ, JMM, GVsWithCode); - } else { - if (ErrorStr) - *ErrorStr = "target does not support JIT code generation"; - return nullptr; - } -} - -namespace { -/// This class supports the global getPointerToNamedFunction(), which allows -/// bugpoint or gdb users to search for a function by name without any context. -class JitPool { - SmallPtrSet JITs; // Optimize for process containing just 1 JIT. - mutable sys::Mutex Lock; -public: - void Add(JIT *jit) { - MutexGuard guard(Lock); - JITs.insert(jit); - } - void Remove(JIT *jit) { - MutexGuard guard(Lock); - JITs.erase(jit); - } - void *getPointerToNamedFunction(const char *Name) const { - MutexGuard guard(Lock); - assert(JITs.size() != 0 && "No Jit registered"); - //search function in every instance of JIT - for (SmallPtrSet::const_iterator Jit = JITs.begin(), - end = JITs.end(); - Jit != end; ++Jit) { - if (Function *F = (*Jit)->FindFunctionNamed(Name)) - return (*Jit)->getPointerToFunction(F); - } - // The function is not available : fallback on the first created (will - // search in symbol of the current program/library) - return (*JITs.begin())->getPointerToNamedFunction(Name); - } -}; -ManagedStatic AllJits; -} -extern "C" { - // getPointerToNamedFunction - This function is used as a global wrapper to - // JIT::getPointerToNamedFunction for the purpose of resolving symbols when - // bugpoint is debugging the JIT. In that scenario, we are loading an .so and - // need to resolve function(s) that are being mis-codegenerated, so we need to - // resolve their addresses at runtime, and this is the way to do it. - void *getPointerToNamedFunction(const char *Name) { - return AllJits->getPointerToNamedFunction(Name); - } -} - -JIT::JIT(Module *M, TargetMachine &tm, TargetJITInfo &tji, - JITMemoryManager *jmm, bool GVsWithCode) - : ExecutionEngine(M), TM(tm), TJI(tji), - JMM(jmm ? jmm : JITMemoryManager::CreateDefaultMemManager()), - AllocateGVsWithCode(GVsWithCode), isAlreadyCodeGenerating(false) { - setDataLayout(TM.getDataLayout()); - - jitstate = new JITState(M); - - // Initialize JCE - JCE = createEmitter(*this, JMM, TM); - - // Register in global list of all JITs. 
- AllJits->Add(this); - - // Add target data - MutexGuard locked(lock); - FunctionPassManager &PM = jitstate->getPM(); - M->setDataLayout(TM.getDataLayout()); - PM.add(new DataLayoutPass(M)); - - // Turn the machine code intermediate representation into bytes in memory that - // may be executed. - if (TM.addPassesToEmitMachineCode(PM, *JCE, !getVerifyModules())) { - report_fatal_error("Target does not support machine code emission!"); - } - - // Initialize passes. - PM.doInitialization(); -} - -JIT::~JIT() { - // Cleanup. - AllJits->Remove(this); - delete jitstate; - delete JCE; - // JMM is a ownership of JCE, so we no need delete JMM here. - delete &TM; -} - -/// addModule - Add a new Module to the JIT. If we previously removed the last -/// Module, we need re-initialize jitstate with a valid Module. -void JIT::addModule(Module *M) { - MutexGuard locked(lock); - - if (Modules.empty()) { - assert(!jitstate && "jitstate should be NULL if Modules vector is empty!"); - - jitstate = new JITState(M); - - FunctionPassManager &PM = jitstate->getPM(); - M->setDataLayout(TM.getDataLayout()); - PM.add(new DataLayoutPass(M)); - - // Turn the machine code intermediate representation into bytes in memory - // that may be executed. - if (TM.addPassesToEmitMachineCode(PM, *JCE, !getVerifyModules())) { - report_fatal_error("Target does not support machine code emission!"); - } - - // Initialize passes. - PM.doInitialization(); - } - - ExecutionEngine::addModule(M); -} - -/// removeModule - If we are removing the last Module, invalidate the jitstate -/// since the PassManager it contains references a released Module. -bool JIT::removeModule(Module *M) { - bool result = ExecutionEngine::removeModule(M); - - MutexGuard locked(lock); - - if (jitstate && jitstate->getModule() == M) { - delete jitstate; - jitstate = nullptr; - } - - if (!jitstate && !Modules.empty()) { - jitstate = new JITState(Modules[0]); - - FunctionPassManager &PM = jitstate->getPM(); - M->setDataLayout(TM.getDataLayout()); - PM.add(new DataLayoutPass(M)); - - // Turn the machine code intermediate representation into bytes in memory - // that may be executed. - if (TM.addPassesToEmitMachineCode(PM, *JCE, !getVerifyModules())) { - report_fatal_error("Target does not support machine code emission!"); - } - - // Initialize passes. - PM.doInitialization(); - } - return result; -} - -/// run - Start execution with the specified function and arguments. -/// -GenericValue JIT::runFunction(Function *F, - const std::vector &ArgValues) { - assert(F && "Function *F was null at entry to run()"); - - void *FPtr = getPointerToFunction(F); - assert(FPtr && "Pointer to fn's code was null after getPointerToFunction"); - FunctionType *FTy = F->getFunctionType(); - Type *RetTy = FTy->getReturnType(); - - assert((FTy->getNumParams() == ArgValues.size() || - (FTy->isVarArg() && FTy->getNumParams() <= ArgValues.size())) && - "Wrong number of arguments passed into function!"); - assert(FTy->getNumParams() == ArgValues.size() && - "This doesn't support passing arguments through varargs (yet)!"); - - // Handle some common cases first. These cases correspond to common `main' - // prototypes. - if (RetTy->isIntegerTy(32) || RetTy->isVoidTy()) { - switch (ArgValues.size()) { - case 3: - if (FTy->getParamType(0)->isIntegerTy(32) && - FTy->getParamType(1)->isPointerTy() && - FTy->getParamType(2)->isPointerTy()) { - int (*PF)(int, char **, const char **) = - (int(*)(int, char **, const char **))(intptr_t)FPtr; - - // Call the function. 
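(The fast path of runFunction above simply reinterprets the JITed entry point as a function pointer with the matching C prototype and performs an ordinary native call. A standalone illustration of that cast-and-call step follows; since there is no JIT in the sketch, the "compiled address" is faked with a real function:)

    #include <cstdint>
    #include <cstdio>

    // Pretend this address came back from code generation for `int f(int, char**)`.
    static int fakeMain(int argc, char **argv) { (void)argv; return argc; }

    int main() {
      void *FPtr = (void *)(intptr_t)&fakeMain;   // what the JIT would hand back

      // The caller knows the prototype, so it casts the raw address back to a
      // typed function pointer and calls it directly.
      int (*PF)(int, char **) = (int (*)(int, char **))(intptr_t)FPtr;
      int Result = PF(3, nullptr);

      std::printf("returned %d\n", Result);       // prints: returned 3
      return 0;
    }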
- GenericValue rv; - rv.IntVal = APInt(32, PF(ArgValues[0].IntVal.getZExtValue(), - (char **)GVTOP(ArgValues[1]), - (const char **)GVTOP(ArgValues[2]))); - return rv; - } - break; - case 2: - if (FTy->getParamType(0)->isIntegerTy(32) && - FTy->getParamType(1)->isPointerTy()) { - int (*PF)(int, char **) = (int(*)(int, char **))(intptr_t)FPtr; - - // Call the function. - GenericValue rv; - rv.IntVal = APInt(32, PF(ArgValues[0].IntVal.getZExtValue(), - (char **)GVTOP(ArgValues[1]))); - return rv; - } - break; - case 1: - if (FTy->getParamType(0)->isIntegerTy(32)) { - GenericValue rv; - int (*PF)(int) = (int(*)(int))(intptr_t)FPtr; - rv.IntVal = APInt(32, PF(ArgValues[0].IntVal.getZExtValue())); - return rv; - } - if (FTy->getParamType(0)->isPointerTy()) { - GenericValue rv; - int (*PF)(char *) = (int(*)(char *))(intptr_t)FPtr; - rv.IntVal = APInt(32, PF((char*)GVTOP(ArgValues[0]))); - return rv; - } - break; - } - } - - // Handle cases where no arguments are passed first. - if (ArgValues.empty()) { - GenericValue rv; - switch (RetTy->getTypeID()) { - default: llvm_unreachable("Unknown return type for function call!"); - case Type::IntegerTyID: { - unsigned BitWidth = cast(RetTy)->getBitWidth(); - if (BitWidth == 1) - rv.IntVal = APInt(BitWidth, ((bool(*)())(intptr_t)FPtr)()); - else if (BitWidth <= 8) - rv.IntVal = APInt(BitWidth, ((char(*)())(intptr_t)FPtr)()); - else if (BitWidth <= 16) - rv.IntVal = APInt(BitWidth, ((short(*)())(intptr_t)FPtr)()); - else if (BitWidth <= 32) - rv.IntVal = APInt(BitWidth, ((int(*)())(intptr_t)FPtr)()); - else if (BitWidth <= 64) - rv.IntVal = APInt(BitWidth, ((int64_t(*)())(intptr_t)FPtr)()); - else - llvm_unreachable("Integer types > 64 bits not supported"); - return rv; - } - case Type::VoidTyID: - rv.IntVal = APInt(32, ((int(*)())(intptr_t)FPtr)()); - return rv; - case Type::FloatTyID: - rv.FloatVal = ((float(*)())(intptr_t)FPtr)(); - return rv; - case Type::DoubleTyID: - rv.DoubleVal = ((double(*)())(intptr_t)FPtr)(); - return rv; - case Type::X86_FP80TyID: - case Type::FP128TyID: - case Type::PPC_FP128TyID: - llvm_unreachable("long double not supported yet"); - case Type::PointerTyID: - return PTOGV(((void*(*)())(intptr_t)FPtr)()); - } - } - - // Okay, this is not one of our quick and easy cases. Because we don't have a - // full FFI, we have to codegen a nullary stub function that just calls the - // function we are interested in, passing in constants for all of the - // arguments. Make this function and return. - - // First, create the function. - FunctionType *STy=FunctionType::get(RetTy, false); - Function *Stub = Function::Create(STy, Function::InternalLinkage, "", - F->getParent()); - - // Insert a basic block. - BasicBlock *StubBB = BasicBlock::Create(F->getContext(), "", Stub); - - // Convert all of the GenericValue arguments over to constants. Note that we - // currently don't support varargs. 
- SmallVector Args; - for (unsigned i = 0, e = ArgValues.size(); i != e; ++i) { - Constant *C = nullptr; - Type *ArgTy = FTy->getParamType(i); - const GenericValue &AV = ArgValues[i]; - switch (ArgTy->getTypeID()) { - default: llvm_unreachable("Unknown argument type for function call!"); - case Type::IntegerTyID: - C = ConstantInt::get(F->getContext(), AV.IntVal); - break; - case Type::FloatTyID: - C = ConstantFP::get(F->getContext(), APFloat(AV.FloatVal)); - break; - case Type::DoubleTyID: - C = ConstantFP::get(F->getContext(), APFloat(AV.DoubleVal)); - break; - case Type::PPC_FP128TyID: - case Type::X86_FP80TyID: - case Type::FP128TyID: - C = ConstantFP::get(F->getContext(), APFloat(ArgTy->getFltSemantics(), - AV.IntVal)); - break; - case Type::PointerTyID: - void *ArgPtr = GVTOP(AV); - if (sizeof(void*) == 4) - C = ConstantInt::get(Type::getInt32Ty(F->getContext()), - (int)(intptr_t)ArgPtr); - else - C = ConstantInt::get(Type::getInt64Ty(F->getContext()), - (intptr_t)ArgPtr); - // Cast the integer to pointer - C = ConstantExpr::getIntToPtr(C, ArgTy); - break; - } - Args.push_back(C); - } - - CallInst *TheCall = CallInst::Create(F, Args, "", StubBB); - TheCall->setCallingConv(F->getCallingConv()); - TheCall->setTailCall(); - if (!TheCall->getType()->isVoidTy()) - // Return result of the call. - ReturnInst::Create(F->getContext(), TheCall, StubBB); - else - ReturnInst::Create(F->getContext(), StubBB); // Just return void. - - // Finally, call our nullary stub function. - GenericValue Result = runFunction(Stub, std::vector()); - // Erase it, since no other function can have a reference to it. - Stub->eraseFromParent(); - // And return the result. - return Result; -} - -void JIT::RegisterJITEventListener(JITEventListener *L) { - if (!L) - return; - MutexGuard locked(lock); - EventListeners.push_back(L); -} -void JIT::UnregisterJITEventListener(JITEventListener *L) { - if (!L) - return; - MutexGuard locked(lock); - std::vector::reverse_iterator I= - std::find(EventListeners.rbegin(), EventListeners.rend(), L); - if (I != EventListeners.rend()) { - std::swap(*I, EventListeners.back()); - EventListeners.pop_back(); - } -} -void JIT::NotifyFunctionEmitted( - const Function &F, - void *Code, size_t Size, - const JITEvent_EmittedFunctionDetails &Details) { - MutexGuard locked(lock); - for (unsigned I = 0, S = EventListeners.size(); I < S; ++I) { - EventListeners[I]->NotifyFunctionEmitted(F, Code, Size, Details); - } -} - -void JIT::NotifyFreeingMachineCode(void *OldPtr) { - MutexGuard locked(lock); - for (unsigned I = 0, S = EventListeners.size(); I < S; ++I) { - EventListeners[I]->NotifyFreeingMachineCode(OldPtr); - } -} - -/// runJITOnFunction - Run the FunctionPassManager full of -/// just-in-time compilation passes on F, hopefully filling in -/// GlobalAddress[F] with the address of F's machine code. 
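(The listener plumbing above is a plain observer list guarded by the JIT lock: registration appends, unregistration does a reverse find followed by swap-with-back and pop — so listener order is not preserved — and each notification fans out to every registered listener. A minimal sketch of that pattern with standard containers; the names are illustrative, not the JITEventListener interface:)

    #include <algorithm>
    #include <cstddef>
    #include <mutex>
    #include <vector>

    struct Listener {
      virtual void functionEmitted(void *Code, size_t Size) = 0;
      virtual ~Listener() {}
    };

    class Notifier {
      std::vector<Listener *> Listeners;
      std::mutex Lock;
    public:
      void add(Listener *L) {
        if (!L) return;
        std::lock_guard<std::mutex> G(Lock);
        Listeners.push_back(L);
      }
      void remove(Listener *L) {
        if (!L) return;
        std::lock_guard<std::mutex> G(Lock);
        // Find the most recently added occurrence, swap it to the back, pop it.
        auto I = std::find(Listeners.rbegin(), Listeners.rend(), L);
        if (I != Listeners.rend()) {
          std::swap(*I, Listeners.back());
          Listeners.pop_back();
        }
      }
      void notifyEmitted(void *Code, size_t Size) {
        std::lock_guard<std::mutex> G(Lock);
        for (Listener *L : Listeners)
          L->functionEmitted(Code, Size);
      }
    };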
-/// -void JIT::runJITOnFunction(Function *F, MachineCodeInfo *MCI) { - MutexGuard locked(lock); - - class MCIListener : public JITEventListener { - MachineCodeInfo *const MCI; - public: - MCIListener(MachineCodeInfo *mci) : MCI(mci) {} - void NotifyFunctionEmitted(const Function &, void *Code, size_t Size, - const EmittedFunctionDetails &) override { - MCI->setAddress(Code); - MCI->setSize(Size); - } - }; - MCIListener MCIL(MCI); - if (MCI) - RegisterJITEventListener(&MCIL); - - runJITOnFunctionUnlocked(F); - - if (MCI) - UnregisterJITEventListener(&MCIL); -} - -void JIT::runJITOnFunctionUnlocked(Function *F) { - assert(!isAlreadyCodeGenerating && "Error: Recursive compilation detected!"); - - jitTheFunctionUnlocked(F); - - // If the function referred to another function that had not yet been - // read from bitcode, and we are jitting non-lazily, emit it now. - while (!jitstate->getPendingFunctions().empty()) { - Function *PF = jitstate->getPendingFunctions().back(); - jitstate->getPendingFunctions().pop_back(); - - assert(!PF->hasAvailableExternallyLinkage() && - "Externally-defined function should not be in pending list."); - - jitTheFunctionUnlocked(PF); - - // Now that the function has been jitted, ask the JITEmitter to rewrite - // the stub with real address of the function. - updateFunctionStubUnlocked(PF); - } -} - -void JIT::jitTheFunctionUnlocked(Function *F) { - isAlreadyCodeGenerating = true; - jitstate->getPM().run(*F); - isAlreadyCodeGenerating = false; - - // clear basic block addresses after this function is done - getBasicBlockAddressMap().clear(); -} - -/// getPointerToFunction - This method is used to get the address of the -/// specified function, compiling it if necessary. -/// -void *JIT::getPointerToFunction(Function *F) { - - if (void *Addr = getPointerToGlobalIfAvailable(F)) - return Addr; // Check if function already code gen'd - - MutexGuard locked(lock); - - // Now that this thread owns the lock, make sure we read in the function if it - // exists in this Module. - std::string ErrorMsg; - if (F->Materialize(&ErrorMsg)) { - report_fatal_error("Error reading function '" + F->getName()+ - "' from bitcode file: " + ErrorMsg); - } - - // ... and check if another thread has already code gen'd the function. - if (void *Addr = getPointerToGlobalIfAvailable(F)) - return Addr; - - if (F->isDeclaration() || F->hasAvailableExternallyLinkage()) { - bool AbortOnFailure = !F->hasExternalWeakLinkage(); - void *Addr = getPointerToNamedFunction(F->getName(), AbortOnFailure); - addGlobalMapping(F, Addr); - return Addr; - } - - runJITOnFunctionUnlocked(F); - - void *Addr = getPointerToGlobalIfAvailable(F); - assert(Addr && "Code generation didn't add function to GlobalAddress table!"); - return Addr; -} - -void JIT::addPointerToBasicBlock(const BasicBlock *BB, void *Addr) { - MutexGuard locked(lock); - - BasicBlockAddressMapTy::iterator I = - getBasicBlockAddressMap().find(BB); - if (I == getBasicBlockAddressMap().end()) { - getBasicBlockAddressMap()[BB] = Addr; - } else { - // ignore repeats: some BBs can be split into few MBBs? 
- } -} - -void JIT::clearPointerToBasicBlock(const BasicBlock *BB) { - MutexGuard locked(lock); - getBasicBlockAddressMap().erase(BB); -} - -void *JIT::getPointerToBasicBlock(BasicBlock *BB) { - // make sure it's function is compiled by JIT - (void)getPointerToFunction(BB->getParent()); - - // resolve basic block address - MutexGuard locked(lock); - - BasicBlockAddressMapTy::iterator I = - getBasicBlockAddressMap().find(BB); - if (I != getBasicBlockAddressMap().end()) { - return I->second; - } else { - llvm_unreachable("JIT does not have BB address for address-of-label, was" - " it eliminated by optimizer?"); - } -} - -void *JIT::getPointerToNamedFunction(const std::string &Name, - bool AbortOnFailure){ - if (!isSymbolSearchingDisabled()) { - void *ptr = JMM->getPointerToNamedFunction(Name, false); - if (ptr) - return ptr; - } - - /// If a LazyFunctionCreator is installed, use it to get/create the function. - if (LazyFunctionCreator) - if (void *RP = LazyFunctionCreator(Name)) - return RP; - - if (AbortOnFailure) { - report_fatal_error("Program used external function '"+Name+ - "' which could not be resolved!"); - } - return nullptr; -} - - -/// getOrEmitGlobalVariable - Return the address of the specified global -/// variable, possibly emitting it to memory if needed. This is used by the -/// Emitter. -void *JIT::getOrEmitGlobalVariable(const GlobalVariable *GV) { - MutexGuard locked(lock); - - void *Ptr = getPointerToGlobalIfAvailable(GV); - if (Ptr) return Ptr; - - // If the global is external, just remember the address. - if (GV->isDeclaration() || GV->hasAvailableExternallyLinkage()) { -#if HAVE___DSO_HANDLE - if (GV->getName() == "__dso_handle") - return (void*)&__dso_handle; -#endif - Ptr = sys::DynamicLibrary::SearchForAddressOfSymbol(GV->getName()); - if (!Ptr) { - report_fatal_error("Could not resolve external global address: " - +GV->getName()); - } - addGlobalMapping(GV, Ptr); - } else { - // If the global hasn't been emitted to memory yet, allocate space and - // emit it into memory. - Ptr = getMemoryForGV(GV); - addGlobalMapping(GV, Ptr); - EmitGlobalVariable(GV); // Initialize the variable. - } - return Ptr; -} - -/// recompileAndRelinkFunction - This method is used to force a function -/// which has already been compiled, to be compiled again, possibly -/// after it has been modified. Then the entry to the old copy is overwritten -/// with a branch to the new copy. If there was no old copy, this acts -/// just like JIT::getPointerToFunction(). -/// -void *JIT::recompileAndRelinkFunction(Function *F) { - void *OldAddr = getPointerToGlobalIfAvailable(F); - - // If it's not already compiled there is no reason to patch it up. - if (!OldAddr) return getPointerToFunction(F); - - // Delete the old function mapping. - addGlobalMapping(F, nullptr); - - // Recodegen the function - runJITOnFunction(F); - - // Update state, forward the old function to the new function. - void *Addr = getPointerToGlobalIfAvailable(F); - assert(Addr && "Code generation didn't add function to GlobalAddress table!"); - TJI.replaceMachineCodeForFunction(OldAddr, Addr); - return Addr; -} - -/// getMemoryForGV - This method abstracts memory allocation of global -/// variable so that the JIT can allocate thread local variables depending -/// on the target. -/// -char* JIT::getMemoryForGV(const GlobalVariable* GV) { - char *Ptr; - - // GlobalVariable's which are not "constant" will cause trouble in a server - // situation. It's returned in the same block of memory as code which may - // not be writable. 
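(Both getPointerToNamedFunction and getOrEmitGlobalVariable above are layered fallbacks: try the cheap local resolver first, then any user-installed hook, then the process itself, and only then fail — fatally or with null, depending on the caller. A compact standalone sketch of that chain using std::function hooks; this is illustrative, not the ExecutionEngine interface:)

    #include <cstdio>
    #include <cstdlib>
    #include <functional>
    #include <string>
    #include <vector>

    using Resolver = std::function<void *(const std::string &)>;

    // Walk a list of resolvers in priority order; the first non-null hit wins.
    void *resolveSymbol(const std::string &Name,
                        const std::vector<Resolver> &Chain,
                        bool AbortOnFailure) {
      for (const Resolver &R : Chain)
        if (R)
          if (void *Addr = R(Name))
            return Addr;
      if (AbortOnFailure) {
        std::fprintf(stderr, "unresolved external symbol '%s'\n", Name.c_str());
        std::abort();
      }
      return nullptr;   // callers tolerate missing weak externals
    }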
- if (isGVCompilationDisabled() && !GV->isConstant()) { - report_fatal_error("Compilation of non-internal GlobalValue is disabled!"); - } - - // Some applications require globals and code to live together, so they may - // be allocated into the same buffer, but in general globals are allocated - // through the memory manager which puts them near the code but not in the - // same buffer. - Type *GlobalType = GV->getType()->getElementType(); - size_t S = getDataLayout()->getTypeAllocSize(GlobalType); - size_t A = getDataLayout()->getPreferredAlignment(GV); - if (GV->isThreadLocal()) { - MutexGuard locked(lock); - Ptr = TJI.allocateThreadLocalMemory(S); - } else if (TJI.allocateSeparateGVMemory()) { - if (A <= 8) { - Ptr = (char*)malloc(S); - } else { - // Allocate S+A bytes of memory, then use an aligned pointer within that - // space. - Ptr = (char*)malloc(S+A); - unsigned MisAligned = ((intptr_t)Ptr & (A-1)); - Ptr = Ptr + (MisAligned ? (A-MisAligned) : 0); - } - } else if (AllocateGVsWithCode) { - Ptr = (char*)JCE->allocateSpace(S, A); - } else { - Ptr = (char*)JCE->allocateGlobal(S, A); - } - return Ptr; -} - -void JIT::addPendingFunction(Function *F) { - MutexGuard locked(lock); - jitstate->getPendingFunctions().push_back(F); -} - - -JITEventListener::~JITEventListener() {} diff --git a/lib/ExecutionEngine/JIT/JIT.h b/lib/ExecutionEngine/JIT/JIT.h deleted file mode 100644 index 69a7c36..0000000 --- a/lib/ExecutionEngine/JIT/JIT.h +++ /dev/null @@ -1,229 +0,0 @@ -//===-- JIT.h - Class definition for the JIT --------------------*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file defines the top-level JIT data structure. -// -//===----------------------------------------------------------------------===// - -#ifndef JIT_H -#define JIT_H - -#include "llvm/ExecutionEngine/ExecutionEngine.h" -#include "llvm/IR/ValueHandle.h" -#include "llvm/PassManager.h" - -namespace llvm { - -class Function; -struct JITEvent_EmittedFunctionDetails; -class MachineCodeEmitter; -class MachineCodeInfo; -class TargetJITInfo; -class TargetMachine; - -class JITState { -private: - FunctionPassManager PM; // Passes to compile a function - Module *M; // Module used to create the PM - - /// PendingFunctions - Functions which have not been code generated yet, but - /// were called from a function being code generated. - std::vector > PendingFunctions; - -public: - explicit JITState(Module *M) : PM(M), M(M) {} - - FunctionPassManager &getPM() { - return PM; - } - - Module *getModule() const { return M; } - std::vector > &getPendingFunctions() { - return PendingFunctions; - } -}; - - -class JIT : public ExecutionEngine { - /// types - typedef ValueMap - BasicBlockAddressMapTy; - /// data - TargetMachine &TM; // The current target we are compiling to - TargetJITInfo &TJI; // The JITInfo for the target we are compiling to - JITCodeEmitter *JCE; // JCE object - JITMemoryManager *JMM; - std::vector EventListeners; - - /// AllocateGVsWithCode - Some applications require that global variables and - /// code be allocated into the same region of memory, in which case this flag - /// should be set to true. Doing so breaks freeMachineCodeForFunction. - bool AllocateGVsWithCode; - - /// True while the JIT is generating code. Used to assert against recursive - /// entry. 
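(When the target wants globals kept separate from code, getMemoryForGV above falls back to over-allocating with malloc and bumping the pointer up to the next multiple of the preferred alignment; the original block is never retained, so it cannot be freed, a leak the old JIT accepted. A standalone sketch of that rounding, assuming a power-of-two alignment:)

    #include <cassert>
    #include <cstdint>
    #include <cstdlib>

    // Return Size bytes aligned to Align (a power of two), by over-allocating.
    // Note: the raw malloc pointer is not kept, so the block cannot be freed;
    // the legacy JIT made the same trade-off for separately allocated globals.
    char *allocateAligned(size_t Size, size_t Align) {
      assert(Align && (Align & (Align - 1)) == 0 && "alignment must be 2^k");
      if (Align <= 8)
        return static_cast<char *>(std::malloc(Size));     // malloc is enough
      char *Ptr = static_cast<char *>(std::malloc(Size + Align));
      uintptr_t Mis = reinterpret_cast<uintptr_t>(Ptr) & (Align - 1);
      return Ptr + (Mis ? (Align - Mis) : 0);
    }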
- bool isAlreadyCodeGenerating; - - JITState *jitstate; - - /// BasicBlockAddressMap - A mapping between LLVM basic blocks and their - /// actualized version, only filled for basic blocks that have their address - /// taken. - BasicBlockAddressMapTy BasicBlockAddressMap; - - - JIT(Module *M, TargetMachine &tm, TargetJITInfo &tji, - JITMemoryManager *JMM, bool AllocateGVsWithCode); -public: - ~JIT(); - - static void Register() { - JITCtor = createJIT; - } - - /// getJITInfo - Return the target JIT information structure. - /// - TargetJITInfo &getJITInfo() const { return TJI; } - - /// create - Create an return a new JIT compiler if there is one available - /// for the current target. Otherwise, return null. - /// - static ExecutionEngine *create(Module *M, - std::string *Err, - JITMemoryManager *JMM, - CodeGenOpt::Level OptLevel = - CodeGenOpt::Default, - bool GVsWithCode = true, - Reloc::Model RM = Reloc::Default, - CodeModel::Model CMM = CodeModel::JITDefault) { - return ExecutionEngine::createJIT(M, Err, JMM, OptLevel, GVsWithCode, - RM, CMM); - } - - void addModule(Module *M) override; - - /// removeModule - Remove a Module from the list of modules. Returns true if - /// M is found. - bool removeModule(Module *M) override; - - /// runFunction - Start execution with the specified function and arguments. - /// - GenericValue runFunction(Function *F, - const std::vector &ArgValues) override; - - /// getPointerToNamedFunction - This method returns the address of the - /// specified function by using the MemoryManager. As such it is only - /// useful for resolving library symbols, not code generated symbols. - /// - /// If AbortOnFailure is false and no function with the given name is - /// found, this function silently returns a null pointer. Otherwise, - /// it prints a message to stderr and aborts. - /// - void *getPointerToNamedFunction(const std::string &Name, - bool AbortOnFailure = true) override; - - // CompilationCallback - Invoked the first time that a call site is found, - // which causes lazy compilation of the target function. - // - static void CompilationCallback(); - - /// getPointerToFunction - This returns the address of the specified function, - /// compiling it if necessary. - /// - void *getPointerToFunction(Function *F) override; - - /// addPointerToBasicBlock - Adds address of the specific basic block. - void addPointerToBasicBlock(const BasicBlock *BB, void *Addr); - - /// clearPointerToBasicBlock - Removes address of specific basic block. - void clearPointerToBasicBlock(const BasicBlock *BB); - - /// getPointerToBasicBlock - This returns the address of the specified basic - /// block, assuming function is compiled. - void *getPointerToBasicBlock(BasicBlock *BB) override; - - /// getOrEmitGlobalVariable - Return the address of the specified global - /// variable, possibly emitting it to memory if needed. This is used by the - /// Emitter. - void *getOrEmitGlobalVariable(const GlobalVariable *GV) override; - - /// getPointerToFunctionOrStub - If the specified function has been - /// code-gen'd, return a pointer to the function. If not, compile it, or use - /// a stub to implement lazy compilation if available. - /// - void *getPointerToFunctionOrStub(Function *F) override; - - /// recompileAndRelinkFunction - This method is used to force a function - /// which has already been compiled, to be compiled again, possibly - /// after it has been modified. Then the entry to the old copy is overwritten - /// with a branch to the new copy. 
If there was no old copy, this acts - /// just like JIT::getPointerToFunction(). - /// - void *recompileAndRelinkFunction(Function *F) override; - - /// freeMachineCodeForFunction - deallocate memory used to code-generate this - /// Function. - /// - void freeMachineCodeForFunction(Function *F) override; - - /// addPendingFunction - while jitting non-lazily, a called but non-codegen'd - /// function was encountered. Add it to a pending list to be processed after - /// the current function. - /// - void addPendingFunction(Function *F); - - /// getCodeEmitter - Return the code emitter this JIT is emitting into. - /// - JITCodeEmitter *getCodeEmitter() const { return JCE; } - - static ExecutionEngine *createJIT(Module *M, - std::string *ErrorStr, - JITMemoryManager *JMM, - bool GVsWithCode, - TargetMachine *TM); - - // Run the JIT on F and return information about the generated code - void runJITOnFunction(Function *F, MachineCodeInfo *MCI = nullptr) override; - - void RegisterJITEventListener(JITEventListener *L) override; - void UnregisterJITEventListener(JITEventListener *L) override; - - TargetMachine *getTargetMachine() override { return &TM; } - - /// These functions correspond to the methods on JITEventListener. They - /// iterate over the registered listeners and call the corresponding method on - /// each. - void NotifyFunctionEmitted( - const Function &F, void *Code, size_t Size, - const JITEvent_EmittedFunctionDetails &Details); - void NotifyFreeingMachineCode(void *OldPtr); - - BasicBlockAddressMapTy & - getBasicBlockAddressMap() { - return BasicBlockAddressMap; - } - - -private: - static JITCodeEmitter *createEmitter(JIT &J, JITMemoryManager *JMM, - TargetMachine &tm); - void runJITOnFunctionUnlocked(Function *F); - void updateFunctionStubUnlocked(Function *F); - void jitTheFunctionUnlocked(Function *F); - -protected: - - /// getMemoryforGV - Allocate memory for a global variable. - char* getMemoryForGV(const GlobalVariable* GV) override; - -}; - -} // End llvm namespace - -#endif diff --git a/lib/ExecutionEngine/JIT/JITEmitter.cpp b/lib/ExecutionEngine/JIT/JITEmitter.cpp deleted file mode 100644 index 50b8c10..0000000 --- a/lib/ExecutionEngine/JIT/JITEmitter.cpp +++ /dev/null @@ -1,1256 +0,0 @@ -//===-- JITEmitter.cpp - Write machine code to executable memory ----------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file defines a MachineCodeEmitter object that is used by the JIT to -// write machine code to memory and remember where relocatable values are. 
-// -//===----------------------------------------------------------------------===// - -#include "JIT.h" -#include "llvm/ADT/DenseMap.h" -#include "llvm/ADT/SmallPtrSet.h" -#include "llvm/ADT/SmallVector.h" -#include "llvm/ADT/Statistic.h" -#include "llvm/CodeGen/JITCodeEmitter.h" -#include "llvm/CodeGen/MachineCodeInfo.h" -#include "llvm/CodeGen/MachineConstantPool.h" -#include "llvm/CodeGen/MachineFunction.h" -#include "llvm/CodeGen/MachineJumpTableInfo.h" -#include "llvm/CodeGen/MachineModuleInfo.h" -#include "llvm/CodeGen/MachineRelocation.h" -#include "llvm/ExecutionEngine/GenericValue.h" -#include "llvm/ExecutionEngine/JITEventListener.h" -#include "llvm/ExecutionEngine/JITMemoryManager.h" -#include "llvm/IR/Constants.h" -#include "llvm/IR/DataLayout.h" -#include "llvm/IR/DebugInfo.h" -#include "llvm/IR/DerivedTypes.h" -#include "llvm/IR/Module.h" -#include "llvm/IR/Operator.h" -#include "llvm/IR/ValueHandle.h" -#include "llvm/IR/ValueMap.h" -#include "llvm/Support/Debug.h" -#include "llvm/Support/Disassembler.h" -#include "llvm/Support/ErrorHandling.h" -#include "llvm/Support/ManagedStatic.h" -#include "llvm/Support/Memory.h" -#include "llvm/Support/MutexGuard.h" -#include "llvm/Support/raw_ostream.h" -#include "llvm/Target/TargetInstrInfo.h" -#include "llvm/Target/TargetJITInfo.h" -#include "llvm/Target/TargetMachine.h" -#include "llvm/Target/TargetOptions.h" -#include -#ifndef NDEBUG -#include -#endif -using namespace llvm; - -#define DEBUG_TYPE "jit" - -STATISTIC(NumBytes, "Number of bytes of machine code compiled"); -STATISTIC(NumRelos, "Number of relocations applied"); -STATISTIC(NumRetries, "Number of retries with more memory"); - - -// A declaration may stop being a declaration once it's fully read from bitcode. -// This function returns true if F is fully read and is still a declaration. -static bool isNonGhostDeclaration(const Function *F) { - return F->isDeclaration() && !F->isMaterializable(); -} - -//===----------------------------------------------------------------------===// -// JIT lazy compilation code. -// -namespace { - class JITEmitter; - class JITResolverState; - - template - struct NoRAUWValueMapConfig : public ValueMapConfig { - typedef JITResolverState *ExtraData; - static void onRAUW(JITResolverState *, Value *Old, Value *New) { - llvm_unreachable("The JIT doesn't know how to handle a" - " RAUW on a value it has emitted."); - } - }; - - struct CallSiteValueMapConfig : public NoRAUWValueMapConfig { - typedef JITResolverState *ExtraData; - static void onDelete(JITResolverState *JRS, Function *F); - }; - - class JITResolverState { - public: - typedef ValueMap > - FunctionToLazyStubMapTy; - typedef std::map > CallSiteToFunctionMapTy; - typedef ValueMap, - CallSiteValueMapConfig> FunctionToCallSitesMapTy; - typedef std::map, void*> GlobalToIndirectSymMapTy; - private: - /// FunctionToLazyStubMap - Keep track of the lazy stub created for a - /// particular function so that we can reuse them if necessary. - FunctionToLazyStubMapTy FunctionToLazyStubMap; - - /// CallSiteToFunctionMap - Keep track of the function that each lazy call - /// site corresponds to, and vice versa. - CallSiteToFunctionMapTy CallSiteToFunctionMap; - FunctionToCallSitesMapTy FunctionToCallSitesMap; - - /// GlobalToIndirectSymMap - Keep track of the indirect symbol created for a - /// particular GlobalVariable so that we can reuse them if necessary. - GlobalToIndirectSymMapTy GlobalToIndirectSymMap; - -#ifndef NDEBUG - /// Instance of the JIT this ResolverState serves. 
- JIT *TheJIT; -#endif - - public: - JITResolverState(JIT *jit) : FunctionToLazyStubMap(this), - FunctionToCallSitesMap(this) { -#ifndef NDEBUG - TheJIT = jit; -#endif - } - - FunctionToLazyStubMapTy& getFunctionToLazyStubMap() { - return FunctionToLazyStubMap; - } - - GlobalToIndirectSymMapTy& getGlobalToIndirectSymMap() { - return GlobalToIndirectSymMap; - } - - std::pair LookupFunctionFromCallSite( - void *CallSite) const { - // The address given to us for the stub may not be exactly right, it - // might be a little bit after the stub. As such, use upper_bound to - // find it. - CallSiteToFunctionMapTy::const_iterator I = - CallSiteToFunctionMap.upper_bound(CallSite); - assert(I != CallSiteToFunctionMap.begin() && - "This is not a known call site!"); - --I; - return *I; - } - - void AddCallSite(void *CallSite, Function *F) { - bool Inserted = CallSiteToFunctionMap.insert( - std::make_pair(CallSite, F)).second; - (void)Inserted; - assert(Inserted && "Pair was already in CallSiteToFunctionMap"); - FunctionToCallSitesMap[F].insert(CallSite); - } - - void EraseAllCallSitesForPrelocked(Function *F); - - // Erases _all_ call sites regardless of their function. This is used to - // unregister the stub addresses from the StubToResolverMap in - // ~JITResolver(). - void EraseAllCallSitesPrelocked(); - }; - - /// JITResolver - Keep track of, and resolve, call sites for functions that - /// have not yet been compiled. - class JITResolver { - typedef JITResolverState::FunctionToLazyStubMapTy FunctionToLazyStubMapTy; - typedef JITResolverState::CallSiteToFunctionMapTy CallSiteToFunctionMapTy; - typedef JITResolverState::GlobalToIndirectSymMapTy GlobalToIndirectSymMapTy; - - /// LazyResolverFn - The target lazy resolver function that we actually - /// rewrite instructions to use. - TargetJITInfo::LazyResolverFn LazyResolverFn; - - JITResolverState state; - - /// ExternalFnToStubMap - This is the equivalent of FunctionToLazyStubMap - /// for external functions. TODO: Of course, external functions don't need - /// a lazy stub. It's actually here to make it more likely that far calls - /// succeed, but no single stub can guarantee that. I'll remove this in a - /// subsequent checkin when I actually fix far calls. - std::map ExternalFnToStubMap; - - /// revGOTMap - map addresses to indexes in the GOT - std::map revGOTMap; - unsigned nextGOTIndex; - - JITEmitter &JE; - - /// Instance of JIT corresponding to this Resolver. - JIT *TheJIT; - - public: - explicit JITResolver(JIT &jit, JITEmitter &je) - : state(&jit), nextGOTIndex(0), JE(je), TheJIT(&jit) { - LazyResolverFn = jit.getJITInfo().getLazyResolverFunction(JITCompilerFn); - } - - ~JITResolver(); - - /// getLazyFunctionStubIfAvailable - This returns a pointer to a function's - /// lazy-compilation stub if it has already been created. - void *getLazyFunctionStubIfAvailable(Function *F); - - /// getLazyFunctionStub - This returns a pointer to a function's - /// lazy-compilation stub, creating one on demand as needed. - void *getLazyFunctionStub(Function *F); - - /// getExternalFunctionStub - Return a stub for the function at the - /// specified address, created lazily on demand. - void *getExternalFunctionStub(void *FnAddr); - - /// getGlobalValueIndirectSym - Return an indirect symbol containing the - /// specified GV address. - void *getGlobalValueIndirectSym(GlobalValue *V, void *GVAddress); - - /// getGOTIndexForAddress - Return a new or existing index in the GOT for - /// an address. 
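(LookupFunctionFromCallSite above relies on an ordered map keyed by stub start address: the return address handed to the resolver can point a few bytes past the start of the stub, so the code uses upper_bound and steps back one entry to find the stub that covers it. The same trick reappears in StubToResolverMapTy below. A standalone sketch of the lookup:)

    #include <cassert>
    #include <map>
    #include <string>

    // Map the *start* address of each stub to the thing it belongs to.
    std::map<void *, std::string> StubOwners;

    // Given an address at or slightly after a stub's start, find its owner.
    const std::string &lookupOwner(void *Addr) {
      auto I = StubOwners.upper_bound(Addr);   // first entry strictly greater
      assert(I != StubOwners.begin() && "address precedes every known stub");
      --I;                                     // the stub starting at or before Addr
      return I->second;
    }

    int main() {
      char Stub[16];                            // pretend this is an emitted stub
      StubOwners[Stub] = "foo";
      assert(lookupOwner(Stub + 5) == "foo");   // mid-stub address still resolves
    }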
This function only manages slots, it does not manage the - /// contents of the slots or the memory associated with the GOT. - unsigned getGOTIndexForAddr(void *addr); - - /// JITCompilerFn - This function is called to resolve a stub to a compiled - /// address. If the LLVM Function corresponding to the stub has not yet - /// been compiled, this function compiles it first. - static void *JITCompilerFn(void *Stub); - }; - - class StubToResolverMapTy { - /// Map a stub address to a specific instance of a JITResolver so that - /// lazily-compiled functions can find the right resolver to use. - /// - /// Guarded by Lock. - std::map Map; - - /// Guards Map from concurrent accesses. - mutable sys::Mutex Lock; - - public: - /// Registers a Stub to be resolved by Resolver. - void RegisterStubResolver(void *Stub, JITResolver *Resolver) { - MutexGuard guard(Lock); - Map.insert(std::make_pair(Stub, Resolver)); - } - /// Unregisters the Stub when it's invalidated. - void UnregisterStubResolver(void *Stub) { - MutexGuard guard(Lock); - Map.erase(Stub); - } - /// Returns the JITResolver instance that owns the Stub. - JITResolver *getResolverFromStub(void *Stub) const { - MutexGuard guard(Lock); - // The address given to us for the stub may not be exactly right, it might - // be a little bit after the stub. As such, use upper_bound to find it. - // This is the same trick as in LookupFunctionFromCallSite from - // JITResolverState. - std::map::const_iterator I = Map.upper_bound(Stub); - assert(I != Map.begin() && "This is not a known stub!"); - --I; - return I->second; - } - /// True if any stubs refer to the given resolver. Only used in an assert(). - /// O(N) - bool ResolverHasStubs(JITResolver* Resolver) const { - MutexGuard guard(Lock); - for (std::map::const_iterator I = Map.begin(), - E = Map.end(); I != E; ++I) { - if (I->second == Resolver) - return true; - } - return false; - } - }; - /// This needs to be static so that a lazy call stub can access it with no - /// context except the address of the stub. - ManagedStatic StubToResolverMap; - - /// JITEmitter - The JIT implementation of the MachineCodeEmitter, which is - /// used to output functions to memory for execution. - class JITEmitter : public JITCodeEmitter { - JITMemoryManager *MemMgr; - - // When outputting a function stub in the context of some other function, we - // save BufferBegin/BufferEnd/CurBufferPtr here. - uint8_t *SavedBufferBegin, *SavedBufferEnd, *SavedCurBufferPtr; - - // When reattempting to JIT a function after running out of space, we store - // the estimated size of the function we're trying to JIT here, so we can - // ask the memory manager for at least this much space. When we - // successfully emit the function, we reset this back to zero. - uintptr_t SizeEstimate; - - /// Relocations - These are the relocations that the function needs, as - /// emitted. - std::vector Relocations; - - /// MBBLocations - This vector is a mapping from MBB ID's to their address. - /// It is filled in by the StartMachineBasicBlock callback and queried by - /// the getMachineBasicBlockAddress callback. - std::vector MBBLocations; - - /// ConstantPool - The constant pool for the current function. - /// - MachineConstantPool *ConstantPool; - - /// ConstantPoolBase - A pointer to the first entry in the constant pool. - /// - void *ConstantPoolBase; - - /// ConstPoolAddresses - Addresses of individual constant pool entries. - /// - SmallVector ConstPoolAddresses; - - /// JumpTable - The jump tables for the current function. 
- /// - MachineJumpTableInfo *JumpTable; - - /// JumpTableBase - A pointer to the first entry in the jump table. - /// - void *JumpTableBase; - - /// Resolver - This contains info about the currently resolved functions. - JITResolver Resolver; - - /// LabelLocations - This vector is a mapping from Label ID's to their - /// address. - DenseMap LabelLocations; - - /// MMI - Machine module info for exception informations - MachineModuleInfo* MMI; - - // CurFn - The llvm function being emitted. Only valid during - // finishFunction(). - const Function *CurFn; - - /// Information about emitted code, which is passed to the - /// JITEventListeners. This is reset in startFunction and used in - /// finishFunction. - JITEvent_EmittedFunctionDetails EmissionDetails; - - struct EmittedCode { - void *FunctionBody; // Beginning of the function's allocation. - void *Code; // The address the function's code actually starts at. - void *ExceptionTable; - EmittedCode() : FunctionBody(nullptr), Code(nullptr), - ExceptionTable(nullptr) {} - }; - struct EmittedFunctionConfig : public ValueMapConfig { - typedef JITEmitter *ExtraData; - static void onDelete(JITEmitter *, const Function*); - static void onRAUW(JITEmitter *, const Function*, const Function*); - }; - ValueMap EmittedFunctions; - - DebugLoc PrevDL; - - /// Instance of the JIT - JIT *TheJIT; - - public: - JITEmitter(JIT &jit, JITMemoryManager *JMM, TargetMachine &TM) - : SizeEstimate(0), Resolver(jit, *this), MMI(nullptr), CurFn(nullptr), - EmittedFunctions(this), TheJIT(&jit) { - MemMgr = JMM ? JMM : JITMemoryManager::CreateDefaultMemManager(); - if (jit.getJITInfo().needsGOT()) { - MemMgr->AllocateGOT(); - DEBUG(dbgs() << "JIT is managing a GOT\n"); - } - - } - ~JITEmitter() { - delete MemMgr; - } - - JITResolver &getJITResolver() { return Resolver; } - - void startFunction(MachineFunction &F) override; - bool finishFunction(MachineFunction &F) override; - - void emitConstantPool(MachineConstantPool *MCP); - void initJumpTableInfo(MachineJumpTableInfo *MJTI); - void emitJumpTableInfo(MachineJumpTableInfo *MJTI); - - void startGVStub(const GlobalValue* GV, - unsigned StubSize, unsigned Alignment = 1); - void startGVStub(void *Buffer, unsigned StubSize); - void finishGVStub(); - void *allocIndirectGV(const GlobalValue *GV, const uint8_t *Buffer, - size_t Size, unsigned Alignment) override; - - /// allocateSpace - Reserves space in the current block if any, or - /// allocate a new one of the given size. - void *allocateSpace(uintptr_t Size, unsigned Alignment) override; - - /// allocateGlobal - Allocate memory for a global. Unlike allocateSpace, - /// this method does not allocate memory in the current output buffer, - /// because a global may live longer than the current function. 
- void *allocateGlobal(uintptr_t Size, unsigned Alignment) override; - - void addRelocation(const MachineRelocation &MR) override { - Relocations.push_back(MR); - } - - void StartMachineBasicBlock(MachineBasicBlock *MBB) override { - if (MBBLocations.size() <= (unsigned)MBB->getNumber()) - MBBLocations.resize((MBB->getNumber()+1)*2); - MBBLocations[MBB->getNumber()] = getCurrentPCValue(); - if (MBB->hasAddressTaken()) - TheJIT->addPointerToBasicBlock(MBB->getBasicBlock(), - (void*)getCurrentPCValue()); - DEBUG(dbgs() << "JIT: Emitting BB" << MBB->getNumber() << " at [" - << (void*) getCurrentPCValue() << "]\n"); - } - - uintptr_t getConstantPoolEntryAddress(unsigned Entry) const override; - uintptr_t getJumpTableEntryAddress(unsigned Entry) const override; - - uintptr_t - getMachineBasicBlockAddress(MachineBasicBlock *MBB) const override { - assert(MBBLocations.size() > (unsigned)MBB->getNumber() && - MBBLocations[MBB->getNumber()] && "MBB not emitted!"); - return MBBLocations[MBB->getNumber()]; - } - - /// retryWithMoreMemory - Log a retry and deallocate all memory for the - /// given function. Increase the minimum allocation size so that we get - /// more memory next time. - void retryWithMoreMemory(MachineFunction &F); - - /// deallocateMemForFunction - Deallocate all memory for the specified - /// function body. - void deallocateMemForFunction(const Function *F); - - void processDebugLoc(DebugLoc DL, bool BeforePrintingInsn) override; - - void emitLabel(MCSymbol *Label) override { - LabelLocations[Label] = getCurrentPCValue(); - } - - DenseMap *getLabelLocations() override { - return &LabelLocations; - } - - uintptr_t getLabelAddress(MCSymbol *Label) const override { - assert(LabelLocations.count(Label) && "Label not emitted!"); - return LabelLocations.find(Label)->second; - } - - void setModuleInfo(MachineModuleInfo* Info) override { - MMI = Info; - } - - private: - void *getPointerToGlobal(GlobalValue *GV, void *Reference, - bool MayNeedFarStub); - void *getPointerToGVIndirectSym(GlobalValue *V, void *Reference); - }; -} - -void CallSiteValueMapConfig::onDelete(JITResolverState *JRS, Function *F) { - JRS->EraseAllCallSitesForPrelocked(F); -} - -void JITResolverState::EraseAllCallSitesForPrelocked(Function *F) { - FunctionToCallSitesMapTy::iterator F2C = FunctionToCallSitesMap.find(F); - if (F2C == FunctionToCallSitesMap.end()) - return; - StubToResolverMapTy &S2RMap = *StubToResolverMap; - for (SmallPtrSet::const_iterator I = F2C->second.begin(), - E = F2C->second.end(); I != E; ++I) { - S2RMap.UnregisterStubResolver(*I); - bool Erased = CallSiteToFunctionMap.erase(*I); - (void)Erased; - assert(Erased && "Missing call site->function mapping"); - } - FunctionToCallSitesMap.erase(F2C); -} - -void JITResolverState::EraseAllCallSitesPrelocked() { - StubToResolverMapTy &S2RMap = *StubToResolverMap; - for (CallSiteToFunctionMapTy::const_iterator - I = CallSiteToFunctionMap.begin(), - E = CallSiteToFunctionMap.end(); I != E; ++I) { - S2RMap.UnregisterStubResolver(I->first); - } - CallSiteToFunctionMap.clear(); - FunctionToCallSitesMap.clear(); -} - -JITResolver::~JITResolver() { - // No need to lock because we're in the destructor, and state isn't shared. - state.EraseAllCallSitesPrelocked(); - assert(!StubToResolverMap->ResolverHasStubs(this) && - "Resolver destroyed with stubs still alive."); -} - -/// getLazyFunctionStubIfAvailable - This returns a pointer to a function stub -/// if it has already been created. 
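(MBBLocations above is a dense vector indexed by basic-block number: StartMachineBasicBlock grows it on demand and records the current PC, and getMachineBasicBlockAddress asserts the entry exists before handing it to the relocation code. A standalone sketch of that bookkeeping, with illustrative names:)

    #include <cassert>
    #include <cstdint>
    #include <vector>

    class BlockAddressTable {
      std::vector<uintptr_t> Locations;   // indexed by basic-block number
    public:
      void recordBlockStart(unsigned BlockNumber, uintptr_t PC) {
        if (Locations.size() <= BlockNumber)
          Locations.resize((BlockNumber + 1) * 2);   // grow with headroom
        Locations[BlockNumber] = PC;
      }
      uintptr_t getBlockAddress(unsigned BlockNumber) const {
        assert(BlockNumber < Locations.size() && Locations[BlockNumber] &&
               "block not emitted yet");
        return Locations[BlockNumber];
      }
    };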
-void *JITResolver::getLazyFunctionStubIfAvailable(Function *F) { - MutexGuard locked(TheJIT->lock); - - // If we already have a stub for this function, recycle it. - return state.getFunctionToLazyStubMap().lookup(F); -} - -/// getFunctionStub - This returns a pointer to a function stub, creating -/// one on demand as needed. -void *JITResolver::getLazyFunctionStub(Function *F) { - MutexGuard locked(TheJIT->lock); - - // If we already have a lazy stub for this function, recycle it. - void *&Stub = state.getFunctionToLazyStubMap()[F]; - if (Stub) return Stub; - - // Call the lazy resolver function if we are JIT'ing lazily. Otherwise we - // must resolve the symbol now. - void *Actual = TheJIT->isCompilingLazily() - ? (void *)(intptr_t)LazyResolverFn : (void *)nullptr; - - // If this is an external declaration, attempt to resolve the address now - // to place in the stub. - if (isNonGhostDeclaration(F) || F->hasAvailableExternallyLinkage()) { - Actual = TheJIT->getPointerToFunction(F); - - // If we resolved the symbol to a null address (eg. a weak external) - // don't emit a stub. Return a null pointer to the application. - if (!Actual) return nullptr; - } - - TargetJITInfo::StubLayout SL = TheJIT->getJITInfo().getStubLayout(); - JE.startGVStub(F, SL.Size, SL.Alignment); - // Codegen a new stub, calling the lazy resolver or the actual address of the - // external function, if it was resolved. - Stub = TheJIT->getJITInfo().emitFunctionStub(F, Actual, JE); - JE.finishGVStub(); - - if (Actual != (void*)(intptr_t)LazyResolverFn) { - // If we are getting the stub for an external function, we really want the - // address of the stub in the GlobalAddressMap for the JIT, not the address - // of the external function. - TheJIT->updateGlobalMapping(F, Stub); - } - - DEBUG(dbgs() << "JIT: Lazy stub emitted at [" << Stub << "] for function '" - << F->getName() << "'\n"); - - if (TheJIT->isCompilingLazily()) { - // Register this JITResolver as the one corresponding to this call site so - // JITCompilerFn will be able to find it. - StubToResolverMap->RegisterStubResolver(Stub, this); - - // Finally, keep track of the stub-to-Function mapping so that the - // JITCompilerFn knows which function to compile! - state.AddCallSite(Stub, F); - } else if (!Actual) { - // If we are JIT'ing non-lazily but need to call a function that does not - // exist yet, add it to the JIT's work list so that we can fill in the - // stub address later. - assert(!isNonGhostDeclaration(F) && !F->hasAvailableExternallyLinkage() && - "'Actual' should have been set above."); - TheJIT->addPendingFunction(F); - } - - return Stub; -} - -/// getGlobalValueIndirectSym - Return a lazy pointer containing the specified -/// GV address. -void *JITResolver::getGlobalValueIndirectSym(GlobalValue *GV, void *GVAddress) { - MutexGuard locked(TheJIT->lock); - - // If we already have a stub for this global variable, recycle it. - void *&IndirectSym = state.getGlobalToIndirectSymMap()[GV]; - if (IndirectSym) return IndirectSym; - - // Otherwise, codegen a new indirect symbol. - IndirectSym = TheJIT->getJITInfo().emitGlobalValueIndirectSym(GV, GVAddress, - JE); - - DEBUG(dbgs() << "JIT: Indirect symbol emitted at [" << IndirectSym - << "] for GV '" << GV->getName() << "'\n"); - - return IndirectSym; -} - -/// getExternalFunctionStub - Return a stub for the function at the -/// specified address, created lazily on demand. -void *JITResolver::getExternalFunctionStub(void *FnAddr) { - // If we already have a stub for this function, recycle it. 
- void *&Stub = ExternalFnToStubMap[FnAddr]; - if (Stub) return Stub; - - TargetJITInfo::StubLayout SL = TheJIT->getJITInfo().getStubLayout(); - JE.startGVStub(nullptr, SL.Size, SL.Alignment); - Stub = TheJIT->getJITInfo().emitFunctionStub(nullptr, FnAddr, JE); - JE.finishGVStub(); - - DEBUG(dbgs() << "JIT: Stub emitted at [" << Stub - << "] for external function at '" << FnAddr << "'\n"); - return Stub; -} - -unsigned JITResolver::getGOTIndexForAddr(void* addr) { - unsigned idx = revGOTMap[addr]; - if (!idx) { - idx = ++nextGOTIndex; - revGOTMap[addr] = idx; - DEBUG(dbgs() << "JIT: Adding GOT entry " << idx << " for addr [" - << addr << "]\n"); - } - return idx; -} - -/// JITCompilerFn - This function is called when a lazy compilation stub has -/// been entered. It looks up which function this stub corresponds to, compiles -/// it if necessary, then returns the resultant function pointer. -void *JITResolver::JITCompilerFn(void *Stub) { - JITResolver *JR = StubToResolverMap->getResolverFromStub(Stub); - assert(JR && "Unable to find the corresponding JITResolver to the call site"); - - Function* F = nullptr; - void* ActualPtr = nullptr; - - { - // Only lock for getting the Function. The call getPointerToFunction made - // in this function might trigger function materializing, which requires - // JIT lock to be unlocked. - MutexGuard locked(JR->TheJIT->lock); - - // The address given to us for the stub may not be exactly right, it might - // be a little bit after the stub. As such, use upper_bound to find it. - std::pair I = - JR->state.LookupFunctionFromCallSite(Stub); - F = I.second; - ActualPtr = I.first; - } - - // If we have already code generated the function, just return the address. - void *Result = JR->TheJIT->getPointerToGlobalIfAvailable(F); - - if (!Result) { - // Otherwise we don't have it, do lazy compilation now. - - // If lazy compilation is disabled, emit a useful error message and abort. - if (!JR->TheJIT->isCompilingLazily()) { - report_fatal_error("LLVM JIT requested to do lazy compilation of" - " function '" - + F->getName() + "' when lazy compiles are disabled!"); - } - - DEBUG(dbgs() << "JIT: Lazily resolving function '" << F->getName() - << "' In stub ptr = " << Stub << " actual ptr = " - << ActualPtr << "\n"); - (void)ActualPtr; - - Result = JR->TheJIT->getPointerToFunction(F); - } - - // Reacquire the lock to update the GOT map. - MutexGuard locked(JR->TheJIT->lock); - - // We might like to remove the call site from the CallSiteToFunction map, but - // we can't do that! Multiple threads could be stuck, waiting to acquire the - // lock above. As soon as the 1st function finishes compiling the function, - // the next one will be released, and needs to be able to find the function it - // needs to call. - - // FIXME: We could rewrite all references to this stub if we knew them. - - // What we will do is set the compiled function address to map to the - // same GOT entry as the stub so that later clients may update the GOT - // if they see it still using the stub address. - // Note: this is done so the Resolver doesn't have to manage GOT memory - // Do this without allocating map space if the target isn't using a GOT - if(JR->revGOTMap.find(Stub) != JR->revGOTMap.end()) - JR->revGOTMap[Result] = JR->revGOTMap[Stub]; - - return Result; -} - -//===----------------------------------------------------------------------===// -// JITEmitter code. 
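(getGOTIndexForAddr above hands out GOT slots lazily: an address that has never been seen gets the next free index, and JITCompilerFn later points the freshly compiled function at the same slot as its stub so stale GOT users keep working. A standalone sketch of that allocation; as in the original, index 0 means "not allocated yet":)

    #include <map>

    class GotIndexAllocator {
      std::map<void *, unsigned> RevGot;   // address -> GOT slot, 0 means "none yet"
      unsigned NextIndex = 0;
    public:
      unsigned indexFor(void *Addr) {
        unsigned &Idx = RevGot[Addr];      // inserts 0 on first use
        if (!Idx)
          Idx = ++NextIndex;               // allocate the next slot lazily
        return Idx;
      }
      // After compiling the real function, alias it to the stub's slot so code
      // that still jumps through the stub's GOT entry stays valid.
      void aliasToStub(void *Stub, void *Compiled) {
        auto I = RevGot.find(Stub);
        if (I != RevGot.end())
          RevGot[Compiled] = I->second;
      }
    };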
-// - -static GlobalObject *getSimpleAliasee(Constant *C) { - C = C->stripPointerCasts(); - return dyn_cast(C); -} - -void *JITEmitter::getPointerToGlobal(GlobalValue *V, void *Reference, - bool MayNeedFarStub) { - if (GlobalVariable *GV = dyn_cast(V)) - return TheJIT->getOrEmitGlobalVariable(GV); - - if (GlobalAlias *GA = dyn_cast(V)) { - // We can only handle simple cases. - if (GlobalValue *GV = getSimpleAliasee(GA->getAliasee())) - return TheJIT->getPointerToGlobal(GV); - return nullptr; - } - - // If we have already compiled the function, return a pointer to its body. - Function *F = cast(V); - - void *FnStub = Resolver.getLazyFunctionStubIfAvailable(F); - if (FnStub) { - // Return the function stub if it's already created. We do this first so - // that we're returning the same address for the function as any previous - // call. TODO: Yes, this is wrong. The lazy stub isn't guaranteed to be - // close enough to call. - return FnStub; - } - - // If we know the target can handle arbitrary-distance calls, try to - // return a direct pointer. - if (!MayNeedFarStub) { - // If we have code, go ahead and return that. - void *ResultPtr = TheJIT->getPointerToGlobalIfAvailable(F); - if (ResultPtr) return ResultPtr; - - // If this is an external function pointer, we can force the JIT to - // 'compile' it, which really just adds it to the map. - if (isNonGhostDeclaration(F) || F->hasAvailableExternallyLinkage()) - return TheJIT->getPointerToFunction(F); - } - - // Otherwise, we may need a to emit a stub, and, conservatively, we always do - // so. Note that it's possible to return null from getLazyFunctionStub in the - // case of a weak extern that fails to resolve. - return Resolver.getLazyFunctionStub(F); -} - -void *JITEmitter::getPointerToGVIndirectSym(GlobalValue *V, void *Reference) { - // Make sure GV is emitted first, and create a stub containing the fully - // resolved address. - void *GVAddress = getPointerToGlobal(V, Reference, false); - void *StubAddr = Resolver.getGlobalValueIndirectSym(V, GVAddress); - return StubAddr; -} - -void JITEmitter::processDebugLoc(DebugLoc DL, bool BeforePrintingInsn) { - if (DL.isUnknown()) return; - if (!BeforePrintingInsn) return; - - const LLVMContext &Context = EmissionDetails.MF->getFunction()->getContext(); - - if (DL.getScope(Context) != nullptr && PrevDL != DL) { - JITEvent_EmittedFunctionDetails::LineStart NextLine; - NextLine.Address = getCurrentPCValue(); - NextLine.Loc = DL; - EmissionDetails.LineStarts.push_back(NextLine); - } - - PrevDL = DL; -} - -static unsigned GetConstantPoolSizeInBytes(MachineConstantPool *MCP, - const DataLayout *TD) { - const std::vector &Constants = MCP->getConstants(); - if (Constants.empty()) return 0; - - unsigned Size = 0; - for (unsigned i = 0, e = Constants.size(); i != e; ++i) { - MachineConstantPoolEntry CPE = Constants[i]; - unsigned AlignMask = CPE.getAlignment() - 1; - Size = (Size + AlignMask) & ~AlignMask; - Type *Ty = CPE.getType(); - Size += TD->getTypeAllocSize(Ty); - } - return Size; -} - -void JITEmitter::startFunction(MachineFunction &F) { - DEBUG(dbgs() << "JIT: Starting CodeGen of Function " - << F.getName() << "\n"); - - uintptr_t ActualSize = 0; - // Set the memory writable, if it's not already - MemMgr->setMemoryWritable(); - - if (SizeEstimate > 0) { - // SizeEstimate will be non-zero on reallocation attempts. 
- ActualSize = SizeEstimate; - } - - BufferBegin = CurBufferPtr = MemMgr->startFunctionBody(F.getFunction(), - ActualSize); - BufferEnd = BufferBegin+ActualSize; - EmittedFunctions[F.getFunction()].FunctionBody = BufferBegin; - - // Ensure the constant pool/jump table info is at least 4-byte aligned. - emitAlignment(16); - - emitConstantPool(F.getConstantPool()); - if (MachineJumpTableInfo *MJTI = F.getJumpTableInfo()) - initJumpTableInfo(MJTI); - - // About to start emitting the machine code for the function. - emitAlignment(std::max(F.getFunction()->getAlignment(), 8U)); - TheJIT->updateGlobalMapping(F.getFunction(), CurBufferPtr); - EmittedFunctions[F.getFunction()].Code = CurBufferPtr; - - MBBLocations.clear(); - - EmissionDetails.MF = &F; - EmissionDetails.LineStarts.clear(); -} - -bool JITEmitter::finishFunction(MachineFunction &F) { - if (CurBufferPtr == BufferEnd) { - // We must call endFunctionBody before retrying, because - // deallocateMemForFunction requires it. - MemMgr->endFunctionBody(F.getFunction(), BufferBegin, CurBufferPtr); - retryWithMoreMemory(F); - return true; - } - - if (MachineJumpTableInfo *MJTI = F.getJumpTableInfo()) - emitJumpTableInfo(MJTI); - - // FnStart is the start of the text, not the start of the constant pool and - // other per-function data. - uint8_t *FnStart = - (uint8_t *)TheJIT->getPointerToGlobalIfAvailable(F.getFunction()); - - // FnEnd is the end of the function's machine code. - uint8_t *FnEnd = CurBufferPtr; - - if (!Relocations.empty()) { - CurFn = F.getFunction(); - NumRelos += Relocations.size(); - - // Resolve the relocations to concrete pointers. - for (unsigned i = 0, e = Relocations.size(); i != e; ++i) { - MachineRelocation &MR = Relocations[i]; - void *ResultPtr = nullptr; - if (!MR.letTargetResolve()) { - if (MR.isExternalSymbol()) { - ResultPtr = TheJIT->getPointerToNamedFunction(MR.getExternalSymbol(), - false); - DEBUG(dbgs() << "JIT: Map \'" << MR.getExternalSymbol() << "\' to [" - << ResultPtr << "]\n"); - - // If the target REALLY wants a stub for this function, emit it now. - if (MR.mayNeedFarStub()) { - ResultPtr = Resolver.getExternalFunctionStub(ResultPtr); - } - } else if (MR.isGlobalValue()) { - ResultPtr = getPointerToGlobal(MR.getGlobalValue(), - BufferBegin+MR.getMachineCodeOffset(), - MR.mayNeedFarStub()); - } else if (MR.isIndirectSymbol()) { - ResultPtr = getPointerToGVIndirectSym( - MR.getGlobalValue(), BufferBegin+MR.getMachineCodeOffset()); - } else if (MR.isBasicBlock()) { - ResultPtr = (void*)getMachineBasicBlockAddress(MR.getBasicBlock()); - } else if (MR.isConstantPoolIndex()) { - ResultPtr = - (void*)getConstantPoolEntryAddress(MR.getConstantPoolIndex()); - } else { - assert(MR.isJumpTableIndex()); - ResultPtr=(void*)getJumpTableEntryAddress(MR.getJumpTableIndex()); - } - - MR.setResultPointer(ResultPtr); - } - - // if we are managing the GOT and the relocation wants an index, - // give it one - if (MR.isGOTRelative() && MemMgr->isManagingGOT()) { - unsigned idx = Resolver.getGOTIndexForAddr(ResultPtr); - MR.setGOTIndex(idx); - if (((void**)MemMgr->getGOTBase())[idx] != ResultPtr) { - DEBUG(dbgs() << "JIT: GOT was out of date for " << ResultPtr - << " pointing at " << ((void**)MemMgr->getGOTBase())[idx] - << "\n"); - ((void**)MemMgr->getGOTBase())[idx] = ResultPtr; - } - } - } - - CurFn = nullptr; - TheJIT->getJITInfo().relocate(BufferBegin, &Relocations[0], - Relocations.size(), MemMgr->getGOTBase()); - } - - // Update the GOT entry for F to point to the new code. 
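(The finishFunction loop above resolves each recorded MachineRelocation to a concrete pointer, switching on what the relocation refers to — external symbol, global, basic block, constant-pool or jump-table entry — before letting the target patch the instruction bytes. A much-reduced standalone sketch of that dispatch, assuming a tiny relocation record of our own rather than LLVM's MachineRelocation:)

    #include <cstdint>
    #include <vector>

    // A toy relocation record: where to patch plus what it refers to.
    enum class RelocKind { ExternalSymbol, BasicBlock, ConstantPool };

    struct Reloc {
      RelocKind Kind;
      unsigned  Offset;      // offset of the patch site inside the function body
      unsigned  Index;       // BB number or constant-pool entry, when applicable
      const char *Symbol;    // external symbol name, when applicable
      void *Result = nullptr;
    };

    struct ResolveContext {
      std::vector<uintptr_t> BlockAddrs;      // filled while emitting blocks
      std::vector<uintptr_t> ConstPoolAddrs;  // filled by the constant-pool emitter
      void *(*LookupExternal)(const char *);  // e.g. a dlsym-style lookup
    };

    void resolveRelocations(std::vector<Reloc> &Relocs, const ResolveContext &Ctx) {
      for (Reloc &R : Relocs) {
        switch (R.Kind) {
        case RelocKind::ExternalSymbol:
          R.Result = Ctx.LookupExternal(R.Symbol);
          break;
        case RelocKind::BasicBlock:
          R.Result = (void *)Ctx.BlockAddrs[R.Index];
          break;
        case RelocKind::ConstantPool:
          R.Result = (void *)Ctx.ConstPoolAddrs[R.Index];
          break;
        }
        // A real emitter would now rewrite the bytes at FunctionBody + R.Offset
        // using R.Result, in a target-specific way.
      }
    }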
- if (MemMgr->isManagingGOT()) { - unsigned idx = Resolver.getGOTIndexForAddr((void*)BufferBegin); - if (((void**)MemMgr->getGOTBase())[idx] != (void*)BufferBegin) { - DEBUG(dbgs() << "JIT: GOT was out of date for " << (void*)BufferBegin - << " pointing at " << ((void**)MemMgr->getGOTBase())[idx] - << "\n"); - ((void**)MemMgr->getGOTBase())[idx] = (void*)BufferBegin; - } - } - - // CurBufferPtr may have moved beyond FnEnd, due to memory allocation for - // global variables that were referenced in the relocations. - MemMgr->endFunctionBody(F.getFunction(), BufferBegin, CurBufferPtr); - - if (CurBufferPtr == BufferEnd) { - retryWithMoreMemory(F); - return true; - } else { - // Now that we've succeeded in emitting the function, reset the - // SizeEstimate back down to zero. - SizeEstimate = 0; - } - - BufferBegin = CurBufferPtr = nullptr; - NumBytes += FnEnd-FnStart; - - // Invalidate the icache if necessary. - sys::Memory::InvalidateInstructionCache(FnStart, FnEnd-FnStart); - - TheJIT->NotifyFunctionEmitted(*F.getFunction(), FnStart, FnEnd-FnStart, - EmissionDetails); - - // Reset the previous debug location. - PrevDL = DebugLoc(); - - DEBUG(dbgs() << "JIT: Finished CodeGen of [" << (void*)FnStart - << "] Function: " << F.getName() - << ": " << (FnEnd-FnStart) << " bytes of text, " - << Relocations.size() << " relocations\n"); - - Relocations.clear(); - ConstPoolAddresses.clear(); - - // Mark code region readable and executable if it's not so already. - MemMgr->setMemoryExecutable(); - - DEBUG({ - if (sys::hasDisassembler()) { - dbgs() << "JIT: Disassembled code:\n"; - dbgs() << sys::disassembleBuffer(FnStart, FnEnd-FnStart, - (uintptr_t)FnStart); - } else { - dbgs() << "JIT: Binary code:\n"; - uint8_t* q = FnStart; - for (int i = 0; q < FnEnd; q += 4, ++i) { - if (i == 4) - i = 0; - if (i == 0) - dbgs() << "JIT: " << (long)(q - FnStart) << ": "; - bool Done = false; - for (int j = 3; j >= 0; --j) { - if (q + j >= FnEnd) - Done = true; - else - dbgs() << (unsigned short)q[j]; - } - if (Done) - break; - dbgs() << ' '; - if (i == 3) - dbgs() << '\n'; - } - dbgs()<< '\n'; - } - }); - - if (MMI) - MMI->EndFunction(); - - return false; -} - -void JITEmitter::retryWithMoreMemory(MachineFunction &F) { - DEBUG(dbgs() << "JIT: Ran out of space for native code. Reattempting.\n"); - Relocations.clear(); // Clear the old relocations or we'll reapply them. - ConstPoolAddresses.clear(); - ++NumRetries; - deallocateMemForFunction(F.getFunction()); - // Try again with at least twice as much free space. - SizeEstimate = (uintptr_t)(2 * (BufferEnd - BufferBegin)); - - for (MachineFunction::iterator MBB = F.begin(), E = F.end(); MBB != E; ++MBB){ - if (MBB->hasAddressTaken()) - TheJIT->clearPointerToBasicBlock(MBB->getBasicBlock()); - } -} - -/// deallocateMemForFunction - Deallocate all memory for the specified -/// function body. Also drop any references the function has to stubs. -/// May be called while the Function is being destroyed inside ~Value(). -void JITEmitter::deallocateMemForFunction(const Function *F) { - ValueMap::iterator - Emitted = EmittedFunctions.find(F); - if (Emitted != EmittedFunctions.end()) { - MemMgr->deallocateFunctionBody(Emitted->second.FunctionBody); - TheJIT->NotifyFreeingMachineCode(Emitted->second.Code); - - EmittedFunctions.erase(Emitted); - } -} - - -void *JITEmitter::allocateSpace(uintptr_t Size, unsigned Alignment) { - if (BufferBegin) - return JITCodeEmitter::allocateSpace(Size, Alignment); - - // create a new memory block if there is no active one. 
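(retryWithMoreMemory above is the out-of-space recovery path: when the emitter runs off the end of its buffer, it discards the attempt, remembers at least twice the size of the buffer it just overflowed as the next estimate, and re-runs emission. A standalone sketch of that loop, assuming a fake emitter that just reports whether the bytes fit:)

    #include <cstddef>
    #include <cstdio>

    // Fake "emit": succeeds only when the bytes needed fit in the buffer.
    static bool tryEmit(size_t Capacity, size_t Needed) { return Needed <= Capacity; }

    // Keep retrying with at least double the previous buffer until emission fits.
    size_t emitWithRetry(size_t InitialCapacity, size_t BytesNeeded,
                         unsigned *RetriesOut = nullptr) {
      size_t Capacity = InitialCapacity;
      unsigned Retries = 0;
      while (!tryEmit(Capacity, BytesNeeded)) {
        ++Retries;                 // the NumRetries statistic in the original
        Capacity *= 2;             // "try again with at least twice as much space"
      }
      if (RetriesOut) *RetriesOut = Retries;
      return Capacity;             // the buffer size that finally worked
    }

    int main() {
      unsigned Retries = 0;
      size_t Final = emitWithRetry(4096, 20000, &Retries);
      std::printf("emitted after %u retries into %zu bytes\n", Retries, Final);
    }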
- // care must be taken so that BufferBegin is invalidated when a - // block is trimmed - BufferBegin = CurBufferPtr = MemMgr->allocateSpace(Size, Alignment); - BufferEnd = BufferBegin+Size; - return CurBufferPtr; -} - -void *JITEmitter::allocateGlobal(uintptr_t Size, unsigned Alignment) { - // Delegate this call through the memory manager. - return MemMgr->allocateGlobal(Size, Alignment); -} - -void JITEmitter::emitConstantPool(MachineConstantPool *MCP) { - if (TheJIT->getJITInfo().hasCustomConstantPool()) - return; - - const std::vector &Constants = MCP->getConstants(); - if (Constants.empty()) return; - - unsigned Size = GetConstantPoolSizeInBytes(MCP, TheJIT->getDataLayout()); - unsigned Align = MCP->getConstantPoolAlignment(); - ConstantPoolBase = allocateSpace(Size, Align); - ConstantPool = MCP; - - if (!ConstantPoolBase) return; // Buffer overflow. - - DEBUG(dbgs() << "JIT: Emitted constant pool at [" << ConstantPoolBase - << "] (size: " << Size << ", alignment: " << Align << ")\n"); - - // Initialize the memory for all of the constant pool entries. - unsigned Offset = 0; - for (unsigned i = 0, e = Constants.size(); i != e; ++i) { - MachineConstantPoolEntry CPE = Constants[i]; - unsigned AlignMask = CPE.getAlignment() - 1; - Offset = (Offset + AlignMask) & ~AlignMask; - - uintptr_t CAddr = (uintptr_t)ConstantPoolBase + Offset; - ConstPoolAddresses.push_back(CAddr); - if (CPE.isMachineConstantPoolEntry()) { - // FIXME: add support to lower machine constant pool values into bytes! - report_fatal_error("Initialize memory with machine specific constant pool" - "entry has not been implemented!"); - } - TheJIT->InitializeMemory(CPE.Val.ConstVal, (void*)CAddr); - DEBUG(dbgs() << "JIT: CP" << i << " at [0x"; - dbgs().write_hex(CAddr) << "]\n"); - - Type *Ty = CPE.Val.ConstVal->getType(); - Offset += TheJIT->getDataLayout()->getTypeAllocSize(Ty); - } -} - -void JITEmitter::initJumpTableInfo(MachineJumpTableInfo *MJTI) { - if (TheJIT->getJITInfo().hasCustomJumpTables()) - return; - if (MJTI->getEntryKind() == MachineJumpTableInfo::EK_Inline) - return; - - const std::vector &JT = MJTI->getJumpTables(); - if (JT.empty()) return; - - unsigned NumEntries = 0; - for (unsigned i = 0, e = JT.size(); i != e; ++i) - NumEntries += JT[i].MBBs.size(); - - unsigned EntrySize = MJTI->getEntrySize(*TheJIT->getDataLayout()); - - // Just allocate space for all the jump tables now. We will fix up the actual - // MBB entries in the tables after we emit the code for each block, since then - // we will know the final locations of the MBBs in memory. - JumpTable = MJTI; - JumpTableBase = allocateSpace(NumEntries * EntrySize, - MJTI->getEntryAlignment(*TheJIT->getDataLayout())); -} - -void JITEmitter::emitJumpTableInfo(MachineJumpTableInfo *MJTI) { - if (TheJIT->getJITInfo().hasCustomJumpTables()) - return; - - const std::vector &JT = MJTI->getJumpTables(); - if (JT.empty() || !JumpTableBase) return; - - - switch (MJTI->getEntryKind()) { - case MachineJumpTableInfo::EK_Inline: - return; - case MachineJumpTableInfo::EK_BlockAddress: { - // EK_BlockAddress - Each entry is a plain address of block, e.g.: - // .word LBB123 - assert(MJTI->getEntrySize(*TheJIT->getDataLayout()) == sizeof(void*) && - "Cross JIT'ing?"); - - // For each jump table, map each target in the jump table to the address of - // an emitted MachineBasicBlock. 
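// A small sketch of the per-entry alignment computed in emitConstantPool
// above: each entry's offset is rounded up to that entry's alignment before
// its address is recorded, then the offset advances by the entry's size.
// Entry sizes and alignments below are made up for the example.
#include <cstdint>
#include <cstdio>
#include <vector>

struct PoolEntry { unsigned Size; unsigned Align; };   // Align: nonzero power of two

std::vector<uintptr_t> layoutConstantPool(uintptr_t Base,
                                          const std::vector<PoolEntry> &Entries) {
  std::vector<uintptr_t> Addrs;
  uintptr_t Offset = 0;
  for (const PoolEntry &E : Entries) {
    uintptr_t AlignMask = E.Align - 1;
    Offset = (Offset + AlignMask) & ~AlignMask;        // round up to alignment
    Addrs.push_back(Base + Offset);
    Offset += E.Size;
  }
  return Addrs;
}

int main() {
  std::vector<PoolEntry> CP = {{4, 4}, {8, 8}, {1, 1}, {16, 16}};
  for (uintptr_t A : layoutConstantPool(0x1000, CP))
    std::printf("entry at 0x%zx\n", static_cast<size_t>(A));
}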
- intptr_t *SlotPtr = (intptr_t*)JumpTableBase; - - for (unsigned i = 0, e = JT.size(); i != e; ++i) { - const std::vector &MBBs = JT[i].MBBs; - // Store the address of the basic block for this jump table slot in the - // memory we allocated for the jump table in 'initJumpTableInfo' - for (unsigned mi = 0, me = MBBs.size(); mi != me; ++mi) - *SlotPtr++ = getMachineBasicBlockAddress(MBBs[mi]); - } - break; - } - - case MachineJumpTableInfo::EK_Custom32: - case MachineJumpTableInfo::EK_GPRel32BlockAddress: - case MachineJumpTableInfo::EK_LabelDifference32: { - assert(MJTI->getEntrySize(*TheJIT->getDataLayout()) == 4&&"Cross JIT'ing?"); - // For each jump table, place the offset from the beginning of the table - // to the target address. - int *SlotPtr = (int*)JumpTableBase; - - for (unsigned i = 0, e = JT.size(); i != e; ++i) { - const std::vector &MBBs = JT[i].MBBs; - // Store the offset of the basic block for this jump table slot in the - // memory we allocated for the jump table in 'initJumpTableInfo' - uintptr_t Base = (uintptr_t)SlotPtr; - for (unsigned mi = 0, me = MBBs.size(); mi != me; ++mi) { - uintptr_t MBBAddr = getMachineBasicBlockAddress(MBBs[mi]); - /// FIXME: USe EntryKind instead of magic "getPICJumpTableEntry" hook. - *SlotPtr++ = TheJIT->getJITInfo().getPICJumpTableEntry(MBBAddr, Base); - } - } - break; - } - case MachineJumpTableInfo::EK_GPRel64BlockAddress: - llvm_unreachable( - "JT Info emission not implemented for GPRel64BlockAddress yet."); - } -} - -void JITEmitter::startGVStub(const GlobalValue* GV, - unsigned StubSize, unsigned Alignment) { - SavedBufferBegin = BufferBegin; - SavedBufferEnd = BufferEnd; - SavedCurBufferPtr = CurBufferPtr; - - BufferBegin = CurBufferPtr = MemMgr->allocateStub(GV, StubSize, Alignment); - BufferEnd = BufferBegin+StubSize+1; -} - -void JITEmitter::startGVStub(void *Buffer, unsigned StubSize) { - SavedBufferBegin = BufferBegin; - SavedBufferEnd = BufferEnd; - SavedCurBufferPtr = CurBufferPtr; - - BufferBegin = CurBufferPtr = (uint8_t *)Buffer; - BufferEnd = BufferBegin+StubSize+1; -} - -void JITEmitter::finishGVStub() { - assert(CurBufferPtr != BufferEnd && "Stub overflowed allocated space."); - NumBytes += getCurrentPCOffset(); - BufferBegin = SavedBufferBegin; - BufferEnd = SavedBufferEnd; - CurBufferPtr = SavedCurBufferPtr; -} - -void *JITEmitter::allocIndirectGV(const GlobalValue *GV, - const uint8_t *Buffer, size_t Size, - unsigned Alignment) { - uint8_t *IndGV = MemMgr->allocateStub(GV, Size, Alignment); - memcpy(IndGV, Buffer, Size); - return IndGV; -} - -// getConstantPoolEntryAddress - Return the address of the 'ConstantNum' entry -// in the constant pool that was last emitted with the 'emitConstantPool' -// method. 
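// A small sketch of the two jump-table encodings handled by emitJumpTableInfo
// above: EK_BlockAddress-style tables store the absolute address of each
// target block, while the PIC-style kinds store (typically) a 32-bit offset of
// the target from the start of the table.  Block addresses here are plain
// numbers supplied by the caller; this is a simplified model, not the real
// target hook.
#include <cstdint>
#include <vector>

// Each slot holds the target block's absolute address (".word LBB123" style).
void fillAbsoluteTable(intptr_t *Slot, const std::vector<uintptr_t> &BlockAddrs) {
  for (uintptr_t Addr : BlockAddrs)
    *Slot++ = static_cast<intptr_t>(Addr);
}

// Each slot holds the target's offset from the start of the table, so the
// table stays valid wherever the code is mapped.
void fillOffsetTable(int32_t *Slot, const std::vector<uintptr_t> &BlockAddrs) {
  uintptr_t Base = reinterpret_cast<uintptr_t>(Slot);
  for (uintptr_t Addr : BlockAddrs)
    *Slot++ = static_cast<int32_t>(Addr - Base);
}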
-// -uintptr_t JITEmitter::getConstantPoolEntryAddress(unsigned ConstantNum) const { - assert(ConstantNum < ConstantPool->getConstants().size() && - "Invalid ConstantPoolIndex!"); - return ConstPoolAddresses[ConstantNum]; -} - -// getJumpTableEntryAddress - Return the address of the JumpTable with index -// 'Index' in the jumpp table that was last initialized with 'initJumpTableInfo' -// -uintptr_t JITEmitter::getJumpTableEntryAddress(unsigned Index) const { - const std::vector &JT = JumpTable->getJumpTables(); - assert(Index < JT.size() && "Invalid jump table index!"); - - unsigned EntrySize = JumpTable->getEntrySize(*TheJIT->getDataLayout()); - - unsigned Offset = 0; - for (unsigned i = 0; i < Index; ++i) - Offset += JT[i].MBBs.size(); - - Offset *= EntrySize; - - return (uintptr_t)((char *)JumpTableBase + Offset); -} - -void JITEmitter::EmittedFunctionConfig::onDelete( - JITEmitter *Emitter, const Function *F) { - Emitter->deallocateMemForFunction(F); -} -void JITEmitter::EmittedFunctionConfig::onRAUW( - JITEmitter *, const Function*, const Function*) { - llvm_unreachable("The JIT doesn't know how to handle a" - " RAUW on a value it has emitted."); -} - - -//===----------------------------------------------------------------------===// -// Public interface to this file -//===----------------------------------------------------------------------===// - -JITCodeEmitter *JIT::createEmitter(JIT &jit, JITMemoryManager *JMM, - TargetMachine &tm) { - return new JITEmitter(jit, JMM, tm); -} - -// getPointerToFunctionOrStub - If the specified function has been -// code-gen'd, return a pointer to the function. If not, compile it, or use -// a stub to implement lazy compilation if available. -// -void *JIT::getPointerToFunctionOrStub(Function *F) { - // If we have already code generated the function, just return the address. - if (void *Addr = getPointerToGlobalIfAvailable(F)) - return Addr; - - // Get a stub if the target supports it. - JITEmitter *JE = static_cast(getCodeEmitter()); - return JE->getJITResolver().getLazyFunctionStub(F); -} - -void JIT::updateFunctionStubUnlocked(Function *F) { - // Get the empty stub we generated earlier. - JITEmitter *JE = static_cast(getCodeEmitter()); - void *Stub = JE->getJITResolver().getLazyFunctionStub(F); - void *Addr = getPointerToGlobalIfAvailable(F); - assert(Addr != Stub && "Function must have non-stub address to be updated."); - - // Tell the target jit info to rewrite the stub at the specified address, - // rather than creating a new one. - TargetJITInfo::StubLayout layout = getJITInfo().getStubLayout(); - JE->startGVStub(Stub, layout.Size); - getJITInfo().emitFunctionStub(F, Addr, *getCodeEmitter()); - JE->finishGVStub(); -} - -/// freeMachineCodeForFunction - release machine code memory for given Function. -/// -void JIT::freeMachineCodeForFunction(Function *F) { - // Delete translation for this from the ExecutionEngine, so it will get - // retranslated next time it is used. - updateGlobalMapping(F, nullptr); - - // Free the actual memory for the function body and related stuff. 
- static_cast(JCE)->deallocateMemForFunction(F); -} diff --git a/lib/ExecutionEngine/JIT/JITMemoryManager.cpp b/lib/ExecutionEngine/JIT/JITMemoryManager.cpp deleted file mode 100644 index 584b93f..0000000 --- a/lib/ExecutionEngine/JIT/JITMemoryManager.cpp +++ /dev/null @@ -1,904 +0,0 @@ -//===-- JITMemoryManager.cpp - Memory Allocator for JIT'd code ------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file defines the DefaultJITMemoryManager class. -// -//===----------------------------------------------------------------------===// - -#include "llvm/ExecutionEngine/JITMemoryManager.h" -#include "llvm/ADT/SmallPtrSet.h" -#include "llvm/ADT/Statistic.h" -#include "llvm/ADT/Twine.h" -#include "llvm/Config/config.h" -#include "llvm/IR/GlobalValue.h" -#include "llvm/Support/Allocator.h" -#include "llvm/Support/Compiler.h" -#include "llvm/Support/Debug.h" -#include "llvm/Support/DynamicLibrary.h" -#include "llvm/Support/ErrorHandling.h" -#include "llvm/Support/Memory.h" -#include "llvm/Support/raw_ostream.h" -#include -#include -#include -#include - -#if defined(__linux__) -#if defined(HAVE_SYS_STAT_H) -#include -#endif -#include -#include -#endif - -using namespace llvm; - -#define DEBUG_TYPE "jit" - -STATISTIC(NumSlabs, "Number of slabs of memory allocated by the JIT"); - -JITMemoryManager::~JITMemoryManager() {} - -//===----------------------------------------------------------------------===// -// Memory Block Implementation. -//===----------------------------------------------------------------------===// - -namespace { - /// MemoryRangeHeader - For a range of memory, this is the header that we put - /// on the block of memory. It is carefully crafted to be one word of memory. - /// Allocated blocks have just this header, free'd blocks have FreeRangeHeader - /// which starts with this. - struct FreeRangeHeader; - struct MemoryRangeHeader { - /// ThisAllocated - This is true if this block is currently allocated. If - /// not, this can be converted to a FreeRangeHeader. - unsigned ThisAllocated : 1; - - /// PrevAllocated - Keep track of whether the block immediately before us is - /// allocated. If not, the word immediately before this header is the size - /// of the previous block. - unsigned PrevAllocated : 1; - - /// BlockSize - This is the size in bytes of this memory block, - /// including this header. - uintptr_t BlockSize : (sizeof(intptr_t)*CHAR_BIT - 2); - - - /// getBlockAfter - Return the memory block immediately after this one. - /// - MemoryRangeHeader &getBlockAfter() const { - return *reinterpret_cast( - reinterpret_cast( - const_cast(this))+BlockSize); - } - - /// getFreeBlockBefore - If the block before this one is free, return it, - /// otherwise return null. - FreeRangeHeader *getFreeBlockBefore() const { - if (PrevAllocated) return nullptr; - intptr_t PrevSize = reinterpret_cast( - const_cast(this))[-1]; - return reinterpret_cast( - reinterpret_cast( - const_cast(this))-PrevSize); - } - - /// FreeBlock - Turn an allocated block into a free block, adjusting - /// bits in the object headers, and adding an end of region memory block. 
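// A simplified model of the one-word block header described above: the two
// allocation bits and the block size share a single machine word, the next
// header is reached by adding BlockSize, and when the previous block is free
// its size sits in the word immediately before this header so it can be found
// by walking backwards.  This is an illustration of the layout, not the
// manager's actual code.
#include <cassert>
#include <climits>
#include <cstdint>

struct BlockHeader {
  uintptr_t ThisAllocated : 1;
  uintptr_t PrevAllocated : 1;
  uintptr_t BlockSize     : sizeof(uintptr_t) * CHAR_BIT - 2;  // includes the header

  BlockHeader *next() {
    return reinterpret_cast<BlockHeader *>(
        reinterpret_cast<char *>(this) + BlockSize);
  }
  // Only valid when the previous block is free; its end-of-block size marker
  // is the word just before this header.
  BlockHeader *prevFree() {
    assert(!PrevAllocated && "previous block is allocated; no size marker");
    uintptr_t PrevSize = reinterpret_cast<uintptr_t *>(this)[-1];
    return reinterpret_cast<BlockHeader *>(
        reinterpret_cast<char *>(this) - PrevSize);
  }
};

// The scheme depends on all three fields packing into exactly one word.
static_assert(sizeof(BlockHeader) == sizeof(uintptr_t),
              "header must be exactly one word");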
- FreeRangeHeader *FreeBlock(FreeRangeHeader *FreeList); - - /// TrimAllocationToSize - If this allocated block is significantly larger - /// than NewSize, split it into two pieces (where the former is NewSize - /// bytes, including the header), and add the new block to the free list. - FreeRangeHeader *TrimAllocationToSize(FreeRangeHeader *FreeList, - uint64_t NewSize); - }; - - /// FreeRangeHeader - For a memory block that isn't already allocated, this - /// keeps track of the current block and has a pointer to the next free block. - /// Free blocks are kept on a circularly linked list. - struct FreeRangeHeader : public MemoryRangeHeader { - FreeRangeHeader *Prev; - FreeRangeHeader *Next; - - /// getMinBlockSize - Get the minimum size for a memory block. Blocks - /// smaller than this size cannot be created. - static unsigned getMinBlockSize() { - return sizeof(FreeRangeHeader)+sizeof(intptr_t); - } - - /// SetEndOfBlockSizeMarker - The word at the end of every free block is - /// known to be the size of the free block. Set it for this block. - void SetEndOfBlockSizeMarker() { - void *EndOfBlock = (char*)this + BlockSize; - ((intptr_t *)EndOfBlock)[-1] = BlockSize; - } - - FreeRangeHeader *RemoveFromFreeList() { - assert(Next->Prev == this && Prev->Next == this && "Freelist broken!"); - Next->Prev = Prev; - return Prev->Next = Next; - } - - void AddToFreeList(FreeRangeHeader *FreeList) { - Next = FreeList; - Prev = FreeList->Prev; - Prev->Next = this; - Next->Prev = this; - } - - /// GrowBlock - The block after this block just got deallocated. Merge it - /// into the current block. - void GrowBlock(uintptr_t NewSize); - - /// AllocateBlock - Mark this entire block allocated, updating freelists - /// etc. This returns a pointer to the circular free-list. - FreeRangeHeader *AllocateBlock(); - }; -} - - -/// AllocateBlock - Mark this entire block allocated, updating freelists -/// etc. This returns a pointer to the circular free-list. -FreeRangeHeader *FreeRangeHeader::AllocateBlock() { - assert(!ThisAllocated && !getBlockAfter().PrevAllocated && - "Cannot allocate an allocated block!"); - // Mark this block allocated. - ThisAllocated = 1; - getBlockAfter().PrevAllocated = 1; - - // Remove it from the free list. - return RemoveFromFreeList(); -} - -/// FreeBlock - Turn an allocated block into a free block, adjusting -/// bits in the object headers, and adding an end of region memory block. -/// If possible, coalesce this block with neighboring blocks. Return the -/// FreeRangeHeader to allocate from. -FreeRangeHeader *MemoryRangeHeader::FreeBlock(FreeRangeHeader *FreeList) { - MemoryRangeHeader *FollowingBlock = &getBlockAfter(); - assert(ThisAllocated && "This block is already free!"); - assert(FollowingBlock->PrevAllocated && "Flags out of sync!"); - - FreeRangeHeader *FreeListToReturn = FreeList; - - // If the block after this one is free, merge it into this block. - if (!FollowingBlock->ThisAllocated) { - FreeRangeHeader &FollowingFreeBlock = *(FreeRangeHeader *)FollowingBlock; - // "FreeList" always needs to be a valid free block. If we're about to - // coalesce with it, update our notion of what the free list is. - if (&FollowingFreeBlock == FreeList) { - FreeList = FollowingFreeBlock.Next; - FreeListToReturn = nullptr; - assert(&FollowingFreeBlock != FreeList && "No tombstone block?"); - } - FollowingFreeBlock.RemoveFromFreeList(); - - // Include the following block into this one. 
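// A small sketch of the circular doubly-linked free list used above: free
// blocks are spliced in front of the list head and unlinked by re-connecting
// their neighbours, and each free block stamps its size into its last word
// (the end-of-block size marker) so the following header can find it.  The
// node below assumes it sits at the start of a block of BlockSize bytes; it
// is a simplified stand-in for FreeRangeHeader.
#include <cassert>
#include <cstdint>

struct FreeNode {
  FreeNode *Prev;
  FreeNode *Next;
  uintptr_t BlockSize;   // total block size in bytes, including this node

  void addToList(FreeNode *List) {
    Next = List;
    Prev = List->Prev;
    Prev->Next = this;
    Next->Prev = this;
  }
  FreeNode *removeFromList() {
    assert(Next->Prev == this && Prev->Next == this && "free list broken");
    Next->Prev = Prev;
    return Prev->Next = Next;     // hand the caller a node still on the list
  }
  void setEndOfBlockSizeMarker() {
    char *End = reinterpret_cast<char *>(this) + BlockSize;
    reinterpret_cast<uintptr_t *>(End)[-1] = BlockSize;
  }
};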
- BlockSize += FollowingFreeBlock.BlockSize; - FollowingBlock = &FollowingFreeBlock.getBlockAfter(); - - // Tell the block after the block we are coalescing that this block is - // allocated. - FollowingBlock->PrevAllocated = 1; - } - - assert(FollowingBlock->ThisAllocated && "Missed coalescing?"); - - if (FreeRangeHeader *PrevFreeBlock = getFreeBlockBefore()) { - PrevFreeBlock->GrowBlock(PrevFreeBlock->BlockSize + BlockSize); - return FreeListToReturn ? FreeListToReturn : PrevFreeBlock; - } - - // Otherwise, mark this block free. - FreeRangeHeader &FreeBlock = *(FreeRangeHeader*)this; - FollowingBlock->PrevAllocated = 0; - FreeBlock.ThisAllocated = 0; - - // Link this into the linked list of free blocks. - FreeBlock.AddToFreeList(FreeList); - - // Add a marker at the end of the block, indicating the size of this free - // block. - FreeBlock.SetEndOfBlockSizeMarker(); - return FreeListToReturn ? FreeListToReturn : &FreeBlock; -} - -/// GrowBlock - The block after this block just got deallocated. Merge it -/// into the current block. -void FreeRangeHeader::GrowBlock(uintptr_t NewSize) { - assert(NewSize > BlockSize && "Not growing block?"); - BlockSize = NewSize; - SetEndOfBlockSizeMarker(); - getBlockAfter().PrevAllocated = 0; -} - -/// TrimAllocationToSize - If this allocated block is significantly larger -/// than NewSize, split it into two pieces (where the former is NewSize -/// bytes, including the header), and add the new block to the free list. -FreeRangeHeader *MemoryRangeHeader:: -TrimAllocationToSize(FreeRangeHeader *FreeList, uint64_t NewSize) { - assert(ThisAllocated && getBlockAfter().PrevAllocated && - "Cannot deallocate part of an allocated block!"); - - // Don't allow blocks to be trimmed below minimum required size - NewSize = std::max(FreeRangeHeader::getMinBlockSize(), NewSize); - - // Round up size for alignment of header. - unsigned HeaderAlign = __alignof(FreeRangeHeader); - NewSize = (NewSize+ (HeaderAlign-1)) & ~(HeaderAlign-1); - - // Size is now the size of the block we will remove from the start of the - // current block. - assert(NewSize <= BlockSize && - "Allocating more space from this block than exists!"); - - // If splitting this block will cause the remainder to be too small, do not - // split the block. - if (BlockSize <= NewSize+FreeRangeHeader::getMinBlockSize()) - return FreeList; - - // Otherwise, we splice the required number of bytes out of this block, form - // a new block immediately after it, then mark this block allocated. - MemoryRangeHeader &FormerNextBlock = getBlockAfter(); - - // Change the size of this block. - BlockSize = NewSize; - - // Get the new block we just sliced out and turn it into a free block. - FreeRangeHeader &NewNextBlock = (FreeRangeHeader &)getBlockAfter(); - NewNextBlock.BlockSize = (char*)&FormerNextBlock - (char*)&NewNextBlock; - NewNextBlock.ThisAllocated = 0; - NewNextBlock.PrevAllocated = 1; - NewNextBlock.SetEndOfBlockSizeMarker(); - FormerNextBlock.PrevAllocated = 0; - NewNextBlock.AddToFreeList(FreeList); - return &NewNextBlock; -} - -//===----------------------------------------------------------------------===// -// Memory Block Implementation. 
-//===----------------------------------------------------------------------===// - -namespace { - - class DefaultJITMemoryManager; - - class JITAllocator { - DefaultJITMemoryManager &JMM; - public: - JITAllocator(DefaultJITMemoryManager &jmm) : JMM(jmm) { } - void *Allocate(size_t Size, size_t /*Alignment*/); - void Deallocate(void *Slab, size_t Size); - }; - - /// DefaultJITMemoryManager - Manage memory for the JIT code generation. - /// This splits a large block of MAP_NORESERVE'd memory into two - /// sections, one for function stubs, one for the functions themselves. We - /// have to do this because we may need to emit a function stub while in the - /// middle of emitting a function, and we don't know how large the function we - /// are emitting is. - class DefaultJITMemoryManager : public JITMemoryManager { - public: - /// DefaultCodeSlabSize - When we have to go map more memory, we allocate at - /// least this much unless more is requested. Currently, in 512k slabs. - static const size_t DefaultCodeSlabSize = 512 * 1024; - - /// DefaultSlabSize - Allocate globals and stubs into slabs of 64K (probably - /// 16 pages) unless we get an allocation above SizeThreshold. - static const size_t DefaultSlabSize = 64 * 1024; - - /// DefaultSizeThreshold - For any allocation larger than 16K (probably - /// 4 pages), we should allocate a separate slab to avoid wasted space at - /// the end of a normal slab. - static const size_t DefaultSizeThreshold = 16 * 1024; - - private: - // Whether to poison freed memory. - bool PoisonMemory; - - /// LastSlab - This points to the last slab allocated and is used as the - /// NearBlock parameter to AllocateRWX so that we can attempt to lay out all - /// stubs, data, and code contiguously in memory. In general, however, this - /// is not possible because the NearBlock parameter is ignored on Windows - /// platforms and even on Unix it works on a best-effort pasis. - sys::MemoryBlock LastSlab; - - // Memory slabs allocated by the JIT. We refer to them as slabs so we don't - // confuse them with the blocks of memory described above. - std::vector CodeSlabs; - BumpPtrAllocatorImpl StubAllocator; - BumpPtrAllocatorImpl DataAllocator; - - // Circular list of free blocks. - FreeRangeHeader *FreeMemoryList; - - // When emitting code into a memory block, this is the block. - MemoryRangeHeader *CurBlock; - - uint8_t *GOTBase; // Target Specific reserved memory - public: - DefaultJITMemoryManager(); - ~DefaultJITMemoryManager(); - - /// allocateNewSlab - Allocates a new MemoryBlock and remembers it as the - /// last slab it allocated, so that subsequent allocations follow it. - sys::MemoryBlock allocateNewSlab(size_t size); - - /// getPointerToNamedFunction - This method returns the address of the - /// specified function by using the dlsym function call. - void *getPointerToNamedFunction(const std::string &Name, - bool AbortOnFailure = true) override; - - void AllocateGOT() override; - - // Testing methods. 
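// A small sketch of the bump-pointer slab scheme the stub and data allocators
// above are built on: allocations are carved out of the current slab, a fresh
// 64K slab is started when the current one is exhausted, and anything over the
// 16K threshold gets its own slab so it does not waste the tail of a normal
// one.  The slab sizes mirror the constants above; the allocator itself is a
// simplified illustration (Align must be a nonzero power of two).
#include <cstddef>
#include <cstdint>
#include <vector>

class BumpSlabAllocator {
  enum : size_t { SlabSize = 64 * 1024, SizeThreshold = 16 * 1024 };
  std::vector<std::vector<unsigned char>> Slabs;
  unsigned char *Cur = nullptr, *End = nullptr;   // bump pointer into the current slab

  static unsigned char *alignUp(unsigned char *P, size_t Align) {
    uintptr_t V = reinterpret_cast<uintptr_t>(P);
    return reinterpret_cast<unsigned char *>((V + Align - 1) & ~(uintptr_t)(Align - 1));
  }

public:
  void *allocate(size_t Size, size_t Align) {
    if (Size > SizeThreshold) {                   // oversized: dedicated slab
      Slabs.emplace_back(Size + Align);
      return alignUp(Slabs.back().data(), Align);
    }
    unsigned char *P = Cur ? alignUp(Cur, Align) : nullptr;
    if (!P || P + Size > End) {                   // start a fresh 64K slab
      Slabs.emplace_back(static_cast<size_t>(SlabSize));
      Cur = Slabs.back().data();
      End = Cur + SlabSize;
      P = alignUp(Cur, Align);
    }
    Cur = P + Size;
    return P;
  }
};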
- bool CheckInvariants(std::string &ErrorStr) override; - size_t GetDefaultCodeSlabSize() override { return DefaultCodeSlabSize; } - size_t GetDefaultDataSlabSize() override { return DefaultSlabSize; } - size_t GetDefaultStubSlabSize() override { return DefaultSlabSize; } - unsigned GetNumCodeSlabs() override { return CodeSlabs.size(); } - unsigned GetNumDataSlabs() override { return DataAllocator.GetNumSlabs(); } - unsigned GetNumStubSlabs() override { return StubAllocator.GetNumSlabs(); } - - /// startFunctionBody - When a function starts, allocate a block of free - /// executable memory, returning a pointer to it and its actual size. - uint8_t *startFunctionBody(const Function *F, - uintptr_t &ActualSize) override { - - FreeRangeHeader* candidateBlock = FreeMemoryList; - FreeRangeHeader* head = FreeMemoryList; - FreeRangeHeader* iter = head->Next; - - uintptr_t largest = candidateBlock->BlockSize; - - // Search for the largest free block - while (iter != head) { - if (iter->BlockSize > largest) { - largest = iter->BlockSize; - candidateBlock = iter; - } - iter = iter->Next; - } - - largest = largest - sizeof(MemoryRangeHeader); - - // If this block isn't big enough for the allocation desired, allocate - // another block of memory and add it to the free list. - if (largest < ActualSize || - largest <= FreeRangeHeader::getMinBlockSize()) { - DEBUG(dbgs() << "JIT: Allocating another slab of memory for function."); - candidateBlock = allocateNewCodeSlab((size_t)ActualSize); - } - - // Select this candidate block for allocation - CurBlock = candidateBlock; - - // Allocate the entire memory block. - FreeMemoryList = candidateBlock->AllocateBlock(); - ActualSize = CurBlock->BlockSize - sizeof(MemoryRangeHeader); - return (uint8_t *)(CurBlock + 1); - } - - /// allocateNewCodeSlab - Helper method to allocate a new slab of code - /// memory from the OS and add it to the free list. Returns the new - /// FreeRangeHeader at the base of the slab. - FreeRangeHeader *allocateNewCodeSlab(size_t MinSize) { - // If the user needs at least MinSize free memory, then we account for - // two MemoryRangeHeaders: the one in the user's block, and the one at the - // end of the slab. - size_t PaddedMin = MinSize + 2 * sizeof(MemoryRangeHeader); - size_t SlabSize = std::max(DefaultCodeSlabSize, PaddedMin); - sys::MemoryBlock B = allocateNewSlab(SlabSize); - CodeSlabs.push_back(B); - char *MemBase = (char*)(B.base()); - - // Put a tiny allocated block at the end of the memory chunk, so when - // FreeBlock calls getBlockAfter it doesn't fall off the end. - MemoryRangeHeader *EndBlock = - (MemoryRangeHeader*)(MemBase + B.size()) - 1; - EndBlock->ThisAllocated = 1; - EndBlock->PrevAllocated = 0; - EndBlock->BlockSize = sizeof(MemoryRangeHeader); - - // Start out with a vast new block of free memory. - FreeRangeHeader *NewBlock = (FreeRangeHeader*)MemBase; - NewBlock->ThisAllocated = 0; - // Make sure getFreeBlockBefore doesn't look into unmapped memory. - NewBlock->PrevAllocated = 1; - NewBlock->BlockSize = (uintptr_t)EndBlock - (uintptr_t)NewBlock; - NewBlock->SetEndOfBlockSizeMarker(); - NewBlock->AddToFreeList(FreeMemoryList); - - assert(NewBlock->BlockSize - sizeof(MemoryRangeHeader) >= MinSize && - "The block was too small!"); - return NewBlock; - } - - /// endFunctionBody - The function F is now allocated, and takes the memory - /// in the range [FunctionStart,FunctionEnd). 
- void endFunctionBody(const Function *F, uint8_t *FunctionStart, - uint8_t *FunctionEnd) override { - assert(FunctionEnd > FunctionStart); - assert(FunctionStart == (uint8_t *)(CurBlock+1) && - "Mismatched function start/end!"); - - uintptr_t BlockSize = FunctionEnd - (uint8_t *)CurBlock; - - // Release the memory at the end of this block that isn't needed. - FreeMemoryList =CurBlock->TrimAllocationToSize(FreeMemoryList, BlockSize); - } - - /// allocateSpace - Allocate a memory block of the given size. This method - /// cannot be called between calls to startFunctionBody and endFunctionBody. - uint8_t *allocateSpace(intptr_t Size, unsigned Alignment) override { - CurBlock = FreeMemoryList; - FreeMemoryList = FreeMemoryList->AllocateBlock(); - - uint8_t *result = (uint8_t *)(CurBlock + 1); - - if (Alignment == 0) Alignment = 1; - result = (uint8_t*)(((intptr_t)result+Alignment-1) & - ~(intptr_t)(Alignment-1)); - - uintptr_t BlockSize = result + Size - (uint8_t *)CurBlock; - FreeMemoryList =CurBlock->TrimAllocationToSize(FreeMemoryList, BlockSize); - - return result; - } - - /// allocateStub - Allocate memory for a function stub. - uint8_t *allocateStub(const GlobalValue* F, unsigned StubSize, - unsigned Alignment) override { - return (uint8_t*)StubAllocator.Allocate(StubSize, Alignment); - } - - /// allocateGlobal - Allocate memory for a global. - uint8_t *allocateGlobal(uintptr_t Size, unsigned Alignment) override { - return (uint8_t*)DataAllocator.Allocate(Size, Alignment); - } - - /// allocateCodeSection - Allocate memory for a code section. - uint8_t *allocateCodeSection(uintptr_t Size, unsigned Alignment, - unsigned SectionID, - StringRef SectionName) override { - // Grow the required block size to account for the block header - Size += sizeof(*CurBlock); - - // Alignment handling. - if (!Alignment) - Alignment = 16; - Size += Alignment - 1; - - FreeRangeHeader* candidateBlock = FreeMemoryList; - FreeRangeHeader* head = FreeMemoryList; - FreeRangeHeader* iter = head->Next; - - uintptr_t largest = candidateBlock->BlockSize; - - // Search for the largest free block. - while (iter != head) { - if (iter->BlockSize > largest) { - largest = iter->BlockSize; - candidateBlock = iter; - } - iter = iter->Next; - } - - largest = largest - sizeof(MemoryRangeHeader); - - // If this block isn't big enough for the allocation desired, allocate - // another block of memory and add it to the free list. - if (largest < Size || largest <= FreeRangeHeader::getMinBlockSize()) { - DEBUG(dbgs() << "JIT: Allocating another slab of memory for function."); - candidateBlock = allocateNewCodeSlab((size_t)Size); - } - - // Select this candidate block for allocation - CurBlock = candidateBlock; - - // Allocate the entire memory block. - FreeMemoryList = candidateBlock->AllocateBlock(); - // Release the memory at the end of this block that isn't needed. - FreeMemoryList = CurBlock->TrimAllocationToSize(FreeMemoryList, Size); - uintptr_t unalignedAddr = (uintptr_t)CurBlock + sizeof(*CurBlock); - return (uint8_t*)RoundUpToAlignment((uint64_t)unalignedAddr, Alignment); - } - - /// allocateDataSection - Allocate memory for a data section. 
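// A toy model of the "grab the largest free block, emit into it, then trim the
// unused tail" policy of startFunctionBody/endFunctionBody above.  The free
// list is reduced to a vector of block capacities and the 512K slab size is
// copied from the constant above; the real manager walks its circular list and
// splits block headers in place.
#include <algorithm>
#include <cassert>
#include <cstddef>
#include <vector>

struct Emission { size_t BlockIndex; size_t Capacity; };

// Pick the largest free block; if even that is too small, "map" a new slab.
Emission startFunctionBody(std::vector<size_t> &FreeSizes, size_t Estimate,
                           size_t SlabSize = 512 * 1024) {
  size_t Best = static_cast<size_t>(
      std::max_element(FreeSizes.begin(), FreeSizes.end()) - FreeSizes.begin());
  if (FreeSizes.empty() || FreeSizes[Best] < Estimate) {
    FreeSizes.push_back(std::max(SlabSize, Estimate));
    Best = FreeSizes.size() - 1;
  }
  return Emission{Best, FreeSizes[Best]};
}

// Give back whatever the emitter did not use.
void endFunctionBody(std::vector<size_t> &FreeSizes, const Emission &E,
                     size_t BytesUsed) {
  assert(BytesUsed <= E.Capacity && "emitter overflowed its block");
  FreeSizes[E.BlockIndex] = E.Capacity - BytesUsed;   // trimmed tail stays free
}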
- uint8_t *allocateDataSection(uintptr_t Size, unsigned Alignment, - unsigned SectionID, StringRef SectionName, - bool IsReadOnly) override { - return (uint8_t*)DataAllocator.Allocate(Size, Alignment); - } - - bool finalizeMemory(std::string *ErrMsg) override { - return false; - } - - uint8_t *getGOTBase() const override { - return GOTBase; - } - - void deallocateBlock(void *Block) { - // Find the block that is allocated for this function. - MemoryRangeHeader *MemRange = static_cast(Block) - 1; - assert(MemRange->ThisAllocated && "Block isn't allocated!"); - - // Fill the buffer with garbage! - if (PoisonMemory) { - memset(MemRange+1, 0xCD, MemRange->BlockSize-sizeof(*MemRange)); - } - - // Free the memory. - FreeMemoryList = MemRange->FreeBlock(FreeMemoryList); - } - - /// deallocateFunctionBody - Deallocate all memory for the specified - /// function body. - void deallocateFunctionBody(void *Body) override { - if (Body) deallocateBlock(Body); - } - - /// setMemoryWritable - When code generation is in progress, - /// the code pages may need permissions changed. - void setMemoryWritable() override { - for (unsigned i = 0, e = CodeSlabs.size(); i != e; ++i) - sys::Memory::setWritable(CodeSlabs[i]); - } - /// setMemoryExecutable - When code generation is done and we're ready to - /// start execution, the code pages may need permissions changed. - void setMemoryExecutable() override { - for (unsigned i = 0, e = CodeSlabs.size(); i != e; ++i) - sys::Memory::setExecutable(CodeSlabs[i]); - } - - /// setPoisonMemory - Controls whether we write garbage over freed memory. - /// - void setPoisonMemory(bool poison) override { - PoisonMemory = poison; - } - }; -} - -void *JITAllocator::Allocate(size_t Size, size_t /*Alignment*/) { - sys::MemoryBlock B = JMM.allocateNewSlab(Size); - return B.base(); -} - -void JITAllocator::Deallocate(void *Slab, size_t Size) { - sys::MemoryBlock B(Slab, Size); - sys::Memory::ReleaseRWX(B); -} - -DefaultJITMemoryManager::DefaultJITMemoryManager() - : -#ifdef NDEBUG - PoisonMemory(false), -#else - PoisonMemory(true), -#endif - LastSlab(nullptr, 0), StubAllocator(*this), DataAllocator(*this) { - - // Allocate space for code. - sys::MemoryBlock MemBlock = allocateNewSlab(DefaultCodeSlabSize); - CodeSlabs.push_back(MemBlock); - uint8_t *MemBase = (uint8_t*)MemBlock.base(); - - // We set up the memory chunk with 4 mem regions, like this: - // [ START - // [ Free #0 ] -> Large space to allocate functions from. - // [ Allocated #1 ] -> Tiny space to separate regions. - // [ Free #2 ] -> Tiny space so there is always at least 1 free block. - // [ Allocated #3 ] -> Tiny space to prevent looking past end of block. - // END ] - // - // The last three blocks are never deallocated or touched. - - // Add MemoryRangeHeader to the end of the memory region, indicating that - // the space after the block of memory is allocated. This is block #3. - MemoryRangeHeader *Mem3 = (MemoryRangeHeader*)(MemBase+MemBlock.size())-1; - Mem3->ThisAllocated = 1; - Mem3->PrevAllocated = 0; - Mem3->BlockSize = sizeof(MemoryRangeHeader); - - /// Add a tiny free region so that the free list always has one entry. - FreeRangeHeader *Mem2 = - (FreeRangeHeader *)(((char*)Mem3)-FreeRangeHeader::getMinBlockSize()); - Mem2->ThisAllocated = 0; - Mem2->PrevAllocated = 1; - Mem2->BlockSize = FreeRangeHeader::getMinBlockSize(); - Mem2->SetEndOfBlockSizeMarker(); - Mem2->Prev = Mem2; // Mem2 *is* the free list for now. - Mem2->Next = Mem2; - - /// Add a tiny allocated region so that Mem2 is never coalesced away. 
- MemoryRangeHeader *Mem1 = (MemoryRangeHeader*)Mem2-1; - Mem1->ThisAllocated = 1; - Mem1->PrevAllocated = 0; - Mem1->BlockSize = sizeof(MemoryRangeHeader); - - // Add a FreeRangeHeader to the start of the function body region, indicating - // that the space is free. Mark the previous block allocated so we never look - // at it. - FreeRangeHeader *Mem0 = (FreeRangeHeader*)MemBase; - Mem0->ThisAllocated = 0; - Mem0->PrevAllocated = 1; - Mem0->BlockSize = (char*)Mem1-(char*)Mem0; - Mem0->SetEndOfBlockSizeMarker(); - Mem0->AddToFreeList(Mem2); - - // Start out with the freelist pointing to Mem0. - FreeMemoryList = Mem0; - - GOTBase = nullptr; -} - -void DefaultJITMemoryManager::AllocateGOT() { - assert(!GOTBase && "Cannot allocate the got multiple times"); - GOTBase = new uint8_t[sizeof(void*) * 8192]; - HasGOT = true; -} - -DefaultJITMemoryManager::~DefaultJITMemoryManager() { - for (unsigned i = 0, e = CodeSlabs.size(); i != e; ++i) - sys::Memory::ReleaseRWX(CodeSlabs[i]); - - delete[] GOTBase; -} - -sys::MemoryBlock DefaultJITMemoryManager::allocateNewSlab(size_t size) { - // Allocate a new block close to the last one. - std::string ErrMsg; - sys::MemoryBlock *LastSlabPtr = LastSlab.base() ? &LastSlab : nullptr; - sys::MemoryBlock B = sys::Memory::AllocateRWX(size, LastSlabPtr, &ErrMsg); - if (!B.base()) { - report_fatal_error("Allocation failed when allocating new memory in the" - " JIT\n" + Twine(ErrMsg)); - } - LastSlab = B; - ++NumSlabs; - // Initialize the slab to garbage when debugging. - if (PoisonMemory) { - memset(B.base(), 0xCD, B.size()); - } - return B; -} - -/// CheckInvariants - For testing only. Return "" if all internal invariants -/// are preserved, and a helpful error message otherwise. For free and -/// allocated blocks, make sure that adding BlockSize gives a valid block. -/// For free blocks, make sure they're in the free list and that their end of -/// block size marker is correct. This function should return an error before -/// accessing bad memory. This function is defined here instead of in -/// JITMemoryManagerTest.cpp so that we don't have to expose all of the -/// implementation details of DefaultJITMemoryManager. -bool DefaultJITMemoryManager::CheckInvariants(std::string &ErrorStr) { - raw_string_ostream Err(ErrorStr); - - // Construct a the set of FreeRangeHeader pointers so we can query it - // efficiently. - llvm::SmallPtrSet FreeHdrSet; - FreeRangeHeader* FreeHead = FreeMemoryList; - FreeRangeHeader* FreeRange = FreeHead; - - do { - // Check that the free range pointer is in the blocks we've allocated. - bool Found = false; - for (std::vector::iterator I = CodeSlabs.begin(), - E = CodeSlabs.end(); I != E && !Found; ++I) { - char *Start = (char*)I->base(); - char *End = Start + I->size(); - Found = (Start <= (char*)FreeRange && (char*)FreeRange < End); - } - if (!Found) { - Err << "Corrupt free list; points to " << FreeRange; - return false; - } - - if (FreeRange->Next->Prev != FreeRange) { - Err << "Next and Prev pointers do not match."; - return false; - } - - // Otherwise, add it to the set. - FreeHdrSet.insert(FreeRange); - FreeRange = FreeRange->Next; - } while (FreeRange != FreeHead); - - // Go over each block, and look at each MemoryRangeHeader. - for (std::vector::iterator I = CodeSlabs.begin(), - E = CodeSlabs.end(); I != E; ++I) { - char *Start = (char*)I->base(); - char *End = Start + I->size(); - - // Check each memory range. 
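// A simplified picture of the sentinel layout built by the constructor above:
// one big free block, a tiny allocated separator, a tiny free block so the
// circular free list is never empty, and a final allocated block so coalescing
// can never walk off the end of the slab.  The header below is a cut-down
// stand-in (the real code also threads the free blocks onto the circular list
// and stamps their end-of-block size markers).
#include <cstddef>
#include <cstdint>
#include <vector>

struct Hdr {
  uintptr_t ThisAllocated : 1;
  uintptr_t PrevAllocated : 1;
  uintptr_t BlockSize     : sizeof(uintptr_t) * 8 - 2;
};

// Lay out the four regions over a freshly mapped slab; return the big free block.
Hdr *initSlab(void *Base, size_t Size) {
  char *Mem = static_cast<char *>(Base);

  Hdr *End = reinterpret_cast<Hdr *>(Mem + Size) - 1;          // Allocated #3
  End->ThisAllocated = 1;  End->PrevAllocated = 0;  End->BlockSize = sizeof(Hdr);

  Hdr *Tiny = End - 2;                                         // Free #2 (kept forever)
  Tiny->ThisAllocated = 0; Tiny->PrevAllocated = 1; Tiny->BlockSize = 2 * sizeof(Hdr);

  Hdr *Sep = Tiny - 1;                                         // Allocated #1
  Sep->ThisAllocated = 1;  Sep->PrevAllocated = 0;  Sep->BlockSize = sizeof(Hdr);

  Hdr *Big = reinterpret_cast<Hdr *>(Mem);                     // Free #0
  Big->ThisAllocated = 0;
  Big->PrevAllocated = 1;   // pretend the block below is allocated: never look there
  Big->BlockSize = static_cast<uintptr_t>(reinterpret_cast<char *>(Sep) - Mem);
  return Big;
}

int main() {
  std::vector<char> Slab(4096);
  initSlab(Slab.data(), Slab.size());
}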
- for (MemoryRangeHeader *Hdr = (MemoryRangeHeader*)Start, *LastHdr = nullptr; - Start <= (char*)Hdr && (char*)Hdr < End; - Hdr = &Hdr->getBlockAfter()) { - if (Hdr->ThisAllocated == 0) { - // Check that this range is in the free list. - if (!FreeHdrSet.count(Hdr)) { - Err << "Found free header at " << Hdr << " that is not in free list."; - return false; - } - - // Now make sure the size marker at the end of the block is correct. - uintptr_t *Marker = ((uintptr_t*)&Hdr->getBlockAfter()) - 1; - if (!(Start <= (char*)Marker && (char*)Marker < End)) { - Err << "Block size in header points out of current MemoryBlock."; - return false; - } - if (Hdr->BlockSize != *Marker) { - Err << "End of block size marker (" << *Marker << ") " - << "and BlockSize (" << Hdr->BlockSize << ") don't match."; - return false; - } - } - - if (LastHdr && LastHdr->ThisAllocated != Hdr->PrevAllocated) { - Err << "Hdr->PrevAllocated (" << Hdr->PrevAllocated << ") != " - << "LastHdr->ThisAllocated (" << LastHdr->ThisAllocated << ")"; - return false; - } else if (!LastHdr && !Hdr->PrevAllocated) { - Err << "The first header should have PrevAllocated true."; - return false; - } - - // Remember the last header. - LastHdr = Hdr; - } - } - - // All invariants are preserved. - return true; -} - -//===----------------------------------------------------------------------===// -// getPointerToNamedFunction() implementation. -//===----------------------------------------------------------------------===// - -// AtExitHandlers - List of functions to call when the program exits, -// registered with the atexit() library function. -static std::vector AtExitHandlers; - -/// runAtExitHandlers - Run any functions registered by the program's -/// calls to atexit(3), which we intercept and store in -/// AtExitHandlers. -/// -static void runAtExitHandlers() { - while (!AtExitHandlers.empty()) { - void (*Fn)() = AtExitHandlers.back(); - AtExitHandlers.pop_back(); - Fn(); - } -} - -//===----------------------------------------------------------------------===// -// Function stubs that are invoked instead of certain library calls -// -// Force the following functions to be linked in to anything that uses the -// JIT. This is a hack designed to work around the all-too-clever Glibc -// strategy of making these functions work differently when inlined vs. when -// not inlined, and hiding their real definitions in a separate archive file -// that the dynamic linker can't see. For more info, search for -// 'libc_nonshared.a' on Google, or read http://llvm.org/PR274. -#if defined(__linux__) && defined(__GLIBC__) -/* stat functions are redirecting to __xstat with a version number. On x86-64 - * linking with libc_nonshared.a and -Wl,--export-dynamic doesn't make 'stat' - * available as an exported symbol, so we have to add it explicitly. 
- */ -namespace { -class StatSymbols { -public: - StatSymbols() { - sys::DynamicLibrary::AddSymbol("stat", (void*)(intptr_t)stat); - sys::DynamicLibrary::AddSymbol("fstat", (void*)(intptr_t)fstat); - sys::DynamicLibrary::AddSymbol("lstat", (void*)(intptr_t)lstat); - sys::DynamicLibrary::AddSymbol("stat64", (void*)(intptr_t)stat64); - sys::DynamicLibrary::AddSymbol("\x1stat64", (void*)(intptr_t)stat64); - sys::DynamicLibrary::AddSymbol("\x1open64", (void*)(intptr_t)open64); - sys::DynamicLibrary::AddSymbol("\x1lseek64", (void*)(intptr_t)lseek64); - sys::DynamicLibrary::AddSymbol("fstat64", (void*)(intptr_t)fstat64); - sys::DynamicLibrary::AddSymbol("lstat64", (void*)(intptr_t)lstat64); - sys::DynamicLibrary::AddSymbol("atexit", (void*)(intptr_t)atexit); - sys::DynamicLibrary::AddSymbol("mknod", (void*)(intptr_t)mknod); - } -}; -} -static StatSymbols initStatSymbols; -#endif // __linux__ - -// jit_exit - Used to intercept the "exit" library call. -static void jit_exit(int Status) { - runAtExitHandlers(); // Run atexit handlers... - exit(Status); -} - -// jit_atexit - Used to intercept the "atexit" library call. -static int jit_atexit(void (*Fn)()) { - AtExitHandlers.push_back(Fn); // Take note of atexit handler... - return 0; // Always successful -} - -static int jit_noop() { - return 0; -} - -//===----------------------------------------------------------------------===// -// -/// getPointerToNamedFunction - This method returns the address of the specified -/// function by using the dynamic loader interface. As such it is only useful -/// for resolving library symbols, not code generated symbols. -/// -void *DefaultJITMemoryManager::getPointerToNamedFunction(const std::string &Name, - bool AbortOnFailure) { - // Check to see if this is one of the functions we want to intercept. Note, - // we cast to intptr_t here to silence a -pedantic warning that complains - // about casting a function pointer to a normal pointer. - if (Name == "exit") return (void*)(intptr_t)&jit_exit; - if (Name == "atexit") return (void*)(intptr_t)&jit_atexit; - - // We should not invoke parent's ctors/dtors from generated main()! - // On Mingw and Cygwin, the symbol __main is resolved to - // callee's(eg. tools/lli) one, to invoke wrong duplicated ctors - // (and register wrong callee's dtors with atexit(3)). - // We expect ExecutionEngine::runStaticConstructorsDestructors() - // is called before ExecutionEngine::runFunctionAsMain() is called. - if (Name == "__main") return (void*)(intptr_t)&jit_noop; - - const char *NameStr = Name.c_str(); - // If this is an asm specifier, skip the sentinal. - if (NameStr[0] == 1) ++NameStr; - - // If it's an external function, look it up in the process image... - void *Ptr = sys::DynamicLibrary::SearchForAddressOfSymbol(NameStr); - if (Ptr) return Ptr; - - // If it wasn't found and if it starts with an underscore ('_') character, - // try again without the underscore. - if (NameStr[0] == '_') { - Ptr = sys::DynamicLibrary::SearchForAddressOfSymbol(NameStr+1); - if (Ptr) return Ptr; - } - - // Darwin/PPC adds $LDBLStub suffixes to various symbols like printf. These - // are references to hidden visibility symbols that dlsym cannot resolve. - // If we have one of these, strip off $LDBLStub and try again. -#if defined(__APPLE__) && defined(__ppc__) - if (Name.size() > 9 && Name[Name.size()-9] == '$' && - memcmp(&Name[Name.size()-8], "LDBLStub", 8) == 0) { - // First try turning $LDBLStub into $LDBL128. If that fails, strip it off. - // This mirrors logic in libSystemStubs.a. 
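// A POSIX-only sketch of the lookup chain getPointerToNamedFunction follows
// above: intercept the few libc entry points the JIT must own (exit, atexit,
// __main), skip the '\1' "don't mangle" sentinel, ask the dynamic loader, and
// finally retry with a leading underscore stripped.  Error handling is reduced
// to returning null; names like lookupNamedFunction are invented here.
#include <cstdint>
#include <cstdlib>
#include <dlfcn.h>
#include <string>
#include <vector>

static std::vector<void (*)()> AtExitHandlers;

static void jitExit(int Status) {
  while (!AtExitHandlers.empty()) {    // run intercepted atexit handlers first
    AtExitHandlers.back()();
    AtExitHandlers.pop_back();
  }
  std::exit(Status);
}
static int jitAtExit(void (*Fn)()) { AtExitHandlers.push_back(Fn); return 0; }
static int jitNoop() { return 0; }

void *lookupNamedFunction(const std::string &Name) {
  // Intercepted functions; cast through intptr_t to silence -pedantic.
  if (Name == "exit")   return (void *)(intptr_t)&jitExit;
  if (Name == "atexit") return (void *)(intptr_t)&jitAtExit;
  if (Name == "__main") return (void *)(intptr_t)&jitNoop;

  const char *Str = Name.c_str();
  if (Str[0] == 1) ++Str;                        // asm-specifier sentinel

  void *Self = dlopen(nullptr, RTLD_LAZY);       // handle to the process image
  if (!Self) return nullptr;
  if (void *Ptr = dlsym(Self, Str)) return Ptr;
  if (Str[0] == '_')                             // retry without the underscore
    if (void *Ptr = dlsym(Self, Str + 1)) return Ptr;
  return nullptr;
}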
- std::string Prefix = std::string(Name.begin(), Name.end()-9); - if (void *Ptr = getPointerToNamedFunction(Prefix+"$LDBL128", false)) - return Ptr; - if (void *Ptr = getPointerToNamedFunction(Prefix, false)) - return Ptr; - } -#endif - - if (AbortOnFailure) { - report_fatal_error("Program used external function '"+Name+ - "' which could not be resolved!"); - } - return nullptr; -} - - - -JITMemoryManager *JITMemoryManager::CreateDefaultMemManager() { - return new DefaultJITMemoryManager(); -} - -const size_t DefaultJITMemoryManager::DefaultCodeSlabSize; -const size_t DefaultJITMemoryManager::DefaultSlabSize; -const size_t DefaultJITMemoryManager::DefaultSizeThreshold; diff --git a/lib/ExecutionEngine/JIT/LLVMBuild.txt b/lib/ExecutionEngine/JIT/LLVMBuild.txt deleted file mode 100644 index dd22f1b..0000000 --- a/lib/ExecutionEngine/JIT/LLVMBuild.txt +++ /dev/null @@ -1,22 +0,0 @@ -;===- ./lib/ExecutionEngine/JIT/LLVMBuild.txt ------------------*- Conf -*--===; -; -; The LLVM Compiler Infrastructure -; -; This file is distributed under the University of Illinois Open Source -; License. See LICENSE.TXT for details. -; -;===------------------------------------------------------------------------===; -; -; This is an LLVMBuild description file for the components in this subdirectory. -; -; For more information on the LLVMBuild system, please see: -; -; http://llvm.org/docs/LLVMBuild.html -; -;===------------------------------------------------------------------------===; - -[component_0] -type = Library -name = JIT -parent = ExecutionEngine -required_libraries = CodeGen Core ExecutionEngine Support diff --git a/lib/ExecutionEngine/JIT/Makefile b/lib/ExecutionEngine/JIT/Makefile deleted file mode 100644 index aafa3d9..0000000 --- a/lib/ExecutionEngine/JIT/Makefile +++ /dev/null @@ -1,38 +0,0 @@ -##===- lib/ExecutionEngine/JIT/Makefile --------------------*- Makefile -*-===## -# -# The LLVM Compiler Infrastructure -# -# This file is distributed under the University of Illinois Open Source -# License. See LICENSE.TXT for details. -# -##===----------------------------------------------------------------------===## - -LEVEL = ../../.. -LIBRARYNAME = LLVMJIT - -# Get the $(ARCH) setting -include $(LEVEL)/Makefile.config - -# Enable the X86 JIT if compiling on X86 -ifeq ($(ARCH), x86) - ENABLE_X86_JIT = 1 -endif - -# This flag can also be used on the command line to force inclusion -# of the X86 JIT on non-X86 hosts -ifdef ENABLE_X86_JIT - CPPFLAGS += -DENABLE_X86_JIT -endif - -# Enable the Sparc JIT if compiling on Sparc -ifeq ($(ARCH), Sparc) - ENABLE_SPARC_JIT = 1 -endif - -# This flag can also be used on the command line to force inclusion -# of the Sparc JIT on non-Sparc hosts -ifdef ENABLE_SPARC_JIT - CPPFLAGS += -DENABLE_SPARC_JIT -endif - -include $(LEVEL)/Makefile.common diff --git a/lib/ExecutionEngine/JITEventListener.cpp b/lib/ExecutionEngine/JITEventListener.cpp new file mode 100644 index 0000000..2a6a007 --- /dev/null +++ b/lib/ExecutionEngine/JITEventListener.cpp @@ -0,0 +1,15 @@ +//===-- JITEventListener.cpp ----------------------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "llvm/ExecutionEngine/JITEventListener.h" + +using namespace llvm; + +// Out-of-line definition of the virtual destructor as this is the key function. 
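// The comment above refers to the "key function" idiom, sketched here with
// hypothetical names: declare the virtual destructor in the header but define
// it out of line in exactly one .cpp file, so the vtable and type info get a
// single home instead of weak copies in every translation unit.
//
// Listener.h
class Listener {
public:
  virtual ~Listener();                 // declared only; no inline body
  virtual void onEvent(int Kind) = 0;
};
//
// Listener.cpp  (the only out-of-line virtual member: the key function)
Listener::~Listener() {}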
+JITEventListener::~JITEventListener() {} diff --git a/lib/ExecutionEngine/LLVMBuild.txt b/lib/ExecutionEngine/LLVMBuild.txt index 6dc75af..ecae078 100644 --- a/lib/ExecutionEngine/LLVMBuild.txt +++ b/lib/ExecutionEngine/LLVMBuild.txt @@ -16,7 +16,7 @@ ;===------------------------------------------------------------------------===; [common] -subdirectories = Interpreter JIT MCJIT RuntimeDyld IntelJITEvents OProfileJIT +subdirectories = Interpreter MCJIT RuntimeDyld IntelJITEvents OProfileJIT [component_0] type = Library diff --git a/lib/ExecutionEngine/MCJIT/MCJIT.cpp b/lib/ExecutionEngine/MCJIT/MCJIT.cpp index e9ba96a..da5f037 100644 --- a/lib/ExecutionEngine/MCJIT/MCJIT.cpp +++ b/lib/ExecutionEngine/MCJIT/MCJIT.cpp @@ -10,7 +10,6 @@ #include "MCJIT.h" #include "llvm/ExecutionEngine/GenericValue.h" #include "llvm/ExecutionEngine/JITEventListener.h" -#include "llvm/ExecutionEngine/JITMemoryManager.h" #include "llvm/ExecutionEngine/MCJIT.h" #include "llvm/ExecutionEngine/ObjectBuffer.h" #include "llvm/ExecutionEngine/ObjectImage.h" @@ -28,6 +27,7 @@ #include "llvm/Support/MemoryBuffer.h" #include "llvm/Support/MutexGuard.h" #include "llvm/Target/TargetLowering.h" +#include "llvm/Target/TargetSubtargetInfo.h" using namespace llvm; @@ -42,31 +42,23 @@ static struct RegisterJIT { extern "C" void LLVMLinkInMCJIT() { } -ExecutionEngine *MCJIT::createJIT(Module *M, +ExecutionEngine *MCJIT::createJIT(std::unique_ptr M, std::string *ErrorStr, RTDyldMemoryManager *MemMgr, - bool GVsWithCode, - TargetMachine *TM) { + std::unique_ptr TM) { // Try to register the program as a source of symbols to resolve against. // // FIXME: Don't do this here. sys::DynamicLibrary::LoadLibraryPermanently(nullptr, nullptr); - return new MCJIT(M, TM, MemMgr ? MemMgr : new SectionMemoryManager(), - GVsWithCode); + return new MCJIT(std::move(M), std::move(TM), + MemMgr ? MemMgr : new SectionMemoryManager()); } -MCJIT::MCJIT(Module *m, TargetMachine *tm, RTDyldMemoryManager *MM, - bool AllocateGVsWithCode) - : ExecutionEngine(m), TM(tm), Ctx(nullptr), MemMgr(this, MM), Dyld(&MemMgr), - ObjCache(nullptr) { - - OwnedModules.addModule(m); - setDataLayout(TM->getDataLayout()); -} - -MCJIT::~MCJIT() { - MutexGuard locked(lock); +MCJIT::MCJIT(std::unique_ptr M, std::unique_ptr tm, + RTDyldMemoryManager *MM) + : ExecutionEngine(std::move(M)), TM(std::move(tm)), Ctx(nullptr), + MemMgr(this, MM), Dyld(&MemMgr), ObjCache(nullptr) { // FIXME: We are managing our modules, so we do not want the base class // ExecutionEngine to manage them as well. To avoid double destruction // of the first (and only) module added in ExecutionEngine constructor @@ -77,33 +69,28 @@ MCJIT::~MCJIT() { // If so, additional functions: addModule, removeModule, FindFunctionNamed, // runStaticConstructorsDestructors could be moved back to EE as well. 
// + std::unique_ptr First = std::move(Modules[0]); Modules.clear(); + + OwnedModules.addModule(std::move(First)); + setDataLayout(TM->getSubtargetImpl()->getDataLayout()); +} + +MCJIT::~MCJIT() { + MutexGuard locked(lock); + Dyld.deregisterEHFrames(); - LoadedObjectList::iterator it, end; - for (it = LoadedObjects.begin(), end = LoadedObjects.end(); it != end; ++it) { - ObjectImage *Obj = *it; - if (Obj) { + for (auto &Obj : LoadedObjects) + if (Obj) NotifyFreeingObject(*Obj); - delete Obj; - } - } - LoadedObjects.clear(); - - SmallVector::iterator ArIt, ArEnd; - for (ArIt = Archives.begin(), ArEnd = Archives.end(); ArIt != ArEnd; ++ArIt) { - object::Archive *A = *ArIt; - delete A; - } Archives.clear(); - - delete TM; } -void MCJIT::addModule(Module *M) { +void MCJIT::addModule(std::unique_ptr M) { MutexGuard locked(lock); - OwnedModules.addModule(M); + OwnedModules.addModule(std::move(M)); } bool MCJIT::removeModule(Module *M) { @@ -111,29 +98,34 @@ bool MCJIT::removeModule(Module *M) { return OwnedModules.removeModule(M); } - - void MCJIT::addObjectFile(std::unique_ptr Obj) { - ObjectImage *LoadedObject = Dyld.loadObject(std::move(Obj)); + std::unique_ptr LoadedObject = Dyld.loadObject(std::move(Obj)); if (!LoadedObject || Dyld.hasError()) report_fatal_error(Dyld.getErrorString()); - LoadedObjects.push_back(LoadedObject); - NotifyObjectEmitted(*LoadedObject); + + LoadedObjects.push_back(std::move(LoadedObject)); } -void MCJIT::addArchive(object::Archive *A) { - Archives.push_back(A); +void MCJIT::addObjectFile(object::OwningBinary Obj) { + std::unique_ptr ObjFile; + std::unique_ptr MemBuf; + std::tie(ObjFile, MemBuf) = Obj.takeBinary(); + addObjectFile(std::move(ObjFile)); + Buffers.push_back(std::move(MemBuf)); } +void MCJIT::addArchive(object::OwningBinary A) { + Archives.push_back(std::move(A)); +} void MCJIT::setObjectCache(ObjectCache* NewCache) { MutexGuard locked(lock); ObjCache = NewCache; } -ObjectBufferStream* MCJIT::emitObject(Module *M) { +std::unique_ptr MCJIT::emitObject(Module *M) { MutexGuard locked(lock); // This must be a module which has already been added but not loaded to this @@ -142,8 +134,8 @@ ObjectBufferStream* MCJIT::emitObject(Module *M) { PassManager PM; - M->setDataLayout(TM->getDataLayout()); - PM.add(new DataLayoutPass(M)); + M->setDataLayout(TM->getSubtargetImpl()->getDataLayout()); + PM.add(new DataLayoutPass()); // The RuntimeDyld will take ownership of this shortly std::unique_ptr CompiledObject(new ObjectBufferStream()); @@ -165,11 +157,11 @@ ObjectBufferStream* MCJIT::emitObject(Module *M) { if (ObjCache) { // MemoryBuffer is a thin wrapper around the actual memory, so it's OK // to create a temporary object here and delete it after the call. 
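// A small sketch of the ownership handoff the rewritten MCJIT constructor
// above performs: unique_ptr arguments are taken by value, moved into members,
// and the first module is immediately moved back out of the base class's list
// into the engine's own container.  Engine/JITEngine/Module/TargetMachine are
// invented stand-ins for the real classes.
#include <memory>
#include <utility>
#include <vector>

struct Module {};
struct TargetMachine {};

class Engine {
protected:
  std::vector<std::unique_ptr<Module>> Modules;     // base-class style storage
  explicit Engine(std::unique_ptr<Module> M) { Modules.push_back(std::move(M)); }
};

class JITEngine : public Engine {
  std::unique_ptr<TargetMachine> TM;
  std::vector<std::unique_ptr<Module>> Owned;       // the container we manage
public:
  JITEngine(std::unique_ptr<Module> M, std::unique_ptr<TargetMachine> T)
      : Engine(std::move(M)), TM(std::move(T)) {
    // Reclaim the module from the base list so there is exactly one owner.
    std::unique_ptr<Module> First = std::move(Modules[0]);
    Modules.clear();
    Owned.push_back(std::move(First));
  }
};

int main() {
  JITEngine JE(std::unique_ptr<Module>(new Module()),
               std::unique_ptr<TargetMachine>(new TargetMachine()));
  (void)JE;
}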
- std::unique_ptr MB(CompiledObject->getMemBuffer()); - ObjCache->notifyObjectCompiled(M, MB.get()); + MemoryBufferRef MB = CompiledObject->getMemBuffer(); + ObjCache->notifyObjectCompiled(M, MB); } - return CompiledObject.release(); + return CompiledObject; } void MCJIT::generateCodeForModule(Module *M) { @@ -187,21 +179,22 @@ void MCJIT::generateCodeForModule(Module *M) { std::unique_ptr ObjectToLoad; // Try to load the pre-compiled object from cache if possible if (ObjCache) { - std::unique_ptr PreCompiledObject(ObjCache->getObject(M)); - if (PreCompiledObject.get()) - ObjectToLoad.reset(new ObjectBuffer(PreCompiledObject.release())); + if (std::unique_ptr PreCompiledObject = + ObjCache->getObject(M)) + ObjectToLoad = + llvm::make_unique(std::move(PreCompiledObject)); } // If the cache did not contain a suitable object, compile the object if (!ObjectToLoad) { - ObjectToLoad.reset(emitObject(M)); - assert(ObjectToLoad.get() && "Compilation did not produce an object."); + ObjectToLoad = emitObject(M); + assert(ObjectToLoad && "Compilation did not produce an object."); } // Load the object into the dynamic linker. // MCJIT now owns the ObjectImage pointer (via its LoadedObjects list). - ObjectImage *LoadedObject = Dyld.loadObject(ObjectToLoad.release()); - LoadedObjects.push_back(LoadedObject); + std::unique_ptr LoadedObject = + Dyld.loadObject(std::move(ObjectToLoad)); if (!LoadedObject) report_fatal_error(Dyld.getErrorString()); @@ -210,6 +203,8 @@ void MCJIT::generateCodeForModule(Module *M) { NotifyObjectEmitted(*LoadedObject); + LoadedObjects.push_back(std::move(LoadedObject)); + OwnedModules.markModuleAsLoaded(M); } @@ -232,12 +227,14 @@ void MCJIT::finalizeLoadedModules() { void MCJIT::finalizeObject() { MutexGuard locked(lock); - for (ModulePtrSet::iterator I = OwnedModules.begin_added(), - E = OwnedModules.end_added(); - I != E; ++I) { - Module *M = *I; + // Generate code for module is going to move objects out of the 'added' list, + // so we need to copy that out before using it: + SmallVector ModsToAdd; + for (auto M : OwnedModules.added()) + ModsToAdd.push_back(M); + + for (auto M : ModsToAdd) generateCodeForModule(M); - } finalizeLoadedModules(); } @@ -255,12 +252,8 @@ void MCJIT::finalizeModule(Module *M) { finalizeLoadedModules(); } -void *MCJIT::getPointerToBasicBlock(BasicBlock *BB) { - report_fatal_error("not yet implemented"); -} - uint64_t MCJIT::getExistingSymbolAddress(const std::string &Name) { - Mangler Mang(TM->getDataLayout()); + Mangler Mang(TM->getSubtargetImpl()->getDataLayout()); SmallString<128> FullName; Mang.getNameWithPrefix(FullName, Name); return Dyld.getSymbolLoadAddress(FullName); @@ -299,9 +292,8 @@ uint64_t MCJIT::getSymbolAddress(const std::string &Name, if (Addr) return Addr; - SmallVector::iterator I, E; - for (I = Archives.begin(), E = Archives.end(); I != E; ++I) { - object::Archive *A = *I; + for (object::OwningBinary &OB : Archives) { + object::Archive *A = OB.getBinary(); // Look for our symbols in each Archive object::Archive::child_iterator ChildIt = A->findSym(Name); if (ChildIt != A->child_end()) { @@ -310,7 +302,7 @@ uint64_t MCJIT::getSymbolAddress(const std::string &Name, ChildIt->getAsBinary(); if (ChildBinOrErr.getError()) continue; - std::unique_ptr ChildBin = std::move(ChildBinOrErr.get()); + std::unique_ptr &ChildBin = ChildBinOrErr.get(); if (ChildBin->isObject()) { std::unique_ptr OF( static_cast(ChildBin.release())); @@ -326,13 +318,19 @@ uint64_t MCJIT::getSymbolAddress(const std::string &Name, // If it hasn't already been 
generated, see if it's in one of our modules. Module *M = findModuleForSymbol(Name, CheckFunctionsOnly); - if (!M) - return 0; + if (M) { + generateCodeForModule(M); + + // Check the RuntimeDyld table again, it should be there now. + return getExistingSymbolAddress(Name); + } - generateCodeForModule(M); + // If a LazyFunctionCreator is installed, use it to get/create the function. + // FIXME: Should we instead have a LazySymbolCreator callback? + if (LazyFunctionCreator) + Addr = (uint64_t)LazyFunctionCreator(Name); - // Check the RuntimeDyld table again, it should be there now. - return getExistingSymbolAddress(Name); + return Addr; } uint64_t MCJIT::getGlobalValueAddress(const std::string &Name) { @@ -355,10 +353,14 @@ uint64_t MCJIT::getFunctionAddress(const std::string &Name) { void *MCJIT::getPointerToFunction(Function *F) { MutexGuard locked(lock); + Mangler Mang(TM->getSubtargetImpl()->getDataLayout()); + SmallString<128> Name; + TM->getNameWithPrefix(Name, F, Mang); + if (F->isDeclaration() || F->hasAvailableExternallyLinkage()) { bool AbortOnFailure = !F->hasExternalWeakLinkage(); - void *Addr = getPointerToNamedFunction(F->getName(), AbortOnFailure); - addGlobalMapping(F, Addr); + void *Addr = getPointerToNamedFunction(Name, AbortOnFailure); + updateGlobalMapping(F, Addr); return Addr; } @@ -368,32 +370,25 @@ void *MCJIT::getPointerToFunction(Function *F) { // Make sure the relevant module has been compiled and loaded. if (HasBeenAddedButNotLoaded) generateCodeForModule(M); - else if (!OwnedModules.hasModuleBeenLoaded(M)) + else if (!OwnedModules.hasModuleBeenLoaded(M)) { // If this function doesn't belong to one of our modules, we're done. + // FIXME: Asking for the pointer to a function that hasn't been registered, + // and isn't a declaration (which is handled above) should probably + // be an assertion. return nullptr; + } // FIXME: Should the Dyld be retaining module information? Probably not. // // This is the accessor for the target address, so make sure to check the // load address of the symbol, not the local address. 
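// A condensed sketch of the lookup order getSymbolAddress follows after this
// change: code that is already loaded wins, then archive members, then the
// owning module is compiled on demand and the table is consulted again, and
// only then does the new LazyFunctionCreator fallback run.  The std::function
// members below are stand-ins for the engine's real state and helpers.
#include <cstdint>
#include <functional>
#include <map>
#include <string>

struct SymbolResolver {
  std::map<std::string, uint64_t> LoadedSymbols;                 // RuntimeDyld's table
  std::function<uint64_t(const std::string &)> SearchArchives;   // archive members
  std::function<bool(const std::string &)> CompileOwningModule;  // true if found and emitted
  std::function<void *(const std::string &)> LazyFunctionCreator;

  uint64_t getSymbolAddress(const std::string &Name) {
    auto It = LoadedSymbols.find(Name);
    if (It != LoadedSymbols.end())
      return It->second;

    if (SearchArchives)
      if (uint64_t Addr = SearchArchives(Name))
        return Addr;

    if (CompileOwningModule && CompileOwningModule(Name)) {
      It = LoadedSymbols.find(Name);          // should be resolvable now
      if (It != LoadedSymbols.end())
        return It->second;
    }

    if (LazyFunctionCreator)                  // last resort
      return reinterpret_cast<uint64_t>(LazyFunctionCreator(Name));
    return 0;
  }
};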
- Mangler Mang(TM->getDataLayout()); - SmallString<128> Name; - TM->getNameWithPrefix(Name, F, Mang); return (void*)Dyld.getSymbolLoadAddress(Name); } -void *MCJIT::recompileAndRelinkFunction(Function *F) { - report_fatal_error("not yet implemented"); -} - -void MCJIT::freeMachineCodeForFunction(Function *F) { - report_fatal_error("not yet implemented"); -} - void MCJIT::runStaticConstructorsDestructorsInModulePtrSet( bool isDtors, ModulePtrSet::iterator I, ModulePtrSet::iterator E) { for (; I != E; ++I) { - ExecutionEngine::runStaticConstructorsDestructors(*I, isDtors); + ExecutionEngine::runStaticConstructorsDestructors(**I, isDtors); } } @@ -529,8 +524,7 @@ GenericValue MCJIT::runFunction(Function *F, llvm_unreachable("Full-featured argument passing not supported yet!"); } -void *MCJIT::getPointerToNamedFunction(const std::string &Name, - bool AbortOnFailure) { +void *MCJIT::getPointerToNamedFunction(StringRef Name, bool AbortOnFailure) { if (!isSymbolSearchingDisabled()) { void *ptr = MemMgr.getPointerToNamedFunction(Name, false); if (ptr) @@ -559,8 +553,7 @@ void MCJIT::UnregisterJITEventListener(JITEventListener *L) { if (!L) return; MutexGuard locked(lock); - SmallVector::reverse_iterator I= - std::find(EventListeners.rbegin(), EventListeners.rend(), L); + auto I = std::find(EventListeners.rbegin(), EventListeners.rend(), L); if (I != EventListeners.rend()) { std::swap(*I, EventListeners.back()); EventListeners.pop_back(); @@ -575,9 +568,8 @@ void MCJIT::NotifyObjectEmitted(const ObjectImage& Obj) { } void MCJIT::NotifyFreeingObject(const ObjectImage& Obj) { MutexGuard locked(lock); - for (unsigned I = 0, S = EventListeners.size(); I < S; ++I) { - EventListeners[I]->NotifyFreeingObject(Obj); - } + for (JITEventListener *L : EventListeners) + L->NotifyFreeingObject(Obj); } uint64_t LinkingMemoryManager::getSymbolAddress(const std::string &Name) { @@ -588,5 +580,7 @@ uint64_t LinkingMemoryManager::getSymbolAddress(const std::string &Name) { Result = ParentEngine->getSymbolAddress(Name.substr(1), false); if (Result) return Result; + if (ParentEngine->isSymbolSearchingDisabled()) + return 0; return ClientMM->getSymbolAddress(Name); } diff --git a/lib/ExecutionEngine/MCJIT/MCJIT.h b/lib/ExecutionEngine/MCJIT/MCJIT.h index 100e9a2..bc943b9 100644 --- a/lib/ExecutionEngine/MCJIT/MCJIT.h +++ b/lib/ExecutionEngine/MCJIT/MCJIT.h @@ -7,8 +7,8 @@ // //===----------------------------------------------------------------------===// -#ifndef LLVM_LIB_EXECUTIONENGINE_MCJIT_H -#define LLVM_LIB_EXECUTIONENGINE_MCJIT_H +#ifndef LLVM_LIB_EXECUTIONENGINE_MCJIT_MCJIT_H +#define LLVM_LIB_EXECUTIONENGINE_MCJIT_MCJIT_H #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/SmallPtrSet.h" @@ -101,8 +101,8 @@ private: // called. 
class MCJIT : public ExecutionEngine { - MCJIT(Module *M, TargetMachine *tm, RTDyldMemoryManager *MemMgr, - bool AllocateGVsWithCode); + MCJIT(std::unique_ptr M, std::unique_ptr tm, + RTDyldMemoryManager *MemMgr); typedef llvm::SmallPtrSet ModulePtrSet; @@ -118,6 +118,9 @@ class MCJIT : public ExecutionEngine { ModulePtrSet::iterator begin_added() { return AddedModules.begin(); } ModulePtrSet::iterator end_added() { return AddedModules.end(); } + iterator_range added() { + return iterator_range(begin_added(), end_added()); + } ModulePtrSet::iterator begin_loaded() { return LoadedModules.begin(); } ModulePtrSet::iterator end_loaded() { return LoadedModules.end(); } @@ -125,8 +128,8 @@ class MCJIT : public ExecutionEngine { ModulePtrSet::iterator begin_finalized() { return FinalizedModules.begin(); } ModulePtrSet::iterator end_finalized() { return FinalizedModules.end(); } - void addModule(Module *M) { - AddedModules.insert(M); + void addModule(std::unique_ptr M) { + AddedModules.insert(M.release()); } bool removeModule(Module *M) { @@ -208,18 +211,18 @@ class MCJIT : public ExecutionEngine { } }; - TargetMachine *TM; + std::unique_ptr TM; MCContext *Ctx; LinkingMemoryManager MemMgr; RuntimeDyld Dyld; - SmallVector EventListeners; + std::vector EventListeners; OwningModuleContainer OwnedModules; - SmallVector Archives; + SmallVector, 2> Archives; + SmallVector, 2> Buffers; - typedef SmallVector LoadedObjectList; - LoadedObjectList LoadedObjects; + SmallVector, 2> LoadedObjects; // An optional ObjectCache to be notified of compiled objects and used to // perform lookup of pre-compiled code to avoid re-compilation. @@ -238,9 +241,10 @@ public: /// @name ExecutionEngine interface implementation /// @{ - void addModule(Module *M) override; + void addModule(std::unique_ptr M) override; void addObjectFile(std::unique_ptr O) override; - void addArchive(object::Archive *O) override; + void addObjectFile(object::OwningBinary O) override; + void addArchive(object::OwningBinary O) override; bool removeModule(Module *M) override; /// FindFunctionNamed - Search all of the active modules to find the one that @@ -276,14 +280,8 @@ public: /// \param isDtors - Run the destructors instead of constructors. void runStaticConstructorsDestructors(bool isDtors) override; - void *getPointerToBasicBlock(BasicBlock *BB) override; - void *getPointerToFunction(Function *F) override; - void *recompileAndRelinkFunction(Function *F) override; - - void freeMachineCodeForFunction(Function *F) override; - GenericValue runFunction(Function *F, const std::vector &ArgValues) override; @@ -295,7 +293,7 @@ public: /// found, this function silently returns a null pointer. Otherwise, /// it prints a message to stderr and aborts. /// - void *getPointerToNamedFunction(const std::string &Name, + void *getPointerToNamedFunction(StringRef Name, bool AbortOnFailure = true) override; /// mapSectionAddress - map a section to its target address space value. 
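// Illustrative sketch, not from the patch itself: addModule now takes
// ownership of the IR via std::unique_ptr. A minimal caller fragment,
// assuming an engine already created through EngineBuilder (the function and
// module names are illustrative).
#include "llvm/ExecutionEngine/ExecutionEngine.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Module.h"
#include <memory>
#include <utility>

void addExtraModule(llvm::ExecutionEngine &EE, llvm::LLVMContext &Ctx) {
  std::unique_ptr<llvm::Module> M(new llvm::Module("extra.module", Ctx));
  // ... populate M with IR ...
  EE.addModule(std::move(M));   // after this change, the engine owns the module
}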
@@ -315,7 +313,7 @@ public: uint64_t getGlobalValueAddress(const std::string &Name) override; uint64_t getFunctionAddress(const std::string &Name) override; - TargetMachine *getTargetMachine() override { return TM; } + TargetMachine *getTargetMachine() override { return TM.get(); } /// @} /// @name (Private) Registration Interfaces @@ -325,11 +323,10 @@ public: MCJITCtor = createJIT; } - static ExecutionEngine *createJIT(Module *M, + static ExecutionEngine *createJIT(std::unique_ptr M, std::string *ErrorStr, RTDyldMemoryManager *MemMgr, - bool GVsWithCode, - TargetMachine *TM); + std::unique_ptr TM); // @} @@ -344,7 +341,7 @@ protected: /// this function call is expected to be the contained module. The module /// is passed as a parameter here to prepare for multiple module support in /// the future. - ObjectBufferStream* emitObject(Module *M); + std::unique_ptr emitObject(Module *M); void NotifyObjectEmitted(const ObjectImage& Obj); void NotifyFreeingObject(const ObjectImage& Obj); diff --git a/lib/ExecutionEngine/Makefile b/lib/ExecutionEngine/Makefile index c26e0ad..cf71432 100644 --- a/lib/ExecutionEngine/Makefile +++ b/lib/ExecutionEngine/Makefile @@ -11,7 +11,7 @@ LIBRARYNAME = LLVMExecutionEngine include $(LEVEL)/Makefile.config -PARALLEL_DIRS = Interpreter JIT MCJIT RuntimeDyld +PARALLEL_DIRS = Interpreter MCJIT RuntimeDyld ifeq ($(USE_INTEL_JITEVENTS), 1) PARALLEL_DIRS += IntelJITEvents diff --git a/lib/ExecutionEngine/OProfileJIT/OProfileJITEventListener.cpp b/lib/ExecutionEngine/OProfileJIT/OProfileJITEventListener.cpp index fd37a13..5a8ccb6 100644 --- a/lib/ExecutionEngine/OProfileJIT/OProfileJITEventListener.cpp +++ b/lib/ExecutionEngine/OProfileJIT/OProfileJITEventListener.cpp @@ -49,12 +49,6 @@ public: ~OProfileJITEventListener(); - virtual void NotifyFunctionEmitted(const Function &F, - void *FnStart, size_t FnSize, - const JITEvent_EmittedFunctionDetails &Details); - - virtual void NotifyFreeingMachineCode(void *OldPtr); - virtual void NotifyObjectEmitted(const ObjectImage &Obj); virtual void NotifyFreeingObject(const ObjectImage &Obj); @@ -81,90 +75,6 @@ OProfileJITEventListener::~OProfileJITEventListener() { } } -static debug_line_info LineStartToOProfileFormat( - const MachineFunction &MF, FilenameCache &Filenames, - uintptr_t Address, DebugLoc Loc) { - debug_line_info Result; - Result.vma = Address; - Result.lineno = Loc.getLine(); - Result.filename = Filenames.getFilename( - Loc.getScope(MF.getFunction()->getContext())); - DEBUG(dbgs() << "Mapping " << reinterpret_cast(Result.vma) << " to " - << Result.filename << ":" << Result.lineno << "\n"); - return Result; -} - -// Adds the just-emitted function to the symbol table. -void OProfileJITEventListener::NotifyFunctionEmitted( - const Function &F, void *FnStart, size_t FnSize, - const JITEvent_EmittedFunctionDetails &Details) { - assert(F.hasName() && FnStart != 0 && "Bad symbol to add"); - if (Wrapper.op_write_native_code(F.getName().data(), - reinterpret_cast(FnStart), - FnStart, FnSize) == -1) { - DEBUG(dbgs() << "Failed to tell OProfile about native function " - << F.getName() << " at [" - << FnStart << "-" << ((char*)FnStart + FnSize) << "]\n"); - return; - } - - if (!Details.LineStarts.empty()) { - // Now we convert the line number information from the address/DebugLoc - // format in Details to the address/filename/lineno format that OProfile - // expects. Note that OProfile 0.9.4 has a bug that causes it to ignore - // line numbers for addresses above 4G. 
- FilenameCache Filenames; - std::vector LineInfo; - LineInfo.reserve(1 + Details.LineStarts.size()); - - DebugLoc FirstLoc = Details.LineStarts[0].Loc; - assert(!FirstLoc.isUnknown() - && "LineStarts should not contain unknown DebugLocs"); - MDNode *FirstLocScope = FirstLoc.getScope(F.getContext()); - DISubprogram FunctionDI = getDISubprogram(FirstLocScope); - if (FunctionDI.Verify()) { - // If we have debug info for the function itself, use that as the line - // number of the first several instructions. Otherwise, after filling - // LineInfo, we'll adjust the address of the first line number to point at - // the start of the function. - debug_line_info line_info; - line_info.vma = reinterpret_cast(FnStart); - line_info.lineno = FunctionDI.getLineNumber(); - line_info.filename = Filenames.getFilename(FirstLocScope); - LineInfo.push_back(line_info); - } - - for (std::vector::const_iterator - I = Details.LineStarts.begin(), E = Details.LineStarts.end(); - I != E; ++I) { - LineInfo.push_back(LineStartToOProfileFormat( - *Details.MF, Filenames, I->Address, I->Loc)); - } - - // In case the function didn't have line info of its own, adjust the first - // line info's address to include the start of the function. - LineInfo[0].vma = reinterpret_cast(FnStart); - - if (Wrapper.op_write_debug_line_info(FnStart, LineInfo.size(), - &*LineInfo.begin()) == -1) { - DEBUG(dbgs() - << "Failed to tell OProfile about line numbers for native function " - << F.getName() << " at [" - << FnStart << "-" << ((char*)FnStart + FnSize) << "]\n"); - } - } -} - -// Removes the being-deleted function from the symbol table. -void OProfileJITEventListener::NotifyFreeingMachineCode(void *FnStart) { - assert(FnStart && "Invalid function pointer"); - if (Wrapper.op_unload_native_code(reinterpret_cast(FnStart)) == -1) { - DEBUG(dbgs() - << "Failed to tell OProfile about unload of native function at " - << FnStart << "\n"); - } -} - void OProfileJITEventListener::NotifyObjectEmitted(const ObjectImage &Obj) { if (!Wrapper.isAgentAvailable()) { return; diff --git a/lib/ExecutionEngine/RTDyldMemoryManager.cpp b/lib/ExecutionEngine/RTDyldMemoryManager.cpp index 1646937..51b2d0f 100644 --- a/lib/ExecutionEngine/RTDyldMemoryManager.cpp +++ b/lib/ExecutionEngine/RTDyldMemoryManager.cpp @@ -210,7 +210,8 @@ ARM_MATH_IMPORTS(ARM_MATH_DECL) #undef ARM_MATH_DECL #endif -uint64_t RTDyldMemoryManager::getSymbolAddress(const std::string &Name) { +uint64_t +RTDyldMemoryManager::getSymbolAddressInProcess(const std::string &Name) { // This implementation assumes that the host program is the target. // Clients generating code for a remote target should implement their own // memory manager. @@ -253,19 +254,19 @@ uint64_t RTDyldMemoryManager::getSymbolAddress(const std::string &Name) { // is called before ExecutionEngine::runFunctionAsMain() is called. if (Name == "__main") return (uint64_t)&jit_noop; + // Try to demangle Name before looking it up in the process, otherwise symbol + // '_' (if present) will shadow '', and there will be no way to + // refer to the latter. + const char *NameStr = Name.c_str(); - void *Ptr = sys::DynamicLibrary::SearchForAddressOfSymbol(NameStr); - if (Ptr) - return (uint64_t)Ptr; - // If it wasn't found and if it starts with an underscore ('_') character, - // try again without the underscore. 
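// Illustrative sketch, not from the patch itself: the lookup order this hunk
// introduces tries the name with one leading underscore stripped before
// falling back to the name as written, mirroring getSymbolAddressInProcess.
#include "llvm/Support/DynamicLibrary.h"
#include <cstdint>
#include <string>

static uint64_t lookupInHostProcess(const std::string &Name) {
  const char *NameStr = Name.c_str();
  if (NameStr[0] == '_')
    if (void *Ptr = llvm::sys::DynamicLibrary::SearchForAddressOfSymbol(NameStr + 1))
      return (uint64_t)Ptr;
  return (uint64_t)llvm::sys::DynamicLibrary::SearchForAddressOfSymbol(NameStr);
}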
- if (NameStr[0] == '_') { - Ptr = sys::DynamicLibrary::SearchForAddressOfSymbol(NameStr+1); - if (Ptr) + if (NameStr[0] == '_') + if (void *Ptr = sys::DynamicLibrary::SearchForAddressOfSymbol(NameStr + 1)) return (uint64_t)Ptr; - } - return 0; + + // If we Name did not require demangling, or we failed to find the demangled + // name, try again without demangling. + return (uint64_t)sys::DynamicLibrary::SearchForAddressOfSymbol(NameStr); } void *RTDyldMemoryManager::getPointerToNamedFunction(const std::string &Name, diff --git a/lib/ExecutionEngine/RuntimeDyld/GDBRegistrar.cpp b/lib/ExecutionEngine/RuntimeDyld/GDBRegistrar.cpp index 8546571..dfa3a20 100644 --- a/lib/ExecutionEngine/RuntimeDyld/GDBRegistrar.cpp +++ b/lib/ExecutionEngine/RuntimeDyld/GDBRegistrar.cpp @@ -101,7 +101,7 @@ private: /// Lock used to serialize all jit registration events, since they /// modify global variables. -llvm::sys::Mutex JITDebugLock; +ManagedStatic JITDebugLock; /// Do the registration. void NotifyDebugger(jit_code_entry* JITCodeEntry) { @@ -121,7 +121,7 @@ void NotifyDebugger(jit_code_entry* JITCodeEntry) { GDBJITRegistrar::~GDBJITRegistrar() { // Free all registered object files. - llvm::MutexGuard locked(JITDebugLock); + llvm::MutexGuard locked(*JITDebugLock); for (RegisteredObjectBufferMap::iterator I = ObjectBufferMap.begin(), E = ObjectBufferMap.end(); I != E; ++I) { // Call the private method that doesn't update the map so our iterator @@ -137,7 +137,7 @@ void GDBJITRegistrar::registerObject(const ObjectBuffer &Object) { size_t Size = Object.getBufferSize(); assert(Buffer && "Attempt to register a null object with a debugger."); - llvm::MutexGuard locked(JITDebugLock); + llvm::MutexGuard locked(*JITDebugLock); assert(ObjectBufferMap.find(Buffer) == ObjectBufferMap.end() && "Second attempt to perform debug registration."); jit_code_entry* JITCodeEntry = new jit_code_entry(); @@ -156,7 +156,7 @@ void GDBJITRegistrar::registerObject(const ObjectBuffer &Object) { bool GDBJITRegistrar::deregisterObject(const ObjectBuffer& Object) { const char *Buffer = Object.getBufferStart(); - llvm::MutexGuard locked(JITDebugLock); + llvm::MutexGuard locked(*JITDebugLock); RegisteredObjectBufferMap::iterator I = ObjectBufferMap.find(Buffer); if (I != ObjectBufferMap.end()) { diff --git a/lib/ExecutionEngine/RuntimeDyld/JITRegistrar.h b/lib/ExecutionEngine/RuntimeDyld/JITRegistrar.h index 6a514ea..636011f 100644 --- a/lib/ExecutionEngine/RuntimeDyld/JITRegistrar.h +++ b/lib/ExecutionEngine/RuntimeDyld/JITRegistrar.h @@ -7,8 +7,8 @@ // //===----------------------------------------------------------------------===// -#ifndef LLVM_EXECUTION_ENGINE_JIT_REGISTRAR_H -#define LLVM_EXECUTION_ENGINE_JIT_REGISTRAR_H +#ifndef LLVM_LIB_EXECUTIONENGINE_RUNTIMEDYLD_JITREGISTRAR_H +#define LLVM_LIB_EXECUTIONENGINE_RUNTIMEDYLD_JITREGISTRAR_H #include "llvm/ExecutionEngine/ObjectBuffer.h" @@ -41,4 +41,4 @@ public: } // end namespace llvm -#endif // LLVM_EXECUTION_ENGINE_JIT_REGISTRAR_H +#endif diff --git a/lib/ExecutionEngine/RuntimeDyld/ObjectImageCommon.h b/lib/ExecutionEngine/RuntimeDyld/ObjectImageCommon.h index c3a2182..9bbf6a0d 100644 --- a/lib/ExecutionEngine/RuntimeDyld/ObjectImageCommon.h +++ b/lib/ExecutionEngine/RuntimeDyld/ObjectImageCommon.h @@ -11,8 +11,8 @@ // //===----------------------------------------------------------------------===// -#ifndef LLVM_RUNTIMEDYLD_OBJECTIMAGECOMMON_H -#define LLVM_RUNTIMEDYLD_OBJECTIMAGECOMMON_H +#ifndef LLVM_LIB_EXECUTIONENGINE_RUNTIMEDYLD_OBJECTIMAGECOMMON_H +#define 
LLVM_LIB_EXECUTIONENGINE_RUNTIMEDYLD_OBJECTIMAGECOMMON_H #include "llvm/ExecutionEngine/ObjectBuffer.h" #include "llvm/ExecutionEngine/ObjectImage.h" @@ -36,20 +36,17 @@ protected: // This form of the constructor allows subclasses to use // format-specific subclasses of ObjectFile directly - ObjectImageCommon(ObjectBuffer *Input, std::unique_ptr Obj) - : ObjectImage(Input), // saves Input as Buffer and takes ownership - ObjFile(std::move(Obj)) - { - } + ObjectImageCommon(std::unique_ptr Input, + std::unique_ptr Obj) + : ObjectImage(std::move(Input)), ObjFile(std::move(Obj)) {} public: - ObjectImageCommon(ObjectBuffer* Input) - : ObjectImage(Input) // saves Input as Buffer and takes ownership - { + ObjectImageCommon(std::unique_ptr Input) + : ObjectImage(std::move(Input)) { // FIXME: error checking? createObjectFile returns an ErrorOr // and should probably be checked for failure. - std::unique_ptr Buf(Buffer->getMemBuffer()); - ObjFile.reset(object::ObjectFile::createObjectFile(Buf).get()); + MemoryBufferRef Buf = Buffer->getMemBuffer(); + ObjFile = std::move(object::ObjectFile::createObjectFile(Buf).get()); } ObjectImageCommon(std::unique_ptr Input) : ObjectImage(nullptr), ObjFile(std::move(Input)) {} @@ -86,4 +83,4 @@ public: } // end namespace llvm -#endif // LLVM_RUNTIMEDYLD_OBJECT_IMAGE_H +#endif diff --git a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyld.cpp b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyld.cpp index 9dfd167..c7c67f6 100644 --- a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyld.cpp +++ b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyld.cpp @@ -14,6 +14,7 @@ #include "llvm/ExecutionEngine/RuntimeDyld.h" #include "JITRegistrar.h" #include "ObjectImageCommon.h" +#include "RuntimeDyldCheckerImpl.h" #include "RuntimeDyldELF.h" #include "RuntimeDyldImpl.h" #include "RuntimeDyldMachO.h" @@ -40,6 +41,44 @@ void RuntimeDyldImpl::registerEHFrames() {} void RuntimeDyldImpl::deregisterEHFrames() {} +#ifndef NDEBUG +static void dumpSectionMemory(const SectionEntry &S, StringRef State) { + dbgs() << "----- Contents of section " << S.Name << " " << State << " -----"; + + if (S.Address == nullptr) { + dbgs() << "\n
\n"; + return; + } + + const unsigned ColsPerRow = 16; + + uint8_t *DataAddr = S.Address; + uint64_t LoadAddr = S.LoadAddress; + + unsigned StartPadding = LoadAddr & (ColsPerRow - 1); + unsigned BytesRemaining = S.Size; + + if (StartPadding) { + dbgs() << "\n" << format("0x%016" PRIx64, LoadAddr & ~(ColsPerRow - 1)) << ":"; + while (StartPadding--) + dbgs() << " "; + } + + while (BytesRemaining > 0) { + if ((LoadAddr & (ColsPerRow - 1)) == 0) + dbgs() << "\n" << format("0x%016" PRIx64, LoadAddr) << ":"; + + dbgs() << " " << format("%02x", *DataAddr); + + ++DataAddr; + ++LoadAddr; + --BytesRemaining; + } + + dbgs() << "\n"; +} +#endif + // Resolve the relocations for all symbols we currently know about. void RuntimeDyldImpl::resolveRelocations() { MutexGuard locked(lock); @@ -55,8 +94,10 @@ void RuntimeDyldImpl::resolveRelocations() { // entry provides the section to which the relocation will be applied. uint64_t Addr = Sections[i].LoadAddress; DEBUG(dbgs() << "Resolving relocations Section #" << i << "\t" - << format("%p", (uint8_t *)Addr) << "\n"); + << format("0x%x", Addr) << "\n"); + DEBUG(dumpSectionMemory(Sections[i], "before relocations")); resolveRelocationList(Relocations[i], Addr); + DEBUG(dumpSectionMemory(Sections[i], "after relocations")); Relocations.erase(i); } } @@ -88,23 +129,20 @@ static std::error_code getOffset(const SymbolRef &Sym, uint64_t &Result) { if (std::error_code EC = Sym.getSection(SecI)) return EC; - if (SecI == Obj->section_end()) { - Result = UnknownAddressOrSize; - return object_error::success; - } - - uint64_t SectionAddress; - if (std::error_code EC = SecI->getAddress(SectionAddress)) - return EC; + if (SecI == Obj->section_end()) { + Result = UnknownAddressOrSize; + return object_error::success; + } + uint64_t SectionAddress = SecI->getAddress(); Result = Address - SectionAddress; return object_error::success; } -ObjectImage *RuntimeDyldImpl::loadObject(ObjectImage *InputObject) { +std::unique_ptr +RuntimeDyldImpl::loadObject(std::unique_ptr Obj) { MutexGuard locked(lock); - std::unique_ptr Obj(InputObject); if (!Obj) return nullptr; @@ -158,14 +196,13 @@ ObjectImage *RuntimeDyldImpl::loadObject(ObjectImage *InputObject) { SymType == object::SymbolRef::ST_Unknown) { uint64_t SectOffset; StringRef SectionData; - bool IsCode; section_iterator SI = Obj->end_sections(); Check(getOffset(*I, SectOffset)); Check(I->getSection(SI)); if (SI == Obj->end_sections()) continue; Check(SI->getContents(SectionData)); - Check(SI->isText(IsCode)); + bool IsCode = SI->isText(); unsigned SectionID = findOrEmitSection(*Obj, *SI, IsCode, LocalSections); LocalSymbols[Name.data()] = SymbolLoc(SectionID, SectOffset); @@ -195,8 +232,7 @@ ObjectImage *RuntimeDyldImpl::loadObject(ObjectImage *InputObject) { if (I == E && !ProcessAllSections) continue; - bool IsCode = false; - Check(RelocatedSection->isText(IsCode)); + bool IsCode = RelocatedSection->isText(); SectionID = findOrEmitSection(*Obj, *RelocatedSection, IsCode, LocalSections); DEBUG(dbgs() << "\tSectionID: " << SectionID << "\n"); @@ -204,12 +240,17 @@ ObjectImage *RuntimeDyldImpl::loadObject(ObjectImage *InputObject) { for (; I != E;) I = processRelocationRef(SectionID, I, *Obj, LocalSections, LocalSymbols, Stubs); + + // If there is an attached checker, notify it about the stubs for this + // section so that they can be verified. + if (Checker) + Checker->registerStubMap(Obj->getImageName(), SectionID, Stubs); } // Give the subclasses a chance to tie-up any loose ends. 
finalizeLoad(*Obj, LocalSections); - return Obj.release(); + return Obj; } // A helper method for computeTotalAllocSize. @@ -245,20 +286,15 @@ void RuntimeDyldImpl::computeTotalAllocSize(ObjectImage &Obj, SI != SE; ++SI) { const SectionRef &Section = *SI; - bool IsRequired; - Check(Section.isRequiredForExecution(IsRequired)); + bool IsRequired = Section.isRequiredForExecution(); // Consider only the sections that are required to be loaded for execution if (IsRequired) { - uint64_t DataSize = 0; - uint64_t Alignment64 = 0; - bool IsCode = false; - bool IsReadOnly = false; StringRef Name; - Check(Section.getSize(DataSize)); - Check(Section.getAlignment(Alignment64)); - Check(Section.isText(IsCode)); - Check(Section.isReadOnlyData(IsReadOnly)); + uint64_t DataSize = Section.getSize(); + uint64_t Alignment64 = Section.getAlignment(); + bool IsCode = Section.isText(); + bool IsReadOnly = Section.isReadOnlyData(); Check(Section.getName(Name)); unsigned Alignment = (unsigned)Alignment64 & 0xffffffffL; @@ -340,10 +376,8 @@ unsigned RuntimeDyldImpl::computeSectionStubBufSize(ObjectImage &Obj, } // Get section data size and alignment - uint64_t Alignment64; - uint64_t DataSize; - Check(Section.getSize(DataSize)); - Check(Section.getAlignment(Alignment64)); + uint64_t DataSize = Section.getSize(); + uint64_t Alignment64 = Section.getAlignment(); // Add stubbuf size alignment unsigned Alignment = (unsigned)Alignment64 & 0xffffffffL; @@ -354,6 +388,36 @@ unsigned RuntimeDyldImpl::computeSectionStubBufSize(ObjectImage &Obj, return StubBufSize; } +uint64_t RuntimeDyldImpl::readBytesUnaligned(uint8_t *Src, + unsigned Size) const { + uint64_t Result = 0; + if (IsTargetLittleEndian) { + Src += Size - 1; + while (Size--) + Result = (Result << 8) | *Src--; + } else + while (Size--) + Result = (Result << 8) | *Src++; + + return Result; +} + +void RuntimeDyldImpl::writeBytesUnaligned(uint64_t Value, uint8_t *Dst, + unsigned Size) const { + if (IsTargetLittleEndian) { + while (Size--) { + *Dst++ = Value & 0xFF; + Value >>= 8; + } + } else { + Dst += Size - 1; + while (Size--) { + *Dst-- = Value & 0xFF; + Value >>= 8; + } + } +} + void RuntimeDyldImpl::emitCommonSymbols(ObjectImage &Obj, const CommonSymbolMap &CommonSymbols, uint64_t TotalSize, @@ -365,7 +429,7 @@ void RuntimeDyldImpl::emitCommonSymbols(ObjectImage &Obj, if (!Addr) report_fatal_error("Unable to allocate memory for common symbols!"); uint64_t Offset = 0; - Sections.push_back(SectionEntry(StringRef(), Addr, TotalSize, 0)); + Sections.push_back(SectionEntry("", Addr, TotalSize, 0)); memset(Addr, 0, TotalSize); DEBUG(dbgs() << "emitCommonSection SectionID: " << SectionID << " new addr: " @@ -397,24 +461,18 @@ unsigned RuntimeDyldImpl::emitSection(ObjectImage &Obj, const SectionRef &Section, bool IsCode) { StringRef data; - uint64_t Alignment64; Check(Section.getContents(data)); - Check(Section.getAlignment(Alignment64)); + uint64_t Alignment64 = Section.getAlignment(); unsigned Alignment = (unsigned)Alignment64 & 0xffffffffL; - bool IsRequired; - bool IsVirtual; - bool IsZeroInit; - bool IsReadOnly; - uint64_t DataSize; unsigned PaddingSize = 0; unsigned StubBufSize = 0; StringRef Name; - Check(Section.isRequiredForExecution(IsRequired)); - Check(Section.isVirtual(IsVirtual)); - Check(Section.isZeroInit(IsZeroInit)); - Check(Section.isReadOnlyData(IsReadOnly)); - Check(Section.getSize(DataSize)); + bool IsRequired = Section.isRequiredForExecution(); + bool IsVirtual = Section.isVirtual(); + bool IsZeroInit = Section.isZeroInit(); + bool IsReadOnly = 
Section.isReadOnlyData(); + uint64_t DataSize = Section.getSize(); Check(Section.getName(Name)); StubBufSize = computeSectionStubBufSize(Obj, Section); @@ -477,6 +535,10 @@ unsigned RuntimeDyldImpl::emitSection(ObjectImage &Obj, } Sections.push_back(SectionEntry(Name, Addr, DataSize, (uintptr_t)pData)); + + if (Checker) + Checker->registerSection(Obj.getImageName(), SectionID); + return SectionID; } @@ -517,34 +579,26 @@ void RuntimeDyldImpl::addRelocationForSymbol(const RelocationEntry &RE, } } -uint8_t *RuntimeDyldImpl::createStubFunction(uint8_t *Addr) { - if (Arch == Triple::aarch64 || Arch == Triple::aarch64_be || - Arch == Triple::arm64 || Arch == Triple::arm64_be) { +uint8_t *RuntimeDyldImpl::createStubFunction(uint8_t *Addr, + unsigned AbiVariant) { + if (Arch == Triple::aarch64 || Arch == Triple::aarch64_be) { // This stub has to be able to access the full address space, // since symbol lookup won't necessarily find a handy, in-range, // PLT stub for functions which could be anywhere. - uint32_t *StubAddr = (uint32_t *)Addr; - // Stub can use ip0 (== x16) to calculate address - *StubAddr = 0xd2e00010; // movz ip0, #:abs_g3: - StubAddr++; - *StubAddr = 0xf2c00010; // movk ip0, #:abs_g2_nc: - StubAddr++; - *StubAddr = 0xf2a00010; // movk ip0, #:abs_g1_nc: - StubAddr++; - *StubAddr = 0xf2800010; // movk ip0, #:abs_g0_nc: - StubAddr++; - *StubAddr = 0xd61f0200; // br ip0 + writeBytesUnaligned(0xd2e00010, Addr, 4); // movz ip0, #:abs_g3: + writeBytesUnaligned(0xf2c00010, Addr+4, 4); // movk ip0, #:abs_g2_nc: + writeBytesUnaligned(0xf2a00010, Addr+8, 4); // movk ip0, #:abs_g1_nc: + writeBytesUnaligned(0xf2800010, Addr+12, 4); // movk ip0, #:abs_g0_nc: + writeBytesUnaligned(0xd61f0200, Addr+16, 4); // br ip0 return Addr; } else if (Arch == Triple::arm || Arch == Triple::armeb) { // TODO: There is only ARM far stub now. We should add the Thumb stub, // and stubs for branches Thumb - ARM and ARM - Thumb. - uint32_t *StubAddr = (uint32_t *)Addr; - *StubAddr = 0xe51ff004; // ldr pc,
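// Illustrative sketch, not from the patch itself: createStubFunction now
// emits stub instructions through writeBytesUnaligned, which handles target
// byte order and possibly unaligned destinations. A standalone little-endian
// round-trip with the same shape as those helpers:
#include <cassert>
#include <cstdint>

static void writeLE(uint64_t Value, uint8_t *Dst, unsigned Size) {
  while (Size--) {
    *Dst++ = Value & 0xFF;   // least significant byte first
    Value >>= 8;
  }
}

static uint64_t readLE(const uint8_t *Src, unsigned Size) {
  uint64_t Result = 0;
  Src += Size - 1;           // start at the most significant byte
  while (Size--)
    Result = (Result << 8) | *Src--;
  return Result;
}

int main() {
  uint8_t Buf[4];
  writeLE(0xd2e00010, Buf, 4);            // one of the AArch64 stub words above
  assert(readLE(Buf, 4) == 0xd2e00010);   // round-trips regardless of alignment
  return 0;
}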