author     Pirama Arumuga Nainar <pirama@google.com>   2015-04-08 08:55:49 -0700
committer  Pirama Arumuga Nainar <pirama@google.com>   2015-04-09 15:04:38 -0700
commit     4c5e43da7792f75567b693105cc53e3f1992ad98 (patch)
tree       1b2c9792582e12f5af0b1512e3094425f0dc0df9 /lib
parent     c75239e6119d0f9a74c57099d91cbc9bde56bf33 (diff)
Update aosp/master llvm for rebase to r233350
Change-Id: I07d935f8793ee8ec6b7da003f6483046594bca49
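
The recurring pattern across this rebase is upstream's DataLayout migration: the DataLayout is now owned by the Module, so passes obtain it from Module::getDataLayout() instead of querying the optional DataLayoutPass, and InitializeAliasAnalysis grows an explicit DataLayout parameter. A minimal sketch of the new-style boilerplate, as applied to the alias-analysis passes below (MyAA is a hypothetical pass, not part of this change):

// Sketch only: "MyAA" is hypothetical. The pattern it shows -- seeding the
// AliasAnalysis base class with the Module-owned DataLayout from
// runOnModule() -- is the one this diff applies throughout lib/Analysis.
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/IR/Module.h"
#include "llvm/Pass.h"
using namespace llvm;

namespace {
struct MyAA : public ModulePass, public AliasAnalysis {
  static char ID;
  MyAA() : ModulePass(ID) {}

  bool runOnModule(Module &M) override {
    // New signature: the DataLayout is handed over explicitly, rather than
    // looked up via getAnalysisIfAvailable<DataLayoutPass>().
    InitializeAliasAnalysis(this, &M.getDataLayout());
    return false;
  }
};
}
char MyAA::ID = 0;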
Diffstat (limited to 'lib')
-rw-r--r--  lib/Analysis/AliasAnalysis.cpp | 10
-rw-r--r--  lib/Analysis/AliasAnalysisCounter.cpp | 3
-rw-r--r--  lib/Analysis/AliasDebugger.cpp | 2
-rw-r--r--  lib/Analysis/Analysis.cpp | 1
-rw-r--r--  lib/Analysis/Android.mk | 1
-rw-r--r--  lib/Analysis/BasicAliasAnalysis.cpp | 62
-rw-r--r--  lib/Analysis/BranchProbabilityInfo.cpp | 1
-rw-r--r--  lib/Analysis/CFLAliasAnalysis.cpp | 239
-rw-r--r--  lib/Analysis/CMakeLists.txt | 1
-rw-r--r--  lib/Analysis/CodeMetrics.cpp | 1
-rw-r--r--  lib/Analysis/ConstantFolding.cpp | 259
-rw-r--r--  lib/Analysis/DependenceAnalysis.cpp | 62
-rw-r--r--  lib/Analysis/IPA/CallGraphSCCPass.cpp | 41
-rw-r--r--  lib/Analysis/IPA/GlobalsModRef.cpp | 12
-rw-r--r--  lib/Analysis/IPA/InlineCost.cpp | 61
-rw-r--r--  lib/Analysis/IVUsers.cpp | 9
-rw-r--r--  lib/Analysis/InstructionSimplify.cpp | 120
-rw-r--r--  lib/Analysis/JumpInstrTableInfo.cpp | 55
-rw-r--r--  lib/Analysis/LazyValueInfo.cpp | 110
-rw-r--r--  lib/Analysis/LibCallAliasAnalysis.cpp | 6
-rw-r--r--  lib/Analysis/LibCallSemantics.cpp | 12
-rw-r--r--  lib/Analysis/Lint.cpp | 378
-rw-r--r--  lib/Analysis/Loads.cpp | 28
-rw-r--r--  lib/Analysis/LoopAccessAnalysis.cpp | 417
-rw-r--r--  lib/Analysis/LoopInfo.cpp | 1
-rw-r--r--  lib/Analysis/LoopPass.cpp | 1
-rw-r--r--  lib/Analysis/MemDerefPrinter.cpp | 5
-rw-r--r--  lib/Analysis/MemoryBuiltins.cpp | 63
-rw-r--r--  lib/Analysis/MemoryDependenceAnalysis.cpp | 67
-rw-r--r--  lib/Analysis/ModuleDebugInfoPrinter.cpp | 62
-rw-r--r--  lib/Analysis/NoAliasAnalysis.cpp | 7
-rw-r--r--  lib/Analysis/PHITransAddr.cpp | 7
-rw-r--r--  lib/Analysis/RegionPass.cpp | 25
-rw-r--r--  lib/Analysis/ScalarEvolution.cpp | 629
-rw-r--r--  lib/Analysis/ScalarEvolutionAliasAnalysis.cpp | 3
-rw-r--r--  lib/Analysis/ScalarEvolutionExpander.cpp | 121
-rw-r--r--  lib/Analysis/ScopedNoAliasAA.cpp | 7
-rw-r--r--  lib/Analysis/TargetLibraryInfo.cpp | 487
-rw-r--r--  lib/Analysis/TargetTransformInfo.cpp | 11
-rw-r--r--  lib/Analysis/TypeBasedAliasAnalysis.cpp | 22
-rw-r--r--  lib/Analysis/ValueTracking.cpp | 741
-rw-r--r--  lib/AsmParser/LLParser.cpp | 101
-rw-r--r--  lib/AsmParser/Parser.cpp | 1
-rw-r--r--  lib/Bitcode/Reader/BitcodeReader.cpp | 447
-rw-r--r--  lib/Bitcode/Reader/BitcodeReader.h | 369
-rw-r--r--  lib/Bitcode/Reader/BitstreamReader.cpp | 2
-rw-r--r--  lib/Bitcode/Writer/BitcodeWriter.cpp | 9
-rw-r--r--  lib/CMakeLists.txt | 1
-rw-r--r--  lib/CodeGen/Analysis.cpp | 9
-rw-r--r--  lib/CodeGen/Android.mk | 4
-rw-r--r--  lib/CodeGen/AsmPrinter/ARMException.cpp | 17
-rw-r--r--  lib/CodeGen/AsmPrinter/AsmPrinter.cpp | 199
-rw-r--r--  lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp | 121
-rw-r--r--  lib/CodeGen/AsmPrinter/AsmPrinterHandler.h | 4
-rw-r--r--  lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp | 37
-rw-r--r--  lib/CodeGen/AsmPrinter/ByteStreamer.h | 29
-rw-r--r--  lib/CodeGen/AsmPrinter/DIE.cpp | 85
-rw-r--r--  lib/CodeGen/AsmPrinter/DIEHash.cpp | 6
-rw-r--r--  lib/CodeGen/AsmPrinter/DbgValueHistoryCalculator.cpp | 1
-rw-r--r--  lib/CodeGen/AsmPrinter/DebugLocEntry.h | 39
-rw-r--r--  lib/CodeGen/AsmPrinter/DwarfAccelTable.cpp | 63
-rw-r--r--  lib/CodeGen/AsmPrinter/DwarfAccelTable.h | 6
-rw-r--r--  lib/CodeGen/AsmPrinter/DwarfCFIException.cpp | 51
-rw-r--r--  lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp | 43
-rw-r--r--  lib/CodeGen/AsmPrinter/DwarfCompileUnit.h | 24
-rw-r--r--  lib/CodeGen/AsmPrinter/DwarfDebug.cpp | 466
-rw-r--r--  lib/CodeGen/AsmPrinter/DwarfDebug.h | 56
-rw-r--r--  lib/CodeGen/AsmPrinter/DwarfException.h | 45
-rw-r--r--  lib/CodeGen/AsmPrinter/DwarfExpression.cpp | 78
-rw-r--r--  lib/CodeGen/AsmPrinter/DwarfExpression.h | 31
-rw-r--r--  lib/CodeGen/AsmPrinter/DwarfFile.cpp | 25
-rw-r--r--  lib/CodeGen/AsmPrinter/DwarfFile.h | 7
-rw-r--r--  lib/CodeGen/AsmPrinter/DwarfStringPool.cpp | 2
-rw-r--r--  lib/CodeGen/AsmPrinter/DwarfUnit.cpp | 48
-rw-r--r--  lib/CodeGen/AsmPrinter/DwarfUnit.h | 11
-rw-r--r--  lib/CodeGen/AsmPrinter/EHStreamer.cpp | 29
-rw-r--r--  lib/CodeGen/AsmPrinter/EHStreamer.h | 10
-rw-r--r--  lib/CodeGen/AsmPrinter/Win64Exception.cpp | 14
-rw-r--r--  lib/CodeGen/AsmPrinter/WinCodeViewLineTables.cpp | 12
-rw-r--r--  lib/CodeGen/AtomicExpandPass.cpp | 33
-rw-r--r--  lib/CodeGen/BranchFolding.cpp | 61
-rw-r--r--  lib/CodeGen/CMakeLists.txt | 2
-rw-r--r--  lib/CodeGen/CodeGen.cpp | 20
-rw-r--r--  lib/CodeGen/CodeGenPrepare.cpp | 231
-rw-r--r--  lib/CodeGen/DwarfEHPrepare.cpp | 109
-rw-r--r--  lib/CodeGen/ExecutionDepsFix.cpp | 35
-rw-r--r--  lib/CodeGen/ForwardControlFlowIntegrity.cpp | 374
-rw-r--r--  lib/CodeGen/IfConversion.cpp | 18
-rw-r--r--  lib/CodeGen/InterferenceCache.cpp | 3
-rw-r--r--  lib/CodeGen/InterferenceCache.h | 4
-rw-r--r--  lib/CodeGen/JumpInstrTables.cpp | 296
-rw-r--r--  lib/CodeGen/LLVMTargetMachine.cpp | 79
-rw-r--r--  lib/CodeGen/LatencyPriorityQueue.cpp | 13
-rw-r--r--  lib/CodeGen/LiveDebugVariables.cpp | 11
-rw-r--r--  lib/CodeGen/LiveInterval.cpp | 13
-rw-r--r--  lib/CodeGen/LiveIntervalAnalysis.cpp | 8
-rw-r--r--  lib/CodeGen/LivePhysRegs.cpp | 1
-rw-r--r--  lib/CodeGen/LiveRangeCalc.cpp | 4
-rw-r--r--  lib/CodeGen/LiveRangeCalc.h | 2
-rw-r--r--  lib/CodeGen/LiveStackAnalysis.cpp | 6
-rw-r--r--  lib/CodeGen/LiveVariables.cpp | 1
-rw-r--r--  lib/CodeGen/LocalStackSlotAllocation.cpp | 12
-rw-r--r--  lib/CodeGen/MachineBasicBlock.cpp | 2
-rw-r--r--  lib/CodeGen/MachineBlockPlacement.cpp | 431
-rw-r--r--  lib/CodeGen/MachineCSE.cpp | 1
-rw-r--r--  lib/CodeGen/MachineCopyPropagation.cpp | 7
-rw-r--r--  lib/CodeGen/MachineDominators.cpp | 66
-rw-r--r--  lib/CodeGen/MachineFunction.cpp | 18
-rw-r--r--  lib/CodeGen/MachineInstr.cpp | 67
-rw-r--r--  lib/CodeGen/MachineLICM.cpp | 61
-rw-r--r--  lib/CodeGen/MachineLoopInfo.cpp | 1
-rw-r--r--  lib/CodeGen/MachineRegisterInfo.cpp | 2
-rw-r--r--  lib/CodeGen/MachineScheduler.cpp | 11
-rw-r--r--  lib/CodeGen/MachineVerifier.cpp | 64
-rw-r--r--  lib/CodeGen/PHIElimination.cpp | 21
-rw-r--r--  lib/CodeGen/Passes.cpp | 50
-rw-r--r--  lib/CodeGen/PeepholeOptimizer.cpp | 6
-rw-r--r--  lib/CodeGen/PrologEpilogInserter.cpp | 60
-rw-r--r--  lib/CodeGen/PrologEpilogInserter.h | 78
-rw-r--r--  lib/CodeGen/RegAllocBase.cpp | 1
-rw-r--r--  lib/CodeGen/RegAllocGreedy.cpp | 3
-rw-r--r--  lib/CodeGen/RegAllocPBQP.cpp | 83
-rw-r--r--  lib/CodeGen/RegisterClassInfo.cpp | 7
-rw-r--r--  lib/CodeGen/RegisterCoalescer.cpp | 170
-rw-r--r--  lib/CodeGen/RegisterPressure.cpp | 2
-rw-r--r--  lib/CodeGen/ScheduleDAGInstrs.cpp | 107
-rw-r--r--  lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 901
-rw-r--r--  lib/CodeGen/SelectionDAG/FastISel.cpp | 10
-rw-r--r--  lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp | 1
-rw-r--r--  lib/CodeGen/SelectionDAG/LegalizeDAG.cpp | 154
-rw-r--r--  lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp | 101
-rw-r--r--  lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp | 10
-rw-r--r--  lib/CodeGen/SelectionDAG/ResourcePriorityQueue.cpp | 16
-rw-r--r--  lib/CodeGen/SelectionDAG/SelectionDAG.cpp | 76
-rw-r--r--  lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp | 735
-rw-r--r--  lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h | 45
-rw-r--r--  lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp | 1
-rw-r--r--  lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp | 79
-rw-r--r--  lib/CodeGen/SelectionDAG/StatepointLowering.cpp | 67
-rw-r--r--  lib/CodeGen/SelectionDAG/TargetLowering.cpp | 9
-rw-r--r--  lib/CodeGen/ShadowStackGCLowering.cpp | 35
-rw-r--r--  lib/CodeGen/SjLjEHPrepare.cpp | 3
-rw-r--r--  lib/CodeGen/SlotIndexes.cpp | 2
-rw-r--r--  lib/CodeGen/StackColoring.cpp | 2
-rw-r--r--  lib/CodeGen/StackMapLivenessAnalysis.cpp | 64
-rw-r--r--  lib/CodeGen/StackMaps.cpp | 186
-rw-r--r--  lib/CodeGen/StackSlotColoring.cpp | 12
-rw-r--r--  lib/CodeGen/TargetInstrInfo.cpp | 45
-rw-r--r--  lib/CodeGen/TargetLoweringBase.cpp | 120
-rw-r--r--  lib/CodeGen/TargetLoweringObjectFileImpl.cpp | 155
-rw-r--r--  lib/CodeGen/TwoAddressInstructionPass.cpp | 63
-rw-r--r--  lib/CodeGen/VirtRegMap.cpp | 2
-rw-r--r--  lib/CodeGen/WinEHPrepare.cpp | 1629
-rw-r--r--  lib/DebugInfo/DWARF/DWARFDebugFrame.cpp | 1
-rw-r--r--  lib/DebugInfo/DWARF/DWARFDebugLoc.cpp | 3
-rw-r--r--  lib/DebugInfo/DWARF/DWARFFormValue.cpp | 19
-rw-r--r--  lib/DebugInfo/PDB/CMakeLists.txt | 2
-rw-r--r--  lib/DebugInfo/PDB/DIA/DIASession.cpp | 45
-rw-r--r--  lib/DebugInfo/PDB/PDB.cpp | 8
-rw-r--r--  lib/DebugInfo/PDB/PDBExtras.cpp | 39
-rw-r--r--  lib/DebugInfo/PDB/PDBSymDumper.cpp | 93
-rw-r--r--  lib/DebugInfo/PDB/PDBSymbolAnnotation.cpp | 5
-rw-r--r--  lib/DebugInfo/PDB/PDBSymbolBlock.cpp | 5
-rw-r--r--  lib/DebugInfo/PDB/PDBSymbolCompiland.cpp | 5
-rw-r--r--  lib/DebugInfo/PDB/PDBSymbolCompilandDetails.cpp | 5
-rw-r--r--  lib/DebugInfo/PDB/PDBSymbolCompilandEnv.cpp | 5
-rw-r--r--  lib/DebugInfo/PDB/PDBSymbolCustom.cpp | 5
-rw-r--r--  lib/DebugInfo/PDB/PDBSymbolData.cpp | 5
-rw-r--r--  lib/DebugInfo/PDB/PDBSymbolExe.cpp | 5
-rw-r--r--  lib/DebugInfo/PDB/PDBSymbolFunc.cpp | 5
-rw-r--r--  lib/DebugInfo/PDB/PDBSymbolFuncDebugEnd.cpp | 5
-rw-r--r--  lib/DebugInfo/PDB/PDBSymbolFuncDebugStart.cpp | 5
-rw-r--r--  lib/DebugInfo/PDB/PDBSymbolLabel.cpp | 5
-rw-r--r--  lib/DebugInfo/PDB/PDBSymbolPublicSymbol.cpp | 5
-rw-r--r--  lib/DebugInfo/PDB/PDBSymbolThunk.cpp | 5
-rw-r--r--  lib/DebugInfo/PDB/PDBSymbolTypeArray.cpp | 5
-rw-r--r--  lib/DebugInfo/PDB/PDBSymbolTypeBaseClass.cpp | 5
-rw-r--r--  lib/DebugInfo/PDB/PDBSymbolTypeBuiltin.cpp | 5
-rw-r--r--  lib/DebugInfo/PDB/PDBSymbolTypeCustom.cpp | 5
-rw-r--r--  lib/DebugInfo/PDB/PDBSymbolTypeDimension.cpp | 5
-rw-r--r--  lib/DebugInfo/PDB/PDBSymbolTypeEnum.cpp | 15
-rw-r--r--  lib/DebugInfo/PDB/PDBSymbolTypeFriend.cpp | 5
-rw-r--r--  lib/DebugInfo/PDB/PDBSymbolTypeFunctionArg.cpp | 5
-rw-r--r--  lib/DebugInfo/PDB/PDBSymbolTypeFunctionSig.cpp | 5
-rw-r--r--  lib/DebugInfo/PDB/PDBSymbolTypeManaged.cpp | 5
-rw-r--r--  lib/DebugInfo/PDB/PDBSymbolTypePointer.cpp | 5
-rw-r--r--  lib/DebugInfo/PDB/PDBSymbolTypeTypedef.cpp | 5
-rw-r--r--  lib/DebugInfo/PDB/PDBSymbolTypeUDT.cpp | 5
-rw-r--r--  lib/DebugInfo/PDB/PDBSymbolTypeVTable.cpp | 5
-rw-r--r--  lib/DebugInfo/PDB/PDBSymbolTypeVTableShape.cpp | 5
-rw-r--r--  lib/DebugInfo/PDB/PDBSymbolUnknown.cpp | 5
-rw-r--r--  lib/DebugInfo/PDB/PDBSymbolUsingNamespace.cpp | 5
-rw-r--r--  lib/ExecutionEngine/ExecutionEngine.cpp | 38
-rw-r--r--  lib/ExecutionEngine/Interpreter/CMakeLists.txt | 2
-rw-r--r--  lib/ExecutionEngine/Interpreter/Execution.cpp | 17
-rw-r--r--  lib/ExecutionEngine/MCJIT/MCJIT.cpp | 7
-rw-r--r--  lib/ExecutionEngine/Orc/IndirectionUtils.cpp | 1
-rw-r--r--  lib/ExecutionEngine/Orc/OrcMCJITReplacement.h | 4
-rw-r--r--  lib/ExecutionEngine/Orc/OrcTargetSupport.cpp | 6
-rw-r--r--  lib/ExecutionEngine/RuntimeDyld/Android.mk | 1
-rw-r--r--  lib/ExecutionEngine/RuntimeDyld/CMakeLists.txt | 1
-rw-r--r--  lib/ExecutionEngine/RuntimeDyld/RuntimeDyld.cpp | 81
-rw-r--r--  lib/ExecutionEngine/RuntimeDyld/RuntimeDyldCOFF.cpp | 85
-rw-r--r--  lib/ExecutionEngine/RuntimeDyld/RuntimeDyldCOFF.h | 46
-rw-r--r--  lib/ExecutionEngine/RuntimeDyld/RuntimeDyldChecker.cpp | 18
-rw-r--r--  lib/ExecutionEngine/RuntimeDyld/RuntimeDyldCheckerImpl.h | 2
-rw-r--r--  lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp | 2
-rw-r--r--  lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.h | 10
-rw-r--r--  lib/ExecutionEngine/RuntimeDyld/RuntimeDyldImpl.h | 49
-rw-r--r--  lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldCOFFX86_64.h | 214
-rw-r--r--  lib/Fuzzer/FuzzerDriver.cpp | 1
-rw-r--r--  lib/Fuzzer/FuzzerFlags.def | 1
-rw-r--r--  lib/Fuzzer/FuzzerInternal.h | 10
-rw-r--r--  lib/Fuzzer/FuzzerLoop.cpp | 14
-rw-r--r--  lib/Fuzzer/test/CMakeLists.txt | 1
-rw-r--r--  lib/Fuzzer/test/CounterTest.cpp | 14
-rw-r--r--  lib/Fuzzer/test/fuzzer.test | 3
-rw-r--r--  lib/IR/AsmWriter.cpp | 975
-rw-r--r--  lib/IR/AsmWriter.h | 129
-rw-r--r--  lib/IR/AutoUpgrade.cpp | 152
-rw-r--r--  lib/IR/BasicBlock.cpp | 24
-rw-r--r--  lib/IR/ConstantFold.cpp | 59
-rw-r--r--  lib/IR/ConstantRange.cpp | 45
-rw-r--r--  lib/IR/Constants.cpp | 25
-rw-r--r--  lib/IR/Core.cpp | 2
-rw-r--r--  lib/IR/DIBuilder.cpp | 707
-rw-r--r--  lib/IR/DataLayout.cpp | 134
-rw-r--r--  lib/IR/DebugInfo.cpp | 833
-rw-r--r--  lib/IR/DiagnosticInfo.cpp | 2
-rw-r--r--  lib/IR/GCOV.cpp | 7
-rw-r--r--  lib/IR/Globals.cpp | 4
-rw-r--r--  lib/IR/InlineAsm.cpp | 2
-rw-r--r--  lib/IR/Instruction.cpp | 8
-rw-r--r--  lib/IR/Instructions.cpp | 253
-rw-r--r--  lib/IR/LLVMContextImpl.h | 6
-rw-r--r--  lib/IR/LegacyPassManager.cpp | 2
-rw-r--r--  lib/IR/Mangler.cpp | 6
-rw-r--r--  lib/IR/Module.cpp | 30
-rw-r--r--  lib/IR/TypeFinder.cpp | 2
-rw-r--r--  lib/IR/Value.cpp | 46
-rw-r--r--  lib/IR/Verifier.cpp | 2012
-rw-r--r--  lib/LLVMBuild.txt | 2
-rw-r--r--  lib/LTO/LTOCodeGenerator.cpp | 38
-rw-r--r--  lib/LTO/LTOModule.cpp | 5
-rw-r--r--  lib/Linker/LinkModules.cpp | 66
-rw-r--r--  lib/MC/ELFObjectWriter.cpp | 39
-rw-r--r--  lib/MC/MCAsmInfo.cpp | 2
-rw-r--r--  lib/MC/MCAsmInfoDarwin.cpp | 1
-rw-r--r--  lib/MC/MCAsmStreamer.cpp | 2
-rw-r--r--  lib/MC/MCAssembler.cpp | 65
-rw-r--r--  lib/MC/MCContext.cpp | 155
-rw-r--r--  lib/MC/MCDwarf.cpp | 44
-rw-r--r--  lib/MC/MCELFStreamer.cpp | 10
-rw-r--r--  lib/MC/MCExpr.cpp | 119
-rw-r--r--  lib/MC/MCLinkerOptimizationHint.cpp | 2
-rw-r--r--  lib/MC/MCMachOStreamer.cpp | 50
-rw-r--r--  lib/MC/MCObjectFileInfo.cpp | 521
-rw-r--r--  lib/MC/MCObjectStreamer.cpp | 14
-rw-r--r--  lib/MC/MCObjectWriter.cpp | 10
-rw-r--r--  lib/MC/MCParser/AsmLexer.cpp | 2
-rw-r--r--  lib/MC/MCParser/AsmParser.cpp | 44
-rw-r--r--  lib/MC/MCParser/DarwinAsmParser.cpp | 2
-rw-r--r--  lib/MC/MCSection.cpp | 9
-rw-r--r--  lib/MC/MCSectionMachO.cpp | 6
-rw-r--r--  lib/MC/MCStreamer.cpp | 28
-rw-r--r--  lib/MC/MCWinEH.cpp | 1
-rw-r--r--  lib/MC/MachObjectWriter.cpp | 1
-rw-r--r--  lib/MC/SubtargetFeature.cpp | 20
-rw-r--r--  lib/MC/WinCOFFObjectWriter.cpp | 8
-rw-r--r--  lib/MC/WinCOFFStreamer.cpp | 4
-rw-r--r--  lib/Makefile | 2
-rw-r--r--  lib/Object/Archive.cpp | 61
-rw-r--r--  lib/Object/COFFObjectFile.cpp | 13
-rw-r--r--  lib/Object/ELFYAML.cpp | 1
-rw-r--r--  lib/Object/IRObjectFile.cpp | 14
-rw-r--r--  lib/Option/Arg.cpp | 25
-rw-r--r--  lib/Option/ArgList.cpp | 34
-rw-r--r--  lib/Option/OptTable.cpp | 14
-rw-r--r--  lib/Option/Option.cpp | 3
-rw-r--r--  lib/Passes/Android.mk | 30
-rw-r--r--  lib/Passes/CMakeLists.txt | 8
-rw-r--r--  lib/Passes/LLVMBuild.txt | 22
-rw-r--r--  lib/Passes/Makefile | 14
-rw-r--r--  lib/Passes/PassBuilder.cpp | 412
-rw-r--r--  lib/Passes/PassRegistry.def | 77
-rw-r--r--  lib/ProfileData/CoverageMapping.cpp | 6
-rw-r--r--  lib/ProfileData/CoverageMappingReader.cpp | 161
-rw-r--r--  lib/ProfileData/InstrProfReader.cpp | 3
-rw-r--r--  lib/Support/APFloat.cpp | 6
-rw-r--r--  lib/Support/APInt.cpp | 70
-rw-r--r--  lib/Support/Allocator.cpp | 5
-rw-r--r--  lib/Support/Android.mk | 2
-rw-r--r--  lib/Support/CMakeLists.txt | 2
-rw-r--r--  lib/Support/CommandLine.cpp | 29
-rw-r--r--  lib/Support/Compression.cpp | 1
-rw-r--r--  lib/Support/CrashRecoveryContext.cpp | 2
-rw-r--r--  lib/Support/DAGDeltaAlgorithm.cpp | 26
-rw-r--r--  lib/Support/DataStream.cpp | 2
-rw-r--r--  lib/Support/Debug.cpp | 1
-rw-r--r--  lib/Support/FileOutputBuffer.cpp | 14
-rw-r--r--  lib/Support/FoldingSet.cpp | 7
-rw-r--r--  lib/Support/FormattedStream.cpp | 1
-rw-r--r--  lib/Support/GraphWriter.cpp | 3
-rw-r--r--  lib/Support/Host.cpp | 8
-rw-r--r--  lib/Support/IsInf.cpp | 49
-rw-r--r--  lib/Support/IsNAN.cpp | 33
-rw-r--r--  lib/Support/LockFileManager.cpp | 27
-rw-r--r--  lib/Support/MemoryBuffer.cpp | 1
-rw-r--r--  lib/Support/Path.cpp | 15
-rw-r--r--  lib/Support/Process.cpp | 2
-rw-r--r--  lib/Support/Program.cpp | 1
-rw-r--r--  lib/Support/RandomNumberGenerator.cpp | 6
-rw-r--r--  lib/Support/Regex.cpp | 3
-rw-r--r--  lib/Support/ScaledNumber.cpp | 1
-rw-r--r--  lib/Support/SourceMgr.cpp | 2
-rw-r--r--  lib/Support/SpecialCaseList.cpp | 2
-rw-r--r--  lib/Support/StreamingMemoryObject.cpp | 3
-rw-r--r--  lib/Support/StringExtras.cpp | 1
-rw-r--r--  lib/Support/SystemUtils.cpp | 2
-rw-r--r--  lib/Support/TargetRegistry.cpp | 1
-rw-r--r--  lib/Support/Timer.cpp | 2
-rw-r--r--  lib/Support/Triple.cpp | 46
-rw-r--r--  lib/Support/Twine.cpp | 14
-rw-r--r--  lib/Support/Unix/Program.inc | 3
-rw-r--r--  lib/Support/Unix/Signals.inc | 44
-rw-r--r--  lib/Support/Windows/Path.inc | 4
-rw-r--r--  lib/Support/Windows/Process.inc | 13
-rw-r--r--  lib/Support/Windows/Signals.inc | 222
-rw-r--r--  lib/Support/YAMLParser.cpp | 1
-rw-r--r--  lib/Support/YAMLTraits.cpp | 22
-rw-r--r--  lib/TableGen/TGParser.cpp | 1
-rw-r--r--  lib/Target/AArch64/AArch64.td | 7
-rw-r--r--  lib/Target/AArch64/AArch64A53Fix835769.cpp | 1
-rw-r--r--  lib/Target/AArch64/AArch64A57FPLoadBalancing.cpp | 26
-rw-r--r--  lib/Target/AArch64/AArch64AddressTypePromotion.cpp | 1
-rw-r--r--  lib/Target/AArch64/AArch64AsmPrinter.cpp | 77
-rw-r--r--  lib/Target/AArch64/AArch64CleanupLocalDynamicTLSPass.cpp | 6
-rw-r--r--  lib/Target/AArch64/AArch64CollectLOH.cpp | 14
-rw-r--r--  lib/Target/AArch64/AArch64FastISel.cpp | 4
-rw-r--r--  lib/Target/AArch64/AArch64ISelDAGToDAG.cpp | 47
-rw-r--r--  lib/Target/AArch64/AArch64ISelLowering.cpp | 302
-rw-r--r--  lib/Target/AArch64/AArch64ISelLowering.h | 35
-rw-r--r--  lib/Target/AArch64/AArch64InstrInfo.cpp | 10
-rw-r--r--  lib/Target/AArch64/AArch64InstrInfo.h | 7
-rw-r--r--  lib/Target/AArch64/AArch64InstrInfo.td | 297
-rw-r--r--  lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp | 127
-rw-r--r--  lib/Target/AArch64/AArch64MCInstLower.cpp | 11
-rw-r--r--  lib/Target/AArch64/AArch64PBQPRegAlloc.cpp | 2
-rw-r--r--  lib/Target/AArch64/AArch64PromoteConstant.cpp | 7
-rw-r--r--  lib/Target/AArch64/AArch64RegisterInfo.cpp | 47
-rw-r--r--  lib/Target/AArch64/AArch64RegisterInfo.h | 19
-rw-r--r--  lib/Target/AArch64/AArch64Subtarget.cpp | 2
-rw-r--r--  lib/Target/AArch64/AArch64Subtarget.h | 3
-rw-r--r--  lib/Target/AArch64/AArch64TargetMachine.cpp | 31
-rw-r--r--  lib/Target/AArch64/AArch64TargetMachine.h | 7
-rw-r--r--  lib/Target/AArch64/AArch64TargetObjectFile.cpp | 21
-rw-r--r--  lib/Target/AArch64/AArch64TargetObjectFile.h | 7
-rw-r--r--  lib/Target/AArch64/AArch64TargetTransformInfo.cpp | 12
-rw-r--r--  lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp | 125
-rw-r--r--  lib/Target/AArch64/MCTargetDesc/AArch64AsmBackend.cpp | 21
-rw-r--r--  lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.cpp | 23
-rw-r--r--  lib/Target/AArch64/MCTargetDesc/AArch64MCCodeEmitter.cpp | 7
-rw-r--r--  lib/Target/AArch64/MCTargetDesc/AArch64MCExpr.cpp | 1
-rw-r--r--  lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.cpp | 129
-rw-r--r--  lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.h | 20
-rw-r--r--  lib/Target/AArch64/Utils/AArch64BaseInfo.cpp | 96
-rw-r--r--  lib/Target/AArch64/Utils/AArch64BaseInfo.h | 51
-rw-r--r--  lib/Target/ARM/A15SDOptimizer.cpp | 12
-rw-r--r--  lib/Target/ARM/ARM.td | 30
-rw-r--r--  lib/Target/ARM/ARMAsmPrinter.cpp | 72
-rw-r--r--  lib/Target/ARM/ARMAsmPrinter.h | 9
-rw-r--r--  lib/Target/ARM/ARMBaseInstrInfo.cpp | 34
-rw-r--r--  lib/Target/ARM/ARMBaseRegisterInfo.cpp | 83
-rw-r--r--  lib/Target/ARM/ARMBaseRegisterInfo.h | 27
-rw-r--r--  lib/Target/ARM/ARMFastISel.cpp | 4
-rw-r--r--  lib/Target/ARM/ARMFrameLowering.cpp | 5
-rw-r--r--  lib/Target/ARM/ARMISelDAGToDAG.cpp | 9
-rw-r--r--  lib/Target/ARM/ARMISelLowering.cpp | 457
-rw-r--r--  lib/Target/ARM/ARMISelLowering.h | 33
-rw-r--r--  lib/Target/ARM/ARMInstrFormats.td | 7
-rw-r--r--  lib/Target/ARM/ARMInstrInfo.cpp | 7
-rw-r--r--  lib/Target/ARM/ARMInstrInfo.td | 15
-rw-r--r--  lib/Target/ARM/ARMInstrNEON.td | 215
-rw-r--r--  lib/Target/ARM/ARMInstrVFP.td | 107
-rw-r--r--  lib/Target/ARM/ARMLoadStoreOptimizer.cpp | 3
-rw-r--r--  lib/Target/ARM/ARMMachineFunctionInfo.h | 6
-rw-r--r--  lib/Target/ARM/ARMRegisterInfo.cpp | 4
-rw-r--r--  lib/Target/ARM/ARMRegisterInfo.h | 2
-rw-r--r--  lib/Target/ARM/ARMSubtarget.cpp | 2
-rw-r--r--  lib/Target/ARM/ARMSubtarget.h | 8
-rw-r--r--  lib/Target/ARM/ARMTargetMachine.cpp | 66
-rw-r--r--  lib/Target/ARM/ARMTargetMachine.h | 4
-rw-r--r--  lib/Target/ARM/Android.mk | 4
-rw-r--r--  lib/Target/ARM/AsmParser/ARMAsmParser.cpp | 9
-rw-r--r--  lib/Target/ARM/CMakeLists.txt | 3
-rw-r--r--  lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp | 4
-rw-r--r--  lib/Target/ARM/MCTargetDesc/ARMArchName.def | 3
-rw-r--r--  lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp | 29
-rw-r--r--  lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.cpp | 1
-rw-r--r--  lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp | 2
-rw-r--r--  lib/Target/ARM/MCTargetDesc/ARMMCExpr.cpp | 1
-rw-r--r--  lib/Target/ARM/MCTargetDesc/ARMMCExpr.h | 4
-rw-r--r--  lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp | 183
-rw-r--r--  lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.h | 21
-rw-r--r--  lib/Target/ARM/MCTargetDesc/ARMUnwindOpAsm.cpp | 72
-rw-r--r--  lib/Target/ARM/MCTargetDesc/ARMWinCOFFStreamer.cpp | 10
-rw-r--r--  lib/Target/ARM/MLxExpansionPass.cpp | 3
-rw-r--r--  lib/Target/ARM/README-Thumb.txt | 2
-rw-r--r--  lib/Target/ARM/Thumb1FrameLowering.cpp | 22
-rw-r--r--  lib/Target/ARM/Thumb1FrameLowering.h | 2
-rw-r--r--  lib/Target/ARM/Thumb1InstrInfo.cpp | 3
-rw-r--r--  lib/Target/ARM/Thumb1InstrInfo.h | 6
-rw-r--r--  lib/Target/ARM/Thumb2ITBlockPass.cpp | 2
-rw-r--r--  lib/Target/ARM/Thumb2InstrInfo.cpp | 3
-rw-r--r--  lib/Target/ARM/Thumb2InstrInfo.h | 6
-rw-r--r--  lib/Target/ARM/Thumb2RegisterInfo.cpp | 53
-rw-r--r--  lib/Target/ARM/Thumb2RegisterInfo.h | 38
-rw-r--r--  lib/Target/ARM/Thumb2SizeReduction.cpp | 4
-rw-r--r--  lib/Target/ARM/ThumbRegisterInfo.cpp (renamed from lib/Target/ARM/Thumb1RegisterInfo.cpp) | 128
-rw-r--r--  lib/Target/ARM/ThumbRegisterInfo.h (renamed from lib/Target/ARM/Thumb1RegisterInfo.h) | 14
-rw-r--r--  lib/Target/BPF/BPFISelDAGToDAG.cpp | 6
-rw-r--r--  lib/Target/BPF/BPFISelLowering.h | 1
-rw-r--r--  lib/Target/BPF/BPFRegisterInfo.h | 3
-rw-r--r--  lib/Target/BPF/BPFTargetMachine.cpp | 4
-rw-r--r--  lib/Target/BPF/BPFTargetMachine.h | 7
-rw-r--r--  lib/Target/BPF/MCTargetDesc/BPFMCCodeEmitter.cpp | 1
-rw-r--r--  lib/Target/BPF/MCTargetDesc/BPFMCTargetDesc.cpp | 10
-rw-r--r--  lib/Target/BPF/MCTargetDesc/BPFMCTargetDesc.h | 1
-rw-r--r--  lib/Target/BPF/TargetInfo/BPFTargetInfo.cpp | 2
-rw-r--r--  lib/Target/CppBackend/CPPBackend.cpp | 3
-rw-r--r--  lib/Target/CppBackend/CPPTargetMachine.h | 15
-rw-r--r--  lib/Target/Hexagon/CMakeLists.txt | 1
-rw-r--r--  lib/Target/Hexagon/Hexagon.h | 1
-rw-r--r--  lib/Target/Hexagon/Hexagon.td | 16
-rw-r--r--  lib/Target/Hexagon/HexagonCopyToCombine.cpp | 17
-rw-r--r--  lib/Target/Hexagon/HexagonExpandPredSpillCode.cpp | 161
-rw-r--r--  lib/Target/Hexagon/HexagonFrameLowering.cpp | 2
-rw-r--r--  lib/Target/Hexagon/HexagonHardwareLoops.cpp | 2
-rw-r--r--  lib/Target/Hexagon/HexagonISelDAGToDAG.cpp | 1555
-rw-r--r--  lib/Target/Hexagon/HexagonISelLowering.cpp | 896
-rw-r--r--  lib/Target/Hexagon/HexagonISelLowering.h | 81
-rw-r--r--  lib/Target/Hexagon/HexagonInstrFormats.td | 63
-rw-r--r--  lib/Target/Hexagon/HexagonInstrFormatsV4.td | 94
-rw-r--r--  lib/Target/Hexagon/HexagonInstrInfo.cpp | 179
-rw-r--r--  lib/Target/Hexagon/HexagonInstrInfo.h | 22
-rw-r--r--  lib/Target/Hexagon/HexagonInstrInfo.td | 473
-rw-r--r--  lib/Target/Hexagon/HexagonInstrInfoV4.td | 465
-rw-r--r--  lib/Target/Hexagon/HexagonInstrInfoV5.td | 8
-rw-r--r--  lib/Target/Hexagon/HexagonInstrInfoVector.td | 418
-rw-r--r--  lib/Target/Hexagon/HexagonIntrinsics.td | 49
-rw-r--r--  lib/Target/Hexagon/HexagonIntrinsicsV4.td | 10
-rw-r--r--  lib/Target/Hexagon/HexagonNewValueJump.cpp | 6
-rw-r--r--  lib/Target/Hexagon/HexagonOperands.td | 388
-rw-r--r--  lib/Target/Hexagon/HexagonPeephole.cpp | 7
-rw-r--r--  lib/Target/Hexagon/HexagonRegisterInfo.cpp | 15
-rw-r--r--  lib/Target/Hexagon/HexagonRegisterInfo.h | 12
-rw-r--r--  lib/Target/Hexagon/HexagonSplitConst32AndConst64.cpp | 85
-rw-r--r--  lib/Target/Hexagon/HexagonSplitTFRCondSets.cpp | 172
-rw-r--r--  lib/Target/Hexagon/HexagonSubtarget.cpp | 10
-rw-r--r--  lib/Target/Hexagon/HexagonSubtarget.h | 5
-rw-r--r--  lib/Target/Hexagon/HexagonTargetMachine.cpp | 23
-rw-r--r--  lib/Target/Hexagon/HexagonTargetMachine.h | 4
-rw-r--r--  lib/Target/Hexagon/HexagonVLIWPacketizer.cpp | 9
-rw-r--r--  lib/Target/Hexagon/MCTargetDesc/HexagonELFObjectWriter.cpp | 1
-rw-r--r--  lib/Target/Hexagon/MCTargetDesc/HexagonMCCodeEmitter.cpp | 10
-rw-r--r--  lib/Target/Hexagon/MCTargetDesc/HexagonMCCodeEmitter.h | 4
-rw-r--r--  lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.cpp | 22
-rw-r--r--  lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.h | 1
-rw-r--r--  lib/Target/MSP430/MCTargetDesc/MSP430MCTargetDesc.h | 2
-rw-r--r--  lib/Target/MSP430/MSP430ISelDAGToDAG.cpp | 8
-rw-r--r--  lib/Target/MSP430/MSP430ISelLowering.h | 6
-rw-r--r--  lib/Target/MSP430/MSP430RegisterInfo.h | 3
-rw-r--r--  lib/Target/MSP430/MSP430Subtarget.cpp | 3
-rw-r--r--  lib/Target/MSP430/MSP430TargetMachine.cpp | 5
-rw-r--r--  lib/Target/MSP430/MSP430TargetMachine.h | 4
-rw-r--r--  lib/Target/Mips/AsmParser/MipsAsmParser.cpp | 99
-rw-r--r--  lib/Target/Mips/MCTargetDesc/MipsAsmBackend.h | 7
-rw-r--r--  lib/Target/Mips/MCTargetDesc/MipsELFObjectWriter.cpp | 16
-rw-r--r--  lib/Target/Mips/MCTargetDesc/MipsELFStreamer.cpp | 12
-rw-r--r--  lib/Target/Mips/MCTargetDesc/MipsELFStreamer.h | 4
-rw-r--r--  lib/Target/Mips/MCTargetDesc/MipsFixupKinds.h | 2
-rw-r--r--  lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.cpp | 7
-rw-r--r--  lib/Target/Mips/MCTargetDesc/MipsMCNaCl.h | 1
-rw-r--r--  lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.cpp | 148
-rw-r--r--  lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.h | 2
-rw-r--r--  lib/Target/Mips/MCTargetDesc/MipsNaClELFStreamer.cpp | 8
-rw-r--r--  lib/Target/Mips/MCTargetDesc/MipsTargetStreamer.cpp | 36
-rw-r--r--  lib/Target/Mips/MicroMipsInstrInfo.td | 8
-rw-r--r--  lib/Target/Mips/Mips.h | 5
-rw-r--r--  lib/Target/Mips/Mips.td | 12
-rw-r--r--  lib/Target/Mips/Mips16HardFloat.cpp | 185
-rw-r--r--  lib/Target/Mips/Mips16HardFloat.h | 43
-rw-r--r--  lib/Target/Mips/Mips16InstrInfo.cpp | 4
-rw-r--r--  lib/Target/Mips/Mips16InstrInfo.h | 2
-rw-r--r--  lib/Target/Mips/Mips16RegisterInfo.cpp | 9
-rw-r--r--  lib/Target/Mips/Mips16RegisterInfo.h | 2
-rw-r--r--  lib/Target/Mips/Mips64InstrInfo.td | 4
-rw-r--r--  lib/Target/Mips/MipsAsmPrinter.cpp | 27
-rw-r--r--  lib/Target/Mips/MipsCallingConv.td | 2
-rw-r--r--  lib/Target/Mips/MipsDelaySlotFiller.cpp | 36
-rw-r--r--  lib/Target/Mips/MipsFastISel.cpp | 92
-rw-r--r--  lib/Target/Mips/MipsISelDAGToDAG.cpp | 17
-rw-r--r--  lib/Target/Mips/MipsISelDAGToDAG.h | 2
-rw-r--r--  lib/Target/Mips/MipsISelLowering.cpp | 46
-rw-r--r--  lib/Target/Mips/MipsISelLowering.h | 11
-rw-r--r--  lib/Target/Mips/MipsInstrInfo.h | 2
-rw-r--r--  lib/Target/Mips/MipsInstrInfo.td | 24
-rw-r--r--  lib/Target/Mips/MipsMCInstLower.cpp | 1
-rw-r--r--  lib/Target/Mips/MipsMachineFunction.cpp | 17
-rw-r--r--  lib/Target/Mips/MipsModuleISelDAGToDAG.cpp | 38
-rw-r--r--  lib/Target/Mips/MipsModuleISelDAGToDAG.h | 58
-rw-r--r--  lib/Target/Mips/MipsOs16.cpp | 129
-rw-r--r--  lib/Target/Mips/MipsOs16.h | 47
-rw-r--r--  lib/Target/Mips/MipsRegisterInfo.cpp | 15
-rw-r--r--  lib/Target/Mips/MipsRegisterInfo.h | 14
-rw-r--r--  lib/Target/Mips/MipsSEISelDAGToDAG.cpp | 84
-rw-r--r--  lib/Target/Mips/MipsSEISelDAGToDAG.h | 10
-rw-r--r--  lib/Target/Mips/MipsSEInstrInfo.cpp | 2
-rw-r--r--  lib/Target/Mips/MipsSERegisterInfo.cpp | 14
-rw-r--r--  lib/Target/Mips/MipsSERegisterInfo.h | 2
-rw-r--r--  lib/Target/Mips/MipsSchedule.td | 4
-rw-r--r--  lib/Target/Mips/MipsTargetMachine.cpp | 22
-rw-r--r--  lib/Target/Mips/MipsTargetMachine.h | 4
-rw-r--r--  lib/Target/Mips/MipsTargetObjectFile.cpp | 14
-rw-r--r--  lib/Target/Mips/MipsTargetObjectFile.h | 4
-rw-r--r--  lib/Target/Mips/MipsTargetStreamer.h | 7
-rw-r--r--  lib/Target/NVPTX/CMakeLists.txt | 1
-rw-r--r--  lib/Target/NVPTX/MCTargetDesc/NVPTXMCAsmInfo.cpp | 2
-rw-r--r--  lib/Target/NVPTX/MCTargetDesc/NVPTXMCTargetDesc.cpp | 50
-rw-r--r--  lib/Target/NVPTX/MCTargetDesc/NVPTXMCTargetDesc.h | 2
-rw-r--r--  lib/Target/NVPTX/NVPTXAllocaHoisting.cpp | 40
-rw-r--r--  lib/Target/NVPTX/NVPTXAllocaHoisting.h | 28
-rw-r--r--  lib/Target/NVPTX/NVPTXAsmPrinter.cpp | 34
-rw-r--r--  lib/Target/NVPTX/NVPTXAsmPrinter.h | 4
-rw-r--r--  lib/Target/NVPTX/NVPTXFavorNonGenericAddrSpaces.cpp | 7
-rw-r--r--  lib/Target/NVPTX/NVPTXGenericToNVVM.cpp | 1
-rw-r--r--  lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp | 11
-rw-r--r--  lib/Target/NVPTX/NVPTXISelDAGToDAG.h | 2
-rw-r--r--  lib/Target/NVPTX/NVPTXISelLowering.cpp | 15
-rw-r--r--  lib/Target/NVPTX/NVPTXISelLowering.h | 6
-rw-r--r--  lib/Target/NVPTX/NVPTXLowerAggrCopies.cpp | 35
-rw-r--r--  lib/Target/NVPTX/NVPTXLowerAggrCopies.h | 29
-rw-r--r--  lib/Target/NVPTX/NVPTXLowerStructArgs.cpp | 4
-rw-r--r--  lib/Target/NVPTX/NVPTXMCExpr.h | 4
-rw-r--r--  lib/Target/NVPTX/NVPTXRegisterInfo.cpp | 2
-rw-r--r--  lib/Target/NVPTX/NVPTXRegisterInfo.h | 3
-rw-r--r--  lib/Target/NVPTX/NVPTXSection.h | 5
-rw-r--r--  lib/Target/NVPTX/NVPTXTargetMachine.cpp | 12
-rw-r--r--  lib/Target/NVPTX/NVPTXTargetMachine.h | 7
-rw-r--r--  lib/Target/NVPTX/NVPTXTargetObjectFile.h | 3
-rw-r--r--  lib/Target/NVPTX/NVPTXUtilities.cpp | 9
-rw-r--r--  lib/Target/NVPTX/NVPTXutil.cpp | 90
-rw-r--r--  lib/Target/NVPTX/NVPTXutil.h | 25
-rw-r--r--  lib/Target/NVPTX/NVVMReflect.cpp | 38
-rw-r--r--  lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp | 8
-rw-r--r--  lib/Target/PowerPC/Disassembler/PPCDisassembler.cpp | 6
-rw-r--r--  lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp | 14
-rw-r--r--  lib/Target/PowerPC/InstPrinter/PPCInstPrinter.h | 2
-rw-r--r--  lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.cpp | 4
-rw-r--r--  lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp | 16
-rw-r--r--  lib/Target/PowerPC/MCTargetDesc/PPCMCExpr.h | 5
-rw-r--r--  lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp | 122
-rw-r--r--  lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.h | 3
-rw-r--r--  lib/Target/PowerPC/PPC.td | 19
-rw-r--r--  lib/Target/PowerPC/PPCAsmPrinter.cpp | 23
-rw-r--r--  lib/Target/PowerPC/PPCCTRLoops.cpp | 5
-rw-r--r--  lib/Target/PowerPC/PPCFastISel.cpp | 16
-rw-r--r--  lib/Target/PowerPC/PPCISelDAGToDAG.cpp | 48
-rw-r--r--  lib/Target/PowerPC/PPCISelLowering.cpp | 297
-rw-r--r--  lib/Target/PowerPC/PPCISelLowering.h | 26
-rw-r--r--  lib/Target/PowerPC/PPCInstr64Bit.td | 28
-rw-r--r--  lib/Target/PowerPC/PPCInstrAltivec.td | 94
-rw-r--r--  lib/Target/PowerPC/PPCInstrFormats.td | 87
-rw-r--r--  lib/Target/PowerPC/PPCInstrHTM.td | 172
-rw-r--r--  lib/Target/PowerPC/PPCInstrInfo.cpp | 58
-rw-r--r--  lib/Target/PowerPC/PPCInstrInfo.h | 2
-rw-r--r--  lib/Target/PowerPC/PPCInstrInfo.td | 82
-rw-r--r--  lib/Target/PowerPC/PPCInstrQPX.td | 2
-rw-r--r--  lib/Target/PowerPC/PPCInstrVSX.td | 2
-rw-r--r--  lib/Target/PowerPC/PPCLoopDataPrefetch.cpp | 6
-rw-r--r--  lib/Target/PowerPC/PPCLoopPreIncPrep.cpp | 21
-rw-r--r--  lib/Target/PowerPC/PPCMCInstLower.cpp | 3
-rw-r--r--  lib/Target/PowerPC/PPCRegisterInfo.cpp | 170
-rw-r--r--  lib/Target/PowerPC/PPCRegisterInfo.h | 52
-rw-r--r--  lib/Target/PowerPC/PPCRegisterInfo.td | 2
-rw-r--r--  lib/Target/PowerPC/PPCSchedule.td | 398
-rw-r--r--  lib/Target/PowerPC/PPCSubtarget.cpp | 3
-rw-r--r--  lib/Target/PowerPC/PPCSubtarget.h | 6
-rw-r--r--  lib/Target/PowerPC/PPCTargetMachine.cpp | 17
-rw-r--r--  lib/Target/PowerPC/PPCTargetMachine.h | 6
-rw-r--r--  lib/Target/PowerPC/PPCTargetTransformInfo.cpp | 4
-rw-r--r--  lib/Target/PowerPC/PPCTargetTransformInfo.h | 1
-rw-r--r--  lib/Target/PowerPC/README.txt | 7
-rw-r--r--  lib/Target/PowerPC/README_ALTIVEC.txt | 104
-rw-r--r--  lib/Target/R600/AMDGPU.td | 5
-rw-r--r--  lib/Target/R600/AMDGPUAsmPrinter.cpp | 10
-rw-r--r--  lib/Target/R600/AMDGPUISelDAGToDAG.cpp | 127
-rw-r--r--  lib/Target/R600/AMDGPUISelLowering.cpp | 3
-rw-r--r--  lib/Target/R600/AMDGPUInstrInfo.cpp | 30
-rw-r--r--  lib/Target/R600/AMDGPUInstrInfo.h | 13
-rw-r--r--  lib/Target/R600/AMDGPUInstructions.td | 22
-rw-r--r--  lib/Target/R600/AMDGPUIntrinsics.td | 1
-rw-r--r--  lib/Target/R600/AMDGPUPromoteAlloca.cpp | 13
-rw-r--r--  lib/Target/R600/AMDGPURegisterInfo.cpp | 5
-rw-r--r--  lib/Target/R600/AMDGPURegisterInfo.h | 3
-rw-r--r--  lib/Target/R600/AMDGPUSubtarget.cpp | 2
-rw-r--r--  lib/Target/R600/AMDGPUSubtarget.h | 9
-rw-r--r--  lib/Target/R600/AMDGPUTargetMachine.cpp | 30
-rw-r--r--  lib/Target/R600/AMDGPUTargetMachine.h | 10
-rw-r--r--  lib/Target/R600/AMDGPUTargetTransformInfo.cpp | 5
-rw-r--r--  lib/Target/R600/AMDILCFGStructurizer.cpp | 26
-rw-r--r--  lib/Target/R600/AsmParser/AMDGPUAsmParser.cpp | 7
-rw-r--r--  lib/Target/R600/EvergreenInstructions.td | 5
-rw-r--r--  lib/Target/R600/InstPrinter/AMDGPUInstPrinter.cpp | 16
-rw-r--r--  lib/Target/R600/InstPrinter/AMDGPUInstPrinter.h | 2
-rw-r--r--  lib/Target/R600/MCTargetDesc/AMDGPUMCTargetDesc.cpp | 58
-rw-r--r--  lib/Target/R600/MCTargetDesc/AMDGPUMCTargetDesc.h | 4
-rw-r--r--  lib/Target/R600/MCTargetDesc/R600MCCodeEmitter.cpp | 4
-rw-r--r--  lib/Target/R600/MCTargetDesc/SIMCCodeEmitter.cpp | 1
-rw-r--r--  lib/Target/R600/Processors.td | 8
-rw-r--r--  lib/Target/R600/R600ClauseMergePass.cpp | 2
-rw-r--r--  lib/Target/R600/R600ISelLowering.cpp | 10
-rw-r--r--  lib/Target/R600/R600InstrInfo.cpp | 14
-rw-r--r--  lib/Target/R600/R600OptimizeVectorRegisters.cpp | 4
-rw-r--r--  lib/Target/R600/R600RegisterInfo.cpp | 10
-rw-r--r--  lib/Target/R600/R600RegisterInfo.h | 2
-rw-r--r--  lib/Target/R600/SIFixSGPRLiveRanges.cpp | 1
-rw-r--r--  lib/Target/R600/SIFoldOperands.cpp | 3
-rw-r--r--  lib/Target/R600/SIISelLowering.cpp | 53
-rw-r--r--  lib/Target/R600/SIInsertWaits.cpp | 6
-rw-r--r--  lib/Target/R600/SIInstrFormats.td | 11
-rw-r--r--  lib/Target/R600/SIInstrInfo.cpp | 112
-rw-r--r--  lib/Target/R600/SIInstrInfo.h | 6
-rw-r--r--  lib/Target/R600/SIInstrInfo.td | 569
-rw-r--r--  lib/Target/R600/SIInstructions.td | 486
-rw-r--r--  lib/Target/R600/SILoadStoreOptimizer.cpp | 5
-rw-r--r--  lib/Target/R600/SIRegisterInfo.cpp | 84
-rw-r--r--  lib/Target/R600/SIRegisterInfo.h | 9
-rw-r--r--  lib/Target/R600/SIRegisterInfo.td | 7
-rw-r--r--  lib/Target/R600/SIShrinkInstructions.cpp | 8
-rw-r--r--  lib/Target/README.txt | 41
-rw-r--r--  lib/Target/Sparc/MCTargetDesc/SparcMCCodeEmitter.cpp | 1
-rw-r--r--  lib/Target/Sparc/MCTargetDesc/SparcMCExpr.h | 4
-rw-r--r--  lib/Target/Sparc/MCTargetDesc/SparcMCTargetDesc.cpp | 96
-rw-r--r--  lib/Target/Sparc/MCTargetDesc/SparcMCTargetDesc.h | 1
-rw-r--r--  lib/Target/Sparc/SparcISelDAGToDAG.cpp | 9
-rw-r--r--  lib/Target/Sparc/SparcISelLowering.cpp | 14
-rw-r--r--  lib/Target/Sparc/SparcInstrInfo.cpp | 5
-rw-r--r--  lib/Target/Sparc/SparcInstrInfo.h | 2
-rw-r--r--  lib/Target/Sparc/SparcRegisterInfo.cpp | 16
-rw-r--r--  lib/Target/Sparc/SparcRegisterInfo.h | 15
-rw-r--r--  lib/Target/Sparc/SparcTargetMachine.cpp | 11
-rw-r--r--  lib/Target/Sparc/SparcTargetMachine.h | 6
-rw-r--r--  lib/Target/SystemZ/MCTargetDesc/SystemZMCCodeEmitter.cpp | 1
-rw-r--r--  lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.cpp | 12
-rw-r--r--  lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.h | 1
-rw-r--r--  lib/Target/SystemZ/SystemZISelDAGToDAG.cpp | 39
-rw-r--r--  lib/Target/SystemZ/SystemZISelLowering.cpp | 5
-rw-r--r--  lib/Target/SystemZ/SystemZISelLowering.h | 20
-rw-r--r--  lib/Target/SystemZ/SystemZInstrInfo.cpp | 15
-rw-r--r--  lib/Target/SystemZ/SystemZInstrInfo.h | 8
-rw-r--r--  lib/Target/SystemZ/SystemZRegisterInfo.cpp | 3
-rw-r--r--  lib/Target/SystemZ/SystemZRegisterInfo.h | 6
-rw-r--r--  lib/Target/SystemZ/SystemZTargetMachine.cpp | 10
-rw-r--r--  lib/Target/SystemZ/SystemZTargetMachine.h | 6
-rw-r--r--  lib/Target/Target.cpp | 4
-rw-r--r--  lib/Target/TargetLoweringObjectFile.cpp | 6
-rw-r--r--  lib/Target/TargetMachine.cpp | 17
-rw-r--r--  lib/Target/TargetMachineC.cpp | 3
-rw-r--r--  lib/Target/TargetSubtargetInfo.cpp | 20
-rw-r--r--  lib/Target/X86/Android.mk | 2
-rw-r--r--  lib/Target/X86/AsmParser/X86AsmParser.cpp | 42
-rw-r--r--  lib/Target/X86/AsmParser/X86Operand.h | 8
-rw-r--r--  lib/Target/X86/Disassembler/X86Disassembler.cpp | 42
-rw-r--r--  lib/Target/X86/Disassembler/X86DisassemblerDecoder.cpp | 9
-rw-r--r--  lib/Target/X86/Disassembler/X86DisassemblerDecoderCommon.h | 12
-rw-r--r--  lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp | 8
-rw-r--r--  lib/Target/X86/MCTargetDesc/X86ELFObjectWriter.cpp | 417
-rw-r--r--  lib/Target/X86/MCTargetDesc/X86ELFRelocationInfo.cpp | 2
-rw-r--r--  lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp | 2
-rw-r--r--  lib/Target/X86/MCTargetDesc/X86MCTargetDesc.cpp | 238
-rw-r--r--  lib/Target/X86/MCTargetDesc/X86MCTargetDesc.h | 19
-rw-r--r--  lib/Target/X86/MCTargetDesc/X86MachORelocationInfo.cpp | 4
-rw-r--r--  lib/Target/X86/MCTargetDesc/X86WinCOFFStreamer.cpp | 8
-rw-r--r--  lib/Target/X86/README-SSE.txt | 78
-rw-r--r--  lib/Target/X86/X86AsmPrinter.cpp | 3
-rw-r--r--  lib/Target/X86/X86FastISel.cpp | 152
-rw-r--r--  lib/Target/X86/X86FloatingPoint.cpp | 6
-rw-r--r--  lib/Target/X86/X86FrameLowering.cpp | 2
-rw-r--r--  lib/Target/X86/X86ISelDAGToDAG.cpp | 21
-rw-r--r--  lib/Target/X86/X86ISelLowering.cpp | 1258
-rw-r--r--  lib/Target/X86/X86ISelLowering.h | 147
-rw-r--r--  lib/Target/X86/X86InstrAVX512.td | 773
-rw-r--r--  lib/Target/X86/X86InstrFragmentsSIMD.td | 108
-rw-r--r--  lib/Target/X86/X86InstrInfo.cpp | 55
-rw-r--r--  lib/Target/X86/X86InstrInfo.h | 21
-rw-r--r--  lib/Target/X86/X86InstrInfo.td | 18
-rw-r--r--  lib/Target/X86/X86InstrSSE.td | 163
-rw-r--r--  lib/Target/X86/X86IntrinsicsInfo.h | 2
-rw-r--r--  lib/Target/X86/X86MCInstLower.cpp | 4
-rw-r--r--  lib/Target/X86/X86RegisterInfo.cpp | 81
-rw-r--r--  lib/Target/X86/X86RegisterInfo.h | 17
-rw-r--r--  lib/Target/X86/X86SchedHaswell.td | 4
-rw-r--r--  lib/Target/X86/X86SelectionDAGInfo.cpp | 8
-rw-r--r--  lib/Target/X86/X86TargetMachine.cpp | 10
-rw-r--r--  lib/Target/X86/X86TargetMachine.h | 4
-rw-r--r--  lib/Target/X86/X86TargetObjectFile.cpp | 42
-rw-r--r--  lib/Target/X86/X86TargetObjectFile.h | 23
-rw-r--r--  lib/Target/XCore/MCTargetDesc/XCoreMCTargetDesc.cpp | 17
-rw-r--r--  lib/Target/XCore/MCTargetDesc/XCoreMCTargetDesc.h | 2
-rw-r--r--  lib/Target/XCore/XCoreISelDAGToDAG.cpp | 8
-rw-r--r--  lib/Target/XCore/XCoreISelLowering.h | 6
-rw-r--r--  lib/Target/XCore/XCoreRegisterInfo.cpp | 4
-rw-r--r--  lib/Target/XCore/XCoreRegisterInfo.h | 3
-rw-r--r--  lib/Target/XCore/XCoreTargetMachine.cpp | 5
-rw-r--r--  lib/Target/XCore/XCoreTargetMachine.h | 7
-rw-r--r--  lib/Transforms/IPO/ArgumentPromotion.cpp | 80
-rw-r--r--  lib/Transforms/IPO/ConstantMerge.cpp | 25
-rw-r--r--  lib/Transforms/IPO/GlobalDCE.cpp | 26
-rw-r--r--  lib/Transforms/IPO/GlobalOpt.cpp | 106
-rw-r--r--  lib/Transforms/IPO/Inliner.cpp | 40
-rw-r--r--  lib/Transforms/IPO/LowerBitSets.cpp | 276
-rw-r--r--  lib/Transforms/IPO/MergeFunctions.cpp | 54
-rw-r--r--  lib/Transforms/IPO/PassManagerBuilder.cpp | 48
-rw-r--r--  lib/Transforms/InstCombine/InstCombineAddSub.cpp | 88
-rw-r--r--  lib/Transforms/InstCombine/InstCombineAndOrXor.cpp | 18
-rw-r--r--  lib/Transforms/InstCombine/InstCombineCalls.cpp | 142
-rw-r--r--  lib/Transforms/InstCombine/InstCombineCasts.cpp | 148
-rw-r--r--  lib/Transforms/InstCombine/InstCombineCompares.cpp | 63
-rw-r--r--  lib/Transforms/InstCombine/InstCombineInternal.h | 24
-rw-r--r--  lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp | 203
-rw-r--r--  lib/Transforms/InstCombine/InstCombineMulDivRem.cpp | 30
-rw-r--r--  lib/Transforms/InstCombine/InstCombinePHI.cpp | 8
-rw-r--r--  lib/Transforms/InstCombine/InstCombineSelect.cpp | 98
-rw-r--r--  lib/Transforms/InstCombine/InstCombineShifts.cpp | 25
-rw-r--r--  lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp | 148
-rw-r--r--  lib/Transforms/InstCombine/InstCombineVectorOps.cpp | 7
-rw-r--r--  lib/Transforms/InstCombine/InstructionCombining.cpp | 258
-rw-r--r--  lib/Transforms/Instrumentation/AddressSanitizer.cpp | 818
-rw-r--r--  lib/Transforms/Instrumentation/BoundsChecking.cpp | 26
-rw-r--r--  lib/Transforms/Instrumentation/DataFlowSanitizer.cpp | 24
-rw-r--r--  lib/Transforms/Instrumentation/GCOVProfiling.cpp | 39
-rw-r--r--  lib/Transforms/Instrumentation/MemorySanitizer.cpp | 57
-rw-r--r--  lib/Transforms/Instrumentation/SanitizerCoverage.cpp | 157
-rw-r--r--  lib/Transforms/Instrumentation/ThreadSanitizer.cpp | 62
-rw-r--r--  lib/Transforms/ObjCARC/ARCInstKind.cpp | 96
-rw-r--r--  lib/Transforms/ObjCARC/ARCRuntimeEntryPoints.h | 48
-rw-r--r--  lib/Transforms/ObjCARC/Android.mk | 1
-rw-r--r--  lib/Transforms/ObjCARC/BlotMapVector.h | 108
-rw-r--r--  lib/Transforms/ObjCARC/CMakeLists.txt | 1
-rw-r--r--  lib/Transforms/ObjCARC/DependencyAnalysis.cpp | 16
-rw-r--r--  lib/Transforms/ObjCARC/ObjCARC.h | 55
-rw-r--r--  lib/Transforms/ObjCARC/ObjCARCAliasAnalysis.cpp | 11
-rw-r--r--  lib/Transforms/ObjCARC/ObjCARCAliasAnalysis.h | 4
-rw-r--r--  lib/Transforms/ObjCARC/ObjCARCContract.cpp | 11
-rw-r--r--  lib/Transforms/ObjCARC/ObjCARCOpts.cpp | 1308
-rw-r--r--  lib/Transforms/ObjCARC/ProvenanceAnalysis.cpp | 28
-rw-r--r--  lib/Transforms/ObjCARC/ProvenanceAnalysis.h | 5
-rw-r--r--  lib/Transforms/ObjCARC/ProvenanceAnalysisEvaluator.cpp | 4
-rw-r--r--  lib/Transforms/ObjCARC/PtrState.cpp | 404
-rw-r--r--  lib/Transforms/ObjCARC/PtrState.h | 210
-rw-r--r--  lib/Transforms/Scalar/AlignmentFromAssumptions.cpp | 13
-rw-r--r--  lib/Transforms/Scalar/Android.mk | 1
-rw-r--r--  lib/Transforms/Scalar/BDCE.cpp | 31
-rw-r--r--  lib/Transforms/Scalar/CMakeLists.txt | 1
-rw-r--r--  lib/Transforms/Scalar/ConstantHoisting.cpp | 1
-rw-r--r--  lib/Transforms/Scalar/ConstantProp.cpp | 4
-rw-r--r--  lib/Transforms/Scalar/CorrelatedValuePropagation.cpp | 6
-rw-r--r--  lib/Transforms/Scalar/DeadStoreElimination.cpp | 60
-rw-r--r--  lib/Transforms/Scalar/EarlyCSE.cpp | 18
-rw-r--r--  lib/Transforms/Scalar/GVN.cpp | 134
-rw-r--r--  lib/Transforms/Scalar/IndVarSimplify.cpp | 85
-rw-r--r--  lib/Transforms/Scalar/InductiveRangeCheckElimination.cpp | 401
-rw-r--r--  lib/Transforms/Scalar/JumpThreading.cpp | 15
-rw-r--r--  lib/Transforms/Scalar/LICM.cpp | 79
-rw-r--r--  lib/Transforms/Scalar/LoadCombine.cpp | 37
-rw-r--r--  lib/Transforms/Scalar/LoopIdiomRecognize.cpp | 37
-rw-r--r--  lib/Transforms/Scalar/LoopInstSimplify.cpp | 3
-rw-r--r--  lib/Transforms/Scalar/LoopInterchange.cpp | 1154
-rw-r--r--  lib/Transforms/Scalar/LoopRerollPass.cpp | 30
-rw-r--r--  lib/Transforms/Scalar/LoopRotation.cpp | 8
-rw-r--r--  lib/Transforms/Scalar/LoopStrengthReduce.cpp | 11
-rw-r--r--  lib/Transforms/Scalar/LoopUnrollPass.cpp | 19
-rw-r--r--  lib/Transforms/Scalar/LoopUnswitch.cpp | 4
-rw-r--r--  lib/Transforms/Scalar/MemCpyOptimizer.cpp | 86
-rw-r--r--  lib/Transforms/Scalar/MergedLoadStoreMotion.cpp | 3
-rw-r--r--  lib/Transforms/Scalar/Reassociate.cpp | 34
-rw-r--r--  lib/Transforms/Scalar/RewriteStatepointsForGC.cpp | 314
-rw-r--r--  lib/Transforms/Scalar/SCCP.cpp | 14
-rw-r--r--  lib/Transforms/Scalar/SROA.cpp | 89
-rw-r--r--  lib/Transforms/Scalar/SampleProfile.cpp | 3
-rw-r--r--  lib/Transforms/Scalar/Scalar.cpp | 2
-rw-r--r--  lib/Transforms/Scalar/ScalarReplAggregates.cpp | 190
-rw-r--r--  lib/Transforms/Scalar/Scalarizer.cpp | 26
-rw-r--r--  lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp | 71
-rw-r--r--  lib/Transforms/Scalar/SimplifyCFGPass.cpp | 18
-rw-r--r--  lib/Transforms/Scalar/Sink.cpp | 6
-rw-r--r--  lib/Transforms/Scalar/StraightLineStrengthReduce.cpp | 395
-rw-r--r--  lib/Transforms/Scalar/StructurizeCFG.cpp | 3
-rw-r--r--  lib/Transforms/Scalar/TailRecursionElimination.cpp | 70
-rw-r--r--  lib/Transforms/Utils/BuildLibCalls.cpp | 127
-rw-r--r--  lib/Transforms/Utils/CloneFunction.cpp | 77
-rw-r--r--  lib/Transforms/Utils/CodeExtractor.cpp | 30
-rw-r--r--  lib/Transforms/Utils/CtorUtils.cpp | 3
-rw-r--r--  lib/Transforms/Utils/InlineFunction.cpp | 132
-rw-r--r--  lib/Transforms/Utils/Local.cpp | 22
-rw-r--r--  lib/Transforms/Utils/LoopSimplify.cpp | 23
-rw-r--r--  lib/Transforms/Utils/LoopUnroll.cpp | 9
-rw-r--r--  lib/Transforms/Utils/LoopUnrollRuntime.cpp | 4
-rw-r--r--  lib/Transforms/Utils/LowerSwitch.cpp | 11
-rw-r--r--  lib/Transforms/Utils/PromoteMemoryToRegister.cpp | 5
-rw-r--r--  lib/Transforms/Utils/SSAUpdater.cpp | 4
-rw-r--r--  lib/Transforms/Utils/SimplifyCFG.cpp | 353
-rw-r--r--  lib/Transforms/Utils/SimplifyIndVar.cpp | 118
-rw-r--r--  lib/Transforms/Utils/SimplifyInstructions.cpp | 3
-rw-r--r--  lib/Transforms/Utils/SimplifyLibCalls.cpp | 264
-rw-r--r--  lib/Transforms/Utils/SymbolRewriter.cpp | 13
-rw-r--r--  lib/Transforms/Utils/ValueMapper.cpp | 6
-rw-r--r--  lib/Transforms/Vectorize/BBVectorize.cpp | 35
-rw-r--r--  lib/Transforms/Vectorize/LoopVectorize.cpp | 412
-rw-r--r--  lib/Transforms/Vectorize/SLPVectorizer.cpp | 114
821 files changed, 28633 insertions, 23865 deletions
diff --git a/lib/Analysis/AliasAnalysis.cpp b/lib/Analysis/AliasAnalysis.cpp
index 4e95aa0..0b0fd50 100644
--- a/lib/Analysis/AliasAnalysis.cpp
+++ b/lib/Analysis/AliasAnalysis.cpp
@@ -407,9 +407,10 @@ AliasAnalysis::ModRefResult
AliasAnalysis::callCapturesBefore(const Instruction *I,
const AliasAnalysis::Location &MemLoc,
DominatorTree *DT) {
- if (!DT || !DL) return AliasAnalysis::ModRef;
+ if (!DT)
+ return AliasAnalysis::ModRef;
- const Value *Object = GetUnderlyingObject(MemLoc.Ptr, DL);
+ const Value *Object = GetUnderlyingObject(MemLoc.Ptr, *DL);
if (!isIdentifiedObject(Object) || isa<GlobalValue>(Object) ||
isa<Constant>(Object))
return AliasAnalysis::ModRef;
@@ -462,9 +463,8 @@ AliasAnalysis::~AliasAnalysis() {}
/// InitializeAliasAnalysis - Subclasses must call this method to initialize the
/// AliasAnalysis interface before any other methods are called.
///
-void AliasAnalysis::InitializeAliasAnalysis(Pass *P) {
- DataLayoutPass *DLP = P->getAnalysisIfAvailable<DataLayoutPass>();
- DL = DLP ? &DLP->getDataLayout() : nullptr;
+void AliasAnalysis::InitializeAliasAnalysis(Pass *P, const DataLayout *NewDL) {
+ DL = NewDL;
auto *TLIP = P->getAnalysisIfAvailable<TargetLibraryInfoWrapperPass>();
TLI = TLIP ? &TLIP->getTLI() : nullptr;
AA = &P->getAnalysis<AliasAnalysis>();
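
Two knock-on effects of that signature change are visible in the hunk above and recur throughout the file diffs below: the blanket null check on DL disappears (the layout is guaranteed once the pass is initialized), and helpers such as GetUnderlyingObject now take a DataLayout reference rather than a nullable pointer, so call sites dereference the member. Abridged from the hunk above:

// Before: DL was a possibly-null const DataLayout *, so callers bailed out
// early and the helper accepted a pointer:
//   if (!DT || !DL) return AliasAnalysis::ModRef;
//   const Value *Object = GetUnderlyingObject(MemLoc.Ptr, DL);
// After: only DT can be missing; the helper takes a reference.
if (!DT)
  return AliasAnalysis::ModRef;
const Value *Object = GetUnderlyingObject(MemLoc.Ptr, *DL);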
diff --git a/lib/Analysis/AliasAnalysisCounter.cpp b/lib/Analysis/AliasAnalysisCounter.cpp
index b860914..5865259 100644
--- a/lib/Analysis/AliasAnalysisCounter.cpp
+++ b/lib/Analysis/AliasAnalysisCounter.cpp
@@ -14,6 +14,7 @@
#include "llvm/Analysis/Passes.h"
#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/IR/Module.h"
#include "llvm/Pass.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
@@ -76,7 +77,7 @@ namespace {
bool runOnModule(Module &M) override {
this->M = &M;
- InitializeAliasAnalysis(this);
+ InitializeAliasAnalysis(this, &M.getDataLayout());
return false;
}
diff --git a/lib/Analysis/AliasDebugger.cpp b/lib/Analysis/AliasDebugger.cpp
index 5d61cf9..f98b578 100644
--- a/lib/Analysis/AliasDebugger.cpp
+++ b/lib/Analysis/AliasDebugger.cpp
@@ -44,7 +44,7 @@ namespace {
}
bool runOnModule(Module &M) override {
- InitializeAliasAnalysis(this); // set up super class
+ InitializeAliasAnalysis(this, &M.getDataLayout()); // set up super class
for(Module::global_iterator I = M.global_begin(),
E = M.global_end(); I != E; ++I) {
diff --git a/lib/Analysis/Analysis.cpp b/lib/Analysis/Analysis.cpp
index 1bfb06d..4549c1e 100644
--- a/lib/Analysis/Analysis.cpp
+++ b/lib/Analysis/Analysis.cpp
@@ -49,7 +49,6 @@ void llvm::initializeAnalysis(PassRegistry &Registry) {
initializeIVUsersPass(Registry);
initializeInstCountPass(Registry);
initializeIntervalPartitionPass(Registry);
- initializeJumpInstrTableInfoPass(Registry);
initializeLazyValueInfoPass(Registry);
initializeLibCallAliasAnalysisPass(Registry);
initializeLintPass(Registry);
diff --git a/lib/Analysis/Android.mk b/lib/Analysis/Android.mk
index e17b870..277956c 100644
--- a/lib/Analysis/Android.mk
+++ b/lib/Analysis/Android.mk
@@ -29,7 +29,6 @@ analysis_SRC_FILES := \
InstructionSimplify.cpp \
Interval.cpp \
IntervalPartition.cpp \
- JumpInstrTableInfo.cpp \
LazyCallGraph.cpp \
LazyValueInfo.cpp \
LibCallAliasAnalysis.cpp \
diff --git a/lib/Analysis/BasicAliasAnalysis.cpp b/lib/Analysis/BasicAliasAnalysis.cpp
index 46ca6ee..be2282f 100644
--- a/lib/Analysis/BasicAliasAnalysis.cpp
+++ b/lib/Analysis/BasicAliasAnalysis.cpp
@@ -103,7 +103,7 @@ static uint64_t getObjectSize(const Value *V, const DataLayout &DL,
const TargetLibraryInfo &TLI,
bool RoundToAlign = false) {
uint64_t Size;
- if (getObjectSize(V, Size, &DL, &TLI, RoundToAlign))
+ if (getObjectSize(V, Size, DL, &TLI, RoundToAlign))
return Size;
return AliasAnalysis::UnknownSize;
}
@@ -221,7 +221,7 @@ static Value *GetLinearExpression(Value *V, APInt &Scale, APInt &Offset,
case Instruction::Or:
// X|C == X+C if all the bits in C are unset in X. Otherwise we can't
// analyze it.
- if (!MaskedValueIsZero(BOp->getOperand(0), RHSC->getValue(), &DL, 0, AC,
+ if (!MaskedValueIsZero(BOp->getOperand(0), RHSC->getValue(), DL, 0, AC,
BOp, DT))
break;
// FALL THROUGH.
@@ -292,7 +292,7 @@ static Value *GetLinearExpression(Value *V, APInt &Scale, APInt &Offset,
static const Value *
DecomposeGEPExpression(const Value *V, int64_t &BaseOffs,
SmallVectorImpl<VariableGEPIndex> &VarIndices,
- bool &MaxLookupReached, const DataLayout *DL,
+ bool &MaxLookupReached, const DataLayout &DL,
AssumptionCache *AC, DominatorTree *DT) {
// Limit recursion depth to limit compile time in crazy cases.
unsigned MaxLookup = MaxLookupSearchDepth;
@@ -341,16 +341,6 @@ DecomposeGEPExpression(const Value *V, int64_t &BaseOffs,
if (!GEPOp->getOperand(0)->getType()->getPointerElementType()->isSized())
return V;
- // If we are lacking DataLayout information, we can't compute the offets of
- // elements computed by GEPs. However, we can handle bitcast equivalent
- // GEPs.
- if (!DL) {
- if (!GEPOp->hasAllZeroIndices())
- return V;
- V = GEPOp->getOperand(0);
- continue;
- }
-
unsigned AS = GEPOp->getPointerAddressSpace();
// Walk the indices of the GEP, accumulating them into BaseOff/VarIndices.
gep_type_iterator GTI = gep_type_begin(GEPOp);
@@ -363,30 +353,30 @@ DecomposeGEPExpression(const Value *V, int64_t &BaseOffs,
unsigned FieldNo = cast<ConstantInt>(Index)->getZExtValue();
if (FieldNo == 0) continue;
- BaseOffs += DL->getStructLayout(STy)->getElementOffset(FieldNo);
+ BaseOffs += DL.getStructLayout(STy)->getElementOffset(FieldNo);
continue;
}
// For an array/pointer, add the element offset, explicitly scaled.
if (ConstantInt *CIdx = dyn_cast<ConstantInt>(Index)) {
if (CIdx->isZero()) continue;
- BaseOffs += DL->getTypeAllocSize(*GTI)*CIdx->getSExtValue();
+ BaseOffs += DL.getTypeAllocSize(*GTI) * CIdx->getSExtValue();
continue;
}
- uint64_t Scale = DL->getTypeAllocSize(*GTI);
+ uint64_t Scale = DL.getTypeAllocSize(*GTI);
ExtensionKind Extension = EK_NotExtended;
// If the integer type is smaller than the pointer size, it is implicitly
// sign extended to pointer size.
unsigned Width = Index->getType()->getIntegerBitWidth();
- if (DL->getPointerSizeInBits(AS) > Width)
+ if (DL.getPointerSizeInBits(AS) > Width)
Extension = EK_SignExt;
// Use GetLinearExpression to decompose the index into a C1*V+C2 form.
APInt IndexScale(Width, 0), IndexOffset(Width, 0);
- Index = GetLinearExpression(Index, IndexScale, IndexOffset, Extension,
- *DL, 0, AC, DT);
+ Index = GetLinearExpression(Index, IndexScale, IndexOffset, Extension, DL,
+ 0, AC, DT);
// The GEP index scale ("Scale") scales C1*V+C2, yielding (C1*V+C2)*Scale.
// This gives us an aggregate computation of (C1*Scale)*V + C2*Scale.
@@ -408,7 +398,7 @@ DecomposeGEPExpression(const Value *V, int64_t &BaseOffs,
// Make sure that we have a scale that makes sense for this target's
// pointer size.
- if (unsigned ShiftBits = 64 - DL->getPointerSizeInBits(AS)) {
+ if (unsigned ShiftBits = 64 - DL.getPointerSizeInBits(AS)) {
Scale <<= ShiftBits;
Scale = (int64_t)Scale >> ShiftBits;
}
@@ -461,9 +451,7 @@ namespace {
initializeBasicAliasAnalysisPass(*PassRegistry::getPassRegistry());
}
- void initializePass() override {
- InitializeAliasAnalysis(this);
- }
+ bool doInitialization(Module &M) override;
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.addRequired<AliasAnalysis>();
@@ -612,7 +600,7 @@ BasicAliasAnalysis::pointsToConstantMemory(const Location &Loc, bool OrLocal) {
SmallVector<const Value *, 16> Worklist;
Worklist.push_back(Loc.Ptr);
do {
- const Value *V = GetUnderlyingObject(Worklist.pop_back_val(), DL);
+ const Value *V = GetUnderlyingObject(Worklist.pop_back_val(), *DL);
if (!Visited.insert(V).second) {
Visited.clear();
return AliasAnalysis::pointsToConstantMemory(Loc, OrLocal);
@@ -815,6 +803,11 @@ static bool isAssumeIntrinsic(ImmutableCallSite CS) {
return false;
}
+bool BasicAliasAnalysis::doInitialization(Module &M) {
+ InitializeAliasAnalysis(this, &M.getDataLayout());
+ return true;
+}
+
/// getModRefInfo - Check to see if the specified callsite can clobber the
/// specified memory object. Since we only look at local properties of this
/// function, we really can't say much about this query. We do, however, use
@@ -825,7 +818,7 @@ BasicAliasAnalysis::getModRefInfo(ImmutableCallSite CS,
assert(notDifferentParent(CS.getInstruction(), Loc.Ptr) &&
"AliasAnalysis query involving multiple functions!");
- const Value *Object = GetUnderlyingObject(Loc.Ptr, DL);
+ const Value *Object = GetUnderlyingObject(Loc.Ptr, *DL);
// If this is a tail call and Loc.Ptr points to a stack location, we know that
// the tail call cannot access or modify the local stack.
@@ -1042,10 +1035,10 @@ BasicAliasAnalysis::aliasGEP(const GEPOperator *GEP1, uint64_t V1Size,
SmallVector<VariableGEPIndex, 4> GEP2VariableIndices;
const Value *GEP2BasePtr =
DecomposeGEPExpression(GEP2, GEP2BaseOffset, GEP2VariableIndices,
- GEP2MaxLookupReached, DL, AC2, DT);
+ GEP2MaxLookupReached, *DL, AC2, DT);
const Value *GEP1BasePtr =
DecomposeGEPExpression(GEP1, GEP1BaseOffset, GEP1VariableIndices,
- GEP1MaxLookupReached, DL, AC1, DT);
+ GEP1MaxLookupReached, *DL, AC1, DT);
// DecomposeGEPExpression and GetUnderlyingObject should return the
// same result except when DecomposeGEPExpression has no DataLayout.
if (GEP1BasePtr != UnderlyingV1 || GEP2BasePtr != UnderlyingV2) {
@@ -1074,14 +1067,14 @@ BasicAliasAnalysis::aliasGEP(const GEPOperator *GEP1, uint64_t V1Size,
// about the relation of the resulting pointer.
const Value *GEP1BasePtr =
DecomposeGEPExpression(GEP1, GEP1BaseOffset, GEP1VariableIndices,
- GEP1MaxLookupReached, DL, AC1, DT);
+ GEP1MaxLookupReached, *DL, AC1, DT);
int64_t GEP2BaseOffset;
bool GEP2MaxLookupReached;
SmallVector<VariableGEPIndex, 4> GEP2VariableIndices;
const Value *GEP2BasePtr =
DecomposeGEPExpression(GEP2, GEP2BaseOffset, GEP2VariableIndices,
- GEP2MaxLookupReached, DL, AC2, DT);
+ GEP2MaxLookupReached, *DL, AC2, DT);
// DecomposeGEPExpression and GetUnderlyingObject should return the
// same result except when DecomposeGEPExpression has no DataLayout.
@@ -1131,7 +1124,7 @@ BasicAliasAnalysis::aliasGEP(const GEPOperator *GEP1, uint64_t V1Size,
const Value *GEP1BasePtr =
DecomposeGEPExpression(GEP1, GEP1BaseOffset, GEP1VariableIndices,
- GEP1MaxLookupReached, DL, AC1, DT);
+ GEP1MaxLookupReached, *DL, AC1, DT);
// DecomposeGEPExpression and GetUnderlyingObject should return the
// same result except when DecomposeGEPExpression has no DataLayout.
@@ -1200,7 +1193,7 @@ BasicAliasAnalysis::aliasGEP(const GEPOperator *GEP1, uint64_t V1Size,
const Value *V = GEP1VariableIndices[i].V;
bool SignKnownZero, SignKnownOne;
- ComputeSignBit(const_cast<Value *>(V), SignKnownZero, SignKnownOne, DL,
+ ComputeSignBit(const_cast<Value *>(V), SignKnownZero, SignKnownOne, *DL,
0, AC1, nullptr, DT);
// Zero-extension widens the variable, and so forces the sign
@@ -1409,8 +1402,8 @@ BasicAliasAnalysis::aliasCheck(const Value *V1, uint64_t V1Size,
return NoAlias; // Scalars cannot alias each other
// Figure out what objects these things are pointing to if we can.
- const Value *O1 = GetUnderlyingObject(V1, DL, MaxLookupSearchDepth);
- const Value *O2 = GetUnderlyingObject(V2, DL, MaxLookupSearchDepth);
+ const Value *O1 = GetUnderlyingObject(V1, *DL, MaxLookupSearchDepth);
+ const Value *O2 = GetUnderlyingObject(V2, *DL, MaxLookupSearchDepth);
// Null values in the default address space don't point to any object, so they
// don't alias any other pointer.
@@ -1533,6 +1526,9 @@ bool BasicAliasAnalysis::isValueEqualInPotentialCycles(const Value *V,
if (!Inst)
return true;
+ if (VisitedPhiBBs.empty())
+ return true;
+
if (VisitedPhiBBs.size() > MaxNumPhiBBsValueReachabilityCheck)
return false;
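
BasicAliasAnalysis shows the second half of the migration: its parameterless initializePass() hook is replaced by doInitialization(Module &), the earliest hook with a Module (and therefore a DataLayout) in scope. A sketch of the resulting shape, abridged to the initialization path (the class otherwise derives from ImmutablePass and AliasAnalysis, as in the tree at the time):

// Declaration inside the anonymous namespace:
//   bool doInitialization(Module &M) override;  // was: void initializePass()
// Out-of-line definition added by this hunk:
bool BasicAliasAnalysis::doInitialization(Module &M) {
  InitializeAliasAnalysis(this, &M.getDataLayout());
  return true;
}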
diff --git a/lib/Analysis/BranchProbabilityInfo.cpp b/lib/Analysis/BranchProbabilityInfo.cpp
index 8cd6ea4..14800f4 100644
--- a/lib/Analysis/BranchProbabilityInfo.cpp
+++ b/lib/Analysis/BranchProbabilityInfo.cpp
@@ -21,6 +21,7 @@
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Metadata.h"
#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
using namespace llvm;
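
The lone change to BranchProbabilityInfo.cpp (an explicit raw_ostream include; CFLAliasAnalysis.cpp gains the same one below) reflects an include-pruning cleanup in this upstream range: Debug.h apparently no longer pulls in raw_ostream.h transitively, so any file that streams to dbgs() or errs() must include it itself. A minimal, hypothetical sketch of the requirement:

#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h" // now required explicitly for dbgs()

static void dumpCount(unsigned N) {
  llvm::dbgs() << "count = " << N << "\n"; // dbgs() returns a raw_ostream &
}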
diff --git a/lib/Analysis/CFLAliasAnalysis.cpp b/lib/Analysis/CFLAliasAnalysis.cpp
index 82fbfe0..53d748d 100644
--- a/lib/Analysis/CFLAliasAnalysis.cpp
+++ b/lib/Analysis/CFLAliasAnalysis.cpp
@@ -45,9 +45,11 @@
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
#include <algorithm>
#include <cassert>
#include <forward_list>
+#include <memory>
#include <tuple>
using namespace llvm;
@@ -77,7 +79,7 @@ static Optional<Value *> getTargetValue(Instruction *);
static bool hasUsefulEdges(Instruction *);
const StratifiedIndex StratifiedLink::SetSentinel =
- std::numeric_limits<StratifiedIndex>::max();
+ std::numeric_limits<StratifiedIndex>::max();
namespace {
// StratifiedInfo Attribute things.
@@ -85,11 +87,13 @@ typedef unsigned StratifiedAttr;
LLVM_CONSTEXPR unsigned MaxStratifiedAttrIndex = NumStratifiedAttrs;
LLVM_CONSTEXPR unsigned AttrAllIndex = 0;
LLVM_CONSTEXPR unsigned AttrGlobalIndex = 1;
-LLVM_CONSTEXPR unsigned AttrFirstArgIndex = 2;
+LLVM_CONSTEXPR unsigned AttrUnknownIndex = 2;
+LLVM_CONSTEXPR unsigned AttrFirstArgIndex = 3;
LLVM_CONSTEXPR unsigned AttrLastArgIndex = MaxStratifiedAttrIndex;
LLVM_CONSTEXPR unsigned AttrMaxNumArgs = AttrLastArgIndex - AttrFirstArgIndex;
LLVM_CONSTEXPR StratifiedAttr AttrNone = 0;
+LLVM_CONSTEXPR StratifiedAttr AttrUnknown = 1 << AttrUnknownIndex;
LLVM_CONSTEXPR StratifiedAttr AttrAll = ~AttrNone;
// \brief StratifiedSets call for knowledge of "direction", so this is how we
@@ -144,9 +148,8 @@ struct FunctionInfo {
// Lots of functions have < 4 returns. Adjust as necessary.
SmallVector<Value *, 4> ReturnedValues;
- FunctionInfo(StratifiedSets<Value *> &&S,
- SmallVector<Value *, 4> &&RV)
- : Sets(std::move(S)), ReturnedValues(std::move(RV)) {}
+ FunctionInfo(StratifiedSets<Value *> &&S, SmallVector<Value *, 4> &&RV)
+ : Sets(std::move(S)), ReturnedValues(std::move(RV)) {}
};
struct CFLAliasAnalysis;
@@ -229,6 +232,10 @@ public:
// Comparisons between global variables and other constants should be
// handled by BasicAA.
+ // TODO: ConstantExpr handling -- CFLAA may report NoAlias when comparing
+ // a GlobalValue and ConstantExpr, but every query needs to have at least
+ // one Value tied to a Function, and neither GlobalValues nor ConstantExprs
+ // are.
if (isa<Constant>(LocA.Ptr) && isa<Constant>(LocB.Ptr)) {
return AliasAnalysis::alias(LocA, LocB);
}
@@ -240,7 +247,7 @@ public:
return QueryResult;
}
- void initializePass() override { InitializeAliasAnalysis(this); }
+ bool doInitialization(Module &M) override;
};
void FunctionHandle::removeSelfFromCache() {
@@ -263,9 +270,19 @@ public:
llvm_unreachable("Unsupported instruction encountered");
}
+ void visitPtrToIntInst(PtrToIntInst &Inst) {
+ auto *Ptr = Inst.getOperand(0);
+ Output.push_back(Edge(Ptr, Ptr, EdgeType::Assign, AttrUnknown));
+ }
+
+ void visitIntToPtrInst(IntToPtrInst &Inst) {
+ auto *Ptr = &Inst;
+ Output.push_back(Edge(Ptr, Ptr, EdgeType::Assign, AttrUnknown));
+ }
+
void visitCastInst(CastInst &Inst) {
- Output.push_back(Edge(&Inst, Inst.getOperand(0), EdgeType::Assign,
- AttrNone));
+ Output.push_back(
+ Edge(&Inst, Inst.getOperand(0), EdgeType::Assign, AttrNone));
}
void visitBinaryOperator(BinaryOperator &Inst) {
@@ -377,7 +394,7 @@ public:
// I put this here to give us an upper bound on time taken by IPA. Is it
// really (realistically) needed? Keep in mind that we do have an n^2 algo.
- if (std::distance(Args.begin(), Args.end()) > (int) MaxSupportedArgs)
+ if (std::distance(Args.begin(), Args.end()) > (int)MaxSupportedArgs)
return false;
// Exit early if we'll fail anyway
@@ -429,7 +446,7 @@ public:
}
if (AddEdge)
Output.push_back(Edge(FuncValue, ArgVal, EdgeType::Assign,
- StratifiedAttrs().flip()));
+ StratifiedAttrs().flip()));
}
if (Parameters.size() != Arguments.size())
@@ -571,8 +588,7 @@ private:
EdgeTypeT Weight;
Node Other;
- Edge(const EdgeTypeT &W, const Node &N)
- : Weight(W), Other(N) {}
+ Edge(const EdgeTypeT &W, const Node &N) : Weight(W), Other(N) {}
bool operator==(const Edge &E) const {
return Weight == E.Weight && Other == E.Other;
@@ -735,6 +751,25 @@ static Level directionOfEdgeType(EdgeType);
static void buildGraphFrom(CFLAliasAnalysis &, Function *,
SmallVectorImpl<Value *> &, NodeMapT &, GraphT &);
+// Gets the edges of a ConstantExpr as if it were an Instruction. This
+// function also acts on any nested ConstantExprs, adding the edges
+// of those to the given SmallVector as well.
+static void constexprToEdges(CFLAliasAnalysis &, ConstantExpr &,
+ SmallVectorImpl<Edge> &);
+
+// Given an Instruction, this will add it to the graph, along with any
+// Instructions that are potentially only available from said Instruction.
+// For example, given the following line:
+// %0 = load i16* getelementptr ([1 x i16]* @a, 0, 0), align 2
+// addInstructionToGraph would add both the `load` and `getelementptr`
+// instructions to the graph appropriately.
+static void addInstructionToGraph(CFLAliasAnalysis &, Instruction &,
+ SmallVectorImpl<Value *> &, NodeMapT &,
+ GraphT &);
+
+// Notes whether it would be pointless to add the given Value to our sets.
+static bool canSkipAddingToSets(Value *Val);
+
// Builds the graph + StratifiedSets for a function.
static FunctionInfo buildSetsFrom(CFLAliasAnalysis &, Function *);
@@ -806,6 +841,8 @@ static EdgeType flipWeight(EdgeType Initial) {
static void argsToEdges(CFLAliasAnalysis &Analysis, Instruction *Inst,
SmallVectorImpl<Edge> &Output) {
+ assert(hasUsefulEdges(Inst) &&
+ "Expected instructions to have 'useful' edges");
GetEdgesVisitor v(Analysis, Output);
v.visit(Inst);
}
@@ -822,13 +859,41 @@ static Level directionOfEdgeType(EdgeType Weight) {
llvm_unreachable("Incomplete switch coverage");
}
-// Aside: We may remove graph construction entirely, because it doesn't really
-// buy us much that we don't already have. I'd like to add interprocedural
-// analysis prior to this however, in case that somehow requires the graph
-// produced by this for efficient execution
-static void buildGraphFrom(CFLAliasAnalysis &Analysis, Function *Fn,
- SmallVectorImpl<Value *> &ReturnedValues,
- NodeMapT &Map, GraphT &Graph) {
+static void constexprToEdges(CFLAliasAnalysis &Analysis,
+ ConstantExpr &CExprToCollapse,
+ SmallVectorImpl<Edge> &Results) {
+ SmallVector<ConstantExpr *, 4> Worklist;
+ Worklist.push_back(&CExprToCollapse);
+
+ SmallVector<Edge, 8> ConstexprEdges;
+ while (!Worklist.empty()) {
+ auto *CExpr = Worklist.pop_back_val();
+ std::unique_ptr<Instruction> Inst(CExpr->getAsInstruction());
+
+ if (!hasUsefulEdges(Inst.get()))
+ continue;
+
+ ConstexprEdges.clear();
+ argsToEdges(Analysis, Inst.get(), ConstexprEdges);
+ for (auto &Edge : ConstexprEdges) {
+ if (Edge.From == Inst.get())
+ Edge.From = CExpr;
+ else if (auto *Nested = dyn_cast<ConstantExpr>(Edge.From))
+ Worklist.push_back(Nested);
+
+ if (Edge.To == Inst.get())
+ Edge.To = CExpr;
+ else if (auto *Nested = dyn_cast<ConstantExpr>(Edge.To))
+ Worklist.push_back(Nested);
+ }
+
+ Results.append(ConstexprEdges.begin(), ConstexprEdges.end());
+ }
+}
+
+static void addInstructionToGraph(CFLAliasAnalysis &Analysis, Instruction &Inst,
+ SmallVectorImpl<Value *> &ReturnedValues,
+ NodeMapT &Map, GraphT &Graph) {
const auto findOrInsertNode = [&Map, &Graph](Value *Val) {
auto Pair = Map.insert(std::make_pair(Val, GraphT::Node()));
auto &Iter = Pair.first;
@@ -839,42 +904,86 @@ static void buildGraphFrom(CFLAliasAnalysis &Analysis, Function *Fn,
return Iter->second;
};
+ // We don't want the edges of most "return" instructions, but we *do* want
+ // to know what can be returned.
+ if (isa<ReturnInst>(&Inst))
+ ReturnedValues.push_back(&Inst);
+
+ if (!hasUsefulEdges(&Inst))
+ return;
+
SmallVector<Edge, 8> Edges;
- for (auto &Bb : Fn->getBasicBlockList()) {
- for (auto &Inst : Bb.getInstList()) {
- // We don't want the edges of most "return" instructions, but we *do* want
- // to know what can be returned.
- if (auto *Ret = dyn_cast<ReturnInst>(&Inst))
- ReturnedValues.push_back(Ret);
-
- if (!hasUsefulEdges(&Inst))
- continue;
+ argsToEdges(Analysis, &Inst, Edges);
+
+ // In the case of an unused alloca (or similar), edges may be empty. Note
+ // that it exists so we can potentially answer NoAlias.
+ if (Edges.empty()) {
+ auto MaybeVal = getTargetValue(&Inst);
+ assert(MaybeVal.hasValue());
+ auto *Target = *MaybeVal;
+ findOrInsertNode(Target);
+ return;
+ }
- Edges.clear();
- argsToEdges(Analysis, &Inst, Edges);
+ const auto addEdgeToGraph = [&Graph, &findOrInsertNode](const Edge &E) {
+ auto To = findOrInsertNode(E.To);
+ auto From = findOrInsertNode(E.From);
+ auto FlippedWeight = flipWeight(E.Weight);
+ auto Attrs = E.AdditionalAttrs;
+ Graph.addEdge(From, To, std::make_pair(E.Weight, Attrs),
+ std::make_pair(FlippedWeight, Attrs));
+ };
- // In the case of an unused alloca (or similar), edges may be empty. Note
- // that it exists so we can potentially answer NoAlias.
- if (Edges.empty()) {
- auto MaybeVal = getTargetValue(&Inst);
- assert(MaybeVal.hasValue());
- auto *Target = *MaybeVal;
- findOrInsertNode(Target);
- continue;
- }
+ SmallVector<ConstantExpr *, 4> ConstantExprs;
+ for (const Edge &E : Edges) {
+ addEdgeToGraph(E);
+ if (auto *Constexpr = dyn_cast<ConstantExpr>(E.To))
+ ConstantExprs.push_back(Constexpr);
+ if (auto *Constexpr = dyn_cast<ConstantExpr>(E.From))
+ ConstantExprs.push_back(Constexpr);
+ }
- for (const Edge &E : Edges) {
- auto To = findOrInsertNode(E.To);
- auto From = findOrInsertNode(E.From);
- auto FlippedWeight = flipWeight(E.Weight);
- auto Attrs = E.AdditionalAttrs;
- Graph.addEdge(From, To, std::make_pair(E.Weight, Attrs),
- std::make_pair(FlippedWeight, Attrs));
- }
- }
+ for (ConstantExpr *CE : ConstantExprs) {
+ Edges.clear();
+ constexprToEdges(Analysis, *CE, Edges);
+ std::for_each(Edges.begin(), Edges.end(), addEdgeToGraph);
}
}
+// Aside: We may remove graph construction entirely, because it doesn't really
+// buy us much that we don't already have. I'd like to add interprocedural
+// analysis prior to this however, in case that somehow requires the graph
+// produced by this for efficient execution
+static void buildGraphFrom(CFLAliasAnalysis &Analysis, Function *Fn,
+ SmallVectorImpl<Value *> &ReturnedValues,
+ NodeMapT &Map, GraphT &Graph) {
+ for (auto &Bb : Fn->getBasicBlockList())
+ for (auto &Inst : Bb.getInstList())
+ addInstructionToGraph(Analysis, Inst, ReturnedValues, Map, Graph);
+}
+
+static bool canSkipAddingToSets(Value *Val) {
+ // Constants can share instances, which may falsely unify multiple
+ // sets, e.g. in
+ // store i32* null, i32** %ptr1
+ // store i32* null, i32** %ptr2
+ // clearly ptr1 and ptr2 should not be unified into the same set, so
+ // we should filter out the (potentially shared) instance of
+ // i32* null.
+ if (isa<Constant>(Val)) {
+ bool Container = isa<ConstantVector>(Val) || isa<ConstantArray>(Val) ||
+ isa<ConstantStruct>(Val);
+ // TODO: Because all of these things are constant, we can determine whether
+ // the data is *actually* mutable at graph building time. This will probably
+ // come for free/cheap with offset awareness.
+ bool CanStoreMutableData =
+ isa<GlobalValue>(Val) || isa<ConstantExpr>(Val) || Container;
+ return !CanStoreMutableData;
+ }
+
+ return false;
+}
+
static FunctionInfo buildSetsFrom(CFLAliasAnalysis &Analysis, Function *Fn) {
NodeMapT Map;
GraphT Graph;
@@ -906,7 +1015,7 @@ static FunctionInfo buildSetsFrom(CFLAliasAnalysis &Analysis, Function *Fn) {
while (!Worklist.empty()) {
auto Node = Worklist.pop_back_val();
auto *CurValue = findValueOrDie(Node);
- if (isa<Constant>(CurValue) && !isa<GlobalValue>(CurValue))
+ if (canSkipAddingToSets(CurValue))
continue;
for (const auto &EdgeTuple : Graph.edgesFor(Node)) {
@@ -915,7 +1024,7 @@ static FunctionInfo buildSetsFrom(CFLAliasAnalysis &Analysis, Function *Fn) {
auto &OtherNode = std::get<1>(EdgeTuple);
auto *OtherValue = findValueOrDie(OtherNode);
- if (isa<Constant>(OtherValue) && !isa<GlobalValue>(OtherValue))
+ if (canSkipAddingToSets(OtherValue))
continue;
bool Added;
@@ -931,16 +1040,16 @@ static FunctionInfo buildSetsFrom(CFLAliasAnalysis &Analysis, Function *Fn) {
break;
}
- if (Added) {
- auto Aliasing = Weight.second;
- if (auto MaybeCurIndex = valueToAttrIndex(CurValue))
- Aliasing.set(*MaybeCurIndex);
- if (auto MaybeOtherIndex = valueToAttrIndex(OtherValue))
- Aliasing.set(*MaybeOtherIndex);
- Builder.noteAttributes(CurValue, Aliasing);
- Builder.noteAttributes(OtherValue, Aliasing);
+ auto Aliasing = Weight.second;
+ if (auto MaybeCurIndex = valueToAttrIndex(CurValue))
+ Aliasing.set(*MaybeCurIndex);
+ if (auto MaybeOtherIndex = valueToAttrIndex(OtherValue))
+ Aliasing.set(*MaybeOtherIndex);
+ Builder.noteAttributes(CurValue, Aliasing);
+ Builder.noteAttributes(OtherValue, Aliasing);
+
+ if (Added)
Worklist.push_back(OtherNode);
- }
}
}
}
@@ -950,7 +1059,12 @@ static FunctionInfo buildSetsFrom(CFLAliasAnalysis &Analysis, Function *Fn) {
// things that were present during construction being present in the graph.
// So, we add all present arguments here.
for (auto &Arg : Fn->args()) {
- Builder.add(&Arg);
+ if (!Builder.add(&Arg))
+ continue;
+
+ auto Attrs = valueToAttrIndex(&Arg);
+ if (Attrs.hasValue())
+ Builder.noteAttributes(&Arg, *Attrs);
}
return FunctionInfo(Builder.build(), std::move(ReturnedValues));
@@ -1034,3 +1148,8 @@ CFLAliasAnalysis::query(const AliasAnalysis::Location &LocA,
return AliasAnalysis::NoAlias;
}
+
+bool CFLAliasAnalysis::doInitialization(Module &M) {
+ InitializeAliasAnalysis(this, &M.getDataLayout());
+ return true;
+}
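A hedged sketch of the initialization idiom this file adopts: since the DataLayout is now owned by the Module, an alias-analysis pass can no longer set itself up in initializePass() (which receives no Module) and overrides doInitialization() instead. Only doInitialization and InitializeAliasAnalysis below come from the patch; the pass itself is hypothetical.

#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/IR/Module.h"
#include "llvm/Pass.h"
using namespace llvm;

namespace {
// Hypothetical pass showing the post-patch initialization pattern.
struct ExampleAA : public ImmutablePass, public AliasAnalysis {
  static char ID;
  ExampleAA() : ImmutablePass(ID) {}

  // initializePass() has no Module parameter, so the DataLayout -- now
  // owned by the Module -- must be picked up here instead.
  bool doInitialization(Module &M) override {
    InitializeAliasAnalysis(this, &M.getDataLayout());
    return true;
  }
};
char ExampleAA::ID = 0;
}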
diff --git a/lib/Analysis/CMakeLists.txt b/lib/Analysis/CMakeLists.txt
index d840037..ae40321 100644
--- a/lib/Analysis/CMakeLists.txt
+++ b/lib/Analysis/CMakeLists.txt
@@ -27,7 +27,6 @@ add_llvm_library(LLVMAnalysis
InstructionSimplify.cpp
Interval.cpp
IntervalPartition.cpp
- JumpInstrTableInfo.cpp
LazyCallGraph.cpp
LazyValueInfo.cpp
LibCallAliasAnalysis.cpp
diff --git a/lib/Analysis/CodeMetrics.cpp b/lib/Analysis/CodeMetrics.cpp
index fa5683c..46a2c43 100644
--- a/lib/Analysis/CodeMetrics.cpp
+++ b/lib/Analysis/CodeMetrics.cpp
@@ -21,6 +21,7 @@
#include "llvm/IR/Function.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
#define DEBUG_TYPE "code-metrics"
diff --git a/lib/Analysis/ConstantFolding.cpp b/lib/Analysis/ConstantFolding.cpp
index fcafb41..995465d 100644
--- a/lib/Analysis/ConstantFolding.cpp
+++ b/lib/Analysis/ConstantFolding.cpp
@@ -50,8 +50,7 @@ using namespace llvm;
/// Constant fold bitcast, symbolically evaluating it with DataLayout.
/// This always returns a non-null constant, but it may be a
/// ConstantExpr if unfoldable.
-static Constant *FoldBitCast(Constant *C, Type *DestTy,
- const DataLayout &TD) {
+static Constant *FoldBitCast(Constant *C, Type *DestTy, const DataLayout &DL) {
// Catch the obvious splat cases.
if (C->isNullValue() && !DestTy->isX86_MMXTy())
return Constant::getNullValue(DestTy);
@@ -84,11 +83,11 @@ static Constant *FoldBitCast(Constant *C, Type *DestTy,
// Now that we know that the input value is a vector of integers, just shift
// and insert them into our result.
- unsigned BitShift = TD.getTypeAllocSizeInBits(SrcEltTy);
+ unsigned BitShift = DL.getTypeAllocSizeInBits(SrcEltTy);
APInt Result(IT->getBitWidth(), 0);
for (unsigned i = 0; i != NumSrcElts; ++i) {
Result <<= BitShift;
- if (TD.isLittleEndian())
+ if (DL.isLittleEndian())
Result |= CDV->getElementAsInteger(NumSrcElts-i-1);
else
Result |= CDV->getElementAsInteger(i);
@@ -106,7 +105,7 @@ static Constant *FoldBitCast(Constant *C, Type *DestTy,
// vector so the code below can handle it uniformly.
if (isa<ConstantFP>(C) || isa<ConstantInt>(C)) {
Constant *Ops = C; // don't take the address of C!
- return FoldBitCast(ConstantVector::get(Ops), DestTy, TD);
+ return FoldBitCast(ConstantVector::get(Ops), DestTy, DL);
}
// If this is a bitcast from constant vector -> vector, fold it.
@@ -138,7 +137,7 @@ static Constant *FoldBitCast(Constant *C, Type *DestTy,
Type *DestIVTy =
VectorType::get(IntegerType::get(C->getContext(), FPWidth), NumDstElt);
// Recursively handle this integer conversion, if possible.
- C = FoldBitCast(C, DestIVTy, TD);
+ C = FoldBitCast(C, DestIVTy, DL);
// Finally, IR can handle this now that #elts line up.
return ConstantExpr::getBitCast(C, DestTy);
@@ -162,7 +161,7 @@ static Constant *FoldBitCast(Constant *C, Type *DestTy,
// of the same size, and that their #elements is not the same. Do the
// conversion here, which depends on whether the input or output has
// more elements.
- bool isLittleEndian = TD.isLittleEndian();
+ bool isLittleEndian = DL.isLittleEndian();
SmallVector<Constant*, 32> Result;
if (NumDstElt < NumSrcElt) {
@@ -198,7 +197,7 @@ static Constant *FoldBitCast(Constant *C, Type *DestTy,
// Handle: bitcast (<2 x i64> <i64 0, i64 1> to <4 x i32>)
unsigned Ratio = NumDstElt/NumSrcElt;
- unsigned DstBitSize = TD.getTypeSizeInBits(DstEltTy);
+ unsigned DstBitSize = DL.getTypeSizeInBits(DstEltTy);
// Loop over each source value, expanding into multiple results.
for (unsigned i = 0; i != NumSrcElt; ++i) {
@@ -235,10 +234,10 @@ static Constant *FoldBitCast(Constant *C, Type *DestTy,
/// If this constant is a constant offset from a global, return the global and
/// the constant. Because of constantexprs, this function is recursive.
static bool IsConstantOffsetFromGlobal(Constant *C, GlobalValue *&GV,
- APInt &Offset, const DataLayout &TD) {
+ APInt &Offset, const DataLayout &DL) {
// Trivial case, constant is the global.
if ((GV = dyn_cast<GlobalValue>(C))) {
- unsigned BitWidth = TD.getPointerTypeSizeInBits(GV->getType());
+ unsigned BitWidth = DL.getPointerTypeSizeInBits(GV->getType());
Offset = APInt(BitWidth, 0);
return true;
}
@@ -251,22 +250,22 @@ static bool IsConstantOffsetFromGlobal(Constant *C, GlobalValue *&GV,
if (CE->getOpcode() == Instruction::PtrToInt ||
CE->getOpcode() == Instruction::BitCast ||
CE->getOpcode() == Instruction::AddrSpaceCast)
- return IsConstantOffsetFromGlobal(CE->getOperand(0), GV, Offset, TD);
+ return IsConstantOffsetFromGlobal(CE->getOperand(0), GV, Offset, DL);
// i32* getelementptr ([5 x i32]* @a, i32 0, i32 5)
GEPOperator *GEP = dyn_cast<GEPOperator>(CE);
if (!GEP)
return false;
- unsigned BitWidth = TD.getPointerTypeSizeInBits(GEP->getType());
+ unsigned BitWidth = DL.getPointerTypeSizeInBits(GEP->getType());
APInt TmpOffset(BitWidth, 0);
// If the base isn't a global+constant, we aren't either.
- if (!IsConstantOffsetFromGlobal(CE->getOperand(0), GV, TmpOffset, TD))
+ if (!IsConstantOffsetFromGlobal(CE->getOperand(0), GV, TmpOffset, DL))
return false;
// Otherwise, add any offset that our operands provide.
- if (!GEP->accumulateConstantOffset(TD, TmpOffset))
+ if (!GEP->accumulateConstantOffset(DL, TmpOffset))
return false;
Offset = TmpOffset;
@@ -276,11 +275,11 @@ static bool IsConstantOffsetFromGlobal(Constant *C, GlobalValue *&GV,
/// Recursive helper to read bits out of a global. C is the constant being copied
/// out of. ByteOffset is an offset into C. CurPtr is the pointer to copy
/// results into and BytesLeft is the number of bytes left in
-/// the CurPtr buffer. TD is the target data.
+/// the CurPtr buffer. DL is the DataLayout.
static bool ReadDataFromGlobal(Constant *C, uint64_t ByteOffset,
unsigned char *CurPtr, unsigned BytesLeft,
- const DataLayout &TD) {
- assert(ByteOffset <= TD.getTypeAllocSize(C->getType()) &&
+ const DataLayout &DL) {
+ assert(ByteOffset <= DL.getTypeAllocSize(C->getType()) &&
"Out of range access");
// If this element is zero or undefined, we can just return since *CurPtr is
@@ -298,7 +297,7 @@ static bool ReadDataFromGlobal(Constant *C, uint64_t ByteOffset,
for (unsigned i = 0; i != BytesLeft && ByteOffset != IntBytes; ++i) {
int n = ByteOffset;
- if (!TD.isLittleEndian())
+ if (!DL.isLittleEndian())
n = IntBytes - n - 1;
CurPtr[i] = (unsigned char)(Val >> (n * 8));
++ByteOffset;
@@ -308,22 +307,22 @@ static bool ReadDataFromGlobal(Constant *C, uint64_t ByteOffset,
if (ConstantFP *CFP = dyn_cast<ConstantFP>(C)) {
if (CFP->getType()->isDoubleTy()) {
- C = FoldBitCast(C, Type::getInt64Ty(C->getContext()), TD);
- return ReadDataFromGlobal(C, ByteOffset, CurPtr, BytesLeft, TD);
+ C = FoldBitCast(C, Type::getInt64Ty(C->getContext()), DL);
+ return ReadDataFromGlobal(C, ByteOffset, CurPtr, BytesLeft, DL);
}
if (CFP->getType()->isFloatTy()){
- C = FoldBitCast(C, Type::getInt32Ty(C->getContext()), TD);
- return ReadDataFromGlobal(C, ByteOffset, CurPtr, BytesLeft, TD);
+ C = FoldBitCast(C, Type::getInt32Ty(C->getContext()), DL);
+ return ReadDataFromGlobal(C, ByteOffset, CurPtr, BytesLeft, DL);
}
if (CFP->getType()->isHalfTy()){
- C = FoldBitCast(C, Type::getInt16Ty(C->getContext()), TD);
- return ReadDataFromGlobal(C, ByteOffset, CurPtr, BytesLeft, TD);
+ C = FoldBitCast(C, Type::getInt16Ty(C->getContext()), DL);
+ return ReadDataFromGlobal(C, ByteOffset, CurPtr, BytesLeft, DL);
}
return false;
}
if (ConstantStruct *CS = dyn_cast<ConstantStruct>(C)) {
- const StructLayout *SL = TD.getStructLayout(CS->getType());
+ const StructLayout *SL = DL.getStructLayout(CS->getType());
unsigned Index = SL->getElementContainingOffset(ByteOffset);
uint64_t CurEltOffset = SL->getElementOffset(Index);
ByteOffset -= CurEltOffset;
@@ -331,11 +330,11 @@ static bool ReadDataFromGlobal(Constant *C, uint64_t ByteOffset,
while (1) {
// If the element access is to the element itself and not to tail padding,
// read the bytes from the element.
- uint64_t EltSize = TD.getTypeAllocSize(CS->getOperand(Index)->getType());
+ uint64_t EltSize = DL.getTypeAllocSize(CS->getOperand(Index)->getType());
if (ByteOffset < EltSize &&
!ReadDataFromGlobal(CS->getOperand(Index), ByteOffset, CurPtr,
- BytesLeft, TD))
+ BytesLeft, DL))
return false;
++Index;
@@ -362,7 +361,7 @@ static bool ReadDataFromGlobal(Constant *C, uint64_t ByteOffset,
if (isa<ConstantArray>(C) || isa<ConstantVector>(C) ||
isa<ConstantDataSequential>(C)) {
Type *EltTy = C->getType()->getSequentialElementType();
- uint64_t EltSize = TD.getTypeAllocSize(EltTy);
+ uint64_t EltSize = DL.getTypeAllocSize(EltTy);
uint64_t Index = ByteOffset / EltSize;
uint64_t Offset = ByteOffset - Index * EltSize;
uint64_t NumElts;
@@ -373,7 +372,7 @@ static bool ReadDataFromGlobal(Constant *C, uint64_t ByteOffset,
for (; Index != NumElts; ++Index) {
if (!ReadDataFromGlobal(C->getAggregateElement(Index), Offset, CurPtr,
- BytesLeft, TD))
+ BytesLeft, DL))
return false;
uint64_t BytesWritten = EltSize - Offset;
@@ -390,9 +389,9 @@ static bool ReadDataFromGlobal(Constant *C, uint64_t ByteOffset,
if (ConstantExpr *CE = dyn_cast<ConstantExpr>(C)) {
if (CE->getOpcode() == Instruction::IntToPtr &&
- CE->getOperand(0)->getType() == TD.getIntPtrType(CE->getType())) {
+ CE->getOperand(0)->getType() == DL.getIntPtrType(CE->getType())) {
return ReadDataFromGlobal(CE->getOperand(0), ByteOffset, CurPtr,
- BytesLeft, TD);
+ BytesLeft, DL);
}
}
@@ -401,7 +400,7 @@ static bool ReadDataFromGlobal(Constant *C, uint64_t ByteOffset,
}
static Constant *FoldReinterpretLoadFromConstPtr(Constant *C,
- const DataLayout &TD) {
+ const DataLayout &DL) {
PointerType *PTy = cast<PointerType>(C->getType());
Type *LoadTy = PTy->getElementType();
IntegerType *IntType = dyn_cast<IntegerType>(LoadTy);
@@ -423,14 +422,13 @@ static Constant *FoldReinterpretLoadFromConstPtr(Constant *C,
MapTy = Type::getInt64PtrTy(C->getContext(), AS);
else if (LoadTy->isVectorTy()) {
MapTy = PointerType::getIntNPtrTy(C->getContext(),
- TD.getTypeAllocSizeInBits(LoadTy),
- AS);
+ DL.getTypeAllocSizeInBits(LoadTy), AS);
} else
return nullptr;
- C = FoldBitCast(C, MapTy, TD);
- if (Constant *Res = FoldReinterpretLoadFromConstPtr(C, TD))
- return FoldBitCast(Res, LoadTy, TD);
+ C = FoldBitCast(C, MapTy, DL);
+ if (Constant *Res = FoldReinterpretLoadFromConstPtr(C, DL))
+ return FoldBitCast(Res, LoadTy, DL);
return nullptr;
}
@@ -440,7 +438,7 @@ static Constant *FoldReinterpretLoadFromConstPtr(Constant *C,
GlobalValue *GVal;
APInt Offset;
- if (!IsConstantOffsetFromGlobal(C, GVal, Offset, TD))
+ if (!IsConstantOffsetFromGlobal(C, GVal, Offset, DL))
return nullptr;
GlobalVariable *GV = dyn_cast<GlobalVariable>(GVal);
@@ -455,16 +453,16 @@ static Constant *FoldReinterpretLoadFromConstPtr(Constant *C,
// If we're not accessing anything in this constant, the result is undefined.
if (Offset.getZExtValue() >=
- TD.getTypeAllocSize(GV->getInitializer()->getType()))
+ DL.getTypeAllocSize(GV->getInitializer()->getType()))
return UndefValue::get(IntType);
unsigned char RawBytes[32] = {0};
if (!ReadDataFromGlobal(GV->getInitializer(), Offset.getZExtValue(), RawBytes,
- BytesLoaded, TD))
+ BytesLoaded, DL))
return nullptr;
APInt ResultVal = APInt(IntType->getBitWidth(), 0);
- if (TD.isLittleEndian()) {
+ if (DL.isLittleEndian()) {
ResultVal = RawBytes[BytesLoaded - 1];
for (unsigned i = 1; i != BytesLoaded; ++i) {
ResultVal <<= 8;
@@ -482,9 +480,7 @@ static Constant *FoldReinterpretLoadFromConstPtr(Constant *C,
}
static Constant *ConstantFoldLoadThroughBitcast(ConstantExpr *CE,
- const DataLayout *DL) {
- if (!DL)
- return nullptr;
+ const DataLayout &DL) {
auto *DestPtrTy = dyn_cast<PointerType>(CE->getType());
if (!DestPtrTy)
return nullptr;
@@ -499,7 +495,7 @@ static Constant *ConstantFoldLoadThroughBitcast(ConstantExpr *CE,
// If the type sizes are the same and a cast is legal, just directly
// cast the constant.
- if (DL->getTypeSizeInBits(DestTy) == DL->getTypeSizeInBits(SrcTy)) {
+ if (DL.getTypeSizeInBits(DestTy) == DL.getTypeSizeInBits(SrcTy)) {
Instruction::CastOps Cast = Instruction::BitCast;
// If we are going from a pointer to int or vice versa, we spell the cast
// differently.
@@ -530,7 +526,7 @@ static Constant *ConstantFoldLoadThroughBitcast(ConstantExpr *CE,
/// Return the value that a load from C would produce if it is constant and
/// determinable. If this is not determinable, return null.
Constant *llvm::ConstantFoldLoadFromConstPtr(Constant *C,
- const DataLayout *TD) {
+ const DataLayout &DL) {
// First, try the easy cases:
if (GlobalVariable *GV = dyn_cast<GlobalVariable>(C))
if (GV->isConstant() && GV->hasDefinitiveInitializer())
@@ -552,13 +548,13 @@ Constant *llvm::ConstantFoldLoadFromConstPtr(Constant *C,
}
if (CE->getOpcode() == Instruction::BitCast)
- if (Constant *LoadedC = ConstantFoldLoadThroughBitcast(CE, TD))
+ if (Constant *LoadedC = ConstantFoldLoadThroughBitcast(CE, DL))
return LoadedC;
// Instead of loading constant c string, use corresponding integer value
// directly if string length is small enough.
StringRef Str;
- if (TD && getConstantStringInfo(CE, Str) && !Str.empty()) {
+ if (getConstantStringInfo(CE, Str) && !Str.empty()) {
unsigned StrLen = Str.size();
Type *Ty = cast<PointerType>(CE->getType())->getElementType();
unsigned NumBits = Ty->getPrimitiveSizeInBits();
@@ -568,7 +564,7 @@ Constant *llvm::ConstantFoldLoadFromConstPtr(Constant *C,
(isa<IntegerType>(Ty) || Ty->isFloatingPointTy())) {
APInt StrVal(NumBits, 0);
APInt SingleChar(NumBits, 0);
- if (TD->isLittleEndian()) {
+ if (DL.isLittleEndian()) {
for (signed i = StrLen-1; i >= 0; i--) {
SingleChar = (uint64_t) Str[i] & UCHAR_MAX;
StrVal = (StrVal << 8) | SingleChar;
@@ -593,7 +589,7 @@ Constant *llvm::ConstantFoldLoadFromConstPtr(Constant *C,
// If this load comes from anywhere in a constant global, and if the global
// is all undef or zero, we know what it loads.
if (GlobalVariable *GV =
- dyn_cast<GlobalVariable>(GetUnderlyingObject(CE, TD))) {
+ dyn_cast<GlobalVariable>(GetUnderlyingObject(CE, DL))) {
if (GV->isConstant() && GV->hasDefinitiveInitializer()) {
Type *ResTy = cast<PointerType>(C->getType())->getElementType();
if (GV->getInitializer()->isNullValue())
@@ -604,16 +600,15 @@ Constant *llvm::ConstantFoldLoadFromConstPtr(Constant *C,
}
// Try hard to fold loads from bitcasted strange and non-type-safe things.
- if (TD)
- return FoldReinterpretLoadFromConstPtr(CE, *TD);
- return nullptr;
+ return FoldReinterpretLoadFromConstPtr(CE, DL);
}
-static Constant *ConstantFoldLoadInst(const LoadInst *LI, const DataLayout *TD){
+static Constant *ConstantFoldLoadInst(const LoadInst *LI,
+ const DataLayout &DL) {
if (LI->isVolatile()) return nullptr;
if (Constant *C = dyn_cast<Constant>(LI->getOperand(0)))
- return ConstantFoldLoadFromConstPtr(C, TD);
+ return ConstantFoldLoadFromConstPtr(C, DL);
return nullptr;
}
@@ -623,16 +618,16 @@ static Constant *ConstantFoldLoadInst(const LoadInst *LI, const DataLayout *TD){
/// these together. The DataLayout is provided as DL.
static Constant *SymbolicallyEvaluateBinop(unsigned Opc, Constant *Op0,
- Constant *Op1, const DataLayout *DL){
+ Constant *Op1,
+ const DataLayout &DL) {
// SROA
// Fold (and 0xffffffff00000000, (shl x, 32)) -> shl.
// Fold (lshr (or X, Y), 32) -> (lshr [X/Y], 32) if one doesn't contribute
// bits.
-
- if (Opc == Instruction::And && DL) {
- unsigned BitWidth = DL->getTypeSizeInBits(Op0->getType()->getScalarType());
+ if (Opc == Instruction::And) {
+ unsigned BitWidth = DL.getTypeSizeInBits(Op0->getType()->getScalarType());
APInt KnownZero0(BitWidth, 0), KnownOne0(BitWidth, 0);
APInt KnownZero1(BitWidth, 0), KnownOne1(BitWidth, 0);
computeKnownBits(Op0, KnownZero0, KnownOne0, DL);
@@ -655,14 +650,13 @@ static Constant *SymbolicallyEvaluateBinop(unsigned Opc, Constant *Op0,
// If the constant expr is something like &A[123] - &A[4].f, fold this into a
// constant. This happens frequently when iterating over a global array.
- if (Opc == Instruction::Sub && DL) {
+ if (Opc == Instruction::Sub) {
GlobalValue *GV1, *GV2;
APInt Offs1, Offs2;
- if (IsConstantOffsetFromGlobal(Op0, GV1, Offs1, *DL))
- if (IsConstantOffsetFromGlobal(Op1, GV2, Offs2, *DL) &&
- GV1 == GV2) {
- unsigned OpSize = DL->getTypeSizeInBits(Op0->getType());
+ if (IsConstantOffsetFromGlobal(Op0, GV1, Offs1, DL))
+ if (IsConstantOffsetFromGlobal(Op1, GV2, Offs2, DL) && GV1 == GV2) {
+ unsigned OpSize = DL.getTypeSizeInBits(Op0->getType());
// (&GV+C1) - (&GV+C2) -> C1-C2, pointer arithmetic cannot overflow.
// PtrToInt may change the bitwidth so we have convert to the right size
@@ -677,13 +671,10 @@ static Constant *SymbolicallyEvaluateBinop(unsigned Opc, Constant *Op0,
/// If array indices are not pointer-sized integers, explicitly cast them so
/// that they aren't implicitly cast by the getelementptr.
-static Constant *CastGEPIndices(ArrayRef<Constant *> Ops,
- Type *ResultTy, const DataLayout *TD,
+static Constant *CastGEPIndices(ArrayRef<Constant *> Ops, Type *ResultTy,
+ const DataLayout &DL,
const TargetLibraryInfo *TLI) {
- if (!TD)
- return nullptr;
-
- Type *IntPtrTy = TD->getIntPtrType(ResultTy);
+ Type *IntPtrTy = DL.getIntPtrType(ResultTy);
bool Any = false;
SmallVector<Constant*, 32> NewIdxs;
@@ -708,7 +699,7 @@ static Constant *CastGEPIndices(ArrayRef<Constant *> Ops,
Constant *C = ConstantExpr::getGetElementPtr(Ops[0], NewIdxs);
if (ConstantExpr *CE = dyn_cast<ConstantExpr>(C)) {
- if (Constant *Folded = ConstantFoldConstantExpression(CE, TD, TLI))
+ if (Constant *Folded = ConstantFoldConstantExpression(CE, DL, TLI))
C = Folded;
}
@@ -733,14 +724,14 @@ static Constant* StripPtrCastKeepAS(Constant* Ptr) {
/// If we can symbolically evaluate the GEP constant expression, do so.
static Constant *SymbolicallyEvaluateGEP(ArrayRef<Constant *> Ops,
- Type *ResultTy, const DataLayout *TD,
+ Type *ResultTy, const DataLayout &DL,
const TargetLibraryInfo *TLI) {
Constant *Ptr = Ops[0];
- if (!TD || !Ptr->getType()->getPointerElementType()->isSized() ||
+ if (!Ptr->getType()->getPointerElementType()->isSized() ||
!Ptr->getType()->isPointerTy())
return nullptr;
- Type *IntPtrTy = TD->getIntPtrType(Ptr->getType());
+ Type *IntPtrTy = DL.getIntPtrType(Ptr->getType());
Type *ResultElementTy = ResultTy->getPointerElementType();
// If this is a constant expr gep that is effectively computing an
@@ -760,19 +751,19 @@ static Constant *SymbolicallyEvaluateGEP(ArrayRef<Constant *> Ops,
Res = ConstantExpr::getSub(Res, CE->getOperand(1));
Res = ConstantExpr::getIntToPtr(Res, ResultTy);
if (ConstantExpr *ResCE = dyn_cast<ConstantExpr>(Res))
- Res = ConstantFoldConstantExpression(ResCE, TD, TLI);
+ Res = ConstantFoldConstantExpression(ResCE, DL, TLI);
return Res;
}
}
return nullptr;
}
- unsigned BitWidth = TD->getTypeSizeInBits(IntPtrTy);
+ unsigned BitWidth = DL.getTypeSizeInBits(IntPtrTy);
APInt Offset =
- APInt(BitWidth, TD->getIndexedOffset(Ptr->getType(),
- makeArrayRef((Value *const*)
- Ops.data() + 1,
- Ops.size() - 1)));
+ APInt(BitWidth,
+ DL.getIndexedOffset(
+ Ptr->getType(),
+ makeArrayRef((Value * const *)Ops.data() + 1, Ops.size() - 1)));
Ptr = StripPtrCastKeepAS(Ptr);
// If this is a GEP of a GEP, fold it all into a single GEP.
@@ -790,8 +781,7 @@ static Constant *SymbolicallyEvaluateGEP(ArrayRef<Constant *> Ops,
break;
Ptr = cast<Constant>(GEP->getOperand(0));
- Offset += APInt(BitWidth,
- TD->getIndexedOffset(Ptr->getType(), NestedOps));
+ Offset += APInt(BitWidth, DL.getIndexedOffset(Ptr->getType(), NestedOps));
Ptr = StripPtrCastKeepAS(Ptr);
}
@@ -831,7 +821,7 @@ static Constant *SymbolicallyEvaluateGEP(ArrayRef<Constant *> Ops,
}
// Determine which element of the array the offset points into.
- APInt ElemSize(BitWidth, TD->getTypeAllocSize(ATy->getElementType()));
+ APInt ElemSize(BitWidth, DL.getTypeAllocSize(ATy->getElementType()));
if (ElemSize == 0)
// The element size is 0. This may be [0 x Ty]*, so just use a zero
// index for this level and proceed to the next level to see if it can
@@ -850,7 +840,7 @@ static Constant *SymbolicallyEvaluateGEP(ArrayRef<Constant *> Ops,
// can't re-form this GEP in a regular form, so bail out. The pointer
// operand likely went through casts that are necessary to make the GEP
// sensible.
- const StructLayout &SL = *TD->getStructLayout(STy);
+ const StructLayout &SL = *DL.getStructLayout(STy);
if (Offset.uge(SL.getSizeInBytes()))
break;
@@ -882,7 +872,7 @@ static Constant *SymbolicallyEvaluateGEP(ArrayRef<Constant *> Ops,
// If we ended up indexing a member with a type that doesn't match
// the type of what the original indices indexed, add a cast.
if (Ty != ResultElementTy)
- C = FoldBitCast(C, ResultTy, *TD);
+ C = FoldBitCast(C, ResultTy, DL);
return C;
}
@@ -898,8 +888,7 @@ static Constant *SymbolicallyEvaluateGEP(ArrayRef<Constant *> Ops,
/// Note that this fails if not all of the operands are constant. Otherwise,
/// this function can only fail when attempting to fold instructions like loads
/// and stores, which have no constant expression form.
-Constant *llvm::ConstantFoldInstruction(Instruction *I,
- const DataLayout *TD,
+Constant *llvm::ConstantFoldInstruction(Instruction *I, const DataLayout &DL,
const TargetLibraryInfo *TLI) {
// Handle PHI nodes quickly here...
if (PHINode *PN = dyn_cast<PHINode>(I)) {
@@ -919,7 +908,7 @@ Constant *llvm::ConstantFoldInstruction(Instruction *I,
return nullptr;
// Fold the PHI's operands.
if (ConstantExpr *NewC = dyn_cast<ConstantExpr>(C))
- C = ConstantFoldConstantExpression(NewC, TD, TLI);
+ C = ConstantFoldConstantExpression(NewC, DL, TLI);
// If the incoming value is a different constant to
// the one we saw previously, then give up.
if (CommonValue && C != CommonValue)
@@ -942,17 +931,17 @@ Constant *llvm::ConstantFoldInstruction(Instruction *I,
// Fold the Instruction's operands.
if (ConstantExpr *NewCE = dyn_cast<ConstantExpr>(Op))
- Op = ConstantFoldConstantExpression(NewCE, TD, TLI);
+ Op = ConstantFoldConstantExpression(NewCE, DL, TLI);
Ops.push_back(Op);
}
if (const CmpInst *CI = dyn_cast<CmpInst>(I))
return ConstantFoldCompareInstOperands(CI->getPredicate(), Ops[0], Ops[1],
- TD, TLI);
+ DL, TLI);
if (const LoadInst *LI = dyn_cast<LoadInst>(I))
- return ConstantFoldLoadInst(LI, TD);
+ return ConstantFoldLoadInst(LI, DL);
if (InsertValueInst *IVI = dyn_cast<InsertValueInst>(I)) {
return ConstantExpr::getInsertValue(
@@ -967,11 +956,11 @@ Constant *llvm::ConstantFoldInstruction(Instruction *I,
EVI->getIndices());
}
- return ConstantFoldInstOperands(I->getOpcode(), I->getType(), Ops, TD, TLI);
+ return ConstantFoldInstOperands(I->getOpcode(), I->getType(), Ops, DL, TLI);
}
static Constant *
-ConstantFoldConstantExpressionImpl(const ConstantExpr *CE, const DataLayout *TD,
+ConstantFoldConstantExpressionImpl(const ConstantExpr *CE, const DataLayout &DL,
const TargetLibraryInfo *TLI,
SmallPtrSetImpl<ConstantExpr *> &FoldedOps) {
SmallVector<Constant *, 8> Ops;
@@ -982,25 +971,25 @@ ConstantFoldConstantExpressionImpl(const ConstantExpr *CE, const DataLayout *TD,
// a ConstantExpr, we don't have to process it again.
if (ConstantExpr *NewCE = dyn_cast<ConstantExpr>(NewC)) {
if (FoldedOps.insert(NewCE).second)
- NewC = ConstantFoldConstantExpressionImpl(NewCE, TD, TLI, FoldedOps);
+ NewC = ConstantFoldConstantExpressionImpl(NewCE, DL, TLI, FoldedOps);
}
Ops.push_back(NewC);
}
if (CE->isCompare())
return ConstantFoldCompareInstOperands(CE->getPredicate(), Ops[0], Ops[1],
- TD, TLI);
- return ConstantFoldInstOperands(CE->getOpcode(), CE->getType(), Ops, TD, TLI);
+ DL, TLI);
+ return ConstantFoldInstOperands(CE->getOpcode(), CE->getType(), Ops, DL, TLI);
}
/// Attempt to fold the constant expression
/// using the specified DataLayout. If successful, the constant result is
/// returned; if not, null is returned.
Constant *llvm::ConstantFoldConstantExpression(const ConstantExpr *CE,
- const DataLayout *TD,
+ const DataLayout &DL,
const TargetLibraryInfo *TLI) {
SmallPtrSet<ConstantExpr *, 4> FoldedOps;
- return ConstantFoldConstantExpressionImpl(CE, TD, TLI, FoldedOps);
+ return ConstantFoldConstantExpressionImpl(CE, DL, TLI, FoldedOps);
}
/// Attempt to constant fold an instruction with the
@@ -1015,12 +1004,12 @@ Constant *llvm::ConstantFoldConstantExpression(const ConstantExpr *CE,
///
Constant *llvm::ConstantFoldInstOperands(unsigned Opcode, Type *DestTy,
ArrayRef<Constant *> Ops,
- const DataLayout *TD,
+ const DataLayout &DL,
const TargetLibraryInfo *TLI) {
// Handle easy binops first.
if (Instruction::isBinaryOp(Opcode)) {
if (isa<ConstantExpr>(Ops[0]) || isa<ConstantExpr>(Ops[1])) {
- if (Constant *C = SymbolicallyEvaluateBinop(Opcode, Ops[0], Ops[1], TD))
+ if (Constant *C = SymbolicallyEvaluateBinop(Opcode, Ops[0], Ops[1], DL))
return C;
}
@@ -1040,10 +1029,10 @@ Constant *llvm::ConstantFoldInstOperands(unsigned Opcode, Type *DestTy,
// If the input is an inttoptr, eliminate the pair. This requires knowing
// the width of a pointer, so it can't be done in ConstantExpr::getCast.
if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Ops[0])) {
- if (TD && CE->getOpcode() == Instruction::IntToPtr) {
+ if (CE->getOpcode() == Instruction::IntToPtr) {
Constant *Input = CE->getOperand(0);
unsigned InWidth = Input->getType()->getScalarSizeInBits();
- unsigned PtrWidth = TD->getPointerTypeSizeInBits(CE->getType());
+ unsigned PtrWidth = DL.getPointerTypeSizeInBits(CE->getType());
if (PtrWidth < InWidth) {
Constant *Mask =
ConstantInt::get(CE->getContext(),
@@ -1061,15 +1050,15 @@ Constant *llvm::ConstantFoldInstOperands(unsigned Opcode, Type *DestTy,
// This requires knowing the width of a pointer, so it can't be done in
// ConstantExpr::getCast.
if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Ops[0])) {
- if (TD && CE->getOpcode() == Instruction::PtrToInt) {
+ if (CE->getOpcode() == Instruction::PtrToInt) {
Constant *SrcPtr = CE->getOperand(0);
- unsigned SrcPtrSize = TD->getPointerTypeSizeInBits(SrcPtr->getType());
+ unsigned SrcPtrSize = DL.getPointerTypeSizeInBits(SrcPtr->getType());
unsigned MidIntSize = CE->getType()->getScalarSizeInBits();
if (MidIntSize >= SrcPtrSize) {
unsigned SrcAS = SrcPtr->getType()->getPointerAddressSpace();
if (SrcAS == DestTy->getPointerAddressSpace())
- return FoldBitCast(CE->getOperand(0), DestTy, *TD);
+ return FoldBitCast(CE->getOperand(0), DestTy, DL);
}
}
}
@@ -1087,9 +1076,7 @@ Constant *llvm::ConstantFoldInstOperands(unsigned Opcode, Type *DestTy,
case Instruction::AddrSpaceCast:
return ConstantExpr::getCast(Opcode, Ops[0], DestTy);
case Instruction::BitCast:
- if (TD)
- return FoldBitCast(Ops[0], DestTy, *TD);
- return ConstantExpr::getBitCast(Ops[0], DestTy);
+ return FoldBitCast(Ops[0], DestTy, DL);
case Instruction::Select:
return ConstantExpr::getSelect(Ops[0], Ops[1], Ops[2]);
case Instruction::ExtractElement:
@@ -1099,9 +1086,9 @@ Constant *llvm::ConstantFoldInstOperands(unsigned Opcode, Type *DestTy,
case Instruction::ShuffleVector:
return ConstantExpr::getShuffleVector(Ops[0], Ops[1], Ops[2]);
case Instruction::GetElementPtr:
- if (Constant *C = CastGEPIndices(Ops, DestTy, TD, TLI))
+ if (Constant *C = CastGEPIndices(Ops, DestTy, DL, TLI))
return C;
- if (Constant *C = SymbolicallyEvaluateGEP(Ops, DestTy, TD, TLI))
+ if (Constant *C = SymbolicallyEvaluateGEP(Ops, DestTy, DL, TLI))
return C;
return ConstantExpr::getGetElementPtr(Ops[0], Ops.slice(1));
@@ -1113,43 +1100,44 @@ Constant *llvm::ConstantFoldInstOperands(unsigned Opcode, Type *DestTy,
/// returns a constant expression of the specified operands.
Constant *llvm::ConstantFoldCompareInstOperands(unsigned Predicate,
Constant *Ops0, Constant *Ops1,
- const DataLayout *TD,
+ const DataLayout &DL,
const TargetLibraryInfo *TLI) {
// fold: icmp (inttoptr x), null -> icmp x, 0
// fold: icmp (ptrtoint x), 0 -> icmp x, null
// fold: icmp (inttoptr x), (inttoptr y) -> icmp trunc/zext x, trunc/zext y
// fold: icmp (ptrtoint x), (ptrtoint y) -> icmp x, y
//
- // ConstantExpr::getCompare cannot do this, because it doesn't have TD
+ // FIXME: The following comment is out of date; the DataLayout is available here now.
+ // ConstantExpr::getCompare cannot do this, because it doesn't have DL
// around to know if bit truncation is happening.
if (ConstantExpr *CE0 = dyn_cast<ConstantExpr>(Ops0)) {
- if (TD && Ops1->isNullValue()) {
+ if (Ops1->isNullValue()) {
if (CE0->getOpcode() == Instruction::IntToPtr) {
- Type *IntPtrTy = TD->getIntPtrType(CE0->getType());
+ Type *IntPtrTy = DL.getIntPtrType(CE0->getType());
// Convert the integer value to the right size to ensure we get the
// proper extension or truncation.
Constant *C = ConstantExpr::getIntegerCast(CE0->getOperand(0),
IntPtrTy, false);
Constant *Null = Constant::getNullValue(C->getType());
- return ConstantFoldCompareInstOperands(Predicate, C, Null, TD, TLI);
+ return ConstantFoldCompareInstOperands(Predicate, C, Null, DL, TLI);
}
// Only do this transformation if the int is intptrty in size, otherwise
// there is a truncation or extension that we aren't modeling.
if (CE0->getOpcode() == Instruction::PtrToInt) {
- Type *IntPtrTy = TD->getIntPtrType(CE0->getOperand(0)->getType());
+ Type *IntPtrTy = DL.getIntPtrType(CE0->getOperand(0)->getType());
if (CE0->getType() == IntPtrTy) {
Constant *C = CE0->getOperand(0);
Constant *Null = Constant::getNullValue(C->getType());
- return ConstantFoldCompareInstOperands(Predicate, C, Null, TD, TLI);
+ return ConstantFoldCompareInstOperands(Predicate, C, Null, DL, TLI);
}
}
}
if (ConstantExpr *CE1 = dyn_cast<ConstantExpr>(Ops1)) {
- if (TD && CE0->getOpcode() == CE1->getOpcode()) {
+ if (CE0->getOpcode() == CE1->getOpcode()) {
if (CE0->getOpcode() == Instruction::IntToPtr) {
- Type *IntPtrTy = TD->getIntPtrType(CE0->getType());
+ Type *IntPtrTy = DL.getIntPtrType(CE0->getType());
// Convert the integer value to the right size to ensure we get the
// proper extension or truncation.
@@ -1157,20 +1145,17 @@ Constant *llvm::ConstantFoldCompareInstOperands(unsigned Predicate,
IntPtrTy, false);
Constant *C1 = ConstantExpr::getIntegerCast(CE1->getOperand(0),
IntPtrTy, false);
- return ConstantFoldCompareInstOperands(Predicate, C0, C1, TD, TLI);
+ return ConstantFoldCompareInstOperands(Predicate, C0, C1, DL, TLI);
}
// Only do this transformation if the int is intptrty in size, otherwise
// there is a truncation or extension that we aren't modeling.
if (CE0->getOpcode() == Instruction::PtrToInt) {
- Type *IntPtrTy = TD->getIntPtrType(CE0->getOperand(0)->getType());
+ Type *IntPtrTy = DL.getIntPtrType(CE0->getOperand(0)->getType());
if (CE0->getType() == IntPtrTy &&
CE0->getOperand(0)->getType() == CE1->getOperand(0)->getType()) {
- return ConstantFoldCompareInstOperands(Predicate,
- CE0->getOperand(0),
- CE1->getOperand(0),
- TD,
- TLI);
+ return ConstantFoldCompareInstOperands(
+ Predicate, CE0->getOperand(0), CE1->getOperand(0), DL, TLI);
}
}
}
@@ -1180,16 +1165,14 @@ Constant *llvm::ConstantFoldCompareInstOperands(unsigned Predicate,
// icmp ne (or x, y), 0 -> (icmp ne x, 0) | (icmp ne y, 0)
if ((Predicate == ICmpInst::ICMP_EQ || Predicate == ICmpInst::ICMP_NE) &&
CE0->getOpcode() == Instruction::Or && Ops1->isNullValue()) {
- Constant *LHS =
- ConstantFoldCompareInstOperands(Predicate, CE0->getOperand(0), Ops1,
- TD, TLI);
- Constant *RHS =
- ConstantFoldCompareInstOperands(Predicate, CE0->getOperand(1), Ops1,
- TD, TLI);
+ Constant *LHS = ConstantFoldCompareInstOperands(
+ Predicate, CE0->getOperand(0), Ops1, DL, TLI);
+ Constant *RHS = ConstantFoldCompareInstOperands(
+ Predicate, CE0->getOperand(1), Ops1, DL, TLI);
unsigned OpC =
Predicate == ICmpInst::ICMP_EQ ? Instruction::And : Instruction::Or;
Constant *Ops[] = { LHS, RHS };
- return ConstantFoldInstOperands(OpC, LHS->getType(), Ops, TD, TLI);
+ return ConstantFoldInstOperands(OpC, LHS->getType(), Ops, DL, TLI);
}
}
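As a reading aid, not part of the patch: the fold above rests on a plain Boolean identity, checked here on ordinary integers with a hypothetical helper.

#include <cassert>

// (X | Y) == 0  iff  X == 0 and Y == 0  -- the ICMP_EQ case, folded to And
// (X | Y) != 0  iff  X != 0 or  Y != 0  -- the ICMP_NE case, folded to Or
static void checkOrCompareIdentity(unsigned X, unsigned Y) {
  assert((((X | Y) == 0) == ((X == 0) && (Y == 0))) && "eq case");
  assert((((X | Y) != 0) == ((X != 0) || (Y != 0))) && "ne case");
}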
@@ -1451,26 +1434,16 @@ static Constant *ConstantFoldScalarCall(StringRef Name, unsigned IntrinsicID,
default: break;
case Intrinsic::fabs:
return ConstantFoldFP(fabs, V, Ty);
-#if HAVE_LOG2
case Intrinsic::log2:
return ConstantFoldFP(log2, V, Ty);
-#endif
-#if HAVE_LOG
case Intrinsic::log:
return ConstantFoldFP(log, V, Ty);
-#endif
-#if HAVE_LOG10
case Intrinsic::log10:
return ConstantFoldFP(log10, V, Ty);
-#endif
-#if HAVE_EXP
case Intrinsic::exp:
return ConstantFoldFP(exp, V, Ty);
-#endif
-#if HAVE_EXP2
case Intrinsic::exp2:
return ConstantFoldFP(exp2, V, Ty);
-#endif
case Intrinsic::floor:
return ConstantFoldFP(floor, V, Ty);
case Intrinsic::ceil:
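A sketch of what this file's signature changes mean at a call site, under the post-patch API where the folding entry points take a DataLayout reference rather than a possibly-null pointer. The helper is hypothetical; the Module is reached through BasicBlock and Function, matching the F->getParent()->getDataLayout() pattern used elsewhere in this patch.

#include "llvm/Analysis/ConstantFolding.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Module.h"
using namespace llvm;

// Hypothetical caller: no null DataLayout to test for, since every
// Instruction lives in a Module that owns one.
static Constant *tryFold(Instruction *I, const TargetLibraryInfo *TLI) {
  const DataLayout &DL =
      I->getParent()->getParent()->getParent()->getDataLayout();
  return ConstantFoldInstruction(I, DL, TLI); // null if not foldable
}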
diff --git a/lib/Analysis/DependenceAnalysis.cpp b/lib/Analysis/DependenceAnalysis.cpp
index fda664b..3374b48 100644
--- a/lib/Analysis/DependenceAnalysis.cpp
+++ b/lib/Analysis/DependenceAnalysis.cpp
@@ -52,6 +52,7 @@
//===----------------------------------------------------------------------===//
#include "llvm/Analysis/DependenceAnalysis.h"
+#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/LoopInfo.h"
@@ -59,6 +60,7 @@
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/InstIterator.h"
+#include "llvm/IR/Module.h"
#include "llvm/IR/Operator.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
@@ -225,13 +227,11 @@ bool Dependence::isScalar(unsigned level) const {
//===----------------------------------------------------------------------===//
// FullDependence methods
-FullDependence::FullDependence(Instruction *Source,
- Instruction *Destination,
+FullDependence::FullDependence(Instruction *Source, Instruction *Destination,
bool PossiblyLoopIndependent,
- unsigned CommonLevels) :
- Dependence(Source, Destination),
- Levels(CommonLevels),
- LoopIndependent(PossiblyLoopIndependent) {
+ unsigned CommonLevels)
+ : Dependence(Source, Destination), Levels(CommonLevels),
+ LoopIndependent(PossiblyLoopIndependent) {
Consistent = true;
DV = CommonLevels ? new DVEntry[CommonLevels] : nullptr;
}
@@ -625,14 +625,12 @@ void Dependence::dump(raw_ostream &OS) const {
OS << "!\n";
}
-
-
-static
-AliasAnalysis::AliasResult underlyingObjectsAlias(AliasAnalysis *AA,
- const Value *A,
- const Value *B) {
- const Value *AObj = GetUnderlyingObject(A);
- const Value *BObj = GetUnderlyingObject(B);
+static AliasAnalysis::AliasResult underlyingObjectsAlias(AliasAnalysis *AA,
+ const DataLayout &DL,
+ const Value *A,
+ const Value *B) {
+ const Value *AObj = GetUnderlyingObject(A, DL);
+ const Value *BObj = GetUnderlyingObject(B, DL);
return AA->alias(AObj, AA->getTypeStoreSize(AObj->getType()),
BObj, AA->getTypeStoreSize(BObj->getType()));
}
@@ -3314,7 +3312,8 @@ DependenceAnalysis::depends(Instruction *Src, Instruction *Dst,
Value *SrcPtr = getPointerOperand(Src);
Value *DstPtr = getPointerOperand(Dst);
- switch (underlyingObjectsAlias(AA, DstPtr, SrcPtr)) {
+ switch (underlyingObjectsAlias(AA, F->getParent()->getDataLayout(), DstPtr,
+ SrcPtr)) {
case AliasAnalysis::MayAlias:
case AliasAnalysis::PartialAlias:
// cannot analyse objects if we don't understand their aliasing.
@@ -3347,9 +3346,9 @@ DependenceAnalysis::depends(Instruction *Src, Instruction *Dst,
DEBUG(dbgs() << " SrcPtrSCEV = " << *SrcPtrSCEV << "\n");
DEBUG(dbgs() << " DstPtrSCEV = " << *DstPtrSCEV << "\n");
- UsefulGEP =
- isLoopInvariant(SrcPtrSCEV, LI->getLoopFor(Src->getParent())) &&
- isLoopInvariant(DstPtrSCEV, LI->getLoopFor(Dst->getParent()));
+ UsefulGEP = isLoopInvariant(SrcPtrSCEV, LI->getLoopFor(Src->getParent())) &&
+ isLoopInvariant(DstPtrSCEV, LI->getLoopFor(Dst->getParent())) &&
+ (SrcGEP->getNumOperands() == DstGEP->getNumOperands());
}
unsigned Pairs = UsefulGEP ? SrcGEP->idx_end() - SrcGEP->idx_begin() : 1;
SmallVector<Subscript, 4> Pair(Pairs);
@@ -3472,8 +3471,7 @@ DependenceAnalysis::depends(Instruction *Src, Instruction *Dst,
LI->getLoopFor(Dst->getParent()),
Pair[SI].Loops);
Result.Consistent = false;
- }
- else if (Pair[SI].Classification == Subscript::ZIV) {
+ } else if (Pair[SI].Classification == Subscript::ZIV) {
// always separable
Separable.set(SI);
}
@@ -3525,8 +3523,8 @@ DependenceAnalysis::depends(Instruction *Src, Instruction *Dst,
DEBUG(dbgs() << ", SIV\n");
unsigned Level;
const SCEV *SplitIter = nullptr;
- if (testSIV(Pair[SI].Src, Pair[SI].Dst, Level,
- Result, NewConstraint, SplitIter))
+ if (testSIV(Pair[SI].Src, Pair[SI].Dst, Level, Result, NewConstraint,
+ SplitIter))
return nullptr;
break;
}
@@ -3574,8 +3572,8 @@ DependenceAnalysis::depends(Instruction *Src, Instruction *Dst,
unsigned Level;
const SCEV *SplitIter = nullptr;
DEBUG(dbgs() << "SIV\n");
- if (testSIV(Pair[SJ].Src, Pair[SJ].Dst, Level,
- Result, NewConstraint, SplitIter))
+ if (testSIV(Pair[SJ].Src, Pair[SJ].Dst, Level, Result, NewConstraint,
+ SplitIter))
return nullptr;
ConstrainedLevels.set(Level);
if (intersectConstraints(&Constraints[Level], &NewConstraint)) {
@@ -3651,8 +3649,10 @@ DependenceAnalysis::depends(Instruction *Src, Instruction *Dst,
// update Result.DV from constraint vector
DEBUG(dbgs() << " updating\n");
- for (int SJ = ConstrainedLevels.find_first();
- SJ >= 0; SJ = ConstrainedLevels.find_next(SJ)) {
+ for (int SJ = ConstrainedLevels.find_first(); SJ >= 0;
+ SJ = ConstrainedLevels.find_next(SJ)) {
+ if (SJ > (int)CommonLevels)
+ break;
updateDirection(Result.DV[SJ - 1], Constraints[SJ]);
if (Result.DV[SJ - 1].Direction == Dependence::DVEntry::NONE)
return nullptr;
@@ -3759,8 +3759,8 @@ const SCEV *DependenceAnalysis::getSplitIteration(const Dependence &Dep,
assert(isLoadOrStore(Dst));
Value *SrcPtr = getPointerOperand(Src);
Value *DstPtr = getPointerOperand(Dst);
- assert(underlyingObjectsAlias(AA, DstPtr, SrcPtr) ==
- AliasAnalysis::MustAlias);
+ assert(underlyingObjectsAlias(AA, F->getParent()->getDataLayout(), DstPtr,
+ SrcPtr) == AliasAnalysis::MustAlias);
// establish loop nesting levels
establishNestingLevels(Src, Dst);
@@ -3775,9 +3775,9 @@ const SCEV *DependenceAnalysis::getSplitIteration(const Dependence &Dep,
SrcGEP->getPointerOperandType() == DstGEP->getPointerOperandType()) {
const SCEV *SrcPtrSCEV = SE->getSCEV(SrcGEP->getPointerOperand());
const SCEV *DstPtrSCEV = SE->getSCEV(DstGEP->getPointerOperand());
- UsefulGEP =
- isLoopInvariant(SrcPtrSCEV, LI->getLoopFor(Src->getParent())) &&
- isLoopInvariant(DstPtrSCEV, LI->getLoopFor(Dst->getParent()));
+ UsefulGEP = isLoopInvariant(SrcPtrSCEV, LI->getLoopFor(Src->getParent())) &&
+ isLoopInvariant(DstPtrSCEV, LI->getLoopFor(Dst->getParent())) &&
+ (SrcGEP->getNumOperands() == DstGEP->getNumOperands());
}
unsigned Pairs = UsefulGEP ? SrcGEP->idx_end() - SrcGEP->idx_begin() : 1;
SmallVector<Subscript, 4> Pair(Pairs);
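Why the extra conjunct on UsefulGEP above matters, sketched (illustration only): subscripts are paired positionally from the two GEPs, which is only meaningful when both GEPs have the same shape. A distilled, hypothetical form of the added guard:

#include "llvm/IR/Operator.h"
using namespace llvm;

// Pair-wise subscript testing walks the two GEPs' index lists in lockstep;
// differing operand counts (e.g. a GEP into a[i] vs. one into a[i][j])
// would misalign the pairs, so such GEPs fall back to a single subscript.
static bool haveMatchingShape(const GEPOperator *SrcGEP,
                              const GEPOperator *DstGEP) {
  return SrcGEP->getNumOperands() == DstGEP->getNumOperands();
}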
diff --git a/lib/Analysis/IPA/CallGraphSCCPass.cpp b/lib/Analysis/IPA/CallGraphSCCPass.cpp
index ded1de7..9d607cc 100644
--- a/lib/Analysis/IPA/CallGraphSCCPass.cpp
+++ b/lib/Analysis/IPA/CallGraphSCCPass.cpp
@@ -49,7 +49,7 @@ public:
explicit CGPassManager()
: ModulePass(ID), PMDataManager() { }
- /// run - Execute all of the passes scheduled for execution. Keep track of
+ /// Execute all of the passes scheduled for execution. Keep track of
/// whether any of the passes modifies the module, and if so, return true.
bool runOnModule(Module &M) override;
@@ -142,9 +142,8 @@ bool CGPassManager::RunPassOnSCC(Pass *P, CallGraphSCC &CurSCC,
FPPassManager *FPP = (FPPassManager*)P;
// Run pass P on all functions in the current SCC.
- for (CallGraphSCC::iterator I = CurSCC.begin(), E = CurSCC.end();
- I != E; ++I) {
- if (Function *F = (*I)->getFunction()) {
+ for (CallGraphNode *CGN : CurSCC) {
+ if (Function *F = CGN->getFunction()) {
dumpPassInfo(P, EXECUTION_MSG, ON_FUNCTION_MSG, F->getName());
{
TimeRegion PassTimer(getPassTimer(FPP));
@@ -165,7 +164,7 @@ bool CGPassManager::RunPassOnSCC(Pass *P, CallGraphSCC &CurSCC,
}
-/// RefreshCallGraph - Scan the functions in the specified CFG and resync the
+/// Scan the functions in the specified CFG and resync the
/// callgraph with the call sites found in it. This is used after
/// FunctionPasses have potentially munged the callgraph, and can be used after
/// CallGraphSCC passes to verify that they correctly updated the callgraph.
@@ -181,9 +180,8 @@ bool CGPassManager::RefreshCallGraph(CallGraphSCC &CurSCC,
DEBUG(dbgs() << "CGSCCPASSMGR: Refreshing SCC with " << CurSCC.size()
<< " nodes:\n";
- for (CallGraphSCC::iterator I = CurSCC.begin(), E = CurSCC.end();
- I != E; ++I)
- (*I)->dump();
+ for (CallGraphNode *CGN : CurSCC)
+ CGN->dump();
);
bool MadeChange = false;
@@ -357,9 +355,8 @@ bool CGPassManager::RefreshCallGraph(CallGraphSCC &CurSCC,
DEBUG(if (MadeChange) {
dbgs() << "CGSCCPASSMGR: Refreshed SCC is now:\n";
- for (CallGraphSCC::iterator I = CurSCC.begin(), E = CurSCC.end();
- I != E; ++I)
- (*I)->dump();
+ for (CallGraphNode *CGN : CurSCC)
+ CGN->dump();
if (DevirtualizedCall)
dbgs() << "CGSCCPASSMGR: Refresh devirtualized a call!\n";
@@ -372,15 +369,15 @@ bool CGPassManager::RefreshCallGraph(CallGraphSCC &CurSCC,
return DevirtualizedCall;
}
-/// RunAllPassesOnSCC - Execute the body of the entire pass manager on the
-/// specified SCC. This keeps track of whether a function pass devirtualizes
+/// Execute the body of the entire pass manager on the specified SCC.
+/// This keeps track of whether a function pass devirtualizes
/// any calls and returns it in DevirtualizedCall.
bool CGPassManager::RunAllPassesOnSCC(CallGraphSCC &CurSCC, CallGraph &CG,
bool &DevirtualizedCall) {
bool Changed = false;
- // CallGraphUpToDate - Keep track of whether the callgraph is known to be
- // up-to-date or not. The CGSSC pass manager runs two types of passes:
+ // Keep track of whether the callgraph is known to be up-to-date or not.
+ // The CGSSC pass manager runs two types of passes:
// CallGraphSCC Passes and other random function passes. Because other
// random function passes are not CallGraph aware, they may clobber the
// call graph by introducing new calls or deleting other ones. This flag
@@ -433,7 +430,7 @@ bool CGPassManager::RunAllPassesOnSCC(CallGraphSCC &CurSCC, CallGraph &CG,
return Changed;
}
-/// run - Execute all of the passes scheduled for execution. Keep track of
+/// Execute all of the passes scheduled for execution. Keep track of
/// whether any of the passes modifies the module, and if so, return true.
bool CGPassManager::runOnModule(Module &M) {
CallGraph &CG = getAnalysis<CallGraphWrapperPass>().getCallGraph();
@@ -519,7 +516,7 @@ bool CGPassManager::doFinalization(CallGraph &CG) {
// CallGraphSCC Implementation
//===----------------------------------------------------------------------===//
-/// ReplaceNode - This informs the SCC and the pass manager that the specified
+/// This informs the SCC and the pass manager that the specified
/// Old node has been deleted, and New is to be used in its place.
void CallGraphSCC::ReplaceNode(CallGraphNode *Old, CallGraphNode *New) {
assert(Old != New && "Should not replace node with self");
@@ -578,8 +575,8 @@ void CallGraphSCCPass::assignPassManager(PMStack &PMS,
CGP->add(this);
}
-/// getAnalysisUsage - For this class, we declare that we require and preserve
-/// the call graph. If the derived class implements this method, it should
+/// For this class, we declare that we require and preserve the call graph.
+/// If the derived class implements this method, it should
/// always explicitly call the implementation here.
void CallGraphSCCPass::getAnalysisUsage(AnalysisUsage &AU) const {
AU.addRequired<CallGraphWrapperPass>();
@@ -609,9 +606,9 @@ namespace {
bool runOnSCC(CallGraphSCC &SCC) override {
Out << Banner;
- for (CallGraphSCC::iterator I = SCC.begin(), E = SCC.end(); I != E; ++I) {
- if ((*I)->getFunction())
- (*I)->getFunction()->print(Out);
+ for (CallGraphNode *CGN : SCC) {
+ if (CGN->getFunction())
+ CGN->getFunction()->print(Out);
else
Out << "\nPrinting <null> Function\n";
}
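The iterator-to-range-for cleanups in this file all take one shape: CallGraphSCC exposes begin()/end() over CallGraphNode*, so the explicit iterator loops become range-based for loops. A hedged sketch with a hypothetical visitor (function_ref is from llvm/ADT/STLExtras.h):

#include "llvm/ADT/STLExtras.h"
#include "llvm/Analysis/CallGraph.h"
#include "llvm/Analysis/CallGraphSCCPass.h"
#include "llvm/IR/Function.h"
using namespace llvm;

// Run Body over every function in the SCC, skipping the null-function
// nodes that the loops above also guard against.
static void forEachFunctionInSCC(CallGraphSCC &SCC,
                                 function_ref<void(Function &)> Body) {
  for (CallGraphNode *CGN : SCC)
    if (Function *F = CGN->getFunction())
      Body(*F);
}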
diff --git a/lib/Analysis/IPA/GlobalsModRef.cpp b/lib/Analysis/IPA/GlobalsModRef.cpp
index 607c068..2208f32 100644
--- a/lib/Analysis/IPA/GlobalsModRef.cpp
+++ b/lib/Analysis/IPA/GlobalsModRef.cpp
@@ -96,7 +96,7 @@ namespace {
}
bool runOnModule(Module &M) override {
- InitializeAliasAnalysis(this);
+ InitializeAliasAnalysis(this, &M.getDataLayout());
// Find non-addr taken globals.
AnalyzeGlobals(M);
@@ -322,7 +322,8 @@ bool GlobalsModRef::AnalyzeIndirectGlobalMemory(GlobalValue *GV) {
continue;
// Check the value being stored.
- Value *Ptr = GetUnderlyingObject(SI->getOperand(0));
+ Value *Ptr = GetUnderlyingObject(SI->getOperand(0),
+ GV->getParent()->getDataLayout());
if (!isAllocLikeFn(Ptr, TLI))
return false; // Too hard to analyze.
@@ -481,8 +482,8 @@ AliasAnalysis::AliasResult
GlobalsModRef::alias(const Location &LocA,
const Location &LocB) {
// Get the base object these pointers point to.
- const Value *UV1 = GetUnderlyingObject(LocA.Ptr);
- const Value *UV2 = GetUnderlyingObject(LocB.Ptr);
+ const Value *UV1 = GetUnderlyingObject(LocA.Ptr, *DL);
+ const Value *UV2 = GetUnderlyingObject(LocB.Ptr, *DL);
// If either of the underlying values is a global, they may be non-addr-taken
// globals, which we can answer queries about.
@@ -540,8 +541,9 @@ GlobalsModRef::getModRefInfo(ImmutableCallSite CS,
// If we are asking for mod/ref info of a direct call with a pointer to a
// global we are tracking, return information if we have it.
+ const DataLayout &DL = CS.getCaller()->getParent()->getDataLayout();
if (const GlobalValue *GV =
- dyn_cast<GlobalValue>(GetUnderlyingObject(Loc.Ptr)))
+ dyn_cast<GlobalValue>(GetUnderlyingObject(Loc.Ptr, DL)))
if (GV->hasLocalLinkage())
if (const Function *F = CS.getCalledFunction())
if (NonAddressTakenGlobals.count(GV))
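The recurring change in this file, sketched: GetUnderlyingObject now requires a DataLayout, pulled from whichever Module is in reach. A hypothetical wrapper, assuming the post-patch ValueTracking signature shown at the call sites above:

#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Module.h"
using namespace llvm;

// Strip casts and GEPs from V down to its base object, fetching the
// DataLayout from the enclosing function's module as the call sites above do.
static const Value *underlyingObjectOf(const Value *V, const Function &F) {
  const DataLayout &DL = F.getParent()->getDataLayout();
  return GetUnderlyingObject(V, DL);
}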
diff --git a/lib/Analysis/IPA/InlineCost.cpp b/lib/Analysis/IPA/InlineCost.cpp
index cd494ba..eeb3b87 100644
--- a/lib/Analysis/IPA/InlineCost.cpp
+++ b/lib/Analysis/IPA/InlineCost.cpp
@@ -45,9 +45,6 @@ class CallAnalyzer : public InstVisitor<CallAnalyzer, bool> {
typedef InstVisitor<CallAnalyzer, bool> Base;
friend class InstVisitor<CallAnalyzer, bool>;
- // DataLayout if available, or null.
- const DataLayout *const DL;
-
/// The TargetTransformInfo available for this compilation.
const TargetTransformInfo &TTI;
@@ -145,9 +142,9 @@ class CallAnalyzer : public InstVisitor<CallAnalyzer, bool> {
bool visitUnreachableInst(UnreachableInst &I);
public:
- CallAnalyzer(const DataLayout *DL, const TargetTransformInfo &TTI,
- AssumptionCacheTracker *ACT, Function &Callee, int Threshold)
- : DL(DL), TTI(TTI), ACT(ACT), F(Callee), Threshold(Threshold), Cost(0),
+ CallAnalyzer(const TargetTransformInfo &TTI, AssumptionCacheTracker *ACT,
+ Function &Callee, int Threshold)
+ : TTI(TTI), ACT(ACT), F(Callee), Threshold(Threshold), Cost(0),
IsCallerRecursive(false), IsRecursiveCall(false),
ExposesReturnsTwice(false), HasDynamicAlloca(false),
ContainsNoDuplicateCall(false), HasReturn(false), HasIndirectBr(false),
@@ -244,10 +241,8 @@ bool CallAnalyzer::isGEPOffsetConstant(GetElementPtrInst &GEP) {
/// Returns false if unable to compute the offset for any reason. Respects any
/// simplified values known during the analysis of this callsite.
bool CallAnalyzer::accumulateGEPOffset(GEPOperator &GEP, APInt &Offset) {
- if (!DL)
- return false;
-
- unsigned IntPtrWidth = DL->getPointerSizeInBits();
+ const DataLayout &DL = F.getParent()->getDataLayout();
+ unsigned IntPtrWidth = DL.getPointerSizeInBits();
assert(IntPtrWidth == Offset.getBitWidth());
for (gep_type_iterator GTI = gep_type_begin(GEP), GTE = gep_type_end(GEP);
@@ -263,12 +258,12 @@ bool CallAnalyzer::accumulateGEPOffset(GEPOperator &GEP, APInt &Offset) {
// Handle a struct index, which adds its field offset to the pointer.
if (StructType *STy = dyn_cast<StructType>(*GTI)) {
unsigned ElementIdx = OpC->getZExtValue();
- const StructLayout *SL = DL->getStructLayout(STy);
+ const StructLayout *SL = DL.getStructLayout(STy);
Offset += APInt(IntPtrWidth, SL->getElementOffset(ElementIdx));
continue;
}
- APInt TypeSize(IntPtrWidth, DL->getTypeAllocSize(GTI.getIndexedType()));
+ APInt TypeSize(IntPtrWidth, DL.getTypeAllocSize(GTI.getIndexedType()));
Offset += OpC->getValue().sextOrTrunc(IntPtrWidth) * TypeSize;
}
return true;
@@ -289,9 +284,9 @@ bool CallAnalyzer::visitAlloca(AllocaInst &I) {
// Accumulate the allocated size.
if (I.isStaticAlloca()) {
+ const DataLayout &DL = F.getParent()->getDataLayout();
Type *Ty = I.getAllocatedType();
- AllocatedSize += (DL ? DL->getTypeAllocSize(Ty) :
- Ty->getPrimitiveSizeInBits());
+ AllocatedSize += DL.getTypeAllocSize(Ty);
}
// We will happily inline static alloca instructions.
@@ -327,7 +322,7 @@ bool CallAnalyzer::visitGetElementPtr(GetElementPtrInst &I) {
// Try to fold GEPs of constant-offset call site argument pointers. This
// requires target data and inbounds GEPs.
- if (DL && I.isInBounds()) {
+ if (I.isInBounds()) {
// Check if we have a base + offset for the pointer.
Value *Ptr = I.getPointerOperand();
std::pair<Value *, APInt> BaseAndOffset = ConstantOffsetPtrs.lookup(Ptr);
@@ -396,7 +391,6 @@ bool CallAnalyzer::visitBitCast(BitCastInst &I) {
}
bool CallAnalyzer::visitPtrToInt(PtrToIntInst &I) {
- const DataLayout *DL = I.getDataLayout();
// Propagate constants through ptrtoint.
Constant *COp = dyn_cast<Constant>(I.getOperand(0));
if (!COp)
@@ -410,7 +404,8 @@ bool CallAnalyzer::visitPtrToInt(PtrToIntInst &I) {
// Track base/offset pairs when converted to a plain integer provided the
// integer is large enough to represent the pointer.
unsigned IntegerSize = I.getType()->getScalarSizeInBits();
- if (DL && IntegerSize >= DL->getPointerSizeInBits()) {
+ const DataLayout &DL = F.getParent()->getDataLayout();
+ if (IntegerSize >= DL.getPointerSizeInBits()) {
std::pair<Value *, APInt> BaseAndOffset
= ConstantOffsetPtrs.lookup(I.getOperand(0));
if (BaseAndOffset.first)
@@ -433,7 +428,6 @@ bool CallAnalyzer::visitPtrToInt(PtrToIntInst &I) {
}
bool CallAnalyzer::visitIntToPtr(IntToPtrInst &I) {
- const DataLayout *DL = I.getDataLayout();
// Propagate constants through inttoptr.
Constant *COp = dyn_cast<Constant>(I.getOperand(0));
if (!COp)
@@ -448,7 +442,8 @@ bool CallAnalyzer::visitIntToPtr(IntToPtrInst &I) {
// modifications provided the integer is not too large.
Value *Op = I.getOperand(0);
unsigned IntegerSize = Op->getType()->getScalarSizeInBits();
- if (DL && IntegerSize <= DL->getPointerSizeInBits()) {
+ const DataLayout &DL = F.getParent()->getDataLayout();
+ if (IntegerSize <= DL.getPointerSizeInBits()) {
std::pair<Value *, APInt> BaseAndOffset = ConstantOffsetPtrs.lookup(Op);
if (BaseAndOffset.first)
ConstantOffsetPtrs[&I] = BaseAndOffset;
@@ -485,12 +480,14 @@ bool CallAnalyzer::visitUnaryInstruction(UnaryInstruction &I) {
Constant *COp = dyn_cast<Constant>(Operand);
if (!COp)
COp = SimplifiedValues.lookup(Operand);
- if (COp)
+ if (COp) {
+ const DataLayout &DL = F.getParent()->getDataLayout();
if (Constant *C = ConstantFoldInstOperands(I.getOpcode(), I.getType(),
COp, DL)) {
SimplifiedValues[&I] = C;
return true;
}
+ }
// Disable any SROA on the argument to arbitrary unary operators.
disableSROA(Operand);
@@ -595,6 +592,7 @@ bool CallAnalyzer::visitSub(BinaryOperator &I) {
bool CallAnalyzer::visitBinaryOperator(BinaryOperator &I) {
Value *LHS = I.getOperand(0), *RHS = I.getOperand(1);
+ const DataLayout &DL = F.getParent()->getDataLayout();
if (!isa<Constant>(LHS))
if (Constant *SimpleLHS = SimplifiedValues.lookup(LHS))
LHS = SimpleLHS;
@@ -623,7 +621,7 @@ bool CallAnalyzer::visitBinaryOperator(BinaryOperator &I) {
bool CallAnalyzer::visitLoad(LoadInst &I) {
Value *SROAArg;
DenseMap<Value *, int>::iterator CostIt;
- if (lookupSROAArgAndCost(I.getOperand(0), SROAArg, CostIt)) {
+ if (lookupSROAArgAndCost(I.getPointerOperand(), SROAArg, CostIt)) {
if (I.isSimple()) {
accumulateSROACost(CostIt, InlineConstants::InstrCost);
return true;
@@ -638,7 +636,7 @@ bool CallAnalyzer::visitLoad(LoadInst &I) {
bool CallAnalyzer::visitStore(StoreInst &I) {
Value *SROAArg;
DenseMap<Value *, int>::iterator CostIt;
- if (lookupSROAArgAndCost(I.getOperand(0), SROAArg, CostIt)) {
+ if (lookupSROAArgAndCost(I.getPointerOperand(), SROAArg, CostIt)) {
if (I.isSimple()) {
accumulateSROACost(CostIt, InlineConstants::InstrCost);
return true;
@@ -788,7 +786,7 @@ bool CallAnalyzer::visitCallSite(CallSite CS) {
// during devirtualization and so we want to give it a hefty bonus for
// inlining, but cap that bonus in the event that inlining wouldn't pan
// out. Pretend to inline the function, with a custom threshold.
- CallAnalyzer CA(DL, TTI, ACT, *F, InlineConstants::IndirectCallThreshold);
+ CallAnalyzer CA(TTI, ACT, *F, InlineConstants::IndirectCallThreshold);
if (CA.analyzeCall(CS)) {
// We were able to inline the indirect call! Subtract the cost from the
// bonus we want to apply, but don't go below zero.
@@ -976,10 +974,11 @@ bool CallAnalyzer::analyzeBlock(BasicBlock *BB,
/// returns 0 if V is not a pointer, and returns the constant '0' if there are
/// no constant offsets applied.
ConstantInt *CallAnalyzer::stripAndComputeInBoundsConstantOffsets(Value *&V) {
- if (!DL || !V->getType()->isPointerTy())
+ if (!V->getType()->isPointerTy())
return nullptr;
- unsigned IntPtrWidth = DL->getPointerSizeInBits();
+ const DataLayout &DL = F.getParent()->getDataLayout();
+ unsigned IntPtrWidth = DL.getPointerSizeInBits();
APInt Offset = APInt::getNullValue(IntPtrWidth);
// Even though we don't look through PHI nodes, we could be called on an
@@ -1003,7 +1002,7 @@ ConstantInt *CallAnalyzer::stripAndComputeInBoundsConstantOffsets(Value *&V) {
assert(V->getType()->isPointerTy() && "Unexpected operand type!");
} while (Visited.insert(V).second);
- Type *IntPtrTy = DL->getIntPtrType(V->getContext());
+ Type *IntPtrTy = DL.getIntPtrType(V->getContext());
return cast<ConstantInt>(ConstantInt::get(IntPtrTy, Offset));
}
@@ -1034,16 +1033,17 @@ bool CallAnalyzer::analyzeCall(CallSite CS) {
assert(NumVectorInstructions == 0);
FiftyPercentVectorBonus = Threshold;
TenPercentVectorBonus = Threshold / 2;
+ const DataLayout &DL = F.getParent()->getDataLayout();
// Give out bonuses per argument, as the instructions setting them up will
// be gone after inlining.
for (unsigned I = 0, E = CS.arg_size(); I != E; ++I) {
- if (DL && CS.isByValArgument(I)) {
+ if (CS.isByValArgument(I)) {
// We approximate the number of loads and stores needed by dividing the
// size of the byval type by the target's pointer size.
PointerType *PTy = cast<PointerType>(CS.getArgument(I)->getType());
- unsigned TypeSize = DL->getTypeSizeInBits(PTy->getElementType());
- unsigned PointerSize = DL->getPointerSizeInBits();
+ unsigned TypeSize = DL.getTypeSizeInBits(PTy->getElementType());
+ unsigned PointerSize = DL.getPointerSizeInBits();
// Ceiling division.
unsigned NumStores = (TypeSize + PointerSize - 1) / PointerSize;
@@ -1333,8 +1333,7 @@ InlineCost InlineCostAnalysis::getInlineCost(CallSite CS, Function *Callee,
DEBUG(llvm::dbgs() << " Analyzing call of " << Callee->getName()
<< "...\n");
- CallAnalyzer CA(Callee->getDataLayout(), TTIWP->getTTI(*Callee),
- ACT, *Callee, Threshold);
+ CallAnalyzer CA(TTIWP->getTTI(*Callee), ACT, *Callee, Threshold);
bool ShouldInline = CA.analyzeCall(CS);
DEBUG(CA.dump());
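The CallAnalyzer changes follow the same shape: the nullable DL member and its
"if (DL)" guards disappear because the analyzed Function always has a parent
Module carrying a layout, so size queries become unconditional. A hedged
sketch of the visitAlloca-style query after the change; names are
illustrative:

// Before the rebase the fallback was Ty->getPrimitiveSizeInBits() when no
// layout was available; with a guaranteed DataLayout the precise allocation
// size can always be computed.
#include "llvm/IR/Function.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Module.h"
using namespace llvm;

static uint64_t staticAllocaSize(const AllocaInst &AI, const Function &F) {
  const DataLayout &DL = F.getParent()->getDataLayout();
  return DL.getTypeAllocSize(AI.getAllocatedType());
}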
diff --git a/lib/Analysis/IVUsers.cpp b/lib/Analysis/IVUsers.cpp
index 140753c..b88b249 100644
--- a/lib/Analysis/IVUsers.cpp
+++ b/lib/Analysis/IVUsers.cpp
@@ -22,6 +22,7 @@
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Instructions.h"
+#include "llvm/IR/Module.h"
#include "llvm/IR/Type.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
@@ -113,6 +114,8 @@ static bool isSimplifiedLoopNest(BasicBlock *BB, const DominatorTree *DT,
/// return true. Otherwise, return false.
bool IVUsers::AddUsersImpl(Instruction *I,
SmallPtrSetImpl<Loop*> &SimpleLoopNests) {
+ const DataLayout &DL = I->getModule()->getDataLayout();
+
// Add this IV user to the Processed set before returning false to ensure that
// all IV users are members of the set. See IVUsers::isIVUserOrOperand.
if (!Processed.insert(I).second)
@@ -124,14 +127,14 @@ bool IVUsers::AddUsersImpl(Instruction *I,
// IVUsers is used by LSR which assumes that all SCEV expressions are safe to
// pass to SCEVExpander. Expressions are not safe to expand if they represent
// operations that are not safe to speculate, namely integer division.
- if (!isa<PHINode>(I) && !isSafeToSpeculativelyExecute(I, DL))
+ if (!isa<PHINode>(I) && !isSafeToSpeculativelyExecute(I))
return false;
// LSR is not APInt clean, do not touch integers bigger than 64-bits.
// Also avoid creating IVs of non-native types. For example, we don't want a
// 64-bit IV in 32-bit code just because the loop has one 64-bit cast.
uint64_t Width = SE->getTypeSizeInBits(I->getType());
- if (Width > 64 || (DL && !DL->isLegalInteger(Width)))
+ if (Width > 64 || !DL.isLegalInteger(Width))
return false;
// Get the symbolic expression for this instruction.
@@ -253,8 +256,6 @@ bool IVUsers::runOnLoop(Loop *l, LPPassManager &LPM) {
LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
SE = &getAnalysis<ScalarEvolution>();
- DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>();
- DL = DLP ? &DLP->getDataLayout() : nullptr;
// Find all uses of induction variables in this loop, and categorize
// them by stride. Start by finding all of the PHI nodes in the header for
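With the layout now mandatory, the IVUsers width filter above loses its null
guard. A small sketch of the check as it reads after the change, using the
same getModule()/getDataLayout() chain; the function name is illustrative:

// LSR is not APInt-clean, so induction variables wider than 64 bits, or of
// a width the target cannot hold in a native integer register, are rejected.
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Module.h"
using namespace llvm;

static bool isUsableIVWidth(const Instruction *I, uint64_t Width) {
  const DataLayout &DL = I->getModule()->getDataLayout();
  return Width <= 64 && DL.isLegalInteger(Width);
}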
diff --git a/lib/Analysis/InstructionSimplify.cpp b/lib/Analysis/InstructionSimplify.cpp
index 0cb0982..99c477d 100644
--- a/lib/Analysis/InstructionSimplify.cpp
+++ b/lib/Analysis/InstructionSimplify.cpp
@@ -45,13 +45,13 @@ STATISTIC(NumReassoc, "Number of reassociations");
namespace {
struct Query {
- const DataLayout *DL;
+ const DataLayout &DL;
const TargetLibraryInfo *TLI;
const DominatorTree *DT;
AssumptionCache *AC;
const Instruction *CxtI;
- Query(const DataLayout *DL, const TargetLibraryInfo *tli,
+ Query(const DataLayout &DL, const TargetLibraryInfo *tli,
const DominatorTree *dt, AssumptionCache *ac = nullptr,
const Instruction *cxti = nullptr)
: DL(DL), TLI(tli), DT(dt), AC(ac), CxtI(cxti) {}
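Turning Query::DL from "const DataLayout *" into "const DataLayout &" is more
than spelling: a reference member must be bound at construction, so the
"no layout available" state that callers previously guarded with if-checks
can no longer even be represented. A self-contained sketch of the pattern,
with generic names rather than LLVM's:

// The reference is bound exactly once, in the constructor; every use site
// can then rely on it unconditionally.
struct Layout { unsigned PointerBits; };

struct QuerySketch {
  const Layout &DL; // mandatory, bound at construction
  explicit QuerySketch(const Layout &DL) : DL(DL) {}
  unsigned pointerBits() const { return DL.PointerBits; } // no null check
};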
@@ -584,7 +584,7 @@ static Value *SimplifyAddInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW,
}
Value *llvm::SimplifyAddInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW,
- const DataLayout *DL, const TargetLibraryInfo *TLI,
+ const DataLayout &DL, const TargetLibraryInfo *TLI,
const DominatorTree *DT, AssumptionCache *AC,
const Instruction *CxtI) {
return ::SimplifyAddInst(Op0, Op1, isNSW, isNUW, Query(DL, TLI, DT, AC, CxtI),
@@ -601,17 +601,11 @@ Value *llvm::SimplifyAddInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW,
/// This is very similar to GetPointerBaseWithConstantOffset except it doesn't
/// follow non-inbounds geps. This allows it to remain usable for icmp ult/etc.
/// folding.
-static Constant *stripAndComputeConstantOffsets(const DataLayout *DL,
- Value *&V,
+static Constant *stripAndComputeConstantOffsets(const DataLayout &DL, Value *&V,
bool AllowNonInbounds = false) {
assert(V->getType()->getScalarType()->isPointerTy());
- // Without DataLayout, just be conservative for now. Theoretically, more could
- // be done in this case.
- if (!DL)
- return ConstantInt::get(IntegerType::get(V->getContext(), 64), 0);
-
- Type *IntPtrTy = DL->getIntPtrType(V->getType())->getScalarType();
+ Type *IntPtrTy = DL.getIntPtrType(V->getType())->getScalarType();
APInt Offset = APInt::getNullValue(IntPtrTy->getIntegerBitWidth());
// Even though we don't look through PHI nodes, we could be called on an
@@ -621,7 +615,7 @@ static Constant *stripAndComputeConstantOffsets(const DataLayout *DL,
do {
if (GEPOperator *GEP = dyn_cast<GEPOperator>(V)) {
if ((!AllowNonInbounds && !GEP->isInBounds()) ||
- !GEP->accumulateConstantOffset(*DL, Offset))
+ !GEP->accumulateConstantOffset(DL, Offset))
break;
V = GEP->getPointerOperand();
} else if (Operator::getOpcode(V) == Instruction::BitCast) {
@@ -646,8 +640,8 @@ static Constant *stripAndComputeConstantOffsets(const DataLayout *DL,
/// \brief Compute the constant difference between two pointer values.
/// If the difference is not a constant, returns zero.
-static Constant *computePointerDifference(const DataLayout *DL,
- Value *LHS, Value *RHS) {
+static Constant *computePointerDifference(const DataLayout &DL, Value *LHS,
+ Value *RHS) {
Constant *LHSOffset = stripAndComputeConstantOffsets(DL, LHS);
Constant *RHSOffset = stripAndComputeConstantOffsets(DL, RHS);
@@ -783,7 +777,7 @@ static Value *SimplifySubInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW,
}
Value *llvm::SimplifySubInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW,
- const DataLayout *DL, const TargetLibraryInfo *TLI,
+ const DataLayout &DL, const TargetLibraryInfo *TLI,
const DominatorTree *DT, AssumptionCache *AC,
const Instruction *CxtI) {
return ::SimplifySubInst(Op0, Op1, isNSW, isNUW, Query(DL, TLI, DT, AC, CxtI),
@@ -962,7 +956,7 @@ static Value *SimplifyMulInst(Value *Op0, Value *Op1, const Query &Q,
}
Value *llvm::SimplifyFAddInst(Value *Op0, Value *Op1, FastMathFlags FMF,
- const DataLayout *DL,
+ const DataLayout &DL,
const TargetLibraryInfo *TLI,
const DominatorTree *DT, AssumptionCache *AC,
const Instruction *CxtI) {
@@ -971,7 +965,7 @@ Value *llvm::SimplifyFAddInst(Value *Op0, Value *Op1, FastMathFlags FMF,
}
Value *llvm::SimplifyFSubInst(Value *Op0, Value *Op1, FastMathFlags FMF,
- const DataLayout *DL,
+ const DataLayout &DL,
const TargetLibraryInfo *TLI,
const DominatorTree *DT, AssumptionCache *AC,
const Instruction *CxtI) {
@@ -980,7 +974,7 @@ Value *llvm::SimplifyFSubInst(Value *Op0, Value *Op1, FastMathFlags FMF,
}
Value *llvm::SimplifyFMulInst(Value *Op0, Value *Op1, FastMathFlags FMF,
- const DataLayout *DL,
+ const DataLayout &DL,
const TargetLibraryInfo *TLI,
const DominatorTree *DT, AssumptionCache *AC,
const Instruction *CxtI) {
@@ -988,7 +982,7 @@ Value *llvm::SimplifyFMulInst(Value *Op0, Value *Op1, FastMathFlags FMF,
RecursionLimit);
}
-Value *llvm::SimplifyMulInst(Value *Op0, Value *Op1, const DataLayout *DL,
+Value *llvm::SimplifyMulInst(Value *Op0, Value *Op1, const DataLayout &DL,
const TargetLibraryInfo *TLI,
const DominatorTree *DT, AssumptionCache *AC,
const Instruction *CxtI) {
@@ -1092,7 +1086,7 @@ static Value *SimplifySDivInst(Value *Op0, Value *Op1, const Query &Q,
return nullptr;
}
-Value *llvm::SimplifySDivInst(Value *Op0, Value *Op1, const DataLayout *DL,
+Value *llvm::SimplifySDivInst(Value *Op0, Value *Op1, const DataLayout &DL,
const TargetLibraryInfo *TLI,
const DominatorTree *DT, AssumptionCache *AC,
const Instruction *CxtI) {
@@ -1110,7 +1104,7 @@ static Value *SimplifyUDivInst(Value *Op0, Value *Op1, const Query &Q,
return nullptr;
}
-Value *llvm::SimplifyUDivInst(Value *Op0, Value *Op1, const DataLayout *DL,
+Value *llvm::SimplifyUDivInst(Value *Op0, Value *Op1, const DataLayout &DL,
const TargetLibraryInfo *TLI,
const DominatorTree *DT, AssumptionCache *AC,
const Instruction *CxtI) {
@@ -1138,7 +1132,7 @@ static Value *SimplifyFDivInst(Value *Op0, Value *Op1, FastMathFlags FMF,
}
Value *llvm::SimplifyFDivInst(Value *Op0, Value *Op1, FastMathFlags FMF,
- const DataLayout *DL,
+ const DataLayout &DL,
const TargetLibraryInfo *TLI,
const DominatorTree *DT, AssumptionCache *AC,
const Instruction *CxtI) {
@@ -1217,7 +1211,7 @@ static Value *SimplifySRemInst(Value *Op0, Value *Op1, const Query &Q,
return nullptr;
}
-Value *llvm::SimplifySRemInst(Value *Op0, Value *Op1, const DataLayout *DL,
+Value *llvm::SimplifySRemInst(Value *Op0, Value *Op1, const DataLayout &DL,
const TargetLibraryInfo *TLI,
const DominatorTree *DT, AssumptionCache *AC,
const Instruction *CxtI) {
@@ -1235,7 +1229,7 @@ static Value *SimplifyURemInst(Value *Op0, Value *Op1, const Query &Q,
return nullptr;
}
-Value *llvm::SimplifyURemInst(Value *Op0, Value *Op1, const DataLayout *DL,
+Value *llvm::SimplifyURemInst(Value *Op0, Value *Op1, const DataLayout &DL,
const TargetLibraryInfo *TLI,
const DominatorTree *DT, AssumptionCache *AC,
const Instruction *CxtI) {
@@ -1263,7 +1257,7 @@ static Value *SimplifyFRemInst(Value *Op0, Value *Op1, FastMathFlags FMF,
}
Value *llvm::SimplifyFRemInst(Value *Op0, Value *Op1, FastMathFlags FMF,
- const DataLayout *DL,
+ const DataLayout &DL,
const TargetLibraryInfo *TLI,
const DominatorTree *DT, AssumptionCache *AC,
const Instruction *CxtI) {
@@ -1387,7 +1381,7 @@ static Value *SimplifyShlInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW,
}
Value *llvm::SimplifyShlInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW,
- const DataLayout *DL, const TargetLibraryInfo *TLI,
+ const DataLayout &DL, const TargetLibraryInfo *TLI,
const DominatorTree *DT, AssumptionCache *AC,
const Instruction *CxtI) {
return ::SimplifyShlInst(Op0, Op1, isNSW, isNUW, Query(DL, TLI, DT, AC, CxtI),
@@ -1411,7 +1405,7 @@ static Value *SimplifyLShrInst(Value *Op0, Value *Op1, bool isExact,
}
Value *llvm::SimplifyLShrInst(Value *Op0, Value *Op1, bool isExact,
- const DataLayout *DL,
+ const DataLayout &DL,
const TargetLibraryInfo *TLI,
const DominatorTree *DT, AssumptionCache *AC,
const Instruction *CxtI) {
@@ -1445,7 +1439,7 @@ static Value *SimplifyAShrInst(Value *Op0, Value *Op1, bool isExact,
}
Value *llvm::SimplifyAShrInst(Value *Op0, Value *Op1, bool isExact,
- const DataLayout *DL,
+ const DataLayout &DL,
const TargetLibraryInfo *TLI,
const DominatorTree *DT, AssumptionCache *AC,
const Instruction *CxtI) {
@@ -1596,9 +1590,11 @@ static Value *SimplifyAndInst(Value *Op0, Value *Op1, const Query &Q,
// A & (-A) = A if A is a power of two or zero.
if (match(Op0, m_Neg(m_Specific(Op1))) ||
match(Op1, m_Neg(m_Specific(Op0)))) {
- if (isKnownToBeAPowerOfTwo(Op0, /*OrZero*/ true, 0, Q.AC, Q.CxtI, Q.DT))
+ if (isKnownToBeAPowerOfTwo(Op0, Q.DL, /*OrZero*/ true, 0, Q.AC, Q.CxtI,
+ Q.DT))
return Op0;
- if (isKnownToBeAPowerOfTwo(Op1, /*OrZero*/ true, 0, Q.AC, Q.CxtI, Q.DT))
+ if (isKnownToBeAPowerOfTwo(Op1, Q.DL, /*OrZero*/ true, 0, Q.AC, Q.CxtI,
+ Q.DT))
return Op1;
}
@@ -1643,7 +1639,7 @@ static Value *SimplifyAndInst(Value *Op0, Value *Op1, const Query &Q,
return nullptr;
}
-Value *llvm::SimplifyAndInst(Value *Op0, Value *Op1, const DataLayout *DL,
+Value *llvm::SimplifyAndInst(Value *Op0, Value *Op1, const DataLayout &DL,
const TargetLibraryInfo *TLI,
const DominatorTree *DT, AssumptionCache *AC,
const Instruction *CxtI) {
@@ -1831,7 +1827,7 @@ static Value *SimplifyOrInst(Value *Op0, Value *Op1, const Query &Q,
return nullptr;
}
-Value *llvm::SimplifyOrInst(Value *Op0, Value *Op1, const DataLayout *DL,
+Value *llvm::SimplifyOrInst(Value *Op0, Value *Op1, const DataLayout &DL,
const TargetLibraryInfo *TLI,
const DominatorTree *DT, AssumptionCache *AC,
const Instruction *CxtI) {
@@ -1888,7 +1884,7 @@ static Value *SimplifyXorInst(Value *Op0, Value *Op1, const Query &Q,
return nullptr;
}
-Value *llvm::SimplifyXorInst(Value *Op0, Value *Op1, const DataLayout *DL,
+Value *llvm::SimplifyXorInst(Value *Op0, Value *Op1, const DataLayout &DL,
const TargetLibraryInfo *TLI,
const DominatorTree *DT, AssumptionCache *AC,
const Instruction *CxtI) {
@@ -1948,10 +1944,10 @@ static Value *ExtractEquivalentCondition(Value *V, CmpInst::Predicate Pred,
// If the C and C++ standards are ever made sufficiently restrictive in this
// area, it may be possible to update LLVM's semantics accordingly and reinstate
// this optimization.
-static Constant *computePointerICmp(const DataLayout *DL,
+static Constant *computePointerICmp(const DataLayout &DL,
const TargetLibraryInfo *TLI,
- CmpInst::Predicate Pred,
- Value *LHS, Value *RHS) {
+ CmpInst::Predicate Pred, Value *LHS,
+ Value *RHS) {
// First, skip past any trivial no-ops.
LHS = LHS->stripPointerCasts();
RHS = RHS->stripPointerCasts();
@@ -2369,8 +2365,8 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS,
// Turn icmp (ptrtoint x), (ptrtoint/constant) into a compare of the input
// if the integer type is the same size as the pointer type.
- if (MaxRecurse && Q.DL && isa<PtrToIntInst>(LI) &&
- Q.DL->getTypeSizeInBits(SrcTy) == DstTy->getPrimitiveSizeInBits()) {
+ if (MaxRecurse && isa<PtrToIntInst>(LI) &&
+ Q.DL.getTypeSizeInBits(SrcTy) == DstTy->getPrimitiveSizeInBits()) {
if (Constant *RHSC = dyn_cast<Constant>(RHS)) {
// Transfer the cast to the constant.
if (Value *V = SimplifyICmpInst(Pred, SrcOp,
@@ -3024,7 +3020,7 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS,
}
Value *llvm::SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS,
- const DataLayout *DL,
+ const DataLayout &DL,
const TargetLibraryInfo *TLI,
const DominatorTree *DT, AssumptionCache *AC,
Instruction *CxtI) {
@@ -3054,8 +3050,13 @@ static Value *SimplifyFCmpInst(unsigned Predicate, Value *LHS, Value *RHS,
if (Pred == FCmpInst::FCMP_TRUE)
return ConstantInt::get(GetCompareTy(LHS), 1);
- if (isa<UndefValue>(RHS)) // fcmp pred X, undef -> undef
- return UndefValue::get(GetCompareTy(LHS));
+ // fcmp pred x, undef and fcmp pred undef, x
+ // fold to true if unordered, false if ordered
+ if (isa<UndefValue>(LHS) || isa<UndefValue>(RHS)) {
+ // Choosing NaN for the undef will always make unordered comparison succeed
+ // and ordered comparison fail.
+ return ConstantInt::get(GetCompareTy(LHS), CmpInst::isUnordered(Pred));
+ }
// fcmp x,x -> true/false. Not all compares are foldable.
if (LHS == RHS) {
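The comment's rationale can be made concrete: choosing NaN as the value of the
undef operand makes every unordered predicate true and every ordered predicate
false, so the compare folds to a known boolean rather than to undef. A sketch
of just that fold, assuming the CmpInst::isUnordered helper the hunk relies
on; the function name is illustrative:

// BoolTy is the i1 (or vector-of-i1) result type of the compare.
#include "llvm/IR/Constants.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Type.h"
using namespace llvm;

static Constant *foldFCmpWithUndef(CmpInst::Predicate Pred, Type *BoolTy) {
  // NaN compares unordered against everything, including itself.
  return ConstantInt::get(BoolTy, CmpInst::isUnordered(Pred));
}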
@@ -3135,7 +3136,7 @@ static Value *SimplifyFCmpInst(unsigned Predicate, Value *LHS, Value *RHS,
}
Value *llvm::SimplifyFCmpInst(unsigned Predicate, Value *LHS, Value *RHS,
- const DataLayout *DL,
+ const DataLayout &DL,
const TargetLibraryInfo *TLI,
const DominatorTree *DT, AssumptionCache *AC,
const Instruction *CxtI) {
@@ -3230,7 +3231,7 @@ static Value *SimplifySelectInst(Value *CondVal, Value *TrueVal,
}
Value *llvm::SimplifySelectInst(Value *Cond, Value *TrueVal, Value *FalseVal,
- const DataLayout *DL,
+ const DataLayout &DL,
const TargetLibraryInfo *TLI,
const DominatorTree *DT, AssumptionCache *AC,
const Instruction *CxtI) {
@@ -3264,10 +3265,10 @@ static Value *SimplifyGEPInst(ArrayRef<Value *> Ops, const Query &Q, unsigned) {
return Ops[0];
Type *Ty = PtrTy->getElementType();
- if (Q.DL && Ty->isSized()) {
+ if (Ty->isSized()) {
Value *P;
uint64_t C;
- uint64_t TyAllocSize = Q.DL->getTypeAllocSize(Ty);
+ uint64_t TyAllocSize = Q.DL.getTypeAllocSize(Ty);
// getelementptr P, N -> P if P points to a type of zero size.
if (TyAllocSize == 0)
return Ops[0];
@@ -3275,7 +3276,7 @@ static Value *SimplifyGEPInst(ArrayRef<Value *> Ops, const Query &Q, unsigned) {
// The following transforms are only safe if the ptrtoint cast
// doesn't truncate the pointers.
if (Ops[1]->getType()->getScalarSizeInBits() ==
- Q.DL->getPointerSizeInBits(AS)) {
+ Q.DL.getPointerSizeInBits(AS)) {
auto PtrToIntOrZero = [GEPTy](Value *P) -> Value * {
if (match(P, m_Zero()))
return Constant::getNullValue(GEPTy);
@@ -3320,7 +3321,7 @@ static Value *SimplifyGEPInst(ArrayRef<Value *> Ops, const Query &Q, unsigned) {
return ConstantExpr::getGetElementPtr(cast<Constant>(Ops[0]), Ops.slice(1));
}
-Value *llvm::SimplifyGEPInst(ArrayRef<Value *> Ops, const DataLayout *DL,
+Value *llvm::SimplifyGEPInst(ArrayRef<Value *> Ops, const DataLayout &DL,
const TargetLibraryInfo *TLI,
const DominatorTree *DT, AssumptionCache *AC,
const Instruction *CxtI) {
@@ -3357,7 +3358,7 @@ static Value *SimplifyInsertValueInst(Value *Agg, Value *Val,
}
Value *llvm::SimplifyInsertValueInst(
- Value *Agg, Value *Val, ArrayRef<unsigned> Idxs, const DataLayout *DL,
+ Value *Agg, Value *Val, ArrayRef<unsigned> Idxs, const DataLayout &DL,
const TargetLibraryInfo *TLI, const DominatorTree *DT, AssumptionCache *AC,
const Instruction *CxtI) {
return ::SimplifyInsertValueInst(Agg, Val, Idxs, Query(DL, TLI, DT, AC, CxtI),
@@ -3405,7 +3406,7 @@ static Value *SimplifyTruncInst(Value *Op, Type *Ty, const Query &Q, unsigned) {
return nullptr;
}
-Value *llvm::SimplifyTruncInst(Value *Op, Type *Ty, const DataLayout *DL,
+Value *llvm::SimplifyTruncInst(Value *Op, Type *Ty, const DataLayout &DL,
const TargetLibraryInfo *TLI,
const DominatorTree *DT, AssumptionCache *AC,
const Instruction *CxtI) {
@@ -3502,7 +3503,7 @@ static Value *SimplifyFPBinOp(unsigned Opcode, Value *LHS, Value *RHS,
}
Value *llvm::SimplifyBinOp(unsigned Opcode, Value *LHS, Value *RHS,
- const DataLayout *DL, const TargetLibraryInfo *TLI,
+ const DataLayout &DL, const TargetLibraryInfo *TLI,
const DominatorTree *DT, AssumptionCache *AC,
const Instruction *CxtI) {
return ::SimplifyBinOp(Opcode, LHS, RHS, Query(DL, TLI, DT, AC, CxtI),
@@ -3510,7 +3511,7 @@ Value *llvm::SimplifyBinOp(unsigned Opcode, Value *LHS, Value *RHS,
}
Value *llvm::SimplifyFPBinOp(unsigned Opcode, Value *LHS, Value *RHS,
- const FastMathFlags &FMF, const DataLayout *DL,
+ const FastMathFlags &FMF, const DataLayout &DL,
const TargetLibraryInfo *TLI,
const DominatorTree *DT, AssumptionCache *AC,
const Instruction *CxtI) {
@@ -3528,7 +3529,7 @@ static Value *SimplifyCmpInst(unsigned Predicate, Value *LHS, Value *RHS,
}
Value *llvm::SimplifyCmpInst(unsigned Predicate, Value *LHS, Value *RHS,
- const DataLayout *DL, const TargetLibraryInfo *TLI,
+ const DataLayout &DL, const TargetLibraryInfo *TLI,
const DominatorTree *DT, AssumptionCache *AC,
const Instruction *CxtI) {
return ::SimplifyCmpInst(Predicate, LHS, RHS, Query(DL, TLI, DT, AC, CxtI),
@@ -3604,7 +3605,7 @@ static Value *SimplifyCall(Value *V, IterTy ArgBegin, IterTy ArgEnd,
}
Value *llvm::SimplifyCall(Value *V, User::op_iterator ArgBegin,
- User::op_iterator ArgEnd, const DataLayout *DL,
+ User::op_iterator ArgEnd, const DataLayout &DL,
const TargetLibraryInfo *TLI, const DominatorTree *DT,
AssumptionCache *AC, const Instruction *CxtI) {
return ::SimplifyCall(V, ArgBegin, ArgEnd, Query(DL, TLI, DT, AC, CxtI),
@@ -3612,7 +3613,7 @@ Value *llvm::SimplifyCall(Value *V, User::op_iterator ArgBegin,
}
Value *llvm::SimplifyCall(Value *V, ArrayRef<Value *> Args,
- const DataLayout *DL, const TargetLibraryInfo *TLI,
+ const DataLayout &DL, const TargetLibraryInfo *TLI,
const DominatorTree *DT, AssumptionCache *AC,
const Instruction *CxtI) {
return ::SimplifyCall(V, Args.begin(), Args.end(),
@@ -3621,7 +3622,7 @@ Value *llvm::SimplifyCall(Value *V, ArrayRef<Value *> Args,
/// SimplifyInstruction - See if we can compute a simplified version of this
/// instruction. If not, this returns null.
-Value *llvm::SimplifyInstruction(Instruction *I, const DataLayout *DL,
+Value *llvm::SimplifyInstruction(Instruction *I, const DataLayout &DL,
const TargetLibraryInfo *TLI,
const DominatorTree *DT, AssumptionCache *AC) {
Value *Result;
@@ -3769,12 +3770,12 @@ Value *llvm::SimplifyInstruction(Instruction *I, const DataLayout *DL,
/// This routine returns 'true' only when *it* simplifies something. The passed
/// in simplified value does not count toward this.
static bool replaceAndRecursivelySimplifyImpl(Instruction *I, Value *SimpleV,
- const DataLayout *DL,
const TargetLibraryInfo *TLI,
const DominatorTree *DT,
AssumptionCache *AC) {
bool Simplified = false;
SmallSetVector<Instruction *, 8> Worklist;
+ const DataLayout &DL = I->getModule()->getDataLayout();
// If we have an explicit value to collapse to, do that round of the
// simplification loop by hand initially.
@@ -3822,19 +3823,18 @@ static bool replaceAndRecursivelySimplifyImpl(Instruction *I, Value *SimpleV,
return Simplified;
}
-bool llvm::recursivelySimplifyInstruction(Instruction *I, const DataLayout *DL,
+bool llvm::recursivelySimplifyInstruction(Instruction *I,
const TargetLibraryInfo *TLI,
const DominatorTree *DT,
AssumptionCache *AC) {
- return replaceAndRecursivelySimplifyImpl(I, nullptr, DL, TLI, DT, AC);
+ return replaceAndRecursivelySimplifyImpl(I, nullptr, TLI, DT, AC);
}
bool llvm::replaceAndRecursivelySimplify(Instruction *I, Value *SimpleV,
- const DataLayout *DL,
const TargetLibraryInfo *TLI,
const DominatorTree *DT,
AssumptionCache *AC) {
assert(I != SimpleV && "replaceAndRecursivelySimplify(X,X) is not valid!");
assert(SimpleV && "Must provide a simplified value.");
- return replaceAndRecursivelySimplifyImpl(I, SimpleV, DL, TLI, DT, AC);
+ return replaceAndRecursivelySimplifyImpl(I, SimpleV, TLI, DT, AC);
}
diff --git a/lib/Analysis/JumpInstrTableInfo.cpp b/lib/Analysis/JumpInstrTableInfo.cpp
deleted file mode 100644
index 7aae2a5..0000000
--- a/lib/Analysis/JumpInstrTableInfo.cpp
+++ /dev/null
@@ -1,55 +0,0 @@
-//===-- JumpInstrTableInfo.cpp: Info for Jump-Instruction Tables ----------===//
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-///
-/// \file
-/// \brief Information about jump-instruction tables that have been created by
-/// JumpInstrTables pass.
-///
-//===----------------------------------------------------------------------===//
-
-#define DEBUG_TYPE "jiti"
-
-#include "llvm/Analysis/JumpInstrTableInfo.h"
-#include "llvm/Analysis/Passes.h"
-#include "llvm/IR/Function.h"
-#include "llvm/IR/Type.h"
-#include "llvm/Support/MathExtras.h"
-
-using namespace llvm;
-
-INITIALIZE_PASS(JumpInstrTableInfo, "jump-instr-table-info",
- "Jump-Instruction Table Info", true, true)
-char JumpInstrTableInfo::ID = 0;
-
-ImmutablePass *llvm::createJumpInstrTableInfoPass() {
- return new JumpInstrTableInfo();
-}
-
-ModulePass *llvm::createJumpInstrTableInfoPass(unsigned Bound) {
- // This cast is always safe, since Bound is always in a subset of uint64_t.
- uint64_t B = static_cast<uint64_t>(Bound);
- return new JumpInstrTableInfo(B);
-}
-
-JumpInstrTableInfo::JumpInstrTableInfo(uint64_t ByteAlign)
- : ImmutablePass(ID), Tables(), ByteAlignment(ByteAlign) {
- if (!llvm::isPowerOf2_64(ByteAlign)) {
- // Note that we don't explicitly handle overflow here, since we handle the 0
- // case explicitly when a caller actually tries to create jumptable entries,
- // and this is the return value on overflow.
- ByteAlignment = llvm::NextPowerOf2(ByteAlign);
- }
-
- initializeJumpInstrTableInfoPass(*PassRegistry::getPassRegistry());
-}
-
-JumpInstrTableInfo::~JumpInstrTableInfo() {}
-
-void JumpInstrTableInfo::insertEntry(FunctionType *TableFunTy, Function *Target,
- Function *Jump) {
- Tables[TableFunTy].push_back(JumpPair(Target, Jump));
-}
diff --git a/lib/Analysis/LazyValueInfo.cpp b/lib/Analysis/LazyValueInfo.cpp
index 87c31fd..e6f586a 100644
--- a/lib/Analysis/LazyValueInfo.cpp
+++ b/lib/Analysis/LazyValueInfo.cpp
@@ -191,7 +191,7 @@ public:
/// Merge the specified lattice value into this one, updating this
/// one and returning true if anything changed.
- bool mergeIn(const LVILatticeVal &RHS) {
+ bool mergeIn(const LVILatticeVal &RHS, const DataLayout &DL) {
if (RHS.isUndefined() || isOverdefined()) return false;
if (RHS.isOverdefined()) return markOverdefined();
@@ -215,11 +215,9 @@ public:
// Unless we can prove that the two Constants are different, we must
// move to overdefined.
- // FIXME: use DataLayout/TargetLibraryInfo for smarter constant folding.
- if (ConstantInt *Res = dyn_cast<ConstantInt>(
- ConstantFoldCompareInstOperands(CmpInst::ICMP_NE,
- getConstant(),
- RHS.getNotConstant())))
+ if (ConstantInt *Res =
+ dyn_cast<ConstantInt>(ConstantFoldCompareInstOperands(
+ CmpInst::ICMP_NE, getConstant(), RHS.getNotConstant(), DL)))
if (Res->isOne())
return markNotConstant(RHS.getNotConstant());
@@ -241,11 +239,9 @@ public:
// Unless we can prove that the two Constants are different, we must
// move to overdefined.
- // FIXME: use DataLayout/TargetLibraryInfo for smarter constant folding.
- if (ConstantInt *Res = dyn_cast<ConstantInt>(
- ConstantFoldCompareInstOperands(CmpInst::ICMP_NE,
- getNotConstant(),
- RHS.getConstant())))
+ if (ConstantInt *Res =
+ dyn_cast<ConstantInt>(ConstantFoldCompareInstOperands(
+ CmpInst::ICMP_NE, getNotConstant(), RHS.getConstant(), DL)))
if (Res->isOne())
return false;
@@ -346,21 +342,17 @@ namespace {
/// Push BV onto BlockValueStack unless it's already in there.
/// Returns true on success.
bool pushBlockValue(const std::pair<BasicBlock *, Value *> &BV) {
- if (BlockValueSet.count(BV))
+ if (!BlockValueSet.insert(BV).second)
return false; // It's already in the stack.
BlockValueStack.push(BV);
- BlockValueSet.insert(BV);
return true;
}
- /// A pointer to the cache of @llvm.assume calls.
- AssumptionCache *AC;
- /// An optional DL pointer.
- const DataLayout *DL;
- /// An optional DT pointer.
- DominatorTree *DT;
-
+ AssumptionCache *AC; ///< A pointer to the cache of @llvm.assume calls.
+ const DataLayout &DL; ///< A mandatory DataLayout.
+ DominatorTree *DT; ///< An optional DT pointer.
+
friend struct LVIValueHandle;
void insertResult(Value *Val, BasicBlock *BB, const LVILatticeVal &Result) {
@@ -426,7 +418,7 @@ namespace {
OverDefinedCache.clear();
}
- LazyValueInfoCache(AssumptionCache *AC, const DataLayout *DL = nullptr,
+ LazyValueInfoCache(AssumptionCache *AC, const DataLayout &DL,
DominatorTree *DT = nullptr)
: AC(AC), DL(DL), DT(DT) {}
};
@@ -579,11 +571,13 @@ bool LazyValueInfoCache::solveBlockValue(Value *Val, BasicBlock *BB) {
static bool InstructionDereferencesPointer(Instruction *I, Value *Ptr) {
if (LoadInst *L = dyn_cast<LoadInst>(I)) {
return L->getPointerAddressSpace() == 0 &&
- GetUnderlyingObject(L->getPointerOperand()) == Ptr;
+ GetUnderlyingObject(L->getPointerOperand(),
+ L->getModule()->getDataLayout()) == Ptr;
}
if (StoreInst *S = dyn_cast<StoreInst>(I)) {
return S->getPointerAddressSpace() == 0 &&
- GetUnderlyingObject(S->getPointerOperand()) == Ptr;
+ GetUnderlyingObject(S->getPointerOperand(),
+ S->getModule()->getDataLayout()) == Ptr;
}
if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(I)) {
if (MI->isVolatile()) return false;
@@ -593,11 +587,13 @@ static bool InstructionDereferencesPointer(Instruction *I, Value *Ptr) {
if (!Len || Len->isZero()) return false;
if (MI->getDestAddressSpace() == 0)
- if (GetUnderlyingObject(MI->getRawDest()) == Ptr)
+ if (GetUnderlyingObject(MI->getRawDest(),
+ MI->getModule()->getDataLayout()) == Ptr)
return true;
if (MemTransferInst *MTI = dyn_cast<MemTransferInst>(MI))
if (MTI->getSourceAddressSpace() == 0)
- if (GetUnderlyingObject(MTI->getRawSource()) == Ptr)
+ if (GetUnderlyingObject(MTI->getRawSource(),
+ MTI->getModule()->getDataLayout()) == Ptr)
return true;
}
return false;
@@ -614,10 +610,11 @@ bool LazyValueInfoCache::solveBlockValueNonLocal(LVILatticeVal &BBLV,
if (isKnownNonNull(Val)) {
NotNull = true;
} else {
- Value *UnderlyingVal = GetUnderlyingObject(Val);
+ const DataLayout &DL = BB->getModule()->getDataLayout();
+ Value *UnderlyingVal = GetUnderlyingObject(Val, DL);
// If 'GetUnderlyingObject' didn't converge, skip it. It won't converge
// inside InstructionDereferencesPointer either.
- if (UnderlyingVal == GetUnderlyingObject(UnderlyingVal, nullptr, 1)) {
+ if (UnderlyingVal == GetUnderlyingObject(UnderlyingVal, DL, 1)) {
for (Instruction &I : *BB) {
if (InstructionDereferencesPointer(&I, UnderlyingVal)) {
NotNull = true;
@@ -651,7 +648,7 @@ bool LazyValueInfoCache::solveBlockValueNonLocal(LVILatticeVal &BBLV,
if (EdgesMissing)
continue;
- Result.mergeIn(EdgeResult);
+ Result.mergeIn(EdgeResult, DL);
// If we hit overdefined, exit early. The BlockVals entry is already set
// to overdefined.
@@ -696,7 +693,7 @@ bool LazyValueInfoCache::solveBlockValuePHINode(LVILatticeVal &BBLV,
if (EdgesMissing)
continue;
- Result.mergeIn(EdgeResult);
+ Result.mergeIn(EdgeResult, DL);
// If we hit overdefined, exit early. The BlockVals entry is already set
// to overdefined.
@@ -735,7 +732,7 @@ void LazyValueInfoCache::mergeAssumeBlockValueConstantRange(Value *Val,
if (!AssumeVH)
continue;
auto *I = cast<CallInst>(AssumeVH);
- if (!isValidAssumeForContext(I, BBI, DL, DT))
+ if (!isValidAssumeForContext(I, BBI, DT))
continue;
Value *C = I->getArgOperand(0);
@@ -745,7 +742,7 @@ void LazyValueInfoCache::mergeAssumeBlockValueConstantRange(Value *Val,
if (BBLV.isOverdefined())
BBLV = Result;
else
- BBLV.mergeIn(Result);
+ BBLV.mergeIn(Result, DL);
}
}
}
@@ -857,10 +854,10 @@ bool getValueFromFromCondition(Value *Val, ICmpInst *ICI,
ConstantInt *CI = dyn_cast<ConstantInt>(ICI->getOperand(1));
if (CI && (ICI->getOperand(0) == Val || NegOffset)) {
- // Calculate the range of values that would satisfy the comparison.
+ // Calculate the range of values that are allowed by the comparison.
ConstantRange CmpRange(CI->getValue());
ConstantRange TrueValues =
- ConstantRange::makeICmpRegion(ICI->getPredicate(), CmpRange);
+ ConstantRange::makeAllowedICmpRegion(ICI->getPredicate(), CmpRange);
if (NegOffset) // Apply the offset from above.
TrueValues = TrueValues.subtract(NegOffset->getValue());
@@ -1104,27 +1101,27 @@ void LazyValueInfoCache::threadEdge(BasicBlock *PredBB, BasicBlock *OldSucc,
/// This lazily constructs the LazyValueInfoCache.
static LazyValueInfoCache &getCache(void *&PImpl, AssumptionCache *AC,
- const DataLayout *DL = nullptr,
+ const DataLayout *DL,
DominatorTree *DT = nullptr) {
- if (!PImpl)
- PImpl = new LazyValueInfoCache(AC, DL, DT);
+ if (!PImpl) {
+ assert(DL && "getCache() called with a null DataLayout");
+ PImpl = new LazyValueInfoCache(AC, *DL, DT);
+ }
return *static_cast<LazyValueInfoCache*>(PImpl);
}
bool LazyValueInfo::runOnFunction(Function &F) {
AC = &getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
+ const DataLayout &DL = F.getParent()->getDataLayout();
DominatorTreeWrapperPass *DTWP =
getAnalysisIfAvailable<DominatorTreeWrapperPass>();
DT = DTWP ? &DTWP->getDomTree() : nullptr;
- DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>();
- DL = DLP ? &DLP->getDataLayout() : nullptr;
-
TLI = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();
if (PImpl)
- getCache(PImpl, AC, DL, DT).clear();
+ getCache(PImpl, AC, &DL, DT).clear();
// Fully lazy.
return false;
@@ -1139,15 +1136,16 @@ void LazyValueInfo::getAnalysisUsage(AnalysisUsage &AU) const {
void LazyValueInfo::releaseMemory() {
// If the cache was allocated, free it.
if (PImpl) {
- delete &getCache(PImpl, AC);
+ delete &getCache(PImpl, AC, nullptr);
PImpl = nullptr;
}
}
Constant *LazyValueInfo::getConstant(Value *V, BasicBlock *BB,
Instruction *CxtI) {
+ const DataLayout &DL = BB->getModule()->getDataLayout();
LVILatticeVal Result =
- getCache(PImpl, AC, DL, DT).getValueInBlock(V, BB, CxtI);
+ getCache(PImpl, AC, &DL, DT).getValueInBlock(V, BB, CxtI);
if (Result.isConstant())
return Result.getConstant();
@@ -1164,8 +1162,9 @@ Constant *LazyValueInfo::getConstant(Value *V, BasicBlock *BB,
Constant *LazyValueInfo::getConstantOnEdge(Value *V, BasicBlock *FromBB,
BasicBlock *ToBB,
Instruction *CxtI) {
+ const DataLayout &DL = FromBB->getModule()->getDataLayout();
LVILatticeVal Result =
- getCache(PImpl, AC, DL, DT).getValueOnEdge(V, FromBB, ToBB, CxtI);
+ getCache(PImpl, AC, &DL, DT).getValueOnEdge(V, FromBB, ToBB, CxtI);
if (Result.isConstant())
return Result.getConstant();
@@ -1177,9 +1176,10 @@ Constant *LazyValueInfo::getConstantOnEdge(Value *V, BasicBlock *FromBB,
return nullptr;
}
-static LazyValueInfo::Tristate
-getPredicateResult(unsigned Pred, Constant *C, LVILatticeVal &Result,
- const DataLayout *DL, TargetLibraryInfo *TLI) {
+static LazyValueInfo::Tristate getPredicateResult(unsigned Pred, Constant *C,
+ LVILatticeVal &Result,
+ const DataLayout &DL,
+ TargetLibraryInfo *TLI) {
// If we know the value is a constant, evaluate the conditional.
Constant *Res = nullptr;
@@ -1250,8 +1250,9 @@ LazyValueInfo::Tristate
LazyValueInfo::getPredicateOnEdge(unsigned Pred, Value *V, Constant *C,
BasicBlock *FromBB, BasicBlock *ToBB,
Instruction *CxtI) {
+ const DataLayout &DL = FromBB->getModule()->getDataLayout();
LVILatticeVal Result =
- getCache(PImpl, AC, DL, DT).getValueOnEdge(V, FromBB, ToBB, CxtI);
+ getCache(PImpl, AC, &DL, DT).getValueOnEdge(V, FromBB, ToBB, CxtI);
return getPredicateResult(Pred, C, Result, DL, TLI);
}
@@ -1259,18 +1260,23 @@ LazyValueInfo::getPredicateOnEdge(unsigned Pred, Value *V, Constant *C,
LazyValueInfo::Tristate
LazyValueInfo::getPredicateAt(unsigned Pred, Value *V, Constant *C,
Instruction *CxtI) {
- LVILatticeVal Result = getCache(PImpl, AC, DL, DT).getValueAt(V, CxtI);
+ const DataLayout &DL = CxtI->getModule()->getDataLayout();
+ LVILatticeVal Result = getCache(PImpl, AC, &DL, DT).getValueAt(V, CxtI);
return getPredicateResult(Pred, C, Result, DL, TLI);
}
void LazyValueInfo::threadEdge(BasicBlock *PredBB, BasicBlock *OldSucc,
BasicBlock *NewSucc) {
- if (PImpl)
- getCache(PImpl, AC, DL, DT).threadEdge(PredBB, OldSucc, NewSucc);
+ if (PImpl) {
+ const DataLayout &DL = PredBB->getModule()->getDataLayout();
+ getCache(PImpl, AC, &DL, DT).threadEdge(PredBB, OldSucc, NewSucc);
+ }
}
void LazyValueInfo::eraseBlock(BasicBlock *BB) {
- if (PImpl)
- getCache(PImpl, AC, DL, DT).eraseBlock(BB);
+ if (PImpl) {
+ const DataLayout &DL = BB->getModule()->getDataLayout();
+ getCache(PImpl, AC, &DL, DT).eraseBlock(BB);
+ }
}
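The getCache change above establishes a construction contract: the first
caller must supply a DataLayout so the cache's mandatory reference can be
bound, while later calls -- releaseMemory in particular -- may pass nullptr
because the pimpl already exists. A standalone sketch of that lazy
construction pattern, with stand-in types:

#include <cassert>

struct LayoutInfo {}; // stand-in for llvm::DataLayout

struct CacheImpl {
  const LayoutInfo &DL; // mandatory once constructed
  explicit CacheImpl(const LayoutInfo &DL) : DL(DL) {}
};

// The void*& pimpl is created on first use; ownership and deletion stay with
// the caller, exactly as in LazyValueInfo::releaseMemory above.
static CacheImpl &getCacheSketch(void *&PImpl, const LayoutInfo *DL) {
  if (!PImpl) {
    assert(DL && "first call must provide a layout");
    PImpl = new CacheImpl(*DL);
  }
  return *static_cast<CacheImpl *>(PImpl);
}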
diff --git a/lib/Analysis/LibCallAliasAnalysis.cpp b/lib/Analysis/LibCallAliasAnalysis.cpp
index 016f8c5..f6025e3 100644
--- a/lib/Analysis/LibCallAliasAnalysis.cpp
+++ b/lib/Analysis/LibCallAliasAnalysis.cpp
@@ -36,7 +36,11 @@ void LibCallAliasAnalysis::getAnalysisUsage(AnalysisUsage &AU) const {
AU.setPreservesAll(); // Does not transform code
}
-
+bool LibCallAliasAnalysis::runOnFunction(Function &F) {
+ // Set up the superclass.
+ InitializeAliasAnalysis(this, &F.getParent()->getDataLayout());
+ return false;
+}
/// AnalyzeLibCallDetails - Given a call to a function with the specified
/// LibCallFunctionInfo, see if we can improve the mod/ref footprint of the call
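The new runOnFunction mirrors the InitializeAliasAnalysis change made in
GlobalsModRef earlier in this patch: alias-analysis implementations now hand
the base class a layout at run time, when a Function or Module is guaranteed
to be in hand. A hedged sketch of the pattern for a hypothetical pass; ToyAA
is illustrative, not part of the patch:

#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Module.h"
#include "llvm/Pass.h"
using namespace llvm;

namespace {
// A do-nothing alias analysis showing only the initialization handshake.
struct ToyAA : FunctionPass, AliasAnalysis {
  static char ID;
  ToyAA() : FunctionPass(ID) {}
  bool runOnFunction(Function &F) override {
    InitializeAliasAnalysis(this, &F.getParent()->getDataLayout());
    return false; // analysis only; the IR is not modified
  }
};
char ToyAA::ID = 0;
}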
diff --git a/lib/Analysis/LibCallSemantics.cpp b/lib/Analysis/LibCallSemantics.cpp
index cf752dd..328b186 100644
--- a/lib/Analysis/LibCallSemantics.cpp
+++ b/lib/Analysis/LibCallSemantics.cpp
@@ -80,18 +80,6 @@ EHPersonality llvm::classifyEHPersonality(const Value *Pers) {
.Default(EHPersonality::Unknown);
}
-bool llvm::isAsynchronousEHPersonality(EHPersonality Pers) {
- // The two SEH personality functions can catch asynch exceptions. We assume
- // unknown personalities don't catch asynch exceptions.
- switch (Pers) {
- case EHPersonality::MSVC_X86SEH:
- case EHPersonality::MSVC_Win64SEH:
- return true;
- default: return false;
- }
- llvm_unreachable("invalid enum");
-}
-
bool llvm::canSimplifyInvokeNoUnwind(const InvokeInst *II) {
const LandingPadInst *LP = II->getLandingPadInst();
EHPersonality Personality = classifyEHPersonality(LP->getPersonalityFn());
diff --git a/lib/Analysis/Lint.cpp b/lib/Analysis/Lint.cpp
index 874ed0a..65a90d7 100644
--- a/lib/Analysis/Lint.cpp
+++ b/lib/Analysis/Lint.cpp
@@ -59,10 +59,10 @@ using namespace llvm;
namespace {
namespace MemRef {
- static unsigned Read = 1;
- static unsigned Write = 2;
- static unsigned Callee = 4;
- static unsigned Branchee = 8;
+ static const unsigned Read = 1;
+ static const unsigned Write = 2;
+ static const unsigned Callee = 4;
+ static const unsigned Branchee = 8;
}
class Lint : public FunctionPass, public InstVisitor<Lint> {
@@ -98,8 +98,8 @@ namespace {
void visitInsertElementInst(InsertElementInst &I);
void visitUnreachableInst(UnreachableInst &I);
- Value *findValue(Value *V, bool OffsetOk) const;
- Value *findValueImpl(Value *V, bool OffsetOk,
+ Value *findValue(Value *V, const DataLayout &DL, bool OffsetOk) const;
+ Value *findValueImpl(Value *V, const DataLayout &DL, bool OffsetOk,
SmallPtrSetImpl<Value *> &Visited) const;
public:
@@ -107,7 +107,6 @@ namespace {
AliasAnalysis *AA;
AssumptionCache *AC;
DominatorTree *DT;
- const DataLayout *DL;
TargetLibraryInfo *TLI;
std::string Messages;
@@ -129,27 +128,33 @@ namespace {
}
void print(raw_ostream &O, const Module *M) const override {}
- void WriteValue(const Value *V) {
- if (!V) return;
- if (isa<Instruction>(V)) {
- MessagesStr << *V << '\n';
- } else {
- V->printAsOperand(MessagesStr, true, Mod);
- MessagesStr << '\n';
+ void WriteValues(ArrayRef<const Value *> Vs) {
+ for (const Value *V : Vs) {
+ if (!V)
+ continue;
+ if (isa<Instruction>(V)) {
+ MessagesStr << *V << '\n';
+ } else {
+ V->printAsOperand(MessagesStr, true, Mod);
+ MessagesStr << '\n';
+ }
}
}
- // CheckFailed - A check failed, so print out the condition and the message
- // that failed. This provides a nice place to put a breakpoint if you want
- // to see why something is not correct.
- void CheckFailed(const Twine &Message,
- const Value *V1 = nullptr, const Value *V2 = nullptr,
- const Value *V3 = nullptr, const Value *V4 = nullptr) {
- MessagesStr << Message.str() << "\n";
- WriteValue(V1);
- WriteValue(V2);
- WriteValue(V3);
- WriteValue(V4);
+ /// \brief A check failed, so print out the condition and the message.
+ ///
+ /// This provides a nice place to put a breakpoint if you want to see why
+ /// something is not correct.
+ void CheckFailed(const Twine &Message) { MessagesStr << Message << '\n'; }
+
+ /// \brief A check failed (with values to print).
+ ///
+ /// This calls the Message-only version so that the above is easier to set
+ /// a breakpoint on.
+ template <typename T1, typename... Ts>
+ void CheckFailed(const Twine &Message, const T1 &V1, const Ts &...Vs) {
+ CheckFailed(Message);
+ WriteValues({V1, Vs...});
}
};
}
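The two CheckFailed overloads just added pair with the variadic Assert macro
in the next hunk to retire the numbered Assert1..Assert4 family: a
message-only base overload that doubles as a breakpoint target, plus a
variadic template that forwards any number of values. A self-contained C++11
sketch of the technique, with the printing simplified to iostream (the real
Lint prints through printAsOperand):

#include <iostream>
#include <string>

// Breakpoint-friendly base case: every failure funnels through here.
static void CheckFailed(const std::string &Message) {
  std::cerr << Message << '\n';
}

static void WriteValues() {}
template <typename T, typename... Ts>
static void WriteValues(const T &V, const Ts &...Vs) {
  std::cerr << "  " << V << '\n'; // requires operator<< for each value
  WriteValues(Vs...);
}

template <typename T1, typename... Ts>
static void CheckFailed(const std::string &Message, const T1 &V1,
                        const Ts &...Vs) {
  CheckFailed(Message);
  WriteValues(V1, Vs...);
}

// One macro replaces Assert1..Assert4; usable only in void functions
// because of the bare return, matching Lint's visitors.
#define Assert(C, ...) \
  do { if (!(C)) { CheckFailed(__VA_ARGS__); return; } } while (0)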
@@ -165,16 +170,8 @@ INITIALIZE_PASS_END(Lint, "lint", "Statically lint-checks LLVM IR",
false, true)
// Assert - We know that cond should be true, if not print an error message.
-#define Assert(C, M) \
- do { if (!(C)) { CheckFailed(M); return; } } while (0)
-#define Assert1(C, M, V1) \
- do { if (!(C)) { CheckFailed(M, V1); return; } } while (0)
-#define Assert2(C, M, V1, V2) \
- do { if (!(C)) { CheckFailed(M, V1, V2); return; } } while (0)
-#define Assert3(C, M, V1, V2, V3) \
- do { if (!(C)) { CheckFailed(M, V1, V2, V3); return; } } while (0)
-#define Assert4(C, M, V1, V2, V3, V4) \
- do { if (!(C)) { CheckFailed(M, V1, V2, V3, V4); return; } } while (0)
+#define Assert(C, ...) \
+ do { if (!(C)) { CheckFailed(__VA_ARGS__); return; } } while (0)
// Lint::run - This is the main Analysis entry point for a
// function.
@@ -184,8 +181,6 @@ bool Lint::runOnFunction(Function &F) {
AA = &getAnalysis<AliasAnalysis>();
AC = &getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
- DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>();
- DL = DLP ? &DLP->getDataLayout() : nullptr;
TLI = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();
visit(F);
dbgs() << MessagesStr.str();
@@ -196,8 +191,8 @@ bool Lint::runOnFunction(Function &F) {
void Lint::visitFunction(Function &F) {
// This isn't undefined behavior, it's just a little unusual, and it's a
// fairly common mistake to neglect to name a function.
- Assert1(F.hasName() || F.hasLocalLinkage(),
- "Unusual: Unnamed function with non-local linkage", &F);
+ Assert(F.hasName() || F.hasLocalLinkage(),
+ "Unusual: Unnamed function with non-local linkage", &F);
// TODO: Check for irreducible control flow.
}
@@ -205,27 +200,30 @@ void Lint::visitFunction(Function &F) {
void Lint::visitCallSite(CallSite CS) {
Instruction &I = *CS.getInstruction();
Value *Callee = CS.getCalledValue();
+ const DataLayout &DL = CS->getModule()->getDataLayout();
visitMemoryReference(I, Callee, AliasAnalysis::UnknownSize,
0, nullptr, MemRef::Callee);
- if (Function *F = dyn_cast<Function>(findValue(Callee, /*OffsetOk=*/false))) {
- Assert1(CS.getCallingConv() == F->getCallingConv(),
- "Undefined behavior: Caller and callee calling convention differ",
- &I);
+ if (Function *F = dyn_cast<Function>(findValue(Callee, DL,
+ /*OffsetOk=*/false))) {
+ Assert(CS.getCallingConv() == F->getCallingConv(),
+ "Undefined behavior: Caller and callee calling convention differ",
+ &I);
FunctionType *FT = F->getFunctionType();
unsigned NumActualArgs = CS.arg_size();
- Assert1(FT->isVarArg() ?
- FT->getNumParams() <= NumActualArgs :
- FT->getNumParams() == NumActualArgs,
- "Undefined behavior: Call argument count mismatches callee "
- "argument count", &I);
+ Assert(FT->isVarArg() ? FT->getNumParams() <= NumActualArgs
+ : FT->getNumParams() == NumActualArgs,
+ "Undefined behavior: Call argument count mismatches callee "
+ "argument count",
+ &I);
- Assert1(FT->getReturnType() == I.getType(),
- "Undefined behavior: Call return type mismatches "
- "callee return type", &I);
+ Assert(FT->getReturnType() == I.getType(),
+ "Undefined behavior: Call return type mismatches "
+ "callee return type",
+ &I);
// Check argument types (in case the callee was casted) and attributes.
// TODO: Verify that caller and callee attributes are compatible.
@@ -235,9 +233,10 @@ void Lint::visitCallSite(CallSite CS) {
Value *Actual = *AI;
if (PI != PE) {
Argument *Formal = PI++;
- Assert1(Formal->getType() == Actual->getType(),
- "Undefined behavior: Call argument type mismatches "
- "callee parameter type", &I);
+ Assert(Formal->getType() == Actual->getType(),
+ "Undefined behavior: Call argument type mismatches "
+ "callee parameter type",
+ &I);
// Check that noalias arguments don't alias other arguments. This is
// not fully precise because we don't know the sizes of the dereferenced
@@ -246,9 +245,9 @@ void Lint::visitCallSite(CallSite CS) {
for (CallSite::arg_iterator BI = CS.arg_begin(); BI != AE; ++BI)
if (AI != BI && (*BI)->getType()->isPointerTy()) {
AliasAnalysis::AliasResult Result = AA->alias(*AI, *BI);
- Assert1(Result != AliasAnalysis::MustAlias &&
- Result != AliasAnalysis::PartialAlias,
- "Unusual: noalias argument aliases another argument", &I);
+ Assert(Result != AliasAnalysis::MustAlias &&
+ Result != AliasAnalysis::PartialAlias,
+ "Unusual: noalias argument aliases another argument", &I);
}
// Check that an sret argument points to valid memory.
@@ -256,8 +255,8 @@ void Lint::visitCallSite(CallSite CS) {
Type *Ty =
cast<PointerType>(Formal->getType())->getElementType();
visitMemoryReference(I, Actual, AA->getTypeStoreSize(Ty),
- DL ? DL->getABITypeAlignment(Ty) : 0,
- Ty, MemRef::Read | MemRef::Write);
+ DL.getABITypeAlignment(Ty), Ty,
+ MemRef::Read | MemRef::Write);
}
}
}
@@ -266,10 +265,11 @@ void Lint::visitCallSite(CallSite CS) {
if (CS.isCall() && cast<CallInst>(CS.getInstruction())->isTailCall())
for (CallSite::arg_iterator AI = CS.arg_begin(), AE = CS.arg_end();
AI != AE; ++AI) {
- Value *Obj = findValue(*AI, /*OffsetOk=*/true);
- Assert1(!isa<AllocaInst>(Obj),
- "Undefined behavior: Call with \"tail\" keyword references "
- "alloca", &I);
+ Value *Obj = findValue(*AI, DL, /*OffsetOk=*/true);
+ Assert(!isa<AllocaInst>(Obj),
+ "Undefined behavior: Call with \"tail\" keyword references "
+ "alloca",
+ &I);
}
@@ -294,13 +294,13 @@ void Lint::visitCallSite(CallSite CS) {
// overlap is not distinguished from the case where nothing is known.
uint64_t Size = 0;
if (const ConstantInt *Len =
- dyn_cast<ConstantInt>(findValue(MCI->getLength(),
- /*OffsetOk=*/false)))
+ dyn_cast<ConstantInt>(findValue(MCI->getLength(), DL,
+ /*OffsetOk=*/false)))
if (Len->getValue().isIntN(32))
Size = Len->getValue().getZExtValue();
- Assert1(AA->alias(MCI->getSource(), Size, MCI->getDest(), Size) !=
- AliasAnalysis::MustAlias,
- "Undefined behavior: memcpy source and destination overlap", &I);
+ Assert(AA->alias(MCI->getSource(), Size, MCI->getDest(), Size) !=
+ AliasAnalysis::MustAlias,
+ "Undefined behavior: memcpy source and destination overlap", &I);
break;
}
case Intrinsic::memmove: {
@@ -324,9 +324,9 @@ void Lint::visitCallSite(CallSite CS) {
}
case Intrinsic::vastart:
- Assert1(I.getParent()->getParent()->isVarArg(),
- "Undefined behavior: va_start called in a non-varargs function",
- &I);
+ Assert(I.getParent()->getParent()->isVarArg(),
+ "Undefined behavior: va_start called in a non-varargs function",
+ &I);
visitMemoryReference(I, CS.getArgument(0), AliasAnalysis::UnknownSize,
0, nullptr, MemRef::Read | MemRef::Write);
@@ -369,14 +369,13 @@ void Lint::visitInvokeInst(InvokeInst &I) {
void Lint::visitReturnInst(ReturnInst &I) {
Function *F = I.getParent()->getParent();
- Assert1(!F->doesNotReturn(),
- "Unusual: Return statement in function with noreturn attribute",
- &I);
+ Assert(!F->doesNotReturn(),
+ "Unusual: Return statement in function with noreturn attribute", &I);
if (Value *V = I.getReturnValue()) {
- Value *Obj = findValue(V, /*OffsetOk=*/true);
- Assert1(!isa<AllocaInst>(Obj),
- "Unusual: Returning alloca value", &I);
+ Value *Obj =
+ findValue(V, F->getParent()->getDataLayout(), /*OffsetOk=*/true);
+ Assert(!isa<AllocaInst>(Obj), "Unusual: Returning alloca value", &I);
}
}
@@ -390,45 +389,47 @@ void Lint::visitMemoryReference(Instruction &I,
if (Size == 0)
return;
- Value *UnderlyingObject = findValue(Ptr, /*OffsetOk=*/true);
- Assert1(!isa<ConstantPointerNull>(UnderlyingObject),
- "Undefined behavior: Null pointer dereference", &I);
- Assert1(!isa<UndefValue>(UnderlyingObject),
- "Undefined behavior: Undef pointer dereference", &I);
- Assert1(!isa<ConstantInt>(UnderlyingObject) ||
- !cast<ConstantInt>(UnderlyingObject)->isAllOnesValue(),
- "Unusual: All-ones pointer dereference", &I);
- Assert1(!isa<ConstantInt>(UnderlyingObject) ||
- !cast<ConstantInt>(UnderlyingObject)->isOne(),
- "Unusual: Address one pointer dereference", &I);
+ Value *UnderlyingObject =
+ findValue(Ptr, I.getModule()->getDataLayout(), /*OffsetOk=*/true);
+ Assert(!isa<ConstantPointerNull>(UnderlyingObject),
+ "Undefined behavior: Null pointer dereference", &I);
+ Assert(!isa<UndefValue>(UnderlyingObject),
+ "Undefined behavior: Undef pointer dereference", &I);
+ Assert(!isa<ConstantInt>(UnderlyingObject) ||
+ !cast<ConstantInt>(UnderlyingObject)->isAllOnesValue(),
+ "Unusual: All-ones pointer dereference", &I);
+ Assert(!isa<ConstantInt>(UnderlyingObject) ||
+ !cast<ConstantInt>(UnderlyingObject)->isOne(),
+ "Unusual: Address one pointer dereference", &I);
if (Flags & MemRef::Write) {
if (const GlobalVariable *GV = dyn_cast<GlobalVariable>(UnderlyingObject))
- Assert1(!GV->isConstant(),
- "Undefined behavior: Write to read-only memory", &I);
- Assert1(!isa<Function>(UnderlyingObject) &&
- !isa<BlockAddress>(UnderlyingObject),
- "Undefined behavior: Write to text section", &I);
+ Assert(!GV->isConstant(), "Undefined behavior: Write to read-only memory",
+ &I);
+ Assert(!isa<Function>(UnderlyingObject) &&
+ !isa<BlockAddress>(UnderlyingObject),
+ "Undefined behavior: Write to text section", &I);
}
if (Flags & MemRef::Read) {
- Assert1(!isa<Function>(UnderlyingObject),
- "Unusual: Load from function body", &I);
- Assert1(!isa<BlockAddress>(UnderlyingObject),
- "Undefined behavior: Load from block address", &I);
+ Assert(!isa<Function>(UnderlyingObject), "Unusual: Load from function body",
+ &I);
+ Assert(!isa<BlockAddress>(UnderlyingObject),
+ "Undefined behavior: Load from block address", &I);
}
if (Flags & MemRef::Callee) {
- Assert1(!isa<BlockAddress>(UnderlyingObject),
- "Undefined behavior: Call to block address", &I);
+ Assert(!isa<BlockAddress>(UnderlyingObject),
+ "Undefined behavior: Call to block address", &I);
}
if (Flags & MemRef::Branchee) {
- Assert1(!isa<Constant>(UnderlyingObject) ||
- isa<BlockAddress>(UnderlyingObject),
- "Undefined behavior: Branch to non-blockaddress", &I);
+ Assert(!isa<Constant>(UnderlyingObject) ||
+ isa<BlockAddress>(UnderlyingObject),
+ "Undefined behavior: Branch to non-blockaddress", &I);
}
// Check for buffer overflows and misalignment.
// Only handles memory references that read/write something simple like an
// alloca instruction or a global variable.
+ auto &DL = I.getModule()->getDataLayout();
int64_t Offset = 0;
if (Value *Base = GetPointerBaseWithConstantOffset(Ptr, Offset, DL)) {
// OK, so the access is to a constant offset from Ptr. Check that Ptr is
@@ -439,37 +440,37 @@ void Lint::visitMemoryReference(Instruction &I,
if (AllocaInst *AI = dyn_cast<AllocaInst>(Base)) {
Type *ATy = AI->getAllocatedType();
- if (DL && !AI->isArrayAllocation() && ATy->isSized())
- BaseSize = DL->getTypeAllocSize(ATy);
+ if (!AI->isArrayAllocation() && ATy->isSized())
+ BaseSize = DL.getTypeAllocSize(ATy);
BaseAlign = AI->getAlignment();
- if (DL && BaseAlign == 0 && ATy->isSized())
- BaseAlign = DL->getABITypeAlignment(ATy);
+ if (BaseAlign == 0 && ATy->isSized())
+ BaseAlign = DL.getABITypeAlignment(ATy);
} else if (GlobalVariable *GV = dyn_cast<GlobalVariable>(Base)) {
// If the global may be defined differently in another compilation unit
// then don't warn about funky memory accesses.
if (GV->hasDefinitiveInitializer()) {
Type *GTy = GV->getType()->getElementType();
- if (DL && GTy->isSized())
- BaseSize = DL->getTypeAllocSize(GTy);
+ if (GTy->isSized())
+ BaseSize = DL.getTypeAllocSize(GTy);
BaseAlign = GV->getAlignment();
- if (DL && BaseAlign == 0 && GTy->isSized())
- BaseAlign = DL->getABITypeAlignment(GTy);
+ if (BaseAlign == 0 && GTy->isSized())
+ BaseAlign = DL.getABITypeAlignment(GTy);
}
}
// Accesses from before the start or after the end of the object are not
// defined.
- Assert1(Size == AliasAnalysis::UnknownSize ||
- BaseSize == AliasAnalysis::UnknownSize ||
- (Offset >= 0 && Offset + Size <= BaseSize),
- "Undefined behavior: Buffer overflow", &I);
+ Assert(Size == AliasAnalysis::UnknownSize ||
+ BaseSize == AliasAnalysis::UnknownSize ||
+ (Offset >= 0 && Offset + Size <= BaseSize),
+ "Undefined behavior: Buffer overflow", &I);
// Accesses that say that the memory is more aligned than it is are not
// defined.
- if (DL && Align == 0 && Ty && Ty->isSized())
- Align = DL->getABITypeAlignment(Ty);
- Assert1(!BaseAlign || Align <= MinAlign(BaseAlign, Offset),
- "Undefined behavior: Memory reference address is misaligned", &I);
+ if (Align == 0 && Ty && Ty->isSized())
+ Align = DL.getABITypeAlignment(Ty);
+ Assert(!BaseAlign || Align <= MinAlign(BaseAlign, Offset),
+ "Undefined behavior: Memory reference address is misaligned", &I);
}
}
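
The bounds and alignment test above is plain integer arithmetic once DataLayout has supplied BaseSize and BaseAlign. A minimal standalone sketch of that arithmetic (plain C++, no LLVM; minAlign mirrors llvm::MinAlign, the other names are illustrative):

#include <cassert>
#include <cstdint>

// Largest power of two dividing both A and B, as llvm::MinAlign computes it.
static uint64_t minAlign(uint64_t A, uint64_t B) {
  uint64_t X = A | B;
  return X & (~X + 1);
}

// True when a Size-byte access at Base+Offset stays inside a BaseSize-byte
// object and does not claim more alignment than the base can guarantee.
static bool accessLooksSane(uint64_t BaseSize, uint64_t BaseAlign,
                            int64_t Offset, uint64_t Size, uint64_t Align) {
  bool InBounds = Offset >= 0 && (uint64_t)Offset + Size <= BaseSize;
  bool Aligned = !BaseAlign || Align <= minAlign(BaseAlign, (uint64_t)Offset);
  return InBounds && Aligned;
}

int main() {
  assert(accessLooksSane(16, 8, 8, 8, 8));    // 8 bytes at +8 of a 16B object
  assert(!accessLooksSane(16, 8, 12, 8, 4));  // 12 + 8 > 16: buffer overflow
  assert(!accessLooksSane(16, 4, 4, 4, 8));   // claims align 8, base gives 4
}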
@@ -487,36 +488,35 @@ void Lint::visitStoreInst(StoreInst &I) {
}
void Lint::visitXor(BinaryOperator &I) {
- Assert1(!isa<UndefValue>(I.getOperand(0)) ||
- !isa<UndefValue>(I.getOperand(1)),
- "Undefined result: xor(undef, undef)", &I);
+ Assert(!isa<UndefValue>(I.getOperand(0)) || !isa<UndefValue>(I.getOperand(1)),
+ "Undefined result: xor(undef, undef)", &I);
}
void Lint::visitSub(BinaryOperator &I) {
- Assert1(!isa<UndefValue>(I.getOperand(0)) ||
- !isa<UndefValue>(I.getOperand(1)),
- "Undefined result: sub(undef, undef)", &I);
+ Assert(!isa<UndefValue>(I.getOperand(0)) || !isa<UndefValue>(I.getOperand(1)),
+ "Undefined result: sub(undef, undef)", &I);
}
void Lint::visitLShr(BinaryOperator &I) {
- if (ConstantInt *CI =
- dyn_cast<ConstantInt>(findValue(I.getOperand(1), /*OffsetOk=*/false)))
- Assert1(CI->getValue().ult(cast<IntegerType>(I.getType())->getBitWidth()),
- "Undefined result: Shift count out of range", &I);
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(
+ findValue(I.getOperand(1), I.getModule()->getDataLayout(),
+ /*OffsetOk=*/false)))
+ Assert(CI->getValue().ult(cast<IntegerType>(I.getType())->getBitWidth()),
+ "Undefined result: Shift count out of range", &I);
}
void Lint::visitAShr(BinaryOperator &I) {
- if (ConstantInt *CI =
- dyn_cast<ConstantInt>(findValue(I.getOperand(1), /*OffsetOk=*/false)))
- Assert1(CI->getValue().ult(cast<IntegerType>(I.getType())->getBitWidth()),
- "Undefined result: Shift count out of range", &I);
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(findValue(
+ I.getOperand(1), I.getModule()->getDataLayout(), /*OffsetOk=*/false)))
+ Assert(CI->getValue().ult(cast<IntegerType>(I.getType())->getBitWidth()),
+ "Undefined result: Shift count out of range", &I);
}
void Lint::visitShl(BinaryOperator &I) {
- if (ConstantInt *CI =
- dyn_cast<ConstantInt>(findValue(I.getOperand(1), /*OffsetOk=*/false)))
- Assert1(CI->getValue().ult(cast<IntegerType>(I.getType())->getBitWidth()),
- "Undefined result: Shift count out of range", &I);
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(findValue(
+ I.getOperand(1), I.getModule()->getDataLayout(), /*OffsetOk=*/false)))
+ Assert(CI->getValue().ult(cast<IntegerType>(I.getType())->getBitWidth()),
+ "Undefined result: Shift count out of range", &I);
}
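
visitShl, visitLShr and visitAShr above all reduce to the same range test on a constant shift amount, since an out-of-range count makes the shift's result undefined in LLVM IR. A tiny standalone model of that test (illustrative names, not the LLVM API):

#include <cassert>
#include <cstdint>

// A shift count is only defined when it is strictly below the bit width.
static bool shiftCountInRange(uint64_t Count, unsigned BitWidth) {
  return Count < BitWidth;
}

int main() {
  assert(shiftCountInRange(31, 32));   // defined
  assert(!shiftCountInRange(32, 32));  // Lint: "Shift count out of range"
}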
static bool
@@ -598,9 +598,9 @@ void Lint::visitEHBeginCatch(IntrinsicInst *II) {
// The begin catch must occur in a landing pad block or all paths
// to it must have come from a landing pad.
- Assert1(allPredsCameFromLandingPad(CatchBB, VisitedBlocks),
- "llvm.eh.begincatch may be reachable without passing a landingpad",
- II);
+ Assert(allPredsCameFromLandingPad(CatchBB, VisitedBlocks),
+ "llvm.eh.begincatch may be reachable without passing a landingpad",
+ II);
// Reset the visited block list.
VisitedBlocks.clear();
@@ -612,13 +612,13 @@ void Lint::visitEHBeginCatch(IntrinsicInst *II) {
bool EndCatchFound = allSuccessorsReachEndCatch(
CatchBB, std::next(static_cast<BasicBlock::iterator>(II)),
&SecondBeginCatch, VisitedBlocks);
- Assert2(
+ Assert(
SecondBeginCatch == nullptr,
"llvm.eh.begincatch may be called a second time before llvm.eh.endcatch",
II, SecondBeginCatch);
- Assert1(EndCatchFound,
- "Some paths from llvm.eh.begincatch may not reach llvm.eh.endcatch",
- II);
+ Assert(EndCatchFound,
+ "Some paths from llvm.eh.begincatch may not reach llvm.eh.endcatch",
+ II);
}
static bool allPredCameFromBeginCatch(
@@ -691,17 +691,16 @@ void Lint::visitEHEndCatch(IntrinsicInst *II) {
bool BeginCatchFound =
allPredCameFromBeginCatch(EndCatchBB, BasicBlock::reverse_iterator(II),
&SecondEndCatch, VisitedBlocks);
- Assert2(
+ Assert(
SecondEndCatch == nullptr,
"llvm.eh.endcatch may be called a second time after llvm.eh.begincatch",
II, SecondEndCatch);
- Assert1(
- BeginCatchFound,
- "llvm.eh.endcatch may be reachable without passing llvm.eh.begincatch",
- II);
+ Assert(BeginCatchFound,
+ "llvm.eh.endcatch may be reachable without passing llvm.eh.begincatch",
+ II);
}
-static bool isZero(Value *V, const DataLayout *DL, DominatorTree *DT,
+static bool isZero(Value *V, const DataLayout &DL, DominatorTree *DT,
AssumptionCache *AC) {
// Assume undef could be zero.
if (isa<UndefValue>(V))
@@ -742,30 +741,30 @@ static bool isZero(Value *V, const DataLayout *DL, DominatorTree *DT,
}
void Lint::visitSDiv(BinaryOperator &I) {
- Assert1(!isZero(I.getOperand(1), DL, DT, AC),
- "Undefined behavior: Division by zero", &I);
+ Assert(!isZero(I.getOperand(1), I.getModule()->getDataLayout(), DT, AC),
+ "Undefined behavior: Division by zero", &I);
}
void Lint::visitUDiv(BinaryOperator &I) {
- Assert1(!isZero(I.getOperand(1), DL, DT, AC),
- "Undefined behavior: Division by zero", &I);
+ Assert(!isZero(I.getOperand(1), I.getModule()->getDataLayout(), DT, AC),
+ "Undefined behavior: Division by zero", &I);
}
void Lint::visitSRem(BinaryOperator &I) {
- Assert1(!isZero(I.getOperand(1), DL, DT, AC),
- "Undefined behavior: Division by zero", &I);
+ Assert(!isZero(I.getOperand(1), I.getModule()->getDataLayout(), DT, AC),
+ "Undefined behavior: Division by zero", &I);
}
void Lint::visitURem(BinaryOperator &I) {
- Assert1(!isZero(I.getOperand(1), DL, DT, AC),
- "Undefined behavior: Division by zero", &I);
+ Assert(!isZero(I.getOperand(1), I.getModule()->getDataLayout(), DT, AC),
+ "Undefined behavior: Division by zero", &I);
}
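
All four division visitors above delegate to isZero, which per its own comment assumes undef could be zero and otherwise relies on known-bits reasoning to prove the divisor zero. A hedged, LLVM-free sketch of the known-bits part for an 8-bit value:

#include <cassert>
#include <cstdint>

// Known-bits masks: a bit set in Zero is known 0, a bit set in One is known 1.
struct KnownBits8 {
  uint8_t Zero;
  uint8_t One;
};

// The divisor is provably zero only when every bit is known to be zero,
// matching the "all bits known zero" test that computeKnownBits enables.
static bool provablyZero(KnownBits8 K) {
  return K.Zero == 0xFF;
}

int main() {
  assert(provablyZero({0xFF, 0x00}));
  assert(!provablyZero({0xFE, 0x00}));  // low bit unknown: cannot prove zero
}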
void Lint::visitAllocaInst(AllocaInst &I) {
if (isa<ConstantInt>(I.getArraySize()))
// This isn't undefined behavior, it's just an obvious pessimization.
- Assert1(&I.getParent()->getParent()->getEntryBlock() == I.getParent(),
- "Pessimization: Static alloca outside of entry block", &I);
+ Assert(&I.getParent()->getParent()->getEntryBlock() == I.getParent(),
+ "Pessimization: Static alloca outside of entry block", &I);
// TODO: Check for an unusual size (MSB set?)
}
@@ -779,32 +778,33 @@ void Lint::visitIndirectBrInst(IndirectBrInst &I) {
visitMemoryReference(I, I.getAddress(), AliasAnalysis::UnknownSize, 0,
nullptr, MemRef::Branchee);
- Assert1(I.getNumDestinations() != 0,
- "Undefined behavior: indirectbr with no destinations", &I);
+ Assert(I.getNumDestinations() != 0,
+ "Undefined behavior: indirectbr with no destinations", &I);
}
void Lint::visitExtractElementInst(ExtractElementInst &I) {
- if (ConstantInt *CI =
- dyn_cast<ConstantInt>(findValue(I.getIndexOperand(),
- /*OffsetOk=*/false)))
- Assert1(CI->getValue().ult(I.getVectorOperandType()->getNumElements()),
- "Undefined result: extractelement index out of range", &I);
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(
+ findValue(I.getIndexOperand(), I.getModule()->getDataLayout(),
+ /*OffsetOk=*/false)))
+ Assert(CI->getValue().ult(I.getVectorOperandType()->getNumElements()),
+ "Undefined result: extractelement index out of range", &I);
}
void Lint::visitInsertElementInst(InsertElementInst &I) {
- if (ConstantInt *CI =
- dyn_cast<ConstantInt>(findValue(I.getOperand(2),
- /*OffsetOk=*/false)))
- Assert1(CI->getValue().ult(I.getType()->getNumElements()),
- "Undefined result: insertelement index out of range", &I);
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(
+ findValue(I.getOperand(2), I.getModule()->getDataLayout(),
+ /*OffsetOk=*/false)))
+ Assert(CI->getValue().ult(I.getType()->getNumElements()),
+ "Undefined result: insertelement index out of range", &I);
}
void Lint::visitUnreachableInst(UnreachableInst &I) {
// This isn't undefined behavior, it's merely suspicious.
- Assert1(&I == I.getParent()->begin() ||
- std::prev(BasicBlock::iterator(&I))->mayHaveSideEffects(),
- "Unusual: unreachable immediately preceded by instruction without "
- "side effects", &I);
+ Assert(&I == I.getParent()->begin() ||
+ std::prev(BasicBlock::iterator(&I))->mayHaveSideEffects(),
+ "Unusual: unreachable immediately preceded by instruction without "
+ "side effects",
+ &I);
}
/// findValue - Look through bitcasts and simple memory reference patterns
@@ -814,13 +814,13 @@ void Lint::visitUnreachableInst(UnreachableInst &I) {
/// Most analysis passes don't require this logic, because instcombine
/// will simplify most of these kinds of things away. But it's a goal of
/// this Lint pass to be useful even on non-optimized IR.
-Value *Lint::findValue(Value *V, bool OffsetOk) const {
+Value *Lint::findValue(Value *V, const DataLayout &DL, bool OffsetOk) const {
SmallPtrSet<Value *, 4> Visited;
- return findValueImpl(V, OffsetOk, Visited);
+ return findValueImpl(V, DL, OffsetOk, Visited);
}
/// findValueImpl - Implementation helper for findValue.
-Value *Lint::findValueImpl(Value *V, bool OffsetOk,
+Value *Lint::findValueImpl(Value *V, const DataLayout &DL, bool OffsetOk,
SmallPtrSetImpl<Value *> &Visited) const {
// Detect self-referential values.
if (!Visited.insert(V).second)
@@ -841,7 +841,7 @@ Value *Lint::findValueImpl(Value *V, bool OffsetOk,
break;
if (Value *U = FindAvailableLoadedValue(L->getPointerOperand(),
BB, BBI, 6, AA))
- return findValueImpl(U, OffsetOk, Visited);
+ return findValueImpl(U, DL, OffsetOk, Visited);
if (BBI != BB->begin()) break;
BB = BB->getUniquePredecessor();
if (!BB) break;
@@ -850,40 +850,38 @@ Value *Lint::findValueImpl(Value *V, bool OffsetOk,
} else if (PHINode *PN = dyn_cast<PHINode>(V)) {
if (Value *W = PN->hasConstantValue())
if (W != V)
- return findValueImpl(W, OffsetOk, Visited);
+ return findValueImpl(W, DL, OffsetOk, Visited);
} else if (CastInst *CI = dyn_cast<CastInst>(V)) {
if (CI->isNoopCast(DL))
- return findValueImpl(CI->getOperand(0), OffsetOk, Visited);
+ return findValueImpl(CI->getOperand(0), DL, OffsetOk, Visited);
} else if (ExtractValueInst *Ex = dyn_cast<ExtractValueInst>(V)) {
if (Value *W = FindInsertedValue(Ex->getAggregateOperand(),
Ex->getIndices()))
if (W != V)
- return findValueImpl(W, OffsetOk, Visited);
+ return findValueImpl(W, DL, OffsetOk, Visited);
} else if (ConstantExpr *CE = dyn_cast<ConstantExpr>(V)) {
// Same as above, but for ConstantExpr instead of Instruction.
if (Instruction::isCast(CE->getOpcode())) {
if (CastInst::isNoopCast(Instruction::CastOps(CE->getOpcode()),
- CE->getOperand(0)->getType(),
- CE->getType(),
- DL ? DL->getIntPtrType(V->getType()) :
- Type::getInt64Ty(V->getContext())))
- return findValueImpl(CE->getOperand(0), OffsetOk, Visited);
+ CE->getOperand(0)->getType(), CE->getType(),
+ DL.getIntPtrType(V->getType())))
+ return findValueImpl(CE->getOperand(0), DL, OffsetOk, Visited);
} else if (CE->getOpcode() == Instruction::ExtractValue) {
ArrayRef<unsigned> Indices = CE->getIndices();
if (Value *W = FindInsertedValue(CE->getOperand(0), Indices))
if (W != V)
- return findValueImpl(W, OffsetOk, Visited);
+ return findValueImpl(W, DL, OffsetOk, Visited);
}
}
// As a last resort, try SimplifyInstruction or constant folding.
if (Instruction *Inst = dyn_cast<Instruction>(V)) {
if (Value *W = SimplifyInstruction(Inst, DL, TLI, DT, AC))
- return findValueImpl(W, OffsetOk, Visited);
+ return findValueImpl(W, DL, OffsetOk, Visited);
} else if (ConstantExpr *CE = dyn_cast<ConstantExpr>(V)) {
if (Value *W = ConstantFoldConstantExpression(CE, DL, TLI))
if (W != V)
- return findValueImpl(W, OffsetOk, Visited);
+ return findValueImpl(W, DL, OffsetOk, Visited);
}
return V;
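
Every recursive call in findValueImpl now threads the same DataLayout through, and the Visited set is what keeps the look-through chain finite. A minimal sketch of that recursion pattern with a toy value type instead of llvm::Value:

#include <cassert>
#include <unordered_set>

struct Val {
  Val *Inner = nullptr;  // stands in for a no-op cast, constant PHI, etc.
};

static Val *findValueImpl(Val *V, std::unordered_set<Val *> &Visited) {
  if (!Visited.insert(V).second)
    return V;                                // self-referential chain: stop
  if (V->Inner)
    return findValueImpl(V->Inner, Visited);
  return V;
}

static Val *findValue(Val *V) {
  std::unordered_set<Val *> Visited;
  return findValueImpl(V, Visited);
}

int main() {
  Val A, B;
  A.Inner = &B;
  assert(findValue(&A) == &B);  // looks through the wrapper
  B.Inner = &A;                 // make the chain circular
  assert(findValue(&A) == &A);  // the Visited set breaks the cycle
}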
diff --git a/lib/Analysis/Loads.cpp b/lib/Analysis/Loads.cpp
index 5042eb9..aed3b04 100644
--- a/lib/Analysis/Loads.cpp
+++ b/lib/Analysis/Loads.cpp
@@ -19,6 +19,7 @@
#include "llvm/IR/GlobalVariable.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/Module.h"
#include "llvm/IR/Operator.h"
using namespace llvm;
@@ -62,7 +63,8 @@ static bool AreEquivalentAddressValues(const Value *A, const Value *B) {
/// This uses the pointee type to determine how many bytes need to be safe to
/// load from the pointer.
bool llvm::isSafeToLoadUnconditionally(Value *V, Instruction *ScanFrom,
- unsigned Align, const DataLayout *DL) {
+ unsigned Align) {
+ const DataLayout &DL = ScanFrom->getModule()->getDataLayout();
int64_t ByteOffset = 0;
Value *Base = V;
Base = GetPointerBaseWithConstantOffset(V, ByteOffset, DL);
@@ -87,19 +89,19 @@ bool llvm::isSafeToLoadUnconditionally(Value *V, Instruction *ScanFrom,
}
PointerType *AddrTy = cast<PointerType>(V->getType());
- uint64_t LoadSize = DL ? DL->getTypeStoreSize(AddrTy->getElementType()) : 0;
+ uint64_t LoadSize = DL.getTypeStoreSize(AddrTy->getElementType());
// If we found a base allocated type from either an alloca or global variable,
// try to see if we are definitively within the allocated region. We need to
// know the size of the base type and the loaded type to do anything in this
- // case, so only try this when we have the DataLayout available.
- if (BaseType && BaseType->isSized() && DL) {
+ // case.
+ if (BaseType && BaseType->isSized()) {
if (BaseAlign == 0)
- BaseAlign = DL->getPrefTypeAlignment(BaseType);
+ BaseAlign = DL.getPrefTypeAlignment(BaseType);
if (Align <= BaseAlign) {
// Check if the load is within the bounds of the underlying object.
- if (ByteOffset + LoadSize <= DL->getTypeAllocSize(BaseType) &&
+ if (ByteOffset + LoadSize <= DL.getTypeAllocSize(BaseType) &&
(Align == 0 || (ByteOffset % Align) == 0))
return true;
}
@@ -133,16 +135,13 @@ bool llvm::isSafeToLoadUnconditionally(Value *V, Instruction *ScanFrom,
else
continue;
- // Handle trivial cases even w/o DataLayout or other work.
+ // Handle trivial cases.
if (AccessedPtr == V)
return true;
- if (!DL)
- continue;
-
auto *AccessedTy = cast<PointerType>(AccessedPtr->getType());
if (AreEquivalentAddressValues(AccessedPtr->stripPointerCasts(), V) &&
- LoadSize <= DL->getTypeStoreSize(AccessedTy->getElementType()))
+ LoadSize <= DL.getTypeStoreSize(AccessedTy->getElementType()))
return true;
}
return false;
@@ -176,13 +175,10 @@ Value *llvm::FindAvailableLoadedValue(Value *Ptr, BasicBlock *ScanBB,
Type *AccessTy = cast<PointerType>(Ptr->getType())->getElementType();
- // Try to get the DataLayout for this module. This may be null, in which case
- // the optimizations will be limited.
- const DataLayout *DL = ScanBB->getDataLayout();
+ const DataLayout &DL = ScanBB->getModule()->getDataLayout();
// Try to get the store size for the type.
- uint64_t AccessSize = DL ? DL->getTypeStoreSize(AccessTy)
- : AA ? AA->getTypeStoreSize(AccessTy) : 0;
+ uint64_t AccessSize = DL.getTypeStoreSize(AccessTy);
Value *StrippedPtr = Ptr->stripPointerCasts();
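
With the DataLayout now taken from the scan instruction's module, the in-bounds test in isSafeToLoadUnconditionally always runs instead of being skipped when no layout was available. A plain-C++ model of that test (names are illustrative):

#include <cassert>
#include <cstdint>

// A LoadSize-byte load at ByteOffset into an AllocSize-byte object is safe
// when it ends inside the object and the offset respects the alignment.
static bool loadIsInBounds(int64_t ByteOffset, uint64_t LoadSize,
                           uint64_t AllocSize, uint64_t Align) {
  if (ByteOffset < 0)
    return false;
  return (uint64_t)ByteOffset + LoadSize <= AllocSize &&
         (Align == 0 || (uint64_t)ByteOffset % Align == 0);
}

int main() {
  assert(loadIsInBounds(4, 4, 16, 4));    // i32 at +4 of a 16-byte alloca
  assert(!loadIsInBounds(14, 4, 16, 1));  // 14 + 4 > 16: straddles the end
}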
diff --git a/lib/Analysis/LoopAccessAnalysis.cpp b/lib/Analysis/LoopAccessAnalysis.cpp
index 7bedd40..1818e93 100644
--- a/lib/Analysis/LoopAccessAnalysis.cpp
+++ b/lib/Analysis/LoopAccessAnalysis.cpp
@@ -15,11 +15,13 @@
#include "llvm/Analysis/LoopAccessAnalysis.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/ScalarEvolutionExpander.h"
+#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/DiagnosticInfo.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Utils/VectorUtils.h"
using namespace llvm;
@@ -49,6 +51,13 @@ unsigned VectorizerParams::RuntimeMemoryCheckThreshold;
/// Maximum SIMD width.
const unsigned VectorizerParams::MaxVectorWidth = 64;
+/// \brief We collect interesting dependences up to this threshold.
+static cl::opt<unsigned> MaxInterestingDependence(
+ "max-interesting-dependences", cl::Hidden,
+ cl::desc("Maximum number of interesting dependences collected by "
+ "loop-access analysis (default = 100)"),
+ cl::init(100));
+
bool VectorizerParams::isInterleaveForced() {
return ::VectorizationInterleave.getNumOccurrences() > 0;
}
@@ -120,8 +129,8 @@ void LoopAccessInfo::RuntimePointerCheck::insert(
AliasSetId.push_back(ASId);
}
-bool LoopAccessInfo::RuntimePointerCheck::needsChecking(unsigned I,
- unsigned J) const {
+bool LoopAccessInfo::RuntimePointerCheck::needsChecking(
+ unsigned I, unsigned J, const SmallVectorImpl<int> *PtrPartition) const {
// No need to check if two readonly pointers intersect.
if (!IsWritePtr[I] && !IsWritePtr[J])
return false;
@@ -134,11 +143,19 @@ bool LoopAccessInfo::RuntimePointerCheck::needsChecking(unsigned I,
if (AliasSetId[I] != AliasSetId[J])
return false;
+  // If PtrPartition is set, omit checks between pointers of the same partition.
+ // Partition number -1 means that the pointer is used in multiple partitions.
+ // In this case we can't omit the check.
+ if (PtrPartition && (*PtrPartition)[I] != -1 &&
+ (*PtrPartition)[I] == (*PtrPartition)[J])
+ return false;
+
return true;
}
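
The new PtrPartition parameter lets loop distribution suppress checks between pointers that end up in the same partition. A standalone model of needsChecking's filters (field names mirror the patch, but this is a sketch, not the LAA class):

#include <cassert>
#include <vector>

struct RTCheck {
  std::vector<bool> IsWritePtr;
  std::vector<unsigned> AliasSetId;

  bool needsChecking(unsigned I, unsigned J,
                     const std::vector<int> *PtrPartition) const {
    if (!IsWritePtr[I] && !IsWritePtr[J])
      return false;                       // two reads never conflict
    if (AliasSetId[I] != AliasSetId[J])
      return false;                       // already proven disjoint
    // Same partition, and not the shared partition -1: check is redundant.
    if (PtrPartition && (*PtrPartition)[I] != -1 &&
        (*PtrPartition)[I] == (*PtrPartition)[J])
      return false;
    return true;
  }
};

int main() {
  RTCheck RT{{true, false, true}, {0, 0, 0}};
  std::vector<int> Part{0, 0, 1};
  assert(!RT.needsChecking(0, 1, &Part));  // same partition: skip the check
  assert(RT.needsChecking(0, 2, &Part));   // different partitions: check
}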
-void LoopAccessInfo::RuntimePointerCheck::print(raw_ostream &OS,
- unsigned Depth) const {
+void LoopAccessInfo::RuntimePointerCheck::print(
+ raw_ostream &OS, unsigned Depth,
+ const SmallVectorImpl<int> *PtrPartition) const {
unsigned NumPointers = Pointers.size();
if (NumPointers == 0)
return;
@@ -147,10 +164,16 @@ void LoopAccessInfo::RuntimePointerCheck::print(raw_ostream &OS,
unsigned N = 0;
for (unsigned I = 0; I < NumPointers; ++I)
for (unsigned J = I + 1; J < NumPointers; ++J)
- if (needsChecking(I, J)) {
+ if (needsChecking(I, J, PtrPartition)) {
OS.indent(Depth) << N++ << ":\n";
- OS.indent(Depth + 2) << *Pointers[I] << "\n";
- OS.indent(Depth + 2) << *Pointers[J] << "\n";
+ OS.indent(Depth + 2) << *Pointers[I];
+ if (PtrPartition)
+ OS << " (Partition: " << (*PtrPartition)[I] << ")";
+ OS << "\n";
+ OS.indent(Depth + 2) << *Pointers[J];
+ if (PtrPartition)
+ OS << " (Partition: " << (*PtrPartition)[J] << ")";
+ OS << "\n";
}
}
@@ -165,11 +188,9 @@ public:
typedef PointerIntPair<Value *, 1, bool> MemAccessInfo;
typedef SmallPtrSet<MemAccessInfo, 8> MemAccessInfoSet;
- /// \brief Set of potential dependent memory accesses.
- typedef EquivalenceClasses<MemAccessInfo> DepCandidates;
-
- AccessAnalysis(const DataLayout *Dl, AliasAnalysis *AA, DepCandidates &DA) :
- DL(Dl), AST(*AA), DepCands(DA), IsRTCheckNeeded(false) {}
+ AccessAnalysis(const DataLayout &Dl, AliasAnalysis *AA,
+ MemoryDepChecker::DepCandidates &DA)
+ : DL(Dl), AST(*AA), DepCands(DA), IsRTCheckNeeded(false) {}
/// \brief Register a load and whether it is only read from.
void addLoad(AliasAnalysis::Location &Loc, bool IsReadOnly) {
@@ -217,14 +238,14 @@ private:
/// Set of all accesses.
PtrAccessSet Accesses;
+ const DataLayout &DL;
+
/// Set of accesses that need a further dependence check.
MemAccessInfoSet CheckDeps;
/// Set of pointers that are read only.
SmallPtrSet<Value*, 16> ReadOnlyPtr;
- const DataLayout *DL;
-
/// An alias set tracker to partition the access set by underlying object and
// intrinsic property (such as TBAA metadata).
AliasSetTracker AST;
@@ -232,7 +253,7 @@ private:
/// Sets of potentially dependent accesses - members of one set share an
/// underlying pointer. The set "CheckDeps" identifies which sets really need a
/// dependence check.
- DepCandidates &DepCands;
+ MemoryDepChecker::DepCandidates &DepCands;
bool IsRTCheckNeeded;
};
@@ -252,8 +273,8 @@ static bool hasComputableBounds(ScalarEvolution *SE,
/// \brief Check the stride of the pointer and ensure that it does not wrap in
/// the address space.
-static int isStridedPtr(ScalarEvolution *SE, const DataLayout *DL, Value *Ptr,
- const Loop *Lp, const ValueToValueMap &StridesMap);
+static int isStridedPtr(ScalarEvolution *SE, Value *Ptr, const Loop *Lp,
+ const ValueToValueMap &StridesMap);
bool AccessAnalysis::canCheckPtrAtRT(
LoopAccessInfo::RuntimePointerCheck &RtCheck, unsigned &NumComparisons,
@@ -289,10 +310,10 @@ bool AccessAnalysis::canCheckPtrAtRT(
++NumReadPtrChecks;
if (hasComputableBounds(SE, StridesMap, Ptr) &&
- // When we run after a failing dependency check we have to make sure we
- // don't have wrapping pointers.
+ // When we run after a failing dependency check we have to make sure
+ // we don't have wrapping pointers.
(!ShouldCheckStride ||
- isStridedPtr(SE, DL, Ptr, TheLoop, StridesMap) == 1)) {
+ isStridedPtr(SE, Ptr, TheLoop, StridesMap) == 1)) {
// The id of the dependence set.
unsigned DepId;
@@ -362,7 +383,7 @@ void AccessAnalysis::processMemAccesses() {
DEBUG(dbgs() << "LAA: Processing memory accesses...\n");
DEBUG(dbgs() << " AST: "; AST.dump());
- DEBUG(dbgs() << "LAA: Accesses:\n");
+ DEBUG(dbgs() << "LAA: Accesses(" << Accesses.size() << "):\n");
DEBUG({
for (auto A : Accesses)
dbgs() << "\t" << *A.getPointer() << " (" <<
@@ -460,124 +481,6 @@ void AccessAnalysis::processMemAccesses() {
}
}
-namespace {
-/// \brief Checks memory dependences among accesses to the same underlying
-/// object to determine whether vectorization is legal or not (and at
-/// which vectorization factor).
-///
-/// This class works under the assumption that we already checked that memory
-/// locations with different underlying pointers are "must-not alias".
-/// We use the ScalarEvolution framework to symbolically evaluate access
-/// functions pairs. Since we currently don't restructure the loop we can rely
-/// on the program order of memory accesses to determine their safety.
-/// At the moment we will only deem accesses as safe for:
-/// * A negative constant distance assuming program order.
-///
-/// Safe: tmp = a[i + 1]; OR a[i + 1] = x;
-/// a[i] = tmp; y = a[i];
-///
-/// The latter case is safe because later checks guarantee that there can't
-/// be a cycle through a phi node (that is, we check that "x" and "y" is not
-/// the same variable: a header phi can only be an induction or a reduction, a
-/// reduction can't have a memory sink, an induction can't have a memory
-/// source). This is important and must not be violated (or we have to
-/// resort to checking for cycles through memory).
-///
-/// * A positive constant distance assuming program order that is bigger
-/// than the biggest memory access.
-///
-/// tmp = a[i] OR b[i] = x
-/// a[i+2] = tmp y = b[i+2];
-///
-/// Safe distance: 2 x sizeof(a[0]), and 2 x sizeof(b[0]), respectively.
-///
-/// * Zero distances and all accesses have the same size.
-///
-class MemoryDepChecker {
-public:
- typedef PointerIntPair<Value *, 1, bool> MemAccessInfo;
- typedef SmallPtrSet<MemAccessInfo, 8> MemAccessInfoSet;
-
- MemoryDepChecker(ScalarEvolution *Se, const DataLayout *Dl, const Loop *L)
- : SE(Se), DL(Dl), InnermostLoop(L), AccessIdx(0),
- ShouldRetryWithRuntimeCheck(false) {}
-
- /// \brief Register the location (instructions are given increasing numbers)
- /// of a write access.
- void addAccess(StoreInst *SI) {
- Value *Ptr = SI->getPointerOperand();
- Accesses[MemAccessInfo(Ptr, true)].push_back(AccessIdx);
- InstMap.push_back(SI);
- ++AccessIdx;
- }
-
- /// \brief Register the location (instructions are given increasing numbers)
- /// of a write access.
- void addAccess(LoadInst *LI) {
- Value *Ptr = LI->getPointerOperand();
- Accesses[MemAccessInfo(Ptr, false)].push_back(AccessIdx);
- InstMap.push_back(LI);
- ++AccessIdx;
- }
-
- /// \brief Check whether the dependencies between the accesses are safe.
- ///
- /// Only checks sets with elements in \p CheckDeps.
- bool areDepsSafe(AccessAnalysis::DepCandidates &AccessSets,
- MemAccessInfoSet &CheckDeps, const ValueToValueMap &Strides);
-
- /// \brief The maximum number of bytes of a vector register we can vectorize
- /// the accesses safely with.
- unsigned getMaxSafeDepDistBytes() { return MaxSafeDepDistBytes; }
-
-  /// \brief In some cases when the dependency check fails we can still
- /// vectorize the loop with a dynamic array access check.
- bool shouldRetryWithRuntimeCheck() { return ShouldRetryWithRuntimeCheck; }
-
-private:
- ScalarEvolution *SE;
- const DataLayout *DL;
- const Loop *InnermostLoop;
-
- /// \brief Maps access locations (ptr, read/write) to program order.
- DenseMap<MemAccessInfo, std::vector<unsigned> > Accesses;
-
- /// \brief Memory access instructions in program order.
- SmallVector<Instruction *, 16> InstMap;
-
- /// \brief The program order index to be used for the next instruction.
- unsigned AccessIdx;
-
- // We can access this many bytes in parallel safely.
- unsigned MaxSafeDepDistBytes;
-
- /// \brief If we see a non-constant dependence distance we can still try to
- /// vectorize this loop with runtime checks.
- bool ShouldRetryWithRuntimeCheck;
-
- /// \brief Check whether there is a plausible dependence between the two
- /// accesses.
- ///
- /// Access \p A must happen before \p B in program order. The two indices
- /// identify the index into the program order map.
- ///
- /// This function checks whether there is a plausible dependence (or the
- /// absence of such can't be proved) between the two accesses. If there is a
- /// plausible dependence but the dependence distance is bigger than one
- /// element access it records this distance in \p MaxSafeDepDistBytes (if this
- /// distance is smaller than any other distance encountered so far).
- /// Otherwise, this function returns true signaling a possible dependence.
- bool isDependent(const MemAccessInfo &A, unsigned AIdx,
- const MemAccessInfo &B, unsigned BIdx,
- const ValueToValueMap &Strides);
-
- /// \brief Check whether the data dependence could prevent store-load
- /// forwarding.
- bool couldPreventStoreLoadForward(unsigned Distance, unsigned TypeByteSize);
-};
-
-} // end anonymous namespace
-
static bool isInBoundsGep(Value *Ptr) {
if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Ptr))
return GEP->isInBounds();
@@ -585,8 +488,8 @@ static bool isInBoundsGep(Value *Ptr) {
}
/// \brief Check whether the access through \p Ptr has a constant stride.
-static int isStridedPtr(ScalarEvolution *SE, const DataLayout *DL, Value *Ptr,
- const Loop *Lp, const ValueToValueMap &StridesMap) {
+static int isStridedPtr(ScalarEvolution *SE, Value *Ptr, const Loop *Lp,
+ const ValueToValueMap &StridesMap) {
const Type *Ty = Ptr->getType();
assert(Ty->isPointerTy() && "Unexpected non-ptr");
@@ -640,7 +543,8 @@ static int isStridedPtr(ScalarEvolution *SE, const DataLayout *DL, Value *Ptr,
return 0;
}
- int64_t Size = DL->getTypeAllocSize(PtrTy->getElementType());
+ auto &DL = Lp->getHeader()->getModule()->getDataLayout();
+ int64_t Size = DL.getTypeAllocSize(PtrTy->getElementType());
const APInt &APStepVal = C->getValue()->getValue();
// Huge step value - give up.
@@ -665,6 +569,54 @@ static int isStridedPtr(ScalarEvolution *SE, const DataLayout *DL, Value *Ptr,
return Stride;
}
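
The final step of isStridedPtr divides the AddRec's constant byte step by the element size from DataLayout; a non-exact division means the pointer is not strided. A small standalone model of that step:

#include <cassert>
#include <cstdint>

// Stride in elements, or 0 when the byte step is not an exact multiple.
static int strideFromStep(int64_t StepBytes, int64_t ElemSize) {
  if (ElemSize == 0 || StepBytes % ElemSize != 0)
    return 0;
  return (int)(StepBytes / ElemSize);
}

int main() {
  assert(strideFromStep(8, 8) == 1);    // consecutive i64 accesses
  assert(strideFromStep(-8, 8) == -1);  // reverse-iterating loop
  assert(strideFromStep(12, 8) == 0);   // non-integral stride: give up
}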
+bool MemoryDepChecker::Dependence::isSafeForVectorization(DepType Type) {
+ switch (Type) {
+ case NoDep:
+ case Forward:
+ case BackwardVectorizable:
+ return true;
+
+ case Unknown:
+ case ForwardButPreventsForwarding:
+ case Backward:
+ case BackwardVectorizableButPreventsForwarding:
+ return false;
+ }
+ llvm_unreachable("unexpected DepType!");
+}
+
+bool MemoryDepChecker::Dependence::isInterestingDependence(DepType Type) {
+ switch (Type) {
+ case NoDep:
+ case Forward:
+ return false;
+
+ case BackwardVectorizable:
+ case Unknown:
+ case ForwardButPreventsForwarding:
+ case Backward:
+ case BackwardVectorizableButPreventsForwarding:
+ return true;
+ }
+ llvm_unreachable("unexpected DepType!");
+}
+
+bool MemoryDepChecker::Dependence::isPossiblyBackward() const {
+ switch (Type) {
+ case NoDep:
+ case Forward:
+ case ForwardButPreventsForwarding:
+ return false;
+
+ case Unknown:
+ case BackwardVectorizable:
+ case Backward:
+ case BackwardVectorizableButPreventsForwarding:
+ return true;
+ }
+ llvm_unreachable("unexpected DepType!");
+}
+
bool MemoryDepChecker::couldPreventStoreLoadForward(unsigned Distance,
unsigned TypeByteSize) {
// If loads occur at a distance that is not a multiple of a feasible vector
@@ -704,9 +656,10 @@ bool MemoryDepChecker::couldPreventStoreLoadForward(unsigned Distance,
return false;
}
-bool MemoryDepChecker::isDependent(const MemAccessInfo &A, unsigned AIdx,
- const MemAccessInfo &B, unsigned BIdx,
- const ValueToValueMap &Strides) {
+MemoryDepChecker::Dependence::DepType
+MemoryDepChecker::isDependent(const MemAccessInfo &A, unsigned AIdx,
+ const MemAccessInfo &B, unsigned BIdx,
+ const ValueToValueMap &Strides) {
assert (AIdx < BIdx && "Must pass arguments in program order");
Value *APtr = A.getPointer();
@@ -716,18 +669,18 @@ bool MemoryDepChecker::isDependent(const MemAccessInfo &A, unsigned AIdx,
// Two reads are independent.
if (!AIsWrite && !BIsWrite)
- return false;
+ return Dependence::NoDep;
// We cannot check pointers in different address spaces.
if (APtr->getType()->getPointerAddressSpace() !=
BPtr->getType()->getPointerAddressSpace())
- return true;
+ return Dependence::Unknown;
const SCEV *AScev = replaceSymbolicStrideSCEV(SE, Strides, APtr);
const SCEV *BScev = replaceSymbolicStrideSCEV(SE, Strides, BPtr);
- int StrideAPtr = isStridedPtr(SE, DL, APtr, InnermostLoop, Strides);
- int StrideBPtr = isStridedPtr(SE, DL, BPtr, InnermostLoop, Strides);
+ int StrideAPtr = isStridedPtr(SE, APtr, InnermostLoop, Strides);
+ int StrideBPtr = isStridedPtr(SE, BPtr, InnermostLoop, Strides);
const SCEV *Src = AScev;
const SCEV *Sink = BScev;
@@ -756,19 +709,20 @@ bool MemoryDepChecker::isDependent(const MemAccessInfo &A, unsigned AIdx,
// the address space.
if (!StrideAPtr || !StrideBPtr || StrideAPtr != StrideBPtr){
DEBUG(dbgs() << "Non-consecutive pointer access\n");
- return true;
+ return Dependence::Unknown;
}
const SCEVConstant *C = dyn_cast<SCEVConstant>(Dist);
if (!C) {
DEBUG(dbgs() << "LAA: Dependence because of non-constant distance\n");
ShouldRetryWithRuntimeCheck = true;
- return true;
+ return Dependence::Unknown;
}
Type *ATy = APtr->getType()->getPointerElementType();
Type *BTy = BPtr->getType()->getPointerElementType();
- unsigned TypeByteSize = DL->getTypeAllocSize(ATy);
+ auto &DL = InnermostLoop->getHeader()->getModule()->getDataLayout();
+ unsigned TypeByteSize = DL.getTypeAllocSize(ATy);
// Negative distances are not plausible dependencies.
const APInt &Val = C->getValue()->getValue();
@@ -777,19 +731,19 @@ bool MemoryDepChecker::isDependent(const MemAccessInfo &A, unsigned AIdx,
if (IsTrueDataDependence &&
(couldPreventStoreLoadForward(Val.abs().getZExtValue(), TypeByteSize) ||
ATy != BTy))
- return true;
+ return Dependence::ForwardButPreventsForwarding;
DEBUG(dbgs() << "LAA: Dependence is negative: NoDep\n");
- return false;
+ return Dependence::Forward;
}
// Write to the same location with the same size.
// Could be improved to assert type sizes are the same (i32 == float, etc).
if (Val == 0) {
if (ATy == BTy)
- return false;
+ return Dependence::NoDep;
DEBUG(dbgs() << "LAA: Zero dependence difference but different types\n");
- return true;
+ return Dependence::Unknown;
}
assert(Val.isStrictlyPositive() && "Expect a positive value");
@@ -797,7 +751,7 @@ bool MemoryDepChecker::isDependent(const MemAccessInfo &A, unsigned AIdx,
if (ATy != BTy) {
DEBUG(dbgs() <<
"LAA: ReadWrite-Write positive dependency with different types\n");
- return true;
+ return Dependence::Unknown;
}
unsigned Distance = (unsigned) Val.getZExtValue();
@@ -816,7 +770,7 @@ bool MemoryDepChecker::isDependent(const MemAccessInfo &A, unsigned AIdx,
Distance < TypeByteSize * ForcedUnroll * ForcedFactor) {
DEBUG(dbgs() << "LAA: Failure because of Positive distance "
<< Val.getSExtValue() << '\n');
- return true;
+ return Dependence::Backward;
}
// Positive distance bigger than max vectorization factor.
@@ -826,15 +780,15 @@ bool MemoryDepChecker::isDependent(const MemAccessInfo &A, unsigned AIdx,
bool IsTrueDataDependence = (!AIsWrite && BIsWrite);
if (IsTrueDataDependence &&
couldPreventStoreLoadForward(Distance, TypeByteSize))
- return true;
+ return Dependence::BackwardVectorizableButPreventsForwarding;
DEBUG(dbgs() << "LAA: Positive distance " << Val.getSExtValue() <<
" with max VF = " << MaxSafeDepDistBytes / TypeByteSize << '\n');
- return false;
+ return Dependence::BackwardVectorizable;
}
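
isDependent now reports a Dependence::DepType rather than a bare bool. A toy classifier over a constant byte distance, following the rules in the old class comment (negative distances are forward in program order, zero distances with equal access sizes are no dependence, and a positive distance must clear the widest access to be vectorizable); the 2x threshold here is illustrative, not the VF-scaled bound LAA actually computes:

#include <cassert>
#include <cstdint>

enum DepType { NoDep, Forward, Backward, BackwardVectorizable };

static DepType classify(int64_t DistBytes, uint64_t TypeByteSize) {
  if (DistBytes < 0)
    return Forward;              // the sink runs after the source anyway
  if (DistBytes == 0)
    return NoDep;                // same location, same access size
  return (uint64_t)DistBytes < 2 * TypeByteSize ? Backward
                                                : BackwardVectorizable;
}

int main() {
  assert(classify(-4, 4) == Forward);
  assert(classify(0, 4) == NoDep);
  assert(classify(4, 4) == Backward);
  assert(classify(8, 4) == BackwardVectorizable);
}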
-bool MemoryDepChecker::areDepsSafe(AccessAnalysis::DepCandidates &AccessSets,
+bool MemoryDepChecker::areDepsSafe(DepCandidates &AccessSets,
MemAccessInfoSet &CheckDeps,
const ValueToValueMap &Strides) {
@@ -860,9 +814,33 @@ bool MemoryDepChecker::areDepsSafe(AccessAnalysis::DepCandidates &AccessSets,
I1E = Accesses[*AI].end(); I1 != I1E; ++I1)
for (std::vector<unsigned>::iterator I2 = Accesses[*OI].begin(),
I2E = Accesses[*OI].end(); I2 != I2E; ++I2) {
- if (*I1 < *I2 && isDependent(*AI, *I1, *OI, *I2, Strides))
- return false;
- if (*I2 < *I1 && isDependent(*OI, *I2, *AI, *I1, Strides))
+ auto A = std::make_pair(&*AI, *I1);
+ auto B = std::make_pair(&*OI, *I2);
+
+ assert(*I1 != *I2);
+ if (*I1 > *I2)
+ std::swap(A, B);
+
+ Dependence::DepType Type =
+ isDependent(*A.first, A.second, *B.first, B.second, Strides);
+ SafeForVectorization &= Dependence::isSafeForVectorization(Type);
+
+ // Gather dependences unless we accumulated MaxInterestingDependence
+ // dependences. In that case return as soon as we find the first
+ // unsafe dependence. This puts a limit on this quadratic
+ // algorithm.
+ if (RecordInterestingDependences) {
+ if (Dependence::isInterestingDependence(Type))
+ InterestingDependences.push_back(
+ Dependence(A.second, B.second, Type));
+
+ if (InterestingDependences.size() >= MaxInterestingDependence) {
+ RecordInterestingDependences = false;
+ InterestingDependences.clear();
+ DEBUG(dbgs() << "Too many dependences, stopped recording\n");
+ }
+ }
+ if (!RecordInterestingDependences && !SafeForVectorization)
return false;
}
++OI;
@@ -870,7 +848,34 @@ bool MemoryDepChecker::areDepsSafe(AccessAnalysis::DepCandidates &AccessSets,
AI++;
}
}
- return true;
+
+ DEBUG(dbgs() << "Total Interesting Dependences: "
+ << InterestingDependences.size() << "\n");
+ return SafeForVectorization;
+}
+
+SmallVector<Instruction *, 4>
+MemoryDepChecker::getInstructionsForAccess(Value *Ptr, bool isWrite) const {
+ MemAccessInfo Access(Ptr, isWrite);
+ auto &IndexVector = Accesses.find(Access)->second;
+
+ SmallVector<Instruction *, 4> Insts;
+ std::transform(IndexVector.begin(), IndexVector.end(),
+ std::back_inserter(Insts),
+ [&](unsigned Idx) { return this->InstMap[Idx]; });
+ return Insts;
+}
+
+const char *MemoryDepChecker::Dependence::DepName[] = {
+ "NoDep", "Unknown", "Forward", "ForwardButPreventsForwarding", "Backward",
+ "BackwardVectorizable", "BackwardVectorizableButPreventsForwarding"};
+
+void MemoryDepChecker::Dependence::print(
+ raw_ostream &OS, unsigned Depth,
+ const SmallVectorImpl<Instruction *> &Instrs) const {
+ OS.indent(Depth) << DepName[Type] << ":\n";
+ OS.indent(Depth + 2) << *Instrs[Source] << " -> \n";
+ OS.indent(Depth + 2) << *Instrs[Destination] << "\n";
}
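
The MaxInterestingDependence cap is what keeps areDepsSafe's quadratic scan bounded: once the list fills up, recording stops, the partial list is dropped, and only the overall safety bit is tracked so the first unsafe dependence can end the scan early. A compact sketch of that control flow (the threshold and the "interesting" rule are toy stand-ins):

#include <cassert>
#include <vector>

int main() {
  const unsigned MaxInteresting = 2;  // stands in for the cl::opt default 100
  std::vector<int> Interesting;
  bool Recording = true;
  bool Safe = true;
  for (int Dep : {1, 1, 1}) {         // every pair yields an interesting dep
    Safe = Safe && (Dep == 0);        // toy rule: any dependence is unsafe
    if (Recording && Dep != 0) {
      Interesting.push_back(Dep);
      if (Interesting.size() >= MaxInteresting) {
        Recording = false;            // too many: stop recording...
        Interesting.clear();          // ...and drop what was gathered
      }
    }
    if (!Recording && !Safe)
      break;                          // first unsafe dep ends the scan
  }
  assert(!Safe && Interesting.empty());
}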
bool LoopAccessInfo::canAnalyzeLoop() {
@@ -939,7 +944,6 @@ void LoopAccessInfo::analyzeLoop(const ValueToValueMap &Strides) {
PtrRtCheck.Need = false;
const bool IsAnnotatedParallel = TheLoop->isAnnotatedParallel();
- MemoryDepChecker DepChecker(SE, DL, TheLoop);
// For each block.
for (Loop::block_iterator bb = TheLoop->block_begin(),
@@ -960,6 +964,12 @@ void LoopAccessInfo::analyzeLoop(const ValueToValueMap &Strides) {
if (Call && getIntrinsicIDForCall(Call, TLI))
continue;
+ // If the function has an explicit vectorized counterpart, we can safely
+ // assume that it can be vectorized.
+ if (Call && !Call->isNoBuiltin() && Call->getCalledFunction() &&
+ TLI->isFunctionVectorizable(Call->getCalledFunction()->getName()))
+ continue;
+
LoadInst *Ld = dyn_cast<LoadInst>(it);
if (!Ld || (!Ld->isSimple() && !IsAnnotatedParallel)) {
emitAnalysis(LoopAccessReport(Ld)
@@ -1008,8 +1018,9 @@ void LoopAccessInfo::analyzeLoop(const ValueToValueMap &Strides) {
return;
}
- AccessAnalysis::DepCandidates DependentAccesses;
- AccessAnalysis Accesses(DL, AA, DependentAccesses);
+ MemoryDepChecker::DepCandidates DependentAccesses;
+ AccessAnalysis Accesses(TheLoop->getHeader()->getModule()->getDataLayout(),
+ AA, DependentAccesses);
// Holds the analyzed pointers. We don't want to call GetUnderlyingObjects
// multiple times on the same object. If the ptr is accessed twice, once
@@ -1068,8 +1079,7 @@ void LoopAccessInfo::analyzeLoop(const ValueToValueMap &Strides) {
// read a few words, modify, and write a few words, and some of the
// words may be written to the same address.
bool IsReadOnlyPtr = false;
- if (Seen.insert(Ptr).second ||
- !isStridedPtr(SE, DL, Ptr, TheLoop, Strides)) {
+ if (Seen.insert(Ptr).second || !isStridedPtr(SE, Ptr, TheLoop, Strides)) {
++NumReads;
IsReadOnlyPtr = true;
}
@@ -1099,7 +1109,6 @@ void LoopAccessInfo::analyzeLoop(const ValueToValueMap &Strides) {
// Find pointers with computable bounds. We are going to use this information
// to place a runtime bound check.
- unsigned NumComparisons = 0;
bool CanDoRT = false;
if (NeedRTCheck)
CanDoRT = Accesses.canCheckPtrAtRT(PtrRtCheck, NumComparisons, SE, TheLoop,
@@ -1113,18 +1122,10 @@ void LoopAccessInfo::analyzeLoop(const ValueToValueMap &Strides) {
if (NumComparisons == 0 && NeedRTCheck)
NeedRTCheck = false;
- // Check that we did not collect too many pointers or found an unsizeable
- // pointer.
- if (!CanDoRT || NumComparisons > RuntimeMemoryCheckThreshold) {
- PtrRtCheck.reset();
- CanDoRT = false;
- }
-
- if (CanDoRT) {
+ // Check that we found the bounds for the pointer.
+ if (CanDoRT)
DEBUG(dbgs() << "LAA: We can perform a memory runtime check if needed.\n");
- }
-
- if (NeedRTCheck && !CanDoRT) {
+ else if (NeedRTCheck) {
emitAnalysis(LoopAccessReport() << "cannot identify array bounds");
DEBUG(dbgs() << "LAA: We can't vectorize because we can't find " <<
"the array bounds.\n");
@@ -1154,17 +1155,10 @@ void LoopAccessInfo::analyzeLoop(const ValueToValueMap &Strides) {
CanDoRT = Accesses.canCheckPtrAtRT(PtrRtCheck, NumComparisons, SE,
TheLoop, Strides, true);
- // Check that we did not collect too many pointers or found an unsizeable
- // pointer.
- if (!CanDoRT || NumComparisons > RuntimeMemoryCheckThreshold) {
- if (!CanDoRT && NumComparisons > 0)
- emitAnalysis(LoopAccessReport()
- << "cannot check memory dependencies at runtime");
- else
- emitAnalysis(LoopAccessReport()
- << NumComparisons << " exceeds limit of "
- << RuntimeMemoryCheckThreshold
- << " dependent memory operations checked at runtime");
+ // Check that we found the bounds for the pointer.
+ if (!CanDoRT && NumComparisons > 0) {
+ emitAnalysis(LoopAccessReport()
+ << "cannot check memory dependencies at runtime");
DEBUG(dbgs() << "LAA: Can't vectorize with memory checks\n");
PtrRtCheck.reset();
CanVecMem = false;
@@ -1175,12 +1169,15 @@ void LoopAccessInfo::analyzeLoop(const ValueToValueMap &Strides) {
}
}
- if (!CanVecMem)
+ if (CanVecMem)
+ DEBUG(dbgs() << "LAA: No unsafe dependent memory operations in loop. We"
+ << (NeedRTCheck ? "" : " don't")
+ << " need a runtime memory check.\n");
+ else {
emitAnalysis(LoopAccessReport() <<
"unsafe dependent memory operations in loop");
-
- DEBUG(dbgs() << "LAA: We" << (NeedRTCheck ? "" : " don't") <<
- " need a runtime memory check.\n");
+ DEBUG(dbgs() << "LAA: unsafe dependent memory operations in loop\n");
+ }
}
bool LoopAccessInfo::blockNeedsPredication(BasicBlock *BB, Loop *TheLoop,
@@ -1212,8 +1209,8 @@ static Instruction *getFirstInst(Instruction *FirstInst, Value *V,
return nullptr;
}
-std::pair<Instruction *, Instruction *>
-LoopAccessInfo::addRuntimeCheck(Instruction *Loc) const {
+std::pair<Instruction *, Instruction *> LoopAccessInfo::addRuntimeCheck(
+ Instruction *Loc, const SmallVectorImpl<int> *PtrPartition) const {
Instruction *tnullptr = nullptr;
if (!PtrRtCheck.Need)
return std::pair<Instruction *, Instruction *>(tnullptr, tnullptr);
@@ -1223,7 +1220,7 @@ LoopAccessInfo::addRuntimeCheck(Instruction *Loc) const {
SmallVector<TrackingVH<Value> , 2> Ends;
LLVMContext &Ctx = Loc->getContext();
- SCEVExpander Exp(*SE, "induction");
+ SCEVExpander Exp(*SE, DL, "induction");
Instruction *FirstInst = nullptr;
for (unsigned i = 0; i < NumPointers; ++i) {
@@ -1254,7 +1251,7 @@ LoopAccessInfo::addRuntimeCheck(Instruction *Loc) const {
Value *MemoryRuntimeCheck = nullptr;
for (unsigned i = 0; i < NumPointers; ++i) {
for (unsigned j = i+1; j < NumPointers; ++j) {
- if (!PtrRtCheck.needsChecking(i, j))
+ if (!PtrRtCheck.needsChecking(i, j, PtrPartition))
continue;
unsigned AS0 = Starts[i]->getType()->getPointerAddressSpace();
@@ -1298,12 +1295,13 @@ LoopAccessInfo::addRuntimeCheck(Instruction *Loc) const {
}
LoopAccessInfo::LoopAccessInfo(Loop *L, ScalarEvolution *SE,
- const DataLayout *DL,
+ const DataLayout &DL,
const TargetLibraryInfo *TLI, AliasAnalysis *AA,
DominatorTree *DT,
const ValueToValueMap &Strides)
- : TheLoop(L), SE(SE), DL(DL), TLI(TLI), AA(AA), DT(DT), NumLoads(0),
- NumStores(0), MaxSafeDepDistBytes(-1U), CanVecMem(false) {
+ : DepChecker(SE, L), NumComparisons(0), TheLoop(L), SE(SE), DL(DL),
+ TLI(TLI), AA(AA), DT(DT), NumLoads(0), NumStores(0),
+ MaxSafeDepDistBytes(-1U), CanVecMem(false) {
if (canAnalyzeLoop())
analyzeLoop(Strides);
}
@@ -1319,7 +1317,14 @@ void LoopAccessInfo::print(raw_ostream &OS, unsigned Depth) const {
if (Report)
OS.indent(Depth) << "Report: " << Report->str() << "\n";
- // FIXME: Print unsafe dependences
+ if (auto *InterestingDependences = DepChecker.getInterestingDependences()) {
+ OS.indent(Depth) << "Interesting Dependences:\n";
+ for (auto &Dep : *InterestingDependences) {
+ Dep.print(OS, Depth + 2, DepChecker.getMemoryInstructions());
+ OS << "\n";
+ }
+ } else
+ OS.indent(Depth) << "Too many interesting dependences, not recorded\n";
// List the pairs of accesses that need run-time checks to prove independence.
PtrRtCheck.print(OS, Depth);
@@ -1336,6 +1341,7 @@ LoopAccessAnalysis::getInfo(Loop *L, const ValueToValueMap &Strides) {
#endif
if (!LAI) {
+ const DataLayout &DL = L->getHeader()->getModule()->getDataLayout();
LAI = llvm::make_unique<LoopAccessInfo>(L, SE, DL, TLI, AA, DT, Strides);
#ifndef NDEBUG
LAI->NumSymbolicStrides = Strides.size();
@@ -1360,7 +1366,6 @@ void LoopAccessAnalysis::print(raw_ostream &OS, const Module *M) const {
bool LoopAccessAnalysis::runOnFunction(Function &F) {
SE = &getAnalysis<ScalarEvolution>();
- DL = F.getParent()->getDataLayout();
auto *TLIP = getAnalysisIfAvailable<TargetLibraryInfoWrapperPass>();
TLI = TLIP ? &TLIP->getTLI() : nullptr;
AA = &getAnalysis<AliasAnalysis>();
diff --git a/lib/Analysis/LoopInfo.cpp b/lib/Analysis/LoopInfo.cpp
index 95f6eb0..6462b06 100644
--- a/lib/Analysis/LoopInfo.cpp
+++ b/lib/Analysis/LoopInfo.cpp
@@ -29,6 +29,7 @@
#include "llvm/IR/PassManager.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
#include <algorithm>
using namespace llvm;
diff --git a/lib/Analysis/LoopPass.cpp b/lib/Analysis/LoopPass.cpp
index a99c949..e9fcf02 100644
--- a/lib/Analysis/LoopPass.cpp
+++ b/lib/Analysis/LoopPass.cpp
@@ -18,6 +18,7 @@
#include "llvm/IR/LLVMContext.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/Timer.h"
+#include "llvm/Support/raw_ostream.h"
using namespace llvm;
#define DEBUG_TYPE "loop-pass-manager"
diff --git a/lib/Analysis/MemDerefPrinter.cpp b/lib/Analysis/MemDerefPrinter.cpp
index 531d75e..6119a3d 100644
--- a/lib/Analysis/MemDerefPrinter.cpp
+++ b/lib/Analysis/MemDerefPrinter.cpp
@@ -14,6 +14,7 @@
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/InstIterator.h"
#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/Module.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
@@ -27,7 +28,6 @@ namespace {
initializeMemDerefPrinterPass(*PassRegistry::getPassRegistry());
}
void getAnalysisUsage(AnalysisUsage &AU) const override {
- AU.addRequired<DataLayoutPass>();
AU.setPreservesAll();
}
bool runOnFunction(Function &F) override;
@@ -41,7 +41,6 @@ namespace {
char MemDerefPrinter::ID = 0;
INITIALIZE_PASS_BEGIN(MemDerefPrinter, "print-memderefs",
"Memory Dereferenciblity of pointers in function", false, true)
-INITIALIZE_PASS_DEPENDENCY(DataLayoutPass)
INITIALIZE_PASS_END(MemDerefPrinter, "print-memderefs",
"Memory Dereferenciblity of pointers in function", false, true)
@@ -50,7 +49,7 @@ FunctionPass *llvm::createMemDerefPrinter() {
}
bool MemDerefPrinter::runOnFunction(Function &F) {
- const DataLayout *DL = &getAnalysis<DataLayoutPass>().getDataLayout();
+ const DataLayout &DL = F.getParent()->getDataLayout();
for (auto &I: inst_range(F)) {
if (LoadInst *LI = dyn_cast<LoadInst>(&I)) {
Value *PO = LI->getPointerOperand();
diff --git a/lib/Analysis/MemoryBuiltins.cpp b/lib/Analysis/MemoryBuiltins.cpp
index 6108af3..8ddac8f 100644
--- a/lib/Analysis/MemoryBuiltins.cpp
+++ b/lib/Analysis/MemoryBuiltins.cpp
@@ -206,7 +206,7 @@ const CallInst *llvm::extractMallocCall(const Value *I,
return isMallocLikeFn(I, TLI) ? dyn_cast<CallInst>(I) : nullptr;
}
-static Value *computeArraySize(const CallInst *CI, const DataLayout *DL,
+static Value *computeArraySize(const CallInst *CI, const DataLayout &DL,
const TargetLibraryInfo *TLI,
bool LookThroughSExt = false) {
if (!CI)
@@ -214,12 +214,12 @@ static Value *computeArraySize(const CallInst *CI, const DataLayout *DL,
// The size of the malloc's result type must be known to determine array size.
Type *T = getMallocAllocatedType(CI, TLI);
- if (!T || !T->isSized() || !DL)
+ if (!T || !T->isSized())
return nullptr;
- unsigned ElementSize = DL->getTypeAllocSize(T);
+ unsigned ElementSize = DL.getTypeAllocSize(T);
if (StructType *ST = dyn_cast<StructType>(T))
- ElementSize = DL->getStructLayout(ST)->getSizeInBytes();
+ ElementSize = DL.getStructLayout(ST)->getSizeInBytes();
// If malloc call's arg can be determined to be a multiple of ElementSize,
// return the multiple. Otherwise, return NULL.
@@ -232,23 +232,6 @@ static Value *computeArraySize(const CallInst *CI, const DataLayout *DL,
return nullptr;
}
-/// isArrayMalloc - Returns the corresponding CallInst if the instruction
-/// is a call to malloc whose array size can be determined and the array size
-/// is not constant 1. Otherwise, return NULL.
-const CallInst *llvm::isArrayMalloc(const Value *I,
- const DataLayout *DL,
- const TargetLibraryInfo *TLI) {
- const CallInst *CI = extractMallocCall(I, TLI);
- Value *ArraySize = computeArraySize(CI, DL, TLI);
-
- if (ConstantInt *ConstSize = dyn_cast_or_null<ConstantInt>(ArraySize))
- if (ConstSize->isOne())
- return CI;
-
- // CI is a non-array malloc or we can't figure out that it is an array malloc.
- return nullptr;
-}
-
/// getMallocType - Returns the PointerType resulting from the malloc call.
/// The PointerType depends on the number of bitcast uses of the malloc call:
/// 0: PointerType is the calls' return type.
@@ -297,7 +280,7 @@ Type *llvm::getMallocAllocatedType(const CallInst *CI,
/// then return that multiple. For non-array mallocs, the multiple is
/// constant 1. Otherwise, return NULL for mallocs whose array size cannot be
/// determined.
-Value *llvm::getMallocArraySize(CallInst *CI, const DataLayout *DL,
+Value *llvm::getMallocArraySize(CallInst *CI, const DataLayout &DL,
const TargetLibraryInfo *TLI,
bool LookThroughSExt) {
assert(isMallocLikeFn(CI, TLI) && "getMallocArraySize and not malloc call");
@@ -367,11 +350,8 @@ const CallInst *llvm::isFreeCall(const Value *I, const TargetLibraryInfo *TLI) {
/// object size in Size if successful, and false otherwise.
/// If RoundToAlign is true, then Size is rounded up to the alignment of allocas,
/// byval arguments, and global variables.
-bool llvm::getObjectSize(const Value *Ptr, uint64_t &Size, const DataLayout *DL,
+bool llvm::getObjectSize(const Value *Ptr, uint64_t &Size, const DataLayout &DL,
const TargetLibraryInfo *TLI, bool RoundToAlign) {
- if (!DL)
- return false;
-
ObjectSizeOffsetVisitor Visitor(DL, TLI, Ptr->getContext(), RoundToAlign);
SizeOffsetType Data = Visitor.compute(const_cast<Value*>(Ptr));
if (!Visitor.bothKnown(Data))
@@ -399,17 +379,17 @@ APInt ObjectSizeOffsetVisitor::align(APInt Size, uint64_t Align) {
return Size;
}
-ObjectSizeOffsetVisitor::ObjectSizeOffsetVisitor(const DataLayout *DL,
+ObjectSizeOffsetVisitor::ObjectSizeOffsetVisitor(const DataLayout &DL,
const TargetLibraryInfo *TLI,
LLVMContext &Context,
bool RoundToAlign)
-: DL(DL), TLI(TLI), RoundToAlign(RoundToAlign) {
+ : DL(DL), TLI(TLI), RoundToAlign(RoundToAlign) {
// Pointer size must be rechecked for each object visited since it could have
// a different address space.
}
SizeOffsetType ObjectSizeOffsetVisitor::compute(Value *V) {
- IntTyBits = DL->getPointerTypeSizeInBits(V->getType());
+ IntTyBits = DL.getPointerTypeSizeInBits(V->getType());
Zero = APInt::getNullValue(IntTyBits);
V = V->stripPointerCasts();
@@ -449,7 +429,7 @@ SizeOffsetType ObjectSizeOffsetVisitor::visitAllocaInst(AllocaInst &I) {
if (!I.getAllocatedType()->isSized())
return unknown();
- APInt Size(IntTyBits, DL->getTypeAllocSize(I.getAllocatedType()));
+ APInt Size(IntTyBits, DL.getTypeAllocSize(I.getAllocatedType()));
if (!I.isArrayAllocation())
return std::make_pair(align(Size, I.getAlignment()), Zero);
@@ -468,7 +448,7 @@ SizeOffsetType ObjectSizeOffsetVisitor::visitArgument(Argument &A) {
return unknown();
}
PointerType *PT = cast<PointerType>(A.getType());
- APInt Size(IntTyBits, DL->getTypeAllocSize(PT->getElementType()));
+ APInt Size(IntTyBits, DL.getTypeAllocSize(PT->getElementType()));
return std::make_pair(align(Size, A.getParamAlignment()), Zero);
}
@@ -541,7 +521,7 @@ ObjectSizeOffsetVisitor::visitExtractValueInst(ExtractValueInst&) {
SizeOffsetType ObjectSizeOffsetVisitor::visitGEPOperator(GEPOperator &GEP) {
SizeOffsetType PtrData = compute(GEP.getPointerOperand());
APInt Offset(IntTyBits, 0);
- if (!bothKnown(PtrData) || !GEP.accumulateConstantOffset(*DL, Offset))
+ if (!bothKnown(PtrData) || !GEP.accumulateConstantOffset(DL, Offset))
return unknown();
return std::make_pair(PtrData.first, PtrData.second + Offset);
@@ -557,7 +537,7 @@ SizeOffsetType ObjectSizeOffsetVisitor::visitGlobalVariable(GlobalVariable &GV){
if (!GV.hasDefinitiveInitializer())
return unknown();
- APInt Size(IntTyBits, DL->getTypeAllocSize(GV.getType()->getElementType()));
+ APInt Size(IntTyBits, DL.getTypeAllocSize(GV.getType()->getElementType()));
return std::make_pair(align(Size, GV.getAlignment()), Zero);
}
@@ -593,19 +573,18 @@ SizeOffsetType ObjectSizeOffsetVisitor::visitInstruction(Instruction &I) {
return unknown();
}
-ObjectSizeOffsetEvaluator::ObjectSizeOffsetEvaluator(const DataLayout *DL,
- const TargetLibraryInfo *TLI,
- LLVMContext &Context,
- bool RoundToAlign)
-: DL(DL), TLI(TLI), Context(Context), Builder(Context, TargetFolder(DL)),
- RoundToAlign(RoundToAlign) {
+ObjectSizeOffsetEvaluator::ObjectSizeOffsetEvaluator(
+ const DataLayout &DL, const TargetLibraryInfo *TLI, LLVMContext &Context,
+ bool RoundToAlign)
+ : DL(DL), TLI(TLI), Context(Context), Builder(Context, TargetFolder(DL)),
+ RoundToAlign(RoundToAlign) {
// IntTy and Zero must be set for each compute() since the address space may
// be different for later objects.
}
SizeOffsetEvalType ObjectSizeOffsetEvaluator::compute(Value *V) {
// XXX - Are vectors of pointers possible here?
- IntTy = cast<IntegerType>(DL->getIntPtrType(V->getType()));
+ IntTy = cast<IntegerType>(DL.getIntPtrType(V->getType()));
Zero = ConstantInt::get(IntTy, 0);
SizeOffsetEvalType Result = compute_(V);
@@ -687,7 +666,7 @@ SizeOffsetEvalType ObjectSizeOffsetEvaluator::visitAllocaInst(AllocaInst &I) {
assert(I.isArrayAllocation());
Value *ArraySize = I.getArraySize();
Value *Size = ConstantInt::get(ArraySize->getType(),
- DL->getTypeAllocSize(I.getAllocatedType()));
+ DL.getTypeAllocSize(I.getAllocatedType()));
Size = Builder.CreateMul(Size, ArraySize);
return std::make_pair(Size, Zero);
}
@@ -739,7 +718,7 @@ ObjectSizeOffsetEvaluator::visitGEPOperator(GEPOperator &GEP) {
if (!bothKnown(PtrData))
return unknown();
- Value *Offset = EmitGEPOffset(&Builder, *DL, &GEP, /*NoAssumptions=*/true);
+ Value *Offset = EmitGEPOffset(&Builder, DL, &GEP, /*NoAssumptions=*/true);
Offset = Builder.CreateAdd(PtrData.second, Offset);
return std::make_pair(PtrData.first, Offset);
}
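
With DataLayout mandatory, computeArraySize's constant path is just a divisibility test on the malloc argument. A hedged standalone model (C++17 for std::optional; names are illustrative):

#include <cassert>
#include <cstdint>
#include <optional>

// The allocation is an array of N elements only when the byte count is an
// exact multiple of the allocated type's size.
static std::optional<uint64_t> arraySize(uint64_t MallocBytes,
                                         uint64_t ElementSize) {
  if (ElementSize == 0 || MallocBytes % ElementSize != 0)
    return std::nullopt;
  return MallocBytes / ElementSize;
}

int main() {
  assert(arraySize(32, 8) == 4u);  // malloc(32) of an 8-byte type: 4 elements
  assert(!arraySize(30, 8));       // not a multiple: array size unknown
}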
diff --git a/lib/Analysis/MemoryDependenceAnalysis.cpp b/lib/Analysis/MemoryDependenceAnalysis.cpp
index 6d38863..716e3e6 100644
--- a/lib/Analysis/MemoryDependenceAnalysis.cpp
+++ b/lib/Analysis/MemoryDependenceAnalysis.cpp
@@ -93,8 +93,6 @@ void MemoryDependenceAnalysis::getAnalysisUsage(AnalysisUsage &AU) const {
bool MemoryDependenceAnalysis::runOnFunction(Function &F) {
AA = &getAnalysis<AliasAnalysis>();
AC = &getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
- DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>();
- DL = DLP ? &DLP->getDataLayout() : nullptr;
DominatorTreeWrapperPass *DTWP =
getAnalysisIfAvailable<DominatorTreeWrapperPass>();
DT = DTWP ? &DTWP->getDomTree() : nullptr;
@@ -263,22 +261,17 @@ getCallSiteDependencyFrom(CallSite CS, bool isReadOnlyCall,
///
/// MemLocBase, MemLocOffset are lazily computed here the first time the
/// base/offs of memloc is needed.
-static bool
-isLoadLoadClobberIfExtendedToFullWidth(const AliasAnalysis::Location &MemLoc,
- const Value *&MemLocBase,
- int64_t &MemLocOffs,
- const LoadInst *LI,
- const DataLayout *DL) {
- // If we have no target data, we can't do this.
- if (!DL) return false;
+static bool isLoadLoadClobberIfExtendedToFullWidth(
+ const AliasAnalysis::Location &MemLoc, const Value *&MemLocBase,
+ int64_t &MemLocOffs, const LoadInst *LI) {
+ const DataLayout &DL = LI->getModule()->getDataLayout();
// If we haven't already computed the base/offset of MemLoc, do so now.
if (!MemLocBase)
MemLocBase = GetPointerBaseWithConstantOffset(MemLoc.Ptr, MemLocOffs, DL);
- unsigned Size = MemoryDependenceAnalysis::
- getLoadLoadClobberFullWidthSize(MemLocBase, MemLocOffs, MemLoc.Size,
- LI, *DL);
+ unsigned Size = MemoryDependenceAnalysis::getLoadLoadClobberFullWidthSize(
+ MemLocBase, MemLocOffs, MemLoc.Size, LI);
return Size != 0;
}
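
getLoadLoadClobberFullWidthSize, now called without a DataLayout parameter, asks whether some power-of-two widening of the load would also cover the queried location. A toy version of that search from a common base pointer (the 8-byte cap is illustrative; the real code derives its limit from alignment and sanitizer constraints):

#include <cassert>
#include <cstdint>

static unsigned widenedSize(int64_t LIOffs, unsigned LISize,
                            int64_t MemLocOffs, unsigned MemLocSize) {
  const unsigned MaxBytes = 8;
  if (MemLocOffs < LIOffs)
    return 0;                     // widening only extends past the load
  for (unsigned Sz = LISize; Sz <= MaxBytes; Sz *= 2)
    if (LIOffs + (int64_t)Sz >= MemLocOffs + (int64_t)MemLocSize)
      return Sz;                  // smallest covering power-of-two width
  return 0;
}

int main() {
  assert(widenedSize(0, 1, 1, 1) == 2);  // widening i8 to i16 covers +1
  assert(widenedSize(0, 1, 9, 1) == 0);  // beyond the cap: no clobber claim
}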
@@ -289,10 +282,9 @@ isLoadLoadClobberIfExtendedToFullWidth(const AliasAnalysis::Location &MemLoc,
/// 2) safe for the target, and 3) would provide the specified memory
/// location value, then this function returns the size in bytes of the
/// load width to use. If not, this returns zero.
-unsigned MemoryDependenceAnalysis::
-getLoadLoadClobberFullWidthSize(const Value *MemLocBase, int64_t MemLocOffs,
- unsigned MemLocSize, const LoadInst *LI,
- const DataLayout &DL) {
+unsigned MemoryDependenceAnalysis::getLoadLoadClobberFullWidthSize(
+ const Value *MemLocBase, int64_t MemLocOffs, unsigned MemLocSize,
+ const LoadInst *LI) {
// We can only extend simple integer loads.
if (!isa<IntegerType>(LI->getType()) || !LI->isSimple()) return 0;
@@ -301,10 +293,12 @@ getLoadLoadClobberFullWidthSize(const Value *MemLocBase, int64_t MemLocOffs,
if (LI->getParent()->getParent()->hasFnAttribute(Attribute::SanitizeThread))
return 0;
+ const DataLayout &DL = LI->getModule()->getDataLayout();
+
// Get the base of this load.
int64_t LIOffs = 0;
const Value *LIBase =
- GetPointerBaseWithConstantOffset(LI->getPointerOperand(), LIOffs, &DL);
+ GetPointerBaseWithConstantOffset(LI->getPointerOperand(), LIOffs, DL);
// If the two pointers are not based on the same pointer, we can't tell that
// they are related.
@@ -413,14 +407,19 @@ getPointerDependencyFrom(const AliasAnalysis::Location &MemLoc, bool isLoad,
// by every program that can detect any optimisation of that kind: either
// it is racy (undefined) or there is a release followed by an acquire
// between the pair of accesses under consideration.
- bool HasSeenAcquire = false;
+ // If the load is invariant, we "know" that it doesn't alias *any* write. We
+ // do want to respect mustalias results since defs are useful for value
+ // forwarding, but any mayalias write can be assumed to be noalias.
+ // Arguably, this logic should be pushed inside AliasAnalysis itself.
if (isLoad && QueryInst) {
LoadInst *LI = dyn_cast<LoadInst>(QueryInst);
if (LI && LI->getMetadata(LLVMContext::MD_invariant_load) != nullptr)
isInvariantLoad = true;
}
+ const DataLayout &DL = BB->getModule()->getDataLayout();
+
// Walk backwards through the basic block, looking for dependencies.
while (ScanIt != BB->begin()) {
Instruction *Inst = --ScanIt;
@@ -472,12 +471,12 @@ getPointerDependencyFrom(const AliasAnalysis::Location &MemLoc, bool isLoad,
// Atomic loads have complications involved.
// A Monotonic (or higher) load is OK if the query inst is itself not atomic.
- // An Acquire (or higher) load sets the HasSeenAcquire flag, so that any
- // release store will know to return getClobber.
// FIXME: This is overly conservative.
if (LI->isAtomic() && LI->getOrdering() > Unordered) {
if (!QueryInst)
return MemDepResult::getClobber(LI);
+ if (LI->getOrdering() != Monotonic)
+ return MemDepResult::getClobber(LI);
if (auto *QueryLI = dyn_cast<LoadInst>(QueryInst)) {
if (!QueryLI->isSimple())
return MemDepResult::getClobber(LI);
@@ -487,9 +486,6 @@ getPointerDependencyFrom(const AliasAnalysis::Location &MemLoc, bool isLoad,
} else if (QueryInst->mayReadOrWriteMemory()) {
return MemDepResult::getClobber(LI);
}
-
- if (isAtLeastAcquire(LI->getOrdering()))
- HasSeenAcquire = true;
}
AliasAnalysis::Location LoadLoc = AA->getLocation(LI);
@@ -505,12 +501,12 @@ getPointerDependencyFrom(const AliasAnalysis::Location &MemLoc, bool isLoad,
// location is 1 byte at P+1). If so, return it as a load/load
// clobber result, allowing the client to decide to widen the load if
// it wants to.
- if (IntegerType *ITy = dyn_cast<IntegerType>(LI->getType()))
- if (LI->getAlignment()*8 > ITy->getPrimitiveSizeInBits() &&
+ if (IntegerType *ITy = dyn_cast<IntegerType>(LI->getType())) {
+ if (LI->getAlignment() * 8 > ITy->getPrimitiveSizeInBits() &&
isLoadLoadClobberIfExtendedToFullWidth(MemLoc, MemLocBase,
- MemLocOffset, LI, DL))
+ MemLocOffset, LI))
return MemDepResult::getClobber(Inst);
-
+ }
continue;
}
@@ -549,12 +545,12 @@ getPointerDependencyFrom(const AliasAnalysis::Location &MemLoc, bool isLoad,
if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) {
// Atomic stores have complications involved.
// A Monotonic store is OK if the query inst is itself not atomic.
- // A Release (or higher) store further requires that no acquire load
- // has been seen.
// FIXME: This is overly conservative.
if (!SI->isUnordered()) {
if (!QueryInst)
return MemDepResult::getClobber(SI);
+ if (SI->getOrdering() != Monotonic)
+ return MemDepResult::getClobber(SI);
if (auto *QueryLI = dyn_cast<LoadInst>(QueryInst)) {
if (!QueryLI->isSimple())
return MemDepResult::getClobber(SI);
@@ -564,9 +560,6 @@ getPointerDependencyFrom(const AliasAnalysis::Location &MemLoc, bool isLoad,
} else if (QueryInst->mayReadOrWriteMemory()) {
return MemDepResult::getClobber(SI);
}
-
- if (HasSeenAcquire && isAtLeastRelease(SI->getOrdering()))
- return MemDepResult::getClobber(SI);
}
// FIXME: this is overly conservative.
@@ -612,6 +605,8 @@ getPointerDependencyFrom(const AliasAnalysis::Location &MemLoc, bool isLoad,
if (AccessPtr == Inst || AA->isMustAlias(Inst, AccessPtr))
return MemDepResult::getDef(Inst);
+ if (isInvariantLoad)
+ continue;
// Be conservative if the accessed pointer may alias the allocation.
if (AA->alias(Inst, AccessPtr) != AliasAnalysis::NoAlias)
return MemDepResult::getClobber(Inst);
@@ -622,6 +617,9 @@ getPointerDependencyFrom(const AliasAnalysis::Location &MemLoc, bool isLoad,
continue;
}
+ if (isInvariantLoad)
+ continue;
+
// See if this instruction (e.g. a call or vaarg) mod/ref's the pointer.
AliasAnalysis::ModRefResult MR = AA->getModRefInfo(Inst, MemLoc);
// If necessary, perform additional analysis.
@@ -923,8 +921,7 @@ getNonLocalPointerDependency(Instruction *QueryInst,
const_cast<Value *>(Loc.Ptr)));
return;
}
-
-
+ const DataLayout &DL = FromBB->getModule()->getDataLayout();
PHITransAddr Address(const_cast<Value *>(Loc.Ptr), DL, AC);
// This is the set of blocks we've inspected, and the pointer we consider in
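
The MemoryDependenceAnalysis hunks above all serve one migration: DataLayout stops being an optional analysis (DataLayoutPass) and becomes a property of the Module, reachable from any instruction, which is why the "if (!DL) return false" escape hatches disappear. A minimal sketch of the idiom the patch itself adopts, assuming the 3.7-era headers shown (loadedBytes is an illustrative helper, not part of the patch):

#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Module.h"
using namespace llvm;

// With DataLayout hanging off the Module it is never null, so callers
// can take it by reference instead of testing a pointer.
static uint64_t loadedBytes(const LoadInst *LI) {
  const DataLayout &DL = LI->getModule()->getDataLayout();
  return DL.getTypeStoreSize(LI->getType());
}

This is the same shape as the new isLoadLoadClobberIfExtendedToFullWidth above, which drops its DataLayout parameter entirely.
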
diff --git a/lib/Analysis/ModuleDebugInfoPrinter.cpp b/lib/Analysis/ModuleDebugInfoPrinter.cpp
index f645558..cbc4700 100644
--- a/lib/Analysis/ModuleDebugInfoPrinter.cpp
+++ b/lib/Analysis/ModuleDebugInfoPrinter.cpp
@@ -55,28 +55,74 @@ bool ModuleDebugInfoPrinter::runOnModule(Module &M) {
return false;
}
+static void printFile(raw_ostream &O, StringRef Filename, StringRef Directory,
+ unsigned Line = 0) {
+ if (Filename.empty())
+ return;
+
+ O << " from ";
+ if (!Directory.empty())
+ O << Directory << "/";
+ O << Filename;
+ if (Line)
+ O << ":" << Line;
+}
+
void ModuleDebugInfoPrinter::print(raw_ostream &O, const Module *M) const {
+ // Printing the nodes directly isn't particularly helpful (since they
+ // reference other nodes that won't be printed, particularly for the
+ // filenames), so just print a few useful things.
for (DICompileUnit CU : Finder.compile_units()) {
- O << "Compile Unit: ";
- CU.print(O);
+ O << "Compile unit: ";
+ if (const char *Lang = LanguageString(CU.getLanguage()))
+ O << Lang;
+ else
+ O << "unknown-language(" << CU.getLanguage() << ")";
+ printFile(O, CU.getFilename(), CU.getDirectory());
O << '\n';
}
for (DISubprogram S : Finder.subprograms()) {
- O << "Subprogram: ";
- S.print(O);
+ O << "Subprogram: " << S.getName();
+ printFile(O, S.getFilename(), S.getDirectory(), S.getLineNumber());
+ if (!S.getLinkageName().empty())
+ O << " ('" << S.getLinkageName() << "')";
O << '\n';
}
for (DIGlobalVariable GV : Finder.global_variables()) {
- O << "GlobalVariable: ";
- GV.print(O);
+ O << "Global variable: " << GV.getName();
+ printFile(O, GV.getFilename(), GV.getDirectory(), GV.getLineNumber());
+ if (!GV.getLinkageName().empty())
+ O << " ('" << GV.getLinkageName() << "')";
O << '\n';
}
for (DIType T : Finder.types()) {
- O << "Type: ";
- T.print(O);
+ O << "Type:";
+ if (!T.getName().empty())
+ O << ' ' << T.getName();
+ printFile(O, T.getFilename(), T.getDirectory(), T.getLineNumber());
+ if (T.isBasicType()) {
+ DIBasicType BT(T.get());
+ O << " ";
+ if (const char *Encoding =
+ dwarf::AttributeEncodingString(BT.getEncoding()))
+ O << Encoding;
+ else
+ O << "unknown-encoding(" << BT.getEncoding() << ')';
+ } else {
+ O << ' ';
+ if (const char *Tag = dwarf::TagString(T.getTag()))
+ O << Tag;
+ else
+ O << "unknown-tag(" << T.getTag() << ")";
+ }
+ if (T.isCompositeType()) {
+ DICompositeType CT(T.get());
+ if (auto *S = CT.getIdentifier())
+ O << " (identifier: '" << S->getString() << "')";
+ }
O << '\n';
}
}
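
To make the rewritten printer's output concrete, here is a standalone mirror of the printFile() helper above; the function name and sample values are illustrative only:

#include "llvm/ADT/StringRef.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;

// Appends " from DIR/FILE:LINE", omitting the pieces that are empty or
// zero, exactly as printFile() does in the hunk above.
static void appendLocation(raw_ostream &O, StringRef Filename,
                           StringRef Directory, unsigned Line = 0) {
  if (Filename.empty())
    return;
  O << " from ";
  if (!Directory.empty())
    O << Directory << "/";
  O << Filename;
  if (Line)
    O << ":" << Line;
}

// appendLocation(outs(), "t.cpp", "/tmp", 3) appends " from /tmp/t.cpp:3",
// so a subprogram entry prints as, e.g.:
//   Subprogram: foo from /tmp/t.cpp:3 ('_Z3foov')
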
diff --git a/lib/Analysis/NoAliasAnalysis.cpp b/lib/Analysis/NoAliasAnalysis.cpp
index c214d3c..203e1da 100644
--- a/lib/Analysis/NoAliasAnalysis.cpp
+++ b/lib/Analysis/NoAliasAnalysis.cpp
@@ -16,6 +16,7 @@
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/Module.h"
#include "llvm/Pass.h"
using namespace llvm;
@@ -33,11 +34,11 @@ namespace {
void getAnalysisUsage(AnalysisUsage &AU) const override {}
- void initializePass() override {
+ bool doInitialization(Module &M) override {
// Note: NoAA does not call InitializeAliasAnalysis because it's
// special and does not support chaining.
- DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>();
- DL = DLP ? &DLP->getDataLayout() : nullptr;
+ DL = &M.getDataLayout();
+ return true;
}
AliasResult alias(const Location &LocA, const Location &LocB) override {
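
NoAA's move from initializePass() to doInitialization(Module &) is the template this patch applies to every ImmutablePass-based alias analysis (ScopedNoAliasAA below gets the identical treatment). A minimal sketch of the new shape, with a made-up pass name and only the pieces needed to show the hook:

#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Module.h"
#include "llvm/Pass.h"
using namespace llvm;

namespace {
struct MyAA : public ImmutablePass {
  static char ID;
  const DataLayout *DL = nullptr;
  MyAA() : ImmutablePass(ID) {}
  // Initialization now needs a Module (to reach its DataLayout), so it
  // moves from initializePass() to doInitialization(Module &).
  bool doInitialization(Module &M) override {
    DL = &M.getDataLayout();
    return true; // the AA passes in this patch return true from this hook
  }
};
char MyAA::ID = 0;
}
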
diff --git a/lib/Analysis/PHITransAddr.cpp b/lib/Analysis/PHITransAddr.cpp
index a534418..177684f 100644
--- a/lib/Analysis/PHITransAddr.cpp
+++ b/lib/Analysis/PHITransAddr.cpp
@@ -404,10 +404,9 @@ InsertPHITranslatedSubExpr(Value *InVal, BasicBlock *CurBB,
GEPOps.push_back(OpVal);
}
- GetElementPtrInst *Result =
- GetElementPtrInst::Create(GEPOps[0], makeArrayRef(GEPOps).slice(1),
- InVal->getName()+".phi.trans.insert",
- PredBB->getTerminator());
+ GetElementPtrInst *Result = GetElementPtrInst::Create(
+ GEP->getSourceElementType(), GEPOps[0], makeArrayRef(GEPOps).slice(1),
+ InVal->getName() + ".phi.trans.insert", PredBB->getTerminator());
Result->setIsInBounds(GEP->isInBounds());
NewInsts.push_back(Result);
return Result;
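
The extra GEP->getSourceElementType() argument reflects a signature change: GetElementPtrInst::Create now takes the GEP's source element type explicitly instead of recovering it from the pointer operand's type (groundwork for typeless pointers). A sketch of the overload in isolation; makeGEP, the i32 element type, and the name suffix are illustrative:

#include "llvm/IR/Instructions.h"
#include "llvm/IR/Type.h"
using namespace llvm;

// Builds `getelementptr i32, i32* %Base, i64 %Idx` before InsertPt; the
// element type is the explicit first argument.
static GetElementPtrInst *makeGEP(Value *Base, Value *Idx,
                                  Instruction *InsertPt) {
  Type *ElTy = Type::getInt32Ty(Base->getContext());
  return GetElementPtrInst::Create(ElTy, Base, {Idx},
                                   Base->getName() + ".gep", InsertPt);
}
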
diff --git a/lib/Analysis/RegionPass.cpp b/lib/Analysis/RegionPass.cpp
index 6fa7b2e..cd1e944 100644
--- a/lib/Analysis/RegionPass.cpp
+++ b/lib/Analysis/RegionPass.cpp
@@ -17,6 +17,7 @@
#include "llvm/Analysis/RegionIterator.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/Timer.h"
+#include "llvm/Support/raw_ostream.h"
using namespace llvm;
#define DEBUG_TYPE "regionpassmgr"
@@ -83,9 +84,11 @@ bool RGPassManager::runOnFunction(Function &F) {
for (unsigned Index = 0; Index < getNumContainedPasses(); ++Index) {
RegionPass *P = (RegionPass*)getContainedPass(Index);
- dumpPassInfo(P, EXECUTION_MSG, ON_REGION_MSG,
- CurrentRegion->getNameStr());
- dumpRequiredSet(P);
+ if (isPassDebuggingExecutionsOrMore()) {
+ dumpPassInfo(P, EXECUTION_MSG, ON_REGION_MSG,
+ CurrentRegion->getNameStr());
+ dumpRequiredSet(P);
+ }
initializeAnalysisImpl(P);
@@ -96,11 +99,13 @@ bool RGPassManager::runOnFunction(Function &F) {
Changed |= P->runOnRegion(CurrentRegion, *this);
}
- if (Changed)
- dumpPassInfo(P, MODIFICATION_MSG, ON_REGION_MSG,
- skipThisRegion ? "<deleted>" :
- CurrentRegion->getNameStr());
- dumpPreservedSet(P);
+ if (isPassDebuggingExecutionsOrMore()) {
+ if (Changed)
+ dumpPassInfo(P, MODIFICATION_MSG, ON_REGION_MSG,
+ skipThisRegion ? "<deleted>" :
+ CurrentRegion->getNameStr());
+ dumpPreservedSet(P);
+ }
if (!skipThisRegion) {
// Manually check that this region is still healthy. This is done
@@ -120,8 +125,8 @@ bool RGPassManager::runOnFunction(Function &F) {
removeNotPreservedAnalysis(P);
recordAvailableAnalysis(P);
removeDeadPasses(P,
- skipThisRegion ? "<deleted>" :
- CurrentRegion->getNameStr(),
+ (!isPassDebuggingExecutionsOrMore() || skipThisRegion) ?
+ "<deleted>" : CurrentRegion->getNameStr(),
ON_REGION_MSG);
if (skipThisRegion)
diff --git a/lib/Analysis/ScalarEvolution.cpp b/lib/Analysis/ScalarEvolution.cpp
index 9e4eb11..4e713fb 100644
--- a/lib/Analysis/ScalarEvolution.cpp
+++ b/lib/Analysis/ScalarEvolution.cpp
@@ -1102,13 +1102,14 @@ const SCEV *ScalarEvolution::getTruncateExpr(const SCEV *Op,
return getTruncateOrZeroExtend(SZ->getOperand(), Ty);
// trunc(x1+x2+...+xN) --> trunc(x1)+trunc(x2)+...+trunc(xN) if we can
- // eliminate all the truncates.
+ // eliminate all the truncates, or we replace other casts with truncates.
if (const SCEVAddExpr *SA = dyn_cast<SCEVAddExpr>(Op)) {
SmallVector<const SCEV *, 4> Operands;
bool hasTrunc = false;
for (unsigned i = 0, e = SA->getNumOperands(); i != e && !hasTrunc; ++i) {
const SCEV *S = getTruncateExpr(SA->getOperand(i), Ty);
- hasTrunc = isa<SCEVTruncateExpr>(S);
+ if (!isa<SCEVCastExpr>(SA->getOperand(i)))
+ hasTrunc = isa<SCEVTruncateExpr>(S);
Operands.push_back(S);
}
if (!hasTrunc)
@@ -1117,13 +1118,14 @@ const SCEV *ScalarEvolution::getTruncateExpr(const SCEV *Op,
}
// trunc(x1*x2*...*xN) --> trunc(x1)*trunc(x2)*...*trunc(xN) if we can
- // eliminate all the truncates.
+ // eliminate all the truncates, or we replace other casts with truncates.
if (const SCEVMulExpr *SM = dyn_cast<SCEVMulExpr>(Op)) {
SmallVector<const SCEV *, 4> Operands;
bool hasTrunc = false;
for (unsigned i = 0, e = SM->getNumOperands(); i != e && !hasTrunc; ++i) {
const SCEV *S = getTruncateExpr(SM->getOperand(i), Ty);
- hasTrunc = isa<SCEVTruncateExpr>(S);
+ if (!isa<SCEVCastExpr>(SM->getOperand(i)))
+ hasTrunc = isa<SCEVTruncateExpr>(S);
Operands.push_back(S);
}
if (!hasTrunc)
@@ -1325,6 +1327,85 @@ static const SCEV *getExtendAddRecStart(const SCEVAddRecExpr *AR, Type *Ty,
(SE->*GetExtendExpr)(PreStart, Ty));
}
+// Try to prove away overflow by looking at "nearby" add recurrences. A
+// motivating example for this rule: if we know `{0,+,4}` is `ult` `-1` and it
+// does not itself wrap then we can conclude that `{1,+,4}` is `nuw`.
+//
+// Formally:
+//
+// {S,+,X} == {S-T,+,X} + T
+// => Ext({S,+,X}) == Ext({S-T,+,X} + T)
+//
+// If ({S-T,+,X} + T) does not overflow ... (1)
+//
+// RHS == Ext({S-T,+,X} + T) == Ext({S-T,+,X}) + Ext(T)
+//
+// If {S-T,+,X} does not overflow ... (2)
+//
+// RHS == Ext({S-T,+,X}) + Ext(T) == {Ext(S-T),+,Ext(X)} + Ext(T)
+// == {Ext(S-T)+Ext(T),+,Ext(X)}
+//
+// If (S-T)+T does not overflow ... (3)
+//
+// RHS == {Ext(S-T)+Ext(T),+,Ext(X)} == {Ext(S-T+T),+,Ext(X)}
+// == {Ext(S),+,Ext(X)} == LHS
+//
+// Thus, if (1), (2) and (3) are true for some T, then
+// Ext({S,+,X}) == {Ext(S),+,Ext(X)}
+//
+// (3) is implied by (1) -- "(S-T)+T does not overflow" is simply "({S-T,+,X}+T)
+// does not overflow" restricted to the 0th iteration. Therefore we only need
+// to check for (1) and (2).
+//
+// In the current context, S is `Start`, X is `Step`, Ext is `ExtendOpTy` and T
+// is `Delta` (defined below).
+//
+template <typename ExtendOpTy>
+bool ScalarEvolution::proveNoWrapByVaryingStart(const SCEV *Start,
+ const SCEV *Step,
+ const Loop *L) {
+ auto WrapType = ExtendOpTraits<ExtendOpTy>::WrapType;
+
+ // We restrict `Start` to a constant to prevent SCEV from spending too much
+ // time here. It is correct (but more expensive) to continue with a
+ // non-constant `Start` and do a general SCEV subtraction to compute
+ // `PreStart` below.
+ //
+ const SCEVConstant *StartC = dyn_cast<SCEVConstant>(Start);
+ if (!StartC)
+ return false;
+
+ APInt StartAI = StartC->getValue()->getValue();
+
+ for (int64_t Delta : {-2, -1, 1, 2}) {
+ const SCEV *PreStart = getConstant(StartAI - Delta);
+
+ // Give up if we don't already have the add recurrence we need because
+ // actually constructing an add recurrence is relatively expensive.
+ const SCEVAddRecExpr *PreAR = [&]() {
+ FoldingSetNodeID ID;
+ ID.AddInteger(scAddRecExpr);
+ ID.AddPointer(PreStart);
+ ID.AddPointer(Step);
+ ID.AddPointer(L);
+ void *IP = nullptr;
+ return static_cast<SCEVAddRecExpr *>(
+ this->UniqueSCEVs.FindNodeOrInsertPos(ID, IP));
+ }();
+
+ if (PreAR && PreAR->getNoWrapFlags(WrapType)) { // proves (2)
+ const SCEV *DeltaS = getConstant(StartC->getType(), Delta);
+ ICmpInst::Predicate Pred = ICmpInst::BAD_ICMP_PREDICATE;
+ const SCEV *Limit = ExtendOpTraits<ExtendOpTy>::getOverflowLimitForStep(
+ DeltaS, &Pred, this);
+ if (Limit && isKnownPredicate(Pred, PreAR, Limit)) // proves (1)
+ return true;
+ }
+ }
+
+ return false;
+}
+
const SCEV *ScalarEvolution::getZeroExtendExpr(const SCEV *Op,
Type *Ty) {
assert(getTypeSizeInBits(Op->getType()) < getTypeSizeInBits(Ty) &&
@@ -1473,6 +1554,13 @@ const SCEV *ScalarEvolution::getZeroExtendExpr(const SCEV *Op,
}
}
}
+
+ if (proveNoWrapByVaryingStart<SCEVZeroExtendExpr>(Start, Step, L)) {
+ const_cast<SCEVAddRecExpr *>(AR)->setNoWrapFlags(SCEV::FlagNUW);
+ return getAddRecExpr(
+ getExtendAddRecStart<SCEVZeroExtendExpr>(AR, Ty, this),
+ getZeroExtendExpr(Step, Ty), L, AR->getNoWrapFlags());
+ }
}
// The cast wasn't folded; create an explicit cast node.
@@ -1664,6 +1752,13 @@ const SCEV *ScalarEvolution::getSignExtendExpr(const SCEV *Op,
return getAddExpr(Start, getSignExtendExpr(NewAR, Ty));
}
}
+
+ if (proveNoWrapByVaryingStart<SCEVSignExtendExpr>(Start, Step, L)) {
+ const_cast<SCEVAddRecExpr *>(AR)->setNoWrapFlags(SCEV::FlagNSW);
+ return getAddRecExpr(
+ getExtendAddRecStart<SCEVSignExtendExpr>(AR, Ty, this),
+ getSignExtendExpr(Step, Ty), L, AR->getNoWrapFlags());
+ }
}
// The cast wasn't folded; create an explicit cast node.
@@ -3037,39 +3132,23 @@ const SCEV *ScalarEvolution::getUMinExpr(const SCEV *LHS,
}
const SCEV *ScalarEvolution::getSizeOfExpr(Type *IntTy, Type *AllocTy) {
- // If we have DataLayout, we can bypass creating a target-independent
+ // We can bypass creating a target-independent
// constant expression and then folding it back into a ConstantInt.
// This is just a compile-time optimization.
- if (DL)
- return getConstant(IntTy, DL->getTypeAllocSize(AllocTy));
-
- Constant *C = ConstantExpr::getSizeOf(AllocTy);
- if (ConstantExpr *CE = dyn_cast<ConstantExpr>(C))
- if (Constant *Folded = ConstantFoldConstantExpression(CE, DL, TLI))
- C = Folded;
- Type *Ty = getEffectiveSCEVType(PointerType::getUnqual(AllocTy));
- assert(Ty == IntTy && "Effective SCEV type doesn't match");
- return getTruncateOrZeroExtend(getSCEV(C), Ty);
+ return getConstant(IntTy,
+ F->getParent()->getDataLayout().getTypeAllocSize(AllocTy));
}
const SCEV *ScalarEvolution::getOffsetOfExpr(Type *IntTy,
StructType *STy,
unsigned FieldNo) {
- // If we have DataLayout, we can bypass creating a target-independent
+ // We can bypass creating a target-independent
// constant expression and then folding it back into a ConstantInt.
// This is just a compile-time optimization.
- if (DL) {
- return getConstant(IntTy,
- DL->getStructLayout(STy)->getElementOffset(FieldNo));
- }
-
- Constant *C = ConstantExpr::getOffsetOf(STy, FieldNo);
- if (ConstantExpr *CE = dyn_cast<ConstantExpr>(C))
- if (Constant *Folded = ConstantFoldConstantExpression(CE, DL, TLI))
- C = Folded;
-
- Type *Ty = getEffectiveSCEVType(PointerType::getUnqual(STy));
- return getTruncateOrZeroExtend(getSCEV(C), Ty);
+ return getConstant(
+ IntTy,
+ F->getParent()->getDataLayout().getStructLayout(STy)->getElementOffset(
+ FieldNo));
}
const SCEV *ScalarEvolution::getUnknown(Value *V) {
@@ -3111,19 +3190,7 @@ bool ScalarEvolution::isSCEVable(Type *Ty) const {
/// for which isSCEVable must return true.
uint64_t ScalarEvolution::getTypeSizeInBits(Type *Ty) const {
assert(isSCEVable(Ty) && "Type is not SCEVable!");
-
- // If we have a DataLayout, use it!
- if (DL)
- return DL->getTypeSizeInBits(Ty);
-
- // Integer types have fixed sizes.
- if (Ty->isIntegerTy())
- return Ty->getPrimitiveSizeInBits();
-
- // The only other support type is pointer. Without DataLayout, conservatively
- // assume pointers are 64-bit.
- assert(Ty->isPointerTy() && "isSCEVable permitted a non-SCEVable type!");
- return 64;
+ return F->getParent()->getDataLayout().getTypeSizeInBits(Ty);
}
/// getEffectiveSCEVType - Return a type with the same bitwidth as
@@ -3139,12 +3206,7 @@ Type *ScalarEvolution::getEffectiveSCEVType(Type *Ty) const {
// The only other support type is pointer.
assert(Ty->isPointerTy() && "Unexpected non-pointer non-integer type!");
-
- if (DL)
- return DL->getIntPtrType(Ty);
-
- // Without DataLayout, conservatively assume pointers are 64-bit.
- return Type::getInt64Ty(getContext());
+ return F->getParent()->getDataLayout().getIntPtrType(Ty);
}
const SCEV *ScalarEvolution::getCouldNotCompute() {
@@ -3531,10 +3593,12 @@ const SCEV *ScalarEvolution::createNodeForPHI(PHINode *PN) {
// If the increment doesn't overflow, then neither the addrec nor
// the post-increment will overflow.
if (const AddOperator *OBO = dyn_cast<AddOperator>(BEValueV)) {
- if (OBO->hasNoUnsignedWrap())
- Flags = setFlags(Flags, SCEV::FlagNUW);
- if (OBO->hasNoSignedWrap())
- Flags = setFlags(Flags, SCEV::FlagNSW);
+ if (OBO->getOperand(0) == PN) {
+ if (OBO->hasNoUnsignedWrap())
+ Flags = setFlags(Flags, SCEV::FlagNUW);
+ if (OBO->hasNoSignedWrap())
+ Flags = setFlags(Flags, SCEV::FlagNSW);
+ }
} else if (GEPOperator *GEP = dyn_cast<GEPOperator>(BEValueV)) {
// If the increment is an inbounds GEP, then we know the address
// space cannot be wrapped around. We cannot make any guarantee
@@ -3542,7 +3606,7 @@ const SCEV *ScalarEvolution::createNodeForPHI(PHINode *PN) {
// unsigned but we may have a negative index from the base
// pointer. We can guarantee that no unsigned wrap occurs if the
// indices form a positive value.
- if (GEP->isInBounds()) {
+ if (GEP->isInBounds() && GEP->getOperand(0) == PN) {
Flags = setFlags(Flags, SCEV::FlagNW);
const SCEV *Ptr = getSCEV(GEP->getPointerOperand());
@@ -3608,7 +3672,8 @@ const SCEV *ScalarEvolution::createNodeForPHI(PHINode *PN) {
// PHI's incoming blocks are in a different loop, in which case doing so
// risks breaking LCSSA form. Instcombine would normally zap these, but
// it doesn't have DominatorTree information, so it may miss cases.
- if (Value *V = SimplifyInstruction(PN, DL, TLI, DT, AC))
+ if (Value *V =
+ SimplifyInstruction(PN, F->getParent()->getDataLayout(), TLI, DT, AC))
if (LI->replacementPreservesLCSSAForm(PN, V))
return getSCEV(V);
@@ -3740,7 +3805,8 @@ ScalarEvolution::GetMinTrailingZeros(const SCEV *S) {
// For a SCEVUnknown, ask ValueTracking.
unsigned BitWidth = getTypeSizeInBits(U->getType());
APInt Zeros(BitWidth, 0), Ones(BitWidth, 0);
- computeKnownBits(U->getValue(), Zeros, Ones, DL, 0, AC, nullptr, DT);
+ computeKnownBits(U->getValue(), Zeros, Ones,
+ F->getParent()->getDataLayout(), 0, AC, nullptr, DT);
return Zeros.countTrailingOnes();
}
@@ -3775,79 +3841,93 @@ static Optional<ConstantRange> GetRangeFromMetadata(Value *V) {
return None;
}
-/// getUnsignedRange - Determine the unsigned range for a particular SCEV.
+/// getRange - Determine the range for a particular SCEV. If SignHint is
+/// HINT_RANGE_UNSIGNED (resp. HINT_RANGE_SIGNED) then getRange prefers ranges
+/// with a "cleaner" unsigned (resp. signed) representation.
///
ConstantRange
-ScalarEvolution::getUnsignedRange(const SCEV *S) {
+ScalarEvolution::getRange(const SCEV *S,
+ ScalarEvolution::RangeSignHint SignHint) {
+ DenseMap<const SCEV *, ConstantRange> &Cache =
+ SignHint == ScalarEvolution::HINT_RANGE_UNSIGNED ? UnsignedRanges
+ : SignedRanges;
+
// See if we've computed this range already.
- DenseMap<const SCEV *, ConstantRange>::iterator I = UnsignedRanges.find(S);
- if (I != UnsignedRanges.end())
+ DenseMap<const SCEV *, ConstantRange>::iterator I = Cache.find(S);
+ if (I != Cache.end())
return I->second;
if (const SCEVConstant *C = dyn_cast<SCEVConstant>(S))
- return setUnsignedRange(C, ConstantRange(C->getValue()->getValue()));
+ return setRange(C, SignHint, ConstantRange(C->getValue()->getValue()));
unsigned BitWidth = getTypeSizeInBits(S->getType());
ConstantRange ConservativeResult(BitWidth, /*isFullSet=*/true);
- // If the value has known zeros, the maximum unsigned value will have those
- // known zeros as well.
+ // If the value has known zeros, the maximum value will have those known zeros
+ // as well.
uint32_t TZ = GetMinTrailingZeros(S);
- if (TZ != 0)
- ConservativeResult =
- ConstantRange(APInt::getMinValue(BitWidth),
- APInt::getMaxValue(BitWidth).lshr(TZ).shl(TZ) + 1);
+ if (TZ != 0) {
+ if (SignHint == ScalarEvolution::HINT_RANGE_UNSIGNED)
+ ConservativeResult =
+ ConstantRange(APInt::getMinValue(BitWidth),
+ APInt::getMaxValue(BitWidth).lshr(TZ).shl(TZ) + 1);
+ else
+ ConservativeResult = ConstantRange(
+ APInt::getSignedMinValue(BitWidth),
+ APInt::getSignedMaxValue(BitWidth).ashr(TZ).shl(TZ) + 1);
+ }
if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(S)) {
- ConstantRange X = getUnsignedRange(Add->getOperand(0));
+ ConstantRange X = getRange(Add->getOperand(0), SignHint);
for (unsigned i = 1, e = Add->getNumOperands(); i != e; ++i)
- X = X.add(getUnsignedRange(Add->getOperand(i)));
- return setUnsignedRange(Add, ConservativeResult.intersectWith(X));
+ X = X.add(getRange(Add->getOperand(i), SignHint));
+ return setRange(Add, SignHint, ConservativeResult.intersectWith(X));
}
if (const SCEVMulExpr *Mul = dyn_cast<SCEVMulExpr>(S)) {
- ConstantRange X = getUnsignedRange(Mul->getOperand(0));
+ ConstantRange X = getRange(Mul->getOperand(0), SignHint);
for (unsigned i = 1, e = Mul->getNumOperands(); i != e; ++i)
- X = X.multiply(getUnsignedRange(Mul->getOperand(i)));
- return setUnsignedRange(Mul, ConservativeResult.intersectWith(X));
+ X = X.multiply(getRange(Mul->getOperand(i), SignHint));
+ return setRange(Mul, SignHint, ConservativeResult.intersectWith(X));
}
if (const SCEVSMaxExpr *SMax = dyn_cast<SCEVSMaxExpr>(S)) {
- ConstantRange X = getUnsignedRange(SMax->getOperand(0));
+ ConstantRange X = getRange(SMax->getOperand(0), SignHint);
for (unsigned i = 1, e = SMax->getNumOperands(); i != e; ++i)
- X = X.smax(getUnsignedRange(SMax->getOperand(i)));
- return setUnsignedRange(SMax, ConservativeResult.intersectWith(X));
+ X = X.smax(getRange(SMax->getOperand(i), SignHint));
+ return setRange(SMax, SignHint, ConservativeResult.intersectWith(X));
}
if (const SCEVUMaxExpr *UMax = dyn_cast<SCEVUMaxExpr>(S)) {
- ConstantRange X = getUnsignedRange(UMax->getOperand(0));
+ ConstantRange X = getRange(UMax->getOperand(0), SignHint);
for (unsigned i = 1, e = UMax->getNumOperands(); i != e; ++i)
- X = X.umax(getUnsignedRange(UMax->getOperand(i)));
- return setUnsignedRange(UMax, ConservativeResult.intersectWith(X));
+ X = X.umax(getRange(UMax->getOperand(i), SignHint));
+ return setRange(UMax, SignHint, ConservativeResult.intersectWith(X));
}
if (const SCEVUDivExpr *UDiv = dyn_cast<SCEVUDivExpr>(S)) {
- ConstantRange X = getUnsignedRange(UDiv->getLHS());
- ConstantRange Y = getUnsignedRange(UDiv->getRHS());
- return setUnsignedRange(UDiv, ConservativeResult.intersectWith(X.udiv(Y)));
+ ConstantRange X = getRange(UDiv->getLHS(), SignHint);
+ ConstantRange Y = getRange(UDiv->getRHS(), SignHint);
+ return setRange(UDiv, SignHint,
+ ConservativeResult.intersectWith(X.udiv(Y)));
}
if (const SCEVZeroExtendExpr *ZExt = dyn_cast<SCEVZeroExtendExpr>(S)) {
- ConstantRange X = getUnsignedRange(ZExt->getOperand());
- return setUnsignedRange(ZExt,
- ConservativeResult.intersectWith(X.zeroExtend(BitWidth)));
+ ConstantRange X = getRange(ZExt->getOperand(), SignHint);
+ return setRange(ZExt, SignHint,
+ ConservativeResult.intersectWith(X.zeroExtend(BitWidth)));
}
if (const SCEVSignExtendExpr *SExt = dyn_cast<SCEVSignExtendExpr>(S)) {
- ConstantRange X = getUnsignedRange(SExt->getOperand());
- return setUnsignedRange(SExt,
- ConservativeResult.intersectWith(X.signExtend(BitWidth)));
+ ConstantRange X = getRange(SExt->getOperand(), SignHint);
+ return setRange(SExt, SignHint,
+ ConservativeResult.intersectWith(X.signExtend(BitWidth)));
}
if (const SCEVTruncateExpr *Trunc = dyn_cast<SCEVTruncateExpr>(S)) {
- ConstantRange X = getUnsignedRange(Trunc->getOperand());
- return setUnsignedRange(Trunc,
- ConservativeResult.intersectWith(X.truncate(BitWidth)));
+ ConstantRange X = getRange(Trunc->getOperand(), SignHint);
+ return setRange(Trunc, SignHint,
+ ConservativeResult.intersectWith(X.truncate(BitWidth)));
}
if (const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(S)) {
@@ -3860,143 +3940,6 @@ ScalarEvolution::getUnsignedRange(const SCEV *S) {
ConservativeResult.intersectWith(
ConstantRange(C->getValue()->getValue(), APInt(BitWidth, 0)));
- // TODO: non-affine addrec
- if (AddRec->isAffine()) {
- Type *Ty = AddRec->getType();
- const SCEV *MaxBECount = getMaxBackedgeTakenCount(AddRec->getLoop());
- if (!isa<SCEVCouldNotCompute>(MaxBECount) &&
- getTypeSizeInBits(MaxBECount->getType()) <= BitWidth) {
- MaxBECount = getNoopOrZeroExtend(MaxBECount, Ty);
-
- const SCEV *Start = AddRec->getStart();
- const SCEV *Step = AddRec->getStepRecurrence(*this);
-
- ConstantRange StartRange = getUnsignedRange(Start);
- ConstantRange StepRange = getSignedRange(Step);
- ConstantRange MaxBECountRange = getUnsignedRange(MaxBECount);
- ConstantRange EndRange =
- StartRange.add(MaxBECountRange.multiply(StepRange));
-
- // Check for overflow. This must be done with ConstantRange arithmetic
- // because we could be called from within the ScalarEvolution overflow
- // checking code.
- ConstantRange ExtStartRange = StartRange.zextOrTrunc(BitWidth*2+1);
- ConstantRange ExtStepRange = StepRange.sextOrTrunc(BitWidth*2+1);
- ConstantRange ExtMaxBECountRange =
- MaxBECountRange.zextOrTrunc(BitWidth*2+1);
- ConstantRange ExtEndRange = EndRange.zextOrTrunc(BitWidth*2+1);
- if (ExtStartRange.add(ExtMaxBECountRange.multiply(ExtStepRange)) !=
- ExtEndRange)
- return setUnsignedRange(AddRec, ConservativeResult);
-
- APInt Min = APIntOps::umin(StartRange.getUnsignedMin(),
- EndRange.getUnsignedMin());
- APInt Max = APIntOps::umax(StartRange.getUnsignedMax(),
- EndRange.getUnsignedMax());
- if (Min.isMinValue() && Max.isMaxValue())
- return setUnsignedRange(AddRec, ConservativeResult);
- return setUnsignedRange(AddRec,
- ConservativeResult.intersectWith(ConstantRange(Min, Max+1)));
- }
- }
-
- return setUnsignedRange(AddRec, ConservativeResult);
- }
-
- if (const SCEVUnknown *U = dyn_cast<SCEVUnknown>(S)) {
- // Check if the IR explicitly contains !range metadata.
- Optional<ConstantRange> MDRange = GetRangeFromMetadata(U->getValue());
- if (MDRange.hasValue())
- ConservativeResult = ConservativeResult.intersectWith(MDRange.getValue());
-
- // For a SCEVUnknown, ask ValueTracking.
- APInt Zeros(BitWidth, 0), Ones(BitWidth, 0);
- computeKnownBits(U->getValue(), Zeros, Ones, DL, 0, AC, nullptr, DT);
- if (Ones == ~Zeros + 1)
- return setUnsignedRange(U, ConservativeResult);
- return setUnsignedRange(U,
- ConservativeResult.intersectWith(ConstantRange(Ones, ~Zeros + 1)));
- }
-
- return setUnsignedRange(S, ConservativeResult);
-}
-
-/// getSignedRange - Determine the signed range for a particular SCEV.
-///
-ConstantRange
-ScalarEvolution::getSignedRange(const SCEV *S) {
- // See if we've computed this range already.
- DenseMap<const SCEV *, ConstantRange>::iterator I = SignedRanges.find(S);
- if (I != SignedRanges.end())
- return I->second;
-
- if (const SCEVConstant *C = dyn_cast<SCEVConstant>(S))
- return setSignedRange(C, ConstantRange(C->getValue()->getValue()));
-
- unsigned BitWidth = getTypeSizeInBits(S->getType());
- ConstantRange ConservativeResult(BitWidth, /*isFullSet=*/true);
-
- // If the value has known zeros, the maximum signed value will have those
- // known zeros as well.
- uint32_t TZ = GetMinTrailingZeros(S);
- if (TZ != 0)
- ConservativeResult =
- ConstantRange(APInt::getSignedMinValue(BitWidth),
- APInt::getSignedMaxValue(BitWidth).ashr(TZ).shl(TZ) + 1);
-
- if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(S)) {
- ConstantRange X = getSignedRange(Add->getOperand(0));
- for (unsigned i = 1, e = Add->getNumOperands(); i != e; ++i)
- X = X.add(getSignedRange(Add->getOperand(i)));
- return setSignedRange(Add, ConservativeResult.intersectWith(X));
- }
-
- if (const SCEVMulExpr *Mul = dyn_cast<SCEVMulExpr>(S)) {
- ConstantRange X = getSignedRange(Mul->getOperand(0));
- for (unsigned i = 1, e = Mul->getNumOperands(); i != e; ++i)
- X = X.multiply(getSignedRange(Mul->getOperand(i)));
- return setSignedRange(Mul, ConservativeResult.intersectWith(X));
- }
-
- if (const SCEVSMaxExpr *SMax = dyn_cast<SCEVSMaxExpr>(S)) {
- ConstantRange X = getSignedRange(SMax->getOperand(0));
- for (unsigned i = 1, e = SMax->getNumOperands(); i != e; ++i)
- X = X.smax(getSignedRange(SMax->getOperand(i)));
- return setSignedRange(SMax, ConservativeResult.intersectWith(X));
- }
-
- if (const SCEVUMaxExpr *UMax = dyn_cast<SCEVUMaxExpr>(S)) {
- ConstantRange X = getSignedRange(UMax->getOperand(0));
- for (unsigned i = 1, e = UMax->getNumOperands(); i != e; ++i)
- X = X.umax(getSignedRange(UMax->getOperand(i)));
- return setSignedRange(UMax, ConservativeResult.intersectWith(X));
- }
-
- if (const SCEVUDivExpr *UDiv = dyn_cast<SCEVUDivExpr>(S)) {
- ConstantRange X = getSignedRange(UDiv->getLHS());
- ConstantRange Y = getSignedRange(UDiv->getRHS());
- return setSignedRange(UDiv, ConservativeResult.intersectWith(X.udiv(Y)));
- }
-
- if (const SCEVZeroExtendExpr *ZExt = dyn_cast<SCEVZeroExtendExpr>(S)) {
- ConstantRange X = getSignedRange(ZExt->getOperand());
- return setSignedRange(ZExt,
- ConservativeResult.intersectWith(X.zeroExtend(BitWidth)));
- }
-
- if (const SCEVSignExtendExpr *SExt = dyn_cast<SCEVSignExtendExpr>(S)) {
- ConstantRange X = getSignedRange(SExt->getOperand());
- return setSignedRange(SExt,
- ConservativeResult.intersectWith(X.signExtend(BitWidth)));
- }
-
- if (const SCEVTruncateExpr *Trunc = dyn_cast<SCEVTruncateExpr>(S)) {
- ConstantRange X = getSignedRange(Trunc->getOperand());
- return setSignedRange(Trunc,
- ConservativeResult.intersectWith(X.truncate(BitWidth)));
- }
-
- if (const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(S)) {
// If there's no signed wrap, and all the operands have the same sign or
// zero, the value won't ever change sign.
if (AddRec->getNoWrapFlags(SCEV::FlagNSW)) {
@@ -4022,41 +3965,66 @@ ScalarEvolution::getSignedRange(const SCEV *S) {
const SCEV *MaxBECount = getMaxBackedgeTakenCount(AddRec->getLoop());
if (!isa<SCEVCouldNotCompute>(MaxBECount) &&
getTypeSizeInBits(MaxBECount->getType()) <= BitWidth) {
+
+ // Check for overflow. This must be done with ConstantRange arithmetic
+ // because we could be called from within the ScalarEvolution overflow
+ // checking code.
+
MaxBECount = getNoopOrZeroExtend(MaxBECount, Ty);
+ ConstantRange MaxBECountRange = getUnsignedRange(MaxBECount);
+ ConstantRange ZExtMaxBECountRange =
+ MaxBECountRange.zextOrTrunc(BitWidth * 2 + 1);
const SCEV *Start = AddRec->getStart();
const SCEV *Step = AddRec->getStepRecurrence(*this);
+ ConstantRange StepSRange = getSignedRange(Step);
+ ConstantRange SExtStepSRange = StepSRange.sextOrTrunc(BitWidth * 2 + 1);
+
+ ConstantRange StartURange = getUnsignedRange(Start);
+ ConstantRange EndURange =
+ StartURange.add(MaxBECountRange.multiply(StepSRange));
+
+ // Check for unsigned overflow.
+ ConstantRange ZExtStartURange =
+ StartURange.zextOrTrunc(BitWidth * 2 + 1);
+ ConstantRange ZExtEndURange = EndURange.zextOrTrunc(BitWidth * 2 + 1);
+ if (ZExtStartURange.add(ZExtMaxBECountRange.multiply(SExtStepSRange)) ==
+ ZExtEndURange) {
+ APInt Min = APIntOps::umin(StartURange.getUnsignedMin(),
+ EndURange.getUnsignedMin());
+ APInt Max = APIntOps::umax(StartURange.getUnsignedMax(),
+ EndURange.getUnsignedMax());
+ bool IsFullRange = Min.isMinValue() && Max.isMaxValue();
+ if (!IsFullRange)
+ ConservativeResult =
+ ConservativeResult.intersectWith(ConstantRange(Min, Max + 1));
+ }
- ConstantRange StartRange = getSignedRange(Start);
- ConstantRange StepRange = getSignedRange(Step);
- ConstantRange MaxBECountRange = getUnsignedRange(MaxBECount);
- ConstantRange EndRange =
- StartRange.add(MaxBECountRange.multiply(StepRange));
-
- // Check for overflow. This must be done with ConstantRange arithmetic
- // because we could be called from within the ScalarEvolution overflow
- // checking code.
- ConstantRange ExtStartRange = StartRange.sextOrTrunc(BitWidth*2+1);
- ConstantRange ExtStepRange = StepRange.sextOrTrunc(BitWidth*2+1);
- ConstantRange ExtMaxBECountRange =
- MaxBECountRange.zextOrTrunc(BitWidth*2+1);
- ConstantRange ExtEndRange = EndRange.sextOrTrunc(BitWidth*2+1);
- if (ExtStartRange.add(ExtMaxBECountRange.multiply(ExtStepRange)) !=
- ExtEndRange)
- return setSignedRange(AddRec, ConservativeResult);
-
- APInt Min = APIntOps::smin(StartRange.getSignedMin(),
- EndRange.getSignedMin());
- APInt Max = APIntOps::smax(StartRange.getSignedMax(),
- EndRange.getSignedMax());
- if (Min.isMinSignedValue() && Max.isMaxSignedValue())
- return setSignedRange(AddRec, ConservativeResult);
- return setSignedRange(AddRec,
- ConservativeResult.intersectWith(ConstantRange(Min, Max+1)));
+ ConstantRange StartSRange = getSignedRange(Start);
+ ConstantRange EndSRange =
+ StartSRange.add(MaxBECountRange.multiply(StepSRange));
+
+ // Check for signed overflow. This must be done with ConstantRange
+ // arithmetic because we could be called from within the ScalarEvolution
+ // overflow checking code.
+ ConstantRange SExtStartSRange =
+ StartSRange.sextOrTrunc(BitWidth * 2 + 1);
+ ConstantRange SExtEndSRange = EndSRange.sextOrTrunc(BitWidth * 2 + 1);
+ if (SExtStartSRange.add(ZExtMaxBECountRange.multiply(SExtStepSRange)) ==
+ SExtEndSRange) {
+ APInt Min = APIntOps::smin(StartSRange.getSignedMin(),
+ EndSRange.getSignedMin());
+ APInt Max = APIntOps::smax(StartSRange.getSignedMax(),
+ EndSRange.getSignedMax());
+ bool IsFullRange = Min.isMinSignedValue() && Max.isMaxSignedValue();
+ if (!IsFullRange)
+ ConservativeResult =
+ ConservativeResult.intersectWith(ConstantRange(Min, Max + 1));
+ }
}
}
- return setSignedRange(AddRec, ConservativeResult);
+ return setRange(AddRec, SignHint, ConservativeResult);
}
if (const SCEVUnknown *U = dyn_cast<SCEVUnknown>(S)) {
@@ -4065,18 +4033,31 @@ ScalarEvolution::getSignedRange(const SCEV *S) {
if (MDRange.hasValue())
ConservativeResult = ConservativeResult.intersectWith(MDRange.getValue());
- // For a SCEVUnknown, ask ValueTracking.
- if (!U->getValue()->getType()->isIntegerTy() && !DL)
- return setSignedRange(U, ConservativeResult);
- unsigned NS = ComputeNumSignBits(U->getValue(), DL, 0, AC, nullptr, DT);
- if (NS <= 1)
- return setSignedRange(U, ConservativeResult);
- return setSignedRange(U, ConservativeResult.intersectWith(
- ConstantRange(APInt::getSignedMinValue(BitWidth).ashr(NS - 1),
- APInt::getSignedMaxValue(BitWidth).ashr(NS - 1)+1)));
+ // Split here to avoid paying the compile-time cost of calling both
+ // computeKnownBits and ComputeNumSignBits. This restriction can be lifted
+ // if needed.
+ const DataLayout &DL = F->getParent()->getDataLayout();
+ if (SignHint == ScalarEvolution::HINT_RANGE_UNSIGNED) {
+ // For a SCEVUnknown, ask ValueTracking.
+ APInt Zeros(BitWidth, 0), Ones(BitWidth, 0);
+ computeKnownBits(U->getValue(), Zeros, Ones, DL, 0, AC, nullptr, DT);
+ if (Ones != ~Zeros + 1)
+ ConservativeResult =
+ ConservativeResult.intersectWith(ConstantRange(Ones, ~Zeros + 1));
+ } else {
+ assert(SignHint == ScalarEvolution::HINT_RANGE_SIGNED &&
+ "generalize as needed!");
+ unsigned NS = ComputeNumSignBits(U->getValue(), DL, 0, AC, nullptr, DT);
+ if (NS > 1)
+ ConservativeResult = ConservativeResult.intersectWith(
+ ConstantRange(APInt::getSignedMinValue(BitWidth).ashr(NS - 1),
+ APInt::getSignedMaxValue(BitWidth).ashr(NS - 1) + 1));
+ }
+
+ return setRange(U, SignHint, ConservativeResult);
}
- return setSignedRange(S, ConservativeResult);
+ return setRange(S, SignHint, ConservativeResult);
}
/// createSCEV - We know that there is no SCEV for the specified value.
@@ -4175,8 +4156,8 @@ const SCEV *ScalarEvolution::createSCEV(Value *V) {
unsigned TZ = A.countTrailingZeros();
unsigned BitWidth = A.getBitWidth();
APInt KnownZero(BitWidth, 0), KnownOne(BitWidth, 0);
- computeKnownBits(U->getOperand(0), KnownZero, KnownOne, DL, 0, AC,
- nullptr, DT);
+ computeKnownBits(U->getOperand(0), KnownZero, KnownOne,
+ F->getParent()->getDataLayout(), 0, AC, nullptr, DT);
APInt EffectiveMask =
APInt::getLowBitsSet(BitWidth, BitWidth - LZ - TZ).shl(TZ);
@@ -5327,12 +5308,9 @@ static bool canConstantEvolve(Instruction *I, const Loop *L) {
if (!L->contains(I)) return false;
if (isa<PHINode>(I)) {
- if (L->getHeader() == I->getParent())
- return true;
- else
- // We don't currently keep track of the control flow needed to evaluate
- // PHIs, so we cannot handle PHIs inside of loops.
- return false;
+ // We don't currently keep track of the control flow needed to evaluate
+ // PHIs, so we cannot handle PHIs inside of loops.
+ return L->getHeader() == I->getParent();
}
// If we won't be able to constant fold this expression even if the operands
@@ -5403,7 +5381,7 @@ static PHINode *getConstantEvolvingPHI(Value *V, const Loop *L) {
/// reason, return null.
static Constant *EvaluateExpression(Value *V, const Loop *L,
DenseMap<Instruction *, Constant *> &Vals,
- const DataLayout *DL,
+ const DataLayout &DL,
const TargetLibraryInfo *TLI) {
// Convenient constant check, but redundant for recursive calls.
if (Constant *C = dyn_cast<Constant>(V)) return C;
@@ -5492,6 +5470,7 @@ ScalarEvolution::getConstantEvolutionLoopExitValue(PHINode *PN,
unsigned NumIterations = BEs.getZExtValue(); // must be in range
unsigned IterationNum = 0;
+ const DataLayout &DL = F->getParent()->getDataLayout();
for (; ; ++IterationNum) {
if (IterationNum == NumIterations)
return RetVal = CurrentIterVals[PN]; // Got exit value!
@@ -5499,8 +5478,8 @@ ScalarEvolution::getConstantEvolutionLoopExitValue(PHINode *PN,
// Compute the value of the PHIs for the next iteration.
// EvaluateExpression adds non-phi values to the CurrentIterVals map.
DenseMap<Instruction *, Constant *> NextIterVals;
- Constant *NextPHI = EvaluateExpression(BEValue, L, CurrentIterVals, DL,
- TLI);
+ Constant *NextPHI =
+ EvaluateExpression(BEValue, L, CurrentIterVals, DL, TLI);
if (!NextPHI)
return nullptr; // Couldn't evaluate!
NextIterVals[PN] = NextPHI;
@@ -5576,12 +5555,11 @@ const SCEV *ScalarEvolution::ComputeExitCountExhaustively(const Loop *L,
// Okay, we find a PHI node that defines the trip count of this loop. Execute
// the loop symbolically to determine when the condition gets a value of
// "ExitWhen".
-
unsigned MaxIterations = MaxBruteForceIterations; // Limit analysis.
+ const DataLayout &DL = F->getParent()->getDataLayout();
for (unsigned IterationNum = 0; IterationNum != MaxIterations;++IterationNum){
- ConstantInt *CondVal =
- dyn_cast_or_null<ConstantInt>(EvaluateExpression(Cond, L, CurrentIterVals,
- DL, TLI));
+ ConstantInt *CondVal = dyn_cast_or_null<ConstantInt>(
+ EvaluateExpression(Cond, L, CurrentIterVals, DL, TLI));
// Couldn't symbolically evaluate.
if (!CondVal) return getCouldNotCompute();
@@ -5814,16 +5792,16 @@ const SCEV *ScalarEvolution::computeSCEVAtScope(const SCEV *V, const Loop *L) {
// Check to see if getSCEVAtScope actually made an improvement.
if (MadeImprovement) {
Constant *C = nullptr;
+ const DataLayout &DL = F->getParent()->getDataLayout();
if (const CmpInst *CI = dyn_cast<CmpInst>(I))
- C = ConstantFoldCompareInstOperands(CI->getPredicate(),
- Operands[0], Operands[1], DL,
- TLI);
+ C = ConstantFoldCompareInstOperands(CI->getPredicate(), Operands[0],
+ Operands[1], DL, TLI);
else if (const LoadInst *LI = dyn_cast<LoadInst>(I)) {
if (!LI->isVolatile())
C = ConstantFoldLoadFromConstPtr(Operands[0], DL);
} else
- C = ConstantFoldInstOperands(I->getOpcode(), I->getType(),
- Operands, DL, TLI);
+ C = ConstantFoldInstOperands(I->getOpcode(), I->getType(), Operands,
+ DL, TLI);
if (!C) return V;
return getSCEV(C);
}
@@ -6105,7 +6083,7 @@ ScalarEvolution::HowFarToZero(const SCEV *V, const Loop *L, bool ControlsExit) {
dyn_cast<ConstantInt>(ConstantExpr::getICmp(CmpInst::ICMP_ULT,
R1->getValue(),
R2->getValue()))) {
- if (CB->getZExtValue() == false)
+ if (!CB->getZExtValue())
std::swap(R1, R2); // R1 is the minimum root now.
// We can only use this value if the chrec ends up with an exact zero
@@ -6815,15 +6793,6 @@ bool ScalarEvolution::isImpliedCond(ICmpInst::Predicate Pred,
ICmpInst *ICI = dyn_cast<ICmpInst>(FoundCondValue);
if (!ICI) return false;
- // Bail if the ICmp's operands' types are wider than the needed type
- // before attempting to call getSCEV on them. This avoids infinite
- // recursion, since the analysis of widening casts can require loop
- // exit condition information for overflow checking, which would
- // lead back here.
- if (getTypeSizeInBits(LHS->getType()) <
- getTypeSizeInBits(ICI->getOperand(0)->getType()))
- return false;
-
// Now that we found a conditional branch that dominates the loop or controls
// the loop latch. Check to see if it is the comparison we are looking for.
ICmpInst::Predicate FoundPred;
@@ -6835,9 +6804,17 @@ bool ScalarEvolution::isImpliedCond(ICmpInst::Predicate Pred,
const SCEV *FoundLHS = getSCEV(ICI->getOperand(0));
const SCEV *FoundRHS = getSCEV(ICI->getOperand(1));
- // Balance the types. The case where FoundLHS' type is wider than
- // LHS' type is checked for above.
- if (getTypeSizeInBits(LHS->getType()) >
+ // Balance the types.
+ if (getTypeSizeInBits(LHS->getType()) <
+ getTypeSizeInBits(FoundLHS->getType())) {
+ if (CmpInst::isSigned(Pred)) {
+ LHS = getSignExtendExpr(LHS, FoundLHS->getType());
+ RHS = getSignExtendExpr(RHS, FoundLHS->getType());
+ } else {
+ LHS = getZeroExtendExpr(LHS, FoundLHS->getType());
+ RHS = getZeroExtendExpr(RHS, FoundLHS->getType());
+ }
+ } else if (getTypeSizeInBits(LHS->getType()) >
getTypeSizeInBits(FoundLHS->getType())) {
if (CmpInst::isSigned(FoundPred)) {
FoundLHS = getSignExtendExpr(FoundLHS, LHS->getType());
@@ -6963,6 +6940,9 @@ bool ScalarEvolution::isImpliedCondOperands(ICmpInst::Predicate Pred,
const SCEV *LHS, const SCEV *RHS,
const SCEV *FoundLHS,
const SCEV *FoundRHS) {
+ if (isImpliedCondOperandsViaRanges(Pred, LHS, RHS, FoundLHS, FoundRHS))
+ return true;
+
return isImpliedCondOperandsHelper(Pred, LHS, RHS,
FoundLHS, FoundRHS) ||
// ~x < ~y --> x > y
@@ -7100,6 +7080,47 @@ ScalarEvolution::isImpliedCondOperandsHelper(ICmpInst::Predicate Pred,
return false;
}
+/// isImpliedCondOperandsViaRanges - helper function for isImpliedCondOperands.
+/// Tries to prove implications like "X `sgt` 0 => X - 1 `sgt` -1".
+bool ScalarEvolution::isImpliedCondOperandsViaRanges(ICmpInst::Predicate Pred,
+ const SCEV *LHS,
+ const SCEV *RHS,
+ const SCEV *FoundLHS,
+ const SCEV *FoundRHS) {
+ if (!isa<SCEVConstant>(RHS) || !isa<SCEVConstant>(FoundRHS))
+ // The restriction on `FoundRHS` can be lifted easily -- it exists only to
+ // reduce the compile time impact of this optimization.
+ return false;
+
+ const SCEVAddExpr *AddLHS = dyn_cast<SCEVAddExpr>(LHS);
+ if (!AddLHS || AddLHS->getOperand(1) != FoundLHS ||
+ !isa<SCEVConstant>(AddLHS->getOperand(0)))
+ return false;
+
+ APInt ConstFoundRHS = cast<SCEVConstant>(FoundRHS)->getValue()->getValue();
+
+ // `FoundLHSRange` is the range we know `FoundLHS` to be in by virtue of the
+ // antecedent "`FoundLHS` `Pred` `FoundRHS`".
+ ConstantRange FoundLHSRange =
+ ConstantRange::makeAllowedICmpRegion(Pred, ConstFoundRHS);
+
+ // Since `LHS` is `FoundLHS` + `AddLHS->getOperand(0)`, we can compute a range
+ // for `LHS`:
+ APInt Addend =
+ cast<SCEVConstant>(AddLHS->getOperand(0))->getValue()->getValue();
+ ConstantRange LHSRange = FoundLHSRange.add(ConstantRange(Addend));
+
+ // We can also compute the range of values for `LHS` that satisfy the
+ // consequent, "`LHS` `Pred` `RHS`":
+ APInt ConstRHS = cast<SCEVConstant>(RHS)->getValue()->getValue();
+ ConstantRange SatisfyingLHSRange =
+ ConstantRange::makeSatisfyingICmpRegion(Pred, ConstRHS);
+
+ // The antecedent implies the consequent if every value of `LHS` that
+ // satisfies the antecedent also satisfies the consequent.
+ return SatisfyingLHSRange.contains(LHSRange);
+}
+
// Verify whether a linear IV with positive stride can overflow when in a
// less-than comparison, given the invariant term of the comparison, the
// stride, and the NSW/NUW flags on the recurrence.
@@ -7428,7 +7449,7 @@ const SCEV *SCEVAddRecExpr::getNumIterationsInRange(ConstantRange Range,
if (ConstantInt *CB =
dyn_cast<ConstantInt>(ConstantExpr::getICmp(ICmpInst::ICMP_ULT,
R1->getValue(), R2->getValue()))) {
- if (CB->getZExtValue() == false)
+ if (!CB->getZExtValue())
std::swap(R1, R2); // R1 is the minimum root now.
// Make sure the root is not off by one. The returned iteration should
@@ -7956,8 +7977,6 @@ bool ScalarEvolution::runOnFunction(Function &F) {
this->F = &F;
AC = &getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
- DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>();
- DL = DLP ? &DLP->getDataLayout() : nullptr;
TLI = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();
DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
return false;
@@ -8058,6 +8077,12 @@ void ScalarEvolution::print(raw_ostream &OS, const Module *) const {
OS << " --> ";
const SCEV *SV = SE.getSCEV(&*I);
SV->print(OS);
+ if (!isa<SCEVCouldNotCompute>(SV)) {
+ OS << " U: ";
+ SE.getUnsignedRange(SV).print(OS);
+ OS << " S: ";
+ SE.getSignedRange(SV).print(OS);
+ }
const Loop *L = LI->getLoopFor((*I).getParent());
@@ -8065,6 +8090,12 @@ void ScalarEvolution::print(raw_ostream &OS, const Module *) const {
if (AtUse != SV) {
OS << " --> ";
AtUse->print(OS);
+ if (!isa<SCEVCouldNotCompute>(AtUse)) {
+ OS << " U: ";
+ SE.getUnsignedRange(AtUse).print(OS);
+ OS << " S: ";
+ SE.getSignedRange(AtUse).print(OS);
+ }
}
if (L) {
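
The new isImpliedCondOperandsViaRanges helper reduces the implication check to plain ConstantRange arithmetic, so its motivating case "X `sgt` 0 => X - 1 `sgt` -1" can be verified directly. A self-contained sketch at i8, using only the ConstantRange calls the patch itself makes (impliedForI8 is an illustrative name):

#include "llvm/ADT/APInt.h"
#include "llvm/IR/ConstantRange.h"
#include "llvm/IR/InstrTypes.h"
using namespace llvm;

static bool impliedForI8() {
  // Antecedent "X sgt 0": at i8, X lies in the signed range [1, 127].
  ConstantRange XRange = ConstantRange::makeAllowedICmpRegion(
      CmpInst::ICMP_SGT, APInt(8, 0));
  // LHS = X + (-1), so shift the range by the addend: [0, 126].
  ConstantRange LHSRange = XRange.add(ConstantRange(APInt(8, -1, true)));
  // Consequent "LHS sgt -1" holds for every value in [0, 127].
  ConstantRange Satisfying = ConstantRange::makeSatisfyingICmpRegion(
      CmpInst::ICMP_SGT, APInt(8, -1, true));
  // [0, 126] is contained in [0, 127], so the implication is proved.
  return Satisfying.contains(LHSRange);
}

makeAllowedICmpRegion computes the values LHS may take under the antecedent, makeSatisfyingICmpRegion the values guaranteed to satisfy the consequent; containment of the shifted range is exactly the final test in the hunk above. The proveNoWrapByVaryingStart comment earlier in this file instantiates the same style of reasoning (e.g. {0,+,4} being nuw and ult -1 lets {1,+,4} = {0,+,4} + 1 inherit nuw).
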
diff --git a/lib/Analysis/ScalarEvolutionAliasAnalysis.cpp b/lib/Analysis/ScalarEvolutionAliasAnalysis.cpp
index 5c339ee..ccec0a8 100644
--- a/lib/Analysis/ScalarEvolutionAliasAnalysis.cpp
+++ b/lib/Analysis/ScalarEvolutionAliasAnalysis.cpp
@@ -22,6 +22,7 @@
#include "llvm/Analysis/Passes.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
+#include "llvm/IR/Module.h"
#include "llvm/Pass.h"
using namespace llvm;
@@ -79,7 +80,7 @@ ScalarEvolutionAliasAnalysis::getAnalysisUsage(AnalysisUsage &AU) const {
bool
ScalarEvolutionAliasAnalysis::runOnFunction(Function &F) {
- InitializeAliasAnalysis(this);
+ InitializeAliasAnalysis(this, &F.getParent()->getDataLayout());
SE = &getAnalysis<ScalarEvolution>();
return false;
}
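
For FunctionPass-based alias analyses like this one, the DataLayout migration lands in runOnFunction rather than doInitialization: InitializeAliasAnalysis now takes the layout as a second argument. A sketch of the call-site shape under that assumption (MyFunctionAA is hypothetical):

#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Module.h"
#include "llvm/Pass.h"
using namespace llvm;

namespace {
struct MyFunctionAA : public FunctionPass, public AliasAnalysis {
  static char ID;
  MyFunctionAA() : FunctionPass(ID) {}
  bool runOnFunction(Function &F) override {
    // The AliasAnalysis base class now wants the DataLayout up front.
    InitializeAliasAnalysis(this, &F.getParent()->getDataLayout());
    return false;
  }
  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AliasAnalysis::getAnalysisUsage(AU);
    AU.setPreservesAll();
  }
};
char MyFunctionAA::ID = 0;
}
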
diff --git a/lib/Analysis/ScalarEvolutionExpander.cpp b/lib/Analysis/ScalarEvolutionExpander.cpp
index 2625cf3..a73ec9e 100644
--- a/lib/Analysis/ScalarEvolutionExpander.cpp
+++ b/lib/Analysis/ScalarEvolutionExpander.cpp
@@ -24,6 +24,7 @@
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
using namespace llvm;
@@ -204,11 +205,9 @@ Value *SCEVExpander::InsertBinop(Instruction::BinaryOps Opcode,
/// TODO: When ScalarEvolution gets a SCEVSDivExpr, this can be made
/// unnecessary; in its place, just signed-divide Ops[i] by the scale and
/// check to see if the divide was folded.
-static bool FactorOutConstant(const SCEV *&S,
- const SCEV *&Remainder,
- const SCEV *Factor,
- ScalarEvolution &SE,
- const DataLayout *DL) {
+static bool FactorOutConstant(const SCEV *&S, const SCEV *&Remainder,
+ const SCEV *Factor, ScalarEvolution &SE,
+ const DataLayout &DL) {
// Everything is divisible by one.
if (Factor->isOne())
return true;
@@ -248,35 +247,17 @@ static bool FactorOutConstant(const SCEV *&S,
// In a Mul, check if there is a constant operand which is a multiple
// of the given factor.
if (const SCEVMulExpr *M = dyn_cast<SCEVMulExpr>(S)) {
- if (DL) {
- // With DataLayout, the size is known. Check if there is a constant
- // operand which is a multiple of the given factor. If so, we can
- // factor it.
- const SCEVConstant *FC = cast<SCEVConstant>(Factor);
- if (const SCEVConstant *C = dyn_cast<SCEVConstant>(M->getOperand(0)))
- if (!C->getValue()->getValue().srem(FC->getValue()->getValue())) {
- SmallVector<const SCEV *, 4> NewMulOps(M->op_begin(), M->op_end());
- NewMulOps[0] =
- SE.getConstant(C->getValue()->getValue().sdiv(
- FC->getValue()->getValue()));
- S = SE.getMulExpr(NewMulOps);
- return true;
- }
- } else {
- // Without DataLayout, check if Factor can be factored out of any of the
- // Mul's operands. If so, we can just remove it.
- for (unsigned i = 0, e = M->getNumOperands(); i != e; ++i) {
- const SCEV *SOp = M->getOperand(i);
- const SCEV *Remainder = SE.getConstant(SOp->getType(), 0);
- if (FactorOutConstant(SOp, Remainder, Factor, SE, DL) &&
- Remainder->isZero()) {
- SmallVector<const SCEV *, 4> NewMulOps(M->op_begin(), M->op_end());
- NewMulOps[i] = SOp;
- S = SE.getMulExpr(NewMulOps);
- return true;
- }
+ // Size is known, check if there is a constant operand which is a multiple
+ // of the given factor. If so, we can factor it.
+ const SCEVConstant *FC = cast<SCEVConstant>(Factor);
+ if (const SCEVConstant *C = dyn_cast<SCEVConstant>(M->getOperand(0)))
+ if (!C->getValue()->getValue().srem(FC->getValue()->getValue())) {
+ SmallVector<const SCEV *, 4> NewMulOps(M->op_begin(), M->op_end());
+ NewMulOps[0] = SE.getConstant(
+ C->getValue()->getValue().sdiv(FC->getValue()->getValue()));
+ S = SE.getMulExpr(NewMulOps);
+ return true;
}
- }
}
// In an AddRec, check if both start and step are divisible.
@@ -393,7 +374,8 @@ Value *SCEVExpander::expandAddToGEP(const SCEV *const *op_begin,
PointerType *PTy,
Type *Ty,
Value *V) {
- Type *ElTy = PTy->getElementType();
+ Type *OriginalElTy = PTy->getElementType();
+ Type *ElTy = OriginalElTy;
SmallVector<Value *, 4> GepIndices;
SmallVector<const SCEV *, 8> Ops(op_begin, op_end);
bool AnyNonZeroIndices = false;
@@ -402,9 +384,7 @@ Value *SCEVExpander::expandAddToGEP(const SCEV *const *op_begin,
// without the other.
SplitAddRecs(Ops, Ty, SE);
- Type *IntPtrTy = SE.DL
- ? SE.DL->getIntPtrType(PTy)
- : Type::getInt64Ty(PTy->getContext());
+ Type *IntPtrTy = DL.getIntPtrType(PTy);
// Descend down the pointer's type and attempt to convert the other
// operands into GEP indices, at each level. The first index in a GEP
@@ -422,7 +402,7 @@ Value *SCEVExpander::expandAddToGEP(const SCEV *const *op_begin,
for (unsigned i = 0, e = Ops.size(); i != e; ++i) {
const SCEV *Op = Ops[i];
const SCEV *Remainder = SE.getConstant(Ty, 0);
- if (FactorOutConstant(Op, Remainder, ElSize, SE, SE.DL)) {
+ if (FactorOutConstant(Op, Remainder, ElSize, SE, DL)) {
// Op now has ElSize factored out.
ScaledOps.push_back(Op);
if (!Remainder->isZero())
@@ -456,43 +436,25 @@ Value *SCEVExpander::expandAddToGEP(const SCEV *const *op_begin,
bool FoundFieldNo = false;
// An empty struct has no fields.
if (STy->getNumElements() == 0) break;
- if (SE.DL) {
- // With DataLayout, field offsets are known. See if a constant offset
- // falls within any of the struct fields.
- if (Ops.empty()) break;
- if (const SCEVConstant *C = dyn_cast<SCEVConstant>(Ops[0]))
- if (SE.getTypeSizeInBits(C->getType()) <= 64) {
- const StructLayout &SL = *SE.DL->getStructLayout(STy);
- uint64_t FullOffset = C->getValue()->getZExtValue();
- if (FullOffset < SL.getSizeInBytes()) {
- unsigned ElIdx = SL.getElementContainingOffset(FullOffset);
- GepIndices.push_back(
- ConstantInt::get(Type::getInt32Ty(Ty->getContext()), ElIdx));
- ElTy = STy->getTypeAtIndex(ElIdx);
- Ops[0] =
+ // Field offsets are known. See if a constant offset falls within any of
+ // the struct fields.
+ if (Ops.empty())
+ break;
+ if (const SCEVConstant *C = dyn_cast<SCEVConstant>(Ops[0]))
+ if (SE.getTypeSizeInBits(C->getType()) <= 64) {
+ const StructLayout &SL = *DL.getStructLayout(STy);
+ uint64_t FullOffset = C->getValue()->getZExtValue();
+ if (FullOffset < SL.getSizeInBytes()) {
+ unsigned ElIdx = SL.getElementContainingOffset(FullOffset);
+ GepIndices.push_back(
+ ConstantInt::get(Type::getInt32Ty(Ty->getContext()), ElIdx));
+ ElTy = STy->getTypeAtIndex(ElIdx);
+ Ops[0] =
SE.getConstant(Ty, FullOffset - SL.getElementOffset(ElIdx));
- AnyNonZeroIndices = true;
- FoundFieldNo = true;
- }
- }
- } else {
- // Without DataLayout, just check for an offsetof expression of the
- // appropriate struct type.
- for (unsigned i = 0, e = Ops.size(); i != e; ++i)
- if (const SCEVUnknown *U = dyn_cast<SCEVUnknown>(Ops[i])) {
- Type *CTy;
- Constant *FieldNo;
- if (U->isOffsetOf(CTy, FieldNo) && CTy == STy) {
- GepIndices.push_back(FieldNo);
- ElTy =
- STy->getTypeAtIndex(cast<ConstantInt>(FieldNo)->getZExtValue());
- Ops[i] = SE.getConstant(Ty, 0);
- AnyNonZeroIndices = true;
- FoundFieldNo = true;
- break;
- }
+ AnyNonZeroIndices = true;
+ FoundFieldNo = true;
}
- }
+ }
// If no struct field offsets were found, tentatively assume that
// field zero was selected (since the zero offset would obviously
// be folded away).
@@ -597,7 +559,7 @@ Value *SCEVExpander::expandAddToGEP(const SCEV *const *op_begin,
Value *Casted = V;
if (V->getType() != PTy)
Casted = InsertNoopCastOfTo(Casted, PTy);
- Value *GEP = Builder.CreateGEP(Casted,
+ Value *GEP = Builder.CreateGEP(OriginalElTy, Casted,
GepIndices,
"scevgep");
Ops.push_back(SE.getUnknown(GEP));
@@ -1746,7 +1708,7 @@ unsigned SCEVExpander::replaceCongruentIVs(Loop *L, const DominatorTree *DT,
// Fold constant phis. They may be congruent to other constant phis and
// would confuse the logic below that expects proper IVs.
- if (Value *V = SimplifyInstruction(Phi, SE.DL, SE.TLI, SE.DT, SE.AC)) {
+ if (Value *V = SimplifyInstruction(Phi, DL, SE.TLI, SE.DT, SE.AC)) {
Phi->replaceAllUsesWith(V);
DeadInsts.push_back(Phi);
++NumElim;
@@ -1811,9 +1773,12 @@ unsigned SCEVExpander::replaceCongruentIVs(Loop *L, const DominatorTree *DT,
<< *IsomorphicInc << '\n');
Value *NewInc = OrigInc;
if (OrigInc->getType() != IsomorphicInc->getType()) {
- Instruction *IP = isa<PHINode>(OrigInc)
- ? (Instruction*)L->getHeader()->getFirstInsertionPt()
- : OrigInc->getNextNode();
+ Instruction *IP = nullptr;
+ if (PHINode *PN = dyn_cast<PHINode>(OrigInc))
+ IP = PN->getParent()->getFirstInsertionPt();
+ else
+ IP = OrigInc->getNextNode();
+
IRBuilder<> Builder(IP);
Builder.SetCurrentDebugLocation(IsomorphicInc->getDebugLoc());
NewInc = Builder.
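
Two expander changes above follow the same explicit-type direction as the PHITransAddr hunk: IRBuilder::CreateGEP now names the source element type, and a non-null DataLayout reference (here assumed to be an expander member sourced from the module) replaces the nullable SE.DL. A small sketch of the CreateGEP overload, with illustrative names:

#include "llvm/IR/IRBuilder.h"
using namespace llvm;

// Emits `getelementptr i8, i8* %Base, i64 %Idx`; the element type is an
// explicit argument, matching the "scevgep" call above.
static Value *byteGEP(IRBuilder<> &B, Value *Base, Value *Idx) {
  Type *I8 = Type::getInt8Ty(B.getContext());
  return B.CreateGEP(I8, Base, {Idx}, "scevgep");
}
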
diff --git a/lib/Analysis/ScopedNoAliasAA.cpp b/lib/Analysis/ScopedNoAliasAA.cpp
index c6ea3af..02f8b0b 100644
--- a/lib/Analysis/ScopedNoAliasAA.cpp
+++ b/lib/Analysis/ScopedNoAliasAA.cpp
@@ -80,7 +80,7 @@ public:
initializeScopedNoAliasAAPass(*PassRegistry::getPassRegistry());
}
- void initializePass() override { InitializeAliasAnalysis(this); }
+ bool doInitialization(Module &M) override;
/// getAdjustedAnalysisPointer - This method is used when a pass implements
/// an analysis interface through multiple inheritance. If needed, it
@@ -119,6 +119,11 @@ ImmutablePass *llvm::createScopedNoAliasAAPass() {
return new ScopedNoAliasAA();
}
+bool ScopedNoAliasAA::doInitialization(Module &M) {
+ InitializeAliasAnalysis(this, &M.getDataLayout());
+ return true;
+}
+
void
ScopedNoAliasAA::getAnalysisUsage(AnalysisUsage &AU) const {
AU.setPreservesAll();
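This hunk and the TypeBasedAliasAnalysis hunk further down apply the same migration: InitializeAliasAnalysis now requires a DataLayout, which only exists once a Module is available, so the work moves from initializePass() to doInitialization(Module&). The recurring shape, sketched for a hypothetical pass ("MyAA" is a placeholder; pass ID and registration boilerplate are elided):

struct MyAA : public ImmutablePass, public AliasAnalysis {
  bool doInitialization(Module &M) override {
    // DataLayout is a property of the Module now, so hook module init.
    InitializeAliasAnalysis(this, &M.getDataLayout());
    return true;
  }
};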
diff --git a/lib/Analysis/TargetLibraryInfo.cpp b/lib/Analysis/TargetLibraryInfo.cpp
index 91041fc..7e574d5 100644
--- a/lib/Analysis/TargetLibraryInfo.cpp
+++ b/lib/Analysis/TargetLibraryInfo.cpp
@@ -13,341 +13,22 @@
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/ADT/Triple.h"
+#include "llvm/Support/CommandLine.h"
using namespace llvm;
-const char* TargetLibraryInfoImpl::StandardNames[LibFunc::NumLibFuncs] =
- {
- "_IO_getc",
- "_IO_putc",
- "_ZdaPv",
- "_ZdaPvRKSt9nothrow_t",
- "_ZdaPvj",
- "_ZdaPvm",
- "_ZdlPv",
- "_ZdlPvRKSt9nothrow_t",
- "_ZdlPvj",
- "_ZdlPvm",
- "_Znaj",
- "_ZnajRKSt9nothrow_t",
- "_Znam",
- "_ZnamRKSt9nothrow_t",
- "_Znwj",
- "_ZnwjRKSt9nothrow_t",
- "_Znwm",
- "_ZnwmRKSt9nothrow_t",
- "__cospi",
- "__cospif",
- "__cxa_atexit",
- "__cxa_guard_abort",
- "__cxa_guard_acquire",
- "__cxa_guard_release",
- "__isoc99_scanf",
- "__isoc99_sscanf",
- "__memcpy_chk",
- "__memmove_chk",
- "__memset_chk",
- "__sincospi_stret",
- "__sincospif_stret",
- "__sinpi",
- "__sinpif",
- "__sqrt_finite",
- "__sqrtf_finite",
- "__sqrtl_finite",
- "__stpcpy_chk",
- "__stpncpy_chk",
- "__strcpy_chk",
- "__strdup",
- "__strncpy_chk",
- "__strndup",
- "__strtok_r",
- "abs",
- "access",
- "acos",
- "acosf",
- "acosh",
- "acoshf",
- "acoshl",
- "acosl",
- "asin",
- "asinf",
- "asinh",
- "asinhf",
- "asinhl",
- "asinl",
- "atan",
- "atan2",
- "atan2f",
- "atan2l",
- "atanf",
- "atanh",
- "atanhf",
- "atanhl",
- "atanl",
- "atof",
- "atoi",
- "atol",
- "atoll",
- "bcmp",
- "bcopy",
- "bzero",
- "calloc",
- "cbrt",
- "cbrtf",
- "cbrtl",
- "ceil",
- "ceilf",
- "ceill",
- "chmod",
- "chown",
- "clearerr",
- "closedir",
- "copysign",
- "copysignf",
- "copysignl",
- "cos",
- "cosf",
- "cosh",
- "coshf",
- "coshl",
- "cosl",
- "ctermid",
- "exp",
- "exp10",
- "exp10f",
- "exp10l",
- "exp2",
- "exp2f",
- "exp2l",
- "expf",
- "expl",
- "expm1",
- "expm1f",
- "expm1l",
- "fabs",
- "fabsf",
- "fabsl",
- "fclose",
- "fdopen",
- "feof",
- "ferror",
- "fflush",
- "ffs",
- "ffsl",
- "ffsll",
- "fgetc",
- "fgetpos",
- "fgets",
- "fileno",
- "fiprintf",
- "flockfile",
- "floor",
- "floorf",
- "floorl",
- "fmax",
- "fmaxf",
- "fmaxl",
- "fmin",
- "fminf",
- "fminl",
- "fmod",
- "fmodf",
- "fmodl",
- "fopen",
- "fopen64",
- "fprintf",
- "fputc",
- "fputs",
- "fread",
- "free",
- "frexp",
- "frexpf",
- "frexpl",
- "fscanf",
- "fseek",
- "fseeko",
- "fseeko64",
- "fsetpos",
- "fstat",
- "fstat64",
- "fstatvfs",
- "fstatvfs64",
- "ftell",
- "ftello",
- "ftello64",
- "ftrylockfile",
- "funlockfile",
- "fwrite",
- "getc",
- "getc_unlocked",
- "getchar",
- "getenv",
- "getitimer",
- "getlogin_r",
- "getpwnam",
- "gets",
- "gettimeofday",
- "htonl",
- "htons",
- "iprintf",
- "isascii",
- "isdigit",
- "labs",
- "lchown",
- "ldexp",
- "ldexpf",
- "ldexpl",
- "llabs",
- "log",
- "log10",
- "log10f",
- "log10l",
- "log1p",
- "log1pf",
- "log1pl",
- "log2",
- "log2f",
- "log2l",
- "logb",
- "logbf",
- "logbl",
- "logf",
- "logl",
- "lstat",
- "lstat64",
- "malloc",
- "memalign",
- "memccpy",
- "memchr",
- "memcmp",
- "memcpy",
- "memmove",
- "memrchr",
- "memset",
- "memset_pattern16",
- "mkdir",
- "mktime",
- "modf",
- "modff",
- "modfl",
- "nearbyint",
- "nearbyintf",
- "nearbyintl",
- "ntohl",
- "ntohs",
- "open",
- "open64",
- "opendir",
- "pclose",
- "perror",
- "popen",
- "posix_memalign",
- "pow",
- "powf",
- "powl",
- "pread",
- "printf",
- "putc",
- "putchar",
- "puts",
- "pwrite",
- "qsort",
- "read",
- "readlink",
- "realloc",
- "reallocf",
- "realpath",
- "remove",
- "rename",
- "rewind",
- "rint",
- "rintf",
- "rintl",
- "rmdir",
- "round",
- "roundf",
- "roundl",
- "scanf",
- "setbuf",
- "setitimer",
- "setvbuf",
- "sin",
- "sinf",
- "sinh",
- "sinhf",
- "sinhl",
- "sinl",
- "siprintf",
- "snprintf",
- "sprintf",
- "sqrt",
- "sqrtf",
- "sqrtl",
- "sscanf",
- "stat",
- "stat64",
- "statvfs",
- "statvfs64",
- "stpcpy",
- "stpncpy",
- "strcasecmp",
- "strcat",
- "strchr",
- "strcmp",
- "strcoll",
- "strcpy",
- "strcspn",
- "strdup",
- "strlen",
- "strncasecmp",
- "strncat",
- "strncmp",
- "strncpy",
- "strndup",
- "strnlen",
- "strpbrk",
- "strrchr",
- "strspn",
- "strstr",
- "strtod",
- "strtof",
- "strtok",
- "strtok_r",
- "strtol",
- "strtold",
- "strtoll",
- "strtoul",
- "strtoull",
- "strxfrm",
- "system",
- "tan",
- "tanf",
- "tanh",
- "tanhf",
- "tanhl",
- "tanl",
- "times",
- "tmpfile",
- "tmpfile64",
- "toascii",
- "trunc",
- "truncf",
- "truncl",
- "uname",
- "ungetc",
- "unlink",
- "unsetenv",
- "utime",
- "utimes",
- "valloc",
- "vfprintf",
- "vfscanf",
- "vprintf",
- "vscanf",
- "vsnprintf",
- "vsprintf",
- "vsscanf",
- "write"
- };
+static cl::opt<TargetLibraryInfoImpl::VectorLibrary> ClVectorLibrary(
+ "vector-library", cl::Hidden, cl::desc("Vector functions library"),
+ cl::init(TargetLibraryInfoImpl::NoLibrary),
+ cl::values(clEnumValN(TargetLibraryInfoImpl::NoLibrary, "none",
+ "No vector functions library"),
+ clEnumValN(TargetLibraryInfoImpl::Accelerate, "Accelerate",
+ "Accelerate framework"),
+ clEnumValEnd));
+
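Because cl::opt registers flags process-wide, any tool linked against this library will now accept the new switch; a plausible invocation (the pass names here are illustrative and depend on the tool):

    opt -vector-library=Accelerate -loop-vectorize -S input.ll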
+const char *const TargetLibraryInfoImpl::StandardNames[LibFunc::NumLibFuncs] = {
+#define TLI_DEFINE_STRING
+#include "llvm/Analysis/TargetLibraryInfo.def"
+};
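The string table is not deleted, it is regenerated: TargetLibraryInfo.def carries one record per function and is included twice, once with TLI_DEFINE_ENUM (in the header) and once with TLI_DEFINE_STRING (here), so the enum and the name table cannot drift apart. A minimal standalone demo of the same X-macro idiom (all names below are placeholders):

#define MY_LIBFUNCS(X) X(acos) X(acosf) X(memcpy)

enum MyFunc {
#define X(F) MF_##F,
  MY_LIBFUNCS(X)
#undef X
  MF_NumFuncs
};

static const char *const MyNames[MF_NumFuncs] = {
#define X(F) #F,
  MY_LIBFUNCS(X)
#undef X
};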
static bool hasSinCosPiStret(const Triple &T) {
// Only Darwin variants have _stret versions of combined trig functions.
@@ -371,7 +52,7 @@ static bool hasSinCosPiStret(const Triple &T) {
/// specified target triple. This should be carefully written so that a missing
/// target triple gets a sane set of defaults.
static void initialize(TargetLibraryInfoImpl &TLI, const Triple &T,
- const char **StandardNames) {
+ const char *const *StandardNames) {
#ifndef NDEBUG
// Verify that the StandardNames array is in alphabetical order.
for (unsigned F = 1; F < LibFunc::NumLibFuncs; ++F) {
@@ -674,6 +355,8 @@ static void initialize(TargetLibraryInfoImpl &TLI, const Triple &T,
TLI.setUnavailable(LibFunc::statvfs64);
TLI.setUnavailable(LibFunc::tmpfile64);
}
+
+ TLI.addVectorizableFunctionsFromVecLib(ClVectorLibrary);
}
TargetLibraryInfoImpl::TargetLibraryInfoImpl() {
@@ -693,12 +376,16 @@ TargetLibraryInfoImpl::TargetLibraryInfoImpl(const Triple &T) {
TargetLibraryInfoImpl::TargetLibraryInfoImpl(const TargetLibraryInfoImpl &TLI)
: CustomNames(TLI.CustomNames) {
memcpy(AvailableArray, TLI.AvailableArray, sizeof(AvailableArray));
+ VectorDescs = TLI.VectorDescs;
+ ScalarDescs = TLI.ScalarDescs;
}
TargetLibraryInfoImpl::TargetLibraryInfoImpl(TargetLibraryInfoImpl &&TLI)
: CustomNames(std::move(TLI.CustomNames)) {
std::move(std::begin(TLI.AvailableArray), std::end(TLI.AvailableArray),
AvailableArray);
+ VectorDescs = TLI.VectorDescs;
+ ScalarDescs = TLI.ScalarDescs;
}
TargetLibraryInfoImpl &TargetLibraryInfoImpl::operator=(const TargetLibraryInfoImpl &TLI) {
@@ -714,40 +401,32 @@ TargetLibraryInfoImpl &TargetLibraryInfoImpl::operator=(TargetLibraryInfoImpl &&
return *this;
}
-namespace {
-struct StringComparator {
- /// Compare two strings and return true if LHS is lexicographically less than
- /// RHS. Requires that RHS doesn't contain any zero bytes.
- bool operator()(const char *LHS, StringRef RHS) const {
- // Compare prefixes with strncmp. If prefixes match we know that LHS is
- // greater or equal to RHS as RHS can't contain any '\0'.
- return std::strncmp(LHS, RHS.data(), RHS.size()) < 0;
- }
-
- // Provided for compatibility with MSVC's debug mode.
- bool operator()(StringRef LHS, const char *RHS) const { return LHS < RHS; }
- bool operator()(StringRef LHS, StringRef RHS) const { return LHS < RHS; }
- bool operator()(const char *LHS, const char *RHS) const {
- return std::strcmp(LHS, RHS) < 0;
- }
-};
-}
-
-bool TargetLibraryInfoImpl::getLibFunc(StringRef funcName,
- LibFunc::Func &F) const {
- const char **Start = &StandardNames[0];
- const char **End = &StandardNames[LibFunc::NumLibFuncs];
-
+static StringRef sanitizeFunctionName(StringRef funcName) {
// Filter out empty names and names containing null bytes, those can't be in
// our table.
if (funcName.empty() || funcName.find('\0') != StringRef::npos)
- return false;
+ return StringRef();
// Check for \01 prefix that is used to mangle __asm declarations and
// strip it if present.
if (funcName.front() == '\01')
funcName = funcName.substr(1);
- const char **I = std::lower_bound(Start, End, funcName, StringComparator());
+ return funcName;
+}
+
+bool TargetLibraryInfoImpl::getLibFunc(StringRef funcName,
+ LibFunc::Func &F) const {
+ const char *const *Start = &StandardNames[0];
+ const char *const *End = &StandardNames[LibFunc::NumLibFuncs];
+
+ funcName = sanitizeFunctionName(funcName);
+ if (funcName.empty())
+ return false;
+
+ const char *const *I = std::lower_bound(
+ Start, End, funcName, [](const char *LHS, StringRef RHS) {
+ return std::strncmp(LHS, RHS.data(), RHS.size()) < 0;
+ });
if (I != End && *I == funcName) {
F = (LibFunc::Func)(I - Start);
return true;
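The lambda above is the old StringComparator collapsed to its one needed overload; the strncmp prefix comparison is only a valid ordering because sanitizeFunctionName guarantees the key has no embedded NULs. The same search as a standalone sketch (indexOf is a made-up helper):

#include <algorithm>
#include <cstring>
#include "llvm/ADT/StringRef.h"
using llvm::StringRef;

// Binary-search a sorted table of C strings with a NUL-free StringRef key.
static int indexOf(const char *const *Begin, const char *const *End,
                   StringRef Key) {
  const char *const *I = std::lower_bound(
      Begin, End, Key, [](const char *LHS, StringRef RHS) {
        // Prefix compare: with no NULs in RHS, LHS < RHS iff this is < 0.
        return std::strncmp(LHS, RHS.data(), RHS.size()) < 0;
      });
  return (I != End && *I == Key) ? int(I - Begin) : -1;
}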
@@ -759,6 +438,94 @@ void TargetLibraryInfoImpl::disableAllFunctions() {
memset(AvailableArray, 0, sizeof(AvailableArray));
}
+static bool compareByScalarFnName(const VecDesc &LHS, const VecDesc &RHS) {
+ return std::strncmp(LHS.ScalarFnName, RHS.ScalarFnName,
+ std::strlen(RHS.ScalarFnName)) < 0;
+}
+
+static bool compareByVectorFnName(const VecDesc &LHS, const VecDesc &RHS) {
+ return std::strncmp(LHS.VectorFnName, RHS.VectorFnName,
+ std::strlen(RHS.VectorFnName)) < 0;
+}
+
+static bool compareWithScalarFnName(const VecDesc &LHS, StringRef S) {
+ return std::strncmp(LHS.ScalarFnName, S.data(), S.size()) < 0;
+}
+
+static bool compareWithVectorFnName(const VecDesc &LHS, StringRef S) {
+ return std::strncmp(LHS.VectorFnName, S.data(), S.size()) < 0;
+}
+
+void TargetLibraryInfoImpl::addVectorizableFunctions(ArrayRef<VecDesc> Fns) {
+ VectorDescs.insert(VectorDescs.end(), Fns.begin(), Fns.end());
+ std::sort(VectorDescs.begin(), VectorDescs.end(), compareByScalarFnName);
+
+ ScalarDescs.insert(ScalarDescs.end(), Fns.begin(), Fns.end());
+ std::sort(ScalarDescs.begin(), ScalarDescs.end(), compareByVectorFnName);
+}
+
+void TargetLibraryInfoImpl::addVectorizableFunctionsFromVecLib(
+ enum VectorLibrary VecLib) {
+ switch (VecLib) {
+ case Accelerate: {
+ const VecDesc VecFuncs[] = {
+ {"expf", "vexpf", 4},
+ {"llvm.exp.f32", "vexpf", 4},
+ {"logf", "vlogf", 4},
+ {"llvm.log.f32", "vlogf", 4},
+ {"sqrtf", "vsqrtf", 4},
+ {"llvm.sqrt.f32", "vsqrtf", 4},
+ {"fabsf", "vfabsf", 4},
+ {"llvm.fabs.f32", "vfabsf", 4},
+ };
+ addVectorizableFunctions(VecFuncs);
+ break;
+ }
+ case NoLibrary:
+ break;
+ }
+}
+
+bool TargetLibraryInfoImpl::isFunctionVectorizable(StringRef funcName) const {
+ funcName = sanitizeFunctionName(funcName);
+ if (funcName.empty())
+ return false;
+
+ std::vector<VecDesc>::const_iterator I = std::lower_bound(
+ VectorDescs.begin(), VectorDescs.end(), funcName,
+ compareWithScalarFnName);
+ return I != VectorDescs.end() && StringRef(I->ScalarFnName) == funcName;
+}
+
+StringRef TargetLibraryInfoImpl::getVectorizedFunction(StringRef F,
+ unsigned VF) const {
+ F = sanitizeFunctionName(F);
+ if (F.empty())
+ return F;
+ std::vector<VecDesc>::const_iterator I = std::lower_bound(
+ VectorDescs.begin(), VectorDescs.end(), F, compareWithScalarFnName);
+ while (I != VectorDescs.end() && StringRef(I->ScalarFnName) == F) {
+ if (I->VectorizationFactor == VF)
+ return I->VectorFnName;
+ ++I;
+ }
+ return StringRef();
+}
+
+StringRef TargetLibraryInfoImpl::getScalarizedFunction(StringRef F,
+ unsigned &VF) const {
+ F = sanitizeFunctionName(F);
+ if (F.empty())
+ return F;
+
+ std::vector<VecDesc>::const_iterator I = std::lower_bound(
+ ScalarDescs.begin(), ScalarDescs.end(), F, compareWithVectorFnName);
+  if (I == ScalarDescs.end() || StringRef(I->VectorFnName) != F)
+ return StringRef();
+ VF = I->VectorizationFactor;
+ return I->ScalarFnName;
+}
+
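End to end, the new hooks register scalar/vector pairs and answer queries in both directions. A hedged usage sketch (the triple and factors are illustrative; the mappings come from the Accelerate table above):

#include "llvm/ADT/Triple.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
using namespace llvm;

static void demoVectorMapping() {
  TargetLibraryInfoImpl TLII(Triple("x86_64-apple-darwin"));
  TLII.addVectorizableFunctionsFromVecLib(TargetLibraryInfoImpl::Accelerate);

  bool CanVec = TLII.isFunctionVectorizable("sqrtf");    // true
  StringRef V4 = TLII.getVectorizedFunction("sqrtf", 4); // "vsqrtf"
  StringRef V8 = TLII.getVectorizedFunction("sqrtf", 8); // "" (no VF-8 entry)
  (void)CanVec; (void)V4; (void)V8;
}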
TargetLibraryInfo TargetLibraryAnalysis::run(Module &M) {
if (PresetInfoImpl)
return TargetLibraryInfo(*PresetInfoImpl);
diff --git a/lib/Analysis/TargetTransformInfo.cpp b/lib/Analysis/TargetTransformInfo.cpp
index 7ff29b0..f51c7f54 100644
--- a/lib/Analysis/TargetTransformInfo.cpp
+++ b/lib/Analysis/TargetTransformInfo.cpp
@@ -143,6 +143,10 @@ bool TargetTransformInfo::shouldBuildLookupTables() const {
return TTIImpl->shouldBuildLookupTables();
}
+bool TargetTransformInfo::enableAggressiveInterleaving(bool LoopHasReductions) const {
+ return TTIImpl->enableAggressiveInterleaving(LoopHasReductions);
+}
+
TargetTransformInfo::PopcntSupportKind
TargetTransformInfo::getPopcntSupport(unsigned IntTyWidthInBit) const {
return TTIImpl->getPopcntSupport(IntTyWidthInBit);
@@ -233,6 +237,11 @@ TargetTransformInfo::getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy,
return TTIImpl->getIntrinsicInstrCost(ID, RetTy, Tys);
}
+unsigned TargetTransformInfo::getCallInstrCost(Function *F, Type *RetTy,
+ ArrayRef<Type *> Tys) const {
+ return TTIImpl->getCallInstrCost(F, RetTy, Tys);
+}
+
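getCallInstrCost is the plain-call sibling of getIntrinsicInstrCost; with it, a vectorizer can price a call to a mapped vector routine instead of pessimistically scalarizing. A thin usage sketch (the helper is made up; the parameters are whatever call site is being costed):

#include "llvm/Analysis/TargetTransformInfo.h"
using namespace llvm;

// Ask the target for the cost of calling F with the given signature.
static unsigned costOfCall(const TargetTransformInfo &TTI, Function *F,
                           Type *RetTy, ArrayRef<Type *> ArgTys) {
  return TTI.getCallInstrCost(F, RetTy, ArgTys);
}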
unsigned TargetTransformInfo::getNumberOfParts(Type *Tp) const {
return TTIImpl->getNumberOfParts(Tp);
}
@@ -277,7 +286,7 @@ TargetIRAnalysis::Result TargetIRAnalysis::run(Function &F) {
char TargetIRAnalysis::PassID;
TargetIRAnalysis::Result TargetIRAnalysis::getDefaultTTI(Function &F) {
- return Result(F.getParent()->getDataLayout());
+ return Result(&F.getParent()->getDataLayout());
}
// Register the basic pass.
diff --git a/lib/Analysis/TypeBasedAliasAnalysis.cpp b/lib/Analysis/TypeBasedAliasAnalysis.cpp
index ff89558..1158725 100644
--- a/lib/Analysis/TypeBasedAliasAnalysis.cpp
+++ b/lib/Analysis/TypeBasedAliasAnalysis.cpp
@@ -129,6 +129,7 @@
#include "llvm/IR/Module.h"
#include "llvm/Pass.h"
#include "llvm/Support/CommandLine.h"
+#include "llvm/ADT/SetVector.h"
using namespace llvm;
// A handy option for disabling TBAA functionality. The same effect can also be
@@ -282,9 +283,7 @@ namespace {
initializeTypeBasedAliasAnalysisPass(*PassRegistry::getPassRegistry());
}
- void initializePass() override {
- InitializeAliasAnalysis(this);
- }
+ bool doInitialization(Module &M) override;
/// getAdjustedAnalysisPointer - This method is used when a pass implements
/// an analysis interface through multiple inheritance. If needed, it
@@ -321,6 +320,11 @@ ImmutablePass *llvm::createTypeBasedAliasAnalysisPass() {
return new TypeBasedAliasAnalysis();
}
+bool TypeBasedAliasAnalysis::doInitialization(Module &M) {
+ InitializeAliasAnalysis(this, &M.getDataLayout());
+ return true;
+}
+
void
TypeBasedAliasAnalysis::getAnalysisUsage(AnalysisUsage &AU) const {
AU.setPreservesAll();
@@ -575,18 +579,22 @@ MDNode *MDNode::getMostGenericTBAA(MDNode *A, MDNode *B) {
if (!B) return nullptr;
}
- SmallVector<MDNode *, 4> PathA;
+ SmallSetVector<MDNode *, 4> PathA;
MDNode *T = A;
while (T) {
- PathA.push_back(T);
+ if (PathA.count(T))
+ report_fatal_error("Cycle found in TBAA metadata.");
+ PathA.insert(T);
T = T->getNumOperands() >= 2 ? cast_or_null<MDNode>(T->getOperand(1))
: nullptr;
}
- SmallVector<MDNode *, 4> PathB;
+ SmallSetVector<MDNode *, 4> PathB;
T = B;
while (T) {
- PathB.push_back(T);
+ if (PathB.count(T))
+ report_fatal_error("Cycle found in TBAA metadata.");
+ PathB.insert(T);
T = T->getNumOperands() >= 2 ? cast_or_null<MDNode>(T->getOperand(1))
: nullptr;
}
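SmallSetVector keeps the vector's path ordering (which the later common-ancestor search relies on) while adding O(1) membership, so a cyclic parent chain now aborts instead of hanging. The guarded walk in isolation; this variant folds the count()/insert() pair into insert()'s return value:

#include "llvm/ADT/SetVector.h"
#include "llvm/IR/Metadata.h"
#include "llvm/Support/ErrorHandling.h"
using namespace llvm;

// Collect a TBAA node's parent chain; operand 1 points at the parent type.
static void collectTBAAPath(MDNode *T, SmallSetVector<MDNode *, 4> &Path) {
  while (T) {
    if (!Path.insert(T)) // false means T was already on the path: a cycle
      report_fatal_error("Cycle found in TBAA metadata.");
    T = T->getNumOperands() >= 2 ? cast_or_null<MDNode>(T->getOperand(1))
                                 : nullptr;
  }
}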
diff --git a/lib/Analysis/ValueTracking.cpp b/lib/Analysis/ValueTracking.cpp
index 0458d28..f329e3a 100644
--- a/lib/Analysis/ValueTracking.cpp
+++ b/lib/Analysis/ValueTracking.cpp
@@ -39,13 +39,41 @@ using namespace llvm::PatternMatch;
const unsigned MaxDepth = 6;
+/// Enable an experimental feature to leverage information about dominating
+/// conditions to compute known bits. The individual options below control how
+/// hard we search. The defaults are chosen to be fairly aggressive. If you
+/// run into compile time problems when testing, scale them back and report
+/// your findings.
+static cl::opt<bool> EnableDomConditions("value-tracking-dom-conditions",
+ cl::Hidden, cl::init(false));
+
+// This is expensive, so we only do it for the top level query value.
+// (TODO: evaluate cost vs profit, consider higher thresholds)
+static cl::opt<unsigned> DomConditionsMaxDepth("dom-conditions-max-depth",
+ cl::Hidden, cl::init(1));
+
+/// How many dominating blocks should be scanned looking for dominating
+/// conditions?
+static cl::opt<unsigned> DomConditionsMaxDomBlocks("dom-conditions-dom-blocks",
+ cl::Hidden,
+ cl::init(20000));
+
+// Controls the number of uses of the value searched for possible
+// dominating comparisons.
+static cl::opt<unsigned> DomConditionsMaxUses("dom-conditions-max-uses",
+ cl::Hidden, cl::init(2000));
+
+// If true, only consider compares whose sole use is the branch.
+static cl::opt<bool> DomConditionsSingleCmpUse("dom-conditions-single-cmp-use",
+ cl::Hidden, cl::init(false));
+
/// Returns the bitwidth of the given scalar or pointer type (if unknown returns
/// 0). For vector types, returns the element type's bitwidth.
-static unsigned getBitWidth(Type *Ty, const DataLayout *TD) {
+static unsigned getBitWidth(Type *Ty, const DataLayout &DL) {
if (unsigned BitWidth = Ty->getScalarSizeInBits())
return BitWidth;
- return TD ? TD->getPointerTypeSizeInBits(Ty) : 0;
+ return DL.getPointerTypeSizeInBits(Ty);
}
// Many of these functions have internal versions that take an assumption
@@ -97,73 +125,73 @@ static const Instruction *safeCxtI(const Value *V, const Instruction *CxtI) {
}
static void computeKnownBits(Value *V, APInt &KnownZero, APInt &KnownOne,
- const DataLayout *TD, unsigned Depth,
- const Query &Q);
+ const DataLayout &DL, unsigned Depth,
+ const Query &Q);
void llvm::computeKnownBits(Value *V, APInt &KnownZero, APInt &KnownOne,
- const DataLayout *TD, unsigned Depth,
+ const DataLayout &DL, unsigned Depth,
AssumptionCache *AC, const Instruction *CxtI,
const DominatorTree *DT) {
- ::computeKnownBits(V, KnownZero, KnownOne, TD, Depth,
+ ::computeKnownBits(V, KnownZero, KnownOne, DL, Depth,
Query(AC, safeCxtI(V, CxtI), DT));
}
static void ComputeSignBit(Value *V, bool &KnownZero, bool &KnownOne,
- const DataLayout *TD, unsigned Depth,
- const Query &Q);
+ const DataLayout &DL, unsigned Depth,
+ const Query &Q);
void llvm::ComputeSignBit(Value *V, bool &KnownZero, bool &KnownOne,
- const DataLayout *TD, unsigned Depth,
+ const DataLayout &DL, unsigned Depth,
AssumptionCache *AC, const Instruction *CxtI,
const DominatorTree *DT) {
- ::ComputeSignBit(V, KnownZero, KnownOne, TD, Depth,
+ ::ComputeSignBit(V, KnownZero, KnownOne, DL, Depth,
Query(AC, safeCxtI(V, CxtI), DT));
}
static bool isKnownToBeAPowerOfTwo(Value *V, bool OrZero, unsigned Depth,
- const Query &Q);
+ const Query &Q, const DataLayout &DL);
-bool llvm::isKnownToBeAPowerOfTwo(Value *V, bool OrZero, unsigned Depth,
- AssumptionCache *AC, const Instruction *CxtI,
+bool llvm::isKnownToBeAPowerOfTwo(Value *V, const DataLayout &DL, bool OrZero,
+ unsigned Depth, AssumptionCache *AC,
+ const Instruction *CxtI,
const DominatorTree *DT) {
return ::isKnownToBeAPowerOfTwo(V, OrZero, Depth,
- Query(AC, safeCxtI(V, CxtI), DT));
+ Query(AC, safeCxtI(V, CxtI), DT), DL);
}
-static bool isKnownNonZero(Value *V, const DataLayout *TD, unsigned Depth,
+static bool isKnownNonZero(Value *V, const DataLayout &DL, unsigned Depth,
const Query &Q);
-bool llvm::isKnownNonZero(Value *V, const DataLayout *TD, unsigned Depth,
+bool llvm::isKnownNonZero(Value *V, const DataLayout &DL, unsigned Depth,
AssumptionCache *AC, const Instruction *CxtI,
const DominatorTree *DT) {
- return ::isKnownNonZero(V, TD, Depth, Query(AC, safeCxtI(V, CxtI), DT));
+ return ::isKnownNonZero(V, DL, Depth, Query(AC, safeCxtI(V, CxtI), DT));
}
-static bool MaskedValueIsZero(Value *V, const APInt &Mask,
- const DataLayout *TD, unsigned Depth,
- const Query &Q);
+static bool MaskedValueIsZero(Value *V, const APInt &Mask, const DataLayout &DL,
+ unsigned Depth, const Query &Q);
-bool llvm::MaskedValueIsZero(Value *V, const APInt &Mask, const DataLayout *TD,
+bool llvm::MaskedValueIsZero(Value *V, const APInt &Mask, const DataLayout &DL,
unsigned Depth, AssumptionCache *AC,
const Instruction *CxtI, const DominatorTree *DT) {
- return ::MaskedValueIsZero(V, Mask, TD, Depth,
+ return ::MaskedValueIsZero(V, Mask, DL, Depth,
Query(AC, safeCxtI(V, CxtI), DT));
}
-static unsigned ComputeNumSignBits(Value *V, const DataLayout *TD,
+static unsigned ComputeNumSignBits(Value *V, const DataLayout &DL,
unsigned Depth, const Query &Q);
-unsigned llvm::ComputeNumSignBits(Value *V, const DataLayout *TD,
+unsigned llvm::ComputeNumSignBits(Value *V, const DataLayout &DL,
unsigned Depth, AssumptionCache *AC,
const Instruction *CxtI,
const DominatorTree *DT) {
- return ::ComputeNumSignBits(V, TD, Depth, Query(AC, safeCxtI(V, CxtI), DT));
+ return ::ComputeNumSignBits(V, DL, Depth, Query(AC, safeCxtI(V, CxtI), DT));
}
static void computeKnownBitsAddSub(bool Add, Value *Op0, Value *Op1, bool NSW,
APInt &KnownZero, APInt &KnownOne,
APInt &KnownZero2, APInt &KnownOne2,
- const DataLayout *TD, unsigned Depth,
+ const DataLayout &DL, unsigned Depth,
const Query &Q) {
if (!Add) {
if (ConstantInt *CLHS = dyn_cast<ConstantInt>(Op0)) {
@@ -175,7 +203,7 @@ static void computeKnownBitsAddSub(bool Add, Value *Op0, Value *Op1, bool NSW,
unsigned NLZ = (CLHS->getValue()+1).countLeadingZeros();
// NLZ can't be BitWidth with no sign bit
APInt MaskV = APInt::getHighBitsSet(BitWidth, NLZ+1);
- computeKnownBits(Op1, KnownZero2, KnownOne2, TD, Depth+1, Q);
+ computeKnownBits(Op1, KnownZero2, KnownOne2, DL, Depth + 1, Q);
// If all of the MaskV bits are known to be zero, then we know the
// output top bits are zero, because we now know that the output is
@@ -194,8 +222,8 @@ static void computeKnownBitsAddSub(bool Add, Value *Op0, Value *Op1, bool NSW,
// If an initial sequence of bits in the result is not needed, the
// corresponding bits in the operands are not needed.
APInt LHSKnownZero(BitWidth, 0), LHSKnownOne(BitWidth, 0);
- computeKnownBits(Op0, LHSKnownZero, LHSKnownOne, TD, Depth+1, Q);
- computeKnownBits(Op1, KnownZero2, KnownOne2, TD, Depth+1, Q);
+ computeKnownBits(Op0, LHSKnownZero, LHSKnownOne, DL, Depth + 1, Q);
+ computeKnownBits(Op1, KnownZero2, KnownOne2, DL, Depth + 1, Q);
// Carry in a 1 for a subtract, rather than a 0.
APInt CarryIn(BitWidth, 0);
@@ -243,11 +271,11 @@ static void computeKnownBitsAddSub(bool Add, Value *Op0, Value *Op1, bool NSW,
static void computeKnownBitsMul(Value *Op0, Value *Op1, bool NSW,
APInt &KnownZero, APInt &KnownOne,
APInt &KnownZero2, APInt &KnownOne2,
- const DataLayout *TD, unsigned Depth,
+ const DataLayout &DL, unsigned Depth,
const Query &Q) {
unsigned BitWidth = KnownZero.getBitWidth();
- computeKnownBits(Op1, KnownZero, KnownOne, TD, Depth+1, Q);
- computeKnownBits(Op0, KnownZero2, KnownOne2, TD, Depth+1, Q);
+ computeKnownBits(Op1, KnownZero, KnownOne, DL, Depth + 1, Q);
+ computeKnownBits(Op0, KnownZero2, KnownOne2, DL, Depth + 1, Q);
bool isKnownNegative = false;
bool isKnownNonNegative = false;
@@ -268,9 +296,9 @@ static void computeKnownBitsMul(Value *Op0, Value *Op1, bool NSW,
// negative or zero.
if (!isKnownNonNegative)
isKnownNegative = (isKnownNegativeOp1 && isKnownNonNegativeOp0 &&
- isKnownNonZero(Op0, TD, Depth, Q)) ||
+ isKnownNonZero(Op0, DL, Depth, Q)) ||
(isKnownNegativeOp0 && isKnownNonNegativeOp1 &&
- isKnownNonZero(Op1, TD, Depth, Q));
+ isKnownNonZero(Op1, DL, Depth, Q));
}
}
@@ -382,8 +410,7 @@ static bool isAssumeLikeIntrinsic(const Instruction *I) {
return false;
}
-static bool isValidAssumeForContext(Value *V, const Query &Q,
- const DataLayout *DL) {
+static bool isValidAssumeForContext(Value *V, const Query &Q) {
Instruction *Inv = cast<Instruction>(V);
// There are two restrictions on the use of an assume:
@@ -403,8 +430,7 @@ static bool isValidAssumeForContext(Value *V, const Query &Q,
for (BasicBlock::const_iterator I =
std::next(BasicBlock::const_iterator(Q.CxtI)),
IE(Inv); I != IE; ++I)
- if (!isSafeToSpeculativelyExecute(I, DL) &&
- !isAssumeLikeIntrinsic(I))
+ if (!isSafeToSpeculativelyExecute(I) && !isAssumeLikeIntrinsic(I))
return false;
return !isEphemeralValueOf(Inv, Q.CxtI);
@@ -428,8 +454,7 @@ static bool isValidAssumeForContext(Value *V, const Query &Q,
for (BasicBlock::const_iterator I =
std::next(BasicBlock::const_iterator(Q.CxtI)),
IE(Inv); I != IE; ++I)
- if (!isSafeToSpeculativelyExecute(I, DL) &&
- !isAssumeLikeIntrinsic(I))
+ if (!isSafeToSpeculativelyExecute(I) && !isAssumeLikeIntrinsic(I))
return false;
return !isEphemeralValueOf(Inv, Q.CxtI);
@@ -440,10 +465,9 @@ static bool isValidAssumeForContext(Value *V, const Query &Q,
bool llvm::isValidAssumeForContext(const Instruction *I,
const Instruction *CxtI,
- const DataLayout *DL,
const DominatorTree *DT) {
- return ::isValidAssumeForContext(const_cast<Instruction*>(I),
- Query(nullptr, CxtI, DT), DL);
+ return ::isValidAssumeForContext(const_cast<Instruction *>(I),
+ Query(nullptr, CxtI, DT));
}
template<typename LHS, typename RHS>
@@ -474,9 +498,181 @@ m_c_Xor(const LHS &L, const RHS &R) {
return m_CombineOr(m_Xor(L, R), m_Xor(R, L));
}
+/// Compute known bits in 'V' under the assumption that the condition 'Cmp' is
+/// true (at the context instruction.) This is mostly a utility function for
+/// the prototype dominating conditions reasoning below.
+static void computeKnownBitsFromTrueCondition(Value *V, ICmpInst *Cmp,
+ APInt &KnownZero,
+ APInt &KnownOne,
+ const DataLayout &DL,
+ unsigned Depth, const Query &Q) {
+ Value *LHS = Cmp->getOperand(0);
+ Value *RHS = Cmp->getOperand(1);
+ // TODO: We could potentially be more aggressive here. This would be worth
+ // evaluating. If we can, explore commoning this code with the assume
+ // handling logic.
+ if (LHS != V && RHS != V)
+ return;
+
+ const unsigned BitWidth = KnownZero.getBitWidth();
+
+ switch (Cmp->getPredicate()) {
+ default:
+ // We know nothing from this condition
+ break;
+ // TODO: implement unsigned bound from below (known one bits)
+ // TODO: common condition check implementations with assumes
+ // TODO: implement other patterns from assume (e.g. V & B == A)
+ case ICmpInst::ICMP_SGT:
+ if (LHS == V) {
+ APInt KnownZeroTemp(BitWidth, 0), KnownOneTemp(BitWidth, 0);
+ computeKnownBits(RHS, KnownZeroTemp, KnownOneTemp, DL, Depth + 1, Q);
+ if (KnownOneTemp.isAllOnesValue() || KnownZeroTemp.isNegative()) {
+ // We know that the sign bit is zero.
+ KnownZero |= APInt::getSignBit(BitWidth);
+ }
+ }
+ break;
+ case ICmpInst::ICMP_EQ:
+ if (LHS == V)
+ computeKnownBits(RHS, KnownZero, KnownOne, DL, Depth + 1, Q);
+ else if (RHS == V)
+ computeKnownBits(LHS, KnownZero, KnownOne, DL, Depth + 1, Q);
+ else
+ llvm_unreachable("missing use?");
+ break;
+ case ICmpInst::ICMP_ULE:
+ if (LHS == V) {
+ APInt KnownZeroTemp(BitWidth, 0), KnownOneTemp(BitWidth, 0);
+ computeKnownBits(RHS, KnownZeroTemp, KnownOneTemp, DL, Depth + 1, Q);
+ // The known zero bits carry over
+ unsigned SignBits = KnownZeroTemp.countLeadingOnes();
+ KnownZero |= APInt::getHighBitsSet(BitWidth, SignBits);
+ }
+ break;
+ case ICmpInst::ICMP_ULT:
+ if (LHS == V) {
+ APInt KnownZeroTemp(BitWidth, 0), KnownOneTemp(BitWidth, 0);
+ computeKnownBits(RHS, KnownZeroTemp, KnownOneTemp, DL, Depth + 1, Q);
+      // Whatever high bits in RHS are zero are known to be zero (if RHS is a
+      // power of 2, then one more).
+ unsigned SignBits = KnownZeroTemp.countLeadingOnes();
+ if (isKnownToBeAPowerOfTwo(RHS, false, Depth + 1, Query(Q, Cmp), DL))
+ SignBits++;
+ KnownZero |= APInt::getHighBitsSet(BitWidth, SignBits);
+ }
+ break;
+ };
+}
+
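A worked instance of the ICMP_ULT arm above: for an i8 value with `x <u 16`, the constant 16 has KnownZero = ~16 = 0b11101111, whose countLeadingOnes() is 3; since 16 is a power of two, one more bit is added, so the top four bits of x are known zero, i.e. x <= 15. Checked with APInt directly:

#include "llvm/ADT/APInt.h"
using namespace llvm;

static APInt highZerosFromULT() {
  APInt C(8, 16);                              // RHS of x <u 16, as i8
  APInt KnownZeroC = ~C;                       // 0b11101111: bits clear in 16
  unsigned HighZeros = KnownZeroC.countLeadingOnes(); // 3
  if (C.isPowerOf2())
    ++HighZeros;                               // 4, since x <= 15
  return APInt::getHighBitsSet(8, HighZeros);  // 0b11110000 known zero in x
}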
+/// Compute known bits in 'V' from conditions which are known to be true along
+/// all paths leading to the context instruction. In particular, look for
+/// cases where one branch of an interesting condition dominates the context
+/// instruction. This does not do general dataflow.
+/// NOTE: This code is EXPERIMENTAL and currently off by default.
+static void computeKnownBitsFromDominatingCondition(Value *V, APInt &KnownZero,
+ APInt &KnownOne,
+ const DataLayout &DL,
+ unsigned Depth,
+ const Query &Q) {
+ // Need both the dominator tree and the query location to do anything useful
+ if (!Q.DT || !Q.CxtI)
+ return;
+ Instruction *Cxt = const_cast<Instruction *>(Q.CxtI);
+
+ // Avoid useless work
+ if (auto VI = dyn_cast<Instruction>(V))
+ if (VI->getParent() == Cxt->getParent())
+ return;
+
+ // Note: We currently implement two options. It's not clear which of these
+  // will survive long term; we need data for that.
+ // Option 1 - Try walking the dominator tree looking for conditions which
+ // might apply. This works well for local conditions (loop guards, etc..),
+ // but not as well for things far from the context instruction (presuming a
+  // low max blocks explored). If we can set a high enough limit, this would
+  // be all we need.
+  // Option 2 - We restrict our search to those conditions which are uses of
+ // the value we're interested in. This is independent of dom structure,
+ // but is slightly less powerful without looking through lots of use chains.
+ // It does handle conditions far from the context instruction (e.g. early
+ // function exits on entry) really well though.
+
+ // Option 1 - Search the dom tree
+ unsigned NumBlocksExplored = 0;
+ BasicBlock *Current = Cxt->getParent();
+ while (true) {
+ // Stop searching if we've gone too far up the chain
+ if (NumBlocksExplored >= DomConditionsMaxDomBlocks)
+ break;
+ NumBlocksExplored++;
+
+ if (!Q.DT->getNode(Current)->getIDom())
+ break;
+ Current = Q.DT->getNode(Current)->getIDom()->getBlock();
+ if (!Current)
+ // found function entry
+ break;
+
+ BranchInst *BI = dyn_cast<BranchInst>(Current->getTerminator());
+ if (!BI || BI->isUnconditional())
+ continue;
+ ICmpInst *Cmp = dyn_cast<ICmpInst>(BI->getCondition());
+ if (!Cmp)
+ continue;
+
+ // We're looking for conditions that are guaranteed to hold at the context
+ // instruction. Finding a condition where one path dominates the context
+ // isn't enough because both the true and false cases could merge before
+ // the context instruction we're actually interested in. Instead, we need
+ // to ensure that the taken *edge* dominates the context instruction.
+ BasicBlock *BB0 = BI->getSuccessor(0);
+ BasicBlockEdge Edge(BI->getParent(), BB0);
+ if (!Edge.isSingleEdge() || !Q.DT->dominates(Edge, Q.CxtI->getParent()))
+ continue;
+
+ computeKnownBitsFromTrueCondition(V, Cmp, KnownZero, KnownOne, DL, Depth,
+ Q);
+ }
+
+ // Option 2 - Search the other uses of V
+ unsigned NumUsesExplored = 0;
+ for (auto U : V->users()) {
+ // Avoid massive lists
+ if (NumUsesExplored >= DomConditionsMaxUses)
+ break;
+ NumUsesExplored++;
+ // Consider only compare instructions uniquely controlling a branch
+ ICmpInst *Cmp = dyn_cast<ICmpInst>(U);
+ if (!Cmp)
+ continue;
+
+ if (DomConditionsSingleCmpUse && !Cmp->hasOneUse())
+ continue;
+
+ for (auto *CmpU : Cmp->users()) {
+ BranchInst *BI = dyn_cast<BranchInst>(CmpU);
+ if (!BI || BI->isUnconditional())
+ continue;
+ // We're looking for conditions that are guaranteed to hold at the
+ // context instruction. Finding a condition where one path dominates
+ // the context isn't enough because both the true and false cases could
+ // merge before the context instruction we're actually interested in.
+ // Instead, we need to ensure that the taken *edge* dominates the context
+ // instruction.
+ BasicBlock *BB0 = BI->getSuccessor(0);
+ BasicBlockEdge Edge(BI->getParent(), BB0);
+ if (!Edge.isSingleEdge() || !Q.DT->dominates(Edge, Q.CxtI->getParent()))
+ continue;
+
+ computeKnownBitsFromTrueCondition(V, Cmp, KnownZero, KnownOne, DL, Depth,
+ Q);
+ }
+ }
+}
+
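The dominance test both options share is the subtle part: a block that merely dominates the context is not enough, because the true and false paths may remerge before it, so the check is on the taken edge. Extracted as a hedged helper (the name is made up):

#include "llvm/IR/Dominators.h"
#include "llvm/IR/Instructions.h"
using namespace llvm;

// Does BI's true edge imply its condition holds at CxtI?
static bool trueEdgeDominates(BranchInst *BI, const DominatorTree &DT,
                              const Instruction *CxtI) {
  BasicBlockEdge Edge(BI->getParent(), BI->getSuccessor(0));
  return Edge.isSingleEdge() && DT.dominates(Edge, CxtI->getParent());
}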
static void computeKnownBitsFromAssume(Value *V, APInt &KnownZero,
- APInt &KnownOne,
- const DataLayout *DL,
+ APInt &KnownOne, const DataLayout &DL,
unsigned Depth, const Query &Q) {
// Use of assumptions is context-sensitive. If we don't have a context, we
// cannot use them!
@@ -504,8 +700,7 @@ static void computeKnownBitsFromAssume(Value *V, APInt &KnownZero,
Value *Arg = I->getArgOperand(0);
- if (Arg == V &&
- isValidAssumeForContext(I, Q, DL)) {
+ if (Arg == V && isValidAssumeForContext(I, Q)) {
assert(BitWidth == 1 && "assume operand is not i1?");
KnownZero.clearAllBits();
KnownOne.setAllBits();
@@ -525,15 +720,15 @@ static void computeKnownBitsFromAssume(Value *V, APInt &KnownZero,
ConstantInt *C;
// assume(v = a)
if (match(Arg, m_c_ICmp(Pred, m_V, m_Value(A))) &&
- Pred == ICmpInst::ICMP_EQ && isValidAssumeForContext(I, Q, DL)) {
+ Pred == ICmpInst::ICMP_EQ && isValidAssumeForContext(I, Q)) {
APInt RHSKnownZero(BitWidth, 0), RHSKnownOne(BitWidth, 0);
computeKnownBits(A, RHSKnownZero, RHSKnownOne, DL, Depth+1, Query(Q, I));
KnownZero |= RHSKnownZero;
KnownOne |= RHSKnownOne;
// assume(v & b = a)
- } else if (match(Arg, m_c_ICmp(Pred, m_c_And(m_V, m_Value(B)),
- m_Value(A))) &&
- Pred == ICmpInst::ICMP_EQ && isValidAssumeForContext(I, Q, DL)) {
+ } else if (match(Arg,
+ m_c_ICmp(Pred, m_c_And(m_V, m_Value(B)), m_Value(A))) &&
+ Pred == ICmpInst::ICMP_EQ && isValidAssumeForContext(I, Q)) {
APInt RHSKnownZero(BitWidth, 0), RHSKnownOne(BitWidth, 0);
computeKnownBits(A, RHSKnownZero, RHSKnownOne, DL, Depth+1, Query(Q, I));
APInt MaskKnownZero(BitWidth, 0), MaskKnownOne(BitWidth, 0);
@@ -546,7 +741,7 @@ static void computeKnownBitsFromAssume(Value *V, APInt &KnownZero,
// assume(~(v & b) = a)
} else if (match(Arg, m_c_ICmp(Pred, m_Not(m_c_And(m_V, m_Value(B))),
m_Value(A))) &&
- Pred == ICmpInst::ICMP_EQ && isValidAssumeForContext(I, Q, DL)) {
+ Pred == ICmpInst::ICMP_EQ && isValidAssumeForContext(I, Q)) {
APInt RHSKnownZero(BitWidth, 0), RHSKnownOne(BitWidth, 0);
computeKnownBits(A, RHSKnownZero, RHSKnownOne, DL, Depth+1, Query(Q, I));
APInt MaskKnownZero(BitWidth, 0), MaskKnownOne(BitWidth, 0);
@@ -557,9 +752,9 @@ static void computeKnownBitsFromAssume(Value *V, APInt &KnownZero,
KnownZero |= RHSKnownOne & MaskKnownOne;
KnownOne |= RHSKnownZero & MaskKnownOne;
// assume(v | b = a)
- } else if (match(Arg, m_c_ICmp(Pred, m_c_Or(m_V, m_Value(B)),
- m_Value(A))) &&
- Pred == ICmpInst::ICMP_EQ && isValidAssumeForContext(I, Q, DL)) {
+ } else if (match(Arg,
+ m_c_ICmp(Pred, m_c_Or(m_V, m_Value(B)), m_Value(A))) &&
+ Pred == ICmpInst::ICMP_EQ && isValidAssumeForContext(I, Q)) {
APInt RHSKnownZero(BitWidth, 0), RHSKnownOne(BitWidth, 0);
computeKnownBits(A, RHSKnownZero, RHSKnownOne, DL, Depth+1, Query(Q, I));
APInt BKnownZero(BitWidth, 0), BKnownOne(BitWidth, 0);
@@ -572,7 +767,7 @@ static void computeKnownBitsFromAssume(Value *V, APInt &KnownZero,
// assume(~(v | b) = a)
} else if (match(Arg, m_c_ICmp(Pred, m_Not(m_c_Or(m_V, m_Value(B))),
m_Value(A))) &&
- Pred == ICmpInst::ICMP_EQ && isValidAssumeForContext(I, Q, DL)) {
+ Pred == ICmpInst::ICMP_EQ && isValidAssumeForContext(I, Q)) {
APInt RHSKnownZero(BitWidth, 0), RHSKnownOne(BitWidth, 0);
computeKnownBits(A, RHSKnownZero, RHSKnownOne, DL, Depth+1, Query(Q, I));
APInt BKnownZero(BitWidth, 0), BKnownOne(BitWidth, 0);
@@ -583,9 +778,9 @@ static void computeKnownBitsFromAssume(Value *V, APInt &KnownZero,
KnownZero |= RHSKnownOne & BKnownZero;
KnownOne |= RHSKnownZero & BKnownZero;
// assume(v ^ b = a)
- } else if (match(Arg, m_c_ICmp(Pred, m_c_Xor(m_V, m_Value(B)),
- m_Value(A))) &&
- Pred == ICmpInst::ICMP_EQ && isValidAssumeForContext(I, Q, DL)) {
+ } else if (match(Arg,
+ m_c_ICmp(Pred, m_c_Xor(m_V, m_Value(B)), m_Value(A))) &&
+ Pred == ICmpInst::ICMP_EQ && isValidAssumeForContext(I, Q)) {
APInt RHSKnownZero(BitWidth, 0), RHSKnownOne(BitWidth, 0);
computeKnownBits(A, RHSKnownZero, RHSKnownOne, DL, Depth+1, Query(Q, I));
APInt BKnownZero(BitWidth, 0), BKnownOne(BitWidth, 0);
@@ -601,7 +796,7 @@ static void computeKnownBitsFromAssume(Value *V, APInt &KnownZero,
// assume(~(v ^ b) = a)
} else if (match(Arg, m_c_ICmp(Pred, m_Not(m_c_Xor(m_V, m_Value(B))),
m_Value(A))) &&
- Pred == ICmpInst::ICMP_EQ && isValidAssumeForContext(I, Q, DL)) {
+ Pred == ICmpInst::ICMP_EQ && isValidAssumeForContext(I, Q)) {
APInt RHSKnownZero(BitWidth, 0), RHSKnownOne(BitWidth, 0);
computeKnownBits(A, RHSKnownZero, RHSKnownOne, DL, Depth+1, Query(Q, I));
APInt BKnownZero(BitWidth, 0), BKnownOne(BitWidth, 0);
@@ -617,7 +812,7 @@ static void computeKnownBitsFromAssume(Value *V, APInt &KnownZero,
// assume(v << c = a)
} else if (match(Arg, m_c_ICmp(Pred, m_Shl(m_V, m_ConstantInt(C)),
m_Value(A))) &&
- Pred == ICmpInst::ICMP_EQ && isValidAssumeForContext(I, Q, DL)) {
+ Pred == ICmpInst::ICMP_EQ && isValidAssumeForContext(I, Q)) {
APInt RHSKnownZero(BitWidth, 0), RHSKnownOne(BitWidth, 0);
computeKnownBits(A, RHSKnownZero, RHSKnownOne, DL, Depth+1, Query(Q, I));
// For those bits in RHS that are known, we can propagate them to known
@@ -627,7 +822,7 @@ static void computeKnownBitsFromAssume(Value *V, APInt &KnownZero,
// assume(~(v << c) = a)
} else if (match(Arg, m_c_ICmp(Pred, m_Not(m_Shl(m_V, m_ConstantInt(C))),
m_Value(A))) &&
- Pred == ICmpInst::ICMP_EQ && isValidAssumeForContext(I, Q, DL)) {
+ Pred == ICmpInst::ICMP_EQ && isValidAssumeForContext(I, Q)) {
APInt RHSKnownZero(BitWidth, 0), RHSKnownOne(BitWidth, 0);
computeKnownBits(A, RHSKnownZero, RHSKnownOne, DL, Depth+1, Query(Q, I));
// For those bits in RHS that are known, we can propagate them inverted
@@ -637,10 +832,9 @@ static void computeKnownBitsFromAssume(Value *V, APInt &KnownZero,
// assume(v >> c = a)
} else if (match(Arg,
m_c_ICmp(Pred, m_CombineOr(m_LShr(m_V, m_ConstantInt(C)),
- m_AShr(m_V,
- m_ConstantInt(C))),
- m_Value(A))) &&
- Pred == ICmpInst::ICMP_EQ && isValidAssumeForContext(I, Q, DL)) {
+ m_AShr(m_V, m_ConstantInt(C))),
+ m_Value(A))) &&
+ Pred == ICmpInst::ICMP_EQ && isValidAssumeForContext(I, Q)) {
APInt RHSKnownZero(BitWidth, 0), RHSKnownOne(BitWidth, 0);
computeKnownBits(A, RHSKnownZero, RHSKnownOne, DL, Depth+1, Query(Q, I));
// For those bits in RHS that are known, we can propagate them to known
@@ -649,10 +843,10 @@ static void computeKnownBitsFromAssume(Value *V, APInt &KnownZero,
KnownOne |= RHSKnownOne << C->getZExtValue();
// assume(~(v >> c) = a)
} else if (match(Arg, m_c_ICmp(Pred, m_Not(m_CombineOr(
- m_LShr(m_V, m_ConstantInt(C)),
- m_AShr(m_V, m_ConstantInt(C)))),
+ m_LShr(m_V, m_ConstantInt(C)),
+ m_AShr(m_V, m_ConstantInt(C)))),
m_Value(A))) &&
- Pred == ICmpInst::ICMP_EQ && isValidAssumeForContext(I, Q, DL)) {
+ Pred == ICmpInst::ICMP_EQ && isValidAssumeForContext(I, Q)) {
APInt RHSKnownZero(BitWidth, 0), RHSKnownOne(BitWidth, 0);
computeKnownBits(A, RHSKnownZero, RHSKnownOne, DL, Depth+1, Query(Q, I));
// For those bits in RHS that are known, we can propagate them inverted
@@ -661,8 +855,7 @@ static void computeKnownBitsFromAssume(Value *V, APInt &KnownZero,
KnownOne |= RHSKnownZero << C->getZExtValue();
// assume(v >=_s c) where c is non-negative
} else if (match(Arg, m_ICmp(Pred, m_V, m_Value(A))) &&
- Pred == ICmpInst::ICMP_SGE &&
- isValidAssumeForContext(I, Q, DL)) {
+ Pred == ICmpInst::ICMP_SGE && isValidAssumeForContext(I, Q)) {
APInt RHSKnownZero(BitWidth, 0), RHSKnownOne(BitWidth, 0);
computeKnownBits(A, RHSKnownZero, RHSKnownOne, DL, Depth+1, Query(Q, I));
@@ -672,8 +865,7 @@ static void computeKnownBitsFromAssume(Value *V, APInt &KnownZero,
}
// assume(v >_s c) where c is at least -1.
} else if (match(Arg, m_ICmp(Pred, m_V, m_Value(A))) &&
- Pred == ICmpInst::ICMP_SGT &&
- isValidAssumeForContext(I, Q, DL)) {
+ Pred == ICmpInst::ICMP_SGT && isValidAssumeForContext(I, Q)) {
APInt RHSKnownZero(BitWidth, 0), RHSKnownOne(BitWidth, 0);
computeKnownBits(A, RHSKnownZero, RHSKnownOne, DL, Depth+1, Query(Q, I));
@@ -683,8 +875,7 @@ static void computeKnownBitsFromAssume(Value *V, APInt &KnownZero,
}
// assume(v <=_s c) where c is negative
} else if (match(Arg, m_ICmp(Pred, m_V, m_Value(A))) &&
- Pred == ICmpInst::ICMP_SLE &&
- isValidAssumeForContext(I, Q, DL)) {
+ Pred == ICmpInst::ICMP_SLE && isValidAssumeForContext(I, Q)) {
APInt RHSKnownZero(BitWidth, 0), RHSKnownOne(BitWidth, 0);
computeKnownBits(A, RHSKnownZero, RHSKnownOne, DL, Depth+1, Query(Q, I));
@@ -694,8 +885,7 @@ static void computeKnownBitsFromAssume(Value *V, APInt &KnownZero,
}
// assume(v <_s c) where c is non-positive
} else if (match(Arg, m_ICmp(Pred, m_V, m_Value(A))) &&
- Pred == ICmpInst::ICMP_SLT &&
- isValidAssumeForContext(I, Q, DL)) {
+ Pred == ICmpInst::ICMP_SLT && isValidAssumeForContext(I, Q)) {
APInt RHSKnownZero(BitWidth, 0), RHSKnownOne(BitWidth, 0);
computeKnownBits(A, RHSKnownZero, RHSKnownOne, DL, Depth+1, Query(Q, I));
@@ -705,8 +895,7 @@ static void computeKnownBitsFromAssume(Value *V, APInt &KnownZero,
}
// assume(v <=_u c)
} else if (match(Arg, m_ICmp(Pred, m_V, m_Value(A))) &&
- Pred == ICmpInst::ICMP_ULE &&
- isValidAssumeForContext(I, Q, DL)) {
+ Pred == ICmpInst::ICMP_ULE && isValidAssumeForContext(I, Q)) {
APInt RHSKnownZero(BitWidth, 0), RHSKnownOne(BitWidth, 0);
computeKnownBits(A, RHSKnownZero, RHSKnownOne, DL, Depth+1, Query(Q, I));
@@ -715,14 +904,13 @@ static void computeKnownBitsFromAssume(Value *V, APInt &KnownZero,
APInt::getHighBitsSet(BitWidth, RHSKnownZero.countLeadingOnes());
// assume(v <_u c)
} else if (match(Arg, m_ICmp(Pred, m_V, m_Value(A))) &&
- Pred == ICmpInst::ICMP_ULT &&
- isValidAssumeForContext(I, Q, DL)) {
+ Pred == ICmpInst::ICMP_ULT && isValidAssumeForContext(I, Q)) {
APInt RHSKnownZero(BitWidth, 0), RHSKnownOne(BitWidth, 0);
computeKnownBits(A, RHSKnownZero, RHSKnownOne, DL, Depth+1, Query(Q, I));
// Whatever high bits in c are zero are known to be zero (if c is a power
// of 2, then one more).
- if (isKnownToBeAPowerOfTwo(A, false, Depth+1, Query(Q, I)))
+ if (isKnownToBeAPowerOfTwo(A, false, Depth + 1, Query(Q, I), DL))
KnownZero |=
APInt::getHighBitsSet(BitWidth, RHSKnownZero.countLeadingOnes()+1);
else
@@ -743,13 +931,12 @@ static void computeKnownBitsFromAssume(Value *V, APInt &KnownZero,
/// this won't lose us code quality.
///
/// This function is defined on values with integer type, values with pointer
-/// type (but only if TD is non-null), and vectors of integers. In the case
+/// type, and vectors of integers. In the case
/// where V is a vector, known zero, and known one values are the
/// same width as the vector element, and the bit is set only if it is true
/// for all of the elements in the vector.
void computeKnownBits(Value *V, APInt &KnownZero, APInt &KnownOne,
- const DataLayout *TD, unsigned Depth,
- const Query &Q) {
+ const DataLayout &DL, unsigned Depth, const Query &Q) {
assert(V && "No Value?");
assert(Depth <= MaxDepth && "Limit Search Depth");
unsigned BitWidth = KnownZero.getBitWidth();
@@ -757,8 +944,7 @@ void computeKnownBits(Value *V, APInt &KnownZero, APInt &KnownOne,
assert((V->getType()->isIntOrIntVectorTy() ||
V->getType()->getScalarType()->isPointerTy()) &&
"Not integer or pointer type!");
- assert((!TD ||
- TD->getTypeSizeInBits(V->getType()->getScalarType()) == BitWidth) &&
+ assert((DL.getTypeSizeInBits(V->getType()->getScalarType()) == BitWidth) &&
(!V->getType()->isIntOrIntVectorTy() ||
V->getType()->getScalarSizeInBits() == BitWidth) &&
KnownZero.getBitWidth() == BitWidth &&
@@ -797,7 +983,7 @@ void computeKnownBits(Value *V, APInt &KnownZero, APInt &KnownOne,
// The address of an aligned GlobalValue has trailing zeros.
if (auto *GO = dyn_cast<GlobalObject>(V)) {
unsigned Align = GO->getAlignment();
- if (Align == 0 && TD) {
+ if (Align == 0) {
if (auto *GVar = dyn_cast<GlobalVariable>(GO)) {
Type *ObjectType = GVar->getType()->getElementType();
if (ObjectType->isSized()) {
@@ -805,9 +991,9 @@ void computeKnownBits(Value *V, APInt &KnownZero, APInt &KnownOne,
// it the preferred alignment. Otherwise, we have to assume that it
// may only have the minimum ABI alignment.
if (!GVar->isDeclaration() && !GVar->isWeakForLinker())
- Align = TD->getPreferredAlignment(GVar);
+ Align = DL.getPreferredAlignment(GVar);
else
- Align = TD->getABITypeAlignment(ObjectType);
+ Align = DL.getABITypeAlignment(ObjectType);
}
}
}
@@ -823,11 +1009,11 @@ void computeKnownBits(Value *V, APInt &KnownZero, APInt &KnownOne,
if (Argument *A = dyn_cast<Argument>(V)) {
unsigned Align = A->getType()->isPointerTy() ? A->getParamAlignment() : 0;
- if (!Align && TD && A->hasStructRetAttr()) {
+ if (!Align && A->hasStructRetAttr()) {
// An sret parameter has at least the ABI alignment of the return type.
Type *EltTy = cast<PointerType>(A->getType())->getElementType();
if (EltTy->isSized())
- Align = TD->getABITypeAlignment(EltTy);
+ Align = DL.getABITypeAlignment(EltTy);
}
if (Align)
@@ -838,7 +1024,12 @@ void computeKnownBits(Value *V, APInt &KnownZero, APInt &KnownOne,
// Don't give up yet... there might be an assumption that provides more
// information...
- computeKnownBitsFromAssume(V, KnownZero, KnownOne, TD, Depth, Q);
+ computeKnownBitsFromAssume(V, KnownZero, KnownOne, DL, Depth, Q);
+
+ // Or a dominating condition for that matter
+ if (EnableDomConditions && Depth <= DomConditionsMaxDepth)
+ computeKnownBitsFromDominatingCondition(V, KnownZero, KnownOne, DL,
+ Depth, Q);
return;
}
@@ -854,12 +1045,18 @@ void computeKnownBits(Value *V, APInt &KnownZero, APInt &KnownOne,
// the bits of its aliasee.
if (GlobalAlias *GA = dyn_cast<GlobalAlias>(V)) {
if (!GA->mayBeOverridden())
- computeKnownBits(GA->getAliasee(), KnownZero, KnownOne, TD, Depth + 1, Q);
+ computeKnownBits(GA->getAliasee(), KnownZero, KnownOne, DL, Depth + 1, Q);
return;
}
// Check whether a nearby assume intrinsic can determine some known bits.
- computeKnownBitsFromAssume(V, KnownZero, KnownOne, TD, Depth, Q);
+ computeKnownBitsFromAssume(V, KnownZero, KnownOne, DL, Depth, Q);
+
+ // Check whether there's a dominating condition which implies something about
+ // this value at the given context.
+ if (EnableDomConditions && Depth <= DomConditionsMaxDepth)
+ computeKnownBitsFromDominatingCondition(V, KnownZero, KnownOne, DL, Depth,
+ Q);
Operator *I = dyn_cast<Operator>(V);
if (!I) return;
@@ -873,8 +1070,8 @@ void computeKnownBits(Value *V, APInt &KnownZero, APInt &KnownOne,
break;
case Instruction::And: {
// If either the LHS or the RHS are Zero, the result is zero.
- computeKnownBits(I->getOperand(1), KnownZero, KnownOne, TD, Depth+1, Q);
- computeKnownBits(I->getOperand(0), KnownZero2, KnownOne2, TD, Depth+1, Q);
+ computeKnownBits(I->getOperand(1), KnownZero, KnownOne, DL, Depth + 1, Q);
+ computeKnownBits(I->getOperand(0), KnownZero2, KnownOne2, DL, Depth + 1, Q);
// Output known-1 bits are only known if set in both the LHS & RHS.
KnownOne &= KnownOne2;
@@ -883,8 +1080,8 @@ void computeKnownBits(Value *V, APInt &KnownZero, APInt &KnownOne,
break;
}
case Instruction::Or: {
- computeKnownBits(I->getOperand(1), KnownZero, KnownOne, TD, Depth+1, Q);
- computeKnownBits(I->getOperand(0), KnownZero2, KnownOne2, TD, Depth+1, Q);
+ computeKnownBits(I->getOperand(1), KnownZero, KnownOne, DL, Depth + 1, Q);
+ computeKnownBits(I->getOperand(0), KnownZero2, KnownOne2, DL, Depth + 1, Q);
// Output known-0 bits are only known if clear in both the LHS & RHS.
KnownZero &= KnownZero2;
@@ -893,8 +1090,8 @@ void computeKnownBits(Value *V, APInt &KnownZero, APInt &KnownOne,
break;
}
case Instruction::Xor: {
- computeKnownBits(I->getOperand(1), KnownZero, KnownOne, TD, Depth+1, Q);
- computeKnownBits(I->getOperand(0), KnownZero2, KnownOne2, TD, Depth+1, Q);
+ computeKnownBits(I->getOperand(1), KnownZero, KnownOne, DL, Depth + 1, Q);
+ computeKnownBits(I->getOperand(0), KnownZero2, KnownOne2, DL, Depth + 1, Q);
// Output known-0 bits are known if clear or set in both the LHS & RHS.
APInt KnownZeroOut = (KnownZero & KnownZero2) | (KnownOne & KnownOne2);
@@ -905,21 +1102,20 @@ void computeKnownBits(Value *V, APInt &KnownZero, APInt &KnownOne,
}
case Instruction::Mul: {
bool NSW = cast<OverflowingBinaryOperator>(I)->hasNoSignedWrap();
- computeKnownBitsMul(I->getOperand(0), I->getOperand(1), NSW,
- KnownZero, KnownOne, KnownZero2, KnownOne2, TD,
- Depth, Q);
+ computeKnownBitsMul(I->getOperand(0), I->getOperand(1), NSW, KnownZero,
+ KnownOne, KnownZero2, KnownOne2, DL, Depth, Q);
break;
}
case Instruction::UDiv: {
// For the purposes of computing leading zeros we can conservatively
// treat a udiv as a logical right shift by the power of 2 known to
// be less than the denominator.
- computeKnownBits(I->getOperand(0), KnownZero2, KnownOne2, TD, Depth+1, Q);
+ computeKnownBits(I->getOperand(0), KnownZero2, KnownOne2, DL, Depth + 1, Q);
unsigned LeadZ = KnownZero2.countLeadingOnes();
KnownOne2.clearAllBits();
KnownZero2.clearAllBits();
- computeKnownBits(I->getOperand(1), KnownZero2, KnownOne2, TD, Depth+1, Q);
+ computeKnownBits(I->getOperand(1), KnownZero2, KnownOne2, DL, Depth + 1, Q);
unsigned RHSUnknownLeadingOnes = KnownOne2.countLeadingZeros();
if (RHSUnknownLeadingOnes != BitWidth)
LeadZ = std::min(BitWidth,
@@ -929,8 +1125,8 @@ void computeKnownBits(Value *V, APInt &KnownZero, APInt &KnownOne,
break;
}
case Instruction::Select:
- computeKnownBits(I->getOperand(2), KnownZero, KnownOne, TD, Depth+1, Q);
- computeKnownBits(I->getOperand(1), KnownZero2, KnownOne2, TD, Depth+1, Q);
+ computeKnownBits(I->getOperand(2), KnownZero, KnownOne, DL, Depth + 1, Q);
+ computeKnownBits(I->getOperand(1), KnownZero2, KnownOne2, DL, Depth + 1, Q);
// Only known if known in both the LHS and RHS.
KnownOne &= KnownOne2;
@@ -946,8 +1142,6 @@ void computeKnownBits(Value *V, APInt &KnownZero, APInt &KnownOne,
case Instruction::PtrToInt:
case Instruction::IntToPtr:
case Instruction::AddrSpaceCast: // Pointers could be different sizes.
- // We can't handle these if we don't know the pointer size.
- if (!TD) break;
// FALL THROUGH and handle them the same as zext/trunc.
case Instruction::ZExt:
case Instruction::Trunc: {
@@ -956,17 +1150,12 @@ void computeKnownBits(Value *V, APInt &KnownZero, APInt &KnownOne,
unsigned SrcBitWidth;
// Note that we handle pointer operands here because of inttoptr/ptrtoint
// which fall through here.
- if(TD) {
- SrcBitWidth = TD->getTypeSizeInBits(SrcTy->getScalarType());
- } else {
- SrcBitWidth = SrcTy->getScalarSizeInBits();
- if (!SrcBitWidth) break;
- }
+ SrcBitWidth = DL.getTypeSizeInBits(SrcTy->getScalarType());
assert(SrcBitWidth && "SrcBitWidth can't be zero");
KnownZero = KnownZero.zextOrTrunc(SrcBitWidth);
KnownOne = KnownOne.zextOrTrunc(SrcBitWidth);
- computeKnownBits(I->getOperand(0), KnownZero, KnownOne, TD, Depth+1, Q);
+ computeKnownBits(I->getOperand(0), KnownZero, KnownOne, DL, Depth + 1, Q);
KnownZero = KnownZero.zextOrTrunc(BitWidth);
KnownOne = KnownOne.zextOrTrunc(BitWidth);
// Any top bits are known to be zero.
@@ -980,7 +1169,7 @@ void computeKnownBits(Value *V, APInt &KnownZero, APInt &KnownOne,
// TODO: For now, not handling conversions like:
// (bitcast i64 %x to <2 x i32>)
!I->getType()->isVectorTy()) {
- computeKnownBits(I->getOperand(0), KnownZero, KnownOne, TD, Depth+1, Q);
+ computeKnownBits(I->getOperand(0), KnownZero, KnownOne, DL, Depth + 1, Q);
break;
}
break;
@@ -991,7 +1180,7 @@ void computeKnownBits(Value *V, APInt &KnownZero, APInt &KnownOne,
KnownZero = KnownZero.trunc(SrcBitWidth);
KnownOne = KnownOne.trunc(SrcBitWidth);
- computeKnownBits(I->getOperand(0), KnownZero, KnownOne, TD, Depth+1, Q);
+ computeKnownBits(I->getOperand(0), KnownZero, KnownOne, DL, Depth + 1, Q);
KnownZero = KnownZero.zext(BitWidth);
KnownOne = KnownOne.zext(BitWidth);
@@ -1007,7 +1196,7 @@ void computeKnownBits(Value *V, APInt &KnownZero, APInt &KnownOne,
// (shl X, C1) & C2 == 0 iff (X & C2 >>u C1) == 0
if (ConstantInt *SA = dyn_cast<ConstantInt>(I->getOperand(1))) {
uint64_t ShiftAmt = SA->getLimitedValue(BitWidth);
- computeKnownBits(I->getOperand(0), KnownZero, KnownOne, TD, Depth+1, Q);
+ computeKnownBits(I->getOperand(0), KnownZero, KnownOne, DL, Depth + 1, Q);
KnownZero <<= ShiftAmt;
KnownOne <<= ShiftAmt;
KnownZero |= APInt::getLowBitsSet(BitWidth, ShiftAmt); // low bits known 0
@@ -1020,7 +1209,7 @@ void computeKnownBits(Value *V, APInt &KnownZero, APInt &KnownOne,
uint64_t ShiftAmt = SA->getLimitedValue(BitWidth);
// Unsigned shift right.
- computeKnownBits(I->getOperand(0), KnownZero, KnownOne, TD, Depth+1, Q);
+ computeKnownBits(I->getOperand(0), KnownZero, KnownOne, DL, Depth + 1, Q);
KnownZero = APIntOps::lshr(KnownZero, ShiftAmt);
KnownOne = APIntOps::lshr(KnownOne, ShiftAmt);
// high bits known zero.
@@ -1034,7 +1223,7 @@ void computeKnownBits(Value *V, APInt &KnownZero, APInt &KnownOne,
uint64_t ShiftAmt = SA->getLimitedValue(BitWidth-1);
// Signed shift right.
- computeKnownBits(I->getOperand(0), KnownZero, KnownOne, TD, Depth+1, Q);
+ computeKnownBits(I->getOperand(0), KnownZero, KnownOne, DL, Depth + 1, Q);
KnownZero = APIntOps::lshr(KnownZero, ShiftAmt);
KnownOne = APIntOps::lshr(KnownOne, ShiftAmt);
@@ -1048,15 +1237,15 @@ void computeKnownBits(Value *V, APInt &KnownZero, APInt &KnownOne,
case Instruction::Sub: {
bool NSW = cast<OverflowingBinaryOperator>(I)->hasNoSignedWrap();
computeKnownBitsAddSub(false, I->getOperand(0), I->getOperand(1), NSW,
- KnownZero, KnownOne, KnownZero2, KnownOne2, TD,
- Depth, Q);
+ KnownZero, KnownOne, KnownZero2, KnownOne2, DL,
+ Depth, Q);
break;
}
case Instruction::Add: {
bool NSW = cast<OverflowingBinaryOperator>(I)->hasNoSignedWrap();
computeKnownBitsAddSub(true, I->getOperand(0), I->getOperand(1), NSW,
- KnownZero, KnownOne, KnownZero2, KnownOne2, TD,
- Depth, Q);
+ KnownZero, KnownOne, KnownZero2, KnownOne2, DL,
+ Depth, Q);
break;
}
case Instruction::SRem:
@@ -1064,8 +1253,8 @@ void computeKnownBits(Value *V, APInt &KnownZero, APInt &KnownOne,
APInt RA = Rem->getValue().abs();
if (RA.isPowerOf2()) {
APInt LowBits = RA - 1;
- computeKnownBits(I->getOperand(0), KnownZero2, KnownOne2, TD,
- Depth+1, Q);
+ computeKnownBits(I->getOperand(0), KnownZero2, KnownOne2, DL, Depth + 1,
+ Q);
// The low bits of the first operand are unchanged by the srem.
KnownZero = KnownZero2 & LowBits;
@@ -1089,8 +1278,8 @@ void computeKnownBits(Value *V, APInt &KnownZero, APInt &KnownOne,
// remainder is zero.
if (KnownZero.isNonNegative()) {
APInt LHSKnownZero(BitWidth, 0), LHSKnownOne(BitWidth, 0);
- computeKnownBits(I->getOperand(0), LHSKnownZero, LHSKnownOne, TD,
- Depth+1, Q);
+ computeKnownBits(I->getOperand(0), LHSKnownZero, LHSKnownOne, DL,
+ Depth + 1, Q);
// If it's known zero, our sign bit is also zero.
if (LHSKnownZero.isNegative())
KnownZero.setBit(BitWidth - 1);
@@ -1102,8 +1291,8 @@ void computeKnownBits(Value *V, APInt &KnownZero, APInt &KnownOne,
APInt RA = Rem->getValue();
if (RA.isPowerOf2()) {
APInt LowBits = (RA - 1);
- computeKnownBits(I->getOperand(0), KnownZero, KnownOne, TD,
- Depth+1, Q);
+ computeKnownBits(I->getOperand(0), KnownZero, KnownOne, DL, Depth + 1,
+ Q);
KnownZero |= ~LowBits;
KnownOne &= LowBits;
break;
@@ -1112,8 +1301,8 @@ void computeKnownBits(Value *V, APInt &KnownZero, APInt &KnownOne,
// Since the result is less than or equal to either operand, any leading
// zero bits in either operand must also exist in the result.
- computeKnownBits(I->getOperand(0), KnownZero, KnownOne, TD, Depth+1, Q);
- computeKnownBits(I->getOperand(1), KnownZero2, KnownOne2, TD, Depth+1, Q);
+ computeKnownBits(I->getOperand(0), KnownZero, KnownOne, DL, Depth + 1, Q);
+ computeKnownBits(I->getOperand(1), KnownZero2, KnownOne2, DL, Depth + 1, Q);
unsigned Leaders = std::max(KnownZero.countLeadingOnes(),
KnownZero2.countLeadingOnes());
@@ -1125,8 +1314,8 @@ void computeKnownBits(Value *V, APInt &KnownZero, APInt &KnownOne,
case Instruction::Alloca: {
AllocaInst *AI = cast<AllocaInst>(V);
unsigned Align = AI->getAlignment();
- if (Align == 0 && TD)
- Align = TD->getABITypeAlignment(AI->getType()->getElementType());
+ if (Align == 0)
+ Align = DL.getABITypeAlignment(AI->getType()->getElementType());
if (Align > 0)
KnownZero = APInt::getLowBitsSet(BitWidth, countTrailingZeros(Align));
@@ -1136,8 +1325,8 @@ void computeKnownBits(Value *V, APInt &KnownZero, APInt &KnownOne,
// Analyze all of the subscripts of this getelementptr instruction
// to determine if we can prove known low zero bits.
APInt LocalKnownZero(BitWidth, 0), LocalKnownOne(BitWidth, 0);
- computeKnownBits(I->getOperand(0), LocalKnownZero, LocalKnownOne, TD,
- Depth+1, Q);
+ computeKnownBits(I->getOperand(0), LocalKnownZero, LocalKnownOne, DL,
+ Depth + 1, Q);
unsigned TrailZ = LocalKnownZero.countTrailingOnes();
gep_type_iterator GTI = gep_type_begin(I);
@@ -1145,10 +1334,6 @@ void computeKnownBits(Value *V, APInt &KnownZero, APInt &KnownOne,
Value *Index = I->getOperand(i);
if (StructType *STy = dyn_cast<StructType>(*GTI)) {
// Handle struct member offset arithmetic.
- if (!TD) {
- TrailZ = 0;
- break;
- }
// Handle case when index is vector zeroinitializer
Constant *CIndex = cast<Constant>(Index);
@@ -1159,7 +1344,7 @@ void computeKnownBits(Value *V, APInt &KnownZero, APInt &KnownOne,
Index = CIndex->getSplatValue();
unsigned Idx = cast<ConstantInt>(Index)->getZExtValue();
- const StructLayout *SL = TD->getStructLayout(STy);
+ const StructLayout *SL = DL.getStructLayout(STy);
uint64_t Offset = SL->getElementOffset(Idx);
TrailZ = std::min<unsigned>(TrailZ,
countTrailingZeros(Offset));
@@ -1171,9 +1356,10 @@ void computeKnownBits(Value *V, APInt &KnownZero, APInt &KnownOne,
break;
}
unsigned GEPOpiBits = Index->getType()->getScalarSizeInBits();
- uint64_t TypeSize = TD ? TD->getTypeAllocSize(IndexedTy) : 1;
+ uint64_t TypeSize = DL.getTypeAllocSize(IndexedTy);
LocalKnownZero = LocalKnownOne = APInt(GEPOpiBits, 0);
- computeKnownBits(Index, LocalKnownZero, LocalKnownOne, TD, Depth+1, Q);
+ computeKnownBits(Index, LocalKnownZero, LocalKnownOne, DL, Depth + 1,
+ Q);
TrailZ = std::min(TrailZ,
unsigned(countTrailingZeros(TypeSize) +
LocalKnownZero.countTrailingOnes()));
@@ -1215,11 +1401,11 @@ void computeKnownBits(Value *V, APInt &KnownZero, APInt &KnownOne,
break;
// Ok, we have a PHI of the form L op= R. Check for low
// zero bits.
- computeKnownBits(R, KnownZero2, KnownOne2, TD, Depth+1, Q);
+ computeKnownBits(R, KnownZero2, KnownOne2, DL, Depth + 1, Q);
// We need to take the minimum number of known bits
APInt KnownZero3(KnownZero), KnownOne3(KnownOne);
- computeKnownBits(L, KnownZero3, KnownOne3, TD, Depth+1, Q);
+ computeKnownBits(L, KnownZero3, KnownOne3, DL, Depth + 1, Q);
KnownZero = APInt::getLowBitsSet(BitWidth,
std::min(KnownZero2.countTrailingOnes(),
@@ -1250,8 +1436,8 @@ void computeKnownBits(Value *V, APInt &KnownZero, APInt &KnownOne,
KnownOne2 = APInt(BitWidth, 0);
// Recurse, but cap the recursion to one level, because we don't
// want to waste time spinning around in loops.
- computeKnownBits(P->getIncomingValue(i), KnownZero2, KnownOne2, TD,
- MaxDepth-1, Q);
+ computeKnownBits(P->getIncomingValue(i), KnownZero2, KnownOne2, DL,
+ MaxDepth - 1, Q);
KnownZero &= KnownZero2;
KnownOne &= KnownOne2;
// If all bits have been ruled out, there's no need to check
@@ -1303,19 +1489,19 @@ void computeKnownBits(Value *V, APInt &KnownZero, APInt &KnownOne,
case Intrinsic::sadd_with_overflow:
computeKnownBitsAddSub(true, II->getArgOperand(0),
II->getArgOperand(1), false, KnownZero,
- KnownOne, KnownZero2, KnownOne2, TD, Depth, Q);
+ KnownOne, KnownZero2, KnownOne2, DL, Depth, Q);
break;
case Intrinsic::usub_with_overflow:
case Intrinsic::ssub_with_overflow:
computeKnownBitsAddSub(false, II->getArgOperand(0),
II->getArgOperand(1), false, KnownZero,
- KnownOne, KnownZero2, KnownOne2, TD, Depth, Q);
+ KnownOne, KnownZero2, KnownOne2, DL, Depth, Q);
break;
case Intrinsic::umul_with_overflow:
case Intrinsic::smul_with_overflow:
- computeKnownBitsMul(II->getArgOperand(0), II->getArgOperand(1),
- false, KnownZero, KnownOne,
- KnownZero2, KnownOne2, TD, Depth, Q);
+ computeKnownBitsMul(II->getArgOperand(0), II->getArgOperand(1), false,
+ KnownZero, KnownOne, KnownZero2, KnownOne2, DL,
+ Depth, Q);
break;
}
}
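
Editor's aside, not part of the patch: every computeKnownBits overload touched above shares one convention, a (KnownZero, KnownOne) pair of masks in which a set bit marks a bit proven 0 or proven 1, while bits clear in both masks are unknown. A minimal standalone sketch of that convention over plain 8-bit masks rather than APInt:

  #include <cassert>
  #include <cstdint>

  struct KnownBits8 {
    uint8_t Zero; // bit set => that bit of the value is provably 0
    uint8_t One;  // bit set => that bit of the value is provably 1
  };

  // Known bits for (A & B): a result bit is 0 if it is 0 in either input,
  // and 1 only if it is 1 in both inputs.
  KnownBits8 knownAnd(KnownBits8 A, KnownBits8 B) {
    return { uint8_t(A.Zero | B.Zero), uint8_t(A.One & B.One) };
  }

  int main() {
    KnownBits8 A{0xF0, 0x01}; // high nibble known 0, bit 0 known 1
    KnownBits8 B{0x0F, 0x10}; // low nibble known 0, bit 4 known 1
    KnownBits8 R = knownAnd(A, B);
    assert((R.Zero & R.One) == 0); // no bit may be both 0 and 1
    assert(R.Zero == 0xFF);        // A & B is provably 0 here
    return 0;
  }

The invariant checked in main, that no bit is claimed both zero and one, is exactly what these routines maintain while threading DataLayout through by reference.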
@@ -1328,9 +1514,8 @@ void computeKnownBits(Value *V, APInt &KnownZero, APInt &KnownOne,
/// Determine whether the sign bit is known to be zero or one.
/// Convenience wrapper around computeKnownBits.
void ComputeSignBit(Value *V, bool &KnownZero, bool &KnownOne,
- const DataLayout *TD, unsigned Depth,
- const Query &Q) {
- unsigned BitWidth = getBitWidth(V->getType(), TD);
+ const DataLayout &DL, unsigned Depth, const Query &Q) {
+ unsigned BitWidth = getBitWidth(V->getType(), DL);
if (!BitWidth) {
KnownZero = false;
KnownOne = false;
@@ -1338,7 +1523,7 @@ void ComputeSignBit(Value *V, bool &KnownZero, bool &KnownOne,
}
APInt ZeroBits(BitWidth, 0);
APInt OneBits(BitWidth, 0);
- computeKnownBits(V, ZeroBits, OneBits, TD, Depth, Q);
+ computeKnownBits(V, ZeroBits, OneBits, DL, Depth, Q);
KnownOne = OneBits[BitWidth - 1];
KnownZero = ZeroBits[BitWidth - 1];
}
@@ -1348,7 +1533,7 @@ void ComputeSignBit(Value *V, bool &KnownZero, bool &KnownOne,
/// be a power of two when defined. Supports values with integer or pointer
/// types and vectors of integers.
bool isKnownToBeAPowerOfTwo(Value *V, bool OrZero, unsigned Depth,
- const Query &Q) {
+ const Query &Q, const DataLayout &DL) {
if (Constant *C = dyn_cast<Constant>(V)) {
if (C->isNullValue())
return OrZero;
@@ -1375,20 +1560,19 @@ bool isKnownToBeAPowerOfTwo(Value *V, bool OrZero, unsigned Depth,
// A shift of a power of two is a power of two or zero.
if (OrZero && (match(V, m_Shl(m_Value(X), m_Value())) ||
match(V, m_Shr(m_Value(X), m_Value()))))
- return isKnownToBeAPowerOfTwo(X, /*OrZero*/true, Depth, Q);
+ return isKnownToBeAPowerOfTwo(X, /*OrZero*/ true, Depth, Q, DL);
if (ZExtInst *ZI = dyn_cast<ZExtInst>(V))
- return isKnownToBeAPowerOfTwo(ZI->getOperand(0), OrZero, Depth, Q);
+ return isKnownToBeAPowerOfTwo(ZI->getOperand(0), OrZero, Depth, Q, DL);
if (SelectInst *SI = dyn_cast<SelectInst>(V))
- return
- isKnownToBeAPowerOfTwo(SI->getTrueValue(), OrZero, Depth, Q) &&
- isKnownToBeAPowerOfTwo(SI->getFalseValue(), OrZero, Depth, Q);
+ return isKnownToBeAPowerOfTwo(SI->getTrueValue(), OrZero, Depth, Q, DL) &&
+ isKnownToBeAPowerOfTwo(SI->getFalseValue(), OrZero, Depth, Q, DL);
if (OrZero && match(V, m_And(m_Value(X), m_Value(Y)))) {
// A power of two and'd with anything is a power of two or zero.
- if (isKnownToBeAPowerOfTwo(X, /*OrZero*/true, Depth, Q) ||
- isKnownToBeAPowerOfTwo(Y, /*OrZero*/true, Depth, Q))
+ if (isKnownToBeAPowerOfTwo(X, /*OrZero*/ true, Depth, Q, DL) ||
+ isKnownToBeAPowerOfTwo(Y, /*OrZero*/ true, Depth, Q, DL))
return true;
// X & (-X) is always a power of two or zero.
if (match(X, m_Neg(m_Specific(Y))) || match(Y, m_Neg(m_Specific(X))))
@@ -1403,19 +1587,19 @@ bool isKnownToBeAPowerOfTwo(Value *V, bool OrZero, unsigned Depth,
if (OrZero || VOBO->hasNoUnsignedWrap() || VOBO->hasNoSignedWrap()) {
if (match(X, m_And(m_Specific(Y), m_Value())) ||
match(X, m_And(m_Value(), m_Specific(Y))))
- if (isKnownToBeAPowerOfTwo(Y, OrZero, Depth, Q))
+ if (isKnownToBeAPowerOfTwo(Y, OrZero, Depth, Q, DL))
return true;
if (match(Y, m_And(m_Specific(X), m_Value())) ||
match(Y, m_And(m_Value(), m_Specific(X))))
- if (isKnownToBeAPowerOfTwo(X, OrZero, Depth, Q))
+ if (isKnownToBeAPowerOfTwo(X, OrZero, Depth, Q, DL))
return true;
unsigned BitWidth = V->getType()->getScalarSizeInBits();
APInt LHSZeroBits(BitWidth, 0), LHSOneBits(BitWidth, 0);
- computeKnownBits(X, LHSZeroBits, LHSOneBits, nullptr, Depth, Q);
+ computeKnownBits(X, LHSZeroBits, LHSOneBits, DL, Depth, Q);
APInt RHSZeroBits(BitWidth, 0), RHSOneBits(BitWidth, 0);
- computeKnownBits(Y, RHSZeroBits, RHSOneBits, nullptr, Depth, Q);
+ computeKnownBits(Y, RHSZeroBits, RHSOneBits, DL, Depth, Q);
// If i8 V is a power of two or zero:
// ZeroBits: 1 1 1 0 1 1 1 1
// ~ZeroBits: 0 0 0 1 0 0 0 0
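
The ZeroBits table in the comment just above encodes the fact that a power-of-two-or-zero value has at most one set bit, so every other bit is known zero. A hedged standalone illustration of that invariant (plain unsigned arithmetic, not the APInt code being patched):

  #include <cassert>
  #include <cstdint>

  // A value is a power of two or zero iff clearing its lowest set bit
  // leaves zero, i.e. at most one bit is set.
  bool isPowerOfTwoOrZero(uint8_t V) { return (V & (V - 1)) == 0; }

  int main() {
    for (unsigned V = 0; V < 256; ++V) {
      if (!isPowerOfTwoOrZero(uint8_t(V)))
        continue;
      // Every bit except (at most) the single set one is zero, which is
      // what the ZeroBits/~ZeroBits comment above relies on.
      assert(__builtin_popcount(V) <= 1);
    }
    return 0;
  }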
@@ -1433,7 +1617,7 @@ bool isKnownToBeAPowerOfTwo(Value *V, bool OrZero, unsigned Depth,
if (match(V, m_Exact(m_LShr(m_Value(), m_Value()))) ||
match(V, m_Exact(m_UDiv(m_Value(), m_Value())))) {
return isKnownToBeAPowerOfTwo(cast<Operator>(V)->getOperand(0), OrZero,
- Depth, Q);
+ Depth, Q, DL);
}
return false;
@@ -1445,7 +1629,7 @@ bool isKnownToBeAPowerOfTwo(Value *V, bool OrZero, unsigned Depth,
/// to be non-null.
///
/// Currently this routine does not support vector GEPs.
-static bool isGEPKnownNonNull(GEPOperator *GEP, const DataLayout *DL,
+static bool isGEPKnownNonNull(GEPOperator *GEP, const DataLayout &DL,
unsigned Depth, const Query &Q) {
if (!GEP->isInBounds() || GEP->getPointerAddressSpace() != 0)
return false;
@@ -1458,10 +1642,6 @@ static bool isGEPKnownNonNull(GEPOperator *GEP, const DataLayout *DL,
if (isKnownNonZero(GEP->getPointerOperand(), DL, Depth, Q))
return true;
- // Past this, if we don't have DataLayout, we can't do much.
- if (!DL)
- return false;
-
// Walk the GEP operands and see if any operand introduces a non-zero offset.
// If so, then the GEP cannot produce a null pointer, as doing so would
// inherently violate the inbounds contract within address space zero.
@@ -1471,7 +1651,7 @@ static bool isGEPKnownNonNull(GEPOperator *GEP, const DataLayout *DL,
if (StructType *STy = dyn_cast<StructType>(*GTI)) {
ConstantInt *OpC = cast<ConstantInt>(GTI.getOperand());
unsigned ElementIdx = OpC->getZExtValue();
- const StructLayout *SL = DL->getStructLayout(STy);
+ const StructLayout *SL = DL.getStructLayout(STy);
uint64_t ElementOffset = SL->getElementOffset(ElementIdx);
if (ElementOffset > 0)
return true;
@@ -1479,7 +1659,7 @@ static bool isGEPKnownNonNull(GEPOperator *GEP, const DataLayout *DL,
}
// If we have a zero-sized type, the index doesn't matter. Keep looping.
- if (DL->getTypeAllocSize(GTI.getIndexedType()) == 0)
+ if (DL.getTypeAllocSize(GTI.getIndexedType()) == 0)
continue;
// Fast path the constant operand case both for efficiency and so we don't
@@ -1528,7 +1708,7 @@ static bool rangeMetadataExcludesValue(MDNode* Ranges,
/// For vectors return true if every element is known to be non-zero when
/// defined. Supports values with integer or pointer type and vectors of
/// integers.
-bool isKnownNonZero(Value *V, const DataLayout *TD, unsigned Depth,
+bool isKnownNonZero(Value *V, const DataLayout &DL, unsigned Depth,
const Query &Q) {
if (Constant *C = dyn_cast<Constant>(V)) {
if (C->isNullValue())
@@ -1561,21 +1741,20 @@ bool isKnownNonZero(Value *V, const DataLayout *TD, unsigned Depth,
if (isKnownNonNull(V))
return true;
if (GEPOperator *GEP = dyn_cast<GEPOperator>(V))
- if (isGEPKnownNonNull(GEP, TD, Depth, Q))
+ if (isGEPKnownNonNull(GEP, DL, Depth, Q))
return true;
}
- unsigned BitWidth = getBitWidth(V->getType()->getScalarType(), TD);
+ unsigned BitWidth = getBitWidth(V->getType()->getScalarType(), DL);
// X | Y != 0 if X != 0 or Y != 0.
Value *X = nullptr, *Y = nullptr;
if (match(V, m_Or(m_Value(X), m_Value(Y))))
- return isKnownNonZero(X, TD, Depth, Q) ||
- isKnownNonZero(Y, TD, Depth, Q);
+ return isKnownNonZero(X, DL, Depth, Q) || isKnownNonZero(Y, DL, Depth, Q);
// ext X != 0 if X != 0.
if (isa<SExtInst>(V) || isa<ZExtInst>(V))
- return isKnownNonZero(cast<Instruction>(V)->getOperand(0), TD, Depth, Q);
+ return isKnownNonZero(cast<Instruction>(V)->getOperand(0), DL, Depth, Q);
// shl X, Y != 0 if X is odd. Note that the value of the shift is undefined
// if the lowest bit is shifted off the end.
@@ -1583,11 +1762,11 @@ bool isKnownNonZero(Value *V, const DataLayout *TD, unsigned Depth,
// shl nuw can't remove any non-zero bits.
OverflowingBinaryOperator *BO = cast<OverflowingBinaryOperator>(V);
if (BO->hasNoUnsignedWrap())
- return isKnownNonZero(X, TD, Depth, Q);
+ return isKnownNonZero(X, DL, Depth, Q);
APInt KnownZero(BitWidth, 0);
APInt KnownOne(BitWidth, 0);
- computeKnownBits(X, KnownZero, KnownOne, TD, Depth, Q);
+ computeKnownBits(X, KnownZero, KnownOne, DL, Depth, Q);
if (KnownOne[0])
return true;
}
@@ -1597,29 +1776,28 @@ bool isKnownNonZero(Value *V, const DataLayout *TD, unsigned Depth,
// shr exact can only shift out zero bits.
PossiblyExactOperator *BO = cast<PossiblyExactOperator>(V);
if (BO->isExact())
- return isKnownNonZero(X, TD, Depth, Q);
+ return isKnownNonZero(X, DL, Depth, Q);
bool XKnownNonNegative, XKnownNegative;
- ComputeSignBit(X, XKnownNonNegative, XKnownNegative, TD, Depth, Q);
+ ComputeSignBit(X, XKnownNonNegative, XKnownNegative, DL, Depth, Q);
if (XKnownNegative)
return true;
}
// div exact can only produce a zero if the dividend is zero.
else if (match(V, m_Exact(m_IDiv(m_Value(X), m_Value())))) {
- return isKnownNonZero(X, TD, Depth, Q);
+ return isKnownNonZero(X, DL, Depth, Q);
}
// X + Y.
else if (match(V, m_Add(m_Value(X), m_Value(Y)))) {
bool XKnownNonNegative, XKnownNegative;
bool YKnownNonNegative, YKnownNegative;
- ComputeSignBit(X, XKnownNonNegative, XKnownNegative, TD, Depth, Q);
- ComputeSignBit(Y, YKnownNonNegative, YKnownNegative, TD, Depth, Q);
+ ComputeSignBit(X, XKnownNonNegative, XKnownNegative, DL, Depth, Q);
+ ComputeSignBit(Y, YKnownNonNegative, YKnownNegative, DL, Depth, Q);
// If X and Y are both non-negative (as signed values) then their sum is not
// zero unless both X and Y are zero.
if (XKnownNonNegative && YKnownNonNegative)
- if (isKnownNonZero(X, TD, Depth, Q) ||
- isKnownNonZero(Y, TD, Depth, Q))
+ if (isKnownNonZero(X, DL, Depth, Q) || isKnownNonZero(Y, DL, Depth, Q))
return true;
// If X and Y are both negative (as signed values) then their sum is not
@@ -1630,22 +1808,22 @@ bool isKnownNonZero(Value *V, const DataLayout *TD, unsigned Depth,
APInt Mask = APInt::getSignedMaxValue(BitWidth);
// The sign bit of X is set. If some other bit is set then X is not equal
// to INT_MIN.
- computeKnownBits(X, KnownZero, KnownOne, TD, Depth, Q);
+ computeKnownBits(X, KnownZero, KnownOne, DL, Depth, Q);
if ((KnownOne & Mask) != 0)
return true;
// The sign bit of Y is set. If some other bit is set then Y is not equal
// to INT_MIN.
- computeKnownBits(Y, KnownZero, KnownOne, TD, Depth, Q);
+ computeKnownBits(Y, KnownZero, KnownOne, DL, Depth, Q);
if ((KnownOne & Mask) != 0)
return true;
}
// The sum of a non-negative number and a power of two is not zero.
if (XKnownNonNegative &&
- isKnownToBeAPowerOfTwo(Y, /*OrZero*/false, Depth, Q))
+ isKnownToBeAPowerOfTwo(Y, /*OrZero*/ false, Depth, Q, DL))
return true;
if (YKnownNonNegative &&
- isKnownToBeAPowerOfTwo(X, /*OrZero*/false, Depth, Q))
+ isKnownToBeAPowerOfTwo(X, /*OrZero*/ false, Depth, Q, DL))
return true;
}
// X * Y.
@@ -1654,21 +1832,20 @@ bool isKnownNonZero(Value *V, const DataLayout *TD, unsigned Depth,
// If X and Y are non-zero then so is X * Y as long as the multiplication
// does not overflow.
if ((BO->hasNoSignedWrap() || BO->hasNoUnsignedWrap()) &&
- isKnownNonZero(X, TD, Depth, Q) &&
- isKnownNonZero(Y, TD, Depth, Q))
+ isKnownNonZero(X, DL, Depth, Q) && isKnownNonZero(Y, DL, Depth, Q))
return true;
}
// (C ? X : Y) != 0 if X != 0 and Y != 0.
else if (SelectInst *SI = dyn_cast<SelectInst>(V)) {
- if (isKnownNonZero(SI->getTrueValue(), TD, Depth, Q) &&
- isKnownNonZero(SI->getFalseValue(), TD, Depth, Q))
+ if (isKnownNonZero(SI->getTrueValue(), DL, Depth, Q) &&
+ isKnownNonZero(SI->getFalseValue(), DL, Depth, Q))
return true;
}
if (!BitWidth) return false;
APInt KnownZero(BitWidth, 0);
APInt KnownOne(BitWidth, 0);
- computeKnownBits(V, KnownZero, KnownOne, TD, Depth, Q);
+ computeKnownBits(V, KnownZero, KnownOne, DL, Depth, Q);
return KnownOne != 0;
}
@@ -1677,15 +1854,14 @@ bool isKnownNonZero(Value *V, const DataLayout *TD, unsigned Depth,
/// cannot have.
///
/// This function is defined on values with integer type, values with pointer
-/// type (but only if TD is non-null), and vectors of integers. In the case
+/// type, and vectors of integers. In the case
/// where V is a vector, the mask, known zero, and known one values are the
/// same width as the vector element, and the bit is set only if it is true
/// for all of the elements in the vector.
-bool MaskedValueIsZero(Value *V, const APInt &Mask,
- const DataLayout *TD, unsigned Depth,
- const Query &Q) {
+bool MaskedValueIsZero(Value *V, const APInt &Mask, const DataLayout &DL,
+ unsigned Depth, const Query &Q) {
APInt KnownZero(Mask.getBitWidth(), 0), KnownOne(Mask.getBitWidth(), 0);
- computeKnownBits(V, KnownZero, KnownOne, TD, Depth, Q);
+ computeKnownBits(V, KnownZero, KnownOne, DL, Depth, Q);
return (KnownZero & Mask) == Mask;
}
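
As rewritten above, MaskedValueIsZero reduces to a subset test: every bit selected by Mask must appear in KnownZero. A tiny plain-integer sketch of the same check, for illustration only:

  #include <cassert>
  #include <cstdint>

  // All bits of Mask are provably zero in the value iff Mask is a subset
  // of the KnownZero mask.
  bool maskedValueIsZero(uint32_t KnownZero, uint32_t Mask) {
    return (KnownZero & Mask) == Mask;
  }

  int main() {
    uint32_t KnownZero = 0xFFFF0000; // high half proven zero
    assert(maskedValueIsZero(KnownZero, 0xFF000000));  // subset: yes
    assert(!maskedValueIsZero(KnownZero, 0x000000FF)); // low bits unknown
    return 0;
  }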
@@ -1699,14 +1875,9 @@ bool MaskedValueIsZero(Value *V, const APInt &Mask,
///
/// 'Op' must have a scalar integer type.
///
-unsigned ComputeNumSignBits(Value *V, const DataLayout *TD,
- unsigned Depth, const Query &Q) {
- assert((TD || V->getType()->isIntOrIntVectorTy()) &&
- "ComputeNumSignBits requires a DataLayout object to operate "
- "on non-integer values!");
- Type *Ty = V->getType();
- unsigned TyBits = TD ? TD->getTypeSizeInBits(V->getType()->getScalarType()) :
- Ty->getScalarSizeInBits();
+unsigned ComputeNumSignBits(Value *V, const DataLayout &DL, unsigned Depth,
+ const Query &Q) {
+ unsigned TyBits = DL.getTypeSizeInBits(V->getType()->getScalarType());
unsigned Tmp, Tmp2;
unsigned FirstAnswer = 1;
@@ -1721,10 +1892,63 @@ unsigned ComputeNumSignBits(Value *V, const DataLayout *TD,
default: break;
case Instruction::SExt:
Tmp = TyBits - U->getOperand(0)->getType()->getScalarSizeInBits();
- return ComputeNumSignBits(U->getOperand(0), TD, Depth+1, Q) + Tmp;
+ return ComputeNumSignBits(U->getOperand(0), DL, Depth + 1, Q) + Tmp;
+
+ case Instruction::SDiv: {
+ const APInt *Denominator;
+ // sdiv X, C -> adds log(C) sign bits.
+ if (match(U->getOperand(1), m_APInt(Denominator))) {
+
+ // Ignore non-positive denominator.
+ if (!Denominator->isStrictlyPositive())
+ break;
+
+ // Calculate the incoming numerator bits.
+ unsigned NumBits = ComputeNumSignBits(U->getOperand(0), DL, Depth + 1, Q);
+
+ // Add floor(log(C)) bits to the numerator bits.
+ return std::min(TyBits, NumBits + Denominator->logBase2());
+ }
+ break;
+ }
+
+ case Instruction::SRem: {
+ const APInt *Denominator;
+ // srem X, C -> we know that the result is within [-C+1,C) when C is a
+ // positive constant. This lets us put a lower bound on the number of sign
+ // bits.
+ if (match(U->getOperand(1), m_APInt(Denominator))) {
+
+ // Ignore non-positive denominator.
+ if (!Denominator->isStrictlyPositive())
+ break;
+
+ // Calculate the incoming numerator bits. SRem by a positive constant
+ // can't lower the number of sign bits.
+ unsigned NumrBits =
+ ComputeNumSignBits(U->getOperand(0), DL, Depth + 1, Q);
+
+ // Calculate the leading sign bit constraints by examining the
+ // denominator. Given that the denominator is positive, there are two
+ // cases:
+ //
+ // 1. the numerator is positive. The result range is [0,C), and every
+ // value in [0,C) is u< (1 << ceilLogBase2(C)).
+
+ // 2. the numerator is negative. The result range is then (-C,0], and
+ // every value in (-C,0] is either 0 or u> (-1 << ceilLogBase2(C)).
+ //
+ // Thus a lower bound on the number of sign bits is `TyBits -
+ // ceilLogBase2(C)`.
+
+ unsigned ResBits = TyBits - Denominator->ceilLogBase2();
+ return std::max(NumrBits, ResBits);
+ }
+ break;
+ }
case Instruction::AShr: {
- Tmp = ComputeNumSignBits(U->getOperand(0), TD, Depth+1, Q);
+ Tmp = ComputeNumSignBits(U->getOperand(0), DL, Depth + 1, Q);
// ashr X, C -> adds C sign bits. Vectors too.
const APInt *ShAmt;
if (match(U->getOperand(1), m_APInt(ShAmt))) {
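
To make the new SRem case concrete: for a 32-bit x srem 24 the result lies in (-24, 24), ceilLogBase2(24) is 5, so at least 32 - 5 = 27 sign bits survive regardless of x. A standalone spot check of that bound (plain C++, assuming C++'s % matches srem for these inputs):

  #include <cassert>
  #include <cstdint>

  // Number of redundant sign bits in a 32-bit value (the sign bit itself
  // counts as one), mirroring what ComputeNumSignBits reports.
  unsigned numSignBits(int32_t V) {
    unsigned N = 1;
    while (N < 32 && ((V >> 31) & 1) == ((V >> (31 - N)) & 1))
      ++N;
    return N;
  }

  int main() {
    const unsigned Bound = 32 - 5; // TyBits - ceilLogBase2(24)
    for (int64_t X = -100000; X <= 100000; X += 7) {
      int32_t R = int32_t(X) % 24; // same result as srem for these inputs
      assert(numSignBits(R) >= Bound);
    }
    return 0;
  }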
@@ -1737,7 +1961,7 @@ unsigned ComputeNumSignBits(Value *V, const DataLayout *TD,
const APInt *ShAmt;
if (match(U->getOperand(1), m_APInt(ShAmt))) {
// shl destroys sign bits.
- Tmp = ComputeNumSignBits(U->getOperand(0), TD, Depth+1, Q);
+ Tmp = ComputeNumSignBits(U->getOperand(0), DL, Depth + 1, Q);
Tmp2 = ShAmt->getZExtValue();
if (Tmp2 >= TyBits || // Bad shift.
Tmp2 >= Tmp) break; // Shifted all sign bits out.
@@ -1749,9 +1973,9 @@ unsigned ComputeNumSignBits(Value *V, const DataLayout *TD,
case Instruction::Or:
case Instruction::Xor: // NOT is handled here.
// Logical binary ops preserve the number of sign bits at the worst.
- Tmp = ComputeNumSignBits(U->getOperand(0), TD, Depth+1, Q);
+ Tmp = ComputeNumSignBits(U->getOperand(0), DL, Depth + 1, Q);
if (Tmp != 1) {
- Tmp2 = ComputeNumSignBits(U->getOperand(1), TD, Depth+1, Q);
+ Tmp2 = ComputeNumSignBits(U->getOperand(1), DL, Depth + 1, Q);
FirstAnswer = std::min(Tmp, Tmp2);
// We computed what we know about the sign bits as our first
// answer. Now proceed to the generic code that uses
@@ -1760,22 +1984,23 @@ unsigned ComputeNumSignBits(Value *V, const DataLayout *TD,
break;
case Instruction::Select:
- Tmp = ComputeNumSignBits(U->getOperand(1), TD, Depth+1, Q);
+ Tmp = ComputeNumSignBits(U->getOperand(1), DL, Depth + 1, Q);
if (Tmp == 1) return 1; // Early out.
- Tmp2 = ComputeNumSignBits(U->getOperand(2), TD, Depth+1, Q);
+ Tmp2 = ComputeNumSignBits(U->getOperand(2), DL, Depth + 1, Q);
return std::min(Tmp, Tmp2);
case Instruction::Add:
// Add can have at most one carry bit. Thus we know that the output
// is, at worst, one more bit than the inputs.
- Tmp = ComputeNumSignBits(U->getOperand(0), TD, Depth+1, Q);
+ Tmp = ComputeNumSignBits(U->getOperand(0), DL, Depth + 1, Q);
if (Tmp == 1) return 1; // Early out.
// Special case decrementing a value (ADD X, -1):
if (const auto *CRHS = dyn_cast<Constant>(U->getOperand(1)))
if (CRHS->isAllOnesValue()) {
APInt KnownZero(TyBits, 0), KnownOne(TyBits, 0);
- computeKnownBits(U->getOperand(0), KnownZero, KnownOne, TD, Depth+1, Q);
+ computeKnownBits(U->getOperand(0), KnownZero, KnownOne, DL, Depth + 1,
+ Q);
// If the input is known to be 0 or 1, the output is 0/-1, which is all
// sign bits set.
@@ -1788,19 +2013,20 @@ unsigned ComputeNumSignBits(Value *V, const DataLayout *TD,
return Tmp;
}
- Tmp2 = ComputeNumSignBits(U->getOperand(1), TD, Depth+1, Q);
+ Tmp2 = ComputeNumSignBits(U->getOperand(1), DL, Depth + 1, Q);
if (Tmp2 == 1) return 1;
return std::min(Tmp, Tmp2)-1;
case Instruction::Sub:
- Tmp2 = ComputeNumSignBits(U->getOperand(1), TD, Depth+1, Q);
+ Tmp2 = ComputeNumSignBits(U->getOperand(1), DL, Depth + 1, Q);
if (Tmp2 == 1) return 1;
// Handle NEG.
if (const auto *CLHS = dyn_cast<Constant>(U->getOperand(0)))
if (CLHS->isNullValue()) {
APInt KnownZero(TyBits, 0), KnownOne(TyBits, 0);
- computeKnownBits(U->getOperand(1), KnownZero, KnownOne, TD, Depth+1, Q);
+ computeKnownBits(U->getOperand(1), KnownZero, KnownOne, DL, Depth + 1,
+ Q);
// If the input is known to be 0 or 1, the output is 0/-1, which is all
// sign bits set.
if ((KnownZero | APInt(TyBits, 1)).isAllOnesValue())
@@ -1816,7 +2042,7 @@ unsigned ComputeNumSignBits(Value *V, const DataLayout *TD,
// Sub can have at most one carry bit. Thus we know that the output
// is, at worst, one more bit than the inputs.
- Tmp = ComputeNumSignBits(U->getOperand(0), TD, Depth+1, Q);
+ Tmp = ComputeNumSignBits(U->getOperand(0), DL, Depth + 1, Q);
if (Tmp == 1) return 1; // Early out.
return std::min(Tmp, Tmp2)-1;
@@ -1830,12 +2056,11 @@ unsigned ComputeNumSignBits(Value *V, const DataLayout *TD,
// Take the minimum of all incoming values. This can't infinitely loop
// because of our depth threshold.
- Tmp = ComputeNumSignBits(PN->getIncomingValue(0), TD, Depth+1, Q);
+ Tmp = ComputeNumSignBits(PN->getIncomingValue(0), DL, Depth + 1, Q);
for (unsigned i = 1, e = NumIncomingValues; i != e; ++i) {
if (Tmp == 1) return Tmp;
- Tmp = std::min(Tmp,
- ComputeNumSignBits(PN->getIncomingValue(i), TD,
- Depth+1, Q));
+ Tmp = std::min(
+ Tmp, ComputeNumSignBits(PN->getIncomingValue(i), DL, Depth + 1, Q));
}
return Tmp;
}
@@ -1850,7 +2075,7 @@ unsigned ComputeNumSignBits(Value *V, const DataLayout *TD,
// use this information.
APInt KnownZero(TyBits, 0), KnownOne(TyBits, 0);
APInt Mask;
- computeKnownBits(V, KnownZero, KnownOne, TD, Depth, Q);
+ computeKnownBits(V, KnownZero, KnownOne, DL, Depth, Q);
if (KnownZero.isNegative()) { // sign bit is 0
Mask = KnownZero;
@@ -2132,9 +2357,7 @@ Value *llvm::isBytewiseValue(Value *V) {
if (CI->getBitWidth() % 8 == 0) {
assert(CI->getBitWidth() > 8 && "8 bits should be handled above!");
- // We can check that all bytes of an integer are equal by making use of a
- // little trick: rotate by 8 and check if it's still the same value.
- if (CI->getValue() != CI->getValue().rotl(8))
+ if (!CI->getValue().isSplat(8))
return nullptr;
return ConstantInt::get(V->getContext(), CI->getValue().trunc(8));
}
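
The isBytewiseValue hunk above replaces the rotate-by-8 trick with APInt::isSplat(8); both test that every byte of the integer is equal. A plain-integer sketch of the equivalence, assuming a 32-bit value:

  #include <cassert>
  #include <cstdint>

  // Rotate-left by 8 on a 32-bit value.
  uint32_t rotl8(uint32_t V) { return (V << 8) | (V >> 24); }

  // "Every byte equal" can be tested either by rotating by one byte and
  // comparing, or by replicating the low byte and comparing.
  bool isByteSplatRotate(uint32_t V) { return rotl8(V) == V; }
  bool isByteSplatReplicate(uint32_t V) {
    uint32_t B = V & 0xFF;
    return V == B * 0x01010101u;
  }

  int main() {
    for (uint64_t V = 0; V <= 0xFFFFFFFFull; V += 0x01010101ull / 3)
      assert(isByteSplatRotate(uint32_t(V)) == isByteSplatReplicate(uint32_t(V)));
    assert(isByteSplatRotate(0xABABABABu));
    assert(!isByteSplatRotate(0xABABAB00u));
    return 0;
  }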
@@ -2335,23 +2558,19 @@ Value *llvm::FindInsertedValue(Value *V, ArrayRef<unsigned> idx_range,
/// Analyze the specified pointer to see if it can be expressed as a base
/// pointer plus a constant offset. Return the base and offset to the caller.
Value *llvm::GetPointerBaseWithConstantOffset(Value *Ptr, int64_t &Offset,
- const DataLayout *DL) {
- // Without DataLayout, conservatively assume 64-bit offsets, which is
- // the widest we support.
- unsigned BitWidth = DL ? DL->getPointerTypeSizeInBits(Ptr->getType()) : 64;
+ const DataLayout &DL) {
+ unsigned BitWidth = DL.getPointerTypeSizeInBits(Ptr->getType());
APInt ByteOffset(BitWidth, 0);
while (1) {
if (Ptr->getType()->isVectorTy())
break;
if (GEPOperator *GEP = dyn_cast<GEPOperator>(Ptr)) {
- if (DL) {
- APInt GEPOffset(BitWidth, 0);
- if (!GEP->accumulateConstantOffset(*DL, GEPOffset))
- break;
+ APInt GEPOffset(BitWidth, 0);
+ if (!GEP->accumulateConstantOffset(DL, GEPOffset))
+ break;
- ByteOffset += GEPOffset;
- }
+ ByteOffset += GEPOffset;
Ptr = GEP->getPointerOperand();
} else if (Operator::getOpcode(Ptr) == Instruction::BitCast ||
@@ -2380,7 +2599,7 @@ bool llvm::getConstantStringInfo(const Value *V, StringRef &Str,
// Look through bitcast instructions and geps.
V = V->stripPointerCasts();
- // If the value is a GEP instructionor constant expression, treat it as an
+ // If the value is a GEP instruction or constant expression, treat it as an
// offset.
if (const GEPOperator *GEP = dyn_cast<GEPOperator>(V)) {
// Make sure the GEP has exactly three arguments.
@@ -2407,7 +2626,8 @@ bool llvm::getConstantStringInfo(const Value *V, StringRef &Str,
StartIdx = CI->getZExtValue();
else
return false;
- return getConstantStringInfo(GEP->getOperand(0), Str, StartIdx+Offset);
+ return getConstantStringInfo(GEP->getOperand(0), Str, StartIdx + Offset,
+ TrimAtNul);
}
// The GEP instruction, constant or instruction, must reference a global
@@ -2517,8 +2737,8 @@ uint64_t llvm::GetStringLength(Value *V) {
return Len == ~0ULL ? 1 : Len;
}
-Value *
-llvm::GetUnderlyingObject(Value *V, const DataLayout *TD, unsigned MaxLookup) {
+Value *llvm::GetUnderlyingObject(Value *V, const DataLayout &DL,
+ unsigned MaxLookup) {
if (!V->getType()->isPointerTy())
return V;
for (unsigned Count = 0; MaxLookup == 0 || Count < MaxLookup; ++Count) {
@@ -2535,7 +2755,7 @@ llvm::GetUnderlyingObject(Value *V, const DataLayout *TD, unsigned MaxLookup) {
// See if InstructionSimplify knows any relevant tricks.
if (Instruction *I = dyn_cast<Instruction>(V))
// TODO: Acquire a DominatorTree and AssumptionCache and use them.
- if (Value *Simplified = SimplifyInstruction(I, TD, nullptr)) {
+ if (Value *Simplified = SimplifyInstruction(I, DL, nullptr)) {
V = Simplified;
continue;
}
@@ -2547,17 +2767,14 @@ llvm::GetUnderlyingObject(Value *V, const DataLayout *TD, unsigned MaxLookup) {
return V;
}
-void
-llvm::GetUnderlyingObjects(Value *V,
- SmallVectorImpl<Value *> &Objects,
- const DataLayout *TD,
- unsigned MaxLookup) {
+void llvm::GetUnderlyingObjects(Value *V, SmallVectorImpl<Value *> &Objects,
+ const DataLayout &DL, unsigned MaxLookup) {
SmallPtrSet<Value *, 4> Visited;
SmallVector<Value *, 4> Worklist;
Worklist.push_back(V);
do {
Value *P = Worklist.pop_back_val();
- P = GetUnderlyingObject(P, TD, MaxLookup);
+ P = GetUnderlyingObject(P, DL, MaxLookup);
if (!Visited.insert(P).second)
continue;
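
GetUnderlyingObjects above is the standard worklist-plus-visited-set walk; the !Visited.insert(P).second idiom records and deduplicates in a single call. A generic standalone sketch of the pattern using the standard library instead of SmallPtrSet:

  #include <cassert>
  #include <unordered_set>
  #include <vector>

  // Visit every node reachable from Root exactly once, even with cycles.
  template <typename Node, typename SuccFn, typename VisitFn>
  void walk(Node *Root, SuccFn Succs, VisitFn Visit) {
    std::unordered_set<Node *> Visited;
    std::vector<Node *> Worklist{Root};
    while (!Worklist.empty()) {
      Node *N = Worklist.back();
      Worklist.pop_back();
      if (!Visited.insert(N).second) // already seen: skip (handles cycles)
        continue;
      Visit(N);
      for (Node *S : Succs(N))
        Worklist.push_back(S);
    }
  }

  struct G { std::vector<G *> Succ; };

  int main() {
    G A, B;
    A.Succ = {&B, &A}; // self-loop plus edge to B
    B.Succ = {&A};     // back-edge: a cycle
    int Count = 0;
    walk(&A, [](G *N) { return N->Succ; }, [&](G *) { ++Count; });
    assert(Count == 2); // each node visited once despite the cycle
    return 0;
  }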
@@ -2591,8 +2808,7 @@ bool llvm::onlyUsedByLifetimeMarkers(const Value *V) {
return true;
}
-bool llvm::isSafeToSpeculativelyExecute(const Value *V,
- const DataLayout *TD) {
+bool llvm::isSafeToSpeculativelyExecute(const Value *V) {
const Operator *Inst = dyn_cast<Operator>(V);
if (!Inst)
return false;
@@ -2638,7 +2854,8 @@ bool llvm::isSafeToSpeculativelyExecute(const Value *V,
// Speculative load may create a race that did not exist in the source.
LI->getParent()->getParent()->hasFnAttribute(Attribute::SanitizeThread))
return false;
- return LI->getPointerOperand()->isDereferenceablePointer(TD);
+ const DataLayout &DL = LI->getModule()->getDataLayout();
+ return LI->getPointerOperand()->isDereferenceablePointer(DL);
}
case Instruction::Call: {
if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(Inst)) {
@@ -2730,7 +2947,7 @@ bool llvm::isKnownNonNull(const Value *V, const TargetLibraryInfo *TLI) {
}
OverflowResult llvm::computeOverflowForUnsignedMul(Value *LHS, Value *RHS,
- const DataLayout *DL,
+ const DataLayout &DL,
AssumptionCache *AC,
const Instruction *CxtI,
const DominatorTree *DT) {
@@ -2780,7 +2997,7 @@ OverflowResult llvm::computeOverflowForUnsignedMul(Value *LHS, Value *RHS,
}
OverflowResult llvm::computeOverflowForUnsignedAdd(Value *LHS, Value *RHS,
- const DataLayout *DL,
+ const DataLayout &DL,
AssumptionCache *AC,
const Instruction *CxtI,
const DominatorTree *DT) {
diff --git a/lib/AsmParser/LLParser.cpp b/lib/AsmParser/LLParser.cpp
index 9e7354e..103c8c4 100644
--- a/lib/AsmParser/LLParser.cpp
+++ b/lib/AsmParser/LLParser.cpp
@@ -2270,13 +2270,13 @@ bool LLParser::PerFunctionState::SetInstName(int NameID,
/// forward reference record if needed.
BasicBlock *LLParser::PerFunctionState::GetBB(const std::string &Name,
LocTy Loc) {
- return cast_or_null<BasicBlock>(GetVal(Name,
- Type::getLabelTy(F.getContext()), Loc));
+ return dyn_cast_or_null<BasicBlock>(GetVal(Name,
+ Type::getLabelTy(F.getContext()), Loc));
}
BasicBlock *LLParser::PerFunctionState::GetBB(unsigned ID, LocTy Loc) {
- return cast_or_null<BasicBlock>(GetVal(ID,
- Type::getLabelTy(F.getContext()), Loc));
+ return dyn_cast_or_null<BasicBlock>(GetVal(ID,
+ Type::getLabelTy(F.getContext()), Loc));
}
/// DefineBB - Define the specified basic block, which is either named or
@@ -2512,7 +2512,12 @@ bool LLParser::ParseValID(ValID &ID, PerFunctionState *PFS) {
if (!F) {
// Make a global variable as a placeholder for this reference.
- GlobalValue *&FwdRef = ForwardRefBlockAddresses[Fn][Label];
+ GlobalValue *&FwdRef =
+ ForwardRefBlockAddresses.insert(std::make_pair(
+ std::move(Fn),
+ std::map<ValID, GlobalValue *>()))
+ .first->second.insert(std::make_pair(std::move(Label), nullptr))
+ .first->second;
if (!FwdRef)
FwdRef = new GlobalVariable(*M, Type::getInt8Ty(Context), false,
GlobalValue::InternalLinkage, nullptr, "");
@@ -2772,11 +2777,23 @@ bool LLParser::ParseValID(ValID &ID, PerFunctionState *PFS) {
unsigned Opc = Lex.getUIntVal();
SmallVector<Constant*, 16> Elts;
bool InBounds = false;
+ Type *Ty;
Lex.Lex();
+
if (Opc == Instruction::GetElementPtr)
InBounds = EatIfPresent(lltok::kw_inbounds);
- if (ParseToken(lltok::lparen, "expected '(' in constantexpr") ||
- ParseGlobalValueVector(Elts) ||
+
+ if (ParseToken(lltok::lparen, "expected '(' in constantexpr"))
+ return true;
+
+ LocTy ExplicitTypeLoc = Lex.getLoc();
+ if (Opc == Instruction::GetElementPtr) {
+ if (ParseType(Ty) ||
+ ParseToken(lltok::comma, "expected comma after getelementptr's type"))
+ return true;
+ }
+
+ if (ParseGlobalValueVector(Elts) ||
ParseToken(lltok::rparen, "expected ')' in constantexpr"))
return true;
@@ -2787,6 +2804,10 @@ bool LLParser::ParseValID(ValID &ID, PerFunctionState *PFS) {
Type *BaseType = Elts[0]->getType();
auto *BasePointerType = cast<PointerType>(BaseType->getScalarType());
+ if (Ty != BasePointerType->getElementType())
+ return Error(
+ ExplicitTypeLoc,
+ "explicit pointee type doesn't match operand's pointee type");
ArrayRef<Constant *> Indices(Elts.begin() + 1, Elts.end());
for (Constant *Val : Indices) {
@@ -2805,7 +2826,9 @@ bool LLParser::ParseValID(ValID &ID, PerFunctionState *PFS) {
}
}
- if (!Indices.empty() && !BasePointerType->getElementType()->isSized())
+ SmallPtrSet<const Type*, 4> Visited;
+ if (!Indices.empty() &&
+ !BasePointerType->getElementType()->isSized(&Visited))
return Error(ID.Loc, "base element of getelementptr must be sized");
if (!GetElementPtrInst::getIndexedType(Elts[0]->getType(), Indices))
@@ -2976,6 +2999,8 @@ struct ColumnField : public MDUnsignedField {
};
struct DwarfTagField : public MDUnsignedField {
DwarfTagField() : MDUnsignedField(0, dwarf::DW_TAG_hi_user) {}
+ DwarfTagField(dwarf::Tag DefaultTag)
+ : MDUnsignedField(DefaultTag, dwarf::DW_TAG_hi_user) {}
};
struct DwarfAttEncodingField : public MDUnsignedField {
DwarfAttEncodingField() : MDUnsignedField(0, dwarf::DW_ATE_hi_user) {}
@@ -3323,8 +3348,8 @@ bool LLParser::ParseMDLocation(MDNode *&Result, bool IsDistinct) {
PARSE_MD_FIELDS();
#undef VISIT_MD_FIELDS
- auto get = (IsDistinct ? MDLocation::getDistinct : MDLocation::get);
- Result = get(Context, line.Val, column.Val, scope.Val, inlinedAt.Val);
+ Result = GET_OR_DISTINCT(
+ MDLocation, (Context, line.Val, column.Val, scope.Val, inlinedAt.Val));
return false;
}
@@ -3373,7 +3398,7 @@ bool LLParser::ParseMDEnumerator(MDNode *&Result, bool IsDistinct) {
/// ::= !MDBasicType(tag: DW_TAG_base_type, name: "int", size: 32, align: 32)
bool LLParser::ParseMDBasicType(MDNode *&Result, bool IsDistinct) {
#define VISIT_MD_FIELDS(OPTIONAL, REQUIRED) \
- REQUIRED(tag, DwarfTagField, ); \
+ OPTIONAL(tag, DwarfTagField, (dwarf::DW_TAG_base_type)); \
OPTIONAL(name, MDStringField, ); \
OPTIONAL(size, MDUnsignedField, (0, UINT64_MAX)); \
OPTIONAL(align, MDUnsignedField, (0, UINT64_MAX)); \
@@ -3509,7 +3534,7 @@ bool LLParser::ParseMDCompileUnit(MDNode *&Result, bool IsDistinct) {
bool LLParser::ParseMDSubprogram(MDNode *&Result, bool IsDistinct) {
#define VISIT_MD_FIELDS(OPTIONAL, REQUIRED) \
OPTIONAL(scope, MDField, ); \
- REQUIRED(name, MDStringField, ); \
+ OPTIONAL(name, MDStringField, ); \
OPTIONAL(linkageName, MDStringField, ); \
OPTIONAL(file, MDField, ); \
OPTIONAL(line, LineField, ); \
@@ -3604,9 +3629,9 @@ bool LLParser::ParseMDTemplateTypeParameter(MDNode *&Result, bool IsDistinct) {
/// name: "V", type: !1, value: i32 7)
bool LLParser::ParseMDTemplateValueParameter(MDNode *&Result, bool IsDistinct) {
#define VISIT_MD_FIELDS(OPTIONAL, REQUIRED) \
- REQUIRED(tag, DwarfTagField, ); \
+ OPTIONAL(tag, DwarfTagField, (dwarf::DW_TAG_template_value_parameter)); \
OPTIONAL(name, MDStringField, ); \
- REQUIRED(type, MDField, ); \
+ OPTIONAL(type, MDField, ); \
REQUIRED(value, MDField, );
PARSE_MD_FIELDS();
#undef VISIT_MD_FIELDS
@@ -3624,7 +3649,7 @@ bool LLParser::ParseMDTemplateValueParameter(MDNode *&Result, bool IsDistinct) {
bool LLParser::ParseMDGlobalVariable(MDNode *&Result, bool IsDistinct) {
#define VISIT_MD_FIELDS(OPTIONAL, REQUIRED) \
OPTIONAL(scope, MDField, ); \
- REQUIRED(name, MDStringField, ); \
+ OPTIONAL(name, MDStringField, ); \
OPTIONAL(linkageName, MDStringField, ); \
OPTIONAL(file, MDField, ); \
OPTIONAL(line, LineField, ); \
@@ -3710,7 +3735,7 @@ bool LLParser::ParseMDExpression(MDNode *&Result, bool IsDistinct) {
/// getter: "getFoo", attributes: 7, type: !2)
bool LLParser::ParseMDObjCProperty(MDNode *&Result, bool IsDistinct) {
#define VISIT_MD_FIELDS(OPTIONAL, REQUIRED) \
- REQUIRED(name, MDStringField, ); \
+ OPTIONAL(name, MDStringField, ); \
OPTIONAL(file, MDField, ); \
OPTIONAL(line, LineField, ); \
OPTIONAL(setter, MDStringField, ); \
@@ -4297,7 +4322,9 @@ bool LLParser::ParseBasicBlock(PerFunctionState &PFS) {
}
BasicBlock *BB = PFS.DefineBB(Name, NameLoc);
- if (!BB) return true;
+ if (!BB)
+ return Error(NameLoc,
+ "unable to create block named '" + Name + "'");
std::string NameStr;
@@ -5032,7 +5059,7 @@ bool LLParser::ParseLandingPad(Instruction *&Inst, PerFunctionState &PFS) {
ParseTypeAndValue(PersFn, PersFnLoc, PFS))
return true;
- LandingPadInst *LP = LandingPadInst::Create(Ty, PersFn, 0);
+ std::unique_ptr<LandingPadInst> LP(LandingPadInst::Create(Ty, PersFn, 0));
LP->setCleanup(EatIfPresent(lltok::kw_cleanup));
while (Lex.getKind() == lltok::kw_catch || Lex.getKind() == lltok::kw_filter){
@@ -5046,10 +5073,8 @@ bool LLParser::ParseLandingPad(Instruction *&Inst, PerFunctionState &PFS) {
Value *V;
LocTy VLoc;
- if (ParseTypeAndValue(V, VLoc, PFS)) {
- delete LP;
+ if (ParseTypeAndValue(V, VLoc, PFS))
return true;
- }
// A 'catch' type expects a non-array constant. A filter clause expects an
// array constant.
@@ -5061,10 +5086,13 @@ bool LLParser::ParseLandingPad(Instruction *&Inst, PerFunctionState &PFS) {
Error(VLoc, "'filter' clause has an invalid type");
}
- LP->addClause(cast<Constant>(V));
+ Constant *CV = dyn_cast<Constant>(V);
+ if (!CV)
+ return Error(VLoc, "clause argument must be a constant");
+ LP->addClause(CV);
}
- Inst = LP;
+ Inst = LP.release();
return false;
}
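
The landing-pad hunk above trades manual delete-on-error for a std::unique_ptr that is release()d only on success. A minimal sketch of that ownership idiom, with a hypothetical Thing type standing in for LandingPadInst:

  #include <memory>

  struct Thing { int Clauses = 0; };

  bool parseClause(int &Out) { Out = 1; return false; } // stub: always succeeds

  // Returns nullptr on error. The unique_ptr frees Thing on every early
  // return; only the success path hands ownership to the caller.
  Thing *build() {
    std::unique_ptr<Thing> T(new Thing());
    for (int I = 0; I < 3; ++I) {
      int C;
      if (parseClause(C))
        return nullptr; // T deleted automatically here
      T->Clauses += C;
    }
    return T.release(); // success: transfer ownership
  }

  int main() {
    std::unique_ptr<Thing> T(build()); // reclaim ownership for cleanup
    return T && T->Clauses == 3 ? 0 : 1;
  }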
@@ -5241,7 +5269,11 @@ int LLParser::ParseLoad(Instruction *&Inst, PerFunctionState &PFS) {
Lex.Lex();
}
- if (ParseTypeAndValue(Val, Loc, PFS) ||
+ Type *Ty = nullptr;
+ LocTy ExplicitTypeLoc = Lex.getLoc();
+ if (ParseType(Ty) ||
+ ParseToken(lltok::comma, "expected comma after load's type") ||
+ ParseTypeAndValue(Val, Loc, PFS) ||
ParseScopeAndOrdering(isAtomic, Scope, Ordering) ||
ParseOptionalCommaAlign(Alignment, AteExtraComma))
return true;
@@ -5254,6 +5286,10 @@ int LLParser::ParseLoad(Instruction *&Inst, PerFunctionState &PFS) {
if (Ordering == Release || Ordering == AcquireRelease)
return Error(Loc, "atomic load cannot use Release ordering");
+ if (Ty != cast<PointerType>(Val->getType())->getElementType())
+ return Error(ExplicitTypeLoc,
+ "explicit pointee type doesn't match operand's pointee type");
+
Inst = new LoadInst(Val, "", isVolatile, Alignment, Ordering, Scope);
return AteExtraComma ? InstExtraComma : InstNormal;
}
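
For reference, the accepted syntax after this change states the loaded type before the pointer operand; a mismatch now produces the "explicit pointee type" error quoted in the hunk. Illustrative IR only:

  %v = load i32, i32* %p    ; explicit result type, then the pointer operand
  %w = load i64, i32* %p    ; rejected: pointee type mismatch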
@@ -5440,13 +5476,22 @@ int LLParser::ParseGetElementPtr(Instruction *&Inst, PerFunctionState &PFS) {
bool InBounds = EatIfPresent(lltok::kw_inbounds);
- if (ParseTypeAndValue(Ptr, Loc, PFS)) return true;
+ Type *Ty = nullptr;
+ LocTy ExplicitTypeLoc = Lex.getLoc();
+ if (ParseType(Ty) ||
+ ParseToken(lltok::comma, "expected comma after getelementptr's type") ||
+ ParseTypeAndValue(Ptr, Loc, PFS))
+ return true;
Type *BaseType = Ptr->getType();
PointerType *BasePointerType = dyn_cast<PointerType>(BaseType->getScalarType());
if (!BasePointerType)
return Error(Loc, "base of getelementptr must be a pointer");
+ if (Ty != BasePointerType->getElementType())
+ return Error(ExplicitTypeLoc,
+ "explicit pointee type doesn't match operand's pointee type");
+
SmallVector<Value*, 16> Indices;
bool AteExtraComma = false;
while (EatIfPresent(lltok::comma)) {
@@ -5469,12 +5514,14 @@ int LLParser::ParseGetElementPtr(Instruction *&Inst, PerFunctionState &PFS) {
Indices.push_back(Val);
}
- if (!Indices.empty() && !BasePointerType->getElementType()->isSized())
+ SmallPtrSet<const Type*, 4> Visited;
+ if (!Indices.empty() &&
+ !BasePointerType->getElementType()->isSized(&Visited))
return Error(Loc, "base element of getelementptr must be sized");
if (!GetElementPtrInst::getIndexedType(BaseType, Indices))
return Error(Loc, "invalid getelementptr indices");
- Inst = GetElementPtrInst::Create(Ptr, Indices);
+ Inst = GetElementPtrInst::Create(Ty, Ptr, Indices);
if (InBounds)
cast<GetElementPtrInst>(Inst)->setIsInBounds(true);
return AteExtraComma ? InstExtraComma : InstNormal;
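
Likewise for getelementptr, the source element type is now spelled before the pointer operand. Illustrative IR only, following the parsing order in the hunk above:

  %q = getelementptr inbounds i32, i32* %p, i64 1   ; explicit source element type
  %r = getelementptr i64, i32* %p, i64 1            ; rejected: type mismatch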
diff --git a/lib/AsmParser/Parser.cpp b/lib/AsmParser/Parser.cpp
index ed1a753..2e76c0e 100644
--- a/lib/AsmParser/Parser.cpp
+++ b/lib/AsmParser/Parser.cpp
@@ -13,6 +13,7 @@
#include "llvm/AsmParser/Parser.h"
#include "LLParser.h"
+#include "llvm/ADT/STLExtras.h"
#include "llvm/IR/Module.h"
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/SourceMgr.h"
diff --git a/lib/Bitcode/Reader/BitcodeReader.cpp b/lib/Bitcode/Reader/BitcodeReader.cpp
index 92af0f8..84753ff 100644
--- a/lib/Bitcode/Reader/BitcodeReader.cpp
+++ b/lib/Bitcode/Reader/BitcodeReader.cpp
@@ -8,34 +8,375 @@
//===----------------------------------------------------------------------===//
#include "llvm/Bitcode/ReaderWriter.h"
-#include "BitcodeReader.h"
+#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Triple.h"
+#include "llvm/Bitcode/BitstreamReader.h"
#include "llvm/Bitcode/LLVMBitCodes.h"
#include "llvm/IR/AutoUpgrade.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DebugInfoMetadata.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/DiagnosticPrinter.h"
+#include "llvm/IR/GVMaterializer.h"
#include "llvm/IR/InlineAsm.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/OperandTraits.h"
#include "llvm/IR/Operator.h"
+#include "llvm/IR/ValueHandle.h"
#include "llvm/Support/DataStream.h"
#include "llvm/Support/ManagedStatic.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/raw_ostream.h"
-
+#include <deque>
using namespace llvm;
+namespace {
enum {
SWITCH_INST_MAGIC = 0x4B5 // May 2012 => 1205 => Hex
};
+class BitcodeReaderValueList {
+ std::vector<WeakVH> ValuePtrs;
+
+ /// ResolveConstants - As we resolve forward-referenced constants, we add
+ /// information about them to this vector. This allows us to resolve them in
+ /// bulk instead of resolving each reference individually. See the code in
+ /// ResolveConstantForwardRefs for more information about this.
+ ///
+ /// The key of this vector is the placeholder constant, the value is the slot
+ /// number that holds the resolved value.
+ typedef std::vector<std::pair<Constant*, unsigned> > ResolveConstantsTy;
+ ResolveConstantsTy ResolveConstants;
+ LLVMContext &Context;
+public:
+ BitcodeReaderValueList(LLVMContext &C) : Context(C) {}
+ ~BitcodeReaderValueList() {
+ assert(ResolveConstants.empty() && "Constants not resolved?");
+ }
+
+ // vector compatibility methods
+ unsigned size() const { return ValuePtrs.size(); }
+ void resize(unsigned N) { ValuePtrs.resize(N); }
+ void push_back(Value *V) {
+ ValuePtrs.push_back(V);
+ }
+
+ void clear() {
+ assert(ResolveConstants.empty() && "Constants not resolved?");
+ ValuePtrs.clear();
+ }
+
+ Value *operator[](unsigned i) const {
+ assert(i < ValuePtrs.size());
+ return ValuePtrs[i];
+ }
+
+ Value *back() const { return ValuePtrs.back(); }
+ void pop_back() { ValuePtrs.pop_back(); }
+ bool empty() const { return ValuePtrs.empty(); }
+ void shrinkTo(unsigned N) {
+ assert(N <= size() && "Invalid shrinkTo request!");
+ ValuePtrs.resize(N);
+ }
+
+ Constant *getConstantFwdRef(unsigned Idx, Type *Ty);
+ Value *getValueFwdRef(unsigned Idx, Type *Ty);
+
+ void AssignValue(Value *V, unsigned Idx);
+
+ /// ResolveConstantForwardRefs - Once all constants are read, this method bulk
+ /// resolves any forward references.
+ void ResolveConstantForwardRefs();
+};
+
+class BitcodeReaderMDValueList {
+ unsigned NumFwdRefs;
+ bool AnyFwdRefs;
+ unsigned MinFwdRef;
+ unsigned MaxFwdRef;
+ std::vector<TrackingMDRef> MDValuePtrs;
+
+ LLVMContext &Context;
+public:
+ BitcodeReaderMDValueList(LLVMContext &C)
+ : NumFwdRefs(0), AnyFwdRefs(false), Context(C) {}
+
+ // vector compatibility methods
+ unsigned size() const { return MDValuePtrs.size(); }
+ void resize(unsigned N) { MDValuePtrs.resize(N); }
+ void push_back(Metadata *MD) { MDValuePtrs.emplace_back(MD); }
+ void clear() { MDValuePtrs.clear(); }
+ Metadata *back() const { return MDValuePtrs.back(); }
+ void pop_back() { MDValuePtrs.pop_back(); }
+ bool empty() const { return MDValuePtrs.empty(); }
+
+ Metadata *operator[](unsigned i) const {
+ assert(i < MDValuePtrs.size());
+ return MDValuePtrs[i];
+ }
+
+ void shrinkTo(unsigned N) {
+ assert(N <= size() && "Invalid shrinkTo request!");
+ MDValuePtrs.resize(N);
+ }
+
+ Metadata *getValueFwdRef(unsigned Idx);
+ void AssignValue(Metadata *MD, unsigned Idx);
+ void tryToResolveCycles();
+};
+
+class BitcodeReader : public GVMaterializer {
+ LLVMContext &Context;
+ DiagnosticHandlerFunction DiagnosticHandler;
+ Module *TheModule;
+ std::unique_ptr<MemoryBuffer> Buffer;
+ std::unique_ptr<BitstreamReader> StreamFile;
+ BitstreamCursor Stream;
+ DataStreamer *LazyStreamer;
+ uint64_t NextUnreadBit;
+ bool SeenValueSymbolTable;
+
+ std::vector<Type*> TypeList;
+ BitcodeReaderValueList ValueList;
+ BitcodeReaderMDValueList MDValueList;
+ std::vector<Comdat *> ComdatList;
+ SmallVector<Instruction *, 64> InstructionList;
+
+ std::vector<std::pair<GlobalVariable*, unsigned> > GlobalInits;
+ std::vector<std::pair<GlobalAlias*, unsigned> > AliasInits;
+ std::vector<std::pair<Function*, unsigned> > FunctionPrefixes;
+ std::vector<std::pair<Function*, unsigned> > FunctionPrologues;
+
+ SmallVector<Instruction*, 64> InstsWithTBAATag;
+
+ /// MAttributes - The set of attributes by index. Index zero in the
+ /// file is for null, and is thus not represented here. As such all indices
+ /// are off by one.
+ std::vector<AttributeSet> MAttributes;
+
+ /// \brief The set of attribute groups.
+ std::map<unsigned, AttributeSet> MAttributeGroups;
+
+ /// FunctionBBs - While parsing a function body, this is a list of the basic
+ /// blocks for the function.
+ std::vector<BasicBlock*> FunctionBBs;
+
+ // When reading the module header, this list is populated with functions that
+ // have bodies later in the file.
+ std::vector<Function*> FunctionsWithBodies;
+
+ // When intrinsic functions are encountered which require upgrading they are
+ // stored here with their replacement function.
+ typedef std::vector<std::pair<Function*, Function*> > UpgradedIntrinsicMap;
+ UpgradedIntrinsicMap UpgradedIntrinsics;
+
+ // Map the bitcode's custom MDKind ID to the Module's MDKind ID.
+ DenseMap<unsigned, unsigned> MDKindMap;
+
+ // Several operations happen after the module header has been read, but
+ // before function bodies are processed. This keeps track of whether
+ // we've done this yet.
+ bool SeenFirstFunctionBody;
+
+ /// DeferredFunctionInfo - When function bodies are initially scanned, this
+ /// map contains info about where to find each deferred function body in
+ /// the stream.
+ DenseMap<Function*, uint64_t> DeferredFunctionInfo;
+
+ /// When a Metadata block is initially scanned while parsing the module, we may
+ /// choose to defer parsing of the metadata. This vector contains info about
+ /// which Metadata blocks are deferred.
+ std::vector<uint64_t> DeferredMetadataInfo;
+
+ /// These are basic blocks forward-referenced by block addresses. They are
+ /// inserted lazily into functions when they're loaded. The basic block ID is
+ /// its index into the vector.
+ DenseMap<Function *, std::vector<BasicBlock *>> BasicBlockFwdRefs;
+ std::deque<Function *> BasicBlockFwdRefQueue;
+
+ /// UseRelativeIDs - Indicates that we are using a new encoding for
+ /// instruction operands where most operands in the current
+ /// FUNCTION_BLOCK are encoded relative to the instruction number,
+ /// for a more compact encoding. Some instruction operands are not
+ /// relative to the instruction ID: basic block numbers, and types.
+ /// Once the old-style function blocks have been phased out, this flag
+ /// will no longer be needed.
+ bool UseRelativeIDs;
+
+ /// True if all functions will be materialized, negating the need to process
+ /// (e.g.) blockaddress forward references.
+ bool WillMaterializeAllForwardRefs;
+
+ /// Functions that have block addresses taken. This is usually empty.
+ SmallPtrSet<const Function *, 4> BlockAddressesTaken;
+
+ /// True if any Metadata block has been materialized.
+ bool IsMetadataMaterialized;
+
+public:
+ std::error_code Error(BitcodeError E, const Twine &Message);
+ std::error_code Error(BitcodeError E);
+ std::error_code Error(const Twine &Message);
+
+ explicit BitcodeReader(MemoryBuffer *buffer, LLVMContext &C,
+ DiagnosticHandlerFunction DiagnosticHandler);
+ explicit BitcodeReader(DataStreamer *streamer, LLVMContext &C,
+ DiagnosticHandlerFunction DiagnosticHandler);
+ ~BitcodeReader() { FreeState(); }
+
+ std::error_code materializeForwardReferencedFunctions();
+
+ void FreeState();
+
+ void releaseBuffer();
+
+ bool isDematerializable(const GlobalValue *GV) const override;
+ std::error_code materialize(GlobalValue *GV) override;
+ std::error_code MaterializeModule(Module *M) override;
+ std::vector<StructType *> getIdentifiedStructTypes() const override;
+ void Dematerialize(GlobalValue *GV) override;
+
+ /// @brief Main interface to parsing a bitcode buffer.
+ /// @returns true if an error occurred.
+ std::error_code ParseBitcodeInto(Module *M,
+ bool ShouldLazyLoadMetadata = false);
+
+ /// @brief Cheap mechanism to just extract the module triple
+ /// @returns true if an error occurred.
+ ErrorOr<std::string> parseTriple();
+
+ static uint64_t decodeSignRotatedValue(uint64_t V);
+
+ /// Materialize any deferred Metadata block.
+ std::error_code materializeMetadata() override;
+
+private:
+ std::vector<StructType *> IdentifiedStructTypes;
+ StructType *createIdentifiedStructType(LLVMContext &Context, StringRef Name);
+ StructType *createIdentifiedStructType(LLVMContext &Context);
+
+ Type *getTypeByID(unsigned ID);
+ Value *getFnValueByID(unsigned ID, Type *Ty) {
+ if (Ty && Ty->isMetadataTy())
+ return MetadataAsValue::get(Ty->getContext(), getFnMetadataByID(ID));
+ return ValueList.getValueFwdRef(ID, Ty);
+ }
+ Metadata *getFnMetadataByID(unsigned ID) {
+ return MDValueList.getValueFwdRef(ID);
+ }
+ BasicBlock *getBasicBlock(unsigned ID) const {
+ if (ID >= FunctionBBs.size()) return nullptr; // Invalid ID
+ return FunctionBBs[ID];
+ }
+ AttributeSet getAttributes(unsigned i) const {
+ if (i-1 < MAttributes.size())
+ return MAttributes[i-1];
+ return AttributeSet();
+ }
+
+ /// getValueTypePair - Read a value/type pair out of the specified record from
+ /// slot 'Slot'. Increment Slot past the number of slots used in the record.
+ /// Return true on failure.
+ bool getValueTypePair(SmallVectorImpl<uint64_t> &Record, unsigned &Slot,
+ unsigned InstNum, Value *&ResVal) {
+ if (Slot == Record.size()) return true;
+ unsigned ValNo = (unsigned)Record[Slot++];
+ // Adjust the ValNo, if it was encoded relative to the InstNum.
+ if (UseRelativeIDs)
+ ValNo = InstNum - ValNo;
+ if (ValNo < InstNum) {
+ // If this is not a forward reference, just return the value we already
+ // have.
+ ResVal = getFnValueByID(ValNo, nullptr);
+ return ResVal == nullptr;
+ } else if (Slot == Record.size()) {
+ return true;
+ }
+
+ unsigned TypeNo = (unsigned)Record[Slot++];
+ ResVal = getFnValueByID(ValNo, getTypeByID(TypeNo));
+ return ResVal == nullptr;
+ }
+
+ /// popValue - Read a value out of the specified record from slot 'Slot'.
+ /// Increment Slot past the number of slots used by the value in the record.
+ /// Return true if there is an error.
+ bool popValue(SmallVectorImpl<uint64_t> &Record, unsigned &Slot,
+ unsigned InstNum, Type *Ty, Value *&ResVal) {
+ if (getValue(Record, Slot, InstNum, Ty, ResVal))
+ return true;
+ // All values currently take a single record slot.
+ ++Slot;
+ return false;
+ }
+
+ /// getValue -- Like popValue, but does not increment the Slot number.
+ bool getValue(SmallVectorImpl<uint64_t> &Record, unsigned Slot,
+ unsigned InstNum, Type *Ty, Value *&ResVal) {
+ ResVal = getValue(Record, Slot, InstNum, Ty);
+ return ResVal == nullptr;
+ }
+
+ /// getValue -- Version of getValue that returns ResVal directly,
+ /// or 0 if there is an error.
+ Value *getValue(SmallVectorImpl<uint64_t> &Record, unsigned Slot,
+ unsigned InstNum, Type *Ty) {
+ if (Slot == Record.size()) return nullptr;
+ unsigned ValNo = (unsigned)Record[Slot];
+ // Adjust the ValNo, if it was encoded relative to the InstNum.
+ if (UseRelativeIDs)
+ ValNo = InstNum - ValNo;
+ return getFnValueByID(ValNo, Ty);
+ }
+
+ /// getValueSigned -- Like getValue, but decodes signed VBRs.
+ Value *getValueSigned(SmallVectorImpl<uint64_t> &Record, unsigned Slot,
+ unsigned InstNum, Type *Ty) {
+ if (Slot == Record.size()) return nullptr;
+ unsigned ValNo = (unsigned)decodeSignRotatedValue(Record[Slot]);
+ // Adjust the ValNo, if it was encoded relative to the InstNum.
+ if (UseRelativeIDs)
+ ValNo = InstNum - ValNo;
+ return getFnValueByID(ValNo, Ty);
+ }
+
+ /// Converts alignment exponent (i.e. power of two (or zero)) to the
+ /// corresponding alignment to use. If alignment is too large, returns
+ /// a corresponding error code.
+ std::error_code parseAlignmentValue(uint64_t Exponent, unsigned &Alignment);
+ std::error_code ParseAttrKind(uint64_t Code, Attribute::AttrKind *Kind);
+ std::error_code ParseModule(bool Resume, bool ShouldLazyLoadMetadata = false);
+ std::error_code ParseAttributeBlock();
+ std::error_code ParseAttributeGroupBlock();
+ std::error_code ParseTypeTable();
+ std::error_code ParseTypeTableBody();
+
+ std::error_code ParseValueSymbolTable();
+ std::error_code ParseConstants();
+ std::error_code RememberAndSkipFunctionBody();
+ /// Save the positions of the Metadata blocks and skip parsing the blocks.
+ std::error_code rememberAndSkipMetadata();
+ std::error_code ParseFunctionBody(Function *F);
+ std::error_code GlobalCleanup();
+ std::error_code ResolveGlobalAndAliasInits();
+ std::error_code ParseMetadata();
+ std::error_code ParseMetadataAttachment();
+ ErrorOr<std::string> parseModuleTriple();
+ std::error_code ParseUseLists();
+ std::error_code InitStream();
+ std::error_code InitStreamFromBuffer();
+ std::error_code InitLazyStream();
+ std::error_code FindFunctionInStream(
+ Function *F,
+ DenseMap<Function *, uint64_t>::iterator DeferredFunctionInfoIterator);
+};
+} // namespace
+
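
The UseRelativeIDs flag declared in the class above selects between absolute operand numbers and distances back from the current instruction, which keeps common (recently defined) operands small. A standalone sketch of the decode step mirrored by getValue and getValueTypePair:

  #include <cassert>
  #include <cstdint>

  // Decode one operand value number. In the relative encoding, records
  // store the distance back from the current instruction number, so
  // nearby operands encode as small (cheap) VBR values.
  unsigned decodeValNo(uint64_t Slot, unsigned InstNum, bool UseRelativeIDs) {
    unsigned ValNo = unsigned(Slot);
    if (UseRelativeIDs)
      ValNo = InstNum - ValNo;
    return ValNo;
  }

  int main() {
    // Instruction #100 referring to value #98: stored as 2 when relative.
    assert(decodeValNo(2, 100, /*UseRelativeIDs=*/true) == 98);
    // The old absolute encoding stores 98 directly.
    assert(decodeValNo(98, 100, /*UseRelativeIDs=*/false) == 98);
    return 0;
  }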
BitcodeDiagnosticInfo::BitcodeDiagnosticInfo(std::error_code EC,
DiagnosticSeverity Severity,
const Twine &Msg)
@@ -81,7 +422,7 @@ BitcodeReader::BitcodeReader(MemoryBuffer *buffer, LLVMContext &C,
TheModule(nullptr), Buffer(buffer), LazyStreamer(nullptr),
NextUnreadBit(0), SeenValueSymbolTable(false), ValueList(C),
MDValueList(C), SeenFirstFunctionBody(false), UseRelativeIDs(false),
- WillMaterializeAllForwardRefs(false) {}
+ WillMaterializeAllForwardRefs(false), IsMetadataMaterialized(false) {}
BitcodeReader::BitcodeReader(DataStreamer *streamer, LLVMContext &C,
DiagnosticHandlerFunction DiagnosticHandler)
@@ -89,7 +430,7 @@ BitcodeReader::BitcodeReader(DataStreamer *streamer, LLVMContext &C,
TheModule(nullptr), Buffer(nullptr), LazyStreamer(streamer),
NextUnreadBit(0), SeenValueSymbolTable(false), ValueList(C),
MDValueList(C), SeenFirstFunctionBody(false), UseRelativeIDs(false),
- WillMaterializeAllForwardRefs(false) {}
+ WillMaterializeAllForwardRefs(false), IsMetadataMaterialized(false) {}
std::error_code BitcodeReader::materializeForwardReferencedFunctions() {
if (WillMaterializeAllForwardRefs)
@@ -135,6 +476,7 @@ void BitcodeReader::FreeState() {
std::vector<BasicBlock*>().swap(FunctionBBs);
std::vector<Function*>().swap(FunctionsWithBodies);
DeferredFunctionInfo.clear();
+ DeferredMetadataInfo.clear();
MDKindMap.clear();
assert(BasicBlockFwdRefs.empty() && "Unresolved blockaddress fwd references");
@@ -1198,6 +1540,7 @@ std::error_code BitcodeReader::ParseValueSymbolTable() {
static int64_t unrotateSign(uint64_t U) { return U & 1 ? ~(U >> 1) : U >> 1; }
std::error_code BitcodeReader::ParseMetadata() {
+ IsMetadataMaterialized = true;
unsigned NextMDValueNo = MDValueList.size();
if (Stream.EnterSubBlock(bitc::METADATA_BLOCK_ID))
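
unrotateSign above inverts the sign-rotated VBR scheme: a non-negative v is stored as 2v and a negative v as 2(-v-1)+1, so small magnitudes of either sign stay cheap to encode. A worked standalone round trip (the decoder is the exact expression from the hunk; the encoder is inferred from it):

  #include <cassert>
  #include <cstdint>

  // Encode: shift the magnitude up and put the sign in bit 0.
  uint64_t rotateSign(int64_t V) {
    return V >= 0 ? uint64_t(V) << 1 : (uint64_t(~V) << 1) | 1;
  }

  // Decode: the same expression as unrotateSign in the patch.
  int64_t unrotateSign(uint64_t U) { return U & 1 ? ~(U >> 1) : U >> 1; }

  int main() {
    for (int64_t V = -1000; V <= 1000; ++V)
      assert(unrotateSign(rotateSign(V)) == V);
    assert(rotateSign(-3) == 5); // -3 -> ~(-3) = 2 -> (2 << 1) | 1 = 5
    return 0;
  }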
@@ -1348,14 +1691,15 @@ std::error_code BitcodeReader::ParseMetadata() {
if (Record.size() != 5)
return Error("Invalid record");
- auto get = Record[0] ? MDLocation::getDistinct : MDLocation::get;
unsigned Line = Record[1];
unsigned Column = Record[2];
MDNode *Scope = cast<MDNode>(MDValueList.getValueFwdRef(Record[3]));
Metadata *InlinedAt =
Record[4] ? MDValueList.getValueFwdRef(Record[4] - 1) : nullptr;
- MDValueList.AssignValue(get(Context, Line, Column, Scope, InlinedAt),
- NextMDValueNo++);
+ MDValueList.AssignValue(
+ GET_OR_DISTINCT(MDLocation, Record[0],
+ (Context, Line, Column, Scope, InlinedAt)),
+ NextMDValueNo++);
break;
}
case bitc::METADATA_GENERIC_DEBUG: {
@@ -1952,19 +2296,26 @@ std::error_code BitcodeReader::ParseConstants() {
}
case bitc::CST_CODE_CE_INBOUNDS_GEP:
case bitc::CST_CODE_CE_GEP: { // CE_GEP: [n x operands]
- if (Record.size() & 1)
- return Error("Invalid record");
+ unsigned OpNum = 0;
+ Type *PointeeType = nullptr;
+ if (Record.size() % 2)
+ PointeeType = getTypeByID(Record[OpNum++]);
SmallVector<Constant*, 16> Elts;
- for (unsigned i = 0, e = Record.size(); i != e; i += 2) {
- Type *ElTy = getTypeByID(Record[i]);
+ while (OpNum != Record.size()) {
+ Type *ElTy = getTypeByID(Record[OpNum++]);
if (!ElTy)
return Error("Invalid record");
- Elts.push_back(ValueList.getConstantFwdRef(Record[i+1], ElTy));
+ Elts.push_back(ValueList.getConstantFwdRef(Record[OpNum++], ElTy));
}
+
ArrayRef<Constant *> Indices(Elts.begin() + 1, Elts.end());
V = ConstantExpr::getGetElementPtr(Elts[0], Indices,
BitCode ==
bitc::CST_CODE_CE_INBOUNDS_GEP);
+ if (PointeeType &&
+ PointeeType != cast<GEPOperator>(V)->getSourceElementType())
+ return Error("Explicit gep operator type does not match pointee type "
+ "of pointer operand");
break;
}
case bitc::CST_CODE_CE_SELECT: { // CE_SELECT: [opval#, opval#, opval#]
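
The CE_GEP decode above leans on record-length parity for backward compatibility: operands arrive in (type, value) pairs, so an odd-length record must carry one extra leading field, the new explicit pointee type. A sketch of just that framing decision over a hypothetical record vector:

  #include <cassert>
  #include <cstdint>
  #include <vector>

  // Returns the index of the first (type, value) operand pair, and reports
  // whether a leading explicit-pointee-type field is present.
  unsigned decodeCEGepHeader(const std::vector<uint64_t> &Record,
                             bool &HasExplicitType) {
    HasExplicitType = Record.size() % 2; // odd length => extra leading field
    return HasExplicitType ? 1 : 0;
  }

  int main() {
    bool HasTy;
    // Old writers emit pairs only, giving an even length.
    assert(decodeCEGepHeader({7, 3, 7, 4}, HasTy) == 0 && !HasTy);
    // New writers emit the pointee type id first, giving an odd length.
    assert(decodeCEGepHeader({5, 7, 3, 7, 4}, HasTy) == 1 && HasTy);
    return 0;
  }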
@@ -2234,6 +2585,30 @@ std::error_code BitcodeReader::ParseUseLists() {
}
}
+/// When we see the block for metadata, remember where it is and then skip it.
+/// This lets us lazily deserialize the metadata.
+std::error_code BitcodeReader::rememberAndSkipMetadata() {
+ // Save the current stream state.
+ uint64_t CurBit = Stream.GetCurrentBitNo();
+ DeferredMetadataInfo.push_back(CurBit);
+
+ // Skip over the block for now.
+ if (Stream.SkipBlock())
+ return Error("Invalid record");
+ return std::error_code();
+}
+
+std::error_code BitcodeReader::materializeMetadata() {
+ for (uint64_t BitPos : DeferredMetadataInfo) {
+ // Move the bit stream to the saved position.
+ Stream.JumpToBit(BitPos);
+ if (std::error_code EC = ParseMetadata())
+ return EC;
+ }
+ DeferredMetadataInfo.clear();
+ return std::error_code();
+}
+
/// RememberAndSkipFunctionBody - When we see the block for a function body,
/// remember where it is and then skip it. This lets us lazily deserialize the
/// functions.
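
Both the deferred-metadata routines above and the function-body skipping described here follow the same defer-and-replay scheme: record the stream position of a block, skip it, and jump back only when it is actually needed. A generic standalone sketch with a hypothetical seekable Stream, not BitstreamCursor:

  #include <cassert>
  #include <cstddef>
  #include <vector>

  struct Stream { // stand-in for a seekable bit/byte cursor
    size_t Pos = 0;
    void jumpTo(size_t P) { Pos = P; }
  };

  struct LazyReader {
    Stream S;
    std::vector<size_t> Deferred; // saved positions of skipped blocks
    int BlocksParsed = 0;

    void rememberAndSkip(size_t BlockSize) {
      Deferred.push_back(S.Pos); // remember where the block starts...
      S.Pos += BlockSize;        // ...and skip past it for now
    }
    void materialize() {         // replay every skipped block on demand
      for (size_t P : Deferred) {
        S.jumpTo(P);
        ++BlocksParsed;          // a real reader would parse here
      }
      Deferred.clear();          // idempotent: a second call is a no-op
    }
  };

  int main() {
    LazyReader R;
    R.rememberAndSkip(64);
    R.rememberAndSkip(32);
    R.materialize();
    R.materialize();
    assert(R.BlocksParsed == 2);
    return 0;
  }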
@@ -2284,7 +2659,8 @@ std::error_code BitcodeReader::GlobalCleanup() {
return std::error_code();
}
-std::error_code BitcodeReader::ParseModule(bool Resume) {
+std::error_code BitcodeReader::ParseModule(bool Resume,
+ bool ShouldLazyLoadMetadata) {
if (Resume)
Stream.JumpToBit(NextUnreadBit);
else if (Stream.EnterSubBlock(bitc::MODULE_BLOCK_ID))
@@ -2338,6 +2714,12 @@ std::error_code BitcodeReader::ParseModule(bool Resume) {
return EC;
break;
case bitc::METADATA_BLOCK_ID:
+ if (ShouldLazyLoadMetadata && !IsMetadataMaterialized) {
+ if (std::error_code EC = rememberAndSkipMetadata())
+ return EC;
+ break;
+ }
+ assert(DeferredMetadataInfo.empty() && "Unexpected deferred metadata");
if (std::error_code EC = ParseMetadata())
return EC;
break;
@@ -2652,7 +3034,8 @@ std::error_code BitcodeReader::ParseModule(bool Resume) {
}
}
-std::error_code BitcodeReader::ParseBitcodeInto(Module *M) {
+std::error_code BitcodeReader::ParseBitcodeInto(Module *M,
+ bool ShouldLazyLoadMetadata) {
TheModule = nullptr;
if (std::error_code EC = InitStream())
@@ -2693,7 +3076,7 @@ std::error_code BitcodeReader::ParseBitcodeInto(Module *M) {
if (TheModule)
return Error("Invalid multiple blocks");
TheModule = M;
- if (std::error_code EC = ParseModule(false))
+ if (std::error_code EC = ParseModule(false, ShouldLazyLoadMetadata))
return EC;
if (LazyStreamer)
return std::error_code();
@@ -3082,6 +3465,13 @@ std::error_code BitcodeReader::ParseFunctionBody(Function *F) {
if (getValueTypePair(Record, OpNum, NextValueNo, BasePtr))
return Error("Invalid record");
+ if (Ty &&
+ Ty !=
+ cast<SequentialType>(BasePtr->getType()->getScalarType())
+ ->getElementType())
+ return Error(
+ "Explicit gep type does not match pointee type of pointer operand");
+
SmallVector<Value*, 16> GEPIdx;
while (OpNum != Record.size()) {
Value *Op;
@@ -3090,8 +3480,8 @@ std::error_code BitcodeReader::ParseFunctionBody(Function *F) {
GEPIdx.push_back(Op);
}
- I = GetElementPtrInst::Create(BasePtr, GEPIdx);
- assert(!Ty || Ty == cast<GetElementPtrInst>(I)->getSourceElementType());
+ I = GetElementPtrInst::Create(Ty, BasePtr, GEPIdx);
+
InstructionList.push_back(I);
if (InBounds)
cast<GetElementPtrInst>(I)->setIsInBounds(true);
@@ -3600,8 +3990,9 @@ std::error_code BitcodeReader::ParseFunctionBody(Function *F) {
return EC;
I = new LoadInst(Op, "", Record[OpNum+1], Align);
- assert((!Ty || Ty == I->getType()) &&
- "Explicit type doesn't match pointee type of the first operand");
+ if (Ty && Ty != I->getType())
+ return Error("Explicit load type does not match pointee type of "
+ "pointer operand");
InstructionList.push_back(I);
break;
@@ -3631,6 +4022,7 @@ std::error_code BitcodeReader::ParseFunctionBody(Function *F) {
return EC;
I = new LoadInst(Op, "", Record[OpNum+1], Align, Ordering, SynchScope);
+ (void)Ty;
assert((!Ty || Ty == I->getType()) &&
"Explicit type doesn't match pointee type of the first operand");
@@ -3890,6 +4282,9 @@ std::error_code BitcodeReader::FindFunctionInStream(
void BitcodeReader::releaseBuffer() { Buffer.release(); }
std::error_code BitcodeReader::materialize(GlobalValue *GV) {
+ if (std::error_code EC = materializeMetadata())
+ return EC;
+
Function *F = dyn_cast<Function>(GV);
// If it's not a function or is already material, ignore the request.
if (!F || !F->isMaterializable())
@@ -3957,6 +4352,9 @@ std::error_code BitcodeReader::MaterializeModule(Module *M) {
assert(M == TheModule &&
"Can only Materialize the Module this BitcodeReader is attached to.");
+ if (std::error_code EC = materializeMetadata())
+ return EC;
+
// Promise to materialize all forward references.
WillMaterializeAllForwardRefs = true;
@@ -4097,7 +4495,8 @@ const std::error_category &llvm::BitcodeErrorCategory() {
static ErrorOr<Module *>
getLazyBitcodeModuleImpl(std::unique_ptr<MemoryBuffer> &&Buffer,
LLVMContext &Context, bool WillMaterializeAll,
- DiagnosticHandlerFunction DiagnosticHandler) {
+ DiagnosticHandlerFunction DiagnosticHandler,
+ bool ShouldLazyLoadMetadata = false) {
Module *M = new Module(Buffer->getBufferIdentifier(), Context);
BitcodeReader *R =
new BitcodeReader(Buffer.get(), Context, DiagnosticHandler);
@@ -4109,7 +4508,8 @@ getLazyBitcodeModuleImpl(std::unique_ptr<MemoryBuffer> &&Buffer,
return EC;
};
- if (std::error_code EC = R->ParseBitcodeInto(M))
+ // Delay parsing Metadata if ShouldLazyLoadMetadata is true.
+ if (std::error_code EC = R->ParseBitcodeInto(M, ShouldLazyLoadMetadata))
return cleanupOnError(EC);
if (!WillMaterializeAll)
@@ -4124,9 +4524,10 @@ getLazyBitcodeModuleImpl(std::unique_ptr<MemoryBuffer> &&Buffer,
ErrorOr<Module *>
llvm::getLazyBitcodeModule(std::unique_ptr<MemoryBuffer> &&Buffer,
LLVMContext &Context,
- DiagnosticHandlerFunction DiagnosticHandler) {
+ DiagnosticHandlerFunction DiagnosticHandler,
+ bool ShouldLazyLoadMetadata) {
return getLazyBitcodeModuleImpl(std::move(Buffer), Context, false,
- DiagnosticHandler);
+ DiagnosticHandler, ShouldLazyLoadMetadata);
}
ErrorOr<std::unique_ptr<Module>>
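[Editor's note] A caller-side sketch of the new flag, as hypothetical driver code against the 3.7-era headers; only getLazyBitcodeModule and its new parameter come from this patch, everything else is illustrative:

#include "llvm/Bitcode/ReaderWriter.h"
#include "llvm/IR/DiagnosticInfo.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Module.h"
#include "llvm/Support/MemoryBuffer.h"

llvm::ErrorOr<llvm::Module *>
openDeferred(std::unique_ptr<llvm::MemoryBuffer> Buf, llvm::LLVMContext &Ctx,
             llvm::DiagnosticHandlerFunction DiagHandler) {
  // With ShouldLazyLoadMetadata=true the METADATA_BLOCK is skipped and its
  // bit offset recorded in DeferredMetadataInfo; the first materialize() or
  // MaterializeModule() call replays it through materializeMetadata().
  return llvm::getLazyBitcodeModule(std::move(Buf), Ctx, DiagHandler,
                                    /*ShouldLazyLoadMetadata=*/true);
}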
diff --git a/lib/Bitcode/Reader/BitcodeReader.h b/lib/Bitcode/Reader/BitcodeReader.h
deleted file mode 100644
index 9803e78..0000000
--- a/lib/Bitcode/Reader/BitcodeReader.h
+++ /dev/null
@@ -1,369 +0,0 @@
-//===- BitcodeReader.h - Internal BitcodeReader impl ------------*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This header defines the BitcodeReader class.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_LIB_BITCODE_READER_BITCODEREADER_H
-#define LLVM_LIB_BITCODE_READER_BITCODEREADER_H
-
-#include "llvm/ADT/DenseMap.h"
-#include "llvm/Bitcode/BitstreamReader.h"
-#include "llvm/Bitcode/LLVMBitCodes.h"
-#include "llvm/IR/Attributes.h"
-#include "llvm/IR/GVMaterializer.h"
-#include "llvm/IR/Metadata.h"
-#include "llvm/IR/OperandTraits.h"
-#include "llvm/IR/TrackingMDRef.h"
-#include "llvm/IR/Type.h"
-#include "llvm/IR/ValueHandle.h"
-#include <deque>
-#include <system_error>
-#include <vector>
-
-namespace llvm {
- class Comdat;
- class MemoryBuffer;
- class LLVMContext;
-
-//===----------------------------------------------------------------------===//
-// BitcodeReaderValueList Class
-//===----------------------------------------------------------------------===//
-
-class BitcodeReaderValueList {
- std::vector<WeakVH> ValuePtrs;
-
- /// ResolveConstants - As we resolve forward-referenced constants, we add
- /// information about them to this vector. This allows us to resolve them in
- /// bulk instead of resolving each reference at a time. See the code in
- /// ResolveConstantForwardRefs for more information about this.
- ///
- /// The key of this vector is the placeholder constant, the value is the slot
- /// number that holds the resolved value.
- typedef std::vector<std::pair<Constant*, unsigned> > ResolveConstantsTy;
- ResolveConstantsTy ResolveConstants;
- LLVMContext &Context;
-public:
- BitcodeReaderValueList(LLVMContext &C) : Context(C) {}
- ~BitcodeReaderValueList() {
- assert(ResolveConstants.empty() && "Constants not resolved?");
- }
-
- // vector compatibility methods
- unsigned size() const { return ValuePtrs.size(); }
- void resize(unsigned N) { ValuePtrs.resize(N); }
- void push_back(Value *V) {
- ValuePtrs.push_back(V);
- }
-
- void clear() {
- assert(ResolveConstants.empty() && "Constants not resolved?");
- ValuePtrs.clear();
- }
-
- Value *operator[](unsigned i) const {
- assert(i < ValuePtrs.size());
- return ValuePtrs[i];
- }
-
- Value *back() const { return ValuePtrs.back(); }
- void pop_back() { ValuePtrs.pop_back(); }
- bool empty() const { return ValuePtrs.empty(); }
- void shrinkTo(unsigned N) {
- assert(N <= size() && "Invalid shrinkTo request!");
- ValuePtrs.resize(N);
- }
-
- Constant *getConstantFwdRef(unsigned Idx, Type *Ty);
- Value *getValueFwdRef(unsigned Idx, Type *Ty);
-
- void AssignValue(Value *V, unsigned Idx);
-
- /// ResolveConstantForwardRefs - Once all constants are read, this method bulk
- /// resolves any forward references.
- void ResolveConstantForwardRefs();
-};
-
-
-//===----------------------------------------------------------------------===//
-// BitcodeReaderMDValueList Class
-//===----------------------------------------------------------------------===//
-
-class BitcodeReaderMDValueList {
- unsigned NumFwdRefs;
- bool AnyFwdRefs;
- unsigned MinFwdRef;
- unsigned MaxFwdRef;
- std::vector<TrackingMDRef> MDValuePtrs;
-
- LLVMContext &Context;
-public:
- BitcodeReaderMDValueList(LLVMContext &C)
- : NumFwdRefs(0), AnyFwdRefs(false), Context(C) {}
-
- // vector compatibility methods
- unsigned size() const { return MDValuePtrs.size(); }
- void resize(unsigned N) { MDValuePtrs.resize(N); }
- void push_back(Metadata *MD) { MDValuePtrs.emplace_back(MD); }
- void clear() { MDValuePtrs.clear(); }
- Metadata *back() const { return MDValuePtrs.back(); }
- void pop_back() { MDValuePtrs.pop_back(); }
- bool empty() const { return MDValuePtrs.empty(); }
-
- Metadata *operator[](unsigned i) const {
- assert(i < MDValuePtrs.size());
- return MDValuePtrs[i];
- }
-
- void shrinkTo(unsigned N) {
- assert(N <= size() && "Invalid shrinkTo request!");
- MDValuePtrs.resize(N);
- }
-
- Metadata *getValueFwdRef(unsigned Idx);
- void AssignValue(Metadata *MD, unsigned Idx);
- void tryToResolveCycles();
-};
-
-class BitcodeReader : public GVMaterializer {
- LLVMContext &Context;
- DiagnosticHandlerFunction DiagnosticHandler;
- Module *TheModule;
- std::unique_ptr<MemoryBuffer> Buffer;
- std::unique_ptr<BitstreamReader> StreamFile;
- BitstreamCursor Stream;
- DataStreamer *LazyStreamer;
- uint64_t NextUnreadBit;
- bool SeenValueSymbolTable;
-
- std::vector<Type*> TypeList;
- BitcodeReaderValueList ValueList;
- BitcodeReaderMDValueList MDValueList;
- std::vector<Comdat *> ComdatList;
- SmallVector<Instruction *, 64> InstructionList;
-
- std::vector<std::pair<GlobalVariable*, unsigned> > GlobalInits;
- std::vector<std::pair<GlobalAlias*, unsigned> > AliasInits;
- std::vector<std::pair<Function*, unsigned> > FunctionPrefixes;
- std::vector<std::pair<Function*, unsigned> > FunctionPrologues;
-
- SmallVector<Instruction*, 64> InstsWithTBAATag;
-
- /// MAttributes - The set of attributes by index. Index zero in the
- /// file is for null, and is thus not represented here. As such all indices
- /// are off by one.
- std::vector<AttributeSet> MAttributes;
-
- /// \brief The set of attribute groups.
- std::map<unsigned, AttributeSet> MAttributeGroups;
-
- /// FunctionBBs - While parsing a function body, this is a list of the basic
- /// blocks for the function.
- std::vector<BasicBlock*> FunctionBBs;
-
- // When reading the module header, this list is populated with functions that
- // have bodies later in the file.
- std::vector<Function*> FunctionsWithBodies;
-
- // When intrinsic functions are encountered which require upgrading they are
- // stored here with their replacement function.
- typedef std::vector<std::pair<Function*, Function*> > UpgradedIntrinsicMap;
- UpgradedIntrinsicMap UpgradedIntrinsics;
-
- // Map the bitcode's custom MDKind ID to the Module's MDKind ID.
- DenseMap<unsigned, unsigned> MDKindMap;
-
- // Several operations happen after the module header has been read, but
- // before function bodies are processed. This keeps track of whether
- // we've done this yet.
- bool SeenFirstFunctionBody;
-
- /// DeferredFunctionInfo - When function bodies are initially scanned, this
- /// map contains info about where to find deferred function body in the
- /// stream.
- DenseMap<Function*, uint64_t> DeferredFunctionInfo;
-
- /// These are basic blocks forward-referenced by block addresses. They are
- /// inserted lazily into functions when they're loaded. The basic block ID is
- /// its index into the vector.
- DenseMap<Function *, std::vector<BasicBlock *>> BasicBlockFwdRefs;
- std::deque<Function *> BasicBlockFwdRefQueue;
-
- /// UseRelativeIDs - Indicates that we are using a new encoding for
- /// instruction operands where most operands in the current
- /// FUNCTION_BLOCK are encoded relative to the instruction number,
- /// for a more compact encoding. Some instruction operands are not
- /// relative to the instruction ID: basic block numbers, and types.
- /// Once the old style function blocks have been phased out, we would
- /// not need this flag.
- bool UseRelativeIDs;
-
- /// True if all functions will be materialized, negating the need to process
- /// (e.g.) blockaddress forward references.
- bool WillMaterializeAllForwardRefs;
-
- /// Functions that have block addresses taken. This is usually empty.
- SmallPtrSet<const Function *, 4> BlockAddressesTaken;
-
-public:
- std::error_code Error(BitcodeError E, const Twine &Message);
- std::error_code Error(BitcodeError E);
- std::error_code Error(const Twine &Message);
-
- explicit BitcodeReader(MemoryBuffer *buffer, LLVMContext &C,
- DiagnosticHandlerFunction DiagnosticHandler);
- explicit BitcodeReader(DataStreamer *streamer, LLVMContext &C,
- DiagnosticHandlerFunction DiagnosticHandler);
- ~BitcodeReader() { FreeState(); }
-
- std::error_code materializeForwardReferencedFunctions();
-
- void FreeState();
-
- void releaseBuffer();
-
- bool isDematerializable(const GlobalValue *GV) const override;
- std::error_code materialize(GlobalValue *GV) override;
- std::error_code MaterializeModule(Module *M) override;
- std::vector<StructType *> getIdentifiedStructTypes() const override;
- void Dematerialize(GlobalValue *GV) override;
-
- /// @brief Main interface to parsing a bitcode buffer.
- /// @returns true if an error occurred.
- std::error_code ParseBitcodeInto(Module *M);
-
- /// @brief Cheap mechanism to just extract module triple
- /// @returns true if an error occurred.
- ErrorOr<std::string> parseTriple();
-
- static uint64_t decodeSignRotatedValue(uint64_t V);
-
-private:
- std::vector<StructType *> IdentifiedStructTypes;
- StructType *createIdentifiedStructType(LLVMContext &Context, StringRef Name);
- StructType *createIdentifiedStructType(LLVMContext &Context);
-
- Type *getTypeByID(unsigned ID);
- Value *getFnValueByID(unsigned ID, Type *Ty) {
- if (Ty && Ty->isMetadataTy())
- return MetadataAsValue::get(Ty->getContext(), getFnMetadataByID(ID));
- return ValueList.getValueFwdRef(ID, Ty);
- }
- Metadata *getFnMetadataByID(unsigned ID) {
- return MDValueList.getValueFwdRef(ID);
- }
- BasicBlock *getBasicBlock(unsigned ID) const {
- if (ID >= FunctionBBs.size()) return nullptr; // Invalid ID
- return FunctionBBs[ID];
- }
- AttributeSet getAttributes(unsigned i) const {
- if (i-1 < MAttributes.size())
- return MAttributes[i-1];
- return AttributeSet();
- }
-
- /// getValueTypePair - Read a value/type pair out of the specified record from
- /// slot 'Slot'. Increment Slot past the number of slots used in the record.
- /// Return true on failure.
- bool getValueTypePair(SmallVectorImpl<uint64_t> &Record, unsigned &Slot,
- unsigned InstNum, Value *&ResVal) {
- if (Slot == Record.size()) return true;
- unsigned ValNo = (unsigned)Record[Slot++];
- // Adjust the ValNo, if it was encoded relative to the InstNum.
- if (UseRelativeIDs)
- ValNo = InstNum - ValNo;
- if (ValNo < InstNum) {
- // If this is not a forward reference, just return the value we already
- // have.
- ResVal = getFnValueByID(ValNo, nullptr);
- return ResVal == nullptr;
- } else if (Slot == Record.size()) {
- return true;
- }
-
- unsigned TypeNo = (unsigned)Record[Slot++];
- ResVal = getFnValueByID(ValNo, getTypeByID(TypeNo));
- return ResVal == nullptr;
- }
-
- /// popValue - Read a value out of the specified record from slot 'Slot'.
- /// Increment Slot past the number of slots used by the value in the record.
- /// Return true if there is an error.
- bool popValue(SmallVectorImpl<uint64_t> &Record, unsigned &Slot,
- unsigned InstNum, Type *Ty, Value *&ResVal) {
- if (getValue(Record, Slot, InstNum, Ty, ResVal))
- return true;
- // All values currently take a single record slot.
- ++Slot;
- return false;
- }
-
- /// getValue -- Like popValue, but does not increment the Slot number.
- bool getValue(SmallVectorImpl<uint64_t> &Record, unsigned Slot,
- unsigned InstNum, Type *Ty, Value *&ResVal) {
- ResVal = getValue(Record, Slot, InstNum, Ty);
- return ResVal == nullptr;
- }
-
- /// getValue -- Version of getValue that returns ResVal directly,
- /// or 0 if there is an error.
- Value *getValue(SmallVectorImpl<uint64_t> &Record, unsigned Slot,
- unsigned InstNum, Type *Ty) {
- if (Slot == Record.size()) return nullptr;
- unsigned ValNo = (unsigned)Record[Slot];
- // Adjust the ValNo, if it was encoded relative to the InstNum.
- if (UseRelativeIDs)
- ValNo = InstNum - ValNo;
- return getFnValueByID(ValNo, Ty);
- }
-
- /// getValueSigned -- Like getValue, but decodes signed VBRs.
- Value *getValueSigned(SmallVectorImpl<uint64_t> &Record, unsigned Slot,
- unsigned InstNum, Type *Ty) {
- if (Slot == Record.size()) return nullptr;
- unsigned ValNo = (unsigned)decodeSignRotatedValue(Record[Slot]);
- // Adjust the ValNo, if it was encoded relative to the InstNum.
- if (UseRelativeIDs)
- ValNo = InstNum - ValNo;
- return getFnValueByID(ValNo, Ty);
- }
-
- /// Converts alignment exponent (i.e. power of two (or zero)) to the
- /// corresponding alignment to use. If alignment is too large, returns
- /// a corresponding error code.
- std::error_code parseAlignmentValue(uint64_t Exponent, unsigned &Alignment);
- std::error_code ParseAttrKind(uint64_t Code, Attribute::AttrKind *Kind);
- std::error_code ParseModule(bool Resume);
- std::error_code ParseAttributeBlock();
- std::error_code ParseAttributeGroupBlock();
- std::error_code ParseTypeTable();
- std::error_code ParseTypeTableBody();
-
- std::error_code ParseValueSymbolTable();
- std::error_code ParseConstants();
- std::error_code RememberAndSkipFunctionBody();
- std::error_code ParseFunctionBody(Function *F);
- std::error_code GlobalCleanup();
- std::error_code ResolveGlobalAndAliasInits();
- std::error_code ParseMetadata();
- std::error_code ParseMetadataAttachment();
- ErrorOr<std::string> parseModuleTriple();
- std::error_code ParseUseLists();
- std::error_code InitStream();
- std::error_code InitStreamFromBuffer();
- std::error_code InitLazyStream();
- std::error_code FindFunctionInStream(
- Function *F,
- DenseMap<Function *, uint64_t>::iterator DeferredFunctionInfoIterator);
-};
-
-} // End llvm namespace
-
-#endif
diff --git a/lib/Bitcode/Reader/BitstreamReader.cpp b/lib/Bitcode/Reader/BitstreamReader.cpp
index ca68257..beaaf7a 100644
--- a/lib/Bitcode/Reader/BitstreamReader.cpp
+++ b/lib/Bitcode/Reader/BitstreamReader.cpp
@@ -245,7 +245,7 @@ void BitstreamCursor::ReadAbbrevRecord() {
BitCodeAbbrev *Abbv = new BitCodeAbbrev();
unsigned NumOpInfo = ReadVBR(5);
for (unsigned i = 0; i != NumOpInfo; ++i) {
- bool IsLiteral = Read(1) ? true : false;
+ bool IsLiteral = Read(1);
if (IsLiteral) {
Abbv->Add(BitCodeAbbrevOp(ReadVBR64(8)));
continue;
diff --git a/lib/Bitcode/Writer/BitcodeWriter.cpp b/lib/Bitcode/Writer/BitcodeWriter.cpp
index ecb6f7c..0123fb2 100644
--- a/lib/Bitcode/Writer/BitcodeWriter.cpp
+++ b/lib/Bitcode/Writer/BitcodeWriter.cpp
@@ -1205,6 +1205,8 @@ static void WriteModuleMetadata(const Module *M,
SmallVector<uint64_t, 64> Record;
for (const Metadata *MD : MDs) {
if (const MDNode *N = dyn_cast<MDNode>(MD)) {
+ assert(N->isResolved() && "Expected forward references to be resolved");
+
switch (N->getMetadataID()) {
default:
llvm_unreachable("Invalid MDNode subclass");
@@ -1522,15 +1524,18 @@ static void WriteConstants(unsigned FirstVal, unsigned LastVal,
Record.push_back(Flags);
}
break;
- case Instruction::GetElementPtr:
+ case Instruction::GetElementPtr: {
Code = bitc::CST_CODE_CE_GEP;
- if (cast<GEPOperator>(C)->isInBounds())
+ const auto *GO = cast<GEPOperator>(C);
+ if (GO->isInBounds())
Code = bitc::CST_CODE_CE_INBOUNDS_GEP;
+ Record.push_back(VE.getTypeID(GO->getSourceElementType()));
for (unsigned i = 0, e = CE->getNumOperands(); i != e; ++i) {
Record.push_back(VE.getTypeID(C->getOperand(i)->getType()));
Record.push_back(VE.getValueID(C->getOperand(i)));
}
break;
+ }
case Instruction::Select:
Code = bitc::CST_CODE_CE_SELECT;
Record.push_back(VE.getValueID(C->getOperand(0)));
diff --git a/lib/CMakeLists.txt b/lib/CMakeLists.txt
index 8ab2d6e..ce10998 100644
--- a/lib/CMakeLists.txt
+++ b/lib/CMakeLists.txt
@@ -18,3 +18,4 @@ add_subdirectory(AsmParser)
add_subdirectory(LineEditor)
add_subdirectory(ProfileData)
add_subdirectory(Fuzzer)
+add_subdirectory(Passes)
diff --git a/lib/CodeGen/Analysis.cpp b/lib/CodeGen/Analysis.cpp
index e50b846..8e11fe1 100644
--- a/lib/CodeGen/Analysis.cpp
+++ b/lib/CodeGen/Analysis.cpp
@@ -312,8 +312,7 @@ static const Value *getNoopInput(const Value *V,
// previous aggregate. Combine the two paths to obtain the true address of
// our element.
ArrayRef<unsigned> ExtractLoc = EVI->getIndices();
- std::copy(ExtractLoc.rbegin(), ExtractLoc.rend(),
- std::back_inserter(ValLoc));
+ ValLoc.append(ExtractLoc.rbegin(), ExtractLoc.rend());
NoopInput = Op;
}
// Terminate if we couldn't find anything to look through.
@@ -601,10 +600,8 @@ bool llvm::returnTypeIsEligibleForTailCall(const Function *F,
// The manipulations performed when we're looking through an insertvalue or
// an extractvalue would happen at the front of the RetPath list, so since
// we have to copy it anyway it's more efficient to create a reversed copy.
- using std::copy;
- SmallVector<unsigned, 4> TmpRetPath, TmpCallPath;
- copy(RetPath.rbegin(), RetPath.rend(), std::back_inserter(TmpRetPath));
- copy(CallPath.rbegin(), CallPath.rend(), std::back_inserter(TmpCallPath));
+ SmallVector<unsigned, 4> TmpRetPath(RetPath.rbegin(), RetPath.rend());
+ SmallVector<unsigned, 4> TmpCallPath(CallPath.rbegin(), CallPath.rend());
// Finally, we can check whether the value produced by the tail call at this
// index is compatible with the value we return.
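[Editor's note] Both replacements above lean on SmallVector's iterator-range append/constructor. A standalone illustration, not from the patch:

#include "llvm/ADT/SmallVector.h"

void reversedCopyDemo() {
  llvm::SmallVector<unsigned, 4> Path;
  Path.push_back(1);
  Path.push_back(2);
  Path.push_back(3);
  // One-shot reversed copy; replaces std::copy + std::back_inserter.
  llvm::SmallVector<unsigned, 4> Reversed(Path.rbegin(), Path.rend());
  // Reversed now holds 3, 2, 1.
}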
diff --git a/lib/CodeGen/Android.mk b/lib/CodeGen/Android.mk
index ec3cd77..2827d73 100644
--- a/lib/CodeGen/Android.mk
+++ b/lib/CodeGen/Android.mk
@@ -21,7 +21,6 @@ codegen_SRC_FILES := \
ExecutionDepsFix.cpp \
ExpandISelPseudos.cpp \
ExpandPostRAPseudos.cpp \
- ForwardControlFlowIntegrity.cpp \
GCMetadata.cpp \
GCMetadataPrinter.cpp \
GCRootLowering.cpp \
@@ -31,7 +30,6 @@ codegen_SRC_FILES := \
InlineSpiller.cpp \
InterferenceCache.cpp \
IntrinsicLowering.cpp \
- JumpInstrTables.cpp \
LatencyPriorityQueue.cpp \
LexicalScopes.cpp \
LiveDebugVariables.cpp \
@@ -53,6 +51,7 @@ codegen_SRC_FILES := \
MachineCombiner.cpp \
MachineCopyPropagation.cpp \
MachineCSE.cpp \
+ MachineDominanceFrontier.cpp \
MachineDominators.cpp \
MachineFunctionAnalysis.cpp \
MachineFunction.cpp \
@@ -66,6 +65,7 @@ codegen_SRC_FILES := \
MachineModuleInfoImpls.cpp \
MachinePassRegistry.cpp \
MachinePostDominators.cpp \
+ MachineRegionInfo.cpp \
MachineRegisterInfo.cpp \
MachineScheduler.cpp \
MachineSink.cpp \
diff --git a/lib/CodeGen/AsmPrinter/ARMException.cpp b/lib/CodeGen/AsmPrinter/ARMException.cpp
index 6fe75ad..9a16e15 100644
--- a/lib/CodeGen/AsmPrinter/ARMException.cpp
+++ b/lib/CodeGen/AsmPrinter/ARMException.cpp
@@ -36,8 +36,7 @@
#include "llvm/Target/TargetRegisterInfo.h"
using namespace llvm;
-ARMException::ARMException(AsmPrinter *A)
- : EHStreamer(A), shouldEmitCFI(false) {}
+ARMException::ARMException(AsmPrinter *A) : DwarfCFIExceptionBase(A) {}
ARMException::~ARMException() {}
@@ -53,13 +52,9 @@ void ARMException::endModule() {
Asm->OutStreamer.EmitCFISections(false, true);
}
-/// beginFunction - Gather pre-function exception information. Assumes it's
-/// being emitted immediately after the function entry point.
void ARMException::beginFunction(const MachineFunction *MF) {
if (Asm->MAI->getExceptionHandlingType() == ExceptionHandling::ARM)
getTargetStreamer().emitFnStart();
- Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("eh_func_begin",
- Asm->getFunctionNumber()));
// See if we need call frame info.
AsmPrinter::CFIMoveType MoveType = Asm->needsCFIMoves();
assert(MoveType != AsmPrinter::CFI_M_EH &&
@@ -72,20 +67,12 @@ void ARMException::beginFunction(const MachineFunction *MF) {
/// endFunction - Gather and emit post-function exception information.
///
-void ARMException::endFunction(const MachineFunction *) {
- if (shouldEmitCFI)
- Asm->OutStreamer.EmitCFIEndProc();
-
- // Map all labels and get rid of any dead landing pads.
- MMI->TidyLandingPads();
-
+void ARMException::endFunction(const MachineFunction *MF) {
ARMTargetStreamer &ATS = getTargetStreamer();
if (!Asm->MF->getFunction()->needsUnwindTableEntry() &&
MMI->getLandingPads().empty())
ATS.emitCantUnwind();
else {
- Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("eh_func_end",
- Asm->getFunctionNumber()));
if (!MMI->getLandingPads().empty()) {
// Emit references to personality.
if (const Function *Personality = MMI->getPersonality()) {
diff --git a/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
index 988381d..07d6731 100644
--- a/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
+++ b/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
@@ -77,11 +77,11 @@ static gcp_map_type &getGCMap(void *&P) {
/// getGVAlignmentLog2 - Return the alignment to use for the specified global
/// value in log2 form. This rounds up to the preferred alignment if possible
/// and legal.
-static unsigned getGVAlignmentLog2(const GlobalValue *GV, const DataLayout &TD,
+static unsigned getGVAlignmentLog2(const GlobalValue *GV, const DataLayout &DL,
unsigned InBits = 0) {
unsigned NumBits = 0;
if (const GlobalVariable *GVar = dyn_cast<GlobalVariable>(GV))
- NumBits = TD.getPreferredAlignmentLog(GVar);
+ NumBits = DL.getPreferredAlignmentLog(GVar);
// If InBits is specified, round up to it.
if (InBits > NumBits)
@@ -103,12 +103,14 @@ static unsigned getGVAlignmentLog2(const GlobalValue *GV, const DataLayout &TD,
AsmPrinter::AsmPrinter(TargetMachine &tm, std::unique_ptr<MCStreamer> Streamer)
: MachineFunctionPass(ID), TM(tm), MAI(tm.getMCAsmInfo()),
OutContext(Streamer->getContext()), OutStreamer(*Streamer.release()),
- LastMI(nullptr), LastFn(0), Counter(~0U), SetCounter(0) {
+ LastMI(nullptr), LastFn(0), Counter(~0U) {
DD = nullptr;
MMI = nullptr;
LI = nullptr;
MF = nullptr;
- CurrentFnSym = CurrentFnSymForSize = nullptr;
+ CurExceptionSym = CurrentFnSym = CurrentFnSymForSize = nullptr;
+ CurrentFnBegin = nullptr;
+ CurrentFnEnd = nullptr;
GCMetadataPrinters = nullptr;
VerboseAsm = OutStreamer.isVerboseAsm();
}
@@ -219,9 +221,13 @@ bool AsmPrinter::doInitialization(Module &M) {
// Emit module-level inline asm if it exists.
if (!M.getModuleInlineAsm().empty()) {
+ // We're at the module level. Construct MCSubtargetInfo from the default
+ // CPU and target triple.
+ std::unique_ptr<MCSubtargetInfo> STI(TM.getTarget().createMCSubtargetInfo(
+ TM.getTargetTriple(), TM.getTargetCPU(), TM.getTargetFeatureString()));
OutStreamer.AddComment("Start of file scope inline assembly");
OutStreamer.AddBlankLine();
- EmitInlineAsm(M.getModuleInlineAsm()+"\n");
+ EmitInlineAsm(M.getModuleInlineAsm()+"\n", *STI);
OutStreamer.AddComment("End of file scope inline assembly");
OutStreamer.AddBlankLine();
}
@@ -525,7 +531,8 @@ void AsmPrinter::EmitFunctionHeader() {
EmitVisibility(CurrentFnSym, F->getVisibility());
EmitLinkage(F, CurrentFnSym);
- EmitAlignment(MF->getAlignment(), F);
+ if (MAI->hasFunctionAlignment())
+ EmitAlignment(MF->getAlignment(), F);
if (MAI->hasDotTypeDotSizeDirective())
OutStreamer.EmitSymbolAttribute(CurrentFnSym, MCSA_ELF_TypeFunction);
@@ -554,6 +561,17 @@ void AsmPrinter::EmitFunctionHeader() {
OutStreamer.EmitLabel(DeadBlockSyms[i]);
}
+ if (CurrentFnBegin) {
+ if (MAI->useAssignmentForEHBegin()) {
+ MCSymbol *CurPos = OutContext.CreateTempSymbol();
+ OutStreamer.EmitLabel(CurPos);
+ OutStreamer.EmitAssignment(CurrentFnBegin,
+ MCSymbolRefExpr::Create(CurPos, OutContext));
+ } else {
+ OutStreamer.EmitLabel(CurrentFnBegin);
+ }
+ }
+
// Emit pre-function debug and/or EH information.
for (const HandlerInfo &HI : Handlers) {
NamedRegionTimer T(HI.TimerName, HI.TimerGroupName, TimePassesIsEnabled);
@@ -764,6 +782,8 @@ void AsmPrinter::emitFrameAlloc(const MachineInstr &MI) {
/// EmitFunctionBody - This method emits the body and trailer for a
/// function.
void AsmPrinter::EmitFunctionBody() {
+ EmitFunctionHeader();
+
// Emit target-specific gunk before the function body.
EmitFunctionBodyStart();
@@ -867,32 +887,41 @@ void AsmPrinter::EmitFunctionBody() {
// Emit target-specific gunk after the function body.
EmitFunctionBodyEnd();
+ if (!MMI->getLandingPads().empty() || MMI->hasDebugInfo() ||
+ MAI->hasDotTypeDotSizeDirective()) {
+ // Create a symbol for the end of function.
+ CurrentFnEnd = createTempSymbol("func_end");
+ OutStreamer.EmitLabel(CurrentFnEnd);
+ }
+
// If the target wants a .size directive for the size of the function, emit
// it.
if (MAI->hasDotTypeDotSizeDirective()) {
- // Create a symbol for the end of function, so we can get the size as
- // difference between the function label and the temp label.
- MCSymbol *FnEndLabel = OutContext.CreateTempSymbol();
- OutStreamer.EmitLabel(FnEndLabel);
-
+ // We can get the size as the difference between the function label and
+ // the temp label.
const MCExpr *SizeExp =
- MCBinaryExpr::CreateSub(MCSymbolRefExpr::Create(FnEndLabel, OutContext),
+ MCBinaryExpr::CreateSub(MCSymbolRefExpr::Create(CurrentFnEnd, OutContext),
MCSymbolRefExpr::Create(CurrentFnSymForSize,
OutContext),
OutContext);
OutStreamer.EmitELFSize(CurrentFnSym, SizeExp);
}
- // Emit post-function debug and/or EH information.
for (const HandlerInfo &HI : Handlers) {
NamedRegionTimer T(HI.TimerName, HI.TimerGroupName, TimePassesIsEnabled);
- HI.Handler->endFunction(MF);
+ HI.Handler->markFunctionEnd();
}
- MMI->EndFunction();
// Print out jump tables referenced by the function.
EmitJumpTableInfo();
+ // Emit post-function debug and/or EH information.
+ for (const HandlerInfo &HI : Handlers) {
+ NamedRegionTimer T(HI.TimerName, HI.TimerGroupName, TimePassesIsEnabled);
+ HI.Handler->endFunction(MF);
+ }
+ MMI->EndFunction();
+
OutStreamer.AddBlankLine();
}
@@ -928,7 +957,7 @@ static bool isGOTEquivalentCandidate(const GlobalVariable *GV,
// To be a GOT equivalent, at least one of its users needs to be a constant
// expression used by another global variable.
for (auto *U : GV->users())
- NumGOTEquivUsers += getNumGlobalVariableUses(cast<Constant>(U));
+ NumGOTEquivUsers += getNumGlobalVariableUses(dyn_cast<Constant>(U));
return NumGOTEquivUsers > 0;
}
@@ -961,17 +990,25 @@ void AsmPrinter::emitGlobalGOTEquivs() {
if (!getObjFileLowering().supportIndirectSymViaGOTPCRel())
return;
- while (!GlobalGOTEquivs.empty()) {
- DenseMap<const MCSymbol *, GOTEquivUsePair>::iterator I =
- GlobalGOTEquivs.begin();
- const MCSymbol *S = I->first;
- const GlobalVariable *GV = I->second.first;
- GlobalGOTEquivs.erase(S);
- EmitGlobalVariable(GV);
+ SmallVector<const GlobalVariable *, 8> FailedCandidates;
+ for (auto &I : GlobalGOTEquivs) {
+ const GlobalVariable *GV = I.second.first;
+ unsigned Cnt = I.second.second;
+ if (Cnt)
+ FailedCandidates.push_back(GV);
}
+ GlobalGOTEquivs.clear();
+
+ for (auto *GV : FailedCandidates)
+ EmitGlobalVariable(GV);
}
bool AsmPrinter::doFinalization(Module &M) {
+ // Set the MachineFunction to nullptr so that we can catch attempted
+ // accesses to MF-specific features at the module level and so that
+ // we can conditionalize accesses based on whether or not it is nullptr.
+ MF = nullptr;
+
// Gather all GOT equivalent globals in the module. We really need two
// passes over the globals: one to compute and another to avoid its emission
// in EmitGlobalVariable, otherwise we would not be able to handle cases
@@ -997,59 +1034,6 @@ bool AsmPrinter::doFinalization(Module &M) {
EmitVisibility(Name, V, false);
}
- // Get information about jump-instruction tables to print.
- JumpInstrTableInfo *JITI = getAnalysisIfAvailable<JumpInstrTableInfo>();
-
- if (JITI && !JITI->getTables().empty()) {
- // Since we're at the module level we can't use a function specific
- // MCSubtargetInfo - instead create one with the module defaults.
- std::unique_ptr<MCSubtargetInfo> STI(TM.getTarget().createMCSubtargetInfo(
- TM.getTargetTriple(), TM.getTargetCPU(), TM.getTargetFeatureString()));
- unsigned Arch = Triple(getTargetTriple()).getArch();
- bool IsThumb = (Arch == Triple::thumb || Arch == Triple::thumbeb);
- const TargetInstrInfo *TII = TM.getSubtargetImpl()->getInstrInfo();
- MCInst TrapInst;
- TII->getTrap(TrapInst);
- unsigned LogAlignment = llvm::Log2_64(JITI->entryByteAlignment());
-
- // Emit the right section for these functions.
- OutStreamer.SwitchSection(OutContext.getObjectFileInfo()->getTextSection());
- for (const auto &KV : JITI->getTables()) {
- uint64_t Count = 0;
- for (const auto &FunPair : KV.second) {
- // Emit the function labels to make this be a function entry point.
- MCSymbol *FunSym =
- OutContext.GetOrCreateSymbol(FunPair.second->getName());
- EmitAlignment(LogAlignment);
- if (IsThumb)
- OutStreamer.EmitThumbFunc(FunSym);
- if (MAI->hasDotTypeDotSizeDirective())
- OutStreamer.EmitSymbolAttribute(FunSym, MCSA_ELF_TypeFunction);
- OutStreamer.EmitLabel(FunSym);
-
- // Emit the jump instruction to transfer control to the original
- // function.
- MCInst JumpToFun;
- MCSymbol *TargetSymbol =
- OutContext.GetOrCreateSymbol(FunPair.first->getName());
- const MCSymbolRefExpr *TargetSymRef =
- MCSymbolRefExpr::Create(TargetSymbol, MCSymbolRefExpr::VK_PLT,
- OutContext);
- TII->getUnconditionalBranch(JumpToFun, TargetSymRef);
- OutStreamer.EmitInstruction(JumpToFun, *STI);
- ++Count;
- }
-
- // Emit enough padding instructions to fill up to the next power of two.
- uint64_t Remaining = NextPowerOf2(Count) - Count;
- for (uint64_t C = 0; C < Remaining; ++C) {
- EmitAlignment(LogAlignment);
- OutStreamer.EmitInstruction(TrapInst, *STI);
- }
-
- }
- }
-
// Emit module flags.
SmallVector<Module::ModuleFlagEntry, 8> ModuleFlags;
M.getModuleFlagsMetadata(ModuleFlags);
@@ -1152,11 +1136,26 @@ bool AsmPrinter::doFinalization(Module &M) {
return false;
}
+MCSymbol *AsmPrinter::getCurExceptionSym() {
+ if (!CurExceptionSym)
+ CurExceptionSym = createTempSymbol("exception");
+ return CurExceptionSym;
+}
+
void AsmPrinter::SetupMachineFunction(MachineFunction &MF) {
this->MF = &MF;
// Get the function symbol.
CurrentFnSym = getSymbol(MF.getFunction());
CurrentFnSymForSize = CurrentFnSym;
+ CurrentFnBegin = nullptr;
+ CurExceptionSym = nullptr;
+ bool NeedsLocalForSize = MAI->needsLocalForSize();
+ if (!MMI->getLandingPads().empty() || MMI->hasDebugInfo() ||
+ NeedsLocalForSize) {
+ CurrentFnBegin = createTempSymbol("func_begin");
+ if (NeedsLocalForSize)
+ CurrentFnSymForSize = CurrentFnBegin;
+ }
if (isVerbose())
LI = &getAnalysis<MachineLoopInfo>();
@@ -1273,10 +1272,8 @@ void AsmPrinter::EmitJumpTableInfo() {
bool JTInDiffSection = !TLOF.shouldPutJumpTableInFunctionSection(
MJTI->getEntryKind() == MachineJumpTableInfo::EK_LabelDifference32,
*F);
- if (!JTInDiffSection) {
- OutStreamer.SwitchSection(TLOF.SectionForGlobal(F, *Mang, TM));
- } else {
- // Otherwise, drop it in the readonly section.
+ if (JTInDiffSection) {
+ // Drop it in the readonly section.
const MCSection *ReadOnlySection =
TLOF.getSectionForJumpTable(*F, *Mang, TM);
OutStreamer.SwitchSection(ReadOnlySection);
@@ -1585,7 +1582,7 @@ void AsmPrinter::EmitLabelDifference(const MCSymbol *Hi, const MCSymbol *Lo,
}
// Otherwise, emit with .set (aka assignment).
- MCSymbol *SetLabel = GetTempSymbol("set", SetCounter++);
+ MCSymbol *SetLabel = createTempSymbol("set");
OutStreamer.EmitAssignment(SetLabel, Diff);
OutStreamer.EmitSymbolValue(SetLabel, Size);
}
@@ -1667,8 +1664,7 @@ const MCExpr *AsmPrinter::lowerConstant(const Constant *CV) {
// If the code isn't optimized, there may be outstanding folding
// opportunities. Attempt to fold the expression using DataLayout as a
// last resort before giving up.
- if (Constant *C = ConstantFoldConstantExpression(
- CE, TM.getDataLayout()))
+ if (Constant *C = ConstantFoldConstantExpression(CE, *TM.getDataLayout()))
if (C != CE)
return lowerConstant(C);
@@ -2112,9 +2108,15 @@ static void handleIndirectSymViaGOTPCRel(AsmPrinter &AP, const MCExpr **ME,
//
// gotpcrelcst := <offset from @foo base> + <cst>
//
+ // If gotpcrelcst is positive it means that we can safely fold the pc rel
+ // displacement into the GOTPCREL. We can also have an extra offset <cst>
+ // if the target knows how to encode it.
+ //
int64_t GOTPCRelCst = Offset + MV.getConstant();
if (GOTPCRelCst < 0)
return;
+ if (!AP.getObjFileLowering().supportGOTPCRelWithOffset() && GOTPCRelCst != 0)
+ return;
// Emit the GOT PC relative to replace the got equivalent global, i.e.:
//
@@ -2134,18 +2136,16 @@ static void handleIndirectSymViaGOTPCRel(AsmPrinter &AP, const MCExpr **ME,
//
AsmPrinter::GOTEquivUsePair Result = AP.GlobalGOTEquivs[GOTEquivSym];
const GlobalVariable *GV = Result.first;
- unsigned NumUses = Result.second;
+ int NumUses = (int)Result.second;
const GlobalValue *FinalGV = dyn_cast<GlobalValue>(GV->getOperand(0));
const MCSymbol *FinalSym = AP.getSymbol(FinalGV);
- *ME = AP.getObjFileLowering().getIndirectSymViaGOTPCRel(FinalSym,
- GOTPCRelCst);
+ *ME = AP.getObjFileLowering().getIndirectSymViaGOTPCRel(
+ FinalSym, MV, Offset, AP.MMI, AP.OutStreamer);
// Update GOT equivalent usage information
--NumUses;
- if (NumUses)
+ if (NumUses >= 0)
AP.GlobalGOTEquivs[GOTEquivSym] = std::make_pair(GV, NumUses);
- else
- AP.GlobalGOTEquivs.erase(GOTEquivSym);
}
static void emitGlobalConstantImpl(const Constant *CV, AsmPrinter &AP,
@@ -2206,7 +2206,7 @@ static void emitGlobalConstantImpl(const Constant *CV, AsmPrinter &AP,
// If the constant expression's size is greater than 64-bits, then we have
// to emit the value in chunks. Try to constant fold the value and emit it
// that way.
- Constant *New = ConstantFoldConstantExpression(CE, DL);
+ Constant *New = ConstantFoldConstantExpression(CE, *DL);
if (New && New != CE)
return emitGlobalConstantImpl(New, AP);
}
@@ -2257,23 +2257,10 @@ void AsmPrinter::printOffset(int64_t Offset, raw_ostream &OS) const {
// Symbol Lowering Routines.
//===----------------------------------------------------------------------===//
-/// GetTempSymbol - Return the MCSymbol corresponding to the assembler
-/// temporary label with the specified stem and unique ID.
-MCSymbol *AsmPrinter::GetTempSymbol(const Twine &Name, unsigned ID) const {
- const DataLayout *DL = TM.getDataLayout();
- return OutContext.GetOrCreateSymbol(Twine(DL->getPrivateGlobalPrefix()) +
- Name + Twine(ID));
-}
-
-/// GetTempSymbol - Return an assembler temporary label with the specified
-/// stem.
-MCSymbol *AsmPrinter::GetTempSymbol(const Twine &Name) const {
- const DataLayout *DL = TM.getDataLayout();
- return OutContext.GetOrCreateSymbol(Twine(DL->getPrivateGlobalPrefix())+
- Name);
+MCSymbol *AsmPrinter::createTempSymbol(const Twine &Name) const {
+ return OutContext.createTempSymbol(Name, true);
}
-
MCSymbol *AsmPrinter::GetBlockAddressSymbol(const BlockAddress *BA) const {
return MMI->getAddrLabelSymbol(BA->getBasicBlock());
}
@@ -2523,3 +2510,5 @@ GCMetadataPrinter *AsmPrinter::GetOrCreateGCPrinter(GCStrategy &S) {
/// Pin vtable to this file.
AsmPrinterHandler::~AsmPrinterHandler() {}
+
+void AsmPrinterHandler::markFunctionEnd() {}
diff --git a/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp b/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp
index d0958c1..9de36da 100644
--- a/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp
+++ b/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp
@@ -12,9 +12,12 @@
//===----------------------------------------------------------------------===//
#include "ByteStreamer.h"
+#include "DwarfDebug.h"
#include "DwarfExpression.h"
#include "llvm/ADT/Twine.h"
#include "llvm/CodeGen/AsmPrinter.h"
+#include "llvm/CodeGen/DIE.h"
+#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/MC/MCAsmInfo.h"
@@ -27,29 +30,11 @@
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Target/TargetLoweringObjectFile.h"
#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
using namespace llvm;
#define DEBUG_TYPE "asm-printer"
-void DebugLocDwarfExpression::EmitOp(uint8_t Op, const char *Comment) {
- BS.EmitInt8(
- Op, Comment ? Twine(Comment) + " " + dwarf::OperationEncodingString(Op)
- : dwarf::OperationEncodingString(Op));
-}
-
-void DebugLocDwarfExpression::EmitSigned(int Value) {
- BS.EmitSLEB128(Value, Twine(Value));
-}
-
-void DebugLocDwarfExpression::EmitUnsigned(unsigned Value) {
- BS.EmitULEB128(Value, Twine(Value));
-}
-
-bool DebugLocDwarfExpression::isFrameRegister(unsigned MachineReg) {
- // This information is not available while emitting .debug_loc entries.
- return false;
-}
-
//===----------------------------------------------------------------------===//
// Dwarf Emission Helper Routines
//===----------------------------------------------------------------------===//
@@ -178,57 +163,28 @@ void AsmPrinter::EmitTTypeReference(const GlobalValue *GV,
///
/// SectionLabel is a temporary label emitted at the start of the section that
/// Label lives in.
-void AsmPrinter::EmitSectionOffset(const MCSymbol *Label,
- const MCSymbol *SectionLabel) const {
+void AsmPrinter::emitSectionOffset(const MCSymbol *Label) const {
// On COFF targets, we have to emit the special .secrel32 directive.
if (MAI->needsDwarfSectionOffsetDirective()) {
OutStreamer.EmitCOFFSecRel32(Label);
return;
}
- // Get the section that we're referring to, based on SectionLabel.
- const MCSection &Section = SectionLabel->getSection();
-
- // If Label has already been emitted, verify that it is in the same section as
- // section label for sanity.
- assert((!Label->isInSection() || &Label->getSection() == &Section) &&
- "Section offset using wrong section base for label");
-
- // If the section in question will end up with an address of 0 anyway, we can
- // just emit an absolute reference to save a relocation.
- if (Section.isBaseAddressKnownZero()) {
+ // If the format uses relocations with dwarf, refer to the symbol directly.
+ if (MAI->doesDwarfUseRelocationsAcrossSections()) {
OutStreamer.EmitSymbolValue(Label, 4);
return;
}
// Otherwise, emit it as a label difference from the start of the section.
- EmitLabelDifference(Label, SectionLabel, 4);
-}
-
-// Some targets do not provide a DWARF register number for every
-// register. This function attempts to emit a DWARF register by
-// emitting a piece of a super-register or by piecing together
-// multiple subregisters that alias the register.
-void AsmPrinter::EmitDwarfRegOpPiece(ByteStreamer &Streamer,
- const MachineLocation &MLoc,
- unsigned PieceSizeInBits,
- unsigned PieceOffsetInBits) const {
- assert(MLoc.isReg() && "MLoc must be a register");
- DebugLocDwarfExpression Expr(*this, Streamer);
- Expr.AddMachineRegPiece(MLoc.getReg(), PieceSizeInBits, PieceOffsetInBits);
-}
-
-void AsmPrinter::EmitDwarfOpPiece(ByteStreamer &Streamer,
- unsigned PieceSizeInBits,
- unsigned PieceOffsetInBits) const {
- DebugLocDwarfExpression Expr(*this, Streamer);
- Expr.AddOpPiece(PieceSizeInBits, PieceOffsetInBits);
+ EmitLabelDifference(Label, Label->getSection().getBeginSymbol(), 4);
}
/// EmitDwarfRegOp - Emit dwarf register operation.
void AsmPrinter::EmitDwarfRegOp(ByteStreamer &Streamer,
const MachineLocation &MLoc) const {
- DebugLocDwarfExpression Expr(*this, Streamer);
+ DebugLocDwarfExpression Expr(*MF->getSubtarget().getRegisterInfo(),
+ getDwarfDebug()->getDwarfVersion(), Streamer);
const MCRegisterInfo *MRI = MMI->getContext().getRegisterInfo();
int Reg = MRI->getDwarfRegNum(MLoc.getReg(), false);
if (Reg < 0) {
@@ -285,3 +241,60 @@ void AsmPrinter::emitCFIInstruction(const MCCFIInstruction &Inst) const {
break;
}
}
+
+void AsmPrinter::emitDwarfDIE(const DIE &Die) const {
+ // Get the abbreviation for this DIE.
+ const DIEAbbrev &Abbrev = Die.getAbbrev();
+
+ // Emit the code (index) for the abbreviation.
+ if (isVerbose())
+ OutStreamer.AddComment("Abbrev [" + Twine(Abbrev.getNumber()) +
+ "] 0x" + Twine::utohexstr(Die.getOffset()) +
+ ":0x" + Twine::utohexstr(Die.getSize()) + " " +
+ dwarf::TagString(Abbrev.getTag()));
+ EmitULEB128(Abbrev.getNumber());
+
+ const SmallVectorImpl<DIEValue *> &Values = Die.getValues();
+ const SmallVectorImpl<DIEAbbrevData> &AbbrevData = Abbrev.getData();
+
+ // Emit the DIE attribute values.
+ for (unsigned i = 0, N = Values.size(); i < N; ++i) {
+ dwarf::Attribute Attr = AbbrevData[i].getAttribute();
+ dwarf::Form Form = AbbrevData[i].getForm();
+ assert(Form && "Too many attributes for DIE (check abbreviation)");
+
+ if (isVerbose()) {
+ OutStreamer.AddComment(dwarf::AttributeString(Attr));
+ if (Attr == dwarf::DW_AT_accessibility)
+ OutStreamer.AddComment(dwarf::AccessibilityString(
+ cast<DIEInteger>(Values[i])->getValue()));
+ }
+
+ // Emit an attribute using the defined form.
+ Values[i]->EmitValue(this, Form);
+ }
+
+ // Emit the DIE children if any.
+ if (Abbrev.hasChildren()) {
+ for (auto &Child : Die.getChildren())
+ emitDwarfDIE(*Child);
+
+ OutStreamer.AddComment("End Of Children Mark");
+ EmitInt8(0);
+ }
+}
+
+void
+AsmPrinter::emitDwarfAbbrevs(const std::vector<DIEAbbrev *>& Abbrevs) const {
+ // For each abbreviation.
+ for (const DIEAbbrev *Abbrev : Abbrevs) {
+ // Emit the abbreviation's code (base-1 index).
+ EmitULEB128(Abbrev->getNumber(), "Abbreviation Code");
+
+ // Emit the abbreviations data.
+ Abbrev->Emit(this);
+ }
+
+ // Mark end of abbreviations.
+ EmitULEB128(0, "EOM(3)");
+}
diff --git a/lib/CodeGen/AsmPrinter/AsmPrinterHandler.h b/lib/CodeGen/AsmPrinter/AsmPrinterHandler.h
index 31867dd..f1efe9d 100644
--- a/lib/CodeGen/AsmPrinter/AsmPrinterHandler.h
+++ b/lib/CodeGen/AsmPrinter/AsmPrinterHandler.h
@@ -41,6 +41,10 @@ public:
/// call.
virtual void beginFunction(const MachineFunction *MF) = 0;
+ /// \brief Emit any end-of-function markers (like .cfi_endproc). This is
+ /// called before endFunction and cannot switch sections.
+ virtual void markFunctionEnd();
+
/// \brief Gather post-function debug information.
/// Please note that some AsmPrinter implementations may not call
/// beginFunction at all.
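[Editor's note] A sketch of a handler using the new hook. This is a hypothetical subclass, not part of the patch; the remaining overrides are stubbed and assume the usual AsmPrinterHandler pure virtuals:

#include "AsmPrinterHandler.h" // private AsmPrinter header, assumed visible
#include "llvm/CodeGen/AsmPrinter.h"
#include "llvm/MC/MCStreamer.h"

class EndMarkerHandler : public llvm::AsmPrinterHandler {
  llvm::AsmPrinter *Asm;

public:
  explicit EndMarkerHandler(llvm::AsmPrinter *A) : Asm(A) {}

  // Runs right after the function body, before jump tables are printed;
  // it must not switch sections, so only same-section markers belong here.
  void markFunctionEnd() override { Asm->OutStreamer.EmitCFIEndProc(); }

  // Post-function work that may switch sections still goes here.
  void endFunction(const llvm::MachineFunction *MF) override {}

  void setSymbolSize(const llvm::MCSymbol *Sym, uint64_t Size) override {}
  void endModule() override {}
  void beginFunction(const llvm::MachineFunction *MF) override {}
  void beginInstruction(const llvm::MachineInstr *MI) override {}
  void endInstruction() override {}
};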
diff --git a/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp b/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp
index e6e7c97..bf63b1b 100644
--- a/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp
+++ b/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp
@@ -73,7 +73,8 @@ static void srcMgrDiagHandler(const SMDiagnostic &Diag, void *diagInfo) {
}
/// EmitInlineAsm - Emit a blob of inline asm to the output streamer.
-void AsmPrinter::EmitInlineAsm(StringRef Str, const MDNode *LocMDNode,
+void AsmPrinter::EmitInlineAsm(StringRef Str, const MCSubtargetInfo &STI,
+ const MDNode *LocMDNode,
InlineAsm::AsmDialect Dialect) const {
assert(!Str.empty() && "Can't emit empty inline asm block");
@@ -93,17 +94,7 @@ void AsmPrinter::EmitInlineAsm(StringRef Str, const MDNode *LocMDNode,
!OutStreamer.isIntegratedAssemblerRequired()) {
emitInlineAsmStart();
OutStreamer.EmitRawText(Str);
- // If we have a machine function then grab the MCSubtarget off of that,
- // otherwise we're at the module level and want to construct one from
- // the default CPU and target triple.
- if (MF) {
- emitInlineAsmEnd(MF->getSubtarget<MCSubtargetInfo>(), nullptr);
- } else {
- std::unique_ptr<MCSubtargetInfo> STI(TM.getTarget().createMCSubtargetInfo(
- TM.getTargetTriple(), TM.getTargetCPU(),
- TM.getTargetFeatureString()));
- emitInlineAsmEnd(*STI, nullptr);
- }
+ emitInlineAsmEnd(STI, nullptr);
return;
}
@@ -135,19 +126,11 @@ void AsmPrinter::EmitInlineAsm(StringRef Str, const MDNode *LocMDNode,
std::unique_ptr<MCAsmParser> Parser(
createMCAsmParser(SrcMgr, OutContext, OutStreamer, *MAI));
- // Initialize the parser with a fresh subtarget info. It is better to use a
- // new STI here because the parser may modify it and we do not want those
- // modifications to persist after parsing the inlineasm. The modifications
- // made by the parser will be seen by the code emitters because it passes
- // the current STI down to the EncodeInstruction() method.
- std::unique_ptr<MCSubtargetInfo> STI(TM.getTarget().createMCSubtargetInfo(
- TM.getTargetTriple(), TM.getTargetCPU(), TM.getTargetFeatureString()));
-
- // Preserve a copy of the original STI because the parser may modify it. For
- // example, when switching between arm and thumb mode. If the target needs to
- // emit code to return to the original state it can do so in
+ // Create a temporary copy of the original STI because the parser may modify
+ // it. For example, when switching between arm and thumb mode. If the target
+ // needs to emit code to return to the original state it can do so in
// emitInlineAsmEnd().
- MCSubtargetInfo STIOrig = *STI;
+ MCSubtargetInfo TmpSTI = STI;
// We create a new MCInstrInfo here since we might be at the module level
// and not have a MachineFunction to initialize the TargetInstrInfo from and
@@ -155,7 +138,7 @@ void AsmPrinter::EmitInlineAsm(StringRef Str, const MDNode *LocMDNode,
// because it's not subtarget dependent.
std::unique_ptr<MCInstrInfo> MII(TM.getTarget().createMCInstrInfo());
std::unique_ptr<MCTargetAsmParser> TAP(TM.getTarget().createMCAsmParser(
- *STI, *Parser, *MII, TM.Options.MCOptions));
+ TmpSTI, *Parser, *MII, TM.Options.MCOptions));
if (!TAP)
report_fatal_error("Inline asm not supported by this streamer because"
" we don't have an asm parser for this target\n");
@@ -170,7 +153,7 @@ void AsmPrinter::EmitInlineAsm(StringRef Str, const MDNode *LocMDNode,
// Don't implicitly switch to the text section before the asm.
int Res = Parser->Run(/*NoInitialTextSection*/ true,
/*NoFinalize*/ true);
- emitInlineAsmEnd(STIOrig, STI.get());
+ emitInlineAsmEnd(STI, &TmpSTI);
if (Res && !HasDiagHandler)
report_fatal_error("Error parsing inline asm\n");
}
@@ -505,7 +488,7 @@ void AsmPrinter::EmitInlineAsm(const MachineInstr *MI) const {
else
EmitMSInlineAsmStr(AsmStr, MI, MMI, InlineAsmVariant, AP, LocCookie, OS);
- EmitInlineAsm(OS.str(), LocMD, MI->getInlineAsmDialect());
+ EmitInlineAsm(OS.str(), getSubtargetInfo(), LocMD, MI->getInlineAsmDialect());
// Emit the #NOAPP end marker. This has to happen even if verbose-asm isn't
// enabled, so we use emitRawComment.
diff --git a/lib/CodeGen/AsmPrinter/ByteStreamer.h b/lib/CodeGen/AsmPrinter/ByteStreamer.h
index 42be114..179a4d4 100644
--- a/lib/CodeGen/AsmPrinter/ByteStreamer.h
+++ b/lib/CodeGen/AsmPrinter/ByteStreamer.h
@@ -19,6 +19,8 @@
#include "llvm/ADT/ArrayRef.h"
#include "llvm/CodeGen/AsmPrinter.h"
#include "llvm/MC/MCStreamer.h"
+#include "llvm/Support/LEB128.h"
+#include <string>
namespace llvm {
class ByteStreamer {
@@ -66,6 +68,33 @@ class HashingByteStreamer : public ByteStreamer {
Hash.addULEB128(DWord);
}
};
+
+class BufferByteStreamer : public ByteStreamer {
+private:
+ SmallVectorImpl<char> &Buffer;
+ // FIXME: This is actually only needed for textual asm output.
+ SmallVectorImpl<std::string> &Comments;
+
+public:
+ BufferByteStreamer(SmallVectorImpl<char> &Buffer,
+ SmallVectorImpl<std::string> &Comments)
+ : Buffer(Buffer), Comments(Comments) {}
+ void EmitInt8(uint8_t Byte, const Twine &Comment) override {
+ Buffer.push_back(Byte);
+ Comments.push_back(Comment.str());
+ }
+ void EmitSLEB128(uint64_t DWord, const Twine &Comment) override {
+ raw_svector_ostream OSE(Buffer);
+ encodeSLEB128(DWord, OSE);
+ Comments.push_back(Comment.str());
+ }
+ void EmitULEB128(uint64_t DWord, const Twine &Comment) override {
+ raw_svector_ostream OSE(Buffer);
+ encodeULEB128(DWord, OSE);
+ Comments.push_back(Comment.str());
+ }
+};
+
}
#endif
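[Editor's note] How the new buffering variant might be driven, as illustrative usage only; the real consumer is the DebugLocEntry plumbing elsewhere in this patch:

#include "ByteStreamer.h" // private AsmPrinter header, assumed visible
#include "llvm/ADT/SmallString.h"
#include "llvm/Support/Dwarf.h"
#include <string>

void bufferDemo() {
  llvm::SmallString<32> Bytes;
  llvm::SmallVector<std::string, 8> Comments;
  llvm::BufferByteStreamer BS(Bytes, Comments);
  BS.EmitInt8(llvm::dwarf::DW_OP_reg5, "DW_OP_reg5"); // one raw byte
  BS.EmitULEB128(129, "big operand");                 // appends 0x81 0x01
  // Bytes holds the raw encoding; Comments holds one note per Emit* call,
  // to be attached when the buffer is finally streamed out.
}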
diff --git a/lib/CodeGen/AsmPrinter/DIE.cpp b/lib/CodeGen/AsmPrinter/DIE.cpp
index 64ba56b..1a706f7 100644
--- a/lib/CodeGen/AsmPrinter/DIE.cpp
+++ b/lib/CodeGen/AsmPrinter/DIE.cpp
@@ -28,6 +28,7 @@
#include "llvm/Support/FormattedStream.h"
#include "llvm/Support/LEB128.h"
#include "llvm/Support/MD5.h"
+#include "llvm/Support/raw_ostream.h"
using namespace llvm;
//===----------------------------------------------------------------------===//
@@ -60,7 +61,7 @@ void DIEAbbrev::Profile(FoldingSetNodeID &ID) const {
/// Emit - Print the abbreviation using the specified asm printer.
///
-void DIEAbbrev::Emit(AsmPrinter *AP) const {
+void DIEAbbrev::Emit(const AsmPrinter *AP) const {
// Emit its Dwarf tag type.
AP->EmitULEB128(Tag, dwarf::TagString(Tag));
@@ -204,7 +205,7 @@ void DIEValue::dump() const {
/// EmitValue - Emit integer of appropriate size.
///
-void DIEInteger::EmitValue(AsmPrinter *Asm, dwarf::Form Form) const {
+void DIEInteger::EmitValue(const AsmPrinter *Asm, dwarf::Form Form) const {
unsigned Size = ~0U;
switch (Form) {
case dwarf::DW_FORM_flag_present:
@@ -218,6 +219,7 @@ void DIEInteger::EmitValue(AsmPrinter *Asm, dwarf::Form Form) const {
case dwarf::DW_FORM_ref2: // Fall thru
case dwarf::DW_FORM_data2: Size = 2; break;
case dwarf::DW_FORM_sec_offset: // Fall thru
+ case dwarf::DW_FORM_strp: // Fall thru
case dwarf::DW_FORM_ref4: // Fall thru
case dwarf::DW_FORM_data4: Size = 4; break;
case dwarf::DW_FORM_ref8: // Fall thru
@@ -229,6 +231,9 @@ void DIEInteger::EmitValue(AsmPrinter *Asm, dwarf::Form Form) const {
case dwarf::DW_FORM_sdata: Asm->EmitSLEB128(Integer); return;
case dwarf::DW_FORM_addr:
Size = Asm->getDataLayout().getPointerSize(); break;
+ case dwarf::DW_FORM_ref_addr:
+ Size = SizeOf(Asm, dwarf::DW_FORM_ref_addr);
+ break;
default: llvm_unreachable("DIE Value form not supported yet");
}
Asm->OutStreamer.EmitIntValue(Integer, Size);
@@ -236,7 +241,7 @@ void DIEInteger::EmitValue(AsmPrinter *Asm, dwarf::Form Form) const {
/// SizeOf - Determine size of integer value in bytes.
///
-unsigned DIEInteger::SizeOf(AsmPrinter *AP, dwarf::Form Form) const {
+unsigned DIEInteger::SizeOf(const AsmPrinter *AP, dwarf::Form Form) const {
switch (Form) {
case dwarf::DW_FORM_flag_present: return 0;
case dwarf::DW_FORM_flag: // Fall thru
@@ -245,6 +250,7 @@ unsigned DIEInteger::SizeOf(AsmPrinter *AP, dwarf::Form Form) const {
case dwarf::DW_FORM_ref2: // Fall thru
case dwarf::DW_FORM_data2: return sizeof(int16_t);
case dwarf::DW_FORM_sec_offset: // Fall thru
+ case dwarf::DW_FORM_strp: // Fall thru
case dwarf::DW_FORM_ref4: // Fall thru
case dwarf::DW_FORM_data4: return sizeof(int32_t);
case dwarf::DW_FORM_ref8: // Fall thru
@@ -255,6 +261,10 @@ unsigned DIEInteger::SizeOf(AsmPrinter *AP, dwarf::Form Form) const {
case dwarf::DW_FORM_udata: return getULEB128Size(Integer);
case dwarf::DW_FORM_sdata: return getSLEB128Size(Integer);
case dwarf::DW_FORM_addr: return AP->getDataLayout().getPointerSize();
+ case dwarf::DW_FORM_ref_addr:
+ if (AP->OutStreamer.getContext().getDwarfVersion() == 2)
+ return AP->getDataLayout().getPointerSize();
+ return sizeof(int32_t);
default: llvm_unreachable("DIE Value form not supported yet");
}
}
@@ -272,13 +282,13 @@ void DIEInteger::print(raw_ostream &O) const {
/// EmitValue - Emit expression value.
///
-void DIEExpr::EmitValue(AsmPrinter *AP, dwarf::Form Form) const {
+void DIEExpr::EmitValue(const AsmPrinter *AP, dwarf::Form Form) const {
AP->OutStreamer.EmitValue(Expr, SizeOf(AP, Form));
}
/// SizeOf - Determine size of expression value in bytes.
///
-unsigned DIEExpr::SizeOf(AsmPrinter *AP, dwarf::Form Form) const {
+unsigned DIEExpr::SizeOf(const AsmPrinter *AP, dwarf::Form Form) const {
if (Form == dwarf::DW_FORM_data4) return 4;
if (Form == dwarf::DW_FORM_sec_offset) return 4;
if (Form == dwarf::DW_FORM_strp) return 4;
@@ -298,7 +308,7 @@ void DIEExpr::print(raw_ostream &O) const {
/// EmitValue - Emit label value.
///
-void DIELabel::EmitValue(AsmPrinter *AP, dwarf::Form Form) const {
+void DIELabel::EmitValue(const AsmPrinter *AP, dwarf::Form Form) const {
AP->EmitLabelReference(Label, SizeOf(AP, Form),
Form == dwarf::DW_FORM_strp ||
Form == dwarf::DW_FORM_sec_offset ||
@@ -307,7 +317,7 @@ void DIELabel::EmitValue(AsmPrinter *AP, dwarf::Form Form) const {
/// SizeOf - Determine size of label value in bytes.
///
-unsigned DIELabel::SizeOf(AsmPrinter *AP, dwarf::Form Form) const {
+unsigned DIELabel::SizeOf(const AsmPrinter *AP, dwarf::Form Form) const {
if (Form == dwarf::DW_FORM_data4) return 4;
if (Form == dwarf::DW_FORM_sec_offset) return 4;
if (Form == dwarf::DW_FORM_strp) return 4;
@@ -326,13 +336,13 @@ void DIELabel::print(raw_ostream &O) const {
/// EmitValue - Emit delta value.
///
-void DIEDelta::EmitValue(AsmPrinter *AP, dwarf::Form Form) const {
+void DIEDelta::EmitValue(const AsmPrinter *AP, dwarf::Form Form) const {
AP->EmitLabelDifference(LabelHi, LabelLo, SizeOf(AP, Form));
}
/// SizeOf - Determine size of delta value in bytes.
///
-unsigned DIEDelta::SizeOf(AsmPrinter *AP, dwarf::Form Form) const {
+unsigned DIEDelta::SizeOf(const AsmPrinter *AP, dwarf::Form Form) const {
if (Form == dwarf::DW_FORM_data4) return 4;
if (Form == dwarf::DW_FORM_sec_offset) return 4;
if (Form == dwarf::DW_FORM_strp) return 4;
@@ -351,13 +361,13 @@ void DIEDelta::print(raw_ostream &O) const {
/// EmitValue - Emit string value.
///
-void DIEString::EmitValue(AsmPrinter *AP, dwarf::Form Form) const {
+void DIEString::EmitValue(const AsmPrinter *AP, dwarf::Form Form) const {
Access->EmitValue(AP, Form);
}
/// SizeOf - Determine size of string value in bytes.
///
-unsigned DIEString::SizeOf(AsmPrinter *AP, dwarf::Form Form) const {
+unsigned DIEString::SizeOf(const AsmPrinter *AP, dwarf::Form Form) const {
return Access->SizeOf(AP, Form);
}
@@ -372,32 +382,9 @@ void DIEString::print(raw_ostream &O) const {
// DIEEntry Implementation
//===----------------------------------------------------------------------===//
-/// Emit something like ".long Hi+Offset-Lo" where the size in bytes of the
-/// directive is specified by Size and Hi/Lo specify the labels.
-static void emitLabelOffsetDifference(MCStreamer &Streamer, const MCSymbol *Hi,
- uint64_t Offset, const MCSymbol *Lo,
- unsigned Size) {
- MCContext &Context = Streamer.getContext();
-
- // Emit Hi+Offset - Lo
- // Get the Hi+Offset expression.
- const MCExpr *Plus =
- MCBinaryExpr::CreateAdd(MCSymbolRefExpr::Create(Hi, Context),
- MCConstantExpr::Create(Offset, Context), Context);
-
- // Get the Hi+Offset-Lo expression.
- const MCExpr *Diff = MCBinaryExpr::CreateSub(
- Plus, MCSymbolRefExpr::Create(Lo, Context), Context);
-
- // Otherwise, emit with .set (aka assignment).
- MCSymbol *SetLabel = Context.CreateTempSymbol();
- Streamer.EmitAssignment(SetLabel, Diff);
- Streamer.EmitSymbolValue(SetLabel, Size);
-}
-
/// EmitValue - Emit debug information entry offset.
///
-void DIEEntry::EmitValue(AsmPrinter *AP, dwarf::Form Form) const {
+void DIEEntry::EmitValue(const AsmPrinter *AP, dwarf::Form Form) const {
if (Form == dwarf::DW_FORM_ref_addr) {
const DwarfDebug *DD = AP->getDwarfDebug();
@@ -413,14 +400,12 @@ void DIEEntry::EmitValue(AsmPrinter *AP, dwarf::Form Form) const {
AP->EmitLabelPlusOffset(CU->getSectionSym(), Addr,
DIEEntry::getRefAddrSize(AP));
else
- emitLabelOffsetDifference(AP->OutStreamer, CU->getSectionSym(), Addr,
- CU->getSectionSym(),
- DIEEntry::getRefAddrSize(AP));
+ AP->OutStreamer.EmitIntValue(Addr, DIEEntry::getRefAddrSize(AP));
} else
AP->EmitInt32(Entry.getOffset());
}
-unsigned DIEEntry::getRefAddrSize(AsmPrinter *AP) {
+unsigned DIEEntry::getRefAddrSize(const AsmPrinter *AP) {
// DWARF4: References that use the attribute form DW_FORM_ref_addr are
// specified to be four bytes in the DWARF 32-bit format and eight bytes
// in the DWARF 64-bit format, while DWARF Version 2 specifies that such
@@ -441,7 +426,7 @@ void DIEEntry::print(raw_ostream &O) const {
//===----------------------------------------------------------------------===//
// DIETypeSignature Implementation
//===----------------------------------------------------------------------===//
-void DIETypeSignature::EmitValue(AsmPrinter *Asm, dwarf::Form Form) const {
+void DIETypeSignature::EmitValue(const AsmPrinter *Asm, dwarf::Form Form) const {
assert(Form == dwarf::DW_FORM_ref_sig8);
Asm->OutStreamer.EmitIntValue(Unit.getTypeSignature(), 8);
}
@@ -460,7 +445,7 @@ void DIETypeSignature::dump() const { print(dbgs()); }
/// ComputeSize - calculate the size of the location expression.
///
-unsigned DIELoc::ComputeSize(AsmPrinter *AP) const {
+unsigned DIELoc::ComputeSize(const AsmPrinter *AP) const {
if (!Size) {
const SmallVectorImpl<DIEAbbrevData> &AbbrevData = Abbrev.getData();
for (unsigned i = 0, N = Values.size(); i < N; ++i)
@@ -472,7 +457,7 @@ unsigned DIELoc::ComputeSize(AsmPrinter *AP) const {
/// EmitValue - Emit location data.
///
-void DIELoc::EmitValue(AsmPrinter *Asm, dwarf::Form Form) const {
+void DIELoc::EmitValue(const AsmPrinter *Asm, dwarf::Form Form) const {
switch (Form) {
default: llvm_unreachable("Improper form for block");
case dwarf::DW_FORM_block1: Asm->EmitInt8(Size); break;
@@ -490,7 +475,7 @@ void DIELoc::EmitValue(AsmPrinter *Asm, dwarf::Form Form) const {
/// SizeOf - Determine size of location data in bytes.
///
-unsigned DIELoc::SizeOf(AsmPrinter *AP, dwarf::Form Form) const {
+unsigned DIELoc::SizeOf(const AsmPrinter *AP, dwarf::Form Form) const {
switch (Form) {
case dwarf::DW_FORM_block1: return Size + sizeof(int8_t);
case dwarf::DW_FORM_block2: return Size + sizeof(int16_t);
@@ -515,7 +500,7 @@ void DIELoc::print(raw_ostream &O) const {
/// ComputeSize - calculate the size of the block.
///
-unsigned DIEBlock::ComputeSize(AsmPrinter *AP) const {
+unsigned DIEBlock::ComputeSize(const AsmPrinter *AP) const {
if (!Size) {
const SmallVectorImpl<DIEAbbrevData> &AbbrevData = Abbrev.getData();
for (unsigned i = 0, N = Values.size(); i < N; ++i)
@@ -527,7 +512,7 @@ unsigned DIEBlock::ComputeSize(AsmPrinter *AP) const {
/// EmitValue - Emit block data.
///
-void DIEBlock::EmitValue(AsmPrinter *Asm, dwarf::Form Form) const {
+void DIEBlock::EmitValue(const AsmPrinter *Asm, dwarf::Form Form) const {
switch (Form) {
default: llvm_unreachable("Improper form for block");
case dwarf::DW_FORM_block1: Asm->EmitInt8(Size); break;
@@ -543,7 +528,7 @@ void DIEBlock::EmitValue(AsmPrinter *Asm, dwarf::Form Form) const {
/// SizeOf - Determine size of block data in bytes.
///
-unsigned DIEBlock::SizeOf(AsmPrinter *AP, dwarf::Form Form) const {
+unsigned DIEBlock::SizeOf(const AsmPrinter *AP, dwarf::Form Form) const {
switch (Form) {
case dwarf::DW_FORM_block1: return Size + sizeof(int8_t);
case dwarf::DW_FORM_block2: return Size + sizeof(int16_t);
@@ -564,7 +549,7 @@ void DIEBlock::print(raw_ostream &O) const {
// DIELocList Implementation
//===----------------------------------------------------------------------===//
-unsigned DIELocList::SizeOf(AsmPrinter *AP, dwarf::Form Form) const {
+unsigned DIELocList::SizeOf(const AsmPrinter *AP, dwarf::Form Form) const {
if (Form == dwarf::DW_FORM_data4)
return 4;
if (Form == dwarf::DW_FORM_sec_offset)
@@ -574,14 +559,14 @@ unsigned DIELocList::SizeOf(AsmPrinter *AP, dwarf::Form Form) const {
/// EmitValue - Emit location list value.
///
-void DIELocList::EmitValue(AsmPrinter *AP, dwarf::Form Form) const {
+void DIELocList::EmitValue(const AsmPrinter *AP, dwarf::Form Form) const {
DwarfDebug *DD = AP->getDwarfDebug();
MCSymbol *Label = DD->getDebugLocEntries()[Index].Label;
if (AP->MAI->doesDwarfUseRelocationsAcrossSections() && !DD->useSplitDwarf())
- AP->EmitSectionOffset(Label, DD->getDebugLocSym());
+ AP->emitSectionOffset(Label);
else
- AP->EmitLabelDifference(Label, DD->getDebugLocSym(), 4);
+ AP->EmitLabelDifference(Label, Label->getSection().getBeginSymbol(), 4);
}
#ifndef NDEBUG
diff --git a/lib/CodeGen/AsmPrinter/DIEHash.cpp b/lib/CodeGen/AsmPrinter/DIEHash.cpp
index 1e2ba2c..da7252a 100644
--- a/lib/CodeGen/AsmPrinter/DIEHash.cpp
+++ b/lib/CodeGen/AsmPrinter/DIEHash.cpp
@@ -510,7 +510,7 @@ uint64_t DIEHash::computeDIEODRSignature(const DIE &Die) {
// ... take the least significant 8 bytes and return those. Our MD5
  // implementation always returns its results in little endian; swap bytes
// appropriately.
- return *reinterpret_cast<support::ulittle64_t *>(Result + 8);
+ return support::endian::read64le(Result + 8);
}
/// This is based on the type signature computation given in section 7.27 of the
@@ -531,7 +531,7 @@ uint64_t DIEHash::computeCUSignature(const DIE &Die) {
// ... take the least significant 8 bytes and return those. Our MD5
  // implementation always returns its results in little endian; swap bytes
// appropriately.
- return *reinterpret_cast<support::ulittle64_t *>(Result + 8);
+ return support::endian::read64le(Result + 8);
}
/// This is based on the type signature computation given in section 7.27 of the
@@ -555,5 +555,5 @@ uint64_t DIEHash::computeTypeSignature(const DIE &Die) {
// ... take the least significant 8 bytes and return those. Our MD5
  // implementation always returns its results in little endian; swap bytes
// appropriately.
- return *reinterpret_cast<support::ulittle64_t *>(Result + 8);
+ return support::endian::read64le(Result + 8);
}
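
For reference, support::endian::read64le is a byte-wise little-endian load; a minimal standalone sketch of the idea (hypothetical code, not LLVM's implementation) shows why it is preferable to the reinterpret_cast it replaces, which assumed a little-endian host and suitable alignment:

    #include <cstdint>

    // Assemble a uint64_t from 8 little-endian bytes, one byte at a
    // time; P needs no particular alignment and no aliasing tricks.
    uint64_t read64le_sketch(const uint8_t *P) {
      uint64_t V = 0;
      for (int i = 7; i >= 0; --i)
        V = (V << 8) | P[i]; // P[0] is the least significant byte
      return V;
    }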
diff --git a/lib/CodeGen/AsmPrinter/DbgValueHistoryCalculator.cpp b/lib/CodeGen/AsmPrinter/DbgValueHistoryCalculator.cpp
index 0c2a5e5..bbdf237 100644
--- a/lib/CodeGen/AsmPrinter/DbgValueHistoryCalculator.cpp
+++ b/lib/CodeGen/AsmPrinter/DbgValueHistoryCalculator.cpp
@@ -14,6 +14,7 @@
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/IR/DebugInfo.h"
#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetRegisterInfo.h"
#include <algorithm>
#include <map>
diff --git a/lib/CodeGen/AsmPrinter/DebugLocEntry.h b/lib/CodeGen/AsmPrinter/DebugLocEntry.h
index 6d55c03..6914bbe 100644
--- a/lib/CodeGen/AsmPrinter/DebugLocEntry.h
+++ b/lib/CodeGen/AsmPrinter/DebugLocEntry.h
@@ -9,22 +9,24 @@
#ifndef LLVM_LIB_CODEGEN_ASMPRINTER_DEBUGLOCENTRY_H
#define LLVM_LIB_CODEGEN_ASMPRINTER_DEBUGLOCENTRY_H
+#include "llvm/ADT/SmallString.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DebugInfo.h"
#include "llvm/MC/MCSymbol.h"
#include "llvm/MC/MachineLocation.h"
namespace llvm {
+class AsmPrinter;
class MDNode;
/// \brief This struct describes location entries emitted in the .debug_loc
/// section.
class DebugLocEntry {
- // Begin and end symbols for the address range that this location is valid.
+ /// Begin and end symbols for the address range that this location is valid.
const MCSymbol *Begin;
const MCSymbol *End;
public:
- /// A single location or constant.
+ /// \brief A single location or constant.
struct Value {
Value(const MDNode *Var, const MDNode *Expr, int64_t i)
: Variable(Var), Expression(Expr), EntryKind(E_Integer) {
@@ -41,20 +43,20 @@ public:
Value(const MDNode *Var, const MDNode *Expr, MachineLocation Loc)
: Variable(Var), Expression(Expr), EntryKind(E_Location), Loc(Loc) {
assert(DIVariable(Var).Verify());
- assert(DIExpression(Expr).Verify());
+ assert(DIExpression(Expr)->isValid());
}
- // The variable to which this location entry corresponds.
+ /// The variable to which this location entry corresponds.
const MDNode *Variable;
- // Any complex address location expression for this Value.
+ /// Any complex address location expression for this Value.
const MDNode *Expression;
- // Type of entry that this represents.
+ /// Type of entry that this represents.
enum EntryType { E_Location, E_Integer, E_ConstantFP, E_ConstantInt };
enum EntryType EntryKind;
- // Either a constant,
+ /// Either a constant,
union {
int64_t Int;
const ConstantFP *CFP;
@@ -84,6 +86,8 @@ private:
/// A nonempty list of locations/constants belonging to this entry,
/// sorted by offset.
SmallVector<Value, 1> Values;
+ SmallString<8> DWARFBytes;
+ SmallVector<std::string, 1> Comments;
public:
DebugLocEntry(const MCSymbol *B, const MCSymbol *E, Value Val)
@@ -92,9 +96,9 @@ public:
}
/// \brief If this and Next are describing different pieces of the same
- // variable, merge them by appending Next's values to the current
- // list of values.
- // Return true if the merge was successful.
+ /// variable, merge them by appending Next's values to the current
+ /// list of values.
+ /// Return true if the merge was successful.
bool MergeValues(const DebugLocEntry &Next) {
if (Begin == Next.Begin) {
DIExpression Expr(Values[0].Expression);
@@ -135,7 +139,7 @@ public:
}) && "value must be a piece");
}
- // Sort the pieces by offset.
+  /// \brief Sort the pieces by offset.
// Remove any duplicate entries by dropping all but the first.
void sortUniqueValues() {
std::sort(Values.begin(), Values.end());
@@ -146,9 +150,18 @@ public:
}),
Values.end());
}
+
+ /// \brief Lower this entry into a DWARF expression.
+ void finalize(const AsmPrinter &AP,
+ const DITypeIdentifierMap &TypeIdentifierMap);
+
+ /// \brief Return the lowered DWARF expression.
+ StringRef getDWARFBytes() const { return DWARFBytes; }
+ /// \brief Return the assembler comments for the lowered DWARF expression.
+ const SmallVectorImpl<std::string> &getComments() const { return Comments; }
};
-/// Compare two Values for equality.
+/// \brief Compare two Values for equality.
inline bool operator==(const DebugLocEntry::Value &A,
const DebugLocEntry::Value &B) {
if (A.EntryKind != B.EntryKind)
@@ -173,7 +186,7 @@ inline bool operator==(const DebugLocEntry::Value &A,
llvm_unreachable("unhandled EntryKind");
}
-/// Compare two pieces based on their offset.
+/// \brief Compare two pieces based on their offset.
inline bool operator<(const DebugLocEntry::Value &A,
const DebugLocEntry::Value &B) {
return A.getExpression().getBitPieceOffset() <
diff --git a/lib/CodeGen/AsmPrinter/DwarfAccelTable.cpp b/lib/CodeGen/AsmPrinter/DwarfAccelTable.cpp
index a71f35e..f64338e 100644
--- a/lib/CodeGen/AsmPrinter/DwarfAccelTable.cpp
+++ b/lib/CodeGen/AsmPrinter/DwarfAccelTable.cpp
@@ -54,7 +54,7 @@ void DwarfAccelTable::ComputeBucketCount(void) {
  // Then compute the bucket count, minimum of 1 bucket.
if (num > 1024)
Header.bucket_count = num / 4;
- if (num > 16)
+ else if (num > 16)
Header.bucket_count = num / 2;
else
Header.bucket_count = num > 0 ? num : 1;
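
The else matters here: previously a large num satisfied both tests, so bucket_count was first set to num / 4 and then immediately overwritten by num / 2. A standalone sketch of the corrected tiering (hypothetical helper):

    unsigned bucketCountFor(unsigned num) {
      if (num > 1024)
        return num / 4;         // e.g. num == 2048 now yields 512, not 1024
      else if (num > 16)
        return num / 2;
      return num > 0 ? num : 1; // minimum of one bucket
    }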
@@ -70,6 +70,7 @@ static bool compareDIEs(const DwarfAccelTable::HashDataContents *A,
void DwarfAccelTable::FinalizeTable(AsmPrinter *Asm, StringRef Prefix) {
// Create the individual hash data outputs.
+ Data.reserve(Entries.size());
for (StringMap<DataArray>::iterator EI = Entries.begin(), EE = Entries.end();
EI != EE; ++EI) {
@@ -95,8 +96,17 @@ void DwarfAccelTable::FinalizeTable(AsmPrinter *Asm, StringRef Prefix) {
for (size_t i = 0, e = Data.size(); i < e; ++i) {
uint32_t bucket = Data[i]->HashValue % Header.bucket_count;
Buckets[bucket].push_back(Data[i]);
- Data[i]->Sym = Asm->GetTempSymbol(Prefix, i);
+ Data[i]->Sym = Asm->createTempSymbol(Prefix);
}
+
+ // Sort the contents of the buckets by hash value so that hash
+ // collisions end up together. Stable sort makes testing easier and
+ // doesn't cost much more.
+ for (size_t i = 0; i < Buckets.size(); ++i)
+ std::stable_sort(Buckets[i].begin(), Buckets[i].end(),
+ [] (HashData *LHS, HashData *RHS) {
+ return LHS->HashValue < RHS->HashValue;
+ });
}
// Emits the header for the table via the AsmPrinter.
@@ -136,19 +146,32 @@ void DwarfAccelTable::EmitBuckets(AsmPrinter *Asm) {
Asm->EmitInt32(index);
else
Asm->EmitInt32(UINT32_MAX);
- index += Buckets[i].size();
+    // Buckets point into the list of hashes, not to the data. Do not
+ // increment the index multiple times in case of hash collisions.
+ uint64_t PrevHash = UINT64_MAX;
+ for (auto *HD : Buckets[i]) {
+ uint32_t HashValue = HD->HashValue;
+ if (PrevHash != HashValue)
+ ++index;
+ PrevHash = HashValue;
+ }
}
}
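
Because buckets now index into the list of unique hashes, colliding entries must share a slot. A runnable sketch of the bookkeeping, with assumed hash values:

    #include <cstdint>

    int main() {
      const uint32_t Hashes[] = {0x10, 0x10, 0x2f}; // first two collide
      uint32_t index = 0;
      uint64_t PrevHash = UINT64_MAX;
      for (uint32_t H : Hashes) {
        if (PrevHash != H)
          ++index; // advance once per unique hash value
        PrevHash = H;
      }
      return index == 2 ? 0 : 1; // the next bucket starts at slot 2
    }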
// Walk through the buckets and emit the individual hashes for each
// bucket.
void DwarfAccelTable::EmitHashes(AsmPrinter *Asm) {
+ uint64_t PrevHash = UINT64_MAX;
for (size_t i = 0, e = Buckets.size(); i < e; ++i) {
for (HashList::const_iterator HI = Buckets[i].begin(),
HE = Buckets[i].end();
HI != HE; ++HI) {
+ uint32_t HashValue = (*HI)->HashValue;
+ if (PrevHash == HashValue)
+ continue;
Asm->OutStreamer.AddComment("Hash in Bucket " + Twine(i));
- Asm->EmitInt32((*HI)->HashValue);
+ Asm->EmitInt32(HashValue);
+ PrevHash = HashValue;
}
}
}
@@ -157,11 +180,16 @@ void DwarfAccelTable::EmitHashes(AsmPrinter *Asm) {
// element in each bucket. This is done via a symbol subtraction from the
// beginning of the section. The non-section symbol will be output later
// when we emit the actual data.
-void DwarfAccelTable::EmitOffsets(AsmPrinter *Asm, MCSymbol *SecBegin) {
+void DwarfAccelTable::emitOffsets(AsmPrinter *Asm, const MCSymbol *SecBegin) {
+ uint64_t PrevHash = UINT64_MAX;
for (size_t i = 0, e = Buckets.size(); i < e; ++i) {
for (HashList::const_iterator HI = Buckets[i].begin(),
HE = Buckets[i].end();
HI != HE; ++HI) {
+ uint32_t HashValue = (*HI)->HashValue;
+ if (PrevHash == HashValue)
+ continue;
+ PrevHash = HashValue;
Asm->OutStreamer.AddComment("Offset in Bucket " + Twine(i));
MCContext &Context = Asm->OutStreamer.getContext();
const MCExpr *Sub = MCBinaryExpr::CreateSub(
@@ -175,17 +203,20 @@ void DwarfAccelTable::EmitOffsets(AsmPrinter *Asm, MCSymbol *SecBegin) {
// Walk through the buckets and emit the full data for each element in
// the bucket. For the string case emit the DIEs and the various offsets.
// Terminate each HashData bucket with 0.
-void DwarfAccelTable::EmitData(AsmPrinter *Asm, DwarfDebug *D,
- MCSymbol *StrSym) {
- uint64_t PrevHash = UINT64_MAX;
+void DwarfAccelTable::EmitData(AsmPrinter *Asm, DwarfDebug *D) {
for (size_t i = 0, e = Buckets.size(); i < e; ++i) {
+ uint64_t PrevHash = UINT64_MAX;
for (HashList::const_iterator HI = Buckets[i].begin(),
HE = Buckets[i].end();
HI != HE; ++HI) {
+ // Terminate the previous entry if there is no hash collision
+ // with the current one.
+ if (PrevHash != UINT64_MAX && PrevHash != (*HI)->HashValue)
+ Asm->EmitInt32(0);
// Remember to emit the label for our offset.
Asm->OutStreamer.EmitLabel((*HI)->Sym);
Asm->OutStreamer.AddComment((*HI)->Str);
- Asm->EmitSectionOffset((*HI)->Data.StrSym, StrSym);
+ Asm->emitSectionOffset((*HI)->Data.StrSym);
Asm->OutStreamer.AddComment("Num DIEs");
Asm->EmitInt32((*HI)->Data.Values.size());
for (HashDataContents *HD : (*HI)->Data.Values) {
@@ -200,17 +231,17 @@ void DwarfAccelTable::EmitData(AsmPrinter *Asm, DwarfDebug *D,
Asm->EmitInt8(HD->Flags);
}
}
- // Emit a 0 to terminate the data unless we have a hash collision.
- if (PrevHash != (*HI)->HashValue)
- Asm->EmitInt32(0);
PrevHash = (*HI)->HashValue;
}
+ // Emit the final end marker for the bucket.
+ if (!Buckets[i].empty())
+ Asm->EmitInt32(0);
}
}
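
The terminator placement can be simulated in a few lines (assumed hashes; one output line per emitted word): a 0 word ends each group of colliding entries, plus one final 0 per non-empty bucket.

    #include <cstdint>
    #include <cstdio>

    int main() {
      const uint64_t Bucket[] = {0x10, 0x10, 0x2f}; // first two collide
      uint64_t PrevHash = UINT64_MAX;
      for (uint64_t H : Bucket) {
        if (PrevHash != UINT64_MAX && PrevHash != H)
          std::puts(".long 0  // ends the previous hash group");
        std::printf("data for hash %#llx\n", (unsigned long long)H);
        PrevHash = H;
      }
      std::puts(".long 0  // final end marker for the bucket");
      return 0;
    }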
// Emit the entire data structure to the output file.
-void DwarfAccelTable::Emit(AsmPrinter *Asm, MCSymbol *SecBegin, DwarfDebug *D,
- MCSymbol *StrSym) {
+void DwarfAccelTable::emit(AsmPrinter *Asm, const MCSymbol *SecBegin,
+ DwarfDebug *D) {
// Emit the header.
EmitHeader(Asm);
@@ -221,10 +252,10 @@ void DwarfAccelTable::Emit(AsmPrinter *Asm, MCSymbol *SecBegin, DwarfDebug *D,
EmitHashes(Asm);
// Emit the offsets.
- EmitOffsets(Asm, SecBegin);
+ emitOffsets(Asm, SecBegin);
// Emit the hash data.
- EmitData(Asm, D, StrSym);
+ EmitData(Asm, D);
}
#ifndef NDEBUG
diff --git a/lib/CodeGen/AsmPrinter/DwarfAccelTable.h b/lib/CodeGen/AsmPrinter/DwarfAccelTable.h
index 74963da..e6fdf08 100644
--- a/lib/CodeGen/AsmPrinter/DwarfAccelTable.h
+++ b/lib/CodeGen/AsmPrinter/DwarfAccelTable.h
@@ -222,8 +222,8 @@ private:
void EmitHeader(AsmPrinter *);
void EmitBuckets(AsmPrinter *);
void EmitHashes(AsmPrinter *);
- void EmitOffsets(AsmPrinter *, MCSymbol *);
- void EmitData(AsmPrinter *, DwarfDebug *D, MCSymbol *StrSym);
+ void emitOffsets(AsmPrinter *, const MCSymbol *);
+ void EmitData(AsmPrinter *, DwarfDebug *D);
// Allocator for HashData and HashDataContents.
BumpPtrAllocator Allocator;
@@ -248,7 +248,7 @@ public:
void AddName(StringRef Name, MCSymbol *StrSym, const DIE *Die,
char Flags = 0);
void FinalizeTable(AsmPrinter *, StringRef);
- void Emit(AsmPrinter *, MCSymbol *, DwarfDebug *, MCSymbol *StrSym);
+ void emit(AsmPrinter *, const MCSymbol *, DwarfDebug *);
#ifndef NDEBUG
void print(raw_ostream &O);
void dump() { print(dbgs()); }
diff --git a/lib/CodeGen/AsmPrinter/DwarfCFIException.cpp b/lib/CodeGen/AsmPrinter/DwarfCFIException.cpp
index f45b24c..1bee367 100644
--- a/lib/CodeGen/AsmPrinter/DwarfCFIException.cpp
+++ b/lib/CodeGen/AsmPrinter/DwarfCFIException.cpp
@@ -39,9 +39,24 @@
#include "llvm/Target/TargetRegisterInfo.h"
using namespace llvm;
+DwarfCFIExceptionBase::DwarfCFIExceptionBase(AsmPrinter *A)
+ : EHStreamer(A), shouldEmitCFI(false) {}
+
+void DwarfCFIExceptionBase::markFunctionEnd() {
+ if (shouldEmitCFI)
+ Asm->OutStreamer.EmitCFIEndProc();
+
+ if (MMI->getLandingPads().empty())
+ return;
+
+ // Map all labels and get rid of any dead landing pads.
+ MMI->TidyLandingPads();
+}
+
DwarfCFIException::DwarfCFIException(AsmPrinter *A)
- : EHStreamer(A), shouldEmitPersonality(false), shouldEmitLSDA(false),
- shouldEmitMoves(false), moveTypeModule(AsmPrinter::CFI_M_None) {}
+ : DwarfCFIExceptionBase(A), shouldEmitPersonality(false),
+ shouldEmitLSDA(false), shouldEmitMoves(false),
+ moveTypeModule(AsmPrinter::CFI_M_None) {}
DwarfCFIException::~DwarfCFIException() {}
@@ -72,8 +87,6 @@ void DwarfCFIException::endModule() {
}
}
-/// beginFunction - Gather pre-function exception information. Assumes it's
-/// being emitted immediately after the function entry point.
void DwarfCFIException::beginFunction(const MachineFunction *MF) {
shouldEmitMoves = shouldEmitPersonality = shouldEmitLSDA = false;
@@ -100,7 +113,8 @@ void DwarfCFIException::beginFunction(const MachineFunction *MF) {
shouldEmitLSDA = shouldEmitPersonality &&
LSDAEncoding != dwarf::DW_EH_PE_omit;
- if (!shouldEmitPersonality && !shouldEmitMoves)
+ shouldEmitCFI = shouldEmitPersonality || shouldEmitMoves;
+ if (!shouldEmitCFI)
return;
Asm->OutStreamer.EmitCFIStartProc(/*IsSimple=*/false);
@@ -113,43 +127,18 @@ void DwarfCFIException::beginFunction(const MachineFunction *MF) {
TLOF.getCFIPersonalitySymbol(Per, *Asm->Mang, Asm->TM, MMI);
Asm->OutStreamer.EmitCFIPersonality(Sym, PerEncoding);
- MCSymbol *EHBegin =
- Asm->GetTempSymbol("eh_func_begin", Asm->getFunctionNumber());
- if (Asm->MAI->useAssignmentForEHBegin()) {
- MCContext &Ctx = Asm->OutContext;
- MCSymbol *CurPos = Ctx.CreateTempSymbol();
- Asm->OutStreamer.EmitLabel(CurPos);
- Asm->OutStreamer.EmitAssignment(EHBegin,
- MCSymbolRefExpr::Create(CurPos, Ctx));
- } else {
- Asm->OutStreamer.EmitLabel(EHBegin);
- }
-
// Provide LSDA information.
if (!shouldEmitLSDA)
return;
- Asm->OutStreamer.EmitCFILsda(Asm->GetTempSymbol("exception",
- Asm->getFunctionNumber()),
- LSDAEncoding);
+ Asm->OutStreamer.EmitCFILsda(Asm->getCurExceptionSym(), LSDAEncoding);
}
/// endFunction - Gather and emit post-function exception information.
///
void DwarfCFIException::endFunction(const MachineFunction *) {
- if (!shouldEmitPersonality && !shouldEmitMoves)
- return;
-
- Asm->OutStreamer.EmitCFIEndProc();
-
if (!shouldEmitPersonality)
return;
- Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("eh_func_end",
- Asm->getFunctionNumber()));
-
- // Map all labels and get rid of any dead landing pads.
- MMI->TidyLandingPads();
-
emitExceptionTable();
}
diff --git a/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp b/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp
index dcc5fe4..eee5fc5 100644
--- a/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp
+++ b/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp
@@ -19,7 +19,7 @@ DwarfCompileUnit::DwarfCompileUnit(unsigned UID, DICompileUnit Node,
AsmPrinter *A, DwarfDebug *DW,
DwarfFile *DWU)
: DwarfUnit(UID, dwarf::DW_TAG_compile_unit, Node, A, DW, DWU),
- Skeleton(nullptr), LabelBegin(nullptr), BaseAddress(nullptr) {
+ Skeleton(nullptr), BaseAddress(nullptr) {
insertDIE(Node, &getUnitDie());
}
@@ -164,24 +164,17 @@ DIE *DwarfCompileUnit::getOrCreateGlobalVariableDIE(DIGlobalVariable GV) {
addUInt(*Loc, dwarf::DW_FORM_udata,
DD->getAddressPool().getIndex(Sym, /* TLS */ true));
}
- // 3) followed by a custom OP to make the debugger do a TLS lookup.
- addUInt(*Loc, dwarf::DW_FORM_data1, dwarf::DW_OP_GNU_push_tls_address);
+ // 3) followed by an OP to make the debugger do a TLS lookup.
+ addUInt(*Loc, dwarf::DW_FORM_data1,
+ DD->useGNUTLSOpcode() ? dwarf::DW_OP_GNU_push_tls_address
+ : dwarf::DW_OP_form_tls_address);
} else {
DD->addArangeLabel(SymbolCU(this, Sym));
addOpAddress(*Loc, Sym);
}
addBlock(*VariableDIE, dwarf::DW_AT_location, Loc);
- // Add the linkage name.
- StringRef LinkageName = GV.getLinkageName();
- if (!LinkageName.empty())
- // From DWARF4: DIEs to which DW_AT_linkage_name may apply include:
- // TAG_common_block, TAG_constant, TAG_entry_point, TAG_subprogram and
- // TAG_variable.
- addString(*VariableDIE,
- DD->getDwarfVersion() >= 4 ? dwarf::DW_AT_linkage_name
- : dwarf::DW_AT_MIPS_linkage_name,
- GlobalValue::getRealLinkageName(LinkageName));
+ addLinkageName(*VariableDIE, GV.getLinkageName());
} else if (const ConstantInt *CI =
dyn_cast_or_null<ConstantInt>(GV.getConstant())) {
addConstantValue(*VariableDIE, CI, GTy);
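
The opcode choice above follows the UseGNUTLSOpcode predicate defined later in this patch (DwarfDebug.cpp); a self-contained sketch using the standard opcode values:

    #include <cstdint>

    enum : uint8_t {
      DW_OP_form_tls_address = 0x9b,     // standardized in DWARF 3
      DW_OP_GNU_push_tls_address = 0xe0, // GNU extension
    };

    uint8_t tlsOpcode(bool IsDarwin, bool IsPS4, unsigned DwarfVersion) {
      // Darwin and PS4 can rely on the standard opcode from DWARF 3 on;
      // everybody else keeps the GNU extension.
      bool UseGNU = !(IsDarwin || IsPS4) || DwarfVersion < 3;
      return UseGNU ? DW_OP_GNU_push_tls_address : DW_OP_form_tls_address;
    }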
@@ -243,7 +236,7 @@ void DwarfCompileUnit::addSectionLabel(DIE &Die, dwarf::Attribute Attribute,
addSectionDelta(Die, Attribute, Label, Sec);
}
-void DwarfCompileUnit::initStmtList(MCSymbol *DwarfLineSectionSym) {
+void DwarfCompileUnit::initStmtList() {
// Define start line table label for each Compile Unit.
MCSymbol *LineTableStartSym =
Asm->OutStreamer.getDwarfLineTableSymbol(getUniqueID());
@@ -255,8 +248,9 @@ void DwarfCompileUnit::initStmtList(MCSymbol *DwarfLineSectionSym) {
// left in the skeleton CU and so not included.
// The line table entries are not always emitted in assembly, so it
// is not okay to use line_table_start here.
+ const TargetLoweringObjectFile &TLOF = Asm->getObjFileLowering();
addSectionLabel(UnitDie, dwarf::DW_AT_stmt_list, LineTableStartSym,
- DwarfLineSectionSym);
+ TLOF.getDwarfLineSection()->getBeginSymbol());
}
void DwarfCompileUnit::applyStmtList(DIE &D) {
@@ -285,7 +279,7 @@ void DwarfCompileUnit::attachLowHighPC(DIE &D, const MCSymbol *Begin,
DIE &DwarfCompileUnit::updateSubprogramScopeDIE(DISubprogram SP) {
DIE *SPDie = getOrCreateSubprogramDIE(SP, includeMinimalInlineScopes());
- attachLowHighPC(*SPDie, DD->getFunctionBeginSym(), DD->getFunctionEndSym());
+ attachLowHighPC(*SPDie, Asm->getFunctionBegin(), Asm->getFunctionEnd());
if (!DD->getCurrentFunction()->getTarget().Options.DisableFramePointerElim(
*DD->getCurrentFunction()))
addFlag(*SPDie, dwarf::DW_AT_APPLE_omit_frame_ptr);
@@ -378,13 +372,14 @@ void DwarfCompileUnit::addSectionDelta(DIE &Die, dwarf::Attribute Attribute,
void DwarfCompileUnit::addScopeRangeList(DIE &ScopeDIE,
SmallVector<RangeSpan, 2> Range) {
+ const TargetLoweringObjectFile &TLOF = Asm->getObjFileLowering();
+
// Emit offset in .debug_range as a relocatable label. emitDIE will handle
// emitting it appropriately.
- auto *RangeSectionSym = DD->getRangeSectionSym();
+ const MCSymbol *RangeSectionSym =
+ TLOF.getDwarfRangesSection()->getBeginSymbol();
- RangeSpanList List(
- Asm->GetTempSymbol("debug_ranges", DD->getNextRangeNumber()),
- std::move(Range));
+ RangeSpanList List(Asm->createTempSymbol("debug_ranges"), std::move(Range));
// Under fission, ranges are specified by constant offsets relative to the
// CU's DW_AT_GNU_ranges_base.
@@ -709,12 +704,14 @@ void DwarfCompileUnit::collectDeadVariables(DISubprogram SP) {
}
}
-void DwarfCompileUnit::emitHeader(const MCSymbol *ASectionSym) const {
+void DwarfCompileUnit::emitHeader(bool UseOffsets) {
// Don't bother labeling the .dwo unit, as its offset isn't used.
- if (!Skeleton)
+ if (!Skeleton) {
+ LabelBegin = Asm->createTempSymbol("cu_begin");
Asm->OutStreamer.EmitLabel(LabelBegin);
+ }
- DwarfUnit::emitHeader(ASectionSym);
+ DwarfUnit::emitHeader(UseOffsets);
}
/// addGlobalName - Add a new global name to the compile unit.
diff --git a/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h b/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h
index c66af65..9484bb6 100644
--- a/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h
+++ b/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h
@@ -36,9 +36,6 @@ class DwarfCompileUnit : public DwarfUnit {
/// Skeleton unit associated with this unit.
DwarfCompileUnit *Skeleton;
- /// A label at the start of the non-dwo section related to this unit.
- MCSymbol *SectionSym;
-
/// The start of the unit within its section.
MCSymbol *LabelBegin;
@@ -76,7 +73,7 @@ public:
return Skeleton;
}
- void initStmtList(MCSymbol *DwarfLineSectionSym);
+ void initStmtList();
/// Apply the DW_AT_stmt_list from this compile unit to the specified DIE.
void applyStmtList(DIE &D);
@@ -168,22 +165,9 @@ public:
/// Set the skeleton unit associated with this unit.
void setSkeleton(DwarfCompileUnit &Skel) { Skeleton = &Skel; }
- MCSymbol *getSectionSym() const {
+ const MCSymbol *getSectionSym() const {
assert(Section);
- return SectionSym;
- }
-
- /// Pass in the SectionSym even though we could recreate it in every compile
- /// unit (type units will have actually distinct symbols once they're in
- /// comdat sections).
- void initSection(const MCSection *Section, MCSymbol *SectionSym) {
- DwarfUnit::initSection(Section);
- this->SectionSym = SectionSym;
-
- // Don't bother labeling the .dwo unit, as its offset isn't used.
- if (!Skeleton)
- LabelBegin =
- Asm->GetTempSymbol(Section->getLabelBeginName(), getUniqueID());
+ return Section->getBeginSymbol();
}
unsigned getLength() {
@@ -191,7 +175,7 @@ public:
getHeaderSize() + UnitDie.getSize();
}
- void emitHeader(const MCSymbol *ASectionSym) const override;
+ void emitHeader(bool UseOffsets) override;
MCSymbol *getLabelBegin() const {
assert(Section);
diff --git a/lib/CodeGen/AsmPrinter/DwarfDebug.cpp b/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
index aa1f79f..e9ebd97 100644
--- a/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
+++ b/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
@@ -45,6 +45,7 @@
#include "llvm/Support/MD5.h"
#include "llvm/Support/Path.h"
#include "llvm/Support/Timer.h"
+#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetFrameLowering.h"
#include "llvm/Target/TargetLoweringObjectFile.h"
#include "llvm/Target/TargetMachine.h"
@@ -105,6 +106,25 @@ DwarfPubSections("generate-dwarf-pub-sections", cl::Hidden,
static const char *const DWARFGroupName = "DWARF Emission";
static const char *const DbgTimerName = "DWARF Debug Writer";
+void DebugLocDwarfExpression::EmitOp(uint8_t Op, const char *Comment) {
+ BS.EmitInt8(
+ Op, Comment ? Twine(Comment) + " " + dwarf::OperationEncodingString(Op)
+ : dwarf::OperationEncodingString(Op));
+}
+
+void DebugLocDwarfExpression::EmitSigned(int64_t Value) {
+ BS.EmitSLEB128(Value, Twine(Value));
+}
+
+void DebugLocDwarfExpression::EmitUnsigned(uint64_t Value) {
+ BS.EmitULEB128(Value, Twine(Value));
+}
+
+bool DebugLocDwarfExpression::isFrameRegister(unsigned MachineReg) {
+ // This information is not available while emitting .debug_loc entries.
+ return false;
+}
+
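
EmitSigned and EmitUnsigned stream (S)LEB128-encoded operands; for reference, a minimal ULEB128 encoder matching what BS.EmitULEB128 ultimately produces (the classic DWARF example: 624485 encodes as e5 8e 26):

    #include <cstdint>
    #include <vector>

    std::vector<uint8_t> encodeULEB128(uint64_t Value) {
      std::vector<uint8_t> Out;
      do {
        uint8_t Byte = Value & 0x7f; // low 7 bits per output byte
        Value >>= 7;
        if (Value != 0)
          Byte |= 0x80; // set the continuation bit
        Out.push_back(Byte);
      } while (Value != 0);
      return Out;
    }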
//===----------------------------------------------------------------------===//
/// resolve - Look in the DwarfDebug map for the MDNode that
@@ -169,11 +189,12 @@ static LLVM_CONSTEXPR DwarfAccelTable::Atom TypeAtoms[] = {
DwarfAccelTable::Atom(dwarf::DW_ATOM_type_flags, dwarf::DW_FORM_data1)};
DwarfDebug::DwarfDebug(AsmPrinter *A, Module *M)
- : Asm(A), MMI(Asm->MMI), PrevLabel(nullptr), GlobalRangeCount(0),
- InfoHolder(A, *this, "info_string", DIEValueAllocator),
+ : Asm(A), MMI(Asm->MMI), PrevLabel(nullptr),
+ InfoHolder(A, "info_string", DIEValueAllocator),
UsedNonDefaultText(false),
- SkeletonHolder(A, *this, "skel_string", DIEValueAllocator),
+ SkeletonHolder(A, "skel_string", DIEValueAllocator),
IsDarwin(Triple(A->getTargetTriple()).isOSDarwin()),
+ IsPS4(Triple(A->getTargetTriple()).isPS4()),
AccelNames(DwarfAccelTable::Atom(dwarf::DW_ATOM_die_offset,
dwarf::DW_FORM_data4)),
AccelObjC(DwarfAccelTable::Atom(dwarf::DW_ATOM_die_offset,
@@ -182,17 +203,11 @@ DwarfDebug::DwarfDebug(AsmPrinter *A, Module *M)
dwarf::DW_FORM_data4)),
AccelTypes(TypeAtoms) {
- DwarfInfoSectionSym = DwarfAbbrevSectionSym = DwarfStrSectionSym = nullptr;
- DwarfDebugRangeSectionSym = DwarfDebugLocSectionSym = nullptr;
- DwarfLineSectionSym = nullptr;
- DwarfAddrSectionSym = nullptr;
- DwarfAbbrevDWOSectionSym = DwarfStrDWOSectionSym = nullptr;
- FunctionBeginSym = FunctionEndSym = nullptr;
CurFn = nullptr;
CurMI = nullptr;
// Turn on accelerator tables for Darwin by default, pubnames by
- // default for non-Darwin, and handle split dwarf.
+ // default for non-Darwin/PS4, and handle split dwarf.
if (DwarfAccelTables == Default)
HasDwarfAccelTables = IsDarwin;
else
@@ -204,7 +219,7 @@ DwarfDebug::DwarfDebug(AsmPrinter *A, Module *M)
HasSplitDwarf = SplitDwarf == Enable;
if (DwarfPubSections == Default)
- HasDwarfPubSections = !IsDarwin;
+ HasDwarfPubSections = !IsDarwin && !IsPS4;
else
HasDwarfPubSections = DwarfPubSections == Enable;
@@ -212,6 +227,10 @@ DwarfDebug::DwarfDebug(AsmPrinter *A, Module *M)
DwarfVersion = DwarfVersionNumber ? DwarfVersionNumber
: MMI->getModule()->getDwarfVersion();
+ // Darwin and PS4 use the standard TLS opcode (defined in DWARF 3).
+ // Everybody else uses GNU's.
+ UseGNUTLSOpcode = !(IsDarwin || IsPS4) || DwarfVersion < 3;
+
Asm->OutStreamer.getContext().setDwarfVersion(DwarfVersion);
{
@@ -223,19 +242,6 @@ DwarfDebug::DwarfDebug(AsmPrinter *A, Module *M)
// Define out of line so we don't have to include DwarfUnit.h in DwarfDebug.h.
DwarfDebug::~DwarfDebug() { }
-// Switch to the specified MCSection and emit an assembler
-// temporary label to it if SymbolStem is specified.
-static MCSymbol *emitSectionSym(AsmPrinter *Asm, const MCSection *Section,
- const char *SymbolStem = nullptr) {
- Asm->OutStreamer.SwitchSection(Section);
- if (!SymbolStem)
- return nullptr;
-
- MCSymbol *TmpSym = Asm->GetTempSymbol(SymbolStem);
- Asm->OutStreamer.EmitLabel(TmpSym);
- return TmpSym;
-}
-
static bool isObjCClass(StringRef Name) {
return Name.startswith("+") || Name.startswith("-");
}
@@ -264,13 +270,6 @@ static StringRef getObjCMethodName(StringRef In) {
return In.slice(In.find(' ') + 1, In.find(']'));
}
-// Helper for sorting sections into a stable output order.
-static bool SectionSort(const MCSection *A, const MCSection *B) {
- std::string LA = (A ? A->getLabelBeginName() : "");
- std::string LB = (B ? B->getLabelBeginName() : "");
- return LA < LB;
-}
-
// Add the various names to the Dwarf accelerator table names.
// TODO: Determine whether or not we should add names for programs
// that do not have a DW_AT_name or DW_AT_linkage_name field - this
@@ -388,7 +387,7 @@ DwarfCompileUnit &DwarfDebug::constructDwarfCompileUnit(DICompileUnit DIUnit) {
NewCU.addString(Die, dwarf::DW_AT_name, FN);
if (!useSplitDwarf()) {
- NewCU.initStmtList(DwarfLineSectionSym);
+ NewCU.initStmtList();
// If we're using split dwarf the compilation dir is going to be in the
// skeleton CU and so we don't need to duplicate it here.
@@ -410,11 +409,9 @@ DwarfCompileUnit &DwarfDebug::constructDwarfCompileUnit(DICompileUnit DIUnit) {
dwarf::DW_FORM_data1, RVer);
if (useSplitDwarf())
- NewCU.initSection(Asm->getObjFileLowering().getDwarfInfoDWOSection(),
- DwarfInfoDWOSectionSym);
+ NewCU.initSection(Asm->getObjFileLowering().getDwarfInfoDWOSection());
else
- NewCU.initSection(Asm->getObjFileLowering().getDwarfInfoSection(),
- DwarfInfoSectionSym);
+ NewCU.initSection(Asm->getObjFileLowering().getDwarfInfoSection());
CUMap.insert(std::make_pair(DIUnit, &NewCU));
CUDieMap.insert(std::make_pair(&Die, &NewCU));
@@ -445,9 +442,6 @@ void DwarfDebug::beginModule() {
return;
TypeIdentifierMap = generateDITypeIdentifierMap(CU_Nodes);
- // Emit initial sections so we can reference labels later.
- emitSectionLabels();
-
SingleCU = CU_Nodes->getNumOperands() == 1;
for (MDNode *N : CU_Nodes->operands()) {
@@ -458,8 +452,11 @@ void DwarfDebug::beginModule() {
ScopesWithImportedEntities.push_back(std::make_pair(
DIImportedEntity(ImportedEntities.getElement(i)).getContext(),
ImportedEntities.getElement(i)));
- std::sort(ScopesWithImportedEntities.begin(),
- ScopesWithImportedEntities.end(), less_first());
+ // Stable sort to preserve the order of appearance of imported entities.
+ // This is to avoid out-of-order processing of interdependent declarations
+ // within the same scope, e.g. { namespace A = base; namespace B = A; }
+ std::stable_sort(ScopesWithImportedEntities.begin(),
+ ScopesWithImportedEntities.end(), less_first());
DIArray GVs = CUNode.getGlobalVariables();
for (unsigned i = 0, e = GVs.getNumElements(); i != e; ++i)
CU.getOrCreateGlobalVariableDIE(DIGlobalVariable(GVs.getElement(i)));
@@ -541,6 +538,8 @@ void DwarfDebug::collectDeadVariables() {
}
void DwarfDebug::finalizeModuleInfo() {
+ const TargetLoweringObjectFile &TLOF = Asm->getObjFileLowering();
+
finishSubprogramDefinitions();
finishVariableDefinitions();
@@ -570,13 +569,16 @@ void DwarfDebug::finalizeModuleInfo() {
// We don't keep track of which addresses are used in which CU so this
// is a bit pessimistic under LTO.
- if (!AddrPool.isEmpty())
+ if (!AddrPool.isEmpty()) {
+ const MCSymbol *Sym = TLOF.getDwarfAddrSection()->getBeginSymbol();
SkCU->addSectionLabel(SkCU->getUnitDie(), dwarf::DW_AT_GNU_addr_base,
- DwarfAddrSectionSym, DwarfAddrSectionSym);
- if (!SkCU->getRangeLists().empty())
+ Sym, Sym);
+ }
+ if (!SkCU->getRangeLists().empty()) {
+ const MCSymbol *Sym = TLOF.getDwarfRangesSection()->getBeginSymbol();
SkCU->addSectionLabel(SkCU->getUnitDie(), dwarf::DW_AT_GNU_ranges_base,
- DwarfDebugRangeSectionSym,
- DwarfDebugRangeSectionSym);
+ Sym, Sym);
+ }
}
// If we have code split among multiple sections or non-contiguous
@@ -613,7 +615,7 @@ void DwarfDebug::endModule() {
// If we aren't actually generating debug info (check beginModule -
// conditionalized on !DisableDebugInfoPrinting and the presence of the
  // llvm.dbg.cu metadata node), bail out.
- if (!DwarfInfoSectionSym)
+ if (!MMI->hasDebugInfo())
return;
// Finalize the debug info for the module.
@@ -621,12 +623,18 @@ void DwarfDebug::endModule() {
emitDebugStr();
- // Emit all the DIEs into a debug info section.
- emitDebugInfo();
+ if (useSplitDwarf())
+ emitDebugLocDWO();
+ else
+ // Emit info into a debug loc section.
+ emitDebugLoc();
  // Corresponding abbreviations into an abbrev section.
emitAbbreviations();
+ // Emit all the DIEs into a debug info section.
+ emitDebugInfo();
+
// Emit info into a debug aranges section.
if (GenerateARangeSection)
emitDebugARanges();
@@ -639,12 +647,9 @@ void DwarfDebug::endModule() {
emitDebugInfoDWO();
emitDebugAbbrevDWO();
emitDebugLineDWO();
- emitDebugLocDWO();
// Emit DWO addresses.
AddrPool.emit(*Asm, Asm->getObjFileLowering().getDwarfAddrSection());
- } else
- // Emit info into a debug loc section.
- emitDebugLoc();
+ }
// Emit info into the dwarf accelerator table sections.
if (useDwarfAccelTables()) {
@@ -828,7 +833,7 @@ DwarfDebug::buildLocationList(SmallVectorImpl<DebugLocEntry> &DebugLoc,
if (End != nullptr)
EndLabel = getLabelAfterInsn(End);
else if (std::next(I) == Ranges.end())
- EndLabel = FunctionEndSym;
+ EndLabel = Asm->getFunctionEnd();
else
EndLabel = getLabelBeforeInsn(std::next(I)->first);
assert(EndLabel && "Forgot label after instruction ending a range!");
@@ -922,11 +927,13 @@ DwarfDebug::collectVariableInfo(DwarfCompileUnit &TheCU, DISubprogram SP,
DotDebugLocEntries.resize(DotDebugLocEntries.size() + 1);
DebugLocList &LocList = DotDebugLocEntries.back();
LocList.CU = &TheCU;
- LocList.Label =
- Asm->GetTempSymbol("debug_loc", DotDebugLocEntries.size() - 1);
+ LocList.Label = Asm->createTempSymbol("debug_loc");
// Build the location list for this variable.
buildLocationList(LocList.List, Ranges);
+ // Finalize the entry by lowering it into a DWARF bytestream.
+ for (auto &Entry : LocList.List)
+ Entry.finalize(*Asm, TypeIdentifierMap);
}
// Collect info for variables that were optimized out.
@@ -964,23 +971,25 @@ void DwarfDebug::beginInstruction(const MachineInstr *MI) {
// Check if source location changes, but ignore DBG_VALUE locations.
if (!MI->isDebugValue()) {
DebugLoc DL = MI->getDebugLoc();
- if (DL != PrevInstLoc && (!DL.isUnknown() || UnknownLocations)) {
- unsigned Flags = 0;
- PrevInstLoc = DL;
- if (DL == PrologEndLoc) {
- Flags |= DWARF2_FLAG_PROLOGUE_END;
- PrologEndLoc = DebugLoc();
- Flags |= DWARF2_FLAG_IS_STMT;
- }
- if (DL.getLine() !=
- Asm->OutStreamer.getContext().getCurrentDwarfLoc().getLine())
- Flags |= DWARF2_FLAG_IS_STMT;
-
+ if (DL != PrevInstLoc) {
if (!DL.isUnknown()) {
+ unsigned Flags = 0;
+ PrevInstLoc = DL;
+ if (DL == PrologEndLoc) {
+ Flags |= DWARF2_FLAG_PROLOGUE_END;
+ PrologEndLoc = DebugLoc();
+ Flags |= DWARF2_FLAG_IS_STMT;
+ }
+ if (DL.getLine() !=
+ Asm->OutStreamer.getContext().getCurrentDwarfLoc().getLine())
+ Flags |= DWARF2_FLAG_IS_STMT;
+
const MDNode *Scope = DL.getScope(Asm->MF->getFunction()->getContext());
recordSourceLine(DL.getLine(), DL.getCol(), Scope, Flags);
- } else
+ } else if (UnknownLocations) {
+ PrevInstLoc = DL;
recordSourceLine(0, 0, nullptr, 0);
+ }
}
}
@@ -1116,11 +1125,6 @@ void DwarfDebug::beginFunction(const MachineFunction *MF) {
else
Asm->OutStreamer.getContext().setDwarfCompileUnitID(TheCU->getUniqueID());
- // Emit a label for the function so that we have a beginning address.
- FunctionBeginSym = Asm->GetTempSymbol("func_begin", Asm->getFunctionNumber());
- // Assumes in correct section after the entry point.
- Asm->OutStreamer.EmitLabel(FunctionBeginSym);
-
// Calculate history for local variables.
calculateDbgValueHistory(MF, Asm->MF->getSubtarget().getRegisterInfo(),
DbgValues);
@@ -1131,12 +1135,12 @@ void DwarfDebug::beginFunction(const MachineFunction *MF) {
if (Ranges.empty())
continue;
- // The first mention of a function argument gets the FunctionBeginSym
+ // The first mention of a function argument gets the CurrentFnBegin
// label, so arguments are visible when breaking at function entry.
DIVariable DIVar(Ranges.front().first->getDebugVariable());
if (DIVar.isVariable() && DIVar.getTag() == dwarf::DW_TAG_arg_variable &&
getDISubprogram(DIVar.getContext()).describes(MF->getFunction())) {
- LabelsBeforeInsn[Ranges.front().first] = FunctionBeginSym;
+ LabelsBeforeInsn[Ranges.front().first] = Asm->getFunctionBegin();
if (Ranges.front().first->getDebugExpression().isBitPiece()) {
// Mark all non-overlapping initial pieces.
for (auto I = Ranges.begin(); I != Ranges.end(); ++I) {
@@ -1145,7 +1149,7 @@ void DwarfDebug::beginFunction(const MachineFunction *MF) {
[&](DbgValueHistoryMap::InstrRange Pred) {
return !piecesOverlap(Piece, Pred.first->getDebugExpression());
}))
- LabelsBeforeInsn[I->first] = FunctionBeginSym;
+ LabelsBeforeInsn[I->first] = Asm->getFunctionBegin();
else
break;
}
@@ -1160,7 +1164,7 @@ void DwarfDebug::beginFunction(const MachineFunction *MF) {
}
PrevInstLoc = DebugLoc();
- PrevLabel = FunctionBeginSym;
+ PrevLabel = Asm->getFunctionBegin();
// Record beginning of function.
PrologEndLoc = findPrologueEndLoc(MF);
@@ -1191,11 +1195,6 @@ void DwarfDebug::endFunction(const MachineFunction *MF) {
return;
}
- // Define end label for subprogram.
- FunctionEndSym = Asm->GetTempSymbol("func_end", Asm->getFunctionNumber());
- // Assumes in correct section after the entry point.
- Asm->OutStreamer.EmitLabel(FunctionEndSym);
-
  // Set DwarfCompileUnitID in MCContext to default value.
Asm->OutStreamer.getContext().setDwarfCompileUnitID(0);
@@ -1207,7 +1206,7 @@ void DwarfDebug::endFunction(const MachineFunction *MF) {
collectVariableInfo(TheCU, SP, ProcessedVars);
// Add the range of this function to the list of ranges for the CU.
- TheCU.addRange(RangeSpan(FunctionBeginSym, FunctionEndSym));
+ TheCU.addRange(RangeSpan(Asm->getFunctionBegin(), Asm->getFunctionEnd()));
// Under -gmlt, skip building the subprogram if there are no inlined
// subroutines inside it.
@@ -1290,103 +1289,10 @@ void DwarfDebug::recordSourceLine(unsigned Line, unsigned Col, const MDNode *S,
// Emit Methods
//===----------------------------------------------------------------------===//
-// Emit initial Dwarf sections with a label at the start of each one.
-void DwarfDebug::emitSectionLabels() {
- const TargetLoweringObjectFile &TLOF = Asm->getObjFileLowering();
-
- // Dwarf sections base addresses.
- DwarfInfoSectionSym =
- emitSectionSym(Asm, TLOF.getDwarfInfoSection(), "section_info");
- if (useSplitDwarf()) {
- DwarfInfoDWOSectionSym =
- emitSectionSym(Asm, TLOF.getDwarfInfoDWOSection(), "section_info_dwo");
- DwarfTypesDWOSectionSym = emitSectionSym(
- Asm, TLOF.getDwarfTypesDWOSection(), "section_types_dwo");
- }
- DwarfAbbrevSectionSym =
- emitSectionSym(Asm, TLOF.getDwarfAbbrevSection(), "section_abbrev");
- if (useSplitDwarf())
- DwarfAbbrevDWOSectionSym = emitSectionSym(
- Asm, TLOF.getDwarfAbbrevDWOSection(), "section_abbrev_dwo");
- if (GenerateARangeSection)
- emitSectionSym(Asm, TLOF.getDwarfARangesSection());
-
- DwarfLineSectionSym =
- emitSectionSym(Asm, TLOF.getDwarfLineSection(), "section_line");
- if (GenerateGnuPubSections) {
- DwarfGnuPubNamesSectionSym =
- emitSectionSym(Asm, TLOF.getDwarfGnuPubNamesSection());
- DwarfGnuPubTypesSectionSym =
- emitSectionSym(Asm, TLOF.getDwarfGnuPubTypesSection());
- } else if (HasDwarfPubSections) {
- emitSectionSym(Asm, TLOF.getDwarfPubNamesSection());
- emitSectionSym(Asm, TLOF.getDwarfPubTypesSection());
- }
-
- DwarfStrSectionSym =
- emitSectionSym(Asm, TLOF.getDwarfStrSection(), "info_string");
- if (useSplitDwarf()) {
- DwarfStrDWOSectionSym =
- emitSectionSym(Asm, TLOF.getDwarfStrDWOSection(), "skel_string");
- DwarfAddrSectionSym =
- emitSectionSym(Asm, TLOF.getDwarfAddrSection(), "addr_sec");
- DwarfDebugLocSectionSym =
- emitSectionSym(Asm, TLOF.getDwarfLocDWOSection(), "skel_loc");
- } else
- DwarfDebugLocSectionSym =
- emitSectionSym(Asm, TLOF.getDwarfLocSection(), "section_debug_loc");
- DwarfDebugRangeSectionSym =
- emitSectionSym(Asm, TLOF.getDwarfRangesSection(), "debug_range");
-}
-
-// Recursively emits a debug information entry.
-void DwarfDebug::emitDIE(DIE &Die) {
- // Get the abbreviation for this DIE.
- const DIEAbbrev &Abbrev = Die.getAbbrev();
-
- // Emit the code (index) for the abbreviation.
- if (Asm->isVerbose())
- Asm->OutStreamer.AddComment("Abbrev [" + Twine(Abbrev.getNumber()) +
- "] 0x" + Twine::utohexstr(Die.getOffset()) +
- ":0x" + Twine::utohexstr(Die.getSize()) + " " +
- dwarf::TagString(Abbrev.getTag()));
- Asm->EmitULEB128(Abbrev.getNumber());
-
- const SmallVectorImpl<DIEValue *> &Values = Die.getValues();
- const SmallVectorImpl<DIEAbbrevData> &AbbrevData = Abbrev.getData();
-
- // Emit the DIE attribute values.
- for (unsigned i = 0, N = Values.size(); i < N; ++i) {
- dwarf::Attribute Attr = AbbrevData[i].getAttribute();
- dwarf::Form Form = AbbrevData[i].getForm();
- assert(Form && "Too many attributes for DIE (check abbreviation)");
-
- if (Asm->isVerbose()) {
- Asm->OutStreamer.AddComment(dwarf::AttributeString(Attr));
- if (Attr == dwarf::DW_AT_accessibility)
- Asm->OutStreamer.AddComment(dwarf::AccessibilityString(
- cast<DIEInteger>(Values[i])->getValue()));
- }
-
- // Emit an attribute using the defined form.
- Values[i]->EmitValue(Asm, Form);
- }
-
- // Emit the DIE children if any.
- if (Abbrev.hasChildren()) {
- for (auto &Child : Die.getChildren())
- emitDIE(*Child);
-
- Asm->OutStreamer.AddComment("End Of Children Mark");
- Asm->EmitInt8(0);
- }
-}
-
// Emit the debug info section.
void DwarfDebug::emitDebugInfo() {
DwarfFile &Holder = useSplitDwarf() ? SkeletonHolder : InfoHolder;
-
- Holder.emitUnits(DwarfAbbrevSectionSym);
+ Holder.emitUnits(/* UseOffsets */ false);
}
// Emit the abbreviation section.
@@ -1396,65 +1302,39 @@ void DwarfDebug::emitAbbreviations() {
Holder.emitAbbrevs(Asm->getObjFileLowering().getDwarfAbbrevSection());
}
-// Emit the last address of the section and the end of the line matrix.
-void DwarfDebug::emitEndOfLineMatrix(unsigned SectionEnd) {
- // Define last address of section.
- Asm->OutStreamer.AddComment("Extended Op");
- Asm->EmitInt8(0);
-
- Asm->OutStreamer.AddComment("Op size");
- Asm->EmitInt8(Asm->getDataLayout().getPointerSize() + 1);
- Asm->OutStreamer.AddComment("DW_LNE_set_address");
- Asm->EmitInt8(dwarf::DW_LNE_set_address);
-
- Asm->OutStreamer.AddComment("Section end label");
-
- Asm->OutStreamer.EmitSymbolValue(
- Asm->GetTempSymbol("section_end", SectionEnd),
- Asm->getDataLayout().getPointerSize());
-
- // Mark end of matrix.
- Asm->OutStreamer.AddComment("DW_LNE_end_sequence");
- Asm->EmitInt8(0);
- Asm->EmitInt8(1);
- Asm->EmitInt8(1);
-}
-
void DwarfDebug::emitAccel(DwarfAccelTable &Accel, const MCSection *Section,
- StringRef TableName, StringRef SymName) {
+ StringRef TableName) {
Accel.FinalizeTable(Asm, TableName);
Asm->OutStreamer.SwitchSection(Section);
- auto *SectionBegin = Asm->GetTempSymbol(SymName);
- Asm->OutStreamer.EmitLabel(SectionBegin);
// Emit the full data.
- Accel.Emit(Asm, SectionBegin, this, DwarfStrSectionSym);
+ Accel.emit(Asm, Section->getBeginSymbol(), this);
}
// Emit visible names into a hashed accelerator table section.
void DwarfDebug::emitAccelNames() {
emitAccel(AccelNames, Asm->getObjFileLowering().getDwarfAccelNamesSection(),
- "Names", "names_begin");
+ "Names");
}
// Emit objective C classes and categories into a hashed accelerator table
// section.
void DwarfDebug::emitAccelObjC() {
emitAccel(AccelObjC, Asm->getObjFileLowering().getDwarfAccelObjCSection(),
- "ObjC", "objc_begin");
+ "ObjC");
}
// Emit namespace dies into a hashed accelerator table.
void DwarfDebug::emitAccelNamespaces() {
emitAccel(AccelNamespace,
Asm->getObjFileLowering().getDwarfAccelNamespaceSection(),
- "namespac", "namespac_begin");
+ "namespac");
}
// Emit type dies into a hashed accelerator table.
void DwarfDebug::emitAccelTypes() {
emitAccel(AccelTypes, Asm->getObjFileLowering().getDwarfAccelTypesSection(),
- "types", "types_begin");
+ "types");
}
// Public name handling.
@@ -1537,15 +1417,14 @@ void DwarfDebug::emitDebugPubSection(
if (auto *Skeleton = TheU->getSkeleton())
TheU = Skeleton;
- unsigned ID = TheU->getUniqueID();
// Start the dwarf pubnames section.
Asm->OutStreamer.SwitchSection(PSec);
// Emit the header.
Asm->OutStreamer.AddComment("Length of Public " + Name + " Info");
- MCSymbol *BeginLabel = Asm->GetTempSymbol("pub" + Name + "_begin", ID);
- MCSymbol *EndLabel = Asm->GetTempSymbol("pub" + Name + "_end", ID);
+ MCSymbol *BeginLabel = Asm->createTempSymbol("pub" + Name + "_begin");
+ MCSymbol *EndLabel = Asm->createTempSymbol("pub" + Name + "_end");
Asm->EmitLabelDifference(EndLabel, BeginLabel, 4);
Asm->OutStreamer.EmitLabel(BeginLabel);
@@ -1554,7 +1433,7 @@ void DwarfDebug::emitDebugPubSection(
Asm->EmitInt16(dwarf::DW_PUBNAMES_VERSION);
Asm->OutStreamer.AddComment("Offset of Compilation Unit Info");
- Asm->EmitSectionOffset(TheU->getLabelBegin(), TheU->getSectionSym());
+ Asm->emitSectionOffset(TheU->getLabelBegin());
Asm->OutStreamer.AddComment("Compilation Unit Length");
Asm->EmitInt32(TheU->getLength());
@@ -1600,62 +1479,27 @@ void DwarfDebug::emitDebugStr() {
Holder.emitStrings(Asm->getObjFileLowering().getDwarfStrSection());
}
-/// Emits an optimal (=sorted) sequence of DW_OP_pieces.
-void DwarfDebug::emitLocPieces(ByteStreamer &Streamer,
- const DITypeIdentifierMap &Map,
- ArrayRef<DebugLocEntry::Value> Values) {
- assert(std::all_of(Values.begin(), Values.end(), [](DebugLocEntry::Value P) {
- return P.isBitPiece();
- }) && "all values are expected to be pieces");
- assert(std::is_sorted(Values.begin(), Values.end()) &&
- "pieces are expected to be sorted");
-
- unsigned Offset = 0;
- for (auto Piece : Values) {
- DIExpression Expr = Piece.getExpression();
- unsigned PieceOffset = Expr.getBitPieceOffset();
- unsigned PieceSize = Expr.getBitPieceSize();
- assert(Offset <= PieceOffset && "overlapping or duplicate pieces");
- if (Offset < PieceOffset) {
- // The DWARF spec seriously mandates pieces with no locations for gaps.
- Asm->EmitDwarfOpPiece(Streamer, PieceOffset-Offset);
- Offset += PieceOffset-Offset;
- }
- Offset += PieceSize;
-
-#ifndef NDEBUG
- DIVariable Var = Piece.getVariable();
- unsigned VarSize = Var.getSizeInBits(Map);
- assert(PieceSize+PieceOffset <= VarSize
- && "piece is larger than or outside of variable");
- assert(PieceSize != VarSize
- && "piece covers entire variable");
-#endif
- emitDebugLocValue(Streamer, Piece, PieceOffset);
- }
-}
-
void DwarfDebug::emitDebugLocEntry(ByteStreamer &Streamer,
const DebugLocEntry &Entry) {
- const DebugLocEntry::Value Value = Entry.getValues()[0];
- if (Value.isBitPiece())
- // Emit all pieces that belong to the same variable and range.
- return emitLocPieces(Streamer, TypeIdentifierMap, Entry.getValues());
-
- assert(Entry.getValues().size() == 1 && "only pieces may have >1 value");
- emitDebugLocValue(Streamer, Value);
+ auto Comment = Entry.getComments().begin();
+ auto End = Entry.getComments().end();
+ for (uint8_t Byte : Entry.getDWARFBytes())
+ Streamer.EmitInt8(Byte, Comment != End ? *(Comment++) : "");
}
-void DwarfDebug::emitDebugLocValue(ByteStreamer &Streamer,
- const DebugLocEntry::Value &Value,
- unsigned PieceOffsetInBits) {
+static void emitDebugLocValue(const AsmPrinter &AP,
+ const DITypeIdentifierMap &TypeIdentifierMap,
+ ByteStreamer &Streamer,
+ const DebugLocEntry::Value &Value,
+ unsigned PieceOffsetInBits) {
DIVariable DV = Value.getVariable();
- DebugLocDwarfExpression DwarfExpr(*Asm, Streamer);
-
+ DebugLocDwarfExpression DwarfExpr(*AP.MF->getSubtarget().getRegisterInfo(),
+ AP.getDwarfDebug()->getDwarfVersion(),
+ Streamer);
// Regular entry.
if (Value.isInt()) {
- DIBasicType BTy(resolve(DV.getType()));
+ DIBasicType BTy(DV.getType().resolve(TypeIdentifierMap));
if (BTy.Verify() && (BTy.getEncoding() == dwarf::DW_ATE_signed ||
BTy.getEncoding() == dwarf::DW_ATE_signed_char))
DwarfExpr.AddSignedConstant(Value.getInt());
@@ -1666,7 +1510,7 @@ void DwarfDebug::emitDebugLocValue(ByteStreamer &Streamer,
DIExpression Expr = Value.getExpression();
if (!Expr || (Expr.getNumElements() == 0))
// Regular entry.
- Asm->EmitDwarfRegOp(Streamer, Loc);
+ AP.EmitDwarfRegOp(Streamer, Loc);
else {
// Complex address entry.
if (Loc.getOffset()) {
@@ -1682,6 +1526,52 @@ void DwarfDebug::emitDebugLocValue(ByteStreamer &Streamer,
// FIXME: ^
}
+
+void DebugLocEntry::finalize(const AsmPrinter &AP,
+ const DITypeIdentifierMap &TypeIdentifierMap) {
+ BufferByteStreamer Streamer(DWARFBytes, Comments);
+ const DebugLocEntry::Value Value = Values[0];
+ if (Value.isBitPiece()) {
+ // Emit all pieces that belong to the same variable and range.
+ assert(std::all_of(Values.begin(), Values.end(), [](DebugLocEntry::Value P) {
+ return P.isBitPiece();
+ }) && "all values are expected to be pieces");
+ assert(std::is_sorted(Values.begin(), Values.end()) &&
+ "pieces are expected to be sorted");
+
+ unsigned Offset = 0;
+ for (auto Piece : Values) {
+ DIExpression Expr = Piece.getExpression();
+ unsigned PieceOffset = Expr.getBitPieceOffset();
+ unsigned PieceSize = Expr.getBitPieceSize();
+ assert(Offset <= PieceOffset && "overlapping or duplicate pieces");
+ if (Offset < PieceOffset) {
+ // The DWARF spec seriously mandates pieces with no locations for gaps.
+ DebugLocDwarfExpression Expr(*AP.MF->getSubtarget().getRegisterInfo(),
+ AP.getDwarfDebug()->getDwarfVersion(),
+ Streamer);
+ Expr.AddOpPiece(PieceOffset-Offset, 0);
+ Offset += PieceOffset-Offset;
+ }
+ Offset += PieceSize;
+
+#ifndef NDEBUG
+ DIVariable Var = Piece.getVariable();
+ unsigned VarSize = Var.getSizeInBits(TypeIdentifierMap);
+ assert(PieceSize+PieceOffset <= VarSize
+ && "piece is larger than or outside of variable");
+ assert(PieceSize != VarSize
+ && "piece covers entire variable");
+#endif
+ emitDebugLocValue(AP, TypeIdentifierMap, Streamer, Piece, PieceOffset);
+ }
+ } else {
+ assert(Values.size() == 1 && "only pieces may have >1 value");
+ emitDebugLocValue(AP, TypeIdentifierMap, Streamer, Value, 0);
+ }
+}
+
+
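A worked example of the gap rule in finalize, with an assumed layout: a 64-bit variable covered by two pieces, bits [0,16) and [32,64), requires an empty piece for the uncovered bits [16,32).

    #include <cassert>

    int main() {
      struct Piece { unsigned Offset, Size; };
      const Piece Pieces[] = {{0, 16}, {32, 32}}; // sorted, no overlap
      unsigned Offset = 0;
      for (const Piece &P : Pieces) {
        assert(Offset <= P.Offset && "overlapping or duplicate pieces");
        if (Offset < P.Offset)
          Offset = P.Offset; // an empty DW_OP_piece would cover the gap
        Offset += P.Size;    // then the piece's own location is emitted
      }
      assert(Offset == 64 && "pieces plus gaps cover the variable");
      return 0;
    }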
void DwarfDebug::emitDebugLocEntryLocation(const DebugLocEntry &Entry) {
Asm->OutStreamer.AddComment("Loc expr size");
MCSymbol *begin = Asm->OutStreamer.getContext().CreateTempSymbol();
@@ -1752,10 +1642,7 @@ struct ArangeSpan {
// address we can tie back to a CU.
void DwarfDebug::emitDebugARanges() {
  // Map from each text section to the labels that reside in it.
- DenseMap<const MCSection *, SmallVector<SymbolCU, 8>> SectionMap;
-
- // Prime section data.
- SectionMap[Asm->getObjFileLowering().getTextSection()];
+ MapVector<const MCSection *, SmallVector<SymbolCU, 8>> SectionMap;
// Filter labels by section.
for (const SymbolCU &SCU : ArangeLabels) {
@@ -1772,31 +1659,13 @@ void DwarfDebug::emitDebugARanges() {
}
}
- // Build a list of sections used.
- std::vector<const MCSection *> Sections;
- for (const auto &it : SectionMap) {
- const MCSection *Section = it.first;
- Sections.push_back(Section);
- }
-
- // Sort the sections into order.
- // This is only done to ensure consistent output order across different runs.
- std::sort(Sections.begin(), Sections.end(), SectionSort);
-
// Add terminating symbols for each section.
- for (unsigned ID = 0, E = Sections.size(); ID != E; ID++) {
- const MCSection *Section = Sections[ID];
+ for (const auto &I : SectionMap) {
+ const MCSection *Section = I.first;
MCSymbol *Sym = nullptr;
- if (Section) {
- // We can't call MCSection::getLabelEndName, as it's only safe to do so
- // if we know the section name up-front. For user-created sections, the
- // resulting label may not be valid to use as a label. (section names can
- // use a greater set of characters on some systems)
- Sym = Asm->GetTempSymbol("debug_end", ID);
- Asm->OutStreamer.SwitchSection(Section);
- Asm->OutStreamer.EmitLabel(Sym);
- }
+ if (Section)
+ Sym = Asm->OutStreamer.endSection(Section);
// Insert a final terminator.
SectionMap[Section].push_back(SymbolCU(nullptr, Sym));
@@ -1804,8 +1673,9 @@ void DwarfDebug::emitDebugARanges() {
DenseMap<DwarfCompileUnit *, std::vector<ArangeSpan>> Spans;
- for (const MCSection *Section : Sections) {
- SmallVector<SymbolCU, 8> &List = SectionMap[Section];
+ for (auto &I : SectionMap) {
+ const MCSection *Section = I.first;
+ SmallVector<SymbolCU, 8> &List = I.second;
if (List.size() < 2)
continue;
@@ -1902,7 +1772,7 @@ void DwarfDebug::emitDebugARanges() {
Asm->OutStreamer.AddComment("DWARF Arange version number");
Asm->EmitInt16(dwarf::DW_ARANGES_VERSION);
Asm->OutStreamer.AddComment("Offset Into Debug Info Section");
- Asm->EmitSectionOffset(CU->getLabelBegin(), CU->getSectionSym());
+ Asm->emitSectionOffset(CU->getLabelBegin());
Asm->OutStreamer.AddComment("Address Size (in bytes)");
Asm->EmitInt8(PtrSize);
Asm->OutStreamer.AddComment("Segment Size (in bytes)");
@@ -1998,10 +1868,9 @@ DwarfCompileUnit &DwarfDebug::constructSkeletonCU(const DwarfCompileUnit &CU) {
auto OwnedUnit = make_unique<DwarfCompileUnit>(
CU.getUniqueID(), CU.getCUNode(), Asm, this, &SkeletonHolder);
DwarfCompileUnit &NewCU = *OwnedUnit;
- NewCU.initSection(Asm->getObjFileLowering().getDwarfInfoSection(),
- DwarfInfoSectionSym);
+ NewCU.initSection(Asm->getObjFileLowering().getDwarfInfoSection());
- NewCU.initStmtList(DwarfLineSectionSym);
+ NewCU.initStmtList();
initSkeletonUnit(CU, NewCU.getUnitDie(), std::move(OwnedUnit));
@@ -2012,9 +1881,8 @@ DwarfCompileUnit &DwarfDebug::constructSkeletonCU(const DwarfCompileUnit &CU) {
// compile units that would normally be in debug_info.
void DwarfDebug::emitDebugInfoDWO() {
assert(useSplitDwarf() && "No split dwarf debug info?");
- // Don't pass an abbrev symbol, using a constant zero instead so as not to
- // emit relocations into the dwo file.
- InfoHolder.emitUnits(/* AbbrevSymbol */ nullptr);
+ // Don't emit relocations into the dwo file.
+ InfoHolder.emitUnits(/* UseOffsets */ true);
}
// Emit the .debug_abbrev.dwo section for separated dwarf. This contains the
@@ -2058,7 +1926,7 @@ static uint64_t makeTypeSignature(StringRef Identifier) {
// appropriately.
MD5::MD5Result Result;
Hash.final(Result);
- return *reinterpret_cast<support::ulittle64_t *>(Result + 8);
+ return support::endian::read64le(Result + 8);
}
void DwarfDebug::addDwarfTypeUnitType(DwarfCompileUnit &CU,
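The read64le change replaces a reinterpret_cast over the raw MD5 buffer with an explicit little-endian load, avoiding alignment and layout assumptions. A portable sketch of that kind of load, assembling bytes instead of casting pointers (the 16-byte digest and the +8 offset mirror the type-signature case above):

#include <cstdint>
#include <cstdio>

// Assemble a little-endian 64-bit value one byte at a time, in the
// spirit of support::endian::read64le. Works on any host endianness
// and requires no particular alignment from P.
static uint64_t read64le(const void *P) {
  const uint8_t *B = static_cast<const uint8_t *>(P);
  uint64_t V = 0;
  for (int I = 7; I >= 0; --I)
    V = (V << 8) | B[I];
  return V;
}

int main() {
  uint8_t Digest[16] = {0}; // stand-in for an MD5::MD5Result
  Digest[8] = 0xEF;         // least significant byte of the upper half
  std::printf("%#llx\n", (unsigned long long)read64le(Digest + 8)); // 0xef
}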
diff --git a/lib/CodeGen/AsmPrinter/DwarfDebug.h b/lib/CodeGen/AsmPrinter/DwarfDebug.h
index 1c0e163..74db3ef 100644
--- a/lib/CodeGen/AsmPrinter/DwarfDebug.h
+++ b/lib/CodeGen/AsmPrinter/DwarfDebug.h
@@ -88,7 +88,8 @@ public:
: Var(V), Expr(1, E), TheDIE(nullptr), DotDebugLocOffset(~0U),
MInsn(nullptr), DD(DD) {
FrameIndex.push_back(FI);
- assert(Var.Verify() && E.Verify());
+ assert(Var.Verify());
+ assert(!E || E->isValid());
}
/// Construct a DbgVariable from a DEBUG_VALUE.
@@ -243,25 +244,10 @@ class DwarfDebug : public AsmPrinterHandler {
// If nonnull, stores the CU in which the previous subprogram was contained.
const DwarfCompileUnit *PrevCU;
- // Section Symbols: these are assembler temporary labels that are emitted at
- // the beginning of each supported dwarf section. These are used to form
- // section offsets and are created by EmitSectionLabels.
- MCSymbol *DwarfInfoSectionSym, *DwarfAbbrevSectionSym;
- MCSymbol *DwarfStrSectionSym, *TextSectionSym, *DwarfDebugRangeSectionSym;
- MCSymbol *DwarfDebugLocSectionSym, *DwarfLineSectionSym, *DwarfAddrSectionSym;
- MCSymbol *FunctionBeginSym, *FunctionEndSym;
- MCSymbol *DwarfInfoDWOSectionSym, *DwarfAbbrevDWOSectionSym;
- MCSymbol *DwarfTypesDWOSectionSym;
- MCSymbol *DwarfStrDWOSectionSym;
- MCSymbol *DwarfGnuPubNamesSectionSym, *DwarfGnuPubTypesSectionSym;
-
// As an optimization, there is no need to emit an entry in the directory
// table for the same directory as DW_AT_comp_dir.
StringRef CompilationDir;
- // Counter for assigning globally unique IDs for ranges.
- unsigned GlobalRangeCount;
-
// Holder for the file specific debug information.
DwarfFile InfoHolder;
@@ -290,6 +276,9 @@ class DwarfDebug : public AsmPrinterHandler {
// text.
bool UsedNonDefaultText;
+ // Whether to use the GNU TLS opcode (instead of the standard opcode).
+ bool UseGNUTLSOpcode;
+
// Version of dwarf we're emitting.
unsigned DwarfVersion;
@@ -318,6 +307,7 @@ class DwarfDebug : public AsmPrinterHandler {
// True iff there are multiple CUs in this module.
bool SingleCU;
bool IsDarwin;
+ bool IsPS4;
AddressPool AddrPool;
@@ -347,9 +337,6 @@ class DwarfDebug : public AsmPrinterHandler {
/// \brief Construct a DIE for this abstract scope.
void constructAbstractSubprogramScopeDIE(LexicalScope *Scope);
- /// \brief Emit initial Dwarf sections with a label at the start of each one.
- void emitSectionLabels();
-
/// \brief Compute the size and offset of a DIE given an incoming Offset.
unsigned computeSizeAndOffset(DIE *Die, unsigned Offset);
@@ -373,13 +360,9 @@ class DwarfDebug : public AsmPrinterHandler {
/// \brief Emit the abbreviation section.
void emitAbbreviations();
- /// \brief Emit the last address of the section and the end of
- /// the line matrix.
- void emitEndOfLineMatrix(unsigned SectionEnd);
-
/// \brief Emit a specified accelerator table.
void emitAccel(DwarfAccelTable &Accel, const MCSection *Section,
- StringRef TableName, StringRef SymName);
+ StringRef TableName);
/// \brief Emit visible names into a hashed accelerator table section.
void emitAccelNames();
@@ -540,8 +523,9 @@ public:
SymSize[Sym] = Size;
}
- /// \brief Recursively Emits a debug information entry.
- void emitDIE(DIE &Die);
+ /// \brief Returns whether to use DW_OP_GNU_push_tls_address instead of the
+ /// standard DW_OP_form_tls_address opcode.
+ bool useGNUTLSOpcode() const { return UseGNUTLSOpcode; }
// Experimental DWARF5 features.
@@ -556,15 +540,6 @@ public:
/// Returns the Dwarf Version.
unsigned getDwarfVersion() const { return DwarfVersion; }
- /// Returns the section symbol for the .debug_loc section.
- MCSymbol *getDebugLocSym() const { return DwarfDebugLocSectionSym; }
-
- /// Returns the section symbol for the .debug_str section.
- MCSymbol *getDebugStrSym() const { return DwarfStrSectionSym; }
-
- /// Returns the section symbol for the .debug_ranges section.
- MCSymbol *getRangeSectionSym() const { return DwarfDebugRangeSectionSym; }
-
/// Returns the previous CU that was being updated
const DwarfCompileUnit *getPrevCU() const { return PrevCU; }
void setPrevCU(const DwarfCompileUnit *PrevCU) { this->PrevCU = PrevCU; }
@@ -577,7 +552,8 @@ public:
/// \brief Emit an entry for the debug loc section. This can be used to
/// handle an entry that's going to be emitted into the debug loc section.
- void emitDebugLocEntry(ByteStreamer &Streamer, const DebugLocEntry &Entry);
+ void emitDebugLocEntry(ByteStreamer &Streamer,
+ const DebugLocEntry &Entry);
/// \brief emit a single value for the debug loc section.
void emitDebugLocValue(ByteStreamer &Streamer,
const DebugLocEntry::Value &Value,
@@ -621,8 +597,6 @@ public:
void addAccelType(StringRef Name, const DIE &Die, char Flags);
const MachineFunction *getCurrentFunction() const { return CurFn; }
- const MCSymbol *getFunctionBeginSym() const { return FunctionBeginSym; }
- const MCSymbol *getFunctionEndSym() const { return FunctionEndSym; }
iterator_range<ImportedEntityMap::const_iterator>
findImportedEntitiesForScope(const MDNode *Scope) const {
@@ -642,12 +616,6 @@ public:
/// \brief Return Label immediately following the instruction.
MCSymbol *getLabelAfterInsn(const MachineInstr *MI);
- // FIXME: Consider rolling ranges up into DwarfDebug since we use a single
- // range_base anyway, so there's no need to keep them as separate per-CU range
- // lists. (though one day we might end up with a range.dwo section, in which
- // case it'd go to DwarfFile)
- unsigned getNextRangeNumber() { return GlobalRangeCount++; }
-
// FIXME: Sink these functions down into DwarfFile/Dwarf*Unit.
SmallPtrSet<const MDNode *, 16> &getProcessedSPNodes() {
diff --git a/lib/CodeGen/AsmPrinter/DwarfException.h b/lib/CodeGen/AsmPrinter/DwarfException.h
index e8867c0..6eaf707 100644
--- a/lib/CodeGen/AsmPrinter/DwarfException.h
+++ b/lib/CodeGen/AsmPrinter/DwarfException.h
@@ -21,17 +21,24 @@ namespace llvm {
class MachineFunction;
class ARMTargetStreamer;
-class DwarfCFIException : public EHStreamer {
- /// shouldEmitPersonality - Per-function flag to indicate if .cfi_personality
- /// should be emitted.
+class DwarfCFIExceptionBase : public EHStreamer {
+protected:
+ DwarfCFIExceptionBase(AsmPrinter *A);
+
+ /// Per-function flag to indicate if frame CFI info should be emitted.
+ bool shouldEmitCFI;
+
+ void markFunctionEnd() override;
+};
+
+class DwarfCFIException : public DwarfCFIExceptionBase {
+ /// Per-function flag to indicate if .cfi_personality should be emitted.
bool shouldEmitPersonality;
- /// shouldEmitLSDA - Per-function flag to indicate if .cfi_lsda
- /// should be emitted.
+ /// Per-function flag to indicate if .cfi_lsda should be emitted.
bool shouldEmitLSDA;
- /// shouldEmitMoves - Per-function flag to indicate if frame moves info
- /// should be emitted.
+ /// Per-function flag to indicate if frame moves info should be emitted.
bool shouldEmitMoves;
AsmPrinter::CFIMoveType moveTypeModule;
@@ -43,26 +50,21 @@ public:
DwarfCFIException(AsmPrinter *A);
virtual ~DwarfCFIException();
- /// endModule - Emit all exception information that should come after the
- /// content.
+ /// Emit all exception information that should come after the content.
void endModule() override;
- /// beginFunction - Gather pre-function exception information. Assumes being
- /// emitted immediately after the function entry point.
+ /// Gather pre-function exception information. Assumes being emitted
+ /// immediately after the function entry point.
void beginFunction(const MachineFunction *MF) override;
- /// endFunction - Gather and emit post-function exception information.
+ /// Gather and emit post-function exception information.
void endFunction(const MachineFunction *) override;
};
-class ARMException : public EHStreamer {
+class ARMException : public DwarfCFIExceptionBase {
void emitTypeInfos(unsigned TTypeEncoding) override;
ARMTargetStreamer &getTargetStreamer();
- /// shouldEmitCFI - Per-function flag to indicate if frame CFI info
- /// should be emitted.
- bool shouldEmitCFI;
-
public:
//===--------------------------------------------------------------------===//
// Main entry points.
@@ -70,15 +72,14 @@ public:
ARMException(AsmPrinter *A);
virtual ~ARMException();
- /// endModule - Emit all exception information that should come after the
- /// content.
+ /// Emit all exception information that should come after the content.
void endModule() override;
- /// beginFunction - Gather pre-function exception information. Assumes being
- /// emitted immediately after the function entry point.
+ /// Gather pre-function exception information. Assumes being emitted
+ /// immediately after the function entry point.
void beginFunction(const MachineFunction *MF) override;
- /// endFunction - Gather and emit post-function exception information.
+ /// Gather and emit post-function exception information.
void endFunction(const MachineFunction *) override;
};
} // End of namespace llvm
diff --git a/lib/CodeGen/AsmPrinter/DwarfExpression.cpp b/lib/CodeGen/AsmPrinter/DwarfExpression.cpp
index fcab067..489e455 100644
--- a/lib/CodeGen/AsmPrinter/DwarfExpression.cpp
+++ b/lib/CodeGen/AsmPrinter/DwarfExpression.cpp
@@ -22,14 +22,6 @@
using namespace llvm;
-const TargetRegisterInfo *DwarfExpression::getTRI() const {
- return AP.TM.getSubtargetImpl()->getRegisterInfo();
-}
-
-unsigned DwarfExpression::getDwarfVersion() const {
- return AP.getDwarfDebug()->getDwarfVersion();
-}
-
void DwarfExpression::AddReg(int DwarfReg, const char *Comment) {
assert(DwarfReg >= 0 && "invalid negative dwarf register number");
if (DwarfReg < 32) {
@@ -74,28 +66,28 @@ void DwarfExpression::AddShr(unsigned ShiftBy) {
}
bool DwarfExpression::AddMachineRegIndirect(unsigned MachineReg, int Offset) {
- int DwarfReg = getTRI()->getDwarfRegNum(MachineReg, false);
- if (DwarfReg < 0)
- return false;
-
if (isFrameRegister(MachineReg)) {
// If the variable's offset is based on the frame register, then use fbreg.
EmitOp(dwarf::DW_OP_fbreg);
EmitSigned(Offset);
- } else {
- AddRegIndirect(DwarfReg, Offset);
+ return true;
}
+
+ int DwarfReg = TRI.getDwarfRegNum(MachineReg, false);
+ if (DwarfReg < 0)
+ return false;
+
+ AddRegIndirect(DwarfReg, Offset);
return true;
}
bool DwarfExpression::AddMachineRegPiece(unsigned MachineReg,
unsigned PieceSizeInBits,
unsigned PieceOffsetInBits) {
- const TargetRegisterInfo *TRI = getTRI();
- if (!TRI->isPhysicalRegister(MachineReg))
+ if (!TRI.isPhysicalRegister(MachineReg))
return false;
- int Reg = TRI->getDwarfRegNum(MachineReg, false);
+ int Reg = TRI.getDwarfRegNum(MachineReg, false);
// If this is a valid register number, emit it.
if (Reg >= 0) {
@@ -107,12 +99,12 @@ bool DwarfExpression::AddMachineRegPiece(unsigned MachineReg,
// Walk up the super-register chain until we find a valid number.
// For example, EAX on x86_64 is a 32-bit piece of RAX with offset 0.
- for (MCSuperRegIterator SR(MachineReg, TRI); SR.isValid(); ++SR) {
- Reg = TRI->getDwarfRegNum(*SR, false);
+ for (MCSuperRegIterator SR(MachineReg, &TRI); SR.isValid(); ++SR) {
+ Reg = TRI.getDwarfRegNum(*SR, false);
if (Reg >= 0) {
- unsigned Idx = TRI->getSubRegIndex(*SR, MachineReg);
- unsigned Size = TRI->getSubRegIdxSize(Idx);
- unsigned RegOffset = TRI->getSubRegIdxOffset(Idx);
+ unsigned Idx = TRI.getSubRegIndex(*SR, MachineReg);
+ unsigned Size = TRI.getSubRegIdxSize(Idx);
+ unsigned RegOffset = TRI.getSubRegIdxOffset(Idx);
AddReg(Reg, "super-register");
if (PieceOffsetInBits == RegOffset) {
AddOpPiece(Size, RegOffset);
@@ -136,15 +128,15 @@ bool DwarfExpression::AddMachineRegPiece(unsigned MachineReg,
// efficient DW_OP_piece.
unsigned CurPos = PieceOffsetInBits;
// The size of the register in bits, assuming 8 bits per byte.
- unsigned RegSize = TRI->getMinimalPhysRegClass(MachineReg)->getSize() * 8;
+ unsigned RegSize = TRI.getMinimalPhysRegClass(MachineReg)->getSize() * 8;
// Keep track of the bits in the register we already emitted, so we
// can avoid emitting redundant aliasing subregs.
SmallBitVector Coverage(RegSize, false);
- for (MCSubRegIterator SR(MachineReg, TRI); SR.isValid(); ++SR) {
- unsigned Idx = TRI->getSubRegIndex(MachineReg, *SR);
- unsigned Size = TRI->getSubRegIdxSize(Idx);
- unsigned Offset = TRI->getSubRegIdxOffset(Idx);
- Reg = TRI->getDwarfRegNum(*SR, false);
+ for (MCSubRegIterator SR(MachineReg, &TRI); SR.isValid(); ++SR) {
+ unsigned Idx = TRI.getSubRegIndex(MachineReg, *SR);
+ unsigned Size = TRI.getSubRegIdxSize(Idx);
+ unsigned Offset = TRI.getSubRegIdxOffset(Idx);
+ Reg = TRI.getDwarfRegNum(*SR, false);
// Intersection between the bits we already emitted and the bits
// covered by this subregister.
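A standalone model of the sub-register walk above, assuming a simple SubReg descriptor in place of the MC iterators: a bit vector records which bits of the full register have already been described, and aliasing sub-registers that contribute no new bits are skipped rather than emitted as redundant pieces.

#include <cstdio>
#include <vector>

struct SubReg {
  unsigned Offset; // bit offset within the full register
  unsigned Size;   // width in bits
};

void emitSubRegPieces(unsigned RegSizeInBits,
                      const std::vector<SubReg> &SubRegs) {
  std::vector<bool> Coverage(RegSizeInBits, false);
  for (const SubReg &SR : SubRegs) {
    // Does this sub-register cover any bits not yet emitted?
    bool CoversNewBits = false;
    for (unsigned I = SR.Offset; I < SR.Offset + SR.Size; ++I)
      CoversNewBits = CoversNewBits || !Coverage[I];
    if (!CoversNewBits)
      continue; // redundant alias, e.g. AX once EAX has been emitted
    for (unsigned I = SR.Offset; I < SR.Offset + SR.Size; ++I)
      Coverage[I] = true;
    std::printf("piece: offset %u, size %u\n", SR.Offset, SR.Size);
  }
}

int main() {
  // A 64-bit register whose 32-bit subreg makes the 16/8-bit ones redundant.
  emitSubRegPieces(64, {{0, 32}, {0, 16}, {0, 8}, {8, 8}});
}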
@@ -180,7 +172,7 @@ void DwarfExpression::AddSignedConstant(int Value) {
// value, so the producers and consumers started to rely on heuristics
// to disambiguate the value vs. location status of the expression.
// See PR21176 for more details.
- if (getDwarfVersion() >= 4)
+ if (DwarfVersion >= 4)
EmitOp(dwarf::DW_OP_stack_value);
}
@@ -188,7 +180,7 @@ void DwarfExpression::AddUnsignedConstant(unsigned Value) {
EmitOp(dwarf::DW_OP_constu);
EmitUnsigned(Value);
// cf. comment in DwarfExpression::AddSignedConstant().
- if (getDwarfVersion() >= 4)
+ if (DwarfVersion >= 4)
EmitOp(dwarf::DW_OP_stack_value);
}
@@ -204,11 +196,12 @@ bool DwarfExpression::AddMachineRegExpression(DIExpression Expr,
unsigned MachineReg,
unsigned PieceOffsetInBits) {
auto I = Expr.begin();
- // Pattern-match combinations for which more efficient representations exist
- // first.
- if (I == Expr.end())
+ auto E = Expr.end();
+ if (I == E)
return AddMachineRegPiece(MachineReg);
+ // Pattern-match combinations for which more efficient representations exist
+ // first.
bool ValidReg = false;
switch (*I) {
case dwarf::DW_OP_bit_piece: {
@@ -218,20 +211,23 @@ bool DwarfExpression::AddMachineRegExpression(DIExpression Expr,
return AddMachineRegPiece(MachineReg, SizeInBits,
getOffsetOrZero(OffsetInBits, PieceOffsetInBits));
}
- case dwarf::DW_OP_plus:
+ case dwarf::DW_OP_plus: {
// [DW_OP_reg,Offset,DW_OP_plus,DW_OP_deref] --> [DW_OP_breg,Offset].
- if (I->getNext() == dwarf::DW_OP_deref) {
+ auto N = I->getNext();
+ if ((N != E) && (*N == dwarf::DW_OP_deref)) {
unsigned Offset = I->getArg(1);
ValidReg = AddMachineRegIndirect(MachineReg, Offset);
std::advance(I, 2);
break;
} else
ValidReg = AddMachineRegPiece(MachineReg);
- case dwarf::DW_OP_deref:
- // [DW_OP_reg,DW_OP_deref] --> [DW_OP_breg].
- ValidReg = AddMachineRegIndirect(MachineReg);
- ++I;
- break;
+ }
+ case dwarf::DW_OP_deref: {
+ // [DW_OP_reg,DW_OP_deref] --> [DW_OP_breg].
+ ValidReg = AddMachineRegIndirect(MachineReg);
+ ++I;
+ break;
+ }
default:
llvm_unreachable("unsupported operand");
}
@@ -240,7 +236,7 @@ bool DwarfExpression::AddMachineRegExpression(DIExpression Expr,
return false;
// Emit remaining elements of the expression.
- AddExpression(I, Expr.end(), PieceOffsetInBits);
+ AddExpression(I, E, PieceOffsetInBits);
return true;
}
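The restructured switch above also fixes an out-of-bounds lookahead: the old code compared I->getNext() against DW_OP_deref without first checking that a next element exists. A self-contained model of the same prefix folding, with a plain token stream standing in for DIExpression:

#include <cstdio>
#include <vector>

enum Op { OpPlus, OpDeref, OpOther };

// Fold the two compact prefixes handled above and return how many
// leading ops were consumed:
//   [plus K, deref] -> breg K    (base register plus offset, dereferenced)
//   [deref]         -> breg 0
// Anything else emits the register as a plain piece.
size_t foldPrefix(const std::vector<Op> &Expr, int PlusArg) {
  auto I = Expr.begin(), E = Expr.end();
  if (I == E) {
    std::printf("reg piece\n");
    return 0;
  }
  if (*I == OpPlus && I + 1 != E && *(I + 1) == OpDeref) { // bounds-checked
    std::printf("breg %d\n", PlusArg);
    return 2;
  }
  if (*I == OpDeref) {
    std::printf("breg 0\n");
    return 1;
  }
  std::printf("reg piece\n");
  return 0;
}

int main() {
  foldPrefix({OpPlus, OpDeref}, 8); // breg 8
  foldPrefix({OpDeref}, 0);         // breg 0
}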
diff --git a/lib/CodeGen/AsmPrinter/DwarfExpression.h b/lib/CodeGen/AsmPrinter/DwarfExpression.h
index b90b7b6..985d52c 100644
--- a/lib/CodeGen/AsmPrinter/DwarfExpression.h
+++ b/lib/CodeGen/AsmPrinter/DwarfExpression.h
@@ -30,21 +30,22 @@ class DIELoc;
/// entry.
class DwarfExpression {
protected:
- const AsmPrinter &AP;
// Convenience data extracted out of the AsmPrinter at construction time.
- const TargetRegisterInfo *getTRI() const;
- unsigned getDwarfVersion() const;
+ const TargetRegisterInfo &TRI;
+ unsigned DwarfVersion;
public:
- DwarfExpression(const AsmPrinter &AP) : AP(AP) {}
+ DwarfExpression(const TargetRegisterInfo &TRI,
+ unsigned DwarfVersion)
+ : TRI(TRI), DwarfVersion(DwarfVersion) {}
virtual ~DwarfExpression() {}
/// Output a dwarf operand and an optional assembler comment.
virtual void EmitOp(uint8_t Op, const char *Comment = nullptr) = 0;
/// Emit a raw signed value.
- virtual void EmitSigned(int Value) = 0;
+ virtual void EmitSigned(int64_t Value) = 0;
/// Emit a raw unsigned value.
- virtual void EmitUnsigned(unsigned Value) = 0;
+ virtual void EmitUnsigned(uint64_t Value) = 0;
/// Return whether the given machine register is the frame register in the
/// current function.
virtual bool isFrameRegister(unsigned MachineReg) = 0;
@@ -105,27 +106,27 @@ class DebugLocDwarfExpression : public DwarfExpression {
ByteStreamer &BS;
public:
- DebugLocDwarfExpression(const AsmPrinter &AP, ByteStreamer &BS)
- : DwarfExpression(AP), BS(BS) {}
+ DebugLocDwarfExpression(const TargetRegisterInfo &TRI,
+ unsigned DwarfVersion, ByteStreamer &BS)
+ : DwarfExpression(TRI, DwarfVersion), BS(BS) {}
void EmitOp(uint8_t Op, const char *Comment = nullptr) override;
- void EmitSigned(int Value) override;
- void EmitUnsigned(unsigned Value) override;
+ void EmitSigned(int64_t Value) override;
+ void EmitUnsigned(uint64_t Value) override;
bool isFrameRegister(unsigned MachineReg) override;
};
/// DwarfExpression implementation for singular DW_AT_location.
class DIEDwarfExpression : public DwarfExpression {
+const AsmPrinter &AP;
DwarfUnit &DU;
DIELoc &DIE;
public:
- DIEDwarfExpression(const AsmPrinter &AP, DwarfUnit &DU, DIELoc &DIE)
- : DwarfExpression(AP), DU(DU), DIE(DIE) {}
-
+ DIEDwarfExpression(const AsmPrinter &AP, DwarfUnit &DU, DIELoc &DIE);
void EmitOp(uint8_t Op, const char *Comment = nullptr) override;
- void EmitSigned(int Value) override;
- void EmitUnsigned(unsigned Value) override;
+ void EmitSigned(int64_t Value) override;
+ void EmitUnsigned(uint64_t Value) override;
bool isFrameRegister(unsigned MachineReg) override;
};
}
diff --git a/lib/CodeGen/AsmPrinter/DwarfFile.cpp b/lib/CodeGen/AsmPrinter/DwarfFile.cpp
index 3988f0d..60acc58e 100644
--- a/lib/CodeGen/AsmPrinter/DwarfFile.cpp
+++ b/lib/CodeGen/AsmPrinter/DwarfFile.cpp
@@ -17,9 +17,8 @@
#include "llvm/Target/TargetLoweringObjectFile.h"
namespace llvm {
-DwarfFile::DwarfFile(AsmPrinter *AP, DwarfDebug &DD, StringRef Pref,
- BumpPtrAllocator &DA)
- : Asm(AP), DD(DD), StrPool(DA, *Asm, Pref) {}
+DwarfFile::DwarfFile(AsmPrinter *AP, StringRef Pref, BumpPtrAllocator &DA)
+ : Asm(AP), StrPool(DA, *Asm, Pref) {}
DwarfFile::~DwarfFile() {}
@@ -48,15 +47,15 @@ void DwarfFile::addUnit(std::unique_ptr<DwarfUnit> U) {
// Emit the various dwarf units to the unit section USection with
// the abbreviations going into ASection.
-void DwarfFile::emitUnits(const MCSymbol *ASectionSym) {
+void DwarfFile::emitUnits(bool UseOffsets) {
for (const auto &TheU : CUs) {
DIE &Die = TheU->getUnitDie();
const MCSection *USection = TheU->getSection();
Asm->OutStreamer.SwitchSection(USection);
- TheU->emitHeader(ASectionSym);
+ TheU->emitHeader(UseOffsets);
- DD.emitDIE(Die);
+ Asm->emitDwarfDIE(Die);
}
}
@@ -120,23 +119,13 @@ unsigned DwarfFile::computeSizeAndOffset(DIE &Die, unsigned Offset) {
Die.setSize(Offset - Die.getOffset());
return Offset;
}
+
void DwarfFile::emitAbbrevs(const MCSection *Section) {
// Check to see if it is worth the effort.
if (!Abbreviations.empty()) {
// Start the debug abbrev section.
Asm->OutStreamer.SwitchSection(Section);
-
- // For each abbrevation.
- for (const DIEAbbrev *Abbrev : Abbreviations) {
- // Emit the abbrevations code (base 1 index.)
- Asm->EmitULEB128(Abbrev->getNumber(), "Abbreviation Code");
-
- // Emit the abbreviations data.
- Abbrev->Emit(Asm);
- }
-
- // Mark end of abbreviations.
- Asm->EmitULEB128(0, "EOM(3)");
+ Asm->emitDwarfAbbrevs(Abbreviations);
}
}
diff --git a/lib/CodeGen/AsmPrinter/DwarfFile.h b/lib/CodeGen/AsmPrinter/DwarfFile.h
index 35bf33a..c9de666 100644
--- a/lib/CodeGen/AsmPrinter/DwarfFile.h
+++ b/lib/CodeGen/AsmPrinter/DwarfFile.h
@@ -37,8 +37,6 @@ class DwarfFile {
// Target of Dwarf emission, used for sizing of abbreviations.
AsmPrinter *Asm;
- DwarfDebug &DD;
-
// Used to uniquely define abbreviations.
FoldingSet<DIEAbbrev> AbbreviationsSet;
@@ -62,8 +60,7 @@ class DwarfFile {
DenseMap<const MDNode *, DIE *> MDTypeNodeToDieMap;
public:
- DwarfFile(AsmPrinter *AP, DwarfDebug &DD, StringRef Pref,
- BumpPtrAllocator &DA);
+ DwarfFile(AsmPrinter *AP, StringRef Pref, BumpPtrAllocator &DA);
~DwarfFile();
@@ -83,7 +80,7 @@ public:
/// \brief Emit all of the units to the section listed with the given
/// abbreviation section.
- void emitUnits(const MCSymbol *ASectionSym);
+ void emitUnits(bool UseOffsets);
/// \brief Emit a set of abbreviations to the specific section.
void emitAbbrevs(const MCSection *);
diff --git a/lib/CodeGen/AsmPrinter/DwarfStringPool.cpp b/lib/CodeGen/AsmPrinter/DwarfStringPool.cpp
index d76b66c..165ef16 100644
--- a/lib/CodeGen/AsmPrinter/DwarfStringPool.cpp
+++ b/lib/CodeGen/AsmPrinter/DwarfStringPool.cpp
@@ -19,7 +19,7 @@ getEntry(AsmPrinter &Asm,
std::pair<MCSymbol *, unsigned> &Entry = Pool[Str];
if (!Entry.first) {
Entry.second = Pool.size() - 1;
- Entry.first = Asm.GetTempSymbol(Prefix, Entry.second);
+ Entry.first = Asm.createTempSymbol(Prefix);
}
return Entry;
}
diff --git a/lib/CodeGen/AsmPrinter/DwarfUnit.cpp b/lib/CodeGen/AsmPrinter/DwarfUnit.cpp
index b0c7d48..f6af73f 100644
--- a/lib/CodeGen/AsmPrinter/DwarfUnit.cpp
+++ b/lib/CodeGen/AsmPrinter/DwarfUnit.cpp
@@ -17,6 +17,7 @@
#include "DwarfDebug.h"
#include "DwarfExpression.h"
#include "llvm/ADT/APFloat.h"
+#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DIBuilder.h"
#include "llvm/IR/DataLayout.h"
@@ -43,17 +44,23 @@ GenerateDwarfTypeUnits("generate-type-units", cl::Hidden,
cl::desc("Generate DWARF4 type units."),
cl::init(false));
+DIEDwarfExpression::DIEDwarfExpression(const AsmPrinter &AP, DwarfUnit &DU,
+ DIELoc &DIE)
+ : DwarfExpression(*AP.MF->getSubtarget().getRegisterInfo(),
+ AP.getDwarfDebug()->getDwarfVersion()),
+ AP(AP), DU(DU), DIE(DIE) {}
+
void DIEDwarfExpression::EmitOp(uint8_t Op, const char* Comment) {
DU.addUInt(DIE, dwarf::DW_FORM_data1, Op);
}
-void DIEDwarfExpression::EmitSigned(int Value) {
+void DIEDwarfExpression::EmitSigned(int64_t Value) {
DU.addSInt(DIE, dwarf::DW_FORM_sdata, Value);
}
-void DIEDwarfExpression::EmitUnsigned(unsigned Value) {
+void DIEDwarfExpression::EmitUnsigned(uint64_t Value) {
DU.addUInt(DIE, dwarf::DW_FORM_udata, Value);
}
bool DIEDwarfExpression::isFrameRegister(unsigned MachineReg) {
- return MachineReg == getTRI()->getFrameRegister(*AP.MF);
+ return MachineReg == TRI.getFrameRegister(*AP.MF);
}
@@ -257,12 +264,14 @@ void DwarfUnit::addIndexedString(DIE &Die, dwarf::Attribute Attribute,
/// to be in the local string pool instead of indirected.
void DwarfUnit::addLocalString(DIE &Die, dwarf::Attribute Attribute,
StringRef String) {
+ const TargetLoweringObjectFile &TLOF = Asm->getObjFileLowering();
MCSymbol *Symb = DU->getStringPool().getSymbol(*Asm, String);
DIEValue *Value;
if (Asm->MAI->doesDwarfUseRelocationsAcrossSections())
Value = new (DIEValueAllocator) DIELabel(Symb);
else
- Value = new (DIEValueAllocator) DIEDelta(Symb, DD->getDebugStrSym());
+ Value = new (DIEValueAllocator)
+ DIEDelta(Symb, TLOF.getDwarfStrSection()->getBeginSymbol());
DIEValue *Str = new (DIEValueAllocator) DIEString(Value, String);
Die.addValue(Attribute, dwarf::DW_FORM_strp, Str);
}
@@ -750,6 +759,15 @@ void DwarfUnit::addConstantValue(DIE &Die, const APInt &Val, bool Unsigned) {
addBlock(Die, dwarf::DW_AT_const_value, Block);
}
+// Add a linkage name to the DIE, if it isn't empty.
+void DwarfUnit::addLinkageName(DIE &Die, StringRef LinkageName) {
+ if (!LinkageName.empty())
+ addString(Die,
+ DD->getDwarfVersion() >= 4 ? dwarf::DW_AT_linkage_name
+ : dwarf::DW_AT_MIPS_linkage_name,
+ GlobalValue::getRealLinkageName(LinkageName));
+}
+
/// addTemplateParams - Add template parameters into buffer.
void DwarfUnit::addTemplateParams(DIE &Buffer, DIArray TParams) {
// Add template parameters.
@@ -1269,9 +1287,8 @@ bool DwarfUnit::applySubprogramDefinitionAttributes(DISubprogram SP,
assert(((LinkageName.empty() || DeclLinkageName.empty()) ||
LinkageName == DeclLinkageName) &&
"decl has a linkage name and it is different");
- if (!LinkageName.empty() && DeclLinkageName.empty())
- addString(SPDie, dwarf::DW_AT_MIPS_linkage_name,
- GlobalValue::getRealLinkageName(LinkageName));
+ if (DeclLinkageName.empty())
+ addLinkageName(SPDie, LinkageName);
if (!DeclDie)
return false;
@@ -1344,9 +1361,8 @@ void DwarfUnit::applySubprogramAttributes(DISubprogram SP, DIE &SPDie,
if (SP.isOptimized())
addFlag(SPDie, dwarf::DW_AT_APPLE_optimized);
- if (unsigned isa = Asm->getISAEncoding(SP.getFunction())) {
+ if (unsigned isa = Asm->getISAEncoding())
addUInt(SPDie, dwarf::DW_AT_APPLE_isa, dwarf::DW_FORM_flag, isa);
- }
if (SP.isLValueReference())
addFlag(SPDie, dwarf::DW_AT_reference);
@@ -1597,7 +1613,7 @@ DIE *DwarfUnit::getOrCreateStaticMemberDIE(DIDerivedType DT) {
return &StaticMemberDIE;
}
-void DwarfUnit::emitHeader(const MCSymbol *ASectionSym) const {
+void DwarfUnit::emitHeader(bool UseOffsets) {
// Emit size of content not including length itself
Asm->OutStreamer.AddComment("Length of Unit");
Asm->EmitInt32(getHeaderSize() + UnitDie.getSize());
@@ -1605,14 +1621,16 @@ void DwarfUnit::emitHeader(const MCSymbol *ASectionSym) const {
Asm->OutStreamer.AddComment("DWARF version number");
Asm->EmitInt16(DD->getDwarfVersion());
Asm->OutStreamer.AddComment("Offset Into Abbrev. Section");
+
// We share one abbreviations table across all units so it's always at the
// start of the section. Use a relocatable offset where needed to ensure
// linking doesn't invalidate that offset.
- if (ASectionSym)
- Asm->EmitSectionOffset(ASectionSym, ASectionSym);
+ const TargetLoweringObjectFile &TLOF = Asm->getObjFileLowering();
+ if (!UseOffsets)
+ Asm->emitSectionOffset(TLOF.getDwarfAbbrevSection()->getBeginSymbol());
else
- // Use a constant value when no symbol is provided.
Asm->EmitInt32(0);
+
Asm->OutStreamer.AddComment("Address Size (in bytes)");
Asm->EmitInt8(Asm->getDataLayout().getPointerSize());
}
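The UseOffsets flag makes the old symbol-or-null convention explicit: regular units reference the start of .debug_abbrev through a relocation, while split-DWARF units hard-code offset 0 so the .dwo file stays free of relocations. A toy rendering of that choice:

#include <cstdio>

void emitAbbrevOffset(bool UseOffsets) {
  if (!UseOffsets)
    std::printf(".long .debug_abbrev  # resolved by the linker\n");
  else
    std::printf(".long 0              # constant, no relocation needed\n");
}

int main() {
  emitAbbrevOffset(false); // a normal compile unit
  emitAbbrevOffset(true);  // a split-DWARF .dwo unit
}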
@@ -1622,8 +1640,8 @@ void DwarfUnit::initSection(const MCSection *Section) {
this->Section = Section;
}
-void DwarfTypeUnit::emitHeader(const MCSymbol *ASectionSym) const {
- DwarfUnit::emitHeader(ASectionSym);
+void DwarfTypeUnit::emitHeader(bool UseOffsets) {
+ DwarfUnit::emitHeader(UseOffsets);
Asm->OutStreamer.AddComment("Type Signature");
Asm->OutStreamer.EmitIntValue(TypeSignature, sizeof(TypeSignature));
Asm->OutStreamer.AddComment("Type DIE Offset");
diff --git a/lib/CodeGen/AsmPrinter/DwarfUnit.h b/lib/CodeGen/AsmPrinter/DwarfUnit.h
index 7a5e47d..81c5821 100644
--- a/lib/CodeGen/AsmPrinter/DwarfUnit.h
+++ b/lib/CodeGen/AsmPrinter/DwarfUnit.h
@@ -120,7 +120,6 @@ protected:
DwarfUnit(unsigned UID, dwarf::Tag, DICompileUnit CU, AsmPrinter *A,
DwarfDebug *DW, DwarfFile *DWU);
- void initSection(const MCSection *Section);
/// Add a string attribute data and value.
void addLocalString(DIE &Die, dwarf::Attribute Attribute, StringRef Str);
@@ -132,6 +131,8 @@ protected:
public:
virtual ~DwarfUnit();
+ void initSection(const MCSection *Section);
+
const MCSection *getSection() const {
assert(Section);
return Section;
@@ -251,6 +252,9 @@ public:
void addConstantFPValue(DIE &Die, const MachineOperand &MO);
void addConstantFPValue(DIE &Die, const ConstantFP *CFP);
+ /// \brief Add a linkage name, if it isn't empty.
+ void addLinkageName(DIE &Die, StringRef LinkageName);
+
/// addTemplateParams - Add template parameters in buffer.
void addTemplateParams(DIE &Buffer, DIArray TParams);
@@ -321,7 +325,7 @@ public:
}
/// Emit the header for this unit, not including the initial length field.
- virtual void emitHeader(const MCSymbol *ASectionSym) const;
+ virtual void emitHeader(bool UseOffsets);
virtual DwarfCompileUnit &getCU() = 0;
@@ -423,12 +427,11 @@ public:
void setType(const DIE *Ty) { this->Ty = Ty; }
/// Emit the header for this unit, not including the initial length field.
- void emitHeader(const MCSymbol *ASectionSym) const override;
+ void emitHeader(bool UseOffsets) override;
unsigned getHeaderSize() const override {
return DwarfUnit::getHeaderSize() + sizeof(uint64_t) + // Type Signature
sizeof(uint32_t); // Type DIE Offset
}
- using DwarfUnit::initSection;
DwarfCompileUnit &getCU() override { return CU; }
};
} // end llvm namespace
diff --git a/lib/CodeGen/AsmPrinter/EHStreamer.cpp b/lib/CodeGen/AsmPrinter/EHStreamer.cpp
index 4841814..14df4c9 100644
--- a/lib/CodeGen/AsmPrinter/EHStreamer.cpp
+++ b/lib/CodeGen/AsmPrinter/EHStreamer.cpp
@@ -436,12 +436,7 @@ void EHStreamer::emitExceptionTable() {
Asm->OutContext.GetOrCreateSymbol(Twine("GCC_except_table")+
Twine(Asm->getFunctionNumber()));
Asm->OutStreamer.EmitLabel(GCCETSym);
- Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("exception",
- Asm->getFunctionNumber()));
-
- if (IsSJLJ)
- Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("_LSDA_",
- Asm->getFunctionNumber()));
+ Asm->OutStreamer.EmitLabel(Asm->getCurExceptionSym());
// Emit the LSDA header.
Asm->EmitEncodingByte(dwarf::DW_EH_PE_omit, "@LPStart");
@@ -552,16 +547,14 @@ void EHStreamer::emitExceptionTable() {
I = CallSites.begin(), E = CallSites.end(); I != E; ++I) {
const CallSiteEntry &S = *I;
- MCSymbol *EHFuncBeginSym =
- Asm->GetTempSymbol("eh_func_begin", Asm->getFunctionNumber());
+ MCSymbol *EHFuncBeginSym = Asm->getFunctionBegin();
MCSymbol *BeginLabel = S.BeginLabel;
if (!BeginLabel)
BeginLabel = EHFuncBeginSym;
MCSymbol *EndLabel = S.EndLabel;
if (!EndLabel)
- EndLabel = Asm->GetTempSymbol("eh_func_end", Asm->getFunctionNumber());
-
+ EndLabel = Asm->getFunctionEnd();
// Offset of the call site relative to the previous call site, counted in
// number of 16-byte bundles. The first call site is counted relative to
@@ -689,19 +682,3 @@ void EHStreamer::emitTypeInfos(unsigned TTypeEncoding) {
Asm->EmitULEB128(TypeID);
}
}
-
-/// Emit all exception information that should come after the content.
-void EHStreamer::endModule() {
- llvm_unreachable("Should be implemented");
-}
-
-/// Gather pre-function exception information. Assumes it's being emitted
-/// immediately after the function entry point.
-void EHStreamer::beginFunction(const MachineFunction *MF) {
- llvm_unreachable("Should be implemented");
-}
-
-/// Gather and emit post-function exception information.
-void EHStreamer::endFunction(const MachineFunction *) {
- llvm_unreachable("Should be implemented");
-}
diff --git a/lib/CodeGen/AsmPrinter/EHStreamer.h b/lib/CodeGen/AsmPrinter/EHStreamer.h
index 9b316ff..94d0585 100644
--- a/lib/CodeGen/AsmPrinter/EHStreamer.h
+++ b/lib/CodeGen/AsmPrinter/EHStreamer.h
@@ -125,16 +125,6 @@ public:
EHStreamer(AsmPrinter *A);
virtual ~EHStreamer();
- /// Emit all exception information that should come after the content.
- void endModule() override;
-
- /// Gather pre-function exception information. Assumes being emitted
- /// immediately after the function entry point.
- void beginFunction(const MachineFunction *MF) override;
-
- /// Gather and emit post-function exception information.
- void endFunction(const MachineFunction *) override;
-
// Unused.
void setSymbolSize(const MCSymbol *Sym, uint64_t Size) override {}
void beginInstruction(const MachineInstr *MI) override {}
diff --git a/lib/CodeGen/AsmPrinter/Win64Exception.cpp b/lib/CodeGen/AsmPrinter/Win64Exception.cpp
index 2b03877..7d76ead 100644
--- a/lib/CodeGen/AsmPrinter/Win64Exception.cpp
+++ b/lib/CodeGen/AsmPrinter/Win64Exception.cpp
@@ -48,8 +48,6 @@ Win64Exception::~Win64Exception() {}
void Win64Exception::endModule() {
}
-/// beginFunction - Gather pre-function exception information. Assumes it's
-/// being emitted immediately after the function entry point.
void Win64Exception::beginFunction(const MachineFunction *MF) {
shouldEmitMoves = shouldEmitPersonality = shouldEmitLSDA = false;
@@ -80,9 +78,6 @@ void Win64Exception::beginFunction(const MachineFunction *MF) {
const MCSymbol *PersHandlerSym =
TLOF.getCFIPersonalitySymbol(Per, *Asm->Mang, Asm->TM, MMI);
Asm->OutStreamer.EmitWinEHHandler(PersHandlerSym, true, true);
-
- Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("eh_func_begin",
- Asm->getFunctionNumber()));
}
/// endFunction - Gather and emit post-function exception information.
@@ -91,9 +86,6 @@ void Win64Exception::endFunction(const MachineFunction *) {
if (!shouldEmitPersonality && !shouldEmitMoves)
return;
- Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("eh_func_end",
- Asm->getFunctionNumber()));
-
// Map all labels and get rid of any dead landing pads.
MMI->TidyLandingPads();
@@ -170,10 +162,8 @@ void Win64Exception::emitCSpecificHandlerTable() {
SmallVector<CallSiteEntry, 64> CallSites;
computeCallSiteTable(CallSites, LandingPads, FirstActions);
- MCSymbol *EHFuncBeginSym =
- Asm->GetTempSymbol("eh_func_begin", Asm->getFunctionNumber());
- MCSymbol *EHFuncEndSym =
- Asm->GetTempSymbol("eh_func_end", Asm->getFunctionNumber());
+ MCSymbol *EHFuncBeginSym = Asm->getFunctionBegin();
+ MCSymbol *EHFuncEndSym = Asm->getFunctionEnd();
// Emit the number of table entries.
unsigned NumEntries = 0;
diff --git a/lib/CodeGen/AsmPrinter/WinCodeViewLineTables.cpp b/lib/CodeGen/AsmPrinter/WinCodeViewLineTables.cpp
index b5e0929..d2b4eec 100644
--- a/lib/CodeGen/AsmPrinter/WinCodeViewLineTables.cpp
+++ b/lib/CodeGen/AsmPrinter/WinCodeViewLineTables.cpp
@@ -190,8 +190,11 @@ void WinCodeViewLineTables::emitDebugInfoForFunction(const Function *GV) {
return;
assert(FI.End && "Don't know where the function ends?");
- StringRef FuncName = getDISubprogram(GV).getDisplayName(),
- GVName = GV->getName();
+ StringRef GVName = GV->getName();
+ StringRef FuncName;
+ if (DISubprogram SP = getDISubprogram(GV))
+ FuncName = SP.getDisplayName();
+
// FIXME Clang currently sets DisplayName to "bar" for a C++
// "namespace_foo::bar" function, see PR21528. Luckily, dbghelp.dll is trying
// to demangle display names anyways, so let's just put a mangled name into
@@ -364,10 +367,7 @@ void WinCodeViewLineTables::endFunction(const MachineFunction *MF) {
FnDebugInfo.erase(GV);
VisitedFunctions.pop_back();
} else {
- // Define end label for subprogram.
- MCSymbol *FunctionEndSym = Asm->OutStreamer.getContext().CreateTempSymbol();
- Asm->OutStreamer.EmitLabel(FunctionEndSym);
- CurFn->End = FunctionEndSym;
+ CurFn->End = Asm->getFunctionEnd();
}
CurFn = nullptr;
}
diff --git a/lib/CodeGen/AtomicExpandPass.cpp b/lib/CodeGen/AtomicExpandPass.cpp
index 4b64be0..fa17108 100644
--- a/lib/CodeGen/AtomicExpandPass.cpp
+++ b/lib/CodeGen/AtomicExpandPass.cpp
@@ -48,7 +48,7 @@ namespace {
bool expandAtomicLoadToLL(LoadInst *LI);
bool expandAtomicLoadToCmpXchg(LoadInst *LI);
bool expandAtomicStore(StoreInst *SI);
- bool expandAtomicRMW(AtomicRMWInst *AI);
+ bool tryExpandAtomicRMW(AtomicRMWInst *AI);
bool expandAtomicRMWToLLSC(AtomicRMWInst *AI);
bool expandAtomicRMWToCmpXchg(AtomicRMWInst *AI);
bool expandAtomicCmpXchg(AtomicCmpXchgInst *CI);
@@ -135,9 +135,12 @@ bool AtomicExpand::runOnFunction(Function &F) {
// - into a load if it is idempotent
// - into a Cmpxchg/LL-SC loop otherwise
// we try them in that order.
- MadeChange |=
- (isIdempotentRMW(RMWI) && simplifyIdempotentRMW(RMWI)) ||
- (TLI->shouldExpandAtomicRMWInIR(RMWI) && expandAtomicRMW(RMWI));
+
+ if (isIdempotentRMW(RMWI) && simplifyIdempotentRMW(RMWI)) {
+ MadeChange = true;
+ } else {
+ MadeChange |= tryExpandAtomicRMW(RMWI);
+ }
} else if (CASI && TLI->hasLoadLinkedStoreConditional()) {
MadeChange |= expandAtomicCmpXchg(CASI);
}
@@ -211,7 +214,7 @@ bool AtomicExpand::expandAtomicStore(StoreInst *SI) {
// atomic if implemented as a native store. So we replace them by an
// atomic swap, that can be implemented for example as a ldrex/strex on ARM
// or lock cmpxchg8/16b on X86, as these are atomic for larger sizes.
- // It is the responsibility of the target to only return true in
+ // It is the responsibility of the target to only signal expansion via
// shouldExpandAtomicRMW in cases where this is required and possible.
IRBuilder<> Builder(SI);
AtomicRMWInst *AI =
@@ -220,14 +223,26 @@ bool AtomicExpand::expandAtomicStore(StoreInst *SI) {
SI->eraseFromParent();
// Now we have an appropriate swap instruction, lower it as usual.
- return expandAtomicRMW(AI);
+ return tryExpandAtomicRMW(AI);
}
-bool AtomicExpand::expandAtomicRMW(AtomicRMWInst *AI) {
- if (TLI->hasLoadLinkedStoreConditional())
+bool AtomicExpand::tryExpandAtomicRMW(AtomicRMWInst *AI) {
+ switch (TLI->shouldExpandAtomicRMWInIR(AI)) {
+ case TargetLoweringBase::AtomicRMWExpansionKind::None:
+ return false;
+ case TargetLoweringBase::AtomicRMWExpansionKind::LLSC: {
+ assert(TLI->hasLoadLinkedStoreConditional() &&
+ "TargetLowering requested we expand AtomicRMW instruction into "
+ "load-linked/store-conditional combos, but such instructions aren't "
+ "supported");
+
return expandAtomicRMWToLLSC(AI);
- else
+ }
+ case TargetLoweringBase::AtomicRMWExpansionKind::CmpXChg: {
return expandAtomicRMWToCmpXchg(AI);
+ }
+ }
+ llvm_unreachable("Unhandled case in tryExpandAtomicRMW");
}
/// Emit IR to implement the given atomicrmw operation on values in registers,
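tryExpandAtomicRMW turns the old boolean query into a three-way request from the target. A compact model of the dispatch, with a local ExpansionKind enum standing in for TargetLoweringBase::AtomicRMWExpansionKind:

#include <cassert>
#include <cstdio>

enum class ExpansionKind { None, LLSC, CmpXChg };

// The target chooses the strategy; the pass performs it and reports
// whether the instruction was rewritten.
bool tryExpand(ExpansionKind Kind, bool TargetHasLLSC) {
  switch (Kind) {
  case ExpansionKind::None:
    return false;
  case ExpansionKind::LLSC:
    assert(TargetHasLLSC && "LL/SC requested but not supported");
    std::printf("emit load-linked/store-conditional loop\n");
    return true;
  case ExpansionKind::CmpXChg:
    std::printf("emit compare-and-swap loop\n");
    return true;
  }
  return false; // unreachable for a well-formed Kind
}

int main() {
  tryExpand(ExpansionKind::CmpXChg, /*TargetHasLLSC=*/false);
}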
diff --git a/lib/CodeGen/BranchFolding.cpp b/lib/CodeGen/BranchFolding.cpp
index b8f05cd..abe7ca1 100644
--- a/lib/CodeGen/BranchFolding.cpp
+++ b/lib/CodeGen/BranchFolding.cpp
@@ -24,6 +24,7 @@
#include "llvm/CodeGen/MachineBranchProbabilityInfo.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineJumpTableInfo.h"
+#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/Passes.h"
@@ -727,6 +728,62 @@ bool BranchFolder::CreateCommonTailOnlyBlock(MachineBasicBlock *&PredBB,
return true;
}
+static bool hasIdenticalMMOs(const MachineInstr *MI1, const MachineInstr *MI2) {
+ auto I1 = MI1->memoperands_begin(), E1 = MI1->memoperands_end();
+ auto I2 = MI2->memoperands_begin(), E2 = MI2->memoperands_end();
+ if ((E1 - I1) != (E2 - I2))
+ return false;
+ for (; I1 != E1; ++I1, ++I2) {
+ if (**I1 != **I2)
+ return false;
+ }
+ return true;
+}
+
+static void
+removeMMOsFromMemoryOperations(MachineBasicBlock::iterator MBBIStartPos,
+ MachineBasicBlock &MBBCommon) {
+ // Remove MMOs from memory operations in the common block
+ // when they do not match the ones from the block being tail-merged.
+ // This ensures later passes conservatively compute dependencies.
+ MachineBasicBlock *MBB = MBBIStartPos->getParent();
+ // Note that CommonTailLen does not necessarily match the size of the
+ // common BB, nor the number of its instructions, because debug
+ // instructions may differ between the two blocks.
+ unsigned CommonTailLen = 0;
+ for (auto E = MBB->end(); MBBIStartPos != E; ++MBBIStartPos)
+ ++CommonTailLen;
+
+ MachineBasicBlock::reverse_iterator MBBI = MBB->rbegin();
+ MachineBasicBlock::reverse_iterator MBBIE = MBB->rend();
+ MachineBasicBlock::reverse_iterator MBBICommon = MBBCommon.rbegin();
+ MachineBasicBlock::reverse_iterator MBBIECommon = MBBCommon.rend();
+
+ while (CommonTailLen--) {
+ assert(MBBI != MBBIE && "Reached BB end within common tail length!");
+ (void)MBBIE;
+
+ if (MBBI->isDebugValue()) {
+ ++MBBI;
+ continue;
+ }
+
+ while ((MBBICommon != MBBIECommon) && MBBICommon->isDebugValue())
+ ++MBBICommon;
+
+ assert(MBBICommon != MBBIECommon &&
+ "Reached BB end within common tail length!");
+ assert(MBBICommon->isIdenticalTo(&*MBBI) && "Expected matching MIIs!");
+
+ if (MBBICommon->mayLoad() || MBBICommon->mayStore())
+ if (!hasIdenticalMMOs(&*MBBI, &*MBBICommon))
+ MBBICommon->clearMemRefs();
+
+ ++MBBI;
+ ++MBBICommon;
+ }
+}
+
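A simplified model of the reverse walk above, assuming a plain Instr record in place of MachineInstr: both tails are traversed back to front, debug values are skipped on either side, and the kept copy conservatively drops its memory info whenever the two copies disagree.

#include <cassert>
#include <cstdio>
#include <vector>

struct Instr {
  bool IsDebug; // debug pseudo-instruction, skipped while pairing
  bool IsMemOp; // load or store carrying memory-operand info
  int MemInfo;  // stand-in for the MMO list; -1 means "unknown"
};

void reconcileTails(std::vector<Instr> &Common,
                    const std::vector<Instr> &Merged, unsigned TailLen) {
  auto CI = Common.rbegin();
  auto MI = Merged.rbegin();
  while (TailLen--) {
    assert(MI != Merged.rend() && "reached end within common tail length");
    if (MI->IsDebug) {
      ++MI;
      continue;
    }
    while (CI != Common.rend() && CI->IsDebug)
      ++CI;
    assert(CI != Common.rend() && "reached end within common tail length");
    if (CI->IsMemOp && CI->MemInfo != MI->MemInfo)
      CI->MemInfo = -1; // conservatively forget the memory operands
    ++CI;
    ++MI;
  }
}

int main() {
  std::vector<Instr> Common = {{false, true, 1}};
  std::vector<Instr> Merged = {{true, false, 0}, {false, true, 2}};
  reconcileTails(Common, Merged, 2);
  std::printf("common mem info: %d\n", Common[0].MemInfo); // prints -1
}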
// See if any of the blocks in MergePotentials (which all have a common single
// successor, or all have no successor) can be tail-merged. If there is a
// successor, any blocks in MergePotentials that are not tail-merged and
@@ -761,7 +818,7 @@ bool BranchFolder::TryTailMergeBlocks(MachineBasicBlock *SuccBB,
// Sort by hash value so that blocks with identical end sequences sort
// together.
- std::stable_sort(MergePotentials.begin(), MergePotentials.end());
+ array_pod_sort(MergePotentials.begin(), MergePotentials.end());
// Walk through equivalence sets looking for actual exact matches.
while (MergePotentials.size() > 1) {
@@ -840,6 +897,8 @@ bool BranchFolder::TryTailMergeBlocks(MachineBasicBlock *SuccBB,
continue;
DEBUG(dbgs() << "BB#" << SameTails[i].getBlock()->getNumber()
<< (i == e-1 ? "" : ", "));
+ // Remove MMOs from memory operations as needed.
+ removeMMOsFromMemoryOperations(SameTails[i].getTailStartPos(), *MBB);
// Hack the end off BB i, making it jump to BB commonTailIndex instead.
ReplaceTailWithBranchTo(SameTails[i].getTailStartPos(), MBB);
// BB i is no longer a predecessor of SuccBB; remove it from the worklist.
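The sort swap above relies only on grouping: once candidates are ordered by the hash of their trailing instructions, blocks with identical end sequences sit next to each other and the merge loop can scan runs of equal hashes. Stability is not needed for that, so LLVM's cheaper qsort-based array_pod_sort suffices. The grouping effect, sketched with std::sort over a plain struct:

#include <algorithm>
#include <cstdio>
#include <vector>

struct MergeCandidate {
  unsigned Hash; // hash of the block's trailing instructions
  int Block;     // stand-in for the MachineBasicBlock pointer
  bool operator<(const MergeCandidate &O) const { return Hash < O.Hash; }
};

int main() {
  std::vector<MergeCandidate> Potentials = {
      {0xBEEF, 1}, {0xCAFE, 2}, {0xBEEF, 3}};
  std::sort(Potentials.begin(), Potentials.end());
  for (const auto &C : Potentials) // the two 0xBEEF blocks are now adjacent
    std::printf("hash %#x block %d\n", C.Hash, C.Block);
}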
diff --git a/lib/CodeGen/CMakeLists.txt b/lib/CodeGen/CMakeLists.txt
index f21d4d2..ef57638 100644
--- a/lib/CodeGen/CMakeLists.txt
+++ b/lib/CodeGen/CMakeLists.txt
@@ -19,7 +19,6 @@ add_llvm_library(LLVMCodeGen
ExecutionDepsFix.cpp
ExpandISelPseudos.cpp
ExpandPostRAPseudos.cpp
- ForwardControlFlowIntegrity.cpp
GCMetadata.cpp
GCMetadataPrinter.cpp
GCRootLowering.cpp
@@ -29,7 +28,6 @@ add_llvm_library(LLVMCodeGen
InlineSpiller.cpp
InterferenceCache.cpp
IntrinsicLowering.cpp
- JumpInstrTables.cpp
LLVMTargetMachine.cpp
LatencyPriorityQueue.cpp
LexicalScopes.cpp
diff --git a/lib/CodeGen/CodeGen.cpp b/lib/CodeGen/CodeGen.cpp
index 7c0068e..da66639 100644
--- a/lib/CodeGen/CodeGen.cpp
+++ b/lib/CodeGen/CodeGen.cpp
@@ -24,9 +24,10 @@ void llvm::initializeCodeGen(PassRegistry &Registry) {
initializeBranchFolderPassPass(Registry);
initializeCodeGenPreparePass(Registry);
initializeDeadMachineInstructionElimPass(Registry);
+ initializeDwarfEHPreparePass(Registry);
initializeEarlyIfConverterPass(Registry);
- initializeExpandPostRAPass(Registry);
initializeExpandISelPseudosPass(Registry);
+ initializeExpandPostRAPass(Registry);
initializeFinalizeMachineBundlesPass(Registry);
initializeGCMachineCodeAnalysisPass(Registry);
initializeGCModuleInfoPass(Registry);
@@ -36,31 +37,34 @@ void llvm::initializeCodeGen(PassRegistry &Registry) {
initializeLiveStacksPass(Registry);
initializeLiveVariablesPass(Registry);
initializeLocalStackSlotPassPass(Registry);
+ initializeLowerIntrinsicsPass(Registry);
initializeMachineBlockFrequencyInfoPass(Registry);
initializeMachineBlockPlacementPass(Registry);
initializeMachineBlockPlacementStatsPass(Registry);
- initializeMachineCopyPropagationPass(Registry);
- initializeMachineCombinerPass(Registry);
initializeMachineCSEPass(Registry);
+ initializeMachineCombinerPass(Registry);
+ initializeMachineCopyPropagationPass(Registry);
initializeMachineDominatorTreePass(Registry);
- initializeMachinePostDominatorTreePass(Registry);
+ initializeMachineFunctionPrinterPassPass(Registry);
initializeMachineLICMPass(Registry);
initializeMachineLoopInfoPass(Registry);
initializeMachineModuleInfoPass(Registry);
+ initializeMachinePostDominatorTreePass(Registry);
initializeMachineSchedulerPass(Registry);
initializeMachineSinkingPass(Registry);
initializeMachineVerifierPassPass(Registry);
initializeOptimizePHIsPass(Registry);
+ initializePEIPass(Registry);
initializePHIEliminationPass(Registry);
initializePeepholeOptimizerPass(Registry);
initializePostMachineSchedulerPass(Registry);
initializePostRASchedulerPass(Registry);
initializeProcessImplicitDefsPass(Registry);
- initializePEIPass(Registry);
initializeRegisterCoalescerPass(Registry);
initializeSlotIndexesPass(Registry);
- initializeStackProtectorPass(Registry);
initializeStackColoringPass(Registry);
+ initializeStackMapLivenessPass(Registry);
+ initializeStackProtectorPass(Registry);
initializeStackSlotColoringPass(Registry);
initializeTailDuplicatePassPass(Registry);
initializeTargetPassConfigPass(Registry);
@@ -70,9 +74,7 @@ void llvm::initializeCodeGen(PassRegistry &Registry) {
initializeUnreachableMachineBlockElimPass(Registry);
initializeVirtRegMapPass(Registry);
initializeVirtRegRewriterPass(Registry);
- initializeLowerIntrinsicsPass(Registry);
- initializeMachineFunctionPrinterPassPass(Registry);
- initializeStackMapLivenessPass(Registry);
+ initializeWinEHPreparePass(Registry);
}
void LLVMInitializeCodeGen(LLVMPassRegistryRef R) {
diff --git a/lib/CodeGen/CodeGenPrepare.cpp b/lib/CodeGen/CodeGenPrepare.cpp
index c0d7dca..6c9d048 100644
--- a/lib/CodeGen/CodeGenPrepare.cpp
+++ b/lib/CodeGen/CodeGenPrepare.cpp
@@ -124,7 +124,6 @@ class TypePromotionTransaction;
const TargetLowering *TLI;
const TargetTransformInfo *TTI;
const TargetLibraryInfo *TLInfo;
- DominatorTree *DT;
/// CurInstIterator - As we scan instructions optimizing them, this is the
/// next instruction to optimize. Xforms that can invalidate this should
@@ -142,8 +141,7 @@ class TypePromotionTransaction;
/// promotion for the current function.
InstrToOrigTy PromotedInsts;
- /// ModifiedDT - If CFG is modified in anyway, dominator tree may need to
- /// be updated.
+ /// ModifiedDT - True if the CFG has been modified in any way.
bool ModifiedDT;
/// OptSize - True if optimizing for size.
@@ -186,7 +184,7 @@ class TypePromotionTransaction;
bool ExtLdPromotion(TypePromotionTransaction &TPT, LoadInst *&LI,
Instruction *&Inst,
const SmallVectorImpl<Instruction *> &Exts,
- unsigned CreatedInst);
+ unsigned CreatedInstCost);
bool splitBranchCondition(Function &F);
bool simplifyOffsetableRelocate(Instruction &I);
};
@@ -214,9 +212,6 @@ bool CodeGenPrepare::runOnFunction(Function &F) {
TLI = TM->getSubtargetImpl(F)->getTargetLowering();
TLInfo = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();
TTI = &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
- DominatorTreeWrapperPass *DTWP =
- getAnalysisIfAvailable<DominatorTreeWrapperPass>();
- DT = DTWP ? &DTWP->getDomTree() : nullptr;
OptSize = F.hasFnAttribute(Attribute::OptimizeForSize);
/// This optimization identifies DIV instructions that can be
@@ -255,7 +250,6 @@ bool CodeGenPrepare::runOnFunction(Function &F) {
MadeChange |= OptimizeBlock(*BB, ModifiedDTOnIteration);
// Restart BB iteration if the dominator tree of the Function was changed
- ModifiedDT |= ModifiedDTOnIteration;
if (ModifiedDTOnIteration)
break;
}
@@ -298,8 +292,6 @@ bool CodeGenPrepare::runOnFunction(Function &F) {
if (EverMadeChange || MadeChange)
MadeChange |= EliminateFallThrough(F);
- if (MadeChange)
- ModifiedDT = true;
EverMadeChange |= MadeChange;
}
@@ -313,9 +305,6 @@ bool CodeGenPrepare::runOnFunction(Function &F) {
EverMadeChange |= simplifyOffsetableRelocate(*I);
}
- if (ModifiedDT && DT)
- DT->recalculate(F);
-
return EverMadeChange;
}
@@ -341,7 +330,7 @@ bool CodeGenPrepare::EliminateFallThrough(Function &F) {
// Remember if SinglePred was the entry block of the function.
// If so, we will need to move BB back to the entry position.
bool isEntry = SinglePred == &SinglePred->getParent()->getEntryBlock();
- MergeBasicBlockIntoOnlyPred(BB, DT);
+ MergeBasicBlockIntoOnlyPred(BB, nullptr);
if (isEntry && BB != &BB->getParent()->getEntryBlock())
BB->moveBefore(&BB->getParent()->getEntryBlock());
@@ -481,7 +470,7 @@ void CodeGenPrepare::EliminateMostlyEmptyBlock(BasicBlock *BB) {
// Remember if SinglePred was the entry block of the function. If so, we
// will need to move BB back to the entry position.
bool isEntry = SinglePred == &SinglePred->getParent()->getEntryBlock();
- MergeBasicBlockIntoOnlyPred(DestBB, DT);
+ MergeBasicBlockIntoOnlyPred(DestBB, nullptr);
if (isEntry && BB != &BB->getParent()->getEntryBlock())
BB->moveBefore(&BB->getParent()->getEntryBlock());
@@ -523,13 +512,6 @@ void CodeGenPrepare::EliminateMostlyEmptyBlock(BasicBlock *BB) {
// The PHIs are now updated, change everything that refers to BB to use
// DestBB and remove BB.
BB->replaceAllUsesWith(DestBB);
- if (DT && !ModifiedDT) {
- BasicBlock *BBIDom = DT->getNode(BB)->getIDom()->getBlock();
- BasicBlock *DestBBIDom = DT->getNode(DestBB)->getIDom()->getBlock();
- BasicBlock *NewIDom = DT->findNearestCommonDominator(BBIDom, DestBBIDom);
- DT->changeImmediateDominator(DestBB, NewIDom);
- DT->eraseNode(BB);
- }
BB->eraseFromParent();
++NumBlocksElim;
@@ -561,12 +543,15 @@ static void computeBaseDerivedRelocateMap(
IntrinsicInst *I = Item.second;
auto BaseKey = std::make_pair(Key.first, Key.first);
- IntrinsicInst *Base = RelocateIdxMap[BaseKey];
- if (!Base)
+
+ // We're iterating over RelocateIdxMap so we cannot modify it.
+ auto MaybeBase = RelocateIdxMap.find(BaseKey);
+ if (MaybeBase == RelocateIdxMap.end())
// TODO: We might want to insert a new base object relocate and gep off
// that, if there are enough derived object relocates.
continue;
- RelocateInstMap[Base].push_back(I);
+
+ RelocateInstMap[MaybeBase->second].push_back(I);
}
}
@@ -615,8 +600,8 @@ simplifyRelocatesOffABase(IntrinsicInst *RelocatedBase,
// Create a Builder and replace the target callsite with a gep
IRBuilder<> Builder(ToReplace);
Builder.SetCurrentDebugLocation(ToReplace->getDebugLoc());
- Value *Replacement =
- Builder.CreateGEP(RelocatedBase, makeArrayRef(OffsetV));
+ Value *Replacement = Builder.CreateGEP(
+ Derived->getSourceElementType(), RelocatedBase, makeArrayRef(OffsetV));
Instruction *ReplacementInst = cast<Instruction>(Replacement);
ReplacementInst->removeFromParent();
ReplacementInst->insertAfter(RelocatedBase);
@@ -1225,6 +1210,42 @@ bool CodeGenPrepare::OptimizeCallInst(CallInst *CI, bool& ModifiedDT) {
return true;
}
+ const DataLayout *TD = TLI ? TLI->getDataLayout() : nullptr;
+
+ // Align the pointer arguments to this call if the target thinks it's a
+ // good idea.
+ unsigned MinSize, PrefAlign;
+ if (TLI && TD && TLI->shouldAlignPointerArgs(CI, MinSize, PrefAlign)) {
+ for (auto &Arg : CI->arg_operands()) {
+ // We want to align both objects whose address is used directly and
+ // objects whose address is used in casts and GEPs, though it only makes
+ // sense for GEPs if the offset is a multiple of the desired alignment and
+ // if size - offset meets the size threshold.
+ if (!Arg->getType()->isPointerTy())
+ continue;
+ APInt Offset(TD->getPointerSizeInBits(
+ cast<PointerType>(Arg->getType())->getAddressSpace()), 0);
+ Value *Val = Arg->stripAndAccumulateInBoundsConstantOffsets(*TD, Offset);
+ uint64_t Offset2 = Offset.getLimitedValue();
+ AllocaInst *AI;
+ if ((Offset2 & (PrefAlign-1)) == 0 &&
+ (AI = dyn_cast<AllocaInst>(Val)) &&
+ AI->getAlignment() < PrefAlign &&
+ TD->getTypeAllocSize(AI->getAllocatedType()) >= MinSize + Offset2)
+ AI->setAlignment(PrefAlign);
+ // TODO: Also align GlobalVariables
+ }
+ // If this is a memcpy (or similar), then we may be able to improve the
+ // alignment.
+ if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(CI)) {
+ unsigned Align = getKnownAlignment(MI->getDest(), *TD);
+ if (MemTransferInst *MTI = dyn_cast<MemTransferInst>(MI))
+ Align = std::min(Align, getKnownAlignment(MTI->getSource(), *TD));
+ if (Align > MI->getAlignment())
+ MI->setAlignment(ConstantInt::get(MI->getAlignmentType(), Align));
+ }
+ }
+
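The alloca check above boils down to three conditions. A distilled version with hypothetical parameter names, showing the arithmetic (PrefAlign is assumed to be a power of two, which is what makes the mask test equivalent to a divisibility test):

#include <cstdio>

// Raise an alloca's alignment only when the constant offset into it is
// itself PrefAlign-aligned, the current alignment is lower, and the
// object still covers MinSize bytes beyond the offset.
bool shouldRealign(unsigned long long Offset, unsigned PrefAlign,
                   unsigned CurAlign, unsigned long long AllocSize,
                   unsigned MinSize) {
  return (Offset & (PrefAlign - 1)) == 0 &&
         CurAlign < PrefAlign &&
         AllocSize >= MinSize + Offset;
}

int main() {
  // A 64-byte alloca addressed at offset 16; the target prefers 16-byte
  // alignment and needs at least 32 bytes of reachable data.
  std::printf("%d\n", shouldRealign(16, 16, /*CurAlign=*/8, 64, 32)); // 1
}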
IntrinsicInst *II = dyn_cast<IntrinsicInst>(CI);
if (II) {
switch (II->getIntrinsicID()) {
@@ -1241,8 +1262,7 @@ bool CodeGenPrepare::OptimizeCallInst(CallInst *CI, bool& ModifiedDT) {
WeakVH IterHandle(CurInstIterator);
replaceAndRecursivelySimplify(CI, RetVal,
- TLI ? TLI->getDataLayout() : nullptr,
- TLInfo, ModifiedDT ? nullptr : DT);
+ TLInfo, nullptr);
// If the iterator instruction was recursively deleted, start over at the
// start of the block.
@@ -1284,15 +1304,11 @@ bool CodeGenPrepare::OptimizeCallInst(CallInst *CI, bool& ModifiedDT) {
// From here on out we're working with named functions.
if (!CI->getCalledFunction()) return false;
- // We'll need DataLayout from here on out.
- const DataLayout *TD = TLI ? TLI->getDataLayout() : nullptr;
- if (!TD) return false;
-
// Lower all default uses of _chk calls. This is very similar
// to what InstCombineCalls does, but here we are only lowering calls
// to fortified library functions (e.g. __memcpy_chk) that have the default
// "don't know" as the objectsize. Anything else should be left alone.
- FortifiedLibCallSimplifier Simplifier(TD, TLInfo, true);
+ FortifiedLibCallSimplifier Simplifier(TLInfo, true);
if (Value *V = Simplifier.optimizeCall(CI)) {
CI->replaceAllUsesWith(V);
CI->eraseFromParent();
@@ -2025,7 +2041,7 @@ private:
ExtAddrMode &AMBefore,
ExtAddrMode &AMAfter);
bool ValueAlreadyLiveAtInst(Value *Val, Value *KnownLive1, Value *KnownLive2);
- bool IsPromotionProfitable(unsigned MatchedSize, unsigned SizeWithPromotion,
+ bool IsPromotionProfitable(unsigned NewCost, unsigned OldCost,
Value *PromotedOperand) const;
};
@@ -2159,7 +2175,7 @@ class TypePromotionHelper {
/// \brief Utility function to promote the operand of \p Ext when this
/// operand is a promotable trunc or sext or zext.
/// \p PromotedInsts maps the instructions to their type before promotion.
- /// \p CreatedInsts[out] contains how many non-free instructions have been
+ /// \p CreatedInstsCost[out] contains the cost of all instructions
/// created to promote the operand of Ext.
/// Newly added extensions are inserted in \p Exts.
/// Newly added truncates are inserted in \p Truncs.
@@ -2167,53 +2183,55 @@ class TypePromotionHelper {
/// \return The promoted value which is used instead of Ext.
static Value *promoteOperandForTruncAndAnyExt(
Instruction *Ext, TypePromotionTransaction &TPT,
- InstrToOrigTy &PromotedInsts, unsigned &CreatedInsts,
+ InstrToOrigTy &PromotedInsts, unsigned &CreatedInstsCost,
SmallVectorImpl<Instruction *> *Exts,
- SmallVectorImpl<Instruction *> *Truncs);
+ SmallVectorImpl<Instruction *> *Truncs, const TargetLowering &TLI);
/// \brief Utility function to promote the operand of \p Ext when this
/// operand is promotable and is not a supported trunc or sext.
/// \p PromotedInsts maps the instructions to their type before promotion.
- /// \p CreatedInsts[out] contains how many non-free instructions have been
+ /// \p CreatedInstsCost[out] contains the cost of all the instructions
/// created to promote the operand of Ext.
/// Newly added extensions are inserted in \p Exts.
/// Newly added truncates are inserted in \p Truncs.
/// Should never be called directly.
/// \return The promoted value which is used instead of Ext.
- static Value *
- promoteOperandForOther(Instruction *Ext, TypePromotionTransaction &TPT,
- InstrToOrigTy &PromotedInsts, unsigned &CreatedInsts,
- SmallVectorImpl<Instruction *> *Exts,
- SmallVectorImpl<Instruction *> *Truncs, bool IsSExt);
+ static Value *promoteOperandForOther(Instruction *Ext,
+ TypePromotionTransaction &TPT,
+ InstrToOrigTy &PromotedInsts,
+ unsigned &CreatedInstsCost,
+ SmallVectorImpl<Instruction *> *Exts,
+ SmallVectorImpl<Instruction *> *Truncs,
+ const TargetLowering &TLI, bool IsSExt);
/// \see promoteOperandForOther.
- static Value *
- signExtendOperandForOther(Instruction *Ext, TypePromotionTransaction &TPT,
- InstrToOrigTy &PromotedInsts,
- unsigned &CreatedInsts,
- SmallVectorImpl<Instruction *> *Exts,
- SmallVectorImpl<Instruction *> *Truncs) {
- return promoteOperandForOther(Ext, TPT, PromotedInsts, CreatedInsts, Exts,
- Truncs, true);
+ static Value *signExtendOperandForOther(
+ Instruction *Ext, TypePromotionTransaction &TPT,
+ InstrToOrigTy &PromotedInsts, unsigned &CreatedInstsCost,
+ SmallVectorImpl<Instruction *> *Exts,
+ SmallVectorImpl<Instruction *> *Truncs, const TargetLowering &TLI) {
+ return promoteOperandForOther(Ext, TPT, PromotedInsts, CreatedInstsCost,
+ Exts, Truncs, TLI, true);
}
/// \see promoteOperandForOther.
- static Value *
- zeroExtendOperandForOther(Instruction *Ext, TypePromotionTransaction &TPT,
- InstrToOrigTy &PromotedInsts,
- unsigned &CreatedInsts,
- SmallVectorImpl<Instruction *> *Exts,
- SmallVectorImpl<Instruction *> *Truncs) {
- return promoteOperandForOther(Ext, TPT, PromotedInsts, CreatedInsts, Exts,
- Truncs, false);
+ static Value *zeroExtendOperandForOther(
+ Instruction *Ext, TypePromotionTransaction &TPT,
+ InstrToOrigTy &PromotedInsts, unsigned &CreatedInstsCost,
+ SmallVectorImpl<Instruction *> *Exts,
+ SmallVectorImpl<Instruction *> *Truncs, const TargetLowering &TLI) {
+ return promoteOperandForOther(Ext, TPT, PromotedInsts, CreatedInstsCost,
+ Exts, Truncs, TLI, false);
}
public:
/// Type for the utility function that promotes the operand of Ext.
typedef Value *(*Action)(Instruction *Ext, TypePromotionTransaction &TPT,
- InstrToOrigTy &PromotedInsts, unsigned &CreatedInsts,
+ InstrToOrigTy &PromotedInsts,
+ unsigned &CreatedInstsCost,
SmallVectorImpl<Instruction *> *Exts,
- SmallVectorImpl<Instruction *> *Truncs);
+ SmallVectorImpl<Instruction *> *Truncs,
+ const TargetLowering &TLI);
  /// \brief Given a sign/zero extend instruction \p Ext, return the appropriate
/// action to promote the operand of \p Ext instead of using Ext.
/// \return NULL if no promotable action is possible with the current
@@ -2330,16 +2348,18 @@ TypePromotionHelper::Action TypePromotionHelper::getAction(
Value *TypePromotionHelper::promoteOperandForTruncAndAnyExt(
llvm::Instruction *SExt, TypePromotionTransaction &TPT,
- InstrToOrigTy &PromotedInsts, unsigned &CreatedInsts,
+ InstrToOrigTy &PromotedInsts, unsigned &CreatedInstsCost,
SmallVectorImpl<Instruction *> *Exts,
- SmallVectorImpl<Instruction *> *Truncs) {
+ SmallVectorImpl<Instruction *> *Truncs, const TargetLowering &TLI) {
// By construction, the operand of SExt is an instruction. Otherwise we cannot
// get through it and this method should not be called.
Instruction *SExtOpnd = cast<Instruction>(SExt->getOperand(0));
Value *ExtVal = SExt;
+ bool HasMergedNonFreeExt = false;
if (isa<ZExtInst>(SExtOpnd)) {
// Replace s|zext(zext(opnd))
// => zext(opnd).
+ HasMergedNonFreeExt = !TLI.isExtFree(SExtOpnd);
Value *ZExt =
TPT.createZExt(SExt, SExtOpnd->getOperand(0), SExt->getType());
TPT.replaceAllUsesWith(SExt, ZExt);
@@ -2350,7 +2370,7 @@ Value *TypePromotionHelper::promoteOperandForTruncAndAnyExt(
// => z|sext(opnd).
TPT.setOperand(SExt, 0, SExtOpnd->getOperand(0));
}
- CreatedInsts = 0;
+ CreatedInstsCost = 0;
// Remove dead code.
if (SExtOpnd->use_empty())
@@ -2359,8 +2379,11 @@ Value *TypePromotionHelper::promoteOperandForTruncAndAnyExt(
// Check if the extension is still needed.
Instruction *ExtInst = dyn_cast<Instruction>(ExtVal);
if (!ExtInst || ExtInst->getType() != ExtInst->getOperand(0)->getType()) {
- if (ExtInst && Exts)
- Exts->push_back(ExtInst);
+ if (ExtInst) {
+ if (Exts)
+ Exts->push_back(ExtInst);
+ CreatedInstsCost = !TLI.isExtFree(ExtInst) && !HasMergedNonFreeExt;
+ }
return ExtVal;
}
@@ -2373,13 +2396,14 @@ Value *TypePromotionHelper::promoteOperandForTruncAndAnyExt(
Value *TypePromotionHelper::promoteOperandForOther(
Instruction *Ext, TypePromotionTransaction &TPT,
- InstrToOrigTy &PromotedInsts, unsigned &CreatedInsts,
+ InstrToOrigTy &PromotedInsts, unsigned &CreatedInstsCost,
SmallVectorImpl<Instruction *> *Exts,
- SmallVectorImpl<Instruction *> *Truncs, bool IsSExt) {
+ SmallVectorImpl<Instruction *> *Truncs, const TargetLowering &TLI,
+ bool IsSExt) {
// By construction, the operand of Ext is an instruction. Otherwise we cannot
// get through it and this method should not be called.
Instruction *ExtOpnd = cast<Instruction>(Ext->getOperand(0));
- CreatedInsts = 0;
+ CreatedInstsCost = 0;
if (!ExtOpnd->hasOneUse()) {
// ExtOpnd will be promoted.
// All its uses, but Ext, will need to use a truncated value of the
@@ -2454,7 +2478,6 @@ Value *TypePromotionHelper::promoteOperandForOther(
continue;
}
ExtForOpnd = cast<Instruction>(ValForExtOpnd);
- ++CreatedInsts;
}
if (Exts)
Exts->push_back(ExtForOpnd);
@@ -2463,6 +2486,7 @@ Value *TypePromotionHelper::promoteOperandForOther(
// Move the sign extension before the insertion point.
TPT.moveBefore(ExtForOpnd, ExtOpnd);
TPT.setOperand(ExtOpnd, OpIdx, ExtForOpnd);
+ CreatedInstsCost += !TLI.isExtFree(ExtForOpnd);
      // If more sexts are required, new instructions will have to be created.
ExtForOpnd = nullptr;
}
@@ -2475,22 +2499,22 @@ Value *TypePromotionHelper::promoteOperandForOther(
/// IsPromotionProfitable - Check whether or not promoting an instruction
/// to a wider type was profitable.
-/// \p MatchedSize gives the number of instructions that have been matched
-/// in the addressing mode after the promotion was applied.
-/// \p SizeWithPromotion gives the number of created instructions for
-/// the promotion plus the number of instructions that have been
-/// matched in the addressing mode before the promotion.
+/// \p NewCost gives the cost of extension instructions created by the
+/// promotion.
+/// \p OldCost gives the cost of extension instructions before the promotion
+/// plus the number of instructions that have been matched in the addressing
+/// mode thanks to the promotion.
/// \p PromotedOperand is the value that has been promoted.
/// \return True if the promotion is profitable, false otherwise.
-bool
-AddressingModeMatcher::IsPromotionProfitable(unsigned MatchedSize,
- unsigned SizeWithPromotion,
- Value *PromotedOperand) const {
- // We folded less instructions than what we created to promote the operand.
+bool AddressingModeMatcher::IsPromotionProfitable(
+ unsigned NewCost, unsigned OldCost, Value *PromotedOperand) const {
+ DEBUG(dbgs() << "OldCost: " << OldCost << "\tNewCost: " << NewCost << '\n');
+ // The cost of the new extensions is greater than the cost of the
+ // old extension plus what we folded.
// This is not profitable.
- if (MatchedSize < SizeWithPromotion)
+ if (NewCost > OldCost)
return false;
- if (MatchedSize > SizeWithPromotion)
+ if (NewCost < OldCost)
return true;
// The promotion is neutral but it may help folding the sign extension in
// loads for instance.
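
A minimal standalone sketch of the rule just documented, with illustrative names; the boolean tie-breaker stands in for the real code's inspection of PromotedOperand:

static bool isPromotionProfitableSketch(unsigned NewCost, unsigned OldCost,
                                        bool OperandLikelyFoldable) {
  if (NewCost > OldCost)
    return false; // The promotion created more ext cost than it saved.
  if (NewCost < OldCost)
    return true;  // Strictly cheaper once folding is accounted for.
  // Neutral cost: decide on the promoted operand, e.g. a load whose
  // extension may still be folded away later.
  return OperandLikelyFoldable;
}
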
@@ -2688,9 +2712,10 @@ bool AddressingModeMatcher::MatchOperationAddr(User *AddrInst, unsigned Opcode,
TypePromotionTransaction::ConstRestorationPt LastKnownGood =
TPT.getRestorationPoint();
- unsigned CreatedInsts = 0;
+ unsigned CreatedInstsCost = 0;
+ unsigned ExtCost = !TLI.isExtFree(Ext);
Value *PromotedOperand =
- TPH(Ext, TPT, PromotedInsts, CreatedInsts, nullptr, nullptr);
+ TPH(Ext, TPT, PromotedInsts, CreatedInstsCost, nullptr, nullptr, TLI);
// SExt has been moved away.
// Thus either it will be rematched later in the recursive calls or it is
// gone. Anyway, we must not fold it into the addressing mode at this point.
@@ -2712,7 +2737,12 @@ bool AddressingModeMatcher::MatchOperationAddr(User *AddrInst, unsigned Opcode,
unsigned OldSize = AddrModeInsts.size();
if (!MatchAddr(PromotedOperand, Depth) ||
- !IsPromotionProfitable(AddrModeInsts.size(), OldSize + CreatedInsts,
+      // The total of the new cost equals the cost of the created
+      // instructions.
+      // The total of the old cost equals the cost of the extension plus
+      // what we have saved in the addressing mode.
+ !IsPromotionProfitable(CreatedInstsCost,
+ ExtCost + (AddrModeInsts.size() - OldSize),
PromotedOperand)) {
AddrMode = BackupAddrMode;
AddrModeInsts.resize(OldSize);
@@ -3472,7 +3502,7 @@ static bool hasSameExtUse(Instruction *Inst, const TargetLowering &TLI) {
bool CodeGenPrepare::ExtLdPromotion(TypePromotionTransaction &TPT,
LoadInst *&LI, Instruction *&Inst,
const SmallVectorImpl<Instruction *> &Exts,
- unsigned CreatedInsts = 0) {
+ unsigned CreatedInstsCost = 0) {
  // Iterate over all the extensions to see if one of them forms an ext(load).
for (auto I : Exts) {
// Check if we directly have ext(load).
@@ -3494,10 +3524,11 @@ bool CodeGenPrepare::ExtLdPromotion(TypePromotionTransaction &TPT,
TypePromotionTransaction::ConstRestorationPt LastKnownGood =
TPT.getRestorationPoint();
SmallVector<Instruction *, 4> NewExts;
- unsigned NewCreatedInsts = 0;
+ unsigned NewCreatedInstsCost = 0;
+ unsigned ExtCost = !TLI->isExtFree(I);
// Promote.
- Value *PromotedVal =
- TPH(I, TPT, PromotedInsts, NewCreatedInsts, &NewExts, nullptr);
+ Value *PromotedVal = TPH(I, TPT, PromotedInsts, NewCreatedInstsCost,
+ &NewExts, nullptr, *TLI);
assert(PromotedVal &&
"TypePromotionHelper should have filtered out those cases");
@@ -3507,9 +3538,10 @@ bool CodeGenPrepare::ExtLdPromotion(TypePromotionTransaction &TPT,
// With exactly 2, the transformation is neutral, because we will merge
// one extension but leave one. However, we optimistically keep going,
// because the new extension may be removed too.
- unsigned TotalCreatedInsts = CreatedInsts + NewCreatedInsts;
+ long long TotalCreatedInstsCost = CreatedInstsCost + NewCreatedInstsCost;
+ TotalCreatedInstsCost -= ExtCost;
if (!StressExtLdPromotion &&
- (TotalCreatedInsts > 1 ||
+ (TotalCreatedInstsCost > 1 ||
!isPromotedInstructionLegal(*TLI, PromotedVal))) {
      // The promotion is not profitable; roll back to the previous state.
TPT.rollback(LastKnownGood);
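
The switch to long long above matters: subtracting ExtCost can drive the running total negative, which an unsigned type would silently wrap. A self-checking sketch with illustrative values:

#include <cassert>
int main() {
  unsigned CreatedInstsCost = 0;    // extensions created so far were all free
  unsigned NewCreatedInstsCost = 0; // this round created nothing costly
  unsigned ExtCost = 1;             // but the promoted ext itself was not free
  long long Total = (long long)CreatedInstsCost + NewCreatedInstsCost;
  Total -= ExtCost;                 // refund the ext we expect to merge away
  assert(Total == -1);              // negative: unsigned arithmetic would wrap
  return Total > 1;                 // only a net cost above 1 forces rollback
}
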
@@ -3517,8 +3549,8 @@ bool CodeGenPrepare::ExtLdPromotion(TypePromotionTransaction &TPT,
}
// The promotion is profitable.
// Check if it exposes an ext(load).
- (void)ExtLdPromotion(TPT, LI, Inst, NewExts, TotalCreatedInsts);
- if (LI && (StressExtLdPromotion || NewCreatedInsts == 0 ||
+ (void)ExtLdPromotion(TPT, LI, Inst, NewExts, TotalCreatedInstsCost);
+ if (LI && (StressExtLdPromotion || NewCreatedInstsCost <= ExtCost ||
             // If we have created a new extension, i.e., now we have two
             // extensions, we must make sure one of them is merged with
             // the load; otherwise we may degrade the code quality.
@@ -4193,8 +4225,8 @@ bool CodeGenPrepare::OptimizeInst(Instruction *I, bool& ModifiedDT) {
// It is possible for very late stage optimizations (such as SimplifyCFG)
// to introduce PHI nodes too late to be cleaned up. If we detect such a
// trivial PHI, go ahead and zap it here.
- if (Value *V = SimplifyInstruction(P, TLI ? TLI->getDataLayout() : nullptr,
- TLInfo, DT)) {
+ const DataLayout &DL = I->getModule()->getDataLayout();
+ if (Value *V = SimplifyInstruction(P, DL, TLInfo, nullptr)) {
P->replaceAllUsesWith(V);
P->eraseFromParent();
++NumPHIsElim;
@@ -4463,8 +4495,7 @@ static void scaleWeights(uint64_t &NewTrue, uint64_t &NewFalse) {
/// FIXME: Remove the (equivalent?) implementation in SelectionDAG.
///
bool CodeGenPrepare::splitBranchCondition(Function &F) {
- if (!TM || TM->Options.EnableFastISel != true ||
- !TLI || TLI->isJumpExpensive())
+ if (!TM || !TM->Options.EnableFastISel || !TLI || TLI->isJumpExpensive())
return false;
bool MadeChange = false;
@@ -4625,10 +4656,8 @@ bool CodeGenPrepare::splitBranchCondition(Function &F) {
}
}
- // Request DOM Tree update.
// Note: No point in getting fancy here, since the DT info is never
- // available to CodeGenPrepare and the existing update code is broken
- // anyways.
+ // available to CodeGenPrepare.
ModifiedDT = true;
MadeChange = true;
diff --git a/lib/CodeGen/DwarfEHPrepare.cpp b/lib/CodeGen/DwarfEHPrepare.cpp
index 7b47a48..42656fb 100644
--- a/lib/CodeGen/DwarfEHPrepare.cpp
+++ b/lib/CodeGen/DwarfEHPrepare.cpp
@@ -13,13 +13,19 @@
//===----------------------------------------------------------------------===//
#include "llvm/CodeGen/Passes.h"
+#include "llvm/ADT/BitVector.h"
#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/CFG.h"
+#include "llvm/Analysis/LibCallSemantics.h"
+#include "llvm/Analysis/TargetTransformInfo.h"
+#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Module.h"
#include "llvm/Pass.h"
#include "llvm/Target/TargetLowering.h"
#include "llvm/Target/TargetSubtargetInfo.h"
+#include "llvm/Transforms/Utils/Local.h"
using namespace llvm;
#define DEBUG_TYPE "dwarfehprepare"
@@ -33,18 +39,28 @@ namespace {
// RewindFunction - _Unwind_Resume or the target equivalent.
Constant *RewindFunction;
+ DominatorTree *DT;
+ const TargetLowering *TLI;
+
bool InsertUnwindResumeCalls(Function &Fn);
Value *GetExceptionObject(ResumeInst *RI);
+ size_t
+ pruneUnreachableResumes(Function &Fn,
+ SmallVectorImpl<ResumeInst *> &Resumes,
+ SmallVectorImpl<LandingPadInst *> &CleanupLPads);
public:
static char ID; // Pass identification, replacement for typeid.
// INITIALIZE_TM_PASS requires a default constructor, but it isn't used in
// practice.
- DwarfEHPrepare() : FunctionPass(ID), TM(nullptr), RewindFunction(nullptr) {}
+ DwarfEHPrepare()
+ : FunctionPass(ID), TM(nullptr), RewindFunction(nullptr), DT(nullptr),
+ TLI(nullptr) {}
DwarfEHPrepare(const TargetMachine *TM)
- : FunctionPass(ID), TM(TM), RewindFunction(nullptr) {}
+ : FunctionPass(ID), TM(TM), RewindFunction(nullptr), DT(nullptr),
+ TLI(nullptr) {}
bool runOnFunction(Function &Fn) override;
@@ -53,6 +69,8 @@ namespace {
return false;
}
+ void getAnalysisUsage(AnalysisUsage &AU) const override;
+
const char *getPassName() const override {
return "Exception handling preparation";
}
@@ -60,13 +78,22 @@ namespace {
} // end anonymous namespace
char DwarfEHPrepare::ID = 0;
-INITIALIZE_TM_PASS(DwarfEHPrepare, "dwarfehprepare", "Prepare DWARF exceptions",
- false, false)
+INITIALIZE_TM_PASS_BEGIN(DwarfEHPrepare, "dwarfehprepare",
+ "Prepare DWARF exceptions", false, false)
+INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
+INITIALIZE_TM_PASS_END(DwarfEHPrepare, "dwarfehprepare",
+ "Prepare DWARF exceptions", false, false)
FunctionPass *llvm::createDwarfEHPass(const TargetMachine *TM) {
return new DwarfEHPrepare(TM);
}
+void DwarfEHPrepare::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addRequired<TargetTransformInfoWrapperPass>();
+ AU.addRequired<DominatorTreeWrapperPass>();
+}
+
/// GetExceptionObject - Return the exception object from the value passed into
/// the 'resume' instruction (typically an aggregate). Clean up any dead
/// instructions, including the 'resume' instruction.
@@ -107,21 +134,81 @@ Value *DwarfEHPrepare::GetExceptionObject(ResumeInst *RI) {
return ExnObj;
}
+/// Replace resumes that are not reachable from a cleanup landing pad with
+/// unreachable and then simplify those blocks.
+size_t DwarfEHPrepare::pruneUnreachableResumes(
+ Function &Fn, SmallVectorImpl<ResumeInst *> &Resumes,
+ SmallVectorImpl<LandingPadInst *> &CleanupLPads) {
+ BitVector ResumeReachable(Resumes.size());
+ size_t ResumeIndex = 0;
+ for (auto *RI : Resumes) {
+ for (auto *LP : CleanupLPads) {
+ if (isPotentiallyReachable(LP, RI, DT)) {
+ ResumeReachable.set(ResumeIndex);
+ break;
+ }
+ }
+ ++ResumeIndex;
+ }
+
+ // If everything is reachable, there is no change.
+ if (ResumeReachable.all())
+ return Resumes.size();
+
+ const TargetTransformInfo &TTI =
+ getAnalysis<TargetTransformInfoWrapperPass>().getTTI(Fn);
+ LLVMContext &Ctx = Fn.getContext();
+
+ // Otherwise, insert unreachable instructions and call simplifycfg.
+ size_t ResumesLeft = 0;
+ for (size_t I = 0, E = Resumes.size(); I < E; ++I) {
+ ResumeInst *RI = Resumes[I];
+ if (ResumeReachable[I]) {
+ Resumes[ResumesLeft++] = RI;
+ } else {
+ BasicBlock *BB = RI->getParent();
+ new UnreachableInst(Ctx, RI);
+ RI->eraseFromParent();
+ SimplifyCFG(BB, TTI, 1);
+ }
+ }
+ Resumes.resize(ResumesLeft);
+ return ResumesLeft;
+}
+
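
The reachable-resume loop above uses a classic in-place compaction: each survivor is shifted forward over pruned slots and the container is trimmed once at the end. The same idiom on a plain std::vector, as an illustrative sketch:

#include <vector>
template <typename T, typename Pred>
size_t compact(std::vector<T> &V, Pred Keep) {
  size_t Left = 0;
  for (size_t I = 0, E = V.size(); I < E; ++I)
    if (Keep(V[I]))
      V[Left++] = V[I]; // shift each survivor into the next kept slot
  V.resize(Left);       // drop the pruned tail in one shot
  return Left;
}
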
/// InsertUnwindResumeCalls - Convert the ResumeInsts that are still present
/// into calls to the appropriate _Unwind_Resume function.
bool DwarfEHPrepare::InsertUnwindResumeCalls(Function &Fn) {
SmallVector<ResumeInst*, 16> Resumes;
+ SmallVector<LandingPadInst*, 16> CleanupLPads;
+ bool FoundLP = false;
for (BasicBlock &BB : Fn) {
if (auto *RI = dyn_cast<ResumeInst>(BB.getTerminator()))
Resumes.push_back(RI);
+ if (auto *LP = BB.getLandingPadInst()) {
+ if (LP->isCleanup())
+ CleanupLPads.push_back(LP);
+ // Check the personality on the first landingpad. Don't do anything if
+ // it's for MSVC.
+ if (!FoundLP) {
+ FoundLP = true;
+ EHPersonality Pers = classifyEHPersonality(LP->getPersonalityFn());
+ if (isMSVCEHPersonality(Pers))
+ return false;
+ }
+ }
}
if (Resumes.empty())
return false;
- // Find the rewind function if we didn't already.
- const TargetLowering *TLI = TM->getSubtargetImpl(Fn)->getTargetLowering();
LLVMContext &Ctx = Fn.getContext();
+
+ size_t ResumesLeft = pruneUnreachableResumes(Fn, Resumes, CleanupLPads);
+ if (ResumesLeft == 0)
+ return true; // We pruned them all.
+
+ // Find the rewind function if we didn't already.
if (!RewindFunction) {
FunctionType *FTy = FunctionType::get(Type::getVoidTy(Ctx),
Type::getInt8PtrTy(Ctx), false);
@@ -130,9 +217,7 @@ bool DwarfEHPrepare::InsertUnwindResumeCalls(Function &Fn) {
}
// Create the basic block where the _Unwind_Resume call will live.
- unsigned ResumesSize = Resumes.size();
-
- if (ResumesSize == 1) {
+ if (ResumesLeft == 1) {
// Instead of creating a new BB and PHI node, just append the call to
// _Unwind_Resume to the end of the single resume block.
ResumeInst *RI = Resumes.front();
@@ -149,7 +234,7 @@ bool DwarfEHPrepare::InsertUnwindResumeCalls(Function &Fn) {
}
BasicBlock *UnwindBB = BasicBlock::Create(Ctx, "unwind_resume", &Fn);
- PHINode *PN = PHINode::Create(Type::getInt8PtrTy(Ctx), ResumesSize,
+ PHINode *PN = PHINode::Create(Type::getInt8PtrTy(Ctx), ResumesLeft,
"exn.obj", UnwindBB);
// Extract the exception object from the ResumeInst and add it to the PHI node
@@ -175,6 +260,10 @@ bool DwarfEHPrepare::InsertUnwindResumeCalls(Function &Fn) {
bool DwarfEHPrepare::runOnFunction(Function &Fn) {
assert(TM && "DWARF EH preparation requires a target machine");
+ DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
+ TLI = TM->getSubtargetImpl(Fn)->getTargetLowering();
bool Changed = InsertUnwindResumeCalls(Fn);
+ DT = nullptr;
+ TLI = nullptr;
return Changed;
}
diff --git a/lib/CodeGen/ExecutionDepsFix.cpp b/lib/CodeGen/ExecutionDepsFix.cpp
index b3a22c8..5b09cf1 100644
--- a/lib/CodeGen/ExecutionDepsFix.cpp
+++ b/lib/CodeGen/ExecutionDepsFix.cpp
@@ -113,7 +113,7 @@ struct DomainValue {
}
namespace {
-/// LiveReg - Information about a live register.
+/// Information about a live register.
struct LiveReg {
/// Value currently in this register, or NULL when no value is being tracked.
/// This counts as a DomainValue reference.
@@ -125,7 +125,7 @@ struct LiveReg {
/// will be a negative number.
int Def;
};
-} // anonynous namespace
+} // anonymous namespace
namespace {
class ExeDepsFix : public MachineFunctionPass {
@@ -174,7 +174,7 @@ public:
private:
iterator_range<SmallVectorImpl<int>::const_iterator>
- regIndizes(unsigned Reg) const;
+ regIndices(unsigned Reg) const;
// DomainValue allocation.
DomainValue *alloc(int domain = -1);
@@ -205,10 +205,10 @@ private:
char ExeDepsFix::ID = 0;
-/// Translate TRI register number to a list of indizes into our stmaller tables
+/// Translate TRI register number to a list of indices into our smaller tables
/// of interesting registers.
iterator_range<SmallVectorImpl<int>::const_iterator>
-ExeDepsFix::regIndizes(unsigned Reg) const {
+ExeDepsFix::regIndices(unsigned Reg) const {
assert(Reg < AliasMap.size() && "Invalid register");
const auto &Entry = AliasMap[Reg];
return make_range(Entry.begin(), Entry.end());
@@ -225,7 +225,7 @@ DomainValue *ExeDepsFix::alloc(int domain) {
return dv;
}
-/// release - Release a reference to DV. When the last reference is released,
+/// Release a reference to DV. When the last reference is released,
/// collapse if needed.
void ExeDepsFix::release(DomainValue *DV) {
while (DV) {
@@ -245,8 +245,8 @@ void ExeDepsFix::release(DomainValue *DV) {
}
}
-/// resolve - Follow the chain of dead DomainValues until a live DomainValue is
-/// reached. Update the referenced pointer when necessary.
+/// Follow the chain of dead DomainValues until a live DomainValue is reached.
+/// Update the referenced pointer when necessary.
DomainValue *ExeDepsFix::resolve(DomainValue *&DVRef) {
DomainValue *DV = DVRef;
if (!DV || !DV->Next)
@@ -325,8 +325,7 @@ void ExeDepsFix::collapse(DomainValue *dv, unsigned domain) {
setLiveReg(rx, alloc(domain));
}
-/// Merge - All instructions and registers in B are moved to A, and B is
-/// released.
+/// All instructions and registers in B are moved to A, and B is released.
bool ExeDepsFix::merge(DomainValue *A, DomainValue *B) {
assert(!A->isCollapsed() && "Cannot merge into collapsed");
assert(!B->isCollapsed() && "Cannot merge from collapsed");
@@ -352,7 +351,7 @@ bool ExeDepsFix::merge(DomainValue *A, DomainValue *B) {
return true;
}
-// enterBasicBlock - Set up LiveRegs by merging predecessor live-out values.
+/// Set up LiveRegs by merging predecessor live-out values.
void ExeDepsFix::enterBasicBlock(MachineBasicBlock *MBB) {
// Detect back-edges from predecessors we haven't processed yet.
SeenUnknownBackEdge = false;
@@ -378,7 +377,7 @@ void ExeDepsFix::enterBasicBlock(MachineBasicBlock *MBB) {
if (MBB->pred_empty()) {
for (MachineBasicBlock::livein_iterator i = MBB->livein_begin(),
e = MBB->livein_end(); i != e; ++i) {
- for (int rx : regIndizes(*i)) {
+ for (int rx : regIndices(*i)) {
// Treat function live-ins as if they were defined just before the first
// instruction. Usually, function arguments are set up immediately
// before the call.
@@ -475,7 +474,7 @@ void ExeDepsFix::visitInstr(MachineInstr *MI) {
bool ExeDepsFix::shouldBreakDependence(MachineInstr *MI, unsigned OpIdx,
unsigned Pref) {
unsigned reg = MI->getOperand(OpIdx).getReg();
- for (int rx : regIndizes(reg)) {
+ for (int rx : regIndices(reg)) {
unsigned Clearance = CurInstr - LiveRegs[rx].Def;
DEBUG(dbgs() << "Clearance: " << Clearance << ", want " << Pref);
@@ -521,7 +520,7 @@ void ExeDepsFix::processDefs(MachineInstr *MI, bool Kill) {
break;
if (MO.isUse())
continue;
- for (int rx : regIndizes(MO.getReg())) {
+ for (int rx : regIndices(MO.getReg())) {
// This instruction explicitly defines rx.
DEBUG(dbgs() << TRI->getName(RC->getRegister(rx)) << ":\t" << CurInstr
<< '\t' << *MI);
@@ -587,7 +586,7 @@ void ExeDepsFix::visitHardInstr(MachineInstr *mi, unsigned domain) {
e = mi->getDesc().getNumOperands(); i != e; ++i) {
MachineOperand &mo = mi->getOperand(i);
if (!mo.isReg()) continue;
- for (int rx : regIndizes(mo.getReg())) {
+ for (int rx : regIndices(mo.getReg())) {
force(rx, domain);
}
}
@@ -596,7 +595,7 @@ void ExeDepsFix::visitHardInstr(MachineInstr *mi, unsigned domain) {
for (unsigned i = 0, e = mi->getDesc().getNumDefs(); i != e; ++i) {
MachineOperand &mo = mi->getOperand(i);
if (!mo.isReg()) continue;
- for (int rx : regIndizes(mo.getReg())) {
+ for (int rx : regIndices(mo.getReg())) {
kill(rx);
force(rx, domain);
}
@@ -616,7 +615,7 @@ void ExeDepsFix::visitSoftInstr(MachineInstr *mi, unsigned mask) {
e = mi->getDesc().getNumOperands(); i != e; ++i) {
MachineOperand &mo = mi->getOperand(i);
if (!mo.isReg()) continue;
- for (int rx : regIndizes(mo.getReg())) {
+ for (int rx : regIndices(mo.getReg())) {
DomainValue *dv = LiveRegs[rx].Value;
if (dv == nullptr)
continue;
@@ -712,7 +711,7 @@ void ExeDepsFix::visitSoftInstr(MachineInstr *mi, unsigned mask) {
ii != ee; ++ii) {
MachineOperand &mo = *ii;
if (!mo.isReg()) continue;
- for (int rx : regIndizes(mo.getReg())) {
+ for (int rx : regIndices(mo.getReg())) {
if (!LiveRegs[rx].Value || (mo.isDef() && LiveRegs[rx].Value != dv)) {
kill(rx);
setLiveReg(rx, dv);
diff --git a/lib/CodeGen/ForwardControlFlowIntegrity.cpp b/lib/CodeGen/ForwardControlFlowIntegrity.cpp
deleted file mode 100644
index 63c3699..0000000
--- a/lib/CodeGen/ForwardControlFlowIntegrity.cpp
+++ /dev/null
@@ -1,374 +0,0 @@
-//===-- ForwardControlFlowIntegrity.cpp: Forward-Edge CFI -----------------===//
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-///
-/// \file
-/// \brief A pass that instruments code with fast checks for indirect calls and
-/// hooks for a function to check violations.
-///
-//===----------------------------------------------------------------------===//
-
-#define DEBUG_TYPE "cfi"
-
-#include "llvm/ADT/SmallVector.h"
-#include "llvm/ADT/Statistic.h"
-#include "llvm/Analysis/JumpInstrTableInfo.h"
-#include "llvm/CodeGen/ForwardControlFlowIntegrity.h"
-#include "llvm/CodeGen/JumpInstrTables.h"
-#include "llvm/CodeGen/Passes.h"
-#include "llvm/IR/Attributes.h"
-#include "llvm/IR/CallSite.h"
-#include "llvm/IR/Constants.h"
-#include "llvm/IR/DerivedTypes.h"
-#include "llvm/IR/Function.h"
-#include "llvm/IR/GlobalValue.h"
-#include "llvm/IR/IRBuilder.h"
-#include "llvm/IR/InlineAsm.h"
-#include "llvm/IR/Instructions.h"
-#include "llvm/IR/LLVMContext.h"
-#include "llvm/IR/Module.h"
-#include "llvm/IR/Operator.h"
-#include "llvm/IR/Type.h"
-#include "llvm/IR/Verifier.h"
-#include "llvm/Pass.h"
-#include "llvm/Support/CommandLine.h"
-#include "llvm/Support/Debug.h"
-#include "llvm/Support/raw_ostream.h"
-
-using namespace llvm;
-
-STATISTIC(NumCFIIndirectCalls,
- "Number of indirect call sites rewritten by the CFI pass");
-
-char ForwardControlFlowIntegrity::ID = 0;
-INITIALIZE_PASS_BEGIN(ForwardControlFlowIntegrity, "forward-cfi",
- "Control-Flow Integrity", true, true)
-INITIALIZE_PASS_DEPENDENCY(JumpInstrTableInfo);
-INITIALIZE_PASS_DEPENDENCY(JumpInstrTables);
-INITIALIZE_PASS_END(ForwardControlFlowIntegrity, "forward-cfi",
- "Control-Flow Integrity", true, true)
-
-ModulePass *llvm::createForwardControlFlowIntegrityPass() {
- return new ForwardControlFlowIntegrity();
-}
-
-ModulePass *llvm::createForwardControlFlowIntegrityPass(
- JumpTable::JumpTableType JTT, CFIntegrity CFIType, bool CFIEnforcing,
- StringRef CFIFuncName) {
- return new ForwardControlFlowIntegrity(JTT, CFIType, CFIEnforcing,
- CFIFuncName);
-}
-
-// Checks to see if a given CallSite is making an indirect call, including
-// cases where the indirect call is made through a bitcast.
-static bool isIndirectCall(CallSite &CS) {
- if (CS.getCalledFunction())
- return false;
-
- // Check the value to see if it is merely a bitcast of a function. In
- // this case, it will translate to a direct function call in the resulting
- // assembly, so we won't treat it as an indirect call here.
- const Value *V = CS.getCalledValue();
- if (const ConstantExpr *CE = dyn_cast<ConstantExpr>(V)) {
- return !(CE->isCast() && isa<Function>(CE->getOperand(0)));
- }
-
- // Otherwise, since we know it's a call, it must be an indirect call
- return true;
-}
-
-static const char cfi_failure_func_name[] = "__llvm_cfi_pointer_warning";
-
-ForwardControlFlowIntegrity::ForwardControlFlowIntegrity()
- : ModulePass(ID), IndirectCalls(), JTType(JumpTable::Single),
- CFIType(CFIntegrity::Sub), CFIEnforcing(false), CFIFuncName("") {
- initializeForwardControlFlowIntegrityPass(*PassRegistry::getPassRegistry());
-}
-
-ForwardControlFlowIntegrity::ForwardControlFlowIntegrity(
- JumpTable::JumpTableType JTT, CFIntegrity CFIType, bool CFIEnforcing,
- std::string CFIFuncName)
- : ModulePass(ID), IndirectCalls(), JTType(JTT), CFIType(CFIType),
- CFIEnforcing(CFIEnforcing), CFIFuncName(CFIFuncName) {
- initializeForwardControlFlowIntegrityPass(*PassRegistry::getPassRegistry());
-}
-
-ForwardControlFlowIntegrity::~ForwardControlFlowIntegrity() {}
-
-void ForwardControlFlowIntegrity::getAnalysisUsage(AnalysisUsage &AU) const {
- AU.addRequired<JumpInstrTableInfo>();
- AU.addRequired<JumpInstrTables>();
-}
-
-void ForwardControlFlowIntegrity::getIndirectCalls(Module &M) {
- // To get the indirect calls, we iterate over all functions and iterate over
- // the list of basic blocks in each. We extract a total list of indirect calls
- // before modifying any of them, since our modifications will modify the list
- // of basic blocks.
- for (Function &F : M) {
- for (BasicBlock &BB : F) {
- for (Instruction &I : BB) {
- CallSite CS(&I);
- if (!(CS && isIndirectCall(CS)))
- continue;
-
- Value *CalledValue = CS.getCalledValue();
-
- // Don't rewrite this instruction if the indirect call is actually just
- // inline assembly, since our transformation will generate an invalid
- // module in that case.
- if (isa<InlineAsm>(CalledValue))
- continue;
-
- IndirectCalls.push_back(&I);
- }
- }
- }
-}
-
-void ForwardControlFlowIntegrity::updateIndirectCalls(Module &M,
- CFITables &CFIT) {
- Type *Int64Ty = Type::getInt64Ty(M.getContext());
- for (Instruction *I : IndirectCalls) {
- CallSite CS(I);
- Value *CalledValue = CS.getCalledValue();
-
- // Get the function type for this call and look it up in the tables.
- Type *VTy = CalledValue->getType();
- PointerType *PTy = dyn_cast<PointerType>(VTy);
- Type *EltTy = PTy->getElementType();
- FunctionType *FunTy = dyn_cast<FunctionType>(EltTy);
- FunctionType *TransformedTy = JumpInstrTables::transformType(JTType, FunTy);
- ++NumCFIIndirectCalls;
- Constant *JumpTableStart = nullptr;
- Constant *JumpTableMask = nullptr;
- Constant *JumpTableSize = nullptr;
-
- // Some call sites have function types that don't correspond to any
- // address-taken function in the module. This happens when function pointers
- // are passed in from external code.
- auto it = CFIT.find(TransformedTy);
- if (it == CFIT.end()) {
- // In this case, make sure that the function pointer will change by
- // setting the mask and the start to be 0 so that the transformed
- // function is 0.
- JumpTableStart = ConstantInt::get(Int64Ty, 0);
- JumpTableMask = ConstantInt::get(Int64Ty, 0);
- JumpTableSize = ConstantInt::get(Int64Ty, 0);
- } else {
- JumpTableStart = it->second.StartValue;
- JumpTableMask = it->second.MaskValue;
- JumpTableSize = it->second.Size;
- }
-
- rewriteFunctionPointer(M, I, CalledValue, JumpTableStart, JumpTableMask,
- JumpTableSize);
- }
-
- return;
-}
-
-bool ForwardControlFlowIntegrity::runOnModule(Module &M) {
- JumpInstrTableInfo *JITI = &getAnalysis<JumpInstrTableInfo>();
- Type *Int64Ty = Type::getInt64Ty(M.getContext());
- Type *VoidPtrTy = Type::getInt8PtrTy(M.getContext());
-
- // JumpInstrTableInfo stores information about the alignment of each entry.
- // The alignment returned by JumpInstrTableInfo is alignment in bytes, not
- // in the exponent.
- ByteAlignment = JITI->entryByteAlignment();
- LogByteAlignment = llvm::Log2_64(ByteAlignment);
-
- // Set up tables for control-flow integrity based on information about the
- // jump-instruction tables.
- CFITables CFIT;
- for (const auto &KV : JITI->getTables()) {
- uint64_t Size = static_cast<uint64_t>(KV.second.size());
- uint64_t TableSize = NextPowerOf2(Size);
-
- int64_t MaskValue = ((TableSize << LogByteAlignment) - 1) & -ByteAlignment;
- Constant *JumpTableMaskValue = ConstantInt::get(Int64Ty, MaskValue);
- Constant *JumpTableSize = ConstantInt::get(Int64Ty, Size);
-
- // The base of the table is defined to be the first jumptable function in
- // the table.
- Function *First = KV.second.begin()->second;
- Constant *JumpTableStartValue = ConstantExpr::getBitCast(First, VoidPtrTy);
- CFIT[KV.first].StartValue = JumpTableStartValue;
- CFIT[KV.first].MaskValue = JumpTableMaskValue;
- CFIT[KV.first].Size = JumpTableSize;
- }
-
- if (CFIT.empty())
- return false;
-
- getIndirectCalls(M);
-
- if (!CFIEnforcing) {
- addWarningFunction(M);
- }
-
- // Update the instructions with the check and the indirect jump through our
- // table.
- updateIndirectCalls(M, CFIT);
-
- return true;
-}
-
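
The mask arithmetic in the runOnModule being deleted here is easiest to verify with concrete numbers; a self-contained sketch with illustrative values (a 4-slot table of 8-byte entries):

#include <cassert>
#include <cstdint>
int main() {
  uint64_t LogAlign = 3, Align = 1ull << LogAlign; // each entry is 8 bytes
  uint64_t TableSize = 4;    // power-of-two slot count, e.g. NextPowerOf2(3)
  int64_t Mask = (int64_t)((TableSize << LogAlign) - 1) & -(int64_t)Align;
  assert(Mask == 24);        // 0b11000: index bits kept, alignment bits cleared
  uint64_t Forced = 42 & (uint64_t)Mask; // force a hostile offset into range
  assert(Forced % Align == 0 && Forced < TableSize * Align);
  return 0;
}
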
-void ForwardControlFlowIntegrity::addWarningFunction(Module &M) {
- PointerType *CharPtrTy = Type::getInt8PtrTy(M.getContext());
-
- // Get the type of the Warning Function: void (i8*, i8*),
- // where the first argument is the name of the function in which the violation
- // occurs, and the second is the function pointer that violates CFI.
- SmallVector<Type *, 2> WarningFunArgs;
- WarningFunArgs.push_back(CharPtrTy);
- WarningFunArgs.push_back(CharPtrTy);
- FunctionType *WarningFunTy =
- FunctionType::get(Type::getVoidTy(M.getContext()), WarningFunArgs, false);
-
- if (!CFIFuncName.empty()) {
- Constant *FailureFun = M.getOrInsertFunction(CFIFuncName, WarningFunTy);
- if (!FailureFun)
- report_fatal_error("Could not get or insert the function specified by"
- " -cfi-func-name");
- } else {
- // The default warning function swallows the warning and lets the call
- // continue, since there's no generic way for it to print out this
- // information.
- Function *WarningFun = M.getFunction(cfi_failure_func_name);
- if (!WarningFun) {
- WarningFun =
- Function::Create(WarningFunTy, GlobalValue::LinkOnceAnyLinkage,
- cfi_failure_func_name, &M);
- }
-
- BasicBlock *Entry =
- BasicBlock::Create(M.getContext(), "entry", WarningFun, 0);
- ReturnInst::Create(M.getContext(), Entry);
- }
-}
-
-void ForwardControlFlowIntegrity::rewriteFunctionPointer(
- Module &M, Instruction *I, Value *FunPtr, Constant *JumpTableStart,
- Constant *JumpTableMask, Constant *JumpTableSize) {
- IRBuilder<> TempBuilder(I);
-
- Type *OrigFunType = FunPtr->getType();
-
- BasicBlock *CurBB = cast<BasicBlock>(I->getParent());
- Function *CurF = cast<Function>(CurBB->getParent());
- Type *Int64Ty = Type::getInt64Ty(M.getContext());
-
- Value *TI = TempBuilder.CreatePtrToInt(FunPtr, Int64Ty);
- Value *TStartInt = TempBuilder.CreatePtrToInt(JumpTableStart, Int64Ty);
-
- Value *NewFunPtr = nullptr;
- Value *Check = nullptr;
- switch (CFIType) {
- case CFIntegrity::Sub: {
- // This is the subtract, mask, and add version.
- // Subtract from the base.
- Value *Sub = TempBuilder.CreateSub(TI, TStartInt);
-
- // Mask the difference to force this to be a table offset.
- Value *And = TempBuilder.CreateAnd(Sub, JumpTableMask);
-
- // Add it back to the base.
- Value *Result = TempBuilder.CreateAdd(And, TStartInt);
-
- // Convert it back into a function pointer that we can call.
- NewFunPtr = TempBuilder.CreateIntToPtr(Result, OrigFunType);
- break;
- }
- case CFIntegrity::Ror: {
- // This is the subtract and rotate version.
- // Rotate right by the alignment value. The optimizer should recognize
- // this sequence as a rotation.
-
- // This cast is safe, since unsigned is always a subset of uint64_t.
- uint64_t LogByteAlignment64 = static_cast<uint64_t>(LogByteAlignment);
- Constant *RightShift = ConstantInt::get(Int64Ty, LogByteAlignment64);
- Constant *LeftShift = ConstantInt::get(Int64Ty, 64 - LogByteAlignment64);
-
- // Subtract from the base.
- Value *Sub = TempBuilder.CreateSub(TI, TStartInt);
-
- // Create the equivalent of a rotate-right instruction.
- Value *Shr = TempBuilder.CreateLShr(Sub, RightShift);
- Value *Shl = TempBuilder.CreateShl(Sub, LeftShift);
- Value *Or = TempBuilder.CreateOr(Shr, Shl);
-
- // Perform unsigned comparison to check for inclusion in the table.
- Check = TempBuilder.CreateICmpULT(Or, JumpTableSize);
- NewFunPtr = FunPtr;
- break;
- }
- case CFIntegrity::Add: {
- // This is the mask and add version.
- // Mask the function pointer to turn it into an offset into the table.
- Value *And = TempBuilder.CreateAnd(TI, JumpTableMask);
-
- // Then or this offset to the base and get the pointer value.
- Value *Result = TempBuilder.CreateAdd(And, TStartInt);
-
- // Convert it back into a function pointer that we can call.
- NewFunPtr = TempBuilder.CreateIntToPtr(Result, OrigFunType);
- break;
- }
- }
-
- if (!CFIEnforcing) {
- // If a check hasn't been added (in the rotation version), then check to see
- // if it's the same as the original function. This check determines whether
- // or not we call the CFI failure function.
- if (!Check)
- Check = TempBuilder.CreateICmpEQ(NewFunPtr, FunPtr);
- BasicBlock *InvalidPtrBlock =
- BasicBlock::Create(M.getContext(), "invalid.ptr", CurF, 0);
- BasicBlock *ContinuationBB = CurBB->splitBasicBlock(I);
-
- // Remove the unconditional branch that connects the two blocks.
- TerminatorInst *TermInst = CurBB->getTerminator();
- TermInst->eraseFromParent();
-
- // Add a conditional branch that depends on the Check above.
- BranchInst::Create(ContinuationBB, InvalidPtrBlock, Check, CurBB);
-
- // Call the warning function for this pointer, then continue.
- Instruction *BI = BranchInst::Create(ContinuationBB, InvalidPtrBlock);
- insertWarning(M, InvalidPtrBlock, BI, FunPtr);
- } else {
- // Modify the instruction to call this value.
- CallSite CS(I);
- CS.setCalledFunction(NewFunPtr);
- }
-}
-
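
The Ror variant above leans on a rotate trick: rotating the offset right by the alignment exponent sends any misaligned low bits into the high bits of the word, so one unsigned compare checks alignment and table bounds together. A standalone sketch, assuming 0 < LogAlign < 64:

#include <cstdint>
static bool inJumpTable(uint64_t Ptr, uint64_t Base, unsigned LogAlign,
                        uint64_t NumEntries) {
  uint64_t Off = Ptr - Base;
  // Equivalent of a rotate-right by LogAlign bits (hence the assumption
  // 0 < LogAlign < 64; a shift by 64 would be undefined).
  uint64_t Ror = (Off >> LogAlign) | (Off << (64 - LogAlign));
  // Aligned, in-range offsets rotate to their entry index; anything
  // misaligned lands in the high bits and fails the bound check.
  return Ror < NumEntries;
}
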
-void ForwardControlFlowIntegrity::insertWarning(Module &M, BasicBlock *Block,
- Instruction *I, Value *FunPtr) {
- Function *ParentFun = cast<Function>(Block->getParent());
-
- // Get the function to call right before the instruction.
- Function *WarningFun = nullptr;
- if (CFIFuncName.empty()) {
- WarningFun = M.getFunction(cfi_failure_func_name);
- } else {
- WarningFun = M.getFunction(CFIFuncName);
- }
-
- assert(WarningFun && "Could not find the CFI failure function");
-
- Type *VoidPtrTy = Type::getInt8PtrTy(M.getContext());
-
- IRBuilder<> WarningInserter(I);
- // Create a mergeable GlobalVariable containing the name of the function.
- Value *ParentNameGV =
- WarningInserter.CreateGlobalString(ParentFun->getName());
- Value *ParentNamePtr = WarningInserter.CreateBitCast(ParentNameGV, VoidPtrTy);
- Value *FunVoidPtr = WarningInserter.CreateBitCast(FunPtr, VoidPtrTy);
- WarningInserter.CreateCall2(WarningFun, ParentNamePtr, FunVoidPtr);
-}
diff --git a/lib/CodeGen/IfConversion.cpp b/lib/CodeGen/IfConversion.cpp
index 7a29569..b8799a5 100644
--- a/lib/CodeGen/IfConversion.cpp
+++ b/lib/CodeGen/IfConversion.cpp
@@ -247,7 +247,7 @@ namespace {
return true;
else if (Incr1 == Incr2) {
// Favors subsumption.
- if (C1->NeedSubsumption == false && C2->NeedSubsumption == true)
+ if (!C1->NeedSubsumption && C2->NeedSubsumption)
return true;
else if (C1->NeedSubsumption == C2->NeedSubsumption) {
// Favors diamond over triangle, etc.
@@ -726,6 +726,12 @@ bool IfConverter::FeasibilityAnalysis(BBInfo &BBI,
if (BBI.IsDone || BBI.IsUnpredicable)
return false;
+ // If it is already predicated but we couldn't analyze its terminator, the
+ // latter might fallthrough, but we can't determine where to.
+ // Conservatively avoid if-converting again.
+ if (BBI.Predicate.size() && !BBI.IsBrAnalyzable)
+ return false;
+
// If it is already predicated, check if the new predicate subsumes
// its predicate.
if (BBI.Predicate.size() && !TII->SubsumesPredicate(Pred, BBI.Predicate))
@@ -1555,7 +1561,7 @@ void IfConverter::PredicateBlock(BBInfo &BBI,
UpdatePredRedefs(I, Redefs);
}
- std::copy(Cond.begin(), Cond.end(), std::back_inserter(BBI.Predicate));
+ BBI.Predicate.append(Cond.begin(), Cond.end());
BBI.IsAnalyzed = false;
BBI.NonPredSize = 0;
@@ -1620,9 +1626,8 @@ void IfConverter::CopyAndPredicateBlock(BBInfo &ToBBI, BBInfo &FromBBI,
}
}
- std::copy(FromBBI.Predicate.begin(), FromBBI.Predicate.end(),
- std::back_inserter(ToBBI.Predicate));
- std::copy(Cond.begin(), Cond.end(), std::back_inserter(ToBBI.Predicate));
+ ToBBI.Predicate.append(FromBBI.Predicate.begin(), FromBBI.Predicate.end());
+ ToBBI.Predicate.append(Cond.begin(), Cond.end());
ToBBI.ClobbersPred |= FromBBI.ClobbersPred;
ToBBI.IsAnalyzed = false;
@@ -1661,8 +1666,7 @@ void IfConverter::MergeBlocks(BBInfo &ToBBI, BBInfo &FromBBI, bool AddEdges) {
if (NBB && !FromBBI.BB->isSuccessor(NBB))
FromBBI.BB->addSuccessor(NBB);
- std::copy(FromBBI.Predicate.begin(), FromBBI.Predicate.end(),
- std::back_inserter(ToBBI.Predicate));
+ ToBBI.Predicate.append(FromBBI.Predicate.begin(), FromBBI.Predicate.end());
FromBBI.Predicate.clear();
ToBBI.NonPredSize += FromBBI.NonPredSize;
diff --git a/lib/CodeGen/InterferenceCache.cpp b/lib/CodeGen/InterferenceCache.cpp
index 187e015..fd5749b 100644
--- a/lib/CodeGen/InterferenceCache.cpp
+++ b/lib/CodeGen/InterferenceCache.cpp
@@ -21,7 +21,8 @@ using namespace llvm;
#define DEBUG_TYPE "regalloc"
// Static member used for null interference cursors.
-InterferenceCache::BlockInterference InterferenceCache::Cursor::NoInterference;
+const InterferenceCache::BlockInterference
+ InterferenceCache::Cursor::NoInterference;
// Initializes PhysRegEntries (instead of a SmallVector, PhysRegEntries is a
// buffer of size NumPhysRegs to speed up alloc/clear for targets with large
diff --git a/lib/CodeGen/InterferenceCache.h b/lib/CodeGen/InterferenceCache.h
index 1791afb..6519a80 100644
--- a/lib/CodeGen/InterferenceCache.h
+++ b/lib/CodeGen/InterferenceCache.h
@@ -170,8 +170,8 @@ public:
/// Cursor - The primary query interface for the block interference cache.
class Cursor {
Entry *CacheEntry;
- BlockInterference *Current;
- static BlockInterference NoInterference;
+ const BlockInterference *Current;
+ static const BlockInterference NoInterference;
void setEntry(Entry *E) {
Current = nullptr;
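
The constification above follows the usual pattern for a class-static sentinel: the in-class declaration still needs exactly one out-of-line definition, and making both const lets the shared null cursor live in read-only storage. Reduced to its shape:

struct Cache {
  struct BlockInterference { int Tag; };
  static const BlockInterference NoInterference; // declaration, in the header
};
const Cache::BlockInterference Cache::NoInterference = {}; // one definition
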
diff --git a/lib/CodeGen/JumpInstrTables.cpp b/lib/CodeGen/JumpInstrTables.cpp
deleted file mode 100644
index 75fa261..0000000
--- a/lib/CodeGen/JumpInstrTables.cpp
+++ /dev/null
@@ -1,296 +0,0 @@
-//===-- JumpInstrTables.cpp: Jump-Instruction Tables ----------------------===//
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-///
-/// \file
-/// \brief An implementation of jump-instruction tables.
-///
-//===----------------------------------------------------------------------===//
-
-#define DEBUG_TYPE "jt"
-
-#include "llvm/CodeGen/JumpInstrTables.h"
-#include "llvm/ADT/Statistic.h"
-#include "llvm/Analysis/JumpInstrTableInfo.h"
-#include "llvm/CodeGen/Passes.h"
-#include "llvm/IR/Attributes.h"
-#include "llvm/IR/CallSite.h"
-#include "llvm/IR/Constants.h"
-#include "llvm/IR/DerivedTypes.h"
-#include "llvm/IR/Function.h"
-#include "llvm/IR/LLVMContext.h"
-#include "llvm/IR/Module.h"
-#include "llvm/IR/Operator.h"
-#include "llvm/IR/Type.h"
-#include "llvm/IR/Verifier.h"
-#include "llvm/Support/CommandLine.h"
-#include "llvm/Support/Debug.h"
-#include "llvm/Support/raw_ostream.h"
-#include <vector>
-
-using namespace llvm;
-
-char JumpInstrTables::ID = 0;
-
-INITIALIZE_PASS_BEGIN(JumpInstrTables, "jump-instr-tables",
- "Jump-Instruction Tables", true, true)
-INITIALIZE_PASS_DEPENDENCY(JumpInstrTableInfo);
-INITIALIZE_PASS_END(JumpInstrTables, "jump-instr-tables",
- "Jump-Instruction Tables", true, true)
-
-STATISTIC(NumJumpTables, "Number of indirect call tables generated");
-STATISTIC(NumFuncsInJumpTables, "Number of functions in the jump tables");
-
-ModulePass *llvm::createJumpInstrTablesPass() {
- // The default implementation uses a single table for all functions.
- return new JumpInstrTables(JumpTable::Single);
-}
-
-ModulePass *llvm::createJumpInstrTablesPass(JumpTable::JumpTableType JTT) {
- return new JumpInstrTables(JTT);
-}
-
-namespace {
-static const char jump_func_prefix[] = "__llvm_jump_instr_table_";
-static const char jump_section_prefix[] = ".jump.instr.table.text.";
-
-// Checks to see if a given CallSite is making an indirect call, including
-// cases where the indirect call is made through a bitcast.
-bool isIndirectCall(CallSite &CS) {
- if (CS.getCalledFunction())
- return false;
-
- // Check the value to see if it is merely a bitcast of a function. In
- // this case, it will translate to a direct function call in the resulting
- // assembly, so we won't treat it as an indirect call here.
- const Value *V = CS.getCalledValue();
- if (const ConstantExpr *CE = dyn_cast<ConstantExpr>(V)) {
- return !(CE->isCast() && isa<Function>(CE->getOperand(0)));
- }
-
- // Otherwise, since we know it's a call, it must be an indirect call
- return true;
-}
-
-// Replaces Functions and GlobalAliases with a different Value.
-bool replaceGlobalValueIndirectUse(GlobalValue *GV, Value *V, Use *U) {
- User *Us = U->getUser();
- if (!Us)
- return false;
- if (Instruction *I = dyn_cast<Instruction>(Us)) {
- CallSite CS(I);
-
- // Don't do the replacement if this use is a direct call to this function.
- // If the use is not the called value, then replace it.
- if (CS && (isIndirectCall(CS) || CS.isCallee(U))) {
- return false;
- }
-
- U->set(V);
- } else if (Constant *C = dyn_cast<Constant>(Us)) {
- // Don't replace calls to bitcasts of function symbols, since they get
- // translated to direct calls.
- if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Us)) {
- if (CE->getOpcode() == Instruction::BitCast) {
- // This bitcast must have exactly one user.
- if (CE->user_begin() != CE->user_end()) {
- User *ParentUs = *CE->user_begin();
- if (CallInst *CI = dyn_cast<CallInst>(ParentUs)) {
- CallSite CS(CI);
- Use &CEU = *CE->use_begin();
- if (CS.isCallee(&CEU)) {
- return false;
- }
- }
- }
- }
- }
-
- // GlobalAlias doesn't support replaceUsesOfWithOnConstant. And the verifier
- // requires alias to point to a defined function. So, GlobalAlias is handled
- // as a separate case in runOnModule.
- if (!isa<GlobalAlias>(C))
- C->replaceUsesOfWithOnConstant(GV, V, U);
- } else {
- llvm_unreachable("The Use of a Function symbol is neither an instruction "
- "nor a constant");
- }
-
- return true;
-}
-
-// Replaces all replaceable address-taken uses of GV with a pointer to a
-// jump-instruction table entry.
-void replaceValueWithFunction(GlobalValue *GV, Function *F) {
- // Go through all uses of this function and replace the uses of GV with the
- // jump-table version of the function. Get the uses as a vector before
- // replacing them, since replacing them changes the use list and invalidates
- // the iterator otherwise.
- for (Value::use_iterator I = GV->use_begin(), E = GV->use_end(); I != E;) {
- Use &U = *I++;
-
- // Replacement of constants replaces all instances in the constant. So, some
- // uses might have already been handled by the time we reach them here.
- if (U.get() == GV)
- replaceGlobalValueIndirectUse(GV, F, &U);
- }
-
- return;
-}
-} // end anonymous namespace
-
-JumpInstrTables::JumpInstrTables()
- : ModulePass(ID), Metadata(), JITI(nullptr), TableCount(0),
- JTType(JumpTable::Single) {
- initializeJumpInstrTablesPass(*PassRegistry::getPassRegistry());
-}
-
-JumpInstrTables::JumpInstrTables(JumpTable::JumpTableType JTT)
- : ModulePass(ID), Metadata(), JITI(nullptr), TableCount(0), JTType(JTT) {
- initializeJumpInstrTablesPass(*PassRegistry::getPassRegistry());
-}
-
-JumpInstrTables::~JumpInstrTables() {}
-
-void JumpInstrTables::getAnalysisUsage(AnalysisUsage &AU) const {
- AU.addRequired<JumpInstrTableInfo>();
-}
-
-Function *JumpInstrTables::insertEntry(Module &M, Function *Target) {
- FunctionType *OrigFunTy = Target->getFunctionType();
- FunctionType *FunTy = transformType(JTType, OrigFunTy);
-
- JumpMap::iterator it = Metadata.find(FunTy);
- if (Metadata.end() == it) {
- struct TableMeta Meta;
- Meta.TableNum = TableCount;
- Meta.Count = 0;
- Metadata[FunTy] = Meta;
- it = Metadata.find(FunTy);
- ++NumJumpTables;
- ++TableCount;
- }
-
- it->second.Count++;
-
- std::string NewName(jump_func_prefix);
- NewName += (Twine(it->second.TableNum) + "_" + Twine(it->second.Count)).str();
- Function *JumpFun =
- Function::Create(OrigFunTy, GlobalValue::ExternalLinkage, NewName, &M);
- // The section for this table
- JumpFun->setSection((jump_section_prefix + Twine(it->second.TableNum)).str());
- JITI->insertEntry(FunTy, Target, JumpFun);
-
- ++NumFuncsInJumpTables;
- return JumpFun;
-}
-
-bool JumpInstrTables::hasTable(FunctionType *FunTy) {
- FunctionType *TransTy = transformType(JTType, FunTy);
- return Metadata.end() != Metadata.find(TransTy);
-}
-
-FunctionType *JumpInstrTables::transformType(JumpTable::JumpTableType JTT,
- FunctionType *FunTy) {
- // Returning nullptr forces all types into the same table, since all types map
- // to the same type
- Type *VoidPtrTy = Type::getInt8PtrTy(FunTy->getContext());
-
- // Ignore the return type.
- Type *RetTy = VoidPtrTy;
- bool IsVarArg = FunTy->isVarArg();
- std::vector<Type *> ParamTys(FunTy->getNumParams());
- FunctionType::param_iterator PI, PE;
- int i = 0;
-
- std::vector<Type *> EmptyParams;
- Type *Int32Ty = Type::getInt32Ty(FunTy->getContext());
- FunctionType *VoidFnTy = FunctionType::get(
- Type::getVoidTy(FunTy->getContext()), EmptyParams, false);
- switch (JTT) {
- case JumpTable::Single:
-
- return FunctionType::get(RetTy, EmptyParams, false);
- case JumpTable::Arity:
- // Transform all types to void* so that all functions with the same arity
- // end up in the same table.
- for (PI = FunTy->param_begin(), PE = FunTy->param_end(); PI != PE;
- PI++, i++) {
- ParamTys[i] = VoidPtrTy;
- }
-
- return FunctionType::get(RetTy, ParamTys, IsVarArg);
- case JumpTable::Simplified:
- // Project all parameters types to one of 3 types: composite, integer, and
- // function, matching the three subclasses of Type.
- for (PI = FunTy->param_begin(), PE = FunTy->param_end(); PI != PE;
- ++PI, ++i) {
- assert((isa<IntegerType>(*PI) || isa<FunctionType>(*PI) ||
- isa<CompositeType>(*PI)) &&
- "This type is not an Integer or a Composite or a Function");
- if (isa<CompositeType>(*PI)) {
- ParamTys[i] = VoidPtrTy;
- } else if (isa<FunctionType>(*PI)) {
- ParamTys[i] = VoidFnTy;
- } else if (isa<IntegerType>(*PI)) {
- ParamTys[i] = Int32Ty;
- }
- }
-
- return FunctionType::get(RetTy, ParamTys, IsVarArg);
- case JumpTable::Full:
- // Don't transform this type at all.
- return FunTy;
- }
-
- return nullptr;
-}
-
-bool JumpInstrTables::runOnModule(Module &M) {
- JITI = &getAnalysis<JumpInstrTableInfo>();
-
- // Get the set of jumptable-annotated functions that have their address taken.
- DenseMap<Function *, Function *> Functions;
- for (Function &F : M) {
- if (F.hasFnAttribute(Attribute::JumpTable) && F.hasAddressTaken()) {
- assert(F.hasUnnamedAddr() &&
- "Attribute 'jumptable' requires 'unnamed_addr'");
- Functions[&F] = nullptr;
- }
- }
-
- // Create the jump-table functions.
- for (auto &KV : Functions) {
- Function *F = KV.first;
- KV.second = insertEntry(M, F);
- }
-
- // GlobalAlias is a special case, because the target of an alias statement
- // must be a defined function. So, instead of replacing a given function in
- // the alias, we replace all uses of aliases that target jumptable functions.
- // Note that there's no need to create these functions, since only aliases
- // that target known jumptable functions are replaced, and there's no way to
- // put the jumptable annotation on a global alias.
- DenseMap<GlobalAlias *, Function *> Aliases;
- for (GlobalAlias &GA : M.aliases()) {
- Constant *Aliasee = GA.getAliasee();
- if (Function *F = dyn_cast<Function>(Aliasee)) {
- auto it = Functions.find(F);
- if (it != Functions.end()) {
- Aliases[&GA] = it->second;
- }
- }
- }
-
- // Replace each address taken function with its jump-instruction table entry.
- for (auto &KV : Functions)
- replaceValueWithFunction(KV.first, KV.second);
-
- for (auto &KV : Aliases)
- replaceValueWithFunction(KV.first, KV.second);
-
- return !Functions.empty();
-}
diff --git a/lib/CodeGen/LLVMTargetMachine.cpp b/lib/CodeGen/LLVMTargetMachine.cpp
index 9c23368..0fb0c46 100644
--- a/lib/CodeGen/LLVMTargetMachine.cpp
+++ b/lib/CodeGen/LLVMTargetMachine.cpp
@@ -12,12 +12,9 @@
//===----------------------------------------------------------------------===//
#include "llvm/Target/TargetMachine.h"
-#include "llvm/Analysis/JumpInstrTableInfo.h"
#include "llvm/Analysis/Passes.h"
#include "llvm/CodeGen/AsmPrinter.h"
#include "llvm/CodeGen/BasicTTIImpl.h"
-#include "llvm/CodeGen/ForwardControlFlowIntegrity.h"
-#include "llvm/CodeGen/JumpInstrTables.h"
#include "llvm/CodeGen/MachineFunctionAnalysis.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/Passes.h"
@@ -33,12 +30,8 @@
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/FormattedStream.h"
#include "llvm/Support/TargetRegistry.h"
-#include "llvm/Target/TargetInstrInfo.h"
-#include "llvm/Target/TargetLowering.h"
#include "llvm/Target/TargetLoweringObjectFile.h"
#include "llvm/Target/TargetOptions.h"
-#include "llvm/Target/TargetRegisterInfo.h"
-#include "llvm/Target/TargetSubtargetInfo.h"
#include "llvm/Transforms/Scalar.h"
using namespace llvm;
@@ -50,8 +43,16 @@ EnableFastISelOption("fast-isel", cl::Hidden,
cl::desc("Enable the \"fast\" instruction selector"));
void LLVMTargetMachine::initAsmInfo() {
- MCAsmInfo *TmpAsmInfo = TheTarget.createMCAsmInfo(
- *getSubtargetImpl()->getRegisterInfo(), getTargetTriple());
+ MRI = TheTarget.createMCRegInfo(getTargetTriple());
+ MII = TheTarget.createMCInstrInfo();
+ // FIXME: Having an MCSubtargetInfo on the target machine is a hack due
+ // to some backends having subtarget feature dependent module level
+ // code generation. This is similar to the hack in the AsmPrinter for
+ // module level assembly etc.
+ STI = TheTarget.createMCSubtargetInfo(getTargetTriple(), getTargetCPU(),
+ getTargetFeatureString());
+
+ MCAsmInfo *TmpAsmInfo = TheTarget.createMCAsmInfo(*MRI, getTargetTriple());
// TargetSelect.h moved to a different directory between LLVM 2.9 and 3.0,
// and if the old one gets included then MCAsmInfo will be NULL and
// we'll crash later.
@@ -69,12 +70,13 @@ void LLVMTargetMachine::initAsmInfo() {
AsmInfo = TmpAsmInfo;
}
-LLVMTargetMachine::LLVMTargetMachine(const Target &T, StringRef Triple,
- StringRef CPU, StringRef FS,
- TargetOptions Options,
+LLVMTargetMachine::LLVMTargetMachine(const Target &T,
+ StringRef DataLayoutString,
+ StringRef Triple, StringRef CPU,
+ StringRef FS, TargetOptions Options,
Reloc::Model RM, CodeModel::Model CM,
CodeGenOpt::Level OL)
- : TargetMachine(T, Triple, CPU, FS, Options) {
+ : TargetMachine(T, DataLayoutString, Triple, CPU, FS, Options) {
CodeGenInfo = T.createMCCodeGenInfo(Triple, RM, CM, OL);
}
@@ -115,8 +117,7 @@ static MCContext *addPassesToGenerateCode(LLVMTargetMachine *TM,
// Install a MachineModuleInfo class, which is an immutable pass that holds
// all the per-module stuff we're generating, including MCContext.
MachineModuleInfo *MMI = new MachineModuleInfo(
- *TM->getMCAsmInfo(), *TM->getSubtargetImpl()->getRegisterInfo(),
- TM->getObjFileLowering());
+ *TM->getMCAsmInfo(), *TM->getMCRegisterInfo(), TM->getObjFileLowering());
PM.add(MMI);
// Set up a MachineFunction for the rest of CodeGen to work on.
@@ -145,16 +146,6 @@ bool LLVMTargetMachine::addPassesToEmitFile(PassManagerBase &PM,
bool DisableVerify,
AnalysisID StartAfter,
AnalysisID StopAfter) {
- // Passes to handle jumptable function annotations. These can't be handled at
- // JIT time, so we don't add them directly to addPassesToGenerateCode.
- PM.add(createJumpInstrTableInfoPass(
- getSubtargetImpl()->getInstrInfo()->getJumpInstrTableEntryBound()));
- PM.add(createJumpInstrTablesPass(Options.JTType));
- if (Options.FCFI)
- PM.add(createForwardControlFlowIntegrityPass(
- Options.JTType, Options.CFIType, Options.CFIEnforcing,
- Options.getCFIFuncName()));
-
// Add common CodeGen passes.
MCContext *Context = addPassesToGenerateCode(this, PM, DisableVerify,
StartAfter, StopAfter);
@@ -174,22 +165,22 @@ bool LLVMTargetMachine::addPassesToEmitFile(PassManagerBase &PM,
if (Options.MCOptions.MCSaveTempLabels)
Context->setAllowTemporaryLabels(false);
- const MCSubtargetInfo &STI = getSubtarget<MCSubtargetInfo>();
+ const MCSubtargetInfo &STI = *getMCSubtargetInfo();
const MCAsmInfo &MAI = *getMCAsmInfo();
- const MCRegisterInfo &MRI = *getSubtargetImpl()->getRegisterInfo();
- const MCInstrInfo &MII = *getSubtargetImpl()->getInstrInfo();
+ const MCRegisterInfo &MRI = *getMCRegisterInfo();
+ const MCInstrInfo &MII = *getMCInstrInfo();
+
std::unique_ptr<MCStreamer> AsmStreamer;
switch (FileType) {
case CGFT_AssemblyFile: {
- MCInstPrinter *InstPrinter =
- getTarget().createMCInstPrinter(MAI.getAssemblerDialect(), MAI,
- MII, MRI, STI);
+ MCInstPrinter *InstPrinter = getTarget().createMCInstPrinter(
+ MAI.getAssemblerDialect(), MAI, MII, MRI, STI);
// Create a code emitter if asked to show the encoding.
MCCodeEmitter *MCE = nullptr;
if (Options.MCOptions.ShowMCEncoding)
- MCE = getTarget().createMCCodeEmitter(MII, MRI, STI, *Context);
+ MCE = getTarget().createMCCodeEmitter(MII, MRI, *Context);
MCAsmBackend *MAB = getTarget().createMCAsmBackend(MRI, getTargetTriple(),
TargetCPU);
@@ -203,17 +194,16 @@ bool LLVMTargetMachine::addPassesToEmitFile(PassManagerBase &PM,
case CGFT_ObjectFile: {
// Create the code emitter for the target if it exists. If not, .o file
// emission fails.
- MCCodeEmitter *MCE = getTarget().createMCCodeEmitter(MII, MRI, STI,
- *Context);
+ MCCodeEmitter *MCE = getTarget().createMCCodeEmitter(MII, MRI, *Context);
MCAsmBackend *MAB = getTarget().createMCAsmBackend(MRI, getTargetTriple(),
TargetCPU);
if (!MCE || !MAB)
return true;
- AsmStreamer.reset(
- getTarget()
- .createMCObjectStreamer(getTargetTriple(), *Context, *MAB, Out, MCE,
- STI, Options.MCOptions.MCRelaxAll));
+ Triple T(getTargetTriple());
+ AsmStreamer.reset(getTarget().createMCObjectStreamer(
+ T, *Context, *MAB, Out, MCE, STI, Options.MCOptions.MCRelaxAll,
+ /*DWARFMustBeAtTheEnd*/ true));
break;
}
case CGFT_Null:
@@ -253,18 +243,19 @@ bool LLVMTargetMachine::addPassesToEmitMC(PassManagerBase &PM,
// Create the code emitter for the target if it exists. If not, .o file
// emission fails.
- const MCRegisterInfo &MRI = *getSubtargetImpl()->getRegisterInfo();
- const MCSubtargetInfo &STI = getSubtarget<MCSubtargetInfo>();
- MCCodeEmitter *MCE = getTarget().createMCCodeEmitter(
- *getSubtargetImpl()->getInstrInfo(), MRI, STI, *Ctx);
+ const MCRegisterInfo &MRI = *getMCRegisterInfo();
+ MCCodeEmitter *MCE =
+ getTarget().createMCCodeEmitter(*getMCInstrInfo(), MRI, *Ctx);
MCAsmBackend *MAB = getTarget().createMCAsmBackend(MRI, getTargetTriple(),
TargetCPU);
if (!MCE || !MAB)
return true;
+ Triple T(getTargetTriple());
+ const MCSubtargetInfo &STI = *getMCSubtargetInfo();
std::unique_ptr<MCStreamer> AsmStreamer(getTarget().createMCObjectStreamer(
- getTargetTriple(), *Ctx, *MAB, Out, MCE, STI,
- Options.MCOptions.MCRelaxAll));
+ T, *Ctx, *MAB, Out, MCE, STI, Options.MCOptions.MCRelaxAll,
+ /*DWARFMustBeAtTheEnd*/ true));
// Create the AsmPrinter, which takes ownership of AsmStreamer if successful.
FunctionPass *Printer =
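The hunks above consistently reroute MC-component lookup from the per-function subtarget (getSubtargetImpl()->...) to TargetMachine-level accessors, and thread a new DWARFMustBeAtTheEnd flag into createMCObjectStreamer. A minimal sketch of the post-patch call shape, using only the accessors visible in this diff:

    // MC components now come straight from the TargetMachine:
    const MCAsmInfo &MAI = *TM->getMCAsmInfo();
    const MCRegisterInfo &MRI = *TM->getMCRegisterInfo();
    const MCInstrInfo &MII = *TM->getMCInstrInfo();
    const MCSubtargetInfo &STI = *TM->getMCSubtargetInfo();
    // And the code emitter no longer takes an MCSubtargetInfo argument:
    MCCodeEmitter *MCE = TM->getTarget().createMCCodeEmitter(MII, MRI, Ctx);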
diff --git a/lib/CodeGen/LatencyPriorityQueue.cpp b/lib/CodeGen/LatencyPriorityQueue.cpp
index cdf505e..4321849 100644
--- a/lib/CodeGen/LatencyPriorityQueue.cpp
+++ b/lib/CodeGen/LatencyPriorityQueue.cpp
@@ -138,16 +138,3 @@ void LatencyPriorityQueue::remove(SUnit *SU) {
std::swap(*I, Queue.back());
Queue.pop_back();
}
-
-#ifdef NDEBUG
-void LatencyPriorityQueue::dump(ScheduleDAG *DAG) const {}
-#else
-void LatencyPriorityQueue::dump(ScheduleDAG *DAG) const {
- LatencyPriorityQueue q = *this;
- while (!q.empty()) {
- SUnit *su = q.pop();
- dbgs() << "Height " << su->getHeight() << ": ";
- su->dump(DAG);
- }
-}
-#endif
diff --git a/lib/CodeGen/LiveDebugVariables.cpp b/lib/CodeGen/LiveDebugVariables.cpp
index dc936a3..e3791be 100644
--- a/lib/CodeGen/LiveDebugVariables.cpp
+++ b/lib/CodeGen/LiveDebugVariables.cpp
@@ -36,6 +36,7 @@
#include "llvm/IR/Value.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetRegisterInfo.h"
@@ -276,7 +277,7 @@ public:
/// getDebugLoc - Return DebugLoc of this UserValue.
DebugLoc getDebugLoc() { return dl;}
- void print(raw_ostream&, const TargetMachine*);
+ void print(raw_ostream &, const TargetRegisterInfo *);
};
} // namespace
@@ -362,7 +363,7 @@ public:
};
} // namespace
-void UserValue::print(raw_ostream &OS, const TargetMachine *TM) {
+void UserValue::print(raw_ostream &OS, const TargetRegisterInfo *TRI) {
DIVariable DV(Variable);
OS << "!\"";
DV.printExtendedName(OS);
@@ -378,7 +379,7 @@ void UserValue::print(raw_ostream &OS, const TargetMachine *TM) {
}
for (unsigned i = 0, e = locations.size(); i != e; ++i) {
OS << " Loc" << i << '=';
- locations[i].print(OS, TM);
+ locations[i].print(OS, TRI);
}
OS << '\n';
}
@@ -386,7 +387,7 @@ void UserValue::print(raw_ostream &OS, const TargetMachine *TM) {
void LDVImpl::print(raw_ostream &OS) {
OS << "********** DEBUG VARIABLES **********\n";
for (unsigned i = 0, e = userValues.size(); i != e; ++i)
- userValues[i]->print(OS, &MF->getTarget());
+ userValues[i]->print(OS, TRI);
}
void UserValue::coalesceLocation(unsigned LocNo) {
@@ -1004,7 +1005,7 @@ void LDVImpl::emitDebugValues(VirtRegMap *VRM) {
return;
const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo();
for (unsigned i = 0, e = userValues.size(); i != e; ++i) {
- DEBUG(userValues[i]->print(dbgs(), &MF->getTarget()));
+ DEBUG(userValues[i]->print(dbgs(), TRI));
userValues[i]->rewriteLocations(*VRM, *TRI);
userValues[i]->emitDebugValues(VRM, *LIS, *TII);
}
diff --git a/lib/CodeGen/LiveInterval.cpp b/lib/CodeGen/LiveInterval.cpp
index fd7516d..2afd7fa 100644
--- a/lib/CodeGen/LiveInterval.cpp
+++ b/lib/CodeGen/LiveInterval.cpp
@@ -32,6 +32,7 @@
#include <algorithm>
using namespace llvm;
+namespace {
//===----------------------------------------------------------------------===//
// Implementation of various methods necessary for calculation of live ranges.
// The implementation of the methods abstracts from the concrete type of the
@@ -293,6 +294,7 @@ private:
return I;
}
};
+} // namespace
//===----------------------------------------------------------------------===//
// LiveRange methods
@@ -567,13 +569,9 @@ void LiveRange::removeSegment(SlotIndex Start, SlotIndex End,
/// Also remove the value# from the value# list.
void LiveRange::removeValNo(VNInfo *ValNo) {
if (empty()) return;
- iterator I = end();
- iterator E = begin();
- do {
- --I;
- if (I->valno == ValNo)
- segments.erase(I);
- } while (I != E);
+ segments.erase(std::remove_if(begin(), end(), [ValNo](const Segment &S) {
+ return S.valno == ValNo;
+ }), end());
// Now that ValNo is dead, remove it.
markValNoForDeletion(ValNo);
}
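The removeValNo() rewrite above replaces a backwards manual erase loop with the standard erase-remove idiom, which compacts the surviving segments in a single linear pass instead of erasing (and shifting) one element at a time. A self-contained sketch of the same idiom, with Segment as a hypothetical stand-in type:

    #include <algorithm>
    #include <vector>

    struct Segment { int valno; };

    void removeSegmentsOf(std::vector<Segment> &Segments, int ValNo) {
      // remove_if shifts the kept elements forward and returns the new
      // logical end; erase then drops the dead tail in one step.
      Segments.erase(std::remove_if(Segments.begin(), Segments.end(),
                                    [ValNo](const Segment &S) {
                                      return S.valno == ValNo;
                                    }),
                     Segments.end());
    }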
@@ -747,7 +745,6 @@ void LiveRange::flushSegmentSet() {
segments.empty() &&
"segment set can be used only initially before switching to the array");
segments.append(segmentSet->begin(), segmentSet->end());
- delete segmentSet;
segmentSet = nullptr;
verify();
}
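Note that flushSegmentSet() above drops the explicit "delete segmentSet" while keeping the "segmentSet = nullptr" assignment, which suggests (though this hunk does not show it) that segmentSet now lives in a smart pointer. With std::unique_ptr, assigning nullptr both destroys the owned object and clears the handle:

    std::unique_ptr<SegmentSet> segmentSet;  // assumed new declaration
    segmentSet = nullptr;  // frees the set; no manual delete required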
diff --git a/lib/CodeGen/LiveIntervalAnalysis.cpp b/lib/CodeGen/LiveIntervalAnalysis.cpp
index cc08045..adca4cc 100644
--- a/lib/CodeGen/LiveIntervalAnalysis.cpp
+++ b/lib/CodeGen/LiveIntervalAnalysis.cpp
@@ -199,7 +199,7 @@ void LiveIntervals::computeVirtRegInterval(LiveInterval &LI) {
assert(LRCalc && "LRCalc not initialized.");
assert(LI.empty() && "Should only compute empty intervals.");
LRCalc->reset(MF, getSlotIndexes(), DomTree, &getVNInfoAllocator());
- LRCalc->calculate(LI);
+ LRCalc->calculate(LI, MRI->shouldTrackSubRegLiveness(LI.reg));
computeDeadValues(LI, nullptr);
}
@@ -466,7 +466,7 @@ bool LiveIntervals::computeDeadValues(LiveInterval &LI,
// Is the register live before? Otherwise we may have to add a read-undef
// flag for subregister defs.
- if (MRI->tracksSubRegLiveness()) {
+ if (MRI->shouldTrackSubRegLiveness(LI.reg)) {
if ((I == LI.begin() || std::prev(I)->end < Def) && !VNI->isPHIDef()) {
MachineInstr *MI = getInstructionFromIndex(Def);
MI->addRegisterDefReadUndef(LI.reg);
@@ -662,7 +662,7 @@ void LiveIntervals::addKillFlags(const VirtRegMap *VRM) {
RU.push_back(std::make_pair(&RURange, RURange.find(LI.begin()->end)));
}
- if (MRI->tracksSubRegLiveness()) {
+ if (MRI->subRegLivenessEnabled()) {
SRs.clear();
for (const LiveInterval::SubRange &SR : LI.subranges()) {
SRs.push_back(std::make_pair(&SR, SR.find(LI.begin()->end)));
@@ -700,7 +700,7 @@ void LiveIntervals::addKillFlags(const VirtRegMap *VRM) {
goto CancelKill;
}
- if (MRI->tracksSubRegLiveness()) {
+ if (MRI->subRegLivenessEnabled()) {
// When reading a partial undefined value we must not add a kill flag.
// The regalloc might have used the undef lane for something else.
// Example:
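The LiveIntervalAnalysis hunks above split the old tracksSubRegLiveness() query in two: subRegLivenessEnabled() for the global feature switch, and shouldTrackSubRegLiveness(Reg) where the decision is made per virtual register. A hedged usage sketch; the per-register criteria are an assumption, since this diff only shows the call sites:

    if (MRI->subRegLivenessEnabled()) {
      // Feature is on at all: safe to consult subranges in general code.
    }
    if (MRI->shouldTrackSubRegLiveness(LI.reg)) {
      // This particular vreg gets subregister live ranges computed.
    }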
diff --git a/lib/CodeGen/LivePhysRegs.cpp b/lib/CodeGen/LivePhysRegs.cpp
index 7efd941..89567ef 100644
--- a/lib/CodeGen/LivePhysRegs.cpp
+++ b/lib/CodeGen/LivePhysRegs.cpp
@@ -16,6 +16,7 @@
#include "llvm/CodeGen/LivePhysRegs.h"
#include "llvm/CodeGen/MachineInstrBundle.h"
#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
using namespace llvm;
diff --git a/lib/CodeGen/LiveRangeCalc.cpp b/lib/CodeGen/LiveRangeCalc.cpp
index d804b39..45e7265 100644
--- a/lib/CodeGen/LiveRangeCalc.cpp
+++ b/lib/CodeGen/LiveRangeCalc.cpp
@@ -50,7 +50,7 @@ static void createDeadDef(SlotIndexes &Indexes, VNInfo::Allocator &Alloc,
LR.createDeadDef(DefIdx, Alloc);
}
-void LiveRangeCalc::calculate(LiveInterval &LI) {
+void LiveRangeCalc::calculate(LiveInterval &LI, bool TrackSubRegs) {
assert(MRI && Indexes && "call reset() first");
// Step 1: Create minimal live segments for every definition of Reg.
@@ -63,7 +63,7 @@ void LiveRangeCalc::calculate(LiveInterval &LI) {
continue;
unsigned SubReg = MO.getSubReg();
- if (LI.hasSubRanges() || (SubReg != 0 && MRI->tracksSubRegLiveness())) {
+ if (LI.hasSubRanges() || (SubReg != 0 && TrackSubRegs)) {
unsigned Mask = SubReg != 0 ? TRI.getSubRegIndexLaneMask(SubReg)
: MRI->getMaxLaneMaskForVReg(Reg);
diff --git a/lib/CodeGen/LiveRangeCalc.h b/lib/CodeGen/LiveRangeCalc.h
index 90bf971..34d9953 100644
--- a/lib/CodeGen/LiveRangeCalc.h
+++ b/lib/CodeGen/LiveRangeCalc.h
@@ -187,7 +187,7 @@ public:
/// Calculates liveness for the register specified in live interval @p LI.
/// Creates subregister live ranges as needed if subreg liveness tracking is
/// enabled.
- void calculate(LiveInterval &LI);
+ void calculate(LiveInterval &LI, bool TrackSubRegs);
//===--------------------------------------------------------------------===//
// Low-level interface.
diff --git a/lib/CodeGen/LiveStackAnalysis.cpp b/lib/CodeGen/LiveStackAnalysis.cpp
index 8a6ac25..5c9c679 100644
--- a/lib/CodeGen/LiveStackAnalysis.cpp
+++ b/lib/CodeGen/LiveStackAnalysis.cpp
@@ -61,8 +61,10 @@ LiveStacks::getOrCreateInterval(int Slot, const TargetRegisterClass *RC) {
assert(Slot >= 0 && "Spill slot index must be >= 0");
SS2IntervalMap::iterator I = S2IMap.find(Slot);
if (I == S2IMap.end()) {
- I = S2IMap.insert(I, std::make_pair(Slot,
- LiveInterval(TargetRegisterInfo::index2StackSlot(Slot), 0.0F)));
+ I = S2IMap.emplace(std::piecewise_construct, std::forward_as_tuple(Slot),
+ std::forward_as_tuple(
+ TargetRegisterInfo::index2StackSlot(Slot), 0.0F))
+ .first;
S2RCMap.insert(std::make_pair(Slot, RC));
} else {
// Use the largest common subclass register class.
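The emplace() rewrite above avoids building a temporary std::pair and copying the LiveInterval into the map: std::piecewise_construct forwards each tuple to the corresponding pair member's constructor, so the value is constructed directly in the map node. That is required outright when the mapped type cannot be copied, as in this self-contained sketch with a hypothetical Interval type:

    #include <map>
    #include <tuple>
    #include <utility>

    struct Interval {
      Interval(unsigned Reg, float Weight) : Reg(Reg), Weight(Weight) {}
      Interval(const Interval &) = delete;  // non-copyable value type
      unsigned Reg;
      float Weight;
    };

    std::map<int, Interval> S2IMap;

    void getOrCreate(int Slot, unsigned Reg) {
      // Interval is built in place from (Reg, 0.0F); no copy or move occurs.
      S2IMap.emplace(std::piecewise_construct, std::forward_as_tuple(Slot),
                     std::forward_as_tuple(Reg, 0.0F));
    }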
diff --git a/lib/CodeGen/LiveVariables.cpp b/lib/CodeGen/LiveVariables.cpp
index c4bca5f..11deb81 100644
--- a/lib/CodeGen/LiveVariables.cpp
+++ b/lib/CodeGen/LiveVariables.cpp
@@ -36,6 +36,7 @@
#include "llvm/CodeGen/Passes.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetInstrInfo.h"
#include <algorithm>
using namespace llvm;
diff --git a/lib/CodeGen/LocalStackSlotAllocation.cpp b/lib/CodeGen/LocalStackSlotAllocation.cpp
index e8bf687..8378429 100644
--- a/lib/CodeGen/LocalStackSlotAllocation.cpp
+++ b/lib/CodeGen/LocalStackSlotAllocation.cpp
@@ -252,7 +252,8 @@ void LocalStackSlotPass::calculateFrameObjectOffsets(MachineFunction &Fn) {
}
static inline bool
-lookupCandidateBaseReg(int64_t BaseOffset,
+lookupCandidateBaseReg(unsigned BaseReg,
+ int64_t BaseOffset,
int64_t FrameSizeAdjust,
int64_t LocalFrameOffset,
const MachineInstr *MI,
@@ -260,7 +261,7 @@ lookupCandidateBaseReg(int64_t BaseOffset,
// Check if the relative offset from where the base register points to the
// target address is in range for the instruction.
int64_t Offset = FrameSizeAdjust + LocalFrameOffset - BaseOffset;
- return TRI->isFrameOffsetLegal(MI, Offset);
+ return TRI->isFrameOffsetLegal(MI, BaseReg, Offset);
}
bool LocalStackSlotPass::insertFrameReferenceRegisters(MachineFunction &Fn) {
@@ -362,8 +363,9 @@ bool LocalStackSlotPass::insertFrameReferenceRegisters(MachineFunction &Fn) {
// instruction itself will be taken into account by the target,
// so we don't have to adjust for it here when reusing a base
// register.
- if (UsedBaseReg && lookupCandidateBaseReg(BaseOffset, FrameSizeAdjust,
- LocalOffset, MI, TRI)) {
+ if (UsedBaseReg && lookupCandidateBaseReg(BaseReg, BaseOffset,
+ FrameSizeAdjust, LocalOffset, MI,
+ TRI)) {
DEBUG(dbgs() << " Reusing base register " << BaseReg << "\n");
// We found a register to reuse.
Offset = FrameSizeAdjust + LocalOffset - BaseOffset;
@@ -382,7 +384,7 @@ bool LocalStackSlotPass::insertFrameReferenceRegisters(MachineFunction &Fn) {
// then don't bother creating it.
if (ref + 1 >= e ||
!lookupCandidateBaseReg(
- BaseOffset, FrameSizeAdjust,
+ BaseReg, BaseOffset, FrameSizeAdjust,
FrameReferenceInsns[ref + 1].getLocalOffset(),
FrameReferenceInsns[ref + 1].getMachineInstr(), TRI)) {
BaseOffset = PrevBaseOffset;
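isFrameOffsetLegal() now receives the candidate base register alongside the offset, presumably so targets whose legal offset ranges depend on the register being used can answer precisely; the call sites above simply thread BaseReg through:

    // Updated query shape (BaseReg inserted before the offset):
    bool Fits = TRI->isFrameOffsetLegal(MI, BaseReg, Offset);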
diff --git a/lib/CodeGen/MachineBasicBlock.cpp b/lib/CodeGen/MachineBasicBlock.cpp
index 3c73905..98359b1 100644
--- a/lib/CodeGen/MachineBasicBlock.cpp
+++ b/lib/CodeGen/MachineBasicBlock.cpp
@@ -307,7 +307,7 @@ void MachineBasicBlock::print(raw_ostream &OS, SlotIndexes *Indexes) const {
OS << '\t';
if (I->isInsideBundle())
OS << " * ";
- I->print(OS, &getParent()->getTarget());
+ I->print(OS);
}
// Print the successors of this block according to the CFG.
diff --git a/lib/CodeGen/MachineBlockPlacement.cpp b/lib/CodeGen/MachineBlockPlacement.cpp
index 1b5c1f1..ecc50c9 100644
--- a/lib/CodeGen/MachineBlockPlacement.cpp
+++ b/lib/CodeGen/MachineBlockPlacement.cpp
@@ -33,6 +33,7 @@
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineBlockFrequencyInfo.h"
#include "llvm/CodeGen/MachineBranchProbabilityInfo.h"
+#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineLoopInfo.h"
@@ -40,13 +41,14 @@
#include "llvm/Support/Allocator.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetLowering.h"
#include "llvm/Target/TargetSubtargetInfo.h"
#include <algorithm>
using namespace llvm;
-#define DEBUG_TYPE "block-placement2"
+#define DEBUG_TYPE "block-placement"
STATISTIC(NumCondBranches, "Number of conditional branches");
STATISTIC(NumUncondBranches, "Number of unconditional branches");
@@ -61,11 +63,23 @@ static cl::opt<unsigned> AlignAllBlock("align-all-blocks",
cl::init(0), cl::Hidden);
// FIXME: Find a good default for this flag and remove the flag.
-static cl::opt<unsigned>
-ExitBlockBias("block-placement-exit-block-bias",
- cl::desc("Block frequency percentage a loop exit block needs "
- "over the original exit to be considered the new exit."),
- cl::init(0), cl::Hidden);
+static cl::opt<unsigned> ExitBlockBias(
+ "block-placement-exit-block-bias",
+ cl::desc("Block frequency percentage a loop exit block needs "
+ "over the original exit to be considered the new exit."),
+ cl::init(0), cl::Hidden);
+
+static cl::opt<bool> OutlineOptionalBranches(
+ "outline-optional-branches",
+ cl::desc("Put completely optional branches, i.e. branches with a common "
+ "post dominator, out of line."),
+ cl::init(false), cl::Hidden);
+
+static cl::opt<unsigned> OutlineOptionalThreshold(
+ "outline-optional-threshold",
+ cl::desc("Don't outline optional branches that are a single block with an "
+ "instruction count below this threshold"),
+ cl::init(4), cl::Hidden);
namespace {
class BlockChain;
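Both new options above are cl::Hidden, so they appear only under -help-hidden; the outlining heuristic stays off unless explicitly requested. A hypothetical invocation exercising them (the input file is an example):

    llc -outline-optional-branches -outline-optional-threshold=4 foo.ll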
@@ -107,7 +121,7 @@ public:
/// function. It also registers itself as the chain that block participates
/// in with the BlockToChain mapping.
BlockChain(BlockToChainMapType &BlockToChain, MachineBasicBlock *BB)
- : Blocks(1, BB), BlockToChain(BlockToChain), LoopPredecessors(0) {
+ : Blocks(1, BB), BlockToChain(BlockToChain), LoopPredecessors(0) {
assert(BB && "Cannot create a chain with a null basic block");
BlockToChain[BB] = this;
}
@@ -144,19 +158,18 @@ public:
// Update the incoming blocks to point to this chain, and add them to the
// chain structure.
- for (BlockChain::iterator BI = Chain->begin(), BE = Chain->end();
- BI != BE; ++BI) {
- Blocks.push_back(*BI);
- assert(BlockToChain[*BI] == Chain && "Incoming blocks not in chain");
- BlockToChain[*BI] = this;
+ for (MachineBasicBlock *ChainBB : *Chain) {
+ Blocks.push_back(ChainBB);
+ assert(BlockToChain[ChainBB] == Chain && "Incoming blocks not in chain");
+ BlockToChain[ChainBB] = this;
}
}
#ifndef NDEBUG
/// \brief Dump the blocks in this chain.
LLVM_DUMP_METHOD void dump() {
- for (iterator I = begin(), E = end(); I != E; ++I)
- (*I)->dump();
+ for (MachineBasicBlock *MBB : *this)
+ MBB->dump();
}
#endif // NDEBUG
@@ -188,6 +201,13 @@ class MachineBlockPlacement : public MachineFunctionPass {
/// \brief A handle to the target's lowering info.
const TargetLoweringBase *TLI;
+ /// \brief A handle to the post dominator tree.
+ MachineDominatorTree *MDT;
+
+ /// \brief A set of blocks that are unavoidably executed, i.e. they dominate
+ /// all terminators of the MachineFunction.
+ SmallPtrSet<MachineBasicBlock *, 4> UnavoidableBlocks;
+
/// \brief Allocator and owner of BlockChain structures.
///
/// We build BlockChains lazily while processing the loop structure of
@@ -205,28 +225,26 @@ class MachineBlockPlacement : public MachineFunctionPass {
/// between basic blocks.
DenseMap<MachineBasicBlock *, BlockChain *> BlockToChain;
- void markChainSuccessors(BlockChain &Chain,
- MachineBasicBlock *LoopHeaderBB,
+ void markChainSuccessors(BlockChain &Chain, MachineBasicBlock *LoopHeaderBB,
SmallVectorImpl<MachineBasicBlock *> &BlockWorkList,
const BlockFilterSet *BlockFilter = nullptr);
MachineBasicBlock *selectBestSuccessor(MachineBasicBlock *BB,
BlockChain &Chain,
const BlockFilterSet *BlockFilter);
- MachineBasicBlock *selectBestCandidateBlock(
- BlockChain &Chain, SmallVectorImpl<MachineBasicBlock *> &WorkList,
- const BlockFilterSet *BlockFilter);
- MachineBasicBlock *getFirstUnplacedBlock(
- MachineFunction &F,
- const BlockChain &PlacedChain,
- MachineFunction::iterator &PrevUnplacedBlockIt,
- const BlockFilterSet *BlockFilter);
+ MachineBasicBlock *
+ selectBestCandidateBlock(BlockChain &Chain,
+ SmallVectorImpl<MachineBasicBlock *> &WorkList,
+ const BlockFilterSet *BlockFilter);
+ MachineBasicBlock *
+ getFirstUnplacedBlock(MachineFunction &F, const BlockChain &PlacedChain,
+ MachineFunction::iterator &PrevUnplacedBlockIt,
+ const BlockFilterSet *BlockFilter);
void buildChain(MachineBasicBlock *BB, BlockChain &Chain,
SmallVectorImpl<MachineBasicBlock *> &BlockWorkList,
const BlockFilterSet *BlockFilter = nullptr);
MachineBasicBlock *findBestLoopTop(MachineLoop &L,
const BlockFilterSet &LoopBlockSet);
- MachineBasicBlock *findBestLoopExit(MachineFunction &F,
- MachineLoop &L,
+ MachineBasicBlock *findBestLoopExit(MachineFunction &F, MachineLoop &L,
const BlockFilterSet &LoopBlockSet);
void buildLoopChains(MachineFunction &F, MachineLoop &L);
void rotateLoop(BlockChain &LoopChain, MachineBasicBlock *ExitingBB,
@@ -244,6 +262,7 @@ public:
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.addRequired<MachineBranchProbabilityInfo>();
AU.addRequired<MachineBlockFrequencyInfo>();
+ AU.addRequired<MachineDominatorTree>();
AU.addRequired<MachineLoopInfo>();
MachineFunctionPass::getAnalysisUsage(AU);
}
@@ -252,12 +271,13 @@ public:
char MachineBlockPlacement::ID = 0;
char &llvm::MachineBlockPlacementID = MachineBlockPlacement::ID;
-INITIALIZE_PASS_BEGIN(MachineBlockPlacement, "block-placement2",
+INITIALIZE_PASS_BEGIN(MachineBlockPlacement, "block-placement",
"Branch Probability Basic Block Placement", false, false)
INITIALIZE_PASS_DEPENDENCY(MachineBranchProbabilityInfo)
INITIALIZE_PASS_DEPENDENCY(MachineBlockFrequencyInfo)
+INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
-INITIALIZE_PASS_END(MachineBlockPlacement, "block-placement2",
+INITIALIZE_PASS_END(MachineBlockPlacement, "block-placement",
"Branch Probability Basic Block Placement", false, false)
#ifndef NDEBUG
@@ -267,8 +287,8 @@ INITIALIZE_PASS_END(MachineBlockPlacement, "block-placement2",
static std::string getBlockName(MachineBasicBlock *BB) {
std::string Result;
raw_string_ostream OS(Result);
- OS << "BB#" << BB->getNumber()
- << " (derived from LLVM BB '" << BB->getName() << "')";
+ OS << "BB#" << BB->getNumber();
+ OS << " (derived from LLVM BB '" << BB->getName() << "')";
OS.flush();
return Result;
}
@@ -292,26 +312,22 @@ static std::string getBlockNum(MachineBasicBlock *BB) {
/// having one fewer active predecessor. It also adds any successors of this
/// chain which reach the zero-predecessor state to the worklist passed in.
void MachineBlockPlacement::markChainSuccessors(
- BlockChain &Chain,
- MachineBasicBlock *LoopHeaderBB,
+ BlockChain &Chain, MachineBasicBlock *LoopHeaderBB,
SmallVectorImpl<MachineBasicBlock *> &BlockWorkList,
const BlockFilterSet *BlockFilter) {
// Walk all the blocks in this chain, marking their successors as having
// a predecessor placed.
- for (BlockChain::iterator CBI = Chain.begin(), CBE = Chain.end();
- CBI != CBE; ++CBI) {
+ for (MachineBasicBlock *MBB : Chain) {
// Add any successors for which this is the only un-placed in-loop
// predecessor to the worklist as a viable candidate for CFG-neutral
// placement. No subsequent placement of this block will violate the CFG
// shape, so we get to use heuristics to choose a favorable placement.
- for (MachineBasicBlock::succ_iterator SI = (*CBI)->succ_begin(),
- SE = (*CBI)->succ_end();
- SI != SE; ++SI) {
- if (BlockFilter && !BlockFilter->count(*SI))
+ for (MachineBasicBlock *Succ : MBB->successors()) {
+ if (BlockFilter && !BlockFilter->count(Succ))
continue;
- BlockChain &SuccChain = *BlockToChain[*SI];
+ BlockChain &SuccChain = *BlockToChain[Succ];
// Disregard edges within a fixed chain, or edges to the loop header.
- if (&Chain == &SuccChain || *SI == LoopHeaderBB)
+ if (&Chain == &SuccChain || Succ == LoopHeaderBB)
continue;
// This is a cross-chain edge that is within the loop, so decrement the
@@ -331,9 +347,10 @@ void MachineBlockPlacement::markChainSuccessors(
/// very hot successor edges.
///
/// \returns The best successor block found, or null if none are viable.
-MachineBasicBlock *MachineBlockPlacement::selectBestSuccessor(
- MachineBasicBlock *BB, BlockChain &Chain,
- const BlockFilterSet *BlockFilter) {
+MachineBasicBlock *
+MachineBlockPlacement::selectBestSuccessor(MachineBasicBlock *BB,
+ BlockChain &Chain,
+ const BlockFilterSet *BlockFilter) {
const BranchProbability HotProb(4, 5); // 80%
MachineBasicBlock *BestSucc = nullptr;
@@ -363,6 +380,30 @@ MachineBasicBlock *MachineBlockPlacement::selectBestSuccessor(
uint32_t SuccWeight = MBPI->getEdgeWeight(BB, Succ);
BranchProbability SuccProb(SuccWeight / WeightScale, SumWeight);
+ // If we outline optional branches, check whether Succ is unavoidable, i.e.
+ // dominates all terminators of the MachineFunction. If it does, other
+ // successors must be optional. Don't do this for cold branches.
+ if (OutlineOptionalBranches && SuccProb > HotProb.getCompl() &&
+ UnavoidableBlocks.count(Succ) > 0) {
+ auto HasShortOptionalBranch = [&]() {
+ for (MachineBasicBlock *Pred : Succ->predecessors()) {
+ // Check whether there is an unplaced optional branch.
+ if (Pred == Succ || (BlockFilter && !BlockFilter->count(Pred)) ||
+ BlockToChain[Pred] == &Chain)
+ continue;
+ // Check whether the optional branch has exactly one BB.
+ if (Pred->pred_size() > 1 || *Pred->pred_begin() != BB)
+ continue;
+ // Check whether the optional branch is small.
+ if (Pred->size() < OutlineOptionalThreshold)
+ return true;
+ }
+ return false;
+ };
+ if (!HasShortOptionalBranch())
+ return Succ;
+ }
+
// Only consider successors which are either "hot", or wouldn't violate
// any CFG constraints.
if (SuccChain.LoopPredecessors != 0) {
@@ -426,29 +467,26 @@ MachineBasicBlock *MachineBlockPlacement::selectBestCandidateBlock(
// some code complexity) into the loop below.
WorkList.erase(std::remove_if(WorkList.begin(), WorkList.end(),
[&](MachineBasicBlock *BB) {
- return BlockToChain.lookup(BB) == &Chain;
- }),
+ return BlockToChain.lookup(BB) == &Chain;
+ }),
WorkList.end());
MachineBasicBlock *BestBlock = nullptr;
BlockFrequency BestFreq;
- for (SmallVectorImpl<MachineBasicBlock *>::iterator WBI = WorkList.begin(),
- WBE = WorkList.end();
- WBI != WBE; ++WBI) {
- BlockChain &SuccChain = *BlockToChain[*WBI];
+ for (MachineBasicBlock *MBB : WorkList) {
+ BlockChain &SuccChain = *BlockToChain[MBB];
if (&SuccChain == &Chain) {
- DEBUG(dbgs() << " " << getBlockName(*WBI)
- << " -> Already merged!\n");
+ DEBUG(dbgs() << " " << getBlockName(MBB) << " -> Already merged!\n");
continue;
}
assert(SuccChain.LoopPredecessors == 0 && "Found CFG-violating block");
- BlockFrequency CandidateFreq = MBFI->getBlockFreq(*WBI);
- DEBUG(dbgs() << " " << getBlockName(*WBI) << " -> ";
- MBFI->printBlockFreq(dbgs(), CandidateFreq) << " (freq)\n");
+ BlockFrequency CandidateFreq = MBFI->getBlockFreq(MBB);
+ DEBUG(dbgs() << " " << getBlockName(MBB) << " -> ";
+ MBFI->printBlockFreq(dbgs(), CandidateFreq) << " (freq)\n");
if (BestBlock && BestFreq >= CandidateFreq)
continue;
- BestBlock = *WBI;
+ BestBlock = MBB;
BestFreq = CandidateFreq;
}
return BestBlock;
@@ -481,8 +519,7 @@ MachineBasicBlock *MachineBlockPlacement::getFirstUnplacedBlock(
}
void MachineBlockPlacement::buildChain(
- MachineBasicBlock *BB,
- BlockChain &Chain,
+ MachineBasicBlock *BB, BlockChain &Chain,
SmallVectorImpl<MachineBasicBlock *> &BlockWorkList,
const BlockFilterSet *BlockFilter) {
assert(BB);
@@ -509,8 +546,8 @@ void MachineBlockPlacement::buildChain(
BestSucc = selectBestCandidateBlock(Chain, BlockWorkList, BlockFilter);
if (!BestSucc) {
- BestSucc = getFirstUnplacedBlock(F, Chain, PrevUnplacedBlockIt,
- BlockFilter);
+ BestSucc =
+ getFirstUnplacedBlock(F, Chain, PrevUnplacedBlockIt, BlockFilter);
if (!BestSucc)
break;
@@ -523,8 +560,8 @@ void MachineBlockPlacement::buildChain(
// Zero out LoopPredecessors for the successor we're about to merge in case
// we selected a successor that didn't fit naturally into the CFG.
SuccChain.LoopPredecessors = 0;
- DEBUG(dbgs() << "Merging from " << getBlockNum(BB)
- << " to " << getBlockNum(BestSucc) << "\n");
+ DEBUG(dbgs() << "Merging from " << getBlockNum(BB) << " to "
+ << getBlockNum(BestSucc) << "\n");
markChainSuccessors(SuccChain, LoopHeaderBB, BlockWorkList, BlockFilter);
Chain.merge(BestSucc, &SuccChain);
BB = *std::prev(Chain.end());
@@ -554,20 +591,17 @@ MachineBlockPlacement::findBestLoopTop(MachineLoop &L,
if (!LoopBlockSet.count(*HeaderChain.begin()))
return L.getHeader();
- DEBUG(dbgs() << "Finding best loop top for: "
- << getBlockName(L.getHeader()) << "\n");
+ DEBUG(dbgs() << "Finding best loop top for: " << getBlockName(L.getHeader())
+ << "\n");
BlockFrequency BestPredFreq;
MachineBasicBlock *BestPred = nullptr;
- for (MachineBasicBlock::pred_iterator PI = L.getHeader()->pred_begin(),
- PE = L.getHeader()->pred_end();
- PI != PE; ++PI) {
- MachineBasicBlock *Pred = *PI;
+ for (MachineBasicBlock *Pred : L.getHeader()->predecessors()) {
if (!LoopBlockSet.count(Pred))
continue;
DEBUG(dbgs() << " header pred: " << getBlockName(Pred) << ", "
<< Pred->succ_size() << " successors, ";
- MBFI->printBlockFreq(dbgs(), Pred) << " freq\n");
+ MBFI->printBlockFreq(dbgs(), Pred) << " freq\n");
if (Pred->succ_size() > 1)
continue;
@@ -594,15 +628,13 @@ MachineBlockPlacement::findBestLoopTop(MachineLoop &L,
return BestPred;
}
-
/// \brief Find the best loop exiting block for layout.
///
/// This routine implements the logic to analyze the loop looking for the best
/// block to layout at the top of the loop. Typically this is done to maximize
/// fallthrough opportunities.
MachineBasicBlock *
-MachineBlockPlacement::findBestLoopExit(MachineFunction &F,
- MachineLoop &L,
+MachineBlockPlacement::findBestLoopExit(MachineFunction &F, MachineLoop &L,
const BlockFilterSet &LoopBlockSet) {
// We don't want to layout the loop linearly in all cases. If the loop header
// is just a normal basic block in the loop, we want to look for what block
@@ -624,15 +656,13 @@ MachineBlockPlacement::findBestLoopExit(MachineFunction &F,
// blocks where rotating to exit with that block will reach an outer loop.
SmallPtrSet<MachineBasicBlock *, 4> BlocksExitingToOuterLoop;
- DEBUG(dbgs() << "Finding best loop exit for: "
- << getBlockName(L.getHeader()) << "\n");
- for (MachineLoop::block_iterator I = L.block_begin(),
- E = L.block_end();
- I != E; ++I) {
- BlockChain &Chain = *BlockToChain[*I];
+ DEBUG(dbgs() << "Finding best loop exit for: " << getBlockName(L.getHeader())
+ << "\n");
+ for (MachineBasicBlock *MBB : L.getBlocks()) {
+ BlockChain &Chain = *BlockToChain[MBB];
// Ensure that this block is at the end of a chain; otherwise it could be
// mid-way through an inner loop or a successor of an analyzable branch.
- if (*I != *std::prev(Chain.end()))
+ if (MBB != *std::prev(Chain.end()))
continue;
// Now walk the successors. We need to establish whether this has a viable
@@ -646,43 +676,40 @@ MachineBlockPlacement::findBestLoopExit(MachineFunction &F,
// the MBPI analysis, we use the internal weights and manually compute the
// probabilities to avoid quadratic behavior.
uint32_t WeightScale = 0;
- uint32_t SumWeight = MBPI->getSumForBlock(*I, WeightScale);
- for (MachineBasicBlock::succ_iterator SI = (*I)->succ_begin(),
- SE = (*I)->succ_end();
- SI != SE; ++SI) {
- if ((*SI)->isLandingPad())
+ uint32_t SumWeight = MBPI->getSumForBlock(MBB, WeightScale);
+ for (MachineBasicBlock *Succ : MBB->successors()) {
+ if (Succ->isLandingPad())
continue;
- if (*SI == *I)
+ if (Succ == MBB)
continue;
- BlockChain &SuccChain = *BlockToChain[*SI];
+ BlockChain &SuccChain = *BlockToChain[Succ];
// Don't split chains, either this chain or the successor's chain.
if (&Chain == &SuccChain) {
- DEBUG(dbgs() << " exiting: " << getBlockName(*I) << " -> "
- << getBlockName(*SI) << " (chain conflict)\n");
+ DEBUG(dbgs() << " exiting: " << getBlockName(MBB) << " -> "
+ << getBlockName(Succ) << " (chain conflict)\n");
continue;
}
- uint32_t SuccWeight = MBPI->getEdgeWeight(*I, *SI);
- if (LoopBlockSet.count(*SI)) {
- DEBUG(dbgs() << " looping: " << getBlockName(*I) << " -> "
- << getBlockName(*SI) << " (" << SuccWeight << ")\n");
+ uint32_t SuccWeight = MBPI->getEdgeWeight(MBB, Succ);
+ if (LoopBlockSet.count(Succ)) {
+ DEBUG(dbgs() << " looping: " << getBlockName(MBB) << " -> "
+ << getBlockName(Succ) << " (" << SuccWeight << ")\n");
HasLoopingSucc = true;
continue;
}
unsigned SuccLoopDepth = 0;
- if (MachineLoop *ExitLoop = MLI->getLoopFor(*SI)) {
+ if (MachineLoop *ExitLoop = MLI->getLoopFor(Succ)) {
SuccLoopDepth = ExitLoop->getLoopDepth();
if (ExitLoop->contains(&L))
- BlocksExitingToOuterLoop.insert(*I);
+ BlocksExitingToOuterLoop.insert(MBB);
}
BranchProbability SuccProb(SuccWeight / WeightScale, SumWeight);
- BlockFrequency ExitEdgeFreq = MBFI->getBlockFreq(*I) * SuccProb;
- DEBUG(dbgs() << " exiting: " << getBlockName(*I) << " -> "
- << getBlockName(*SI) << " [L:" << SuccLoopDepth
- << "] (";
- MBFI->printBlockFreq(dbgs(), ExitEdgeFreq) << ")\n");
+ BlockFrequency ExitEdgeFreq = MBFI->getBlockFreq(MBB) * SuccProb;
+ DEBUG(dbgs() << " exiting: " << getBlockName(MBB) << " -> "
+ << getBlockName(Succ) << " [L:" << SuccLoopDepth << "] (";
+ MBFI->printBlockFreq(dbgs(), ExitEdgeFreq) << ")\n");
// Note that we bias this toward an existing layout successor to retain
// incoming order in the absence of better information. The exit must have
// a frequency higher than the current exit before we consider breaking
@@ -690,10 +717,10 @@ MachineBlockPlacement::findBestLoopExit(MachineFunction &F,
BranchProbability Bias(100 - ExitBlockBias, 100);
if (!ExitingBB || BestExitLoopDepth < SuccLoopDepth ||
ExitEdgeFreq > BestExitEdgeFreq ||
- ((*I)->isLayoutSuccessor(*SI) &&
+ (MBB->isLayoutSuccessor(Succ) &&
!(ExitEdgeFreq < BestExitEdgeFreq * Bias))) {
BestExitEdgeFreq = ExitEdgeFreq;
- ExitingBB = *I;
+ ExitingBB = MBB;
}
}
@@ -734,12 +761,10 @@ void MachineBlockPlacement::rotateLoop(BlockChain &LoopChain,
MachineBasicBlock *Top = *LoopChain.begin();
bool ViableTopFallthrough = false;
- for (MachineBasicBlock::pred_iterator PI = Top->pred_begin(),
- PE = Top->pred_end();
- PI != PE; ++PI) {
- BlockChain *PredChain = BlockToChain[*PI];
- if (!LoopBlockSet.count(*PI) &&
- (!PredChain || *PI == *std::prev(PredChain->end()))) {
+ for (MachineBasicBlock *Pred : Top->predecessors()) {
+ BlockChain *PredChain = BlockToChain[Pred];
+ if (!LoopBlockSet.count(Pred) &&
+ (!PredChain || Pred == *std::prev(PredChain->end()))) {
ViableTopFallthrough = true;
break;
}
@@ -750,18 +775,16 @@ void MachineBlockPlacement::rotateLoop(BlockChain &LoopChain,
// introduce an unnecessary branch.
if (ViableTopFallthrough) {
MachineBasicBlock *Bottom = *std::prev(LoopChain.end());
- for (MachineBasicBlock::succ_iterator SI = Bottom->succ_begin(),
- SE = Bottom->succ_end();
- SI != SE; ++SI) {
- BlockChain *SuccChain = BlockToChain[*SI];
- if (!LoopBlockSet.count(*SI) &&
- (!SuccChain || *SI == *SuccChain->begin()))
+ for (MachineBasicBlock *Succ : Bottom->successors()) {
+ BlockChain *SuccChain = BlockToChain[Succ];
+ if (!LoopBlockSet.count(Succ) &&
+ (!SuccChain || Succ == *SuccChain->begin()))
return;
}
}
- BlockChain::iterator ExitIt = std::find(LoopChain.begin(), LoopChain.end(),
- ExitingBB);
+ BlockChain::iterator ExitIt =
+ std::find(LoopChain.begin(), LoopChain.end(), ExitingBB);
if (ExitIt == LoopChain.end())
return;
@@ -778,8 +801,8 @@ void MachineBlockPlacement::buildLoopChains(MachineFunction &F,
MachineLoop &L) {
// First recurse through any nested loops, building chains for those inner
// loops.
- for (MachineLoop::iterator LI = L.begin(), LE = L.end(); LI != LE; ++LI)
- buildLoopChains(F, **LI);
+ for (MachineLoop *InnerLoop : L)
+ buildLoopChains(F, *InnerLoop);
SmallVector<MachineBasicBlock *, 16> BlockWorkList;
BlockFilterSet LoopBlockSet(L.block_begin(), L.block_end());
@@ -805,21 +828,16 @@ void MachineBlockPlacement::buildLoopChains(MachineFunction &F,
SmallPtrSet<BlockChain *, 4> UpdatedPreds;
assert(LoopChain.LoopPredecessors == 0);
UpdatedPreds.insert(&LoopChain);
- for (MachineLoop::block_iterator BI = L.block_begin(),
- BE = L.block_end();
- BI != BE; ++BI) {
- BlockChain &Chain = *BlockToChain[*BI];
+ for (MachineBasicBlock *LoopBB : L.getBlocks()) {
+ BlockChain &Chain = *BlockToChain[LoopBB];
if (!UpdatedPreds.insert(&Chain).second)
continue;
assert(Chain.LoopPredecessors == 0);
- for (BlockChain::iterator BCI = Chain.begin(), BCE = Chain.end();
- BCI != BCE; ++BCI) {
- assert(BlockToChain[*BCI] == &Chain);
- for (MachineBasicBlock::pred_iterator PI = (*BCI)->pred_begin(),
- PE = (*BCI)->pred_end();
- PI != PE; ++PI) {
- if (BlockToChain[*PI] == &Chain || !LoopBlockSet.count(*PI))
+ for (MachineBasicBlock *ChainBB : Chain) {
+ assert(BlockToChain[ChainBB] == &Chain);
+ for (MachineBasicBlock *Pred : ChainBB->predecessors()) {
+ if (BlockToChain[Pred] == &Chain || !LoopBlockSet.count(Pred))
continue;
++Chain.LoopPredecessors;
}
@@ -841,29 +859,26 @@ void MachineBlockPlacement::buildLoopChains(MachineFunction &F,
<< " Loop header: " << getBlockName(*L.block_begin()) << "\n"
<< " Chain header: " << getBlockName(*LoopChain.begin()) << "\n";
}
- for (BlockChain::iterator BCI = LoopChain.begin(), BCE = LoopChain.end();
- BCI != BCE; ++BCI) {
- dbgs() << " ... " << getBlockName(*BCI) << "\n";
- if (!LoopBlockSet.erase(*BCI)) {
+ for (MachineBasicBlock *ChainBB : LoopChain) {
+ dbgs() << " ... " << getBlockName(ChainBB) << "\n";
+ if (!LoopBlockSet.erase(ChainBB)) {
// We don't mark the loop as bad here because there are real situations
// where this can occur. For example, with an unanalyzable fallthrough
// from a loop block to a non-loop block or vice versa.
dbgs() << "Loop chain contains a block not contained by the loop!\n"
<< " Loop header: " << getBlockName(*L.block_begin()) << "\n"
<< " Chain header: " << getBlockName(*LoopChain.begin()) << "\n"
- << " Bad block: " << getBlockName(*BCI) << "\n";
+ << " Bad block: " << getBlockName(ChainBB) << "\n";
}
}
if (!LoopBlockSet.empty()) {
BadLoop = true;
- for (BlockFilterSet::iterator LBI = LoopBlockSet.begin(),
- LBE = LoopBlockSet.end();
- LBI != LBE; ++LBI)
+ for (MachineBasicBlock *LoopBB : LoopBlockSet)
dbgs() << "Loop contains blocks never placed into a chain!\n"
<< " Loop header: " << getBlockName(*L.block_begin()) << "\n"
<< " Chain header: " << getBlockName(*LoopChain.begin()) << "\n"
- << " Bad block: " << getBlockName(*LBI) << "\n";
+ << " Bad block: " << getBlockName(LoopBB) << "\n";
}
assert(!BadLoop && "Detected problems with the placement of this loop.");
});
@@ -875,8 +890,8 @@ void MachineBlockPlacement::buildCFGChains(MachineFunction &F) {
SmallVector<MachineOperand, 4> Cond; // For AnalyzeBranch.
for (MachineFunction::iterator FI = F.begin(), FE = F.end(); FI != FE; ++FI) {
MachineBasicBlock *BB = FI;
- BlockChain *Chain
- = new (ChainAllocator.Allocate()) BlockChain(BlockToChain, BB);
+ BlockChain *Chain =
+ new (ChainAllocator.Allocate()) BlockChain(BlockToChain, BB);
// Also, merge any blocks which we cannot reason about and must preserve
// the exact fallthrough behavior for.
for (;;) {
@@ -899,28 +914,44 @@ void MachineBlockPlacement::buildCFGChains(MachineFunction &F) {
}
}
+ if (OutlineOptionalBranches) {
+ // Find the nearest common dominator of all of F's terminators.
+ MachineBasicBlock *Terminator = nullptr;
+ for (MachineBasicBlock &MBB : F) {
+ if (MBB.succ_size() == 0) {
+ if (Terminator == nullptr)
+ Terminator = &MBB;
+ else
+ Terminator = MDT->findNearestCommonDominator(Terminator, &MBB);
+ }
+ }
+
+ // MBBs dominating this common dominator are unavoidable.
+ UnavoidableBlocks.clear();
+ for (MachineBasicBlock &MBB : F) {
+ if (MDT->dominates(&MBB, Terminator)) {
+ UnavoidableBlocks.insert(&MBB);
+ }
+ }
+ }
+
// Build any loop-based chains.
- for (MachineLoopInfo::iterator LI = MLI->begin(), LE = MLI->end(); LI != LE;
- ++LI)
- buildLoopChains(F, **LI);
+ for (MachineLoop *L : *MLI)
+ buildLoopChains(F, *L);
SmallVector<MachineBasicBlock *, 16> BlockWorkList;
SmallPtrSet<BlockChain *, 4> UpdatedPreds;
- for (MachineFunction::iterator FI = F.begin(), FE = F.end(); FI != FE; ++FI) {
- MachineBasicBlock *BB = &*FI;
- BlockChain &Chain = *BlockToChain[BB];
+ for (MachineBasicBlock &MBB : F) {
+ BlockChain &Chain = *BlockToChain[&MBB];
if (!UpdatedPreds.insert(&Chain).second)
continue;
assert(Chain.LoopPredecessors == 0);
- for (BlockChain::iterator BCI = Chain.begin(), BCE = Chain.end();
- BCI != BCE; ++BCI) {
- assert(BlockToChain[*BCI] == &Chain);
- for (MachineBasicBlock::pred_iterator PI = (*BCI)->pred_begin(),
- PE = (*BCI)->pred_end();
- PI != PE; ++PI) {
- if (BlockToChain[*PI] == &Chain)
+ for (MachineBasicBlock *ChainBB : Chain) {
+ assert(BlockToChain[ChainBB] == &Chain);
+ for (MachineBasicBlock *Pred : ChainBB->predecessors()) {
+ if (BlockToChain[Pred] == &Chain)
continue;
++Chain.LoopPredecessors;
}
@@ -940,46 +971,40 @@ void MachineBlockPlacement::buildCFGChains(MachineFunction &F) {
// Crash at the end so we get all of the debugging output first.
bool BadFunc = false;
FunctionBlockSetType FunctionBlockSet;
- for (MachineFunction::iterator FI = F.begin(), FE = F.end(); FI != FE; ++FI)
- FunctionBlockSet.insert(FI);
+ for (MachineBasicBlock &MBB : F)
+ FunctionBlockSet.insert(&MBB);
- for (BlockChain::iterator BCI = FunctionChain.begin(),
- BCE = FunctionChain.end();
- BCI != BCE; ++BCI)
- if (!FunctionBlockSet.erase(*BCI)) {
+ for (MachineBasicBlock *ChainBB : FunctionChain)
+ if (!FunctionBlockSet.erase(ChainBB)) {
BadFunc = true;
dbgs() << "Function chain contains a block not in the function!\n"
- << " Bad block: " << getBlockName(*BCI) << "\n";
+ << " Bad block: " << getBlockName(ChainBB) << "\n";
}
if (!FunctionBlockSet.empty()) {
BadFunc = true;
- for (FunctionBlockSetType::iterator FBI = FunctionBlockSet.begin(),
- FBE = FunctionBlockSet.end();
- FBI != FBE; ++FBI)
+ for (MachineBasicBlock *RemainingBB : FunctionBlockSet)
dbgs() << "Function contains blocks never placed into a chain!\n"
- << " Bad block: " << getBlockName(*FBI) << "\n";
+ << " Bad block: " << getBlockName(RemainingBB) << "\n";
}
assert(!BadFunc && "Detected problems with the block placement.");
});
// Splice the blocks into place.
MachineFunction::iterator InsertPos = F.begin();
- for (BlockChain::iterator BI = FunctionChain.begin(),
- BE = FunctionChain.end();
- BI != BE; ++BI) {
- DEBUG(dbgs() << (BI == FunctionChain.begin() ? "Placing chain "
- : " ... ")
- << getBlockName(*BI) << "\n");
- if (InsertPos != MachineFunction::iterator(*BI))
- F.splice(InsertPos, *BI);
+ for (MachineBasicBlock *ChainBB : FunctionChain) {
+ DEBUG(dbgs() << (ChainBB == *FunctionChain.begin() ? "Placing chain "
+ : " ... ")
+ << getBlockName(ChainBB) << "\n");
+ if (InsertPos != MachineFunction::iterator(ChainBB))
+ F.splice(InsertPos, ChainBB);
else
++InsertPos;
// Update the terminator of the previous block.
- if (BI == FunctionChain.begin())
+ if (ChainBB == *FunctionChain.begin())
continue;
- MachineBasicBlock *PrevBB = std::prev(MachineFunction::iterator(*BI));
+ MachineBasicBlock *PrevBB = std::prev(MachineFunction::iterator(ChainBB));
// FIXME: It would be awesome if updateTerminator would just return rather
// than assert when the branch cannot be analyzed in order to remove this
@@ -989,16 +1014,16 @@ void MachineBlockPlacement::buildCFGChains(MachineFunction &F) {
if (!TII->AnalyzeBranch(*PrevBB, TBB, FBB, Cond)) {
// The "PrevBB" is not yet updated to reflect current code layout, so,
// o. it may fall-through to a block without explicit "goto" instruction
- // before layout, and no longer fall-through it after layout; or
+ // before layout, and no longer fall-through it after layout; or
// o. just opposite.
- //
+ //
// AnalyzeBranch() may return erroneous value for FBB when these two
// situations take place. For the first scenario FBB is mistakenly set
// NULL; for the 2nd scenario, the FBB, which is expected to be NULL,
// is mistakenly pointing to "ChainBB".
//
bool needUpdateBr = true;
- if (!Cond.empty() && (!FBB || FBB == *BI)) {
+ if (!Cond.empty() && (!FBB || FBB == ChainBB)) {
PrevBB->updateTerminator();
needUpdateBr = false;
Cond.clear();
@@ -1018,7 +1043,7 @@ void MachineBlockPlacement::buildCFGChains(MachineFunction &F) {
<< getBlockName(PrevBB) << "\n");
DEBUG(dbgs() << " Edge weight: " << MBPI->getEdgeWeight(PrevBB, FBB)
<< " vs " << MBPI->getEdgeWeight(PrevBB, TBB) << "\n");
- DebugLoc dl; // FIXME: this is nowhere
+ DebugLoc dl; // FIXME: this is nowhere
TII->RemoveBranch(*PrevBB);
TII->InsertBranch(*PrevBB, FBB, TBB, Cond, dl);
needUpdateBr = true;
@@ -1042,29 +1067,30 @@ void MachineBlockPlacement::buildCFGChains(MachineFunction &F) {
if (F.getFunction()->hasFnAttribute(Attribute::OptimizeForSize))
return;
if (FunctionChain.begin() == FunctionChain.end())
- return; // Empty chain.
+ return; // Empty chain.
const BranchProbability ColdProb(1, 5); // 20%
BlockFrequency EntryFreq = MBFI->getBlockFreq(F.begin());
BlockFrequency WeightedEntryFreq = EntryFreq * ColdProb;
- for (BlockChain::iterator BI = std::next(FunctionChain.begin()),
- BE = FunctionChain.end();
- BI != BE; ++BI) {
+ for (MachineBasicBlock *ChainBB : FunctionChain) {
+ if (ChainBB == *FunctionChain.begin())
+ continue;
+
// Don't align non-looping basic blocks. These are unlikely to execute
// enough times to matter in practice. Note that we'll still handle
// unnatural CFGs inside of a natural outer loop (the common case) and
// rotated loops.
- MachineLoop *L = MLI->getLoopFor(*BI);
+ MachineLoop *L = MLI->getLoopFor(ChainBB);
if (!L)
continue;
unsigned Align = TLI->getPrefLoopAlignment(L);
if (!Align)
- continue; // Don't care about loop alignment.
+ continue; // Don't care about loop alignment.
// If the block is cold relative to the function entry don't waste space
// aligning it.
- BlockFrequency Freq = MBFI->getBlockFreq(*BI);
+ BlockFrequency Freq = MBFI->getBlockFreq(ChainBB);
if (Freq < WeightedEntryFreq)
continue;
@@ -1077,12 +1103,13 @@ void MachineBlockPlacement::buildCFGChains(MachineFunction &F) {
// Check for the existence of a non-layout predecessor which would benefit
// from aligning this block.
- MachineBasicBlock *LayoutPred = *std::prev(BI);
+ MachineBasicBlock *LayoutPred =
+ &*std::prev(MachineFunction::iterator(ChainBB));
// Force alignment if all the predecessors are jumps. We already checked
// that the block isn't cold above.
- if (!LayoutPred->isSuccessor(*BI)) {
- (*BI)->setAlignment(Align);
+ if (!LayoutPred->isSuccessor(ChainBB)) {
+ ChainBB->setAlignment(Align);
continue;
}
@@ -1090,10 +1117,11 @@ void MachineBlockPlacement::buildCFGChains(MachineFunction &F) {
// cold relative to the block. When this is true, other predecessors make up
// all of the hot entries into the block and thus alignment is likely to be
// important.
- BranchProbability LayoutProb = MBPI->getEdgeProbability(LayoutPred, *BI);
+ BranchProbability LayoutProb =
+ MBPI->getEdgeProbability(LayoutPred, ChainBB);
BlockFrequency LayoutEdgeFreq = MBFI->getBlockFreq(LayoutPred) * LayoutProb;
if (LayoutEdgeFreq <= (Freq * ColdProb))
- (*BI)->setAlignment(Align);
+ ChainBB->setAlignment(Align);
}
}
@@ -1110,6 +1138,7 @@ bool MachineBlockPlacement::runOnMachineFunction(MachineFunction &F) {
MLI = &getAnalysis<MachineLoopInfo>();
TII = F.getSubtarget().getInstrInfo();
TLI = F.getSubtarget().getTargetLowering();
+ MDT = &getAnalysis<MachineDominatorTree>();
assert(BlockToChain.empty());
buildCFGChains(F);
@@ -1119,9 +1148,8 @@ bool MachineBlockPlacement::runOnMachineFunction(MachineFunction &F) {
if (AlignAllBlock)
// Align all of the blocks in the function to a specific alignment.
- for (MachineFunction::iterator FI = F.begin(), FE = F.end();
- FI != FE; ++FI)
- FI->setAlignment(AlignAllBlock);
+ for (MachineBasicBlock &MBB : F)
+ MBB.setAlignment(AlignAllBlock);
// We always return true as we have no way to track whether the final order
// differs from the original order.
@@ -1176,20 +1204,19 @@ bool MachineBlockPlacementStats::runOnMachineFunction(MachineFunction &F) {
MBPI = &getAnalysis<MachineBranchProbabilityInfo>();
MBFI = &getAnalysis<MachineBlockFrequencyInfo>();
- for (MachineFunction::iterator I = F.begin(), E = F.end(); I != E; ++I) {
- BlockFrequency BlockFreq = MBFI->getBlockFreq(I);
- Statistic &NumBranches = (I->succ_size() > 1) ? NumCondBranches
- : NumUncondBranches;
- Statistic &BranchTakenFreq = (I->succ_size() > 1) ? CondBranchTakenFreq
- : UncondBranchTakenFreq;
- for (MachineBasicBlock::succ_iterator SI = I->succ_begin(),
- SE = I->succ_end();
- SI != SE; ++SI) {
+ for (MachineBasicBlock &MBB : F) {
+ BlockFrequency BlockFreq = MBFI->getBlockFreq(&MBB);
+ Statistic &NumBranches =
+ (MBB.succ_size() > 1) ? NumCondBranches : NumUncondBranches;
+ Statistic &BranchTakenFreq =
+ (MBB.succ_size() > 1) ? CondBranchTakenFreq : UncondBranchTakenFreq;
+ for (MachineBasicBlock *Succ : MBB.successors()) {
// Skip if this successor is a fallthrough.
- if (I->isLayoutSuccessor(*SI))
+ if (MBB.isLayoutSuccessor(Succ))
continue;
- BlockFrequency EdgeFreq = BlockFreq * MBPI->getEdgeProbability(I, *SI);
+ BlockFrequency EdgeFreq =
+ BlockFreq * MBPI->getEdgeProbability(&MBB, Succ);
++NumBranches;
BranchTakenFreq += EdgeFreq.getFrequency();
}
diff --git a/lib/CodeGen/MachineCSE.cpp b/lib/CodeGen/MachineCSE.cpp
index 21b9c5a..f72d72a 100644
--- a/lib/CodeGen/MachineCSE.cpp
+++ b/lib/CodeGen/MachineCSE.cpp
@@ -24,6 +24,7 @@
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/RecyclingAllocator.h"
+#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetSubtargetInfo.h"
using namespace llvm;
diff --git a/lib/CodeGen/MachineCopyPropagation.cpp b/lib/CodeGen/MachineCopyPropagation.cpp
index cbd6272..9611122 100644
--- a/lib/CodeGen/MachineCopyPropagation.cpp
+++ b/lib/CodeGen/MachineCopyPropagation.cpp
@@ -75,10 +75,9 @@ MachineCopyPropagation::SourceNoLongerAvailable(unsigned Reg,
I != E; ++I) {
unsigned MappedDef = *I;
// Source of copy is no longer available for propagation.
- if (AvailCopyMap.erase(MappedDef)) {
- for (MCSubRegIterator SR(MappedDef, TRI); SR.isValid(); ++SR)
- AvailCopyMap.erase(*SR);
- }
+ AvailCopyMap.erase(MappedDef);
+ for (MCSubRegIterator SR(MappedDef, TRI); SR.isValid(); ++SR)
+ AvailCopyMap.erase(*SR);
}
}
}
diff --git a/lib/CodeGen/MachineDominators.cpp b/lib/CodeGen/MachineDominators.cpp
index df60cf3..467a2e4 100644
--- a/lib/CodeGen/MachineDominators.cpp
+++ b/lib/CodeGen/MachineDominators.cpp
@@ -14,6 +14,7 @@
#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/Passes.h"
+#include "llvm/ADT/SmallBitVector.h"
using namespace llvm;
@@ -59,3 +60,68 @@ void MachineDominatorTree::releaseMemory() {
void MachineDominatorTree::print(raw_ostream &OS, const Module*) const {
DT->print(OS);
}
+
+void MachineDominatorTree::applySplitCriticalEdges() const {
+ // Bail out early if there is nothing to do.
+ if (CriticalEdgesToSplit.empty())
+ return;
+
+ // For each element in CriticalEdgesToSplit, remember whether or not the element
+ // is the new immediate dominator of its successor. The mapping is done by
+ // index, i.e., the information for the ith element of CriticalEdgesToSplit is
+ // the ith element of IsNewIDom.
+ SmallBitVector IsNewIDom(CriticalEdgesToSplit.size(), true);
+ size_t Idx = 0;
+
+ // Collect all the dominance properties info, before invalidating
+ // the underlying DT.
+ for (CriticalEdge &Edge : CriticalEdgesToSplit) {
+ // Update dominator information.
+ MachineBasicBlock *Succ = Edge.ToBB;
+ MachineDomTreeNode *SuccDTNode = DT->getNode(Succ);
+
+ for (MachineBasicBlock *PredBB : Succ->predecessors()) {
+ if (PredBB == Edge.NewBB)
+ continue;
+ // If we are in this situation:
+ // FromBB1 FromBB2
+ // + +
+ // + + + +
+ // + + + +
+ // ... Split1 Split2 ...
+ // + +
+ // + +
+ // +
+ // Succ
+ // Instead of checking the dominance property with Split2, we check it with
+ // FromBB2, since Split2 is still unknown to the underlying DT structure.
+ if (NewBBs.count(PredBB)) {
+ assert(PredBB->pred_size() == 1 && "A basic block resulting from a "
+ "critical edge split has more "
+ "than one predecessor!");
+ PredBB = *PredBB->pred_begin();
+ }
+ if (!DT->dominates(SuccDTNode, DT->getNode(PredBB))) {
+ IsNewIDom[Idx] = false;
+ break;
+ }
+ }
+ ++Idx;
+ }
+
+ // Now, update DT with the collected dominance properties info.
+ Idx = 0;
+ for (CriticalEdge &Edge : CriticalEdgesToSplit) {
+ // We know FromBB dominates NewBB.
+ MachineDomTreeNode *NewDTNode = DT->addNewBlock(Edge.NewBB, Edge.FromBB);
+
+ // If all the other predecessors of "Succ" are dominated by "Succ" itself
+ // then the new block is the new immediate dominator of "Succ". Otherwise,
+ // the new block doesn't dominate anything.
+ if (IsNewIDom[Idx])
+ DT->changeImmediateDominator(DT->getNode(Edge.ToBB), NewDTNode);
+ ++Idx;
+ }
+ NewBBs.clear();
+ CriticalEdgesToSplit.clear();
+}
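applySplitCriticalEdges() above is deliberately two-phase: it first records, for every split edge, whether the new block becomes its successor's immediate dominator, answering all dominance queries against the not-yet-updated tree, and only then performs the insertions, so earlier updates cannot skew later queries. A stripped-down sketch of that collect-then-commit pattern, with hypothetical types and helpers:

    #include <cstddef>
    #include <vector>

    struct CriticalEdge { int FromBB, NewBB, ToBB; };

    bool succDominatesOtherPreds(const CriticalEdge &E);   // queries old tree
    void insertAndMaybeReparent(const CriticalEdge &E, bool IsNewIDom);

    void applyDeferredSplits(const std::vector<CriticalEdge> &Edges) {
      // Phase 1: collect dominance facts while the tree is still unmodified.
      std::vector<bool> IsNewIDom(Edges.size());
      for (std::size_t I = 0; I < Edges.size(); ++I)
        IsNewIDom[I] = succDominatesOtherPreds(Edges[I]);
      // Phase 2: mutate the tree using only the pre-collected facts.
      for (std::size_t I = 0; I < Edges.size(); ++I)
        insertAndMaybeReparent(Edges[I], IsNewIDom[I]);
    }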
diff --git a/lib/CodeGen/MachineFunction.cpp b/lib/CodeGen/MachineFunction.cpp
index 151a260..6ceace8 100644
--- a/lib/CodeGen/MachineFunction.cpp
+++ b/lib/CodeGen/MachineFunction.cpp
@@ -54,7 +54,7 @@ void ilist_traits<MachineBasicBlock>::deleteNode(MachineBasicBlock *MBB) {
MachineFunction::MachineFunction(const Function *F, const TargetMachine &TM,
unsigned FunctionNum, MachineModuleInfo &mmi)
- : Fn(F), Target(TM), STI(TM.getSubtargetImpl()), Ctx(mmi.getContext()),
+ : Fn(F), Target(TM), STI(TM.getSubtargetImpl(*F)), Ctx(mmi.getContext()),
MMI(mmi) {
if (STI->getRegisterInfo())
RegInfo = new (Allocator) MachineRegisterInfo(this);
@@ -584,14 +584,6 @@ int MachineFrameInfo::CreateFixedSpillStackObject(uint64_t Size,
return -++NumFixedObjects;
}
-int MachineFrameInfo::CreateFrameAllocation(uint64_t Size) {
- // Force the use of a frame pointer. The intention is that this intrinsic be
- // used in conjunction with unwind mechanisms that leak the frame pointer.
- setFrameAddressIsTaken(true);
- Size = RoundUpToAlignment(Size, StackAlignment);
- return CreateStackObject(Size, StackAlignment, false);
-}
-
BitVector
MachineFrameInfo::getPristineRegs(const MachineBasicBlock *MBB) const {
assert(MBB && "MBB must be valid");
@@ -903,16 +895,16 @@ static bool CanShareConstantPoolEntry(const Constant *A, const Constant *B,
// DataLayout.
if (isa<PointerType>(A->getType()))
A = ConstantFoldInstOperands(Instruction::PtrToInt, IntTy,
- const_cast<Constant*>(A), TD);
+ const_cast<Constant *>(A), *TD);
else if (A->getType() != IntTy)
A = ConstantFoldInstOperands(Instruction::BitCast, IntTy,
- const_cast<Constant*>(A), TD);
+ const_cast<Constant *>(A), *TD);
if (isa<PointerType>(B->getType()))
B = ConstantFoldInstOperands(Instruction::PtrToInt, IntTy,
- const_cast<Constant*>(B), TD);
+ const_cast<Constant *>(B), *TD);
else if (B->getType() != IntTy)
B = ConstantFoldInstOperands(Instruction::BitCast, IntTy,
- const_cast<Constant*>(B), TD);
+ const_cast<Constant *>(B), *TD);
return A == B;
}
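Two independent changes show up in MachineFunction.cpp: the constructor now asks the TargetMachine for a subtarget specific to the Function rather than a module-wide one, and ConstantFoldInstOperands takes the DataLayout by reference instead of by pointer. The former is what allows per-function code generation settings; a hedged sketch (the "target-cpu"/"target-features" attributes are the usual inputs to such a lookup, though this diff does not show them):

    // One module can now mix functions built for different CPU/feature sets:
    const TargetSubtargetInfo *STI = TM.getSubtargetImpl(*F);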
diff --git a/lib/CodeGen/MachineInstr.cpp b/lib/CodeGen/MachineInstr.cpp
index 981e4a3..1240efb 100644
--- a/lib/CodeGen/MachineInstr.cpp
+++ b/lib/CodeGen/MachineInstr.cpp
@@ -276,17 +276,8 @@ hash_code llvm::hash_value(const MachineOperand &MO) {
/// print - Print the specified machine operand.
///
-void MachineOperand::print(raw_ostream &OS, const TargetMachine *TM) const {
- // If the instruction is embedded into a basic block, we can find the
- // target info for the instruction.
- if (!TM)
- if (const MachineInstr *MI = getParent())
- if (const MachineBasicBlock *MBB = MI->getParent())
- if (const MachineFunction *MF = MBB->getParent())
- TM = &MF->getTarget();
- const TargetRegisterInfo *TRI =
- TM ? TM->getSubtargetImpl()->getRegisterInfo() : nullptr;
-
+void MachineOperand::print(raw_ostream &OS,
+ const TargetRegisterInfo *TRI) const {
switch (getType()) {
case MachineOperand::MO_Register:
OS << PrintReg(getReg(), TRI, getSubReg());
@@ -1512,23 +1503,19 @@ void MachineInstr::dump() const {
#endif
}
-static void printDebugLoc(DebugLoc DL, const MachineFunction *MF,
- raw_ostream &CommentOS) {
- const LLVMContext &Ctx = MF->getFunction()->getContext();
- DL.print(Ctx, CommentOS);
-}
-
-void MachineInstr::print(raw_ostream &OS, const TargetMachine *TM,
- bool SkipOpers) const {
- // We can be a bit tidier if we know the TargetMachine and/or MachineFunction.
+void MachineInstr::print(raw_ostream &OS, bool SkipOpers) const {
+ // We can be a bit tidier if we know the MachineFunction.
const MachineFunction *MF = nullptr;
+ const TargetRegisterInfo *TRI = nullptr;
const MachineRegisterInfo *MRI = nullptr;
+ const TargetInstrInfo *TII = nullptr;
if (const MachineBasicBlock *MBB = getParent()) {
MF = MBB->getParent();
- if (!TM && MF)
- TM = &MF->getTarget();
- if (MF)
+ if (MF) {
MRI = &MF->getRegInfo();
+ TRI = MF->getSubtarget().getRegisterInfo();
+ TII = MF->getSubtarget().getInstrInfo();
+ }
}
// Save a list of virtual registers.
@@ -1541,7 +1528,7 @@ void MachineInstr::print(raw_ostream &OS, const TargetMachine *TM,
!getOperand(StartOp).isImplicit();
++StartOp) {
if (StartOp != 0) OS << ", ";
- getOperand(StartOp).print(OS, TM);
+ getOperand(StartOp).print(OS, TRI);
unsigned Reg = getOperand(StartOp).getReg();
if (TargetRegisterInfo::isVirtualRegister(Reg))
VirtRegs.push_back(Reg);
@@ -1551,8 +1538,8 @@ void MachineInstr::print(raw_ostream &OS, const TargetMachine *TM,
OS << " = ";
// Print the opcode name.
- if (TM && TM->getSubtargetImpl()->getInstrInfo())
- OS << TM->getSubtargetImpl()->getInstrInfo()->getName(getOpcode());
+ if (TII)
+ OS << TII->getName(getOpcode());
else
OS << "UNKNOWN";
@@ -1568,7 +1555,7 @@ void MachineInstr::print(raw_ostream &OS, const TargetMachine *TM,
if (isInlineAsm() && e >= InlineAsm::MIOp_FirstOperand) {
// Print asm string.
OS << " ";
- getOperand(InlineAsm::MIOp_AsmString).print(OS, TM);
+ getOperand(InlineAsm::MIOp_AsmString).print(OS, TRI);
// Print HasSideEffects, MayLoad, MayStore, IsAlignStack
unsigned ExtraInfo = getOperand(InlineAsm::MIOp_ExtraInfo).getImm();
@@ -1606,9 +1593,7 @@ void MachineInstr::print(raw_ostream &OS, const TargetMachine *TM,
if (TargetRegisterInfo::isPhysicalRegister(Reg)) {
if (MRI->use_empty(Reg)) {
bool HasAliasLive = false;
- for (MCRegAliasIterator AI(
- Reg, TM->getSubtargetImpl()->getRegisterInfo(), true);
- AI.isValid(); ++AI) {
+ for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI) {
unsigned AliasReg = *AI;
if (!MRI->use_empty(AliasReg)) {
HasAliasLive = true;
@@ -1641,10 +1626,9 @@ void MachineInstr::print(raw_ostream &OS, const TargetMachine *TM,
if (DI.isVariable() && !DIV.getName().empty())
OS << "!\"" << DIV.getName() << '\"';
else
- MO.print(OS, TM);
- } else if (TM && (isInsertSubreg() || isRegSequence()) && MO.isImm()) {
- OS << TM->getSubtargetImpl()->getRegisterInfo()->getSubRegIndexName(
- MO.getImm());
+ MO.print(OS, TRI);
+ } else if (TRI && (isInsertSubreg() || isRegSequence()) && MO.isImm()) {
+ OS << TRI->getSubRegIndexName(MO.getImm());
} else if (i == AsmDescOp && MO.isImm()) {
// Pretty print the inline asm operand descriptor.
OS << '$' << AsmOpCount++;
@@ -1661,11 +1645,8 @@ void MachineInstr::print(raw_ostream &OS, const TargetMachine *TM,
unsigned RCID = 0;
if (InlineAsm::hasRegClassConstraint(Flag, RCID)) {
- if (TM) {
- const TargetRegisterInfo *TRI =
- TM->getSubtargetImpl()->getRegisterInfo();
- OS << ':'
- << TRI->getRegClassName(TRI->getRegClass(RCID));
+ if (TRI) {
+ OS << ':' << TRI->getRegClassName(TRI->getRegClass(RCID));
} else
OS << ":RC" << RCID;
}
@@ -1679,7 +1660,7 @@ void MachineInstr::print(raw_ostream &OS, const TargetMachine *TM,
// Compute the index of the next operand descriptor.
AsmDescOp += 1 + InlineAsm::getNumOperandRegisters(Flag);
} else
- MO.print(OS, TM);
+ MO.print(OS, TRI);
}
// Briefly indicate whether any call clobbers were omitted.
@@ -1715,7 +1696,7 @@ void MachineInstr::print(raw_ostream &OS, const TargetMachine *TM,
if (!HaveSemi) OS << ";"; HaveSemi = true;
for (unsigned i = 0; i != VirtRegs.size(); ++i) {
const TargetRegisterClass *RC = MRI->getRegClass(VirtRegs[i]);
- OS << " " << MRI->getTargetRegisterInfo()->getRegClassName(RC)
+ OS << " " << TRI->getRegClassName(RC)
<< ':' << PrintReg(VirtRegs[i]);
for (unsigned j = i+1; j != VirtRegs.size();) {
if (MRI->getRegClass(VirtRegs[j]) != RC) {
@@ -1738,7 +1719,7 @@ void MachineInstr::print(raw_ostream &OS, const TargetMachine *TM,
DebugLoc InlinedAtDL = DebugLoc::getFromDILocation(InlinedAt);
if (!InlinedAtDL.isUnknown() && MF) {
OS << " inlined @[ ";
- printDebugLoc(InlinedAtDL, MF, OS);
+ InlinedAtDL.print(OS);
OS << " ]";
}
}
@@ -1747,7 +1728,7 @@ void MachineInstr::print(raw_ostream &OS, const TargetMachine *TM,
} else if (!debugLoc.isUnknown() && MF) {
if (!HaveSemi) OS << ";";
OS << " dbg:";
- printDebugLoc(debugLoc, MF, OS);
+ debugLoc.print(OS);
}
OS << '\n';
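
The printer change above is mechanical for callers: drop the TargetMachine
argument and keep the SkipOpers flag. A minimal sketch of a caller on the new
interface, assuming this revision (dumpInstr is an invented helper):

    #include "llvm/CodeGen/MachineInstr.h"
    #include "llvm/Support/raw_ostream.h"
    #include <string>

    // Render a MachineInstr to a std::string; TRI/TII are now recovered
    // from the instruction's parent MachineFunction by print() itself.
    static std::string dumpInstr(const llvm::MachineInstr &MI) {
      std::string Buf;
      llvm::raw_string_ostream OS(Buf);
      MI.print(OS, /*SkipOpers=*/false); // was: MI.print(OS, &TM, false)
      return OS.str();
    }
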
diff --git a/lib/CodeGen/MachineLICM.cpp b/lib/CodeGen/MachineLICM.cpp
index 64d0932..2f65a2e 100644
--- a/lib/CodeGen/MachineLICM.cpp
+++ b/lib/CodeGen/MachineLICM.cpp
@@ -54,6 +54,12 @@ HoistCheapInsts("hoist-cheap-insts",
cl::desc("MachineLICM should hoist even cheap instructions"),
cl::init(false), cl::Hidden);
+static cl::opt<bool>
+SinkInstsToAvoidSpills("sink-insts-to-avoid-spills",
+ cl::desc("MachineLICM should sink instructions into "
+ "loops to avoid register spills"),
+ cl::init(false), cl::Hidden);
+
STATISTIC(NumHoisted,
"Number of machine instructions hoisted out of loops");
STATISTIC(NumLowRP,
@@ -243,6 +249,11 @@ namespace {
void HoistOutOfLoop(MachineDomTreeNode *LoopHeaderNode);
void HoistRegion(MachineDomTreeNode *N, bool IsHeader);
+ /// SinkIntoLoop - Sink instructions into loops if profitable. This
+ /// especially tries to prevent register spills caused by register pressure
+ /// when there is little or no overhead in moving instructions into loops.
+ void SinkIntoLoop();
+
/// getRegisterClassIDAndCost - For a given MI, register, and the operand
/// index, return the ID and cost of its representative register class by
/// reference.
@@ -381,6 +392,9 @@ bool MachineLICM::runOnMachineFunction(MachineFunction &MF) {
FirstInLoop = true;
HoistOutOfLoop(N);
CSEMap.clear();
+
+ if (SinkInstsToAvoidSpills)
+ SinkIntoLoop();
}
}
@@ -771,6 +785,53 @@ void MachineLICM::HoistOutOfLoop(MachineDomTreeNode *HeaderN) {
}
}
+void MachineLICM::SinkIntoLoop() {
+ MachineBasicBlock *Preheader = getCurPreheader();
+ if (!Preheader)
+ return;
+
+ SmallVector<MachineInstr *, 8> Candidates;
+ for (MachineBasicBlock::instr_iterator I = Preheader->instr_begin();
+ I != Preheader->instr_end(); ++I) {
+ // We need to ensure that we can safely move this instruction into the loop.
+ // As such, it must not have side effects, as a call, for example, does.
+ if (IsLoopInvariantInst(*I) && !HasLoopPHIUse(I))
+ Candidates.push_back(I);
+ }
+
+ for (MachineInstr *I : Candidates) {
+ const MachineOperand &MO = I->getOperand(0);
+ if (!MO.isReg() || !MO.isDef() || !MO.getReg())
+ continue;
+ if (!MRI->hasOneDef(MO.getReg()))
+ continue;
+ bool CanSink = true;
+ MachineBasicBlock *B = nullptr;
+ for (MachineInstr &MI : MRI->use_instructions(MO.getReg())) {
+ // FIXME: Come up with a proper cost model that estimates whether sinking
+ // the instruction (and thus possibly executing it on every loop
+ // iteration) is more expensive than keeping the value in a register
+ // across the loop.
+ // For now, assume that copies are cheap and thus almost always worth it.
+ if (!MI.isCopy()) {
+ CanSink = false;
+ break;
+ }
+ if (!B) {
+ B = MI.getParent();
+ continue;
+ }
+ B = DT->findNearestCommonDominator(B, MI.getParent());
+ if (!B) {
+ CanSink = false;
+ break;
+ }
+ }
+ if (!CanSink || !B || B == Preheader)
+ continue;
+ B->splice(B->getFirstNonPHI(), Preheader, I);
+ }
+}
+
static bool isOperandKill(const MachineOperand &MO, MachineRegisterInfo *MRI) {
return MO.isKill() || MRI->hasOneNonDBGUse(MO.getReg());
}
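
The use-walk inside SinkIntoLoop above can be restated on its own; the sketch
below (findSinkBlock is invented, not a helper in the patch) shows the
nearest-common-dominator folding it performs:

    #include "llvm/CodeGen/MachineDominators.h"
    #include "llvm/CodeGen/MachineRegisterInfo.h"
    using namespace llvm;

    // Fold all uses of Reg into one candidate block; null means "don't sink".
    static MachineBasicBlock *findSinkBlock(unsigned Reg,
                                            const MachineRegisterInfo &MRI,
                                            MachineDominatorTree &DT) {
      MachineBasicBlock *B = nullptr;
      for (MachineInstr &UseMI : MRI.use_instructions(Reg)) {
        if (!UseMI.isCopy())
          return nullptr; // per the FIXME, only cheap copies are accepted
        B = B ? DT.findNearestCommonDominator(B, UseMI.getParent())
              : UseMI.getParent();
        if (!B)
          return nullptr; // the uses share no common dominator
      }
      return B; // may still equal the preheader; the caller filters that out
    }
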
diff --git a/lib/CodeGen/MachineLoopInfo.cpp b/lib/CodeGen/MachineLoopInfo.cpp
index 89054d4..ce6abdd 100644
--- a/lib/CodeGen/MachineLoopInfo.cpp
+++ b/lib/CodeGen/MachineLoopInfo.cpp
@@ -19,6 +19,7 @@
#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
using namespace llvm;
// Explicitly instantiate methods in LoopInfoImpl.h for MI-level Loops.
diff --git a/lib/CodeGen/MachineRegisterInfo.cpp b/lib/CodeGen/MachineRegisterInfo.cpp
index 32b7db1..278a8f2 100644
--- a/lib/CodeGen/MachineRegisterInfo.cpp
+++ b/lib/CodeGen/MachineRegisterInfo.cpp
@@ -65,7 +65,7 @@ MachineRegisterInfo::recomputeRegClass(unsigned Reg) {
const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo();
const TargetRegisterClass *OldRC = getRegClass(Reg);
const TargetRegisterClass *NewRC =
- getTargetRegisterInfo()->getLargestLegalSuperClass(OldRC);
+ getTargetRegisterInfo()->getLargestLegalSuperClass(OldRC, *MF);
// Stop early if there is no room to grow.
if (NewRC == OldRC)
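
The new MachineFunction parameter lets a target vary its answer per function,
e.g. on subtarget features. A conforming target override might look roughly
like this (the MyTarget names and hasWideRegs hook are invented):

    // Refuse to widen the class when this function's subtarget lacks the
    // wider registers; otherwise defer to the default implementation.
    const TargetRegisterClass *MyTargetRegisterInfo::getLargestLegalSuperClass(
        const TargetRegisterClass *RC, const MachineFunction &MF) const {
      if (!MF.getSubtarget<MyTargetSubtarget>().hasWideRegs())
        return RC;
      return TargetRegisterInfo::getLargestLegalSuperClass(RC, MF);
    }
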
diff --git a/lib/CodeGen/MachineScheduler.cpp b/lib/CodeGen/MachineScheduler.cpp
index 89ac6a8..7a3c80b 100644
--- a/lib/CodeGen/MachineScheduler.cpp
+++ b/lib/CodeGen/MachineScheduler.cpp
@@ -209,6 +209,11 @@ static MachineSchedRegistry
DefaultSchedRegistry("default", "Use the target's default scheduler choice.",
useDefaultMachineSched);
+static cl::opt<bool> EnableMachineSched(
+ "enable-misched",
+ cl::desc("Enable the machine instruction scheduling pass."), cl::init(true),
+ cl::Hidden);
+
/// Forward declare the standard machine scheduler. This will be used as the
/// default scheduler if the target does not set a default.
static ScheduleDAGInstrs *createGenericSchedLive(MachineSchedContext *C);
@@ -304,6 +309,12 @@ ScheduleDAGInstrs *PostMachineScheduler::createPostMachineScheduler() {
/// design would be to split blocks at scheduling boundaries, but LLVM has a
/// general bias against block splitting purely for implementation simplicity.
bool MachineScheduler::runOnMachineFunction(MachineFunction &mf) {
+ if (EnableMachineSched.getNumOccurrences()) {
+ if (!EnableMachineSched)
+ return false;
+ } else if (!mf.getSubtarget().enableMachineScheduler())
+ return false;
+
DEBUG(dbgs() << "Before MISsched:\n"; mf.print(dbgs()));
// Initialize the context of the pass.
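
This replaces the ternary cl::boolOrDefault plumbing removed from Passes.cpp
below with a simpler rule: an explicit command-line occurrence always wins,
otherwise the subtarget decides. A distilled sketch of the pattern
(EnableFoo and shouldRunFoo are invented names):

    #include "llvm/Support/CommandLine.h"
    #include "llvm/Target/TargetSubtargetInfo.h"
    using namespace llvm;

    static cl::opt<bool> EnableFoo("enable-foo", cl::init(true), cl::Hidden);

    static bool shouldRunFoo(const TargetSubtargetInfo &ST) {
      if (EnableFoo.getNumOccurrences()) // user passed the flag explicitly
        return EnableFoo;
      return ST.enableMachineScheduler(); // otherwise the target's default
    }
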
diff --git a/lib/CodeGen/MachineVerifier.cpp b/lib/CodeGen/MachineVerifier.cpp
index bdb094f..991241e 100644
--- a/lib/CodeGen/MachineVerifier.cpp
+++ b/lib/CodeGen/MachineVerifier.cpp
@@ -397,7 +397,7 @@ void MachineVerifier::report(const char *msg,
assert(MO);
report(msg, MO->getParent());
errs() << "- operand " << MONum << ": ";
- MO->print(errs(), TM);
+ MO->print(errs(), TRI);
errs() << "\n";
}
@@ -739,7 +739,7 @@ void MachineVerifier::verifyInlineAsm(const MachineInstr *MI) {
if (!isUInt<5>(MI->getOperand(1).getImm()))
report("Unknown asm flags", &MI->getOperand(1), 1);
- assert(InlineAsm::MIOp_FirstOperand == 2 && "Asm format changed");
+ static_assert(InlineAsm::MIOp_FirstOperand == 2, "Asm format changed");
unsigned OpNo = InlineAsm::MIOp_FirstOperand;
unsigned NumOps;
@@ -927,7 +927,7 @@ MachineVerifier::visitMachineOperand(const MachineOperand *MO, unsigned MONum) {
TII->getRegClass(MCID, MONum, TRI, *MF)) {
if (SubIdx) {
const TargetRegisterClass *SuperRC =
- TRI->getLargestLegalSuperClass(RC);
+ TRI->getLargestLegalSuperClass(RC, *MF);
if (!SuperRC) {
report("No largest legal super class exists.", MO, MONum);
return;
@@ -1573,7 +1573,8 @@ void MachineVerifier::verifyLiveRangeSegment(const LiveRange &LR,
if (!hasRead) {
// When tracking subregister liveness, the main range must start new
// values on partial register writes, even if there is no read.
- if (!MRI->tracksSubRegLiveness() || LaneMask != 0 || !hasSubRegDef) {
+ if (!MRI->shouldTrackSubRegLiveness(Reg) || LaneMask != 0 ||
+ !hasSubRegDef) {
report("Instruction ending live segment doesn't read the register",
MI);
errs() << S << " in " << LR << '\n';
@@ -1649,40 +1650,35 @@ void MachineVerifier::verifyLiveRange(const LiveRange &LR, unsigned Reg,
}
void MachineVerifier::verifyLiveInterval(const LiveInterval &LI) {
- verifyLiveRange(LI, LI.reg);
-
unsigned Reg = LI.reg;
- if (TargetRegisterInfo::isVirtualRegister(Reg)) {
- unsigned Mask = 0;
- unsigned MaxMask = MRI->getMaxLaneMaskForVReg(Reg);
- for (const LiveInterval::SubRange &SR : LI.subranges()) {
- if ((Mask & SR.LaneMask) != 0)
- report("Lane masks of sub ranges overlap in live interval", MF, LI);
- if ((SR.LaneMask & ~MaxMask) != 0)
- report("Subrange lanemask is invalid", MF, LI);
- Mask |= SR.LaneMask;
- verifyLiveRange(SR, LI.reg, SR.LaneMask);
- if (!LI.covers(SR))
- report("A Subrange is not covered by the main range", MF, LI);
- }
- } else if (LI.hasSubRanges()) {
- report("subregister liveness only allowed for virtual registers", MF, LI);
+ assert(TargetRegisterInfo::isVirtualRegister(Reg));
+ verifyLiveRange(LI, Reg);
+
+ unsigned Mask = 0;
+ unsigned MaxMask = MRI->getMaxLaneMaskForVReg(Reg);
+ for (const LiveInterval::SubRange &SR : LI.subranges()) {
+ if ((Mask & SR.LaneMask) != 0)
+ report("Lane masks of sub ranges overlap in live interval", MF, LI);
+ if ((SR.LaneMask & ~MaxMask) != 0)
+ report("Subrange lanemask is invalid", MF, LI);
+ Mask |= SR.LaneMask;
+ verifyLiveRange(SR, LI.reg, SR.LaneMask);
+ if (!LI.covers(SR))
+ report("A Subrange is not covered by the main range", MF, LI);
}
// Check the LI only has one connected component.
- if (TargetRegisterInfo::isVirtualRegister(LI.reg)) {
- ConnectedVNInfoEqClasses ConEQ(*LiveInts);
- unsigned NumComp = ConEQ.Classify(&LI);
- if (NumComp > 1) {
- report("Multiple connected components in live interval", MF, LI);
- for (unsigned comp = 0; comp != NumComp; ++comp) {
- errs() << comp << ": valnos";
- for (LiveInterval::const_vni_iterator I = LI.vni_begin(),
- E = LI.vni_end(); I!=E; ++I)
- if (comp == ConEQ.getEqClass(*I))
- errs() << ' ' << (*I)->id;
- errs() << '\n';
- }
+ ConnectedVNInfoEqClasses ConEQ(*LiveInts);
+ unsigned NumComp = ConEQ.Classify(&LI);
+ if (NumComp > 1) {
+ report("Multiple connected components in live interval", MF, LI);
+ for (unsigned comp = 0; comp != NumComp; ++comp) {
+ errs() << comp << ": valnos";
+ for (LiveInterval::const_vni_iterator I = LI.vni_begin(),
+ E = LI.vni_end(); I!=E; ++I)
+ if (comp == ConEQ.getEqClass(*I))
+ errs() << ' ' << (*I)->id;
+ errs() << '\n';
}
}
}
diff --git a/lib/CodeGen/PHIElimination.cpp b/lib/CodeGen/PHIElimination.cpp
index def2e3d..d514190 100644
--- a/lib/CodeGen/PHIElimination.cpp
+++ b/lib/CodeGen/PHIElimination.cpp
@@ -29,6 +29,7 @@
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetSubtargetInfo.h"
#include <algorithm>
@@ -46,6 +47,10 @@ SplitAllCriticalEdges("phi-elim-split-all-critical-edges", cl::init(false),
cl::Hidden, cl::desc("Split all critical edges during "
"PHI elimination"));
+static cl::opt<bool> NoPhiElimLiveOutEarlyExit(
+ "no-phi-elim-live-out-early-exit", cl::init(false), cl::Hidden,
+ cl::desc("Do not use an early exit if isLiveOutPastPHIs returns true."));
+
namespace {
class PHIElimination : public MachineFunctionPass {
MachineRegisterInfo *MRI; // Machine register information
@@ -573,12 +578,14 @@ bool PHIElimination::SplitPHIEdges(MachineFunction &MF,
// there is a risk it may not be coalesced away.
//
// If the copy would be a kill, there is no need to split the edge.
- if (!isLiveOutPastPHIs(Reg, PreMBB) && !SplitAllCriticalEdges)
+ bool ShouldSplit = isLiveOutPastPHIs(Reg, PreMBB);
+ if (!ShouldSplit && !NoPhiElimLiveOutEarlyExit)
continue;
-
- DEBUG(dbgs() << PrintReg(Reg) << " live-out before critical edge BB#"
- << PreMBB->getNumber() << " -> BB#" << MBB.getNumber()
- << ": " << *BBI);
+ if (ShouldSplit) {
+ DEBUG(dbgs() << PrintReg(Reg) << " live-out before critical edge BB#"
+ << PreMBB->getNumber() << " -> BB#" << MBB.getNumber()
+ << ": " << *BBI);
+ }
// If Reg is not live-in to MBB, it means it must be live-in to some
// other PreMBB successor, and we can avoid the interference by splitting
@@ -588,7 +595,7 @@ bool PHIElimination::SplitPHIEdges(MachineFunction &MF,
// is likely to be left after coalescing. If we are looking at a loop
// exiting edge, split it so we won't insert code in the loop, otherwise
// don't bother.
- bool ShouldSplit = !isLiveIn(Reg, &MBB) || SplitAllCriticalEdges;
+ ShouldSplit = ShouldSplit && !isLiveIn(Reg, &MBB);
// Check for a loop exiting edge.
if (!ShouldSplit && CurLoop != PreLoop) {
@@ -603,7 +610,7 @@ bool PHIElimination::SplitPHIEdges(MachineFunction &MF,
// Split unless this edge is entering CurLoop from an outer loop.
ShouldSplit = PreLoop && !PreLoop->contains(CurLoop);
}
- if (!ShouldSplit)
+ if (!ShouldSplit && !SplitAllCriticalEdges)
continue;
if (!PreMBB->SplitCriticalEdge(&MBB, this)) {
DEBUG(dbgs() << "Failed to split critical edge.\n");
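
Taken together, the rewritten control flow above factors into one decision;
the sketch below restates it with plain booleans standing in for the queries
named in the code (shouldSplitEdge is illustrative only):

    // Early exit unless the value is live out past the PHIs (or the exit is
    // disabled); then split when the value is live-out but not live-in, when
    // the edge exits a loop, or when all critical edges are forced split.
    static bool shouldSplitEdge(bool LiveOutPastPHIs, bool LiveInToMBB,
                                bool IsLoopExitingEdge,
                                bool SplitAllCriticalEdges, bool NoEarlyExit) {
      if (!LiveOutPastPHIs && !NoEarlyExit)
        return false;
      bool ShouldSplit = LiveOutPastPHIs && !LiveInToMBB;
      if (!ShouldSplit && IsLoopExitingEdge)
        ShouldSplit = true;
      return ShouldSplit || SplitAllCriticalEdges;
    }
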
diff --git a/lib/CodeGen/Passes.cpp b/lib/CodeGen/Passes.cpp
index 272d068..c128414 100644
--- a/lib/CodeGen/Passes.cpp
+++ b/lib/CodeGen/Passes.cpp
@@ -23,8 +23,7 @@
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Target/TargetLowering.h"
-#include "llvm/Target/TargetSubtargetInfo.h"
+#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Utils/SymbolRewriter.h"
@@ -55,9 +54,6 @@ static cl::opt<bool> DisableMachineCSE("disable-machine-cse", cl::Hidden,
static cl::opt<cl::boolOrDefault>
OptimizeRegAlloc("optimize-regalloc", cl::Hidden,
cl::desc("Enable optimized register allocation compilation path."));
-static cl::opt<cl::boolOrDefault>
-EnableMachineSched("enable-misched",
- cl::desc("Enable the machine instruction scheduling pass."));
static cl::opt<bool> DisablePostRAMachineLICM("disable-postra-machine-licm",
cl::Hidden,
cl::desc("Disable Machine LICM"));
@@ -116,28 +112,6 @@ static IdentifyingPassPtr applyDisable(IdentifyingPassPtr PassID,
return PassID;
}
-/// Allow Pass selection to be overriden by command line options. This supports
-/// flags with ternary conditions. TargetID is passed through by default. The
-/// pass is suppressed when the option is false. When the option is true, the
-/// StandardID is selected if the target provides no default.
-static IdentifyingPassPtr applyOverride(IdentifyingPassPtr TargetID,
- cl::boolOrDefault Override,
- AnalysisID StandardID) {
- switch (Override) {
- case cl::BOU_UNSET:
- return TargetID;
- case cl::BOU_TRUE:
- if (TargetID.isValid())
- return TargetID;
- if (StandardID == nullptr)
- report_fatal_error("Target cannot enable pass");
- return StandardID;
- case cl::BOU_FALSE:
- return IdentifyingPassPtr();
- }
- llvm_unreachable("Invalid command line option state");
-}
-
/// Allow standard passes to be disabled by the command line, regardless of who
/// is adding the pass.
///
@@ -182,9 +156,6 @@ static IdentifyingPassPtr overridePass(AnalysisID StandardID,
if (StandardID == &MachineCSEID)
return applyDisable(TargetID, DisableMachineCSE);
- if (StandardID == &MachineSchedulerID)
- return applyOverride(TargetID, EnableMachineSched, StandardID);
-
if (StandardID == &TargetPassConfig::PostRAMachineLICMID)
return applyDisable(TargetID, DisablePostRAMachineLICM);
@@ -249,11 +220,6 @@ TargetPassConfig::TargetPassConfig(TargetMachine *tm, PassManagerBase &pm)
// Substitute Pseudo Pass IDs for real ones.
substitutePass(&EarlyTailDuplicateID, &TailDuplicateID);
substitutePass(&PostRAMachineLICMID, &MachineLICMID);
-
- // Temporarily disable experimental passes.
- const TargetSubtargetInfo &ST = *TM->getSubtargetImpl();
- if (!ST.useMachineScheduler())
- disablePass(&MachineSchedulerID);
}
/// Insert InsertedPassID pass after TargetPassID.
@@ -409,10 +375,8 @@ void TargetPassConfig::addIRPasses() {
// Before running any passes, run the verifier to determine if the input
// coming from the front-end and/or optimizer is valid.
- if (!DisableVerify) {
+ if (!DisableVerify)
addPass(createVerifierPass());
- addPass(createDebugInfoVerifierPass());
- }
// Run loop strength reduction before anything else.
if (getOptLevel() != CodeGenOpt::None && !DisableLSR) {
@@ -455,7 +419,11 @@ void TargetPassConfig::addPassesToHandleExceptions() {
addPass(createDwarfEHPass(TM));
break;
case ExceptionHandling::WinEH:
+ // We support using both GCC-style and MSVC-style exceptions on Windows, so
+ // add both preparation passes. Each pass will only actually run if it
+ // recognizes the personality function.
addPass(createWinEHPass(TM));
+ addPass(createDwarfEHPass(TM));
break;
case ExceptionHandling::None:
addPass(createLowerInvokePass());
@@ -479,12 +447,6 @@ void TargetPassConfig::addCodeGenPrepare() {
void TargetPassConfig::addISelPrepare() {
addPreISel();
- // Need to verify DebugInfo *before* creating the stack protector analysis.
- // It's a function pass, and verifying between it and its users causes a
- // crash.
- if (!DisableVerify)
- addPass(createDebugInfoVerifierPass());
-
addPass(createStackProtectorPass(TM));
if (PrintISelInput)
diff --git a/lib/CodeGen/PeepholeOptimizer.cpp b/lib/CodeGen/PeepholeOptimizer.cpp
index 283d1f2..ebe05e3 100644
--- a/lib/CodeGen/PeepholeOptimizer.cpp
+++ b/lib/CodeGen/PeepholeOptimizer.cpp
@@ -76,6 +76,7 @@
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetRegisterInfo.h"
#include "llvm/Target/TargetSubtargetInfo.h"
@@ -411,8 +412,7 @@ optimizeExtInstr(MachineInstr *MI, MachineBasicBlock *MBB,
if (ExtendLife && !ExtendedUses.empty())
// Extend the liveness of the extension result.
- std::copy(ExtendedUses.begin(), ExtendedUses.end(),
- std::back_inserter(Uses));
+ Uses.append(ExtendedUses.begin(), ExtendedUses.end());
// Now replace all uses.
bool Changed = false;
@@ -916,7 +916,7 @@ bool PeepholeOptimizer::optimizeCoalescableCopy(MachineInstr *MI) {
// => v0 = COPY v1
// Currently we haven't seen motivating example for that and we
// want to avoid untested code.
- NumRewrittenCopies += Changed == true;
+ NumRewrittenCopies += Changed;
return Changed;
}
diff --git a/lib/CodeGen/PrologEpilogInserter.cpp b/lib/CodeGen/PrologEpilogInserter.cpp
index 6d29b98..e073e6a 100644
--- a/lib/CodeGen/PrologEpilogInserter.cpp
+++ b/lib/CodeGen/PrologEpilogInserter.cpp
@@ -16,7 +16,6 @@
//
//===----------------------------------------------------------------------===//
-#include "PrologEpilogInserter.h"
#include "llvm/ADT/IndexedMap.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SetVector.h"
@@ -28,6 +27,7 @@
#include "llvm/CodeGen/MachineLoopInfo.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/RegisterScavenging.h"
#include "llvm/CodeGen/StackProtector.h"
#include "llvm/IR/DiagnosticInfo.h"
@@ -48,6 +48,53 @@ using namespace llvm;
#define DEBUG_TYPE "pei"
+namespace {
+class PEI : public MachineFunctionPass {
+public:
+ static char ID;
+ PEI() : MachineFunctionPass(ID) {
+ initializePEIPass(*PassRegistry::getPassRegistry());
+ }
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override;
+
+ /// runOnMachineFunction - Insert prolog/epilog code and replace abstract
+ /// frame indexes with appropriate references.
+ ///
+ bool runOnMachineFunction(MachineFunction &Fn) override;
+
+private:
+ RegScavenger *RS;
+
+ // MinCSFrameIndex, MaxCSFrameIndex - Keeps the range of callee saved
+ // stack frame indexes.
+ unsigned MinCSFrameIndex, MaxCSFrameIndex;
+
+ // Entry and return blocks of the current function.
+ MachineBasicBlock *EntryBlock;
+ SmallVector<MachineBasicBlock *, 4> ReturnBlocks;
+
+ // Flag to control whether to use the register scavenger to resolve
+ // frame index materialization registers. Set according to
+ // TRI->requiresFrameIndexScavenging() for the current function.
+ bool FrameIndexVirtualScavenging;
+
+ void calculateSets(MachineFunction &Fn);
+ void calculateCallsInformation(MachineFunction &Fn);
+ void calculateCalleeSavedRegisters(MachineFunction &Fn);
+ void insertCSRSpillsAndRestores(MachineFunction &Fn);
+ void calculateFrameObjectOffsets(MachineFunction &Fn);
+ void replaceFrameIndices(MachineFunction &Fn);
+ void replaceFrameIndices(MachineBasicBlock *BB, MachineFunction &Fn,
+ int &SPAdj);
+ void scavengeFrameVirtualRegs(MachineFunction &Fn);
+ void insertPrologEpilogCode(MachineFunction &Fn);
+
+ // Convenience for recognizing return blocks.
+ bool isReturnBlock(MachineBasicBlock *MBB);
+};
+} // namespace
+
char PEI::ID = 0;
char &llvm::PrologEpilogCodeInserterID = PEI::ID;
@@ -810,17 +857,6 @@ void PEI::replaceFrameIndices(MachineBasicBlock *BB, MachineFunction &Fn,
continue;
}
- // Frame allocations are target independent. Simply swap the index with
- // the offset.
- if (MI->getOpcode() == TargetOpcode::FRAME_ALLOC) {
- assert(TFI->hasFP(Fn) && "frame alloc requires FP");
- MachineOperand &FI = MI->getOperand(i);
- unsigned Reg;
- int FrameOffset = TFI->getFrameIndexReference(Fn, FI.getIndex(), Reg);
- FI.ChangeToImmediate(FrameOffset);
- continue;
- }
-
// Some instructions (e.g. inline asm instructions) can have
// multiple frame indices and/or cause eliminateFrameIndex
// to insert more than one instruction. We need the register
diff --git a/lib/CodeGen/PrologEpilogInserter.h b/lib/CodeGen/PrologEpilogInserter.h
deleted file mode 100644
index f88b8ef..0000000
--- a/lib/CodeGen/PrologEpilogInserter.h
+++ /dev/null
@@ -1,78 +0,0 @@
-//===-- PrologEpilogInserter.h - Prolog/Epilog code insertion -*- C++ -*---===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This pass is responsible for finalizing the functions frame layout, saving
-// callee saved registers, and for emitting prolog & epilog code for the
-// function.
-//
-// This pass must be run after register allocation. After this pass is
-// executed, it is illegal to construct MO_FrameIndex operands.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_LIB_CODEGEN_PROLOGEPILOGINSERTER_H
-#define LLVM_LIB_CODEGEN_PROLOGEPILOGINSERTER_H
-
-#include "llvm/ADT/DenseMap.h"
-#include "llvm/ADT/SparseBitVector.h"
-#include "llvm/CodeGen/MachineFunctionPass.h"
-#include "llvm/CodeGen/MachineLoopInfo.h"
-#include "llvm/CodeGen/Passes.h"
-#include "llvm/Target/TargetRegisterInfo.h"
-
-namespace llvm {
- class RegScavenger;
- class MachineBasicBlock;
-
- class PEI : public MachineFunctionPass {
- public:
- static char ID;
- PEI() : MachineFunctionPass(ID) {
- initializePEIPass(*PassRegistry::getPassRegistry());
- }
-
- void getAnalysisUsage(AnalysisUsage &AU) const override;
-
- /// runOnMachineFunction - Insert prolog/epilog code and replace abstract
- /// frame indexes with appropriate references.
- ///
- bool runOnMachineFunction(MachineFunction &Fn) override;
-
- private:
- RegScavenger *RS;
-
- // MinCSFrameIndex, MaxCSFrameIndex - Keeps the range of callee saved
- // stack frame indexes.
- unsigned MinCSFrameIndex, MaxCSFrameIndex;
-
- // Entry and return blocks of the current function.
- MachineBasicBlock* EntryBlock;
- SmallVector<MachineBasicBlock*, 4> ReturnBlocks;
-
- // Flag to control whether to use the register scavenger to resolve
- // frame index materialization registers. Set according to
- // TRI->requiresFrameIndexScavenging() for the curren function.
- bool FrameIndexVirtualScavenging;
-
- void calculateSets(MachineFunction &Fn);
- void calculateCallsInformation(MachineFunction &Fn);
- void calculateCalleeSavedRegisters(MachineFunction &Fn);
- void insertCSRSpillsAndRestores(MachineFunction &Fn);
- void calculateFrameObjectOffsets(MachineFunction &Fn);
- void replaceFrameIndices(MachineFunction &Fn);
- void replaceFrameIndices(MachineBasicBlock *BB, MachineFunction &Fn,
- int &SPAdj);
- void scavengeFrameVirtualRegs(MachineFunction &Fn);
- void insertPrologEpilogCode(MachineFunction &Fn);
-
- // Convenience for recognizing return blocks.
- bool isReturnBlock(MachineBasicBlock* MBB);
- };
-} // End llvm namespace
-#endif
diff --git a/lib/CodeGen/RegAllocBase.cpp b/lib/CodeGen/RegAllocBase.cpp
index 6b346f4..16ff48e 100644
--- a/lib/CodeGen/RegAllocBase.cpp
+++ b/lib/CodeGen/RegAllocBase.cpp
@@ -27,6 +27,7 @@
#endif
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Support/Timer.h"
diff --git a/lib/CodeGen/RegAllocGreedy.cpp b/lib/CodeGen/RegAllocGreedy.cpp
index edc3294..e94f1bb 100644
--- a/lib/CodeGen/RegAllocGreedy.cpp
+++ b/lib/CodeGen/RegAllocGreedy.cpp
@@ -1554,7 +1554,8 @@ RAGreedy::tryInstructionSplit(LiveInterval &VirtReg, AllocationOrder &Order,
DEBUG(dbgs() << "Split around " << Uses.size() << " individual instrs.\n");
- const TargetRegisterClass *SuperRC = TRI->getLargestLegalSuperClass(CurRC);
+ const TargetRegisterClass *SuperRC =
+ TRI->getLargestLegalSuperClass(CurRC, *MF);
unsigned SuperRCNumAllocatableRegs = RCI.getNumAllocatableRegs(SuperRC);
// Split around every non-copy instruction if this split will relax
// the constraints on the virtual register.
diff --git a/lib/CodeGen/RegAllocPBQP.cpp b/lib/CodeGen/RegAllocPBQP.cpp
index 77a42b3..eeff73d 100644
--- a/lib/CodeGen/RegAllocPBQP.cpp
+++ b/lib/CodeGen/RegAllocPBQP.cpp
@@ -178,8 +178,40 @@ class Interference : public PBQPRAConstraint {
private:
typedef const PBQP::RegAlloc::AllowedRegVector* AllowedRegVecPtr;
- typedef std::pair<AllowedRegVecPtr, AllowedRegVecPtr> IMatrixKey;
- typedef DenseMap<IMatrixKey, PBQPRAGraph::MatrixPtr> IMatrixCache;
+ typedef std::pair<AllowedRegVecPtr, AllowedRegVecPtr> IKey;
+ typedef DenseMap<IKey, PBQPRAGraph::MatrixPtr> IMatrixCache;
+ typedef DenseSet<IKey> DisjointAllowedRegsCache;
+ typedef std::pair<PBQP::GraphBase::NodeId, PBQP::GraphBase::NodeId> IEdgeKey;
+ typedef DenseSet<IEdgeKey> IEdgeCache;
+
+ bool haveDisjointAllowedRegs(const PBQPRAGraph &G, PBQPRAGraph::NodeId NId,
+ PBQPRAGraph::NodeId MId,
+ const DisjointAllowedRegsCache &D) const {
+ const auto *NRegs = &G.getNodeMetadata(NId).getAllowedRegs();
+ const auto *MRegs = &G.getNodeMetadata(MId).getAllowedRegs();
+
+ if (NRegs == MRegs)
+ return false;
+
+ if (NRegs < MRegs)
+ return D.count(IKey(NRegs, MRegs)) > 0;
+
+ return D.count(IKey(MRegs, NRegs)) > 0;
+ }
+
+ void setDisjointAllowedRegs(const PBQPRAGraph &G, PBQPRAGraph::NodeId NId,
+ PBQPRAGraph::NodeId MId,
+ DisjointAllowedRegsCache &D) {
+ const auto *NRegs = &G.getNodeMetadata(NId).getAllowedRegs();
+ const auto *MRegs = &G.getNodeMetadata(MId).getAllowedRegs();
+
+ assert(NRegs != MRegs && "AllowedRegs cannot be disjoint with itself");
+
+ if (NRegs < MRegs)
+ D.insert(IKey(NRegs, MRegs));
+ else
+ D.insert(IKey(MRegs, NRegs));
+ }
// Holds (Interval, CurrentSegmentID, and NodeId). The first two are required
// for the fast interference graph construction algorithm. The last is there
@@ -247,6 +279,13 @@ public:
// and uniquing them.
IMatrixCache C;
+ // Finding an edge is expensive in the worst case (O(max_clique(G))), so
+ // locally cache the edges we have already seen.
+ IEdgeCache EC;
+
+ // Cache pairs of allowed-register vectors that are known to be disjoint.
+ DisjointAllowedRegsCache D;
+
typedef std::set<IntervalInfo, decltype(&lowestEndPoint)> IntervalSet;
typedef std::priority_queue<IntervalInfo, std::vector<IntervalInfo>,
decltype(&lowestStartPoint)> IntervalQueue;
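
Both cache helpers above rely on ordering the two pointers before forming the
key, so that (A, B) and (B, A) hit the same entry. In isolation (makeKey is
an invented name):

    #include <utility>

    using IKey = std::pair<const void *, const void *>;

    // Canonicalize the pair: the smaller pointer always comes first.
    static IKey makeKey(const void *A, const void *B) {
      return A < B ? IKey(A, B) : IKey(B, A);
    }
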
@@ -290,14 +329,21 @@ public:
for (const auto &A : Active) {
PBQP::GraphBase::NodeId MId = getNodeId(A);
+ // Do not add an edge when the nodes' allowed registers do not
+ // intersect: there is obviously no interference.
+ if (haveDisjointAllowedRegs(G, NId, MId, D))
+ continue;
+
// Check that we haven't already added this edge
- // FIXME: findEdge is expensive in the worst case (O(max_clique(G))).
- // It might be better to replace this with a local bit-matrix.
- if (G.findEdge(NId, MId) != PBQPRAGraph::invalidEdgeId())
+ IEdgeKey EK(std::min(NId, MId), std::max(NId, MId));
+ if (EC.count(EK))
continue;
// This is a new edge - add it to the graph.
- createInterferenceEdge(G, NId, MId, C);
+ if (!createInterferenceEdge(G, NId, MId, C))
+ setDisjointAllowedRegs(G, NId, MId, D);
+ else
+ EC.insert(EK);
}
// Finally, add Cur to the Active set.
@@ -307,35 +353,48 @@ public:
private:
- void createInterferenceEdge(PBQPRAGraph &G, PBQPRAGraph::NodeId NId,
- PBQPRAGraph::NodeId MId, IMatrixCache &C) {
+ // Create an interference edge and add it to the graph, unless the edge
+ // cost matrix is null, meaning the nodes' allowed registers never
+ // interfere. This case occurs frequently between integer and floating
+ // point registers, for example.
+ // Returns true iff the two nodes interfere.
+ bool createInterferenceEdge(PBQPRAGraph &G,
+ PBQPRAGraph::NodeId NId, PBQPRAGraph::NodeId MId,
+ IMatrixCache &C) {
const TargetRegisterInfo &TRI =
*G.getMetadata().MF.getSubtarget().getRegisterInfo();
-
const auto &NRegs = G.getNodeMetadata(NId).getAllowedRegs();
const auto &MRegs = G.getNodeMetadata(MId).getAllowedRegs();
// Try looking the edge costs up in the IMatrixCache first.
- IMatrixKey K(&NRegs, &MRegs);
+ IKey K(&NRegs, &MRegs);
IMatrixCache::iterator I = C.find(K);
if (I != C.end()) {
G.addEdgeBypassingCostAllocator(NId, MId, I->second);
- return;
+ return true;
}
PBQPRAGraph::RawMatrix M(NRegs.size() + 1, MRegs.size() + 1, 0);
+ bool NodesInterfere = false;
for (unsigned I = 0; I != NRegs.size(); ++I) {
unsigned PRegN = NRegs[I];
for (unsigned J = 0; J != MRegs.size(); ++J) {
unsigned PRegM = MRegs[J];
- if (TRI.regsOverlap(PRegN, PRegM))
+ if (TRI.regsOverlap(PRegN, PRegM)) {
M[I + 1][J + 1] = std::numeric_limits<PBQP::PBQPNum>::infinity();
+ NodesInterfere = true;
+ }
}
}
+ if (!NodesInterfere)
+ return false;
+
PBQPRAGraph::EdgeId EId = G.addEdge(NId, MId, std::move(M));
C[K] = G.getEdgeCostsPtr(EId);
+
+ return true;
}
};
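
The early-out added to createInterferenceEdge reduces to the check below: if
no pair of allowed physical registers overlaps, the cost matrix is all zeros
and the edge is not worth adding (anyOverlap is an illustrative reduction,
not a helper in the patch):

    #include "llvm/ADT/ArrayRef.h"
    #include "llvm/Target/TargetRegisterInfo.h"
    using namespace llvm;

    static bool anyOverlap(const TargetRegisterInfo &TRI,
                           ArrayRef<unsigned> NRegs, ArrayRef<unsigned> MRegs) {
      for (unsigned PRegN : NRegs)
        for (unsigned PRegM : MRegs)
          if (TRI.regsOverlap(PRegN, PRegM))
            return true; // at least one infinite-cost entry: a real edge
      return false;      // null matrix: record the pair as disjoint instead
    }
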
diff --git a/lib/CodeGen/RegisterClassInfo.cpp b/lib/CodeGen/RegisterClassInfo.cpp
index ab33672..178fa18 100644
--- a/lib/CodeGen/RegisterClassInfo.cpp
+++ b/lib/CodeGen/RegisterClassInfo.cpp
@@ -131,7 +131,8 @@ void RegisterClassInfo::compute(const TargetRegisterClass *RC) const {
RCI.NumRegs = StressRA;
// Check if RC is a proper sub-class.
- if (const TargetRegisterClass *Super = TRI->getLargestLegalSuperClass(RC))
+ if (const TargetRegisterClass *Super =
+ TRI->getLargestLegalSuperClass(RC, *MF))
if (Super != RC && getNumAllocatableRegs(Super) > RCI.NumRegs)
RCI.ProperSubClass = true;
@@ -175,6 +176,6 @@ unsigned RegisterClassInfo::computePSetLimit(unsigned Idx) const {
}
compute(RC);
unsigned NReserved = RC->getNumRegs() - getNumAllocatableRegs(RC);
- return TRI->getRegPressureSetLimit(Idx)
- - TRI->getRegClassWeight(RC).RegWeight * NReserved;
+ return TRI->getRegPressureSetLimit(*MF, Idx) -
+ TRI->getRegClassWeight(RC).RegWeight * NReserved;
}
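
For intuition, a worked instance of the revised computePSetLimit arithmetic
with invented numbers: a class of 32 registers of weight 1, of which 28 are
allocatable, gives NReserved = 32 - 28 = 4; with a per-function pressure-set
limit of 24, the returned limit is 24 - 1 * 4 = 20.
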
diff --git a/lib/CodeGen/RegisterCoalescer.cpp b/lib/CodeGen/RegisterCoalescer.cpp
index 1e4cfe8..9e3cf41 100644
--- a/lib/CodeGen/RegisterCoalescer.cpp
+++ b/lib/CodeGen/RegisterCoalescer.cpp
@@ -58,6 +58,10 @@ EnableJoining("join-liveintervals",
cl::desc("Coalesce copies (default=true)"),
cl::init(true));
+static cl::opt<bool> UseTerminalRule("terminal-rule",
+ cl::desc("Apply the terminal rule"),
+ cl::init(false));
+
/// Temporary flag to test critical edge unsplitting.
static cl::opt<bool>
EnableJoinSplits("join-splitedges",
@@ -160,12 +164,14 @@ namespace {
/// LaneMask are split as necessary. @p LaneMask are the lanes that
/// @p ToMerge will occupy in the coalescer register. @p LI has its subrange
/// lanemasks already adjusted to the coalesced register.
- void mergeSubRangeInto(LiveInterval &LI, const LiveRange &ToMerge,
+ /// @returns false if live range conflicts couldn't be resolved.
+ bool mergeSubRangeInto(LiveInterval &LI, const LiveRange &ToMerge,
unsigned LaneMask, CoalescerPair &CP);
/// Join the liveranges of two subregisters. Joins @p RRange into
/// @p LRange, @p RRange may be invalid afterwards.
- void joinSubRegRanges(LiveRange &LRange, LiveRange &RRange,
+ /// @returns false if live range conflicts couldn't be resolved.
+ bool joinSubRegRanges(LiveRange &LRange, LiveRange &RRange,
unsigned LaneMask, const CoalescerPair &CP);
/// We found a non-trivially-coalescable copy. If the source value number is
@@ -204,6 +210,20 @@ namespace {
/// Returns true if @p CopyMI was a copy of an undef value and eliminated.
bool eliminateUndefCopy(MachineInstr *CopyMI);
+ /// Check whether or not we should apply the terminal rule to the
+ /// destination (Dst) of \p Copy.
+ /// When the terminal rule applies, Copy is not profitable to
+ /// coalesce.
+ /// Dst is terminal if it has exactly one affinity (Dst, Src) and
+ /// at least one interference (Dst, Dst2). If Dst is terminal, the
+ /// terminal rule consists of checking that at least one of the
+ /// interfering nodes, say Dst2, has an affinity of equal or greater
+ /// weight with Src.
+ /// In that case, Dst2 and Dst cannot both be coalesced with Src.
+ /// Since Dst2 exposes more coalescing opportunities than Dst, we
+ /// can drop \p Copy.
+ bool applyTerminalRule(const MachineInstr &Copy) const;
+
public:
static char ID; ///< Class identification, replacement for typeinfo
RegisterCoalescer() : MachineFunctionPass(ID) {
@@ -1143,7 +1163,7 @@ void RegisterCoalescer::updateRegDefsUses(unsigned SrcReg,
// A subreg use of a partially undef (super) register may be a complete
// undef use now and then has to be marked that way.
- if (SubIdx != 0 && MO.isUse() && MRI->tracksSubRegLiveness()) {
+ if (SubIdx != 0 && MO.isUse() && MRI->shouldTrackSubRegLiveness(DstReg)) {
if (!DstInt->hasSubRanges()) {
BumpPtrAllocator &Allocator = LIS->getVNInfoAllocator();
unsigned Mask = MRI->getMaxLaneMaskForVReg(DstInt->reg);
@@ -1756,6 +1776,9 @@ public:
void eraseInstrs(SmallPtrSetImpl<MachineInstr*> &ErasedInstrs,
SmallVectorImpl<unsigned> &ShrinkRegs);
+ /// Remove live-range defs at places where implicit defs will be removed.
+ void removeImplicitDefs();
+
/// Get the value assignments suitable for passing to LiveInterval::join.
const int *getAssignments() const { return Assignments.data(); }
};
@@ -1856,7 +1879,11 @@ JoinVals::analyzeValue(unsigned ValNo, JoinVals &Other) {
assert(DefMI != nullptr);
if (SubRangeJoin) {
// We don't care about the lanes when joining subregister ranges.
- V.ValidLanes = V.WriteLanes = 1;
+ V.WriteLanes = V.ValidLanes = 1;
+ if (DefMI->isImplicitDef()) {
+ V.ValidLanes = 0;
+ V.ErasableImplicitDef = true;
+ }
} else {
bool Redef = false;
V.ValidLanes = V.WriteLanes = computeWriteLanes(DefMI, Redef);
@@ -2339,6 +2366,18 @@ void JoinVals::pruneSubRegValues(LiveInterval &LI, unsigned &ShrinkMask)
LI.removeEmptySubRanges();
}
+void JoinVals::removeImplicitDefs() {
+ for (unsigned i = 0, e = LR.getNumValNums(); i != e; ++i) {
+ Val &V = Vals[i];
+ if (V.Resolution != CR_Keep || !V.ErasableImplicitDef || !V.Pruned)
+ continue;
+
+ VNInfo *VNI = LR.getValNumInfo(i);
+ VNI->markUnused();
+ LR.removeValNo(VNI);
+ }
+}
+
void JoinVals::eraseInstrs(SmallPtrSetImpl<MachineInstr*> &ErasedInstrs,
SmallVectorImpl<unsigned> &ShrinkRegs) {
for (unsigned i = 0, e = LR.getNumValNums(); i != e; ++i) {
@@ -2382,7 +2421,7 @@ void JoinVals::eraseInstrs(SmallPtrSetImpl<MachineInstr*> &ErasedInstrs,
}
}
-void RegisterCoalescer::joinSubRegRanges(LiveRange &LRange, LiveRange &RRange,
+bool RegisterCoalescer::joinSubRegRanges(LiveRange &LRange, LiveRange &RRange,
unsigned LaneMask,
const CoalescerPair &CP) {
SmallVector<VNInfo*, 16> NewVNInfo;
@@ -2392,12 +2431,19 @@ void RegisterCoalescer::joinSubRegRanges(LiveRange &LRange, LiveRange &RRange,
NewVNInfo, CP, LIS, TRI, true, true);
// Compute NewVNInfo and resolve conflicts (see also joinVirtRegs())
- // Conflicts should already be resolved so the mapping/resolution should
- // always succeed.
- if (!LHSVals.mapValues(RHSVals) || !RHSVals.mapValues(LHSVals))
- llvm_unreachable("Can't join subrange although main ranges are compatible");
- if (!LHSVals.resolveConflicts(RHSVals) || !RHSVals.resolveConflicts(LHSVals))
- llvm_unreachable("Can't join subrange although main ranges are compatible");
+ // We should be able to resolve all conflicts here, since we already did so
+ // successfully on the main range. There is, however, a problem when
+ // multiple ranges get mapped to the "overflow" lane mask bit, which
+ // creates unexpected interferences.
+ if (!LHSVals.mapValues(RHSVals) || !RHSVals.mapValues(LHSVals)) {
+ DEBUG(dbgs() << "*** Couldn't join subrange!\n");
+ return false;
+ }
+ if (!LHSVals.resolveConflicts(RHSVals) ||
+ !RHSVals.resolveConflicts(LHSVals)) {
+ DEBUG(dbgs() << "*** Couldn't join subrange!\n");
+ return false;
+ }
// The merging algorithm in LiveInterval::join() can't handle conflicting
// value mappings, so we need to remove any live ranges that overlap a
@@ -2407,6 +2453,9 @@ void RegisterCoalescer::joinSubRegRanges(LiveRange &LRange, LiveRange &RRange,
LHSVals.pruneValues(RHSVals, EndPoints, false);
RHSVals.pruneValues(LHSVals, EndPoints, false);
+ LHSVals.removeImplicitDefs();
+ RHSVals.removeImplicitDefs();
+
LRange.verify();
RRange.verify();
@@ -2416,16 +2465,17 @@ void RegisterCoalescer::joinSubRegRanges(LiveRange &LRange, LiveRange &RRange,
DEBUG(dbgs() << "\t\tjoined lanes: " << LRange << "\n");
if (EndPoints.empty())
- return;
+ return true;
// Recompute the parts of the live range we had to remove because of
// CR_Replace conflicts.
DEBUG(dbgs() << "\t\trestoring liveness to " << EndPoints.size()
<< " points: " << LRange << '\n');
LIS->extendToIndices(LRange, EndPoints);
+ return true;
}
-void RegisterCoalescer::mergeSubRangeInto(LiveInterval &LI,
+bool RegisterCoalescer::mergeSubRangeInto(LiveInterval &LI,
const LiveRange &ToMerge,
unsigned LaneMask, CoalescerPair &CP) {
BumpPtrAllocator &Allocator = LIS->getVNInfoAllocator();
@@ -2453,7 +2503,8 @@ void RegisterCoalescer::mergeSubRangeInto(LiveInterval &LI,
CommonRange = &R;
}
LiveRange RangeCopy(ToMerge, Allocator);
- joinSubRegRanges(*CommonRange, RangeCopy, Common, CP);
+ if (!joinSubRegRanges(*CommonRange, RangeCopy, Common, CP))
+ return false;
LaneMask &= ~RMask;
}
@@ -2461,13 +2512,14 @@ void RegisterCoalescer::mergeSubRangeInto(LiveInterval &LI,
DEBUG(dbgs() << format("\t\tNew Lane %04X\n", LaneMask));
LI.createSubRangeFrom(Allocator, LaneMask, ToMerge);
}
+ return true;
}
bool RegisterCoalescer::joinVirtRegs(CoalescerPair &CP) {
SmallVector<VNInfo*, 16> NewVNInfo;
LiveInterval &RHS = LIS->getInterval(CP.getSrcReg());
LiveInterval &LHS = LIS->getInterval(CP.getDstReg());
- bool TrackSubRegLiveness = MRI->tracksSubRegLiveness();
+ bool TrackSubRegLiveness = MRI->shouldTrackSubRegLiveness(*CP.getNewRC());
JoinVals RHSVals(RHS, CP.getSrcReg(), CP.getSrcIdx(), 0, NewVNInfo, CP, LIS,
TRI, false, TrackSubRegLiveness);
JoinVals LHSVals(LHS, CP.getDstReg(), CP.getDstIdx(), 0, NewVNInfo, CP, LIS,
@@ -2511,22 +2563,40 @@ bool RegisterCoalescer::joinVirtRegs(CoalescerPair &CP) {
// Determine lanemasks of RHS in the coalesced register and merge subranges.
unsigned SrcIdx = CP.getSrcIdx();
+ bool Abort = false;
if (!RHS.hasSubRanges()) {
unsigned Mask = SrcIdx == 0 ? CP.getNewRC()->getLaneMask()
: TRI->getSubRegIndexLaneMask(SrcIdx);
- mergeSubRangeInto(LHS, RHS, Mask, CP);
+ if (!mergeSubRangeInto(LHS, RHS, Mask, CP))
+ Abort = true;
} else {
// Pair up subranges and merge.
for (LiveInterval::SubRange &R : RHS.subranges()) {
unsigned Mask = TRI->composeSubRegIndexLaneMask(SrcIdx, R.LaneMask);
- mergeSubRangeInto(LHS, R, Mask, CP);
+ if (!mergeSubRangeInto(LHS, R, Mask, CP)) {
+ Abort = true;
+ break;
+ }
}
}
+ if (Abort) {
+ // This shouldn't have happened :-(
+ // However, we are aware of at least one existing problem where we
+ // can't merge subranges when multiple ranges end up in the
+ // "overflow bit" 32. As a workaround we drop all subregister ranges,
+ // which means we lose some precision but are back to a well-defined
+ // state.
+ assert((CP.getNewRC()->getLaneMask() & 0x80000000u)
+ && "SubRange merge should only fail when merging into bit 32.");
+ DEBUG(dbgs() << "\tSubrange join aborted!\n");
+ LHS.clearSubRanges();
+ RHS.clearSubRanges();
+ } else {
+ DEBUG(dbgs() << "\tJoined SubRanges " << LHS << "\n");
- DEBUG(dbgs() << "\tJoined SubRanges " << LHS << "\n");
-
- LHSVals.pruneSubRegValues(LHS, ShrinkMask);
- RHSVals.pruneSubRegValues(LHS, ShrinkMask);
+ LHSVals.pruneSubRegValues(LHS, ShrinkMask);
+ RHSVals.pruneSubRegValues(LHS, ShrinkMask);
+ }
}
// The merging algorithm in LiveInterval::join() can't handle conflicting
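
For context on the "overflow bit" mentioned above: lane masks in this release
are 32-bit, so targets with more distinct subregister lanes than mask bits
fold the excess lanes into the top bit (0x80000000u, "bit 32" in the comment
above), and distinct ranges mapped there can appear to conflict even when the
underlying lanes are disjoint. A one-line restatement of the assert's test
(usesOverflowLane is an invented name):

    // True when LaneMask includes the shared "overflow" lane.
    static bool usesOverflowLane(unsigned LaneMask) {
      return (LaneMask & 0x80000000u) != 0;
    }
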
@@ -2645,6 +2715,58 @@ copyCoalesceWorkList(MutableArrayRef<MachineInstr*> CurrList) {
return Progress;
}
+/// Check if DstReg is a terminal node.
+/// I.e., it does not have any affinity other than \p Copy.
+static bool isTerminalReg(unsigned DstReg, const MachineInstr &Copy,
+ const MachineRegisterInfo *MRI) {
+ assert(Copy.isCopyLike());
+ // Check if the destination of this copy has any other affinity.
+ for (const MachineInstr &MI : MRI->reg_nodbg_instructions(DstReg))
+ if (&MI != &Copy && MI.isCopyLike())
+ return false;
+ return true;
+}
+
+bool RegisterCoalescer::applyTerminalRule(const MachineInstr &Copy) const {
+ assert(Copy.isCopyLike());
+ if (!UseTerminalRule)
+ return false;
+ // Check if the destination of this copy has any other affinity.
+ unsigned DstReg = Copy.getOperand(0).getReg();
+ if (TargetRegisterInfo::isPhysicalRegister(DstReg) ||
+ !isTerminalReg(DstReg, Copy, MRI))
+ return false;
+
+ // DstReg is a terminal node. Check if it interferes with any other
+ // copy involving SrcReg.
+ unsigned SrcReg = Copy.getOperand(1).getReg();
+ const MachineBasicBlock *OrigBB = Copy.getParent();
+ const LiveInterval &DstLI = LIS->getInterval(DstReg);
+ for (const MachineInstr &MI : MRI->reg_nodbg_instructions(SrcReg)) {
+ // Technically we should check if the weight of the new copy is
+ // interesting compared to the other one and update the weight
+ // of the copies accordingly. However, this would only work if
+ // we would gather all the copies first then coalesce, whereas
+ // right now we interleave both actions.
+ // For now, just consider the copies that are in the same block.
+ if (&MI == &Copy || !MI.isCopyLike() || MI.getParent() != OrigBB)
+ continue;
+ unsigned OtherReg = MI.getOperand(0).getReg();
+ if (OtherReg == SrcReg)
+ OtherReg = MI.getOperand(1).getReg();
+ // Check if OtherReg is a non-terminal.
+ if (TargetRegisterInfo::isPhysicalRegister(OtherReg) ||
+ isTerminalReg(OtherReg, MI, MRI))
+ continue;
+ // Check whether OtherReg interferes with DstReg.
+ if (LIS->getInterval(OtherReg).overlaps(DstLI)) {
+ DEBUG(dbgs() << "Apply terminal rule for: " << PrintReg(DstReg) << '\n');
+ return true;
+ }
+ }
+ return false;
+}
+
void
RegisterCoalescer::copyCoalesceInMBB(MachineBasicBlock *MBB) {
DEBUG(dbgs() << MBB->getName() << ":\n");
@@ -2659,7 +2781,7 @@ RegisterCoalescer::copyCoalesceInMBB(MachineBasicBlock *MBB) {
// cmp+jmp macro fusion.
for (MachineBasicBlock::iterator MII = MBB->begin(), E = MBB->end();
MII != E; ++MII) {
- if (!MII->isCopyLike())
+ if (!MII->isCopyLike() || applyTerminalRule(*MII))
continue;
if (isLocalCopy(&(*MII), LIS))
LocalWorkList.push_back(&(*MII));
@@ -2670,7 +2792,7 @@ RegisterCoalescer::copyCoalesceInMBB(MachineBasicBlock *MBB) {
else {
for (MachineBasicBlock::iterator MII = MBB->begin(), E = MBB->end();
MII != E; ++MII)
- if (MII->isCopyLike())
+ if (MII->isCopyLike() && !applyTerminalRule(*MII))
WorkList.push_back(MII);
}
// Try coalescing the collected copies immediately, and remove the nulls.
@@ -2741,7 +2863,7 @@ bool RegisterCoalescer::runOnMachineFunction(MachineFunction &fn) {
AA = &getAnalysis<AliasAnalysis>();
Loops = &getAnalysis<MachineLoopInfo>();
if (EnableGlobalCopies == cl::BOU_UNSET)
- JoinGlobalCopies = STI.useMachineScheduler();
+ JoinGlobalCopies = STI.enableJoinGlobalCopies();
else
JoinGlobalCopies = (EnableGlobalCopies == cl::BOU_TRUE);
diff --git a/lib/CodeGen/RegisterPressure.cpp b/lib/CodeGen/RegisterPressure.cpp
index 9925efb..3634103 100644
--- a/lib/CodeGen/RegisterPressure.cpp
+++ b/lib/CodeGen/RegisterPressure.cpp
@@ -304,6 +304,7 @@ static bool containsReg(ArrayRef<unsigned> RegUnits, unsigned RegUnit) {
return std::find(RegUnits.begin(), RegUnits.end(), RegUnit) != RegUnits.end();
}
+namespace {
/// Collect this instruction's unique uses and defs into SmallVectors for
/// processing defs and uses in order.
///
@@ -354,6 +355,7 @@ protected:
}
}
};
+} // namespace
/// Collect physical and virtual register operands.
static void collectOperands(const MachineInstr *MI,
diff --git a/lib/CodeGen/ScheduleDAGInstrs.cpp b/lib/CodeGen/ScheduleDAGInstrs.cpp
index 78bfd23..17dd729 100644
--- a/lib/CodeGen/ScheduleDAGInstrs.cpp
+++ b/lib/CodeGen/ScheduleDAGInstrs.cpp
@@ -96,14 +96,15 @@ static const Value *getUnderlyingObjectFromInt(const Value *V) {
/// getUnderlyingObjects - This is a wrapper around GetUnderlyingObjects
/// and adds support for basic ptrtoint+arithmetic+inttoptr sequences.
static void getUnderlyingObjects(const Value *V,
- SmallVectorImpl<Value *> &Objects) {
+ SmallVectorImpl<Value *> &Objects,
+ const DataLayout &DL) {
SmallPtrSet<const Value *, 16> Visited;
SmallVector<const Value *, 4> Working(1, V);
do {
V = Working.pop_back_val();
SmallVector<Value *, 4> Objs;
- GetUnderlyingObjects(const_cast<Value *>(V), Objs);
+ GetUnderlyingObjects(const_cast<Value *>(V), Objs, DL);
for (SmallVectorImpl<Value *>::iterator I = Objs.begin(), IE = Objs.end();
I != IE; ++I) {
@@ -132,7 +133,8 @@ UnderlyingObjectsVector;
/// object, return the Value for that object.
static void getUnderlyingObjectsForInstr(const MachineInstr *MI,
const MachineFrameInfo *MFI,
- UnderlyingObjectsVector &Objects) {
+ UnderlyingObjectsVector &Objects,
+ const DataLayout &DL) {
if (!MI->hasOneMemOperand() ||
(!(*MI->memoperands_begin())->getValue() &&
!(*MI->memoperands_begin())->getPseudoValue()) ||
@@ -156,7 +158,7 @@ static void getUnderlyingObjectsForInstr(const MachineInstr *MI,
return;
SmallVector<Value *, 4> Objs;
- getUnderlyingObjects(V, Objs);
+ getUnderlyingObjects(V, Objs, DL);
for (SmallVectorImpl<Value *>::iterator I = Objs.begin(), IE = Objs.end();
I != IE; ++I) {
@@ -468,7 +470,8 @@ static inline bool isGlobalMemoryObject(AliasAnalysis *AA, MachineInstr *MI) {
// This MI might have either incomplete info, or be known to be unsafe
// to deal with (i.e. a volatile object).
static inline bool isUnsafeMemoryObject(MachineInstr *MI,
- const MachineFrameInfo *MFI) {
+ const MachineFrameInfo *MFI,
+ const DataLayout &DL) {
if (!MI || MI->memoperands_empty())
return true;
// We purposefully do not check for hasOneMemOperand() here
@@ -491,7 +494,7 @@ static inline bool isUnsafeMemoryObject(MachineInstr *MI,
return true;
SmallVector<Value *, 4> Objs;
- getUnderlyingObjects(V, Objs);
+ getUnderlyingObjects(V, Objs, DL);
for (SmallVectorImpl<Value *>::iterator I = Objs.begin(),
IE = Objs.end(); I != IE; ++I) {
// Does this pointer refer to a distinct and identifiable object?
@@ -508,7 +511,7 @@ static inline bool isUnsafeMemoryObject(MachineInstr *MI,
/// these two MIs be reordered during scheduling from memory dependency
/// point of view.
static bool MIsNeedChainEdge(AliasAnalysis *AA, const MachineFrameInfo *MFI,
- MachineInstr *MIa,
+ const DataLayout &DL, MachineInstr *MIa,
MachineInstr *MIb) {
const MachineFunction *MF = MIa->getParent()->getParent();
const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo();
@@ -527,7 +530,7 @@ static bool MIsNeedChainEdge(AliasAnalysis *AA, const MachineFrameInfo *MFI,
if (!MIa->hasOneMemOperand() || !MIb->hasOneMemOperand())
return true;
- if (isUnsafeMemoryObject(MIa, MFI) || isUnsafeMemoryObject(MIb, MFI))
+ if (isUnsafeMemoryObject(MIa, MFI, DL) || isUnsafeMemoryObject(MIb, MFI, DL))
return true;
// If we are dealing with two "normal" loads, we do not need an edge
@@ -579,10 +582,10 @@ static bool MIsNeedChainEdge(AliasAnalysis *AA, const MachineFrameInfo *MFI,
/// This recursive function iterates over chain deps of SUb looking for
/// the "latest" node that needs a chain edge to SUa.
-static unsigned
-iterateChainSucc(AliasAnalysis *AA, const MachineFrameInfo *MFI,
- SUnit *SUa, SUnit *SUb, SUnit *ExitSU, unsigned *Depth,
- SmallPtrSetImpl<const SUnit*> &Visited) {
+static unsigned iterateChainSucc(AliasAnalysis *AA, const MachineFrameInfo *MFI,
+ const DataLayout &DL, SUnit *SUa, SUnit *SUb,
+ SUnit *ExitSU, unsigned *Depth,
+ SmallPtrSetImpl<const SUnit *> &Visited) {
if (!SUa || !SUb || SUb == ExitSU)
return *Depth;
@@ -607,7 +610,7 @@ iterateChainSucc(AliasAnalysis *AA, const MachineFrameInfo *MFI,
// add that edge to the predecessors chain of SUb,
// and stop descending.
if (*Depth > 200 ||
- MIsNeedChainEdge(AA, MFI, SUa->getInstr(), SUb->getInstr())) {
+ MIsNeedChainEdge(AA, MFI, DL, SUa->getInstr(), SUb->getInstr())) {
SUb->addPred(SDep(SUa, SDep::MayAliasMem));
return *Depth;
}
@@ -617,7 +620,7 @@ iterateChainSucc(AliasAnalysis *AA, const MachineFrameInfo *MFI,
for (SUnit::const_succ_iterator I = SUb->Succs.begin(), E = SUb->Succs.end();
I != E; ++I)
if (I->isNormalMemoryOrBarrier())
- iterateChainSucc (AA, MFI, SUa, I->getSUnit(), ExitSU, Depth, Visited);
+ iterateChainSucc(AA, MFI, DL, SUa, I->getSUnit(), ExitSU, Depth, Visited);
return *Depth;
}
@@ -626,7 +629,8 @@ iterateChainSucc(AliasAnalysis *AA, const MachineFrameInfo *MFI,
/// checks whether SU can be aliasing any node dominated
/// by it.
static void adjustChainDeps(AliasAnalysis *AA, const MachineFrameInfo *MFI,
- SUnit *SU, SUnit *ExitSU, std::set<SUnit *> &CheckList,
+ const DataLayout &DL, SUnit *SU, SUnit *ExitSU,
+ std::set<SUnit *> &CheckList,
unsigned LatencyToLoad) {
if (!SU)
return;
@@ -638,7 +642,7 @@ static void adjustChainDeps(AliasAnalysis *AA, const MachineFrameInfo *MFI,
I != IE; ++I) {
if (SU == *I)
continue;
- if (MIsNeedChainEdge(AA, MFI, SU->getInstr(), (*I)->getInstr())) {
+ if (MIsNeedChainEdge(AA, MFI, DL, SU->getInstr(), (*I)->getInstr())) {
SDep Dep(SU, SDep::MayAliasMem);
Dep.setLatency(((*I)->getInstr()->mayLoad()) ? LatencyToLoad : 0);
(*I)->addPred(Dep);
@@ -649,22 +653,22 @@ static void adjustChainDeps(AliasAnalysis *AA, const MachineFrameInfo *MFI,
for (SUnit::const_succ_iterator J = (*I)->Succs.begin(),
JE = (*I)->Succs.end(); J != JE; ++J)
if (J->isNormalMemoryOrBarrier())
- iterateChainSucc (AA, MFI, SU, J->getSUnit(),
- ExitSU, &Depth, Visited);
+ iterateChainSucc(AA, MFI, DL, SU, J->getSUnit(), ExitSU, &Depth,
+ Visited);
}
}
/// Check whether two objects need a chain edge, if so, add it
/// otherwise remember the rejected SU.
-static inline
-void addChainDependency (AliasAnalysis *AA, const MachineFrameInfo *MFI,
- SUnit *SUa, SUnit *SUb,
- std::set<SUnit *> &RejectList,
- unsigned TrueMemOrderLatency = 0,
- bool isNormalMemory = false) {
+static inline void addChainDependency(AliasAnalysis *AA,
+ const MachineFrameInfo *MFI,
+ const DataLayout &DL, SUnit *SUa,
+ SUnit *SUb, std::set<SUnit *> &RejectList,
+ unsigned TrueMemOrderLatency = 0,
+ bool isNormalMemory = false) {
// If this is a false dependency,
// do not add the edge, but remember the rejected node.
- if (MIsNeedChainEdge(AA, MFI, SUa->getInstr(), SUb->getInstr())) {
+ if (MIsNeedChainEdge(AA, MFI, DL, SUa->getInstr(), SUb->getInstr())) {
SDep Dep(SUa, isNormalMemory ? SDep::MayAliasMem : SDep::Barrier);
Dep.setLatency(TrueMemOrderLatency);
SUb->addPred(Dep);
@@ -883,7 +887,7 @@ void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA,
BarrierChain = SU;
// This is a barrier event that acts as a pivotal node in the DAG,
// so it is safe to clear the list of exposed nodes.
- adjustChainDeps(AA, MFI, SU, &ExitSU, RejectMemNodes,
+ adjustChainDeps(AA, MFI, *TM.getDataLayout(), SU, &ExitSU, RejectMemNodes,
TrueMemOrderLatency);
RejectMemNodes.clear();
NonAliasMemDefs.clear();
@@ -896,25 +900,27 @@ void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA,
unsigned ChainLatency = 0;
if (AliasChain->getInstr()->mayLoad())
ChainLatency = TrueMemOrderLatency;
- addChainDependency(AAForDep, MFI, SU, AliasChain, RejectMemNodes,
- ChainLatency);
+ addChainDependency(AAForDep, MFI, *TM.getDataLayout(), SU, AliasChain,
+ RejectMemNodes, ChainLatency);
}
AliasChain = SU;
for (unsigned k = 0, m = PendingLoads.size(); k != m; ++k)
- addChainDependency(AAForDep, MFI, SU, PendingLoads[k], RejectMemNodes,
+ addChainDependency(AAForDep, MFI, *TM.getDataLayout(), SU,
+ PendingLoads[k], RejectMemNodes,
TrueMemOrderLatency);
for (MapVector<ValueType, std::vector<SUnit *> >::iterator I =
AliasMemDefs.begin(), E = AliasMemDefs.end(); I != E; ++I) {
for (unsigned i = 0, e = I->second.size(); i != e; ++i)
- addChainDependency(AAForDep, MFI, SU, I->second[i], RejectMemNodes);
+ addChainDependency(AAForDep, MFI, *TM.getDataLayout(), SU,
+ I->second[i], RejectMemNodes);
}
for (MapVector<ValueType, std::vector<SUnit *> >::iterator I =
AliasMemUses.begin(), E = AliasMemUses.end(); I != E; ++I) {
for (unsigned i = 0, e = I->second.size(); i != e; ++i)
- addChainDependency(AAForDep, MFI, SU, I->second[i], RejectMemNodes,
- TrueMemOrderLatency);
+ addChainDependency(AAForDep, MFI, *TM.getDataLayout(), SU,
+ I->second[i], RejectMemNodes, TrueMemOrderLatency);
}
- adjustChainDeps(AA, MFI, SU, &ExitSU, RejectMemNodes,
+ adjustChainDeps(AA, MFI, *TM.getDataLayout(), SU, &ExitSU, RejectMemNodes,
TrueMemOrderLatency);
PendingLoads.clear();
AliasMemDefs.clear();
@@ -928,7 +934,7 @@ void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA,
BarrierChain->addPred(SDep(SU, SDep::Barrier));
UnderlyingObjectsVector Objs;
- getUnderlyingObjectsForInstr(MI, MFI, Objs);
+ getUnderlyingObjectsForInstr(MI, MFI, Objs, *TM.getDataLayout());
if (Objs.empty()) {
// Treat all other stores conservatively.
@@ -952,8 +958,8 @@ void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA,
((ThisMayAlias) ? AliasMemDefs.end() : NonAliasMemDefs.end());
if (I != IE) {
for (unsigned i = 0, e = I->second.size(); i != e; ++i)
- addChainDependency(AAForDep, MFI, SU, I->second[i], RejectMemNodes,
- 0, true);
+ addChainDependency(AAForDep, MFI, *TM.getDataLayout(), SU,
+ I->second[i], RejectMemNodes, 0, true);
// If we're not using AA, then we only need one store per object.
if (!AAForDep)
@@ -977,7 +983,8 @@ void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA,
((ThisMayAlias) ? AliasMemUses.end() : NonAliasMemUses.end());
if (J != JE) {
for (unsigned i = 0, e = J->second.size(); i != e; ++i)
- addChainDependency(AAForDep, MFI, SU, J->second[i], RejectMemNodes,
+ addChainDependency(AAForDep, MFI, *TM.getDataLayout(), SU,
+ J->second[i], RejectMemNodes,
TrueMemOrderLatency, true);
J->second.clear();
}
@@ -986,13 +993,15 @@ void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA,
// Add dependencies from all the PendingLoads, i.e. loads
// with no underlying object.
for (unsigned k = 0, m = PendingLoads.size(); k != m; ++k)
- addChainDependency(AAForDep, MFI, SU, PendingLoads[k], RejectMemNodes,
+ addChainDependency(AAForDep, MFI, *TM.getDataLayout(), SU,
+ PendingLoads[k], RejectMemNodes,
TrueMemOrderLatency);
// Add dependence on alias chain, if needed.
if (AliasChain)
- addChainDependency(AAForDep, MFI, SU, AliasChain, RejectMemNodes);
+ addChainDependency(AAForDep, MFI, *TM.getDataLayout(), SU, AliasChain,
+ RejectMemNodes);
}
- adjustChainDeps(AA, MFI, SU, &ExitSU, RejectMemNodes,
+ adjustChainDeps(AA, MFI, *TM.getDataLayout(), SU, &ExitSU, RejectMemNodes,
TrueMemOrderLatency);
} else if (MI->mayLoad()) {
bool MayAlias = true;
@@ -1000,7 +1009,7 @@ void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA,
// Invariant load, no chain dependencies needed!
} else {
UnderlyingObjectsVector Objs;
- getUnderlyingObjectsForInstr(MI, MFI, Objs);
+ getUnderlyingObjectsForInstr(MI, MFI, Objs, *TM.getDataLayout());
if (Objs.empty()) {
// A load with no underlying object. Depend on all
@@ -1008,8 +1017,8 @@ void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA,
for (MapVector<ValueType, std::vector<SUnit *> >::iterator I =
AliasMemDefs.begin(), E = AliasMemDefs.end(); I != E; ++I)
for (unsigned i = 0, e = I->second.size(); i != e; ++i)
- addChainDependency(AAForDep, MFI, SU, I->second[i],
- RejectMemNodes);
+ addChainDependency(AAForDep, MFI, *TM.getDataLayout(), SU,
+ I->second[i], RejectMemNodes);
PendingLoads.push_back(SU);
MayAlias = true;
@@ -1032,18 +1041,20 @@ void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA,
((ThisMayAlias) ? AliasMemDefs.end() : NonAliasMemDefs.end());
if (I != IE)
for (unsigned i = 0, e = I->second.size(); i != e; ++i)
- addChainDependency(AAForDep, MFI, SU, I->second[i],
- RejectMemNodes, 0, true);
+ addChainDependency(AAForDep, MFI, *TM.getDataLayout(), SU,
+ I->second[i], RejectMemNodes, 0, true);
if (ThisMayAlias)
AliasMemUses[V].push_back(SU);
else
NonAliasMemUses[V].push_back(SU);
}
if (MayAlias)
- adjustChainDeps(AA, MFI, SU, &ExitSU, RejectMemNodes, /*Latency=*/0);
+ adjustChainDeps(AA, MFI, *TM.getDataLayout(), SU, &ExitSU,
+ RejectMemNodes, /*Latency=*/0);
// Add dependencies on alias and barrier chains, if needed.
if (MayAlias && AliasChain)
- addChainDependency(AAForDep, MFI, SU, AliasChain, RejectMemNodes);
+ addChainDependency(AAForDep, MFI, *TM.getDataLayout(), SU, AliasChain,
+ RejectMemNodes);
if (BarrierChain)
BarrierChain->addPred(SDep(SU, SDep::Barrier));
}
@@ -1211,7 +1222,7 @@ std::string ScheduleDAGInstrs::getGraphNodeLabel(const SUnit *SU) const {
else if (SU == &ExitSU)
oss << "<exit>";
else
- SU->getInstr()->print(oss, &TM, /*SkipOpers=*/true);
+ SU->getInstr()->print(oss, /*SkipOpers=*/true);
return oss.str();
}
diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 6129401..a1c84c5 100644
--- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -246,10 +246,11 @@ namespace {
SDValue visitSDIVREM(SDNode *N);
SDValue visitUDIVREM(SDNode *N);
SDValue visitAND(SDNode *N);
+ SDValue visitANDLike(SDValue N0, SDValue N1, SDNode *LocReference);
SDValue visitOR(SDNode *N);
+ SDValue visitORLike(SDValue N0, SDValue N1, SDNode *LocReference);
SDValue visitXOR(SDNode *N);
SDValue SimplifyVBinOp(SDNode *N);
- SDValue SimplifyVUnaryOp(SDNode *N);
SDValue visitSHL(SDNode *N);
SDValue visitSRA(SDNode *N);
SDValue visitSRL(SDNode *N);
@@ -302,6 +303,7 @@ namespace {
SDValue visitCONCAT_VECTORS(SDNode *N);
SDValue visitEXTRACT_SUBVECTOR(SDNode *N);
SDValue visitVECTOR_SHUFFLE(SDNode *N);
+ SDValue visitSCALAR_TO_VECTOR(SDNode *N);
SDValue visitINSERT_SUBVECTOR(SDNode *N);
SDValue visitMLOAD(SDNode *N);
SDValue visitMSTORE(SDNode *N);
@@ -713,6 +715,22 @@ static SDNode *isConstantBuildVectorOrConstantInt(SDValue N) {
return nullptr;
}
+static SDNode *isConstantIntBuildVectorOrConstantInt(SDValue N) {
+ if (isa<ConstantSDNode>(N))
+ return N.getNode();
+ if (ISD::isBuildVectorOfConstantSDNodes(N.getNode()))
+ return N.getNode();
+ return nullptr;
+}
+
+static SDNode *isConstantFPBuildVectorOrConstantFP(SDValue N) {
+ if (isa<ConstantFPSDNode>(N))
+ return N.getNode();
+ if (ISD::isBuildVectorOfConstantFPSDNodes(N.getNode()))
+ return N.getNode();
+ return nullptr;
+}
+
// \brief Returns the SDNode if it is a constant splat BuildVector or constant
// int.
static ConstantSDNode *isConstOrConstSplat(SDValue N) {
@@ -1180,11 +1198,6 @@ void DAGCombiner::Run(CombineLevel AtLevel) {
LegalOperations = Level >= AfterLegalizeVectorOps;
LegalTypes = Level >= AfterLegalizeTypes;
- // Early exit if this basic block is in an optnone function.
- if (DAG.getMachineFunction().getFunction()->hasFnAttribute(
- Attribute::OptimizeNone))
- return;
-
// Add all the dag nodes to the worklist.
for (SelectionDAG::allnodes_iterator I = DAG.allnodes_begin(),
E = DAG.allnodes_end(); I != E; ++I)
@@ -1369,6 +1382,7 @@ SDValue DAGCombiner::visit(SDNode *N) {
case ISD::CONCAT_VECTORS: return visitCONCAT_VECTORS(N);
case ISD::EXTRACT_SUBVECTOR: return visitEXTRACT_SUBVECTOR(N);
case ISD::VECTOR_SHUFFLE: return visitVECTOR_SHUFFLE(N);
+ case ISD::SCALAR_TO_VECTOR: return visitSCALAR_TO_VECTOR(N);
case ISD::INSERT_SUBVECTOR: return visitINSERT_SUBVECTOR(N);
case ISD::MLOAD: return visitMLOAD(N);
case ISD::MSTORE: return visitMSTORE(N);
@@ -2685,6 +2699,109 @@ SDValue DAGCombiner::SimplifyBinOpWithSameOpcodeHands(SDNode *N) {
return SDValue();
}
+/// This contains all DAGCombine rules which reduce two values combined by
+/// an And operation to a single value. This makes them reusable in the context
+/// of visitSELECT(). Rules involving constants are not included as
+/// visitSELECT() already handles those cases.
+SDValue DAGCombiner::visitANDLike(SDValue N0, SDValue N1,
+ SDNode *LocReference) {
+ EVT VT = N1.getValueType();
+
+ // fold (and x, undef) -> 0
+ if (N0.getOpcode() == ISD::UNDEF || N1.getOpcode() == ISD::UNDEF)
+ return DAG.getConstant(0, VT);
+ // fold (and (setcc x), (setcc y)) -> (setcc (and x, y))
+ SDValue LL, LR, RL, RR, CC0, CC1;
+ if (isSetCCEquivalent(N0, LL, LR, CC0) && isSetCCEquivalent(N1, RL, RR, CC1)){
+ ISD::CondCode Op0 = cast<CondCodeSDNode>(CC0)->get();
+ ISD::CondCode Op1 = cast<CondCodeSDNode>(CC1)->get();
+
+ if (LR == RR && isa<ConstantSDNode>(LR) && Op0 == Op1 &&
+ LL.getValueType().isInteger()) {
+ // fold (and (seteq X, 0), (seteq Y, 0)) -> (seteq (or X, Y), 0)
+ if (cast<ConstantSDNode>(LR)->isNullValue() && Op1 == ISD::SETEQ) {
+ SDValue ORNode = DAG.getNode(ISD::OR, SDLoc(N0),
+ LR.getValueType(), LL, RL);
+ AddToWorklist(ORNode.getNode());
+ return DAG.getSetCC(SDLoc(LocReference), VT, ORNode, LR, Op1);
+ }
+ // fold (and (seteq X, -1), (seteq Y, -1)) -> (seteq (and X, Y), -1)
+ if (cast<ConstantSDNode>(LR)->isAllOnesValue() && Op1 == ISD::SETEQ) {
+ SDValue ANDNode = DAG.getNode(ISD::AND, SDLoc(N0),
+ LR.getValueType(), LL, RL);
+ AddToWorklist(ANDNode.getNode());
+ return DAG.getSetCC(SDLoc(LocReference), VT, ANDNode, LR, Op1);
+ }
+ // fold (and (setgt X, -1), (setgt Y, -1)) -> (setgt (or X, Y), -1)
+ if (cast<ConstantSDNode>(LR)->isAllOnesValue() && Op1 == ISD::SETGT) {
+ SDValue ORNode = DAG.getNode(ISD::OR, SDLoc(N0),
+ LR.getValueType(), LL, RL);
+ AddToWorklist(ORNode.getNode());
+ return DAG.getSetCC(SDLoc(LocReference), VT, ORNode, LR, Op1);
+ }
+ }
+ // Simplify (and (setne X, 0), (setne X, -1)) -> (setuge (add X, 1), 2)
+ if (LL == RL && isa<ConstantSDNode>(LR) && isa<ConstantSDNode>(RR) &&
+ Op0 == Op1 && LL.getValueType().isInteger() &&
+ Op0 == ISD::SETNE && ((cast<ConstantSDNode>(LR)->isNullValue() &&
+ cast<ConstantSDNode>(RR)->isAllOnesValue()) ||
+ (cast<ConstantSDNode>(LR)->isAllOnesValue() &&
+ cast<ConstantSDNode>(RR)->isNullValue()))) {
+ SDValue ADDNode = DAG.getNode(ISD::ADD, SDLoc(N0), LL.getValueType(),
+ LL, DAG.getConstant(1, LL.getValueType()));
+ AddToWorklist(ADDNode.getNode());
+ return DAG.getSetCC(SDLoc(LocReference), VT, ADDNode,
+ DAG.getConstant(2, LL.getValueType()), ISD::SETUGE);
+ }
+ // canonicalize equivalent to ll == rl
+ if (LL == RR && LR == RL) {
+ Op1 = ISD::getSetCCSwappedOperands(Op1);
+ std::swap(RL, RR);
+ }
+ if (LL == RL && LR == RR) {
+ bool isInteger = LL.getValueType().isInteger();
+ ISD::CondCode Result = ISD::getSetCCAndOperation(Op0, Op1, isInteger);
+ if (Result != ISD::SETCC_INVALID &&
+ (!LegalOperations ||
+ (TLI.isCondCodeLegal(Result, LL.getSimpleValueType()) &&
+ TLI.isOperationLegal(ISD::SETCC,
+ getSetCCResultType(N0.getSimpleValueType())))))
+ return DAG.getSetCC(SDLoc(LocReference), N0.getValueType(),
+ LL, LR, Result);
+ }
+ }
+
+ if (N0.getOpcode() == ISD::ADD && N1.getOpcode() == ISD::SRL &&
+ VT.getSizeInBits() <= 64) {
+ if (ConstantSDNode *ADDI = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
+ APInt ADDC = ADDI->getAPIntValue();
+ if (!TLI.isLegalAddImmediate(ADDC.getSExtValue())) {
+ // Look for (and (add x, c1), (lshr y, c2)). If C1 wasn't a legal
+ // immediate for an add, but it is legal if its top c2 bits are set,
+ // transform the ADD so the immediate doesn't need to be materialized
+ // in a register.
+ if (ConstantSDNode *SRLI = dyn_cast<ConstantSDNode>(N1.getOperand(1))) {
+ APInt Mask = APInt::getHighBitsSet(VT.getSizeInBits(),
+ SRLI->getZExtValue());
+ if (DAG.MaskedValueIsZero(N0.getOperand(1), Mask)) {
+ ADDC |= Mask;
+ if (TLI.isLegalAddImmediate(ADDC.getSExtValue())) {
+ SDValue NewAdd =
+ DAG.getNode(ISD::ADD, SDLoc(N0), VT,
+ N0.getOperand(0), DAG.getConstant(ADDC, VT));
+ CombineTo(N0.getNode(), NewAdd);
+ // Return N so it doesn't get rechecked!
+ return SDValue(LocReference, 0);
+ }
+ }
+ }
+ }
+ }
+ }
+
+ return SDValue();
+}
+
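The setcc folds collected in visitANDLike are plain boolean identities, so they can be sanity-checked outside LLVM. A minimal standalone C++ sketch (uint32_t values stand in for SDValues; it verifies the identities the combiner relies on, not the DAG mechanics):

    // Brute-force check of the AND-of-setcc folds above over sample values.
    #include <cassert>
    #include <cstdint>

    int main() {
      const uint32_t samples[] = {0u, 1u, 2u, 0x7fffffffu, 0x80000000u,
                                  0xfffffffeu, 0xffffffffu};
      for (uint32_t x : samples)
        for (uint32_t y : samples) {
          // (and (seteq X, 0), (seteq Y, 0)) -> (seteq (or X, Y), 0)
          assert(((x == 0) && (y == 0)) == ((x | y) == 0));
          // (and (seteq X, -1), (seteq Y, -1)) -> (seteq (and X, Y), -1)
          assert(((x == ~0u) && (y == ~0u)) == ((x & y) == ~0u));
          // (and (setgt X, -1), (setgt Y, -1)) -> (setgt (or X, Y), -1)
          assert(((int32_t)x > -1 && (int32_t)y > -1) ==
                 ((int32_t)(x | y) > -1));
          // (and (setne X, 0), (setne X, -1)) -> (setuge (add X, 1), 2)
          assert(((x != 0) && (x != ~0u)) == (x + 1u >= 2u));
        }
      return 0;
    }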
SDValue DAGCombiner::visitAND(SDNode *N) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
@@ -2716,9 +2833,6 @@ SDValue DAGCombiner::visitAND(SDNode *N) {
return N0;
}
- // fold (and x, undef) -> 0
- if (N0.getOpcode() == ISD::UNDEF || N1.getOpcode() == ISD::UNDEF)
- return DAG.getConstant(0, VT);
// fold (and c1, c2) -> c1&c2
ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
@@ -2808,9 +2922,13 @@ SDValue DAGCombiner::visitAND(SDNode *N) {
SplatBitSize = SplatBitSize * 2)
SplatValue |= SplatValue.shl(SplatBitSize);
- Constant = APInt::getAllOnesValue(BitWidth);
- for (unsigned i = 0, n = SplatBitSize/BitWidth; i < n; ++i)
- Constant &= SplatValue.lshr(i*BitWidth).zextOrTrunc(BitWidth);
+ // Make sure that variable 'Constant' is only set if 'SplatBitSize' is a
+ // multiple of 'BitWidth'. Otherwise, we could propagate a wrong value.
+ if (SplatBitSize % BitWidth == 0) {
+ Constant = APInt::getAllOnesValue(BitWidth);
+ for (unsigned i = 0, n = SplatBitSize/BitWidth; i < n; ++i)
+ Constant &= SplatValue.lshr(i*BitWidth).zextOrTrunc(BitWidth);
+ }
}
}
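To see why the new SplatBitSize % BitWidth guard matters, take 8-bit lanes against a 12-bit splat pattern such as 0xABC: n = 12/8 truncates to 1, so the pre-fix loop inspects only the low chunk and reports 0xBC as the per-lane AND mask, while the pattern actually tiles across the lanes as 0xBC, 0xCA, 0xAB, whose common mask is 0x88. A standalone illustration (values invented for the example):

    #include <cassert>
    #include <cstdint>

    int main() {
      const unsigned BitWidth = 8, SplatBitSize = 12;
      // Pre-fix loop: n = SplatBitSize / BitWidth truncates 12/8 to 1, so
      // only the lowest 8-bit chunk of the pattern is ANDed in.
      uint32_t BuggyConstant = 0xFF;
      for (unsigned i = 0, n = SplatBitSize / BitWidth; i < n; ++i)
        BuggyConstant &= (0xABCu >> (i * BitWidth)) & 0xFFu;
      assert(BuggyConstant == 0xBC);
      // An 8-bit lane view of the tiled 12-bit pattern cycles through three
      // different bytes; the honest per-lane mask is their intersection.
      const uint32_t Tiled24 = 0xABCABCu;
      uint32_t TrueConstant = 0xFF;
      for (unsigned i = 0; i < 3; ++i)
        TrueConstant &= (Tiled24 >> (i * 8)) & 0xFFu;
      assert(TrueConstant == 0x88);          // 0xBC & 0xCA & 0xAB
      assert(BuggyConstant != TrueConstant); // hence the % guard before folding
      return 0;
    }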
@@ -2863,118 +2981,6 @@ SDValue DAGCombiner::visitAND(SDNode *N) {
return SDValue(N, 0); // Return N so it doesn't get rechecked!
}
}
- // fold (and (setcc x), (setcc y)) -> (setcc (and x, y))
- SDValue LL, LR, RL, RR, CC0, CC1;
- if (isSetCCEquivalent(N0, LL, LR, CC0) && isSetCCEquivalent(N1, RL, RR, CC1)){
- ISD::CondCode Op0 = cast<CondCodeSDNode>(CC0)->get();
- ISD::CondCode Op1 = cast<CondCodeSDNode>(CC1)->get();
-
- if (LR == RR && isa<ConstantSDNode>(LR) && Op0 == Op1 &&
- LL.getValueType().isInteger()) {
- // fold (and (seteq X, 0), (seteq Y, 0)) -> (seteq (or X, Y), 0)
- if (cast<ConstantSDNode>(LR)->isNullValue() && Op1 == ISD::SETEQ) {
- SDValue ORNode = DAG.getNode(ISD::OR, SDLoc(N0),
- LR.getValueType(), LL, RL);
- AddToWorklist(ORNode.getNode());
- return DAG.getSetCC(SDLoc(N), VT, ORNode, LR, Op1);
- }
- // fold (and (seteq X, -1), (seteq Y, -1)) -> (seteq (and X, Y), -1)
- if (cast<ConstantSDNode>(LR)->isAllOnesValue() && Op1 == ISD::SETEQ) {
- SDValue ANDNode = DAG.getNode(ISD::AND, SDLoc(N0),
- LR.getValueType(), LL, RL);
- AddToWorklist(ANDNode.getNode());
- return DAG.getSetCC(SDLoc(N), VT, ANDNode, LR, Op1);
- }
- // fold (and (setgt X, -1), (setgt Y, -1)) -> (setgt (or X, Y), -1)
- if (cast<ConstantSDNode>(LR)->isAllOnesValue() && Op1 == ISD::SETGT) {
- SDValue ORNode = DAG.getNode(ISD::OR, SDLoc(N0),
- LR.getValueType(), LL, RL);
- AddToWorklist(ORNode.getNode());
- return DAG.getSetCC(SDLoc(N), VT, ORNode, LR, Op1);
- }
- }
- // Simplify (and (setne X, 0), (setne X, -1)) -> (setuge (add X, 1), 2)
- if (LL == RL && isa<ConstantSDNode>(LR) && isa<ConstantSDNode>(RR) &&
- Op0 == Op1 && LL.getValueType().isInteger() &&
- Op0 == ISD::SETNE && ((cast<ConstantSDNode>(LR)->isNullValue() &&
- cast<ConstantSDNode>(RR)->isAllOnesValue()) ||
- (cast<ConstantSDNode>(LR)->isAllOnesValue() &&
- cast<ConstantSDNode>(RR)->isNullValue()))) {
- SDValue ADDNode = DAG.getNode(ISD::ADD, SDLoc(N0), LL.getValueType(),
- LL, DAG.getConstant(1, LL.getValueType()));
- AddToWorklist(ADDNode.getNode());
- return DAG.getSetCC(SDLoc(N), VT, ADDNode,
- DAG.getConstant(2, LL.getValueType()), ISD::SETUGE);
- }
- // canonicalize equivalent to ll == rl
- if (LL == RR && LR == RL) {
- Op1 = ISD::getSetCCSwappedOperands(Op1);
- std::swap(RL, RR);
- }
- if (LL == RL && LR == RR) {
- bool isInteger = LL.getValueType().isInteger();
- ISD::CondCode Result = ISD::getSetCCAndOperation(Op0, Op1, isInteger);
- if (Result != ISD::SETCC_INVALID &&
- (!LegalOperations ||
- (TLI.isCondCodeLegal(Result, LL.getSimpleValueType()) &&
- TLI.isOperationLegal(ISD::SETCC,
- getSetCCResultType(N0.getSimpleValueType())))))
- return DAG.getSetCC(SDLoc(N), N0.getValueType(),
- LL, LR, Result);
- }
- }
-
- // Simplify: (and (op x...), (op y...)) -> (op (and x, y))
- if (N0.getOpcode() == N1.getOpcode()) {
- SDValue Tmp = SimplifyBinOpWithSameOpcodeHands(N);
- if (Tmp.getNode()) return Tmp;
- }
-
- // fold (and (sign_extend_inreg x, i16 to i32), 1) -> (and x, 1)
- // fold (and (sra)) -> (and (srl)) when possible.
- if (!VT.isVector() &&
- SimplifyDemandedBits(SDValue(N, 0)))
- return SDValue(N, 0);
-
- // fold (zext_inreg (extload x)) -> (zextload x)
- if (ISD::isEXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode())) {
- LoadSDNode *LN0 = cast<LoadSDNode>(N0);
- EVT MemVT = LN0->getMemoryVT();
- // If we zero all the possible extended bits, then we can turn this into
- // a zextload if we are running before legalize or the operation is legal.
- unsigned BitWidth = N1.getValueType().getScalarType().getSizeInBits();
- if (DAG.MaskedValueIsZero(N1, APInt::getHighBitsSet(BitWidth,
- BitWidth - MemVT.getScalarType().getSizeInBits())) &&
- ((!LegalOperations && !LN0->isVolatile()) ||
- TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, MemVT))) {
- SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(N0), VT,
- LN0->getChain(), LN0->getBasePtr(),
- MemVT, LN0->getMemOperand());
- AddToWorklist(N);
- CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
- return SDValue(N, 0); // Return N so it doesn't get rechecked!
- }
- }
- // fold (zext_inreg (sextload x)) -> (zextload x) iff load has one use
- if (ISD::isSEXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode()) &&
- N0.hasOneUse()) {
- LoadSDNode *LN0 = cast<LoadSDNode>(N0);
- EVT MemVT = LN0->getMemoryVT();
- // If we zero all the possible extended bits, then we can turn this into
- // a zextload if we are running before legalize or the operation is legal.
- unsigned BitWidth = N1.getValueType().getScalarType().getSizeInBits();
- if (DAG.MaskedValueIsZero(N1, APInt::getHighBitsSet(BitWidth,
- BitWidth - MemVT.getScalarType().getSizeInBits())) &&
- ((!LegalOperations && !LN0->isVolatile()) ||
- TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, MemVT))) {
- SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(N0), VT,
- LN0->getChain(), LN0->getBasePtr(),
- MemVT, LN0->getMemOperand());
- AddToWorklist(N);
- CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
- return SDValue(N, 0); // Return N so it doesn't get rechecked!
- }
- }
// fold (and (load x), 255) -> (zextload x, i8)
// fold (and (extload x, i16), 255) -> (zextload x, i8)
@@ -3046,33 +3052,60 @@ SDValue DAGCombiner::visitAND(SDNode *N) {
}
}
- if (N0.getOpcode() == ISD::ADD && N1.getOpcode() == ISD::SRL &&
- VT.getSizeInBits() <= 64) {
- if (ConstantSDNode *ADDI = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
- APInt ADDC = ADDI->getAPIntValue();
- if (!TLI.isLegalAddImmediate(ADDC.getSExtValue())) {
- // Look for (and (add x, c1), (lshr y, c2)). If C1 wasn't a legal
- // immediate for an add, but it is legal if its top c2 bits are set,
- // transform the ADD so the immediate doesn't need to be materialized
- // in a register.
- if (ConstantSDNode *SRLI = dyn_cast<ConstantSDNode>(N1.getOperand(1))) {
- APInt Mask = APInt::getHighBitsSet(VT.getSizeInBits(),
- SRLI->getZExtValue());
- if (DAG.MaskedValueIsZero(N0.getOperand(1), Mask)) {
- ADDC |= Mask;
- if (TLI.isLegalAddImmediate(ADDC.getSExtValue())) {
- SDValue NewAdd =
- DAG.getNode(ISD::ADD, SDLoc(N0), VT,
- N0.getOperand(0), DAG.getConstant(ADDC, VT));
- CombineTo(N0.getNode(), NewAdd);
- return SDValue(N, 0); // Return N so it doesn't get rechecked!
- }
- }
- }
- }
- }
+ if (SDValue Combined = visitANDLike(N0, N1, N))
+ return Combined;
+
+ // Simplify: (and (op x...), (op y...)) -> (op (and x, y))
+ if (N0.getOpcode() == N1.getOpcode()) {
+ SDValue Tmp = SimplifyBinOpWithSameOpcodeHands(N);
+ if (Tmp.getNode()) return Tmp;
}
+ // fold (and (sign_extend_inreg x, i16 to i32), 1) -> (and x, 1)
+ // fold (and (sra)) -> (and (srl)) when possible.
+ if (!VT.isVector() &&
+ SimplifyDemandedBits(SDValue(N, 0)))
+ return SDValue(N, 0);
+
+ // fold (zext_inreg (extload x)) -> (zextload x)
+ if (ISD::isEXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode())) {
+ LoadSDNode *LN0 = cast<LoadSDNode>(N0);
+ EVT MemVT = LN0->getMemoryVT();
+ // If we zero all the possible extended bits, then we can turn this into
+ // a zextload if we are running before legalize or the operation is legal.
+ unsigned BitWidth = N1.getValueType().getScalarType().getSizeInBits();
+ if (DAG.MaskedValueIsZero(N1, APInt::getHighBitsSet(BitWidth,
+ BitWidth - MemVT.getScalarType().getSizeInBits())) &&
+ ((!LegalOperations && !LN0->isVolatile()) ||
+ TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, MemVT))) {
+ SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(N0), VT,
+ LN0->getChain(), LN0->getBasePtr(),
+ MemVT, LN0->getMemOperand());
+ AddToWorklist(N);
+ CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
+ return SDValue(N, 0); // Return N so it doesn't get rechecked!
+ }
+ }
+ // fold (zext_inreg (sextload x)) -> (zextload x) iff load has one use
+ if (ISD::isSEXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode()) &&
+ N0.hasOneUse()) {
+ LoadSDNode *LN0 = cast<LoadSDNode>(N0);
+ EVT MemVT = LN0->getMemoryVT();
+ // If we zero all the possible extended bits, then we can turn this into
+ // a zextload if we are running before legalize or the operation is legal.
+ unsigned BitWidth = N1.getValueType().getScalarType().getSizeInBits();
+ if (DAG.MaskedValueIsZero(N1, APInt::getHighBitsSet(BitWidth,
+ BitWidth - MemVT.getScalarType().getSizeInBits())) &&
+ ((!LegalOperations && !LN0->isVolatile()) ||
+ TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, MemVT))) {
+ SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(N0), VT,
+ LN0->getChain(), LN0->getBasePtr(),
+ MemVT, LN0->getMemOperand());
+ AddToWorklist(N);
+ CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
+ return SDValue(N, 0); // Return N so it doesn't get rechecked!
+ }
+ }
// fold (and (or (srl N, 8), (shl N, 8)), 0xffff) -> (srl (bswap N), const)
if (N1C && N1C->getAPIntValue() == 0xffff && N0.getOpcode() == ISD::OR) {
SDValue BSwap = MatchBSwapHWordLow(N0.getNode(), N0.getOperand(0),
@@ -3338,6 +3371,98 @@ SDValue DAGCombiner::MatchBSwapHWord(SDNode *N, SDValue N0, SDValue N1) {
DAG.getNode(ISD::SRL, SDLoc(N), VT, BSwap, ShAmt));
}
+/// This contains all DAGCombine rules which reduce two values combined by
+/// an Or operation to a single value \see visitANDLike().
+SDValue DAGCombiner::visitORLike(SDValue N0, SDValue N1, SDNode *LocReference) {
+ EVT VT = N1.getValueType();
+ // fold (or x, undef) -> -1
+ if (!LegalOperations &&
+ (N0.getOpcode() == ISD::UNDEF || N1.getOpcode() == ISD::UNDEF)) {
+ EVT EltVT = VT.isVector() ? VT.getVectorElementType() : VT;
+ return DAG.getConstant(APInt::getAllOnesValue(EltVT.getSizeInBits()), VT);
+ }
+ // fold (or (setcc x), (setcc y)) -> (setcc (or x, y))
+ SDValue LL, LR, RL, RR, CC0, CC1;
+ if (isSetCCEquivalent(N0, LL, LR, CC0) && isSetCCEquivalent(N1, RL, RR, CC1)){
+ ISD::CondCode Op0 = cast<CondCodeSDNode>(CC0)->get();
+ ISD::CondCode Op1 = cast<CondCodeSDNode>(CC1)->get();
+
+ if (LR == RR && isa<ConstantSDNode>(LR) && Op0 == Op1 &&
+ LL.getValueType().isInteger()) {
+ // fold (or (setne X, 0), (setne Y, 0)) -> (setne (or X, Y), 0)
+ // fold (or (setlt X, 0), (setlt Y, 0)) -> (setne (or X, Y), 0)
+ if (cast<ConstantSDNode>(LR)->isNullValue() &&
+ (Op1 == ISD::SETNE || Op1 == ISD::SETLT)) {
+ SDValue ORNode = DAG.getNode(ISD::OR, SDLoc(LR),
+ LR.getValueType(), LL, RL);
+ AddToWorklist(ORNode.getNode());
+ return DAG.getSetCC(SDLoc(LocReference), VT, ORNode, LR, Op1);
+ }
+ // fold (or (setne X, -1), (setne Y, -1)) -> (setne (and X, Y), -1)
+ // fold (or (setgt X, -1), (setgt Y, -1)) -> (setgt (and X, Y), -1)
+ if (cast<ConstantSDNode>(LR)->isAllOnesValue() &&
+ (Op1 == ISD::SETNE || Op1 == ISD::SETGT)) {
+ SDValue ANDNode = DAG.getNode(ISD::AND, SDLoc(LR),
+ LR.getValueType(), LL, RL);
+ AddToWorklist(ANDNode.getNode());
+ return DAG.getSetCC(SDLoc(LocReference), VT, ANDNode, LR, Op1);
+ }
+ }
+ // canonicalize equivalent to ll == rl
+ if (LL == RR && LR == RL) {
+ Op1 = ISD::getSetCCSwappedOperands(Op1);
+ std::swap(RL, RR);
+ }
+ if (LL == RL && LR == RR) {
+ bool isInteger = LL.getValueType().isInteger();
+ ISD::CondCode Result = ISD::getSetCCOrOperation(Op0, Op1, isInteger);
+ if (Result != ISD::SETCC_INVALID &&
+ (!LegalOperations ||
+ (TLI.isCondCodeLegal(Result, LL.getSimpleValueType()) &&
+ TLI.isOperationLegal(ISD::SETCC,
+ getSetCCResultType(N0.getValueType())))))
+ return DAG.getSetCC(SDLoc(LocReference), N0.getValueType(),
+ LL, LR, Result);
+ }
+ }
+
+ // (or (and X, C1), (and Y, C2)) -> (and (or X, Y), C3) if possible.
+ if (N0.getOpcode() == ISD::AND &&
+ N1.getOpcode() == ISD::AND &&
+ N0.getOperand(1).getOpcode() == ISD::Constant &&
+ N1.getOperand(1).getOpcode() == ISD::Constant &&
+ // Don't increase # computations.
+ (N0.getNode()->hasOneUse() || N1.getNode()->hasOneUse())) {
+ // We can only do this xform if we know that bits from X that are set in C2
+ // but not in C1 are already zero. Likewise for Y.
+ const APInt &LHSMask =
+ cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue();
+ const APInt &RHSMask =
+ cast<ConstantSDNode>(N1.getOperand(1))->getAPIntValue();
+
+ if (DAG.MaskedValueIsZero(N0.getOperand(0), RHSMask&~LHSMask) &&
+ DAG.MaskedValueIsZero(N1.getOperand(0), LHSMask&~RHSMask)) {
+ SDValue X = DAG.getNode(ISD::OR, SDLoc(N0), VT,
+ N0.getOperand(0), N1.getOperand(0));
+ return DAG.getNode(ISD::AND, SDLoc(LocReference), VT, X,
+ DAG.getConstant(LHSMask | RHSMask, VT));
+ }
+ }
+
+ // (or (and X, M), (and X, N)) -> (and X, (or M, N))
+ if (N0.getOpcode() == ISD::AND &&
+ N1.getOpcode() == ISD::AND &&
+ N0.getOperand(0) == N1.getOperand(0) &&
+ // Don't increase # computations.
+ (N0.getNode()->hasOneUse() || N1.getNode()->hasOneUse())) {
+ SDValue X = DAG.getNode(ISD::OR, SDLoc(N0), VT,
+ N0.getOperand(1), N1.getOperand(1));
+ return DAG.getNode(ISD::AND, SDLoc(LocReference), VT, N0.getOperand(0), X);
+ }
+
+ return SDValue();
+}
+
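The (or (and X, C1), (and Y, C2)) -> (and (or X, Y), C1|C2) rewrite moved into visitORLike is only sound under the two MaskedValueIsZero preconditions; a brute-force standalone check over 8-bit values confirms it:

    // Verify the mask-merge fold above wherever its preconditions hold.
    #include <cassert>
    #include <cstdint>

    int main() {
      const uint8_t C1 = 0x0F, C2 = 0xF0;
      for (unsigned x = 0; x < 256; ++x)
        for (unsigned y = 0; y < 256; ++y) {
          uint8_t X = (uint8_t)x, Y = (uint8_t)y;
          // Preconditions: bits of X set in C2 but not in C1 are zero, and
          // bits of Y set in C1 but not in C2 are zero.
          if ((X & (C2 & ~C1)) || (Y & (C1 & ~C2)))
            continue;
          uint8_t lhs = (uint8_t)((X & C1) | (Y & C2));
          uint8_t rhs = (uint8_t)((X | Y) & (C1 | C2));
          assert(lhs == rhs);
        }
      return 0;
    }

The extra terms (X & C2) and (Y & C1) that the rewritten form introduces are exactly what the preconditions force to be already covered, which is why the fold does not change the value.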
SDValue DAGCombiner::visitOR(SDNode *N) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
@@ -3425,12 +3550,6 @@ SDValue DAGCombiner::visitOR(SDNode *N) {
}
}
- // fold (or x, undef) -> -1
- if (!LegalOperations &&
- (N0.getOpcode() == ISD::UNDEF || N1.getOpcode() == ISD::UNDEF)) {
- EVT EltVT = VT.isVector() ? VT.getVectorElementType() : VT;
- return DAG.getConstant(APInt::getAllOnesValue(EltVT.getSizeInBits()), VT);
- }
// fold (or c1, c2) -> c1|c2
ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
@@ -3449,6 +3568,9 @@ SDValue DAGCombiner::visitOR(SDNode *N) {
if (N1C && DAG.MaskedValueIsZero(N0, ~N1C->getAPIntValue()))
return N1;
+ if (SDValue Combined = visitORLike(N0, N1, N))
+ return Combined;
+
// Recognize halfword bswaps as (bswap + rotl 16) or (bswap + shl 16)
SDValue BSwap = MatchBSwapHWord(N, N0, N1);
if (BSwap.getNode())
@@ -3474,91 +3596,12 @@ SDValue DAGCombiner::visitOR(SDNode *N) {
return SDValue();
}
}
- // fold (or (setcc x), (setcc y)) -> (setcc (or x, y))
- SDValue LL, LR, RL, RR, CC0, CC1;
- if (isSetCCEquivalent(N0, LL, LR, CC0) && isSetCCEquivalent(N1, RL, RR, CC1)){
- ISD::CondCode Op0 = cast<CondCodeSDNode>(CC0)->get();
- ISD::CondCode Op1 = cast<CondCodeSDNode>(CC1)->get();
-
- if (LR == RR && isa<ConstantSDNode>(LR) && Op0 == Op1 &&
- LL.getValueType().isInteger()) {
- // fold (or (setne X, 0), (setne Y, 0)) -> (setne (or X, Y), 0)
- // fold (or (setlt X, 0), (setlt Y, 0)) -> (setne (or X, Y), 0)
- if (cast<ConstantSDNode>(LR)->isNullValue() &&
- (Op1 == ISD::SETNE || Op1 == ISD::SETLT)) {
- SDValue ORNode = DAG.getNode(ISD::OR, SDLoc(LR),
- LR.getValueType(), LL, RL);
- AddToWorklist(ORNode.getNode());
- return DAG.getSetCC(SDLoc(N), VT, ORNode, LR, Op1);
- }
- // fold (or (setne X, -1), (setne Y, -1)) -> (setne (and X, Y), -1)
- // fold (or (setgt X, -1), (setgt Y -1)) -> (setgt (and X, Y), -1)
- if (cast<ConstantSDNode>(LR)->isAllOnesValue() &&
- (Op1 == ISD::SETNE || Op1 == ISD::SETGT)) {
- SDValue ANDNode = DAG.getNode(ISD::AND, SDLoc(LR),
- LR.getValueType(), LL, RL);
- AddToWorklist(ANDNode.getNode());
- return DAG.getSetCC(SDLoc(N), VT, ANDNode, LR, Op1);
- }
- }
- // canonicalize equivalent to ll == rl
- if (LL == RR && LR == RL) {
- Op1 = ISD::getSetCCSwappedOperands(Op1);
- std::swap(RL, RR);
- }
- if (LL == RL && LR == RR) {
- bool isInteger = LL.getValueType().isInteger();
- ISD::CondCode Result = ISD::getSetCCOrOperation(Op0, Op1, isInteger);
- if (Result != ISD::SETCC_INVALID &&
- (!LegalOperations ||
- (TLI.isCondCodeLegal(Result, LL.getSimpleValueType()) &&
- TLI.isOperationLegal(ISD::SETCC,
- getSetCCResultType(N0.getValueType())))))
- return DAG.getSetCC(SDLoc(N), N0.getValueType(),
- LL, LR, Result);
- }
- }
-
// Simplify: (or (op x...), (op y...)) -> (op (or x, y))
if (N0.getOpcode() == N1.getOpcode()) {
SDValue Tmp = SimplifyBinOpWithSameOpcodeHands(N);
if (Tmp.getNode()) return Tmp;
}
- // (or (and X, C1), (and Y, C2)) -> (and (or X, Y), C3) if possible.
- if (N0.getOpcode() == ISD::AND &&
- N1.getOpcode() == ISD::AND &&
- N0.getOperand(1).getOpcode() == ISD::Constant &&
- N1.getOperand(1).getOpcode() == ISD::Constant &&
- // Don't increase # computations.
- (N0.getNode()->hasOneUse() || N1.getNode()->hasOneUse())) {
- // We can only do this xform if we know that bits from X that are set in C2
- // but not in C1 are already zero. Likewise for Y.
- const APInt &LHSMask =
- cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue();
- const APInt &RHSMask =
- cast<ConstantSDNode>(N1.getOperand(1))->getAPIntValue();
-
- if (DAG.MaskedValueIsZero(N0.getOperand(0), RHSMask&~LHSMask) &&
- DAG.MaskedValueIsZero(N1.getOperand(0), LHSMask&~RHSMask)) {
- SDValue X = DAG.getNode(ISD::OR, SDLoc(N0), VT,
- N0.getOperand(0), N1.getOperand(0));
- return DAG.getNode(ISD::AND, SDLoc(N), VT, X,
- DAG.getConstant(LHSMask | RHSMask, VT));
- }
- }
-
- // (or (and X, M), (and X, N)) -> (and X, (or M, N))
- if (N0.getOpcode() == ISD::AND &&
- N1.getOpcode() == ISD::AND &&
- N0.getOperand(0) == N1.getOperand(0) &&
- // Don't increase # computations.
- (N0.getNode()->hasOneUse() || N1.getNode()->hasOneUse())) {
- SDValue X = DAG.getNode(ISD::OR, SDLoc(N0), VT,
- N0.getOperand(1), N1.getOperand(1));
- return DAG.getNode(ISD::AND, SDLoc(N), VT, N0.getOperand(0), X);
- }
-
// See if this is some rotate idiom.
if (SDNode *Rot = MatchRotate(N0, N1, SDLoc(N)))
return SDValue(Rot, 0);
@@ -3947,6 +3990,32 @@ SDValue DAGCombiner::visitXOR(SDNode *N) {
if (N0 == N1)
return tryFoldToZero(SDLoc(N), TLI, VT, DAG, LegalOperations, LegalTypes);
+ // fold (xor (shl 1, x), -1) -> (rotl ~1, x)
+ // Here is a concrete example of this equivalence:
+ // i16 x == 14
+ // i16 shl == 1 << 14 == 16384 == 0b0100000000000000
+ // i16 xor == ~(1 << 14) == 49151 == 0b1011111111111111
+ //
+ // =>
+ //
+ // i16 ~1 == 0b1111111111111110
+ // i16 rol(~1, 14) == 0b1011111111111111
+ //
+ // Some additional tips to help conceptualize this transform:
+ // - Try to see the operation as placing a single zero in a value of all ones.
+ // - There exists no value for x which would allow the result to contain zero.
+ // - Values of x larger than the bitwidth are undefined and do not require a
+ // consistent result.
+ // - Pushing the zero left requires shifting one-bits in from the right.
+ // A rotate left of ~1 is a nice way of achieving the desired result.
+ if (TLI.isOperationLegalOrCustom(ISD::ROTL, VT))
+ if (auto *N1C = dyn_cast<ConstantSDNode>(N1.getNode()))
+ if (N0.getOpcode() == ISD::SHL)
+ if (auto *ShlLHS = dyn_cast<ConstantSDNode>(N0.getOperand(0)))
+ if (N1C->isAllOnesValue() && ShlLHS->isOne())
+ return DAG.getNode(ISD::ROTL, SDLoc(N), VT, DAG.getConstant(~1, VT),
+ N0.getOperand(1));
+
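The comment's i16 example generalizes to every in-range shift amount, which a few lines of standalone C++ can confirm (rotl16 is a local helper written for the sketch, not an LLVM API):

    // Check (xor (shl 1, x), -1) == (rotl ~1, x) for i16, all shift amounts.
    #include <cassert>
    #include <cstdint>

    static uint16_t rotl16(uint16_t v, unsigned s) {
      s &= 15;
      return s ? (uint16_t)((v << s) | (v >> (16 - s))) : v;
    }

    int main() {
      for (unsigned x = 0; x < 16; ++x) {
        uint16_t shl = (uint16_t)(1u << x);
        assert((uint16_t)~shl == rotl16((uint16_t)~1u, x));
      }
      return 0;
    }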
// Simplify: xor (op x...), (op y...) -> (op (xor x, y))
if (N0.getOpcode() == N1.getOpcode()) {
SDValue Tmp = SimplifyBinOpWithSameOpcodeHands(N);
@@ -4792,6 +4861,69 @@ SDValue DAGCombiner::visitSELECT(SDNode *N) {
return SimplifySelect(SDLoc(N), N0, N1, N2);
}
+ if (VT0 == MVT::i1) {
+ if (TLI.shouldNormalizeToSelectSequence(*DAG.getContext(), VT)) {
+ // select (and Cond0, Cond1), X, Y
+ // -> select Cond0, (select Cond1, X, Y), Y
+ if (N0->getOpcode() == ISD::AND && N0->hasOneUse()) {
+ SDValue Cond0 = N0->getOperand(0);
+ SDValue Cond1 = N0->getOperand(1);
+ SDValue InnerSelect = DAG.getNode(ISD::SELECT, SDLoc(N),
+ N1.getValueType(), Cond1, N1, N2);
+ return DAG.getNode(ISD::SELECT, SDLoc(N), N1.getValueType(), Cond0,
+ InnerSelect, N2);
+ }
+ // select (or Cond0, Cond1), X, Y -> select Cond0, X, (select Cond1, X, Y)
+ if (N0->getOpcode() == ISD::OR && N0->hasOneUse()) {
+ SDValue Cond0 = N0->getOperand(0);
+ SDValue Cond1 = N0->getOperand(1);
+ SDValue InnerSelect = DAG.getNode(ISD::SELECT, SDLoc(N),
+ N1.getValueType(), Cond1, N1, N2);
+ return DAG.getNode(ISD::SELECT, SDLoc(N), N1.getValueType(), Cond0, N1,
+ InnerSelect);
+ }
+ }
+
+ // select Cond0, (select Cond1, X, Y), Y -> select (and Cond0, Cond1), X, Y
+ if (N1->getOpcode() == ISD::SELECT) {
+ SDValue N1_0 = N1->getOperand(0);
+ SDValue N1_1 = N1->getOperand(1);
+ SDValue N1_2 = N1->getOperand(2);
+ if (N1_2 == N2) {
+ // Create the actual and node if we can generate good code for it.
+ if (!TLI.shouldNormalizeToSelectSequence(*DAG.getContext(), VT)) {
+ SDValue And = DAG.getNode(ISD::AND, SDLoc(N), N0.getValueType(),
+ N0, N1_0);
+ return DAG.getNode(ISD::SELECT, SDLoc(N), N1.getValueType(), And,
+ N1_1, N2);
+ }
+ // Otherwise see if we can optimize the "and" to a better pattern.
+ if (SDValue Combined = visitANDLike(N0, N1_0, N))
+ return DAG.getNode(ISD::SELECT, SDLoc(N), N1.getValueType(), Combined,
+ N1_1, N2);
+ }
+ }
+ // select Cond0, X, (select Cond1, X, Y) -> select (or Cond0, Cond1), X, Y
+ if (N2->getOpcode() == ISD::SELECT) {
+ SDValue N2_0 = N2->getOperand(0);
+ SDValue N2_1 = N2->getOperand(1);
+ SDValue N2_2 = N2->getOperand(2);
+ if (N2_1 == N1) {
+ // Create the actual or node if we can generate good code for it.
+ if (!TLI.shouldNormalizeToSelectSequence(*DAG.getContext(), VT)) {
+ SDValue Or = DAG.getNode(ISD::OR, SDLoc(N), N0.getValueType(),
+ N0, N2_0);
+ return DAG.getNode(ISD::SELECT, SDLoc(N), N1.getValueType(), Or,
+ N1, N2_2);
+ }
+ // Otherwise see if we can optimize to a better pattern.
+ if (SDValue Combined = visitORLike(N0, N2_0, N))
+ return DAG.getNode(ISD::SELECT, SDLoc(N), N1.getValueType(), Combined,
+ N1, N2_2);
+ }
+ }
+ }
+
return SDValue();
}
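Both normalization directions in this visitSELECT hunk rest on two select identities, easy to check exhaustively over booleans:

    // select (and c0, c1), x, y == select c0, (select c1, x, y), y
    // select (or  c0, c1), x, y == select c0, x, (select c1, x, y)
    #include <cassert>

    int main() {
      for (int c0 = 0; c0 < 2; ++c0)
        for (int c1 = 0; c1 < 2; ++c1)
          for (int x = 0; x < 2; ++x)
            for (int y = 0; y < 2; ++y) {
              assert(((c0 && c1) ? x : y) == (c0 ? (c1 ? x : y) : y));
              assert(((c0 || c1) ? x : y) == (c0 ? x : (c1 ? x : y)));
            }
      return 0;
    }

Which direction the combiner applies is gated by TLI.shouldNormalizeToSelectSequence, i.e. by whether the target prefers chained selects or explicit and/or nodes feeding one select.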
@@ -6440,7 +6572,7 @@ SDValue DAGCombiner::visitTRUNCATE(SDNode *N) {
if (N0.getValueType() == N->getValueType(0))
return N0;
// fold (truncate c1) -> c1
- if (isa<ConstantSDNode>(N0))
+ if (isConstantIntBuildVectorOrConstantInt(N0))
return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, N0);
// fold (truncate (truncate x)) -> (truncate x)
if (N0.getOpcode() == ISD::TRUNCATE)
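Switching the guard from isa<ConstantSDNode> to the new integer predicate lets the (truncate c1) fold fire when N0 is a BUILD_VECTOR of integer constants too; the re-issued ISD::TRUNCATE is then expected to constant-fold lane by lane. In miniature, with plain arrays standing in for the vector constant:

    // Lane-wise truncation of a constant vector, i32 -> i16.
    #include <cassert>
    #include <cstdint>

    int main() {
      const int32_t wide[4] = {0x12345678, -1, 0x00010000, 42};
      int16_t narrow[4];
      for (int i = 0; i < 4; ++i)
        narrow[i] = (int16_t)wide[i];  // each lane truncated independently
      assert(narrow[0] == 0x5678);
      assert(narrow[1] == -1);
      assert(narrow[2] == 0);          // high bits dropped, lane becomes 0
      assert(narrow[3] == 42);
      return 0;
    }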
@@ -7453,14 +7585,23 @@ SDValue DAGCombiner::visitFMUL(SDNode *N) {
// Fold scalars or any vector constants (not just splats).
// This fold is done in general by InstCombine, but extra fmul insts
// may have been generated during lowering.
+ SDValue N00 = N0.getOperand(0);
SDValue N01 = N0.getOperand(1);
auto *BV1 = dyn_cast<BuildVectorSDNode>(N1);
+ auto *BV00 = dyn_cast<BuildVectorSDNode>(N00);
auto *BV01 = dyn_cast<BuildVectorSDNode>(N01);
- if ((N1CFP && isConstOrConstSplatFP(N01)) ||
- (BV1 && BV01 && BV1->isConstant() && BV01->isConstant())) {
- SDLoc SL(N);
- SDValue MulConsts = DAG.getNode(ISD::FMUL, SL, VT, N01, N1);
- return DAG.getNode(ISD::FMUL, SL, VT, N0.getOperand(0), MulConsts);
+
+ // Check 1: Make sure that the first operand of the inner multiply is NOT
+ // a constant. Otherwise, we may induce infinite looping.
+ if (!(isConstOrConstSplatFP(N00) || (BV00 && BV00->isConstant()))) {
+ // Check 2: Make sure that the second operand of the inner multiply and
+ // the second operand of the outer multiply are constants.
+ if ((N1CFP && isConstOrConstSplatFP(N01)) ||
+ (BV1 && BV01 && BV1->isConstant() && BV01->isConstant())) {
+ SDLoc SL(N);
+ SDValue MulConsts = DAG.getNode(ISD::FMUL, SL, VT, N01, N1);
+ return DAG.getNode(ISD::FMUL, SL, VT, N00, MulConsts);
+ }
}
}
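Check 1 above guards termination rather than correctness: constants are canonicalized to the right-hand side, so if the inner multiply's first operand were also a constant, the rewritten node would be commuted straight back into the (fmul (fmul x, c1), c2) shape and matched again. A toy pattern matcher makes the cycle visible (Expr and matchesFold are invented for this sketch):

    #include <cassert>

    struct Expr {
      bool isConst;
      const Expr *lhs = nullptr, *rhs = nullptr;  // set for fmul nodes only
    };

    // Matches (fmul (fmul x, c1), c2): outer RHS and inner RHS are constants.
    static bool matchesFold(const Expr &n) {
      return n.rhs && n.rhs->isConst &&
             n.lhs && n.lhs->rhs && n.lhs->rhs->isConst;
    }

    int main() {
      Expr a{true}, b{true}, c{true};
      Expr inner{false, &a, &b};      // inner fmul with a constant first operand
      Expr outer{false, &inner, &c};  // (fmul (fmul a, b), c)
      assert(matchesFold(outer));
      // Rewrite gives (fmul a, (fmul b, c)); canonicalizing the constant 'a'
      // to the right yields (fmul (fmul b, c), a), which matches again with
      // the operands merely rotated. Without the "N00 is not a constant"
      // check, this repeats forever.
      Expr inner2{false, &b, &c};
      Expr rotated{false, &inner2, &a};
      assert(matchesFold(rotated));
      return 0;
    }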
@@ -7821,8 +7962,7 @@ SDValue DAGCombiner::visitSINT_TO_FP(SDNode *N) {
EVT OpVT = N0.getValueType();
// fold (sint_to_fp c1) -> c1fp
- ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
- if (N0C &&
+ if (isConstantIntBuildVectorOrConstantInt(N0) &&
// ...but only if the target supports immediate floating-point values
(!LegalOperations ||
TLI.isOperationLegalOrCustom(llvm::ISD::ConstantFP, VT)))
@@ -7874,8 +8014,7 @@ SDValue DAGCombiner::visitUINT_TO_FP(SDNode *N) {
EVT OpVT = N0.getValueType();
// fold (uint_to_fp c1) -> c1fp
- ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
- if (N0C &&
+ if (isConstantIntBuildVectorOrConstantInt(N0) &&
// ...but only if the target supports immediate floating-point values
(!LegalOperations ||
TLI.isOperationLegalOrCustom(llvm::ISD::ConstantFP, VT)))
@@ -8033,7 +8172,6 @@ SDValue DAGCombiner::visitFP_ROUND_INREG(SDNode *N) {
SDValue DAGCombiner::visitFP_EXTEND(SDNode *N) {
SDValue N0 = N->getOperand(0);
- ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
EVT VT = N->getValueType(0);
// If this is fp_round(fpextend), don't fold it, allow ourselves to be folded.
@@ -8042,7 +8180,7 @@ SDValue DAGCombiner::visitFP_EXTEND(SDNode *N) {
return SDValue();
// fold (fp_extend c1fp) -> c1fp
- if (N0CFP)
+ if (isConstantFPBuildVectorOrConstantFP(N0))
return DAG.getNode(ISD::FP_EXTEND, SDLoc(N), VT, N0);
// Turn fp_extend(fp_round(X, 1)) -> x since the fp_round doesn't affect the
@@ -8117,14 +8255,9 @@ SDValue DAGCombiner::visitFNEG(SDNode *N) {
SDValue N0 = N->getOperand(0);
EVT VT = N->getValueType(0);
- if (VT.isVector()) {
- SDValue FoldedVOp = SimplifyVUnaryOp(N);
- if (FoldedVOp.getNode()) return FoldedVOp;
- }
-
// Constant fold FNEG.
- if (isa<ConstantFPSDNode>(N0))
- return DAG.getNode(ISD::FNEG, SDLoc(N), VT, N->getOperand(0));
+ if (isConstantFPBuildVectorOrConstantFP(N0))
+ return DAG.getNode(ISD::FNEG, SDLoc(N), VT, N0);
if (isNegatibleForFree(N0, LegalOperations, DAG.getTargetLoweringInfo(),
&DAG.getTarget().Options))
@@ -8219,13 +8352,8 @@ SDValue DAGCombiner::visitFABS(SDNode *N) {
SDValue N0 = N->getOperand(0);
EVT VT = N->getValueType(0);
- if (VT.isVector()) {
- SDValue FoldedVOp = SimplifyVUnaryOp(N);
- if (FoldedVOp.getNode()) return FoldedVOp;
- }
-
// fold (fabs c1) -> fabs(c1)
- if (isa<ConstantFPSDNode>(N0))
+ if (isConstantFPBuildVectorOrConstantFP(N0))
return DAG.getNode(ISD::FABS, SDLoc(N), VT, N0);
// fold (fabs (fabs x)) -> (fabs x)
@@ -8941,7 +9069,8 @@ SDValue DAGCombiner::visitLOAD(SDNode *N) {
LD->getMemoryVT(),
LD->isVolatile(), LD->isNonTemporal(),
LD->isInvariant(), Align, LD->getAAInfo());
- return CombineTo(N, NewLoad, SDValue(NewLoad.getNode(), 1), true);
+ if (NewLoad.getNode() != N)
+ return CombineTo(N, NewLoad, SDValue(NewLoad.getNode(), 1), true);
}
}
}
@@ -9106,9 +9235,6 @@ struct LoadedSlice {
unsigned Shift = 0, SelectionDAG *DAG = nullptr)
: Inst(Inst), Origin(Origin), Shift(Shift), DAG(DAG) {}
- LoadedSlice(const LoadedSlice &LS)
- : Inst(LS.Inst), Origin(LS.Origin), Shift(LS.Shift), DAG(LS.DAG) {}
-
/// \brief Get the bits used in a chunk of bits \p BitWidth large.
/// \return Result is \p BitWidth and has used bits set to 1 and
/// not used bits set to 0.
@@ -9855,6 +9981,7 @@ SDValue DAGCombiner::TransformFPLoadStorePair(SDNode *N) {
return SDValue();
}
+namespace {
/// Helper struct to parse and store a memory address as base + index + offset.
/// We ignore sign extensions when it is safe to do so.
/// The following two expressions are not equivalent. To differentiate we need
@@ -9942,6 +10069,7 @@ struct BaseIndexOffset {
return BaseIndexOffset(Base, Index, Off, IsIndexSignExt);
}
};
+} // namespace
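The "ignore sign extensions when it is safe" caveat in the BaseIndexOffset comment exists because sign extension does not distribute over addition once the narrow add wraps, so two addresses differing only in where the sext sits can denote different locations:

    // sext(a + b) != sext(a) + sext(b) when the narrow add overflows; this
    // is why the struct must track whether the index was sign-extended.
    #include <cassert>
    #include <cstdint>

    int main() {
      int8_t a = 100, b = 100;
      int16_t sextOfSum = (int16_t)(int8_t)(a + b); // i8 add wraps to -56 first
      int16_t sumOfSext = (int16_t)a + (int16_t)b;  // widened first: 200
      assert(sextOfSum == -56);  // two's complement wrap of 200 in 8 bits
      assert(sumOfSext == 200);
      return 0;
    }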
bool DAGCombiner::MergeStoresOfConstantsOrVecElts(
SmallVectorImpl<MemOpLink> &StoreNodes, EVT MemVT,
@@ -10575,11 +10703,15 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) {
// Try to infer better alignment information than the store already has.
if (OptLevel != CodeGenOpt::None && ST->isUnindexed()) {
if (unsigned Align = DAG.InferPtrAlignment(Ptr)) {
- if (Align > ST->getAlignment())
- return DAG.getTruncStore(Chain, SDLoc(N), Value,
+ if (Align > ST->getAlignment()) {
+ SDValue NewStore =
+ DAG.getTruncStore(Chain, SDLoc(N), Value,
Ptr, ST->getPointerInfo(), ST->getMemoryVT(),
ST->isVolatile(), ST->isNonTemporal(), Align,
ST->getAAInfo());
+ if (NewStore.getNode() != N)
+ return CombineTo(ST, NewStore, true);
+ }
}
}
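This store-alignment hunk and the visitLOAD change earlier apply the same guard: node CSE can make getTruncStore/getLoad hand back the very node being visited, and CombineTo(N, N) would just re-add N to the worklist forever. A minimal model of the guard's shape (Node, buildStore, and combineTo are invented names):

    #include <cassert>

    struct Node { int id; };

    // Models DAG.getTruncStore(): CSE means an identical request returns the
    // existing node rather than a fresh one.
    static Node *buildStore(Node *existing, bool wouldBeIdentical) {
      static Node fresh{2};
      return wouldBeIdentical ? existing : &fresh;
    }

    static void combineTo(Node *n, Node *replacement) {
      assert(n != replacement && "self-replacement would loop the worklist");
    }

    int main() {
      Node n{1};
      Node *rebuilt = buildStore(&n, /*wouldBeIdentical=*/true);
      if (rebuilt != &n)   // the new check: only combine on real progress
        combineTo(&n, rebuilt);
      rebuilt = buildStore(&n, /*wouldBeIdentical=*/false);
      if (rebuilt != &n)
        combineTo(&n, rebuilt);
      return 0;
    }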
@@ -11226,12 +11358,10 @@ SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) {
if (ISD::allOperandsUndef(N))
return DAG.getUNDEF(VT);
- SDValue V = reduceBuildVecExtToExtBuildVec(N);
- if (V.getNode())
+ if (SDValue V = reduceBuildVecExtToExtBuildVec(N))
return V;
- V = reduceBuildVecConvertToConvertBuildVec(N);
- if (V.getNode())
+ if (SDValue V = reduceBuildVecConvertToConvertBuildVec(N))
return V;
// Check to see if this is a BUILD_VECTOR of a bunch of EXTRACT_VECTOR_ELT
@@ -11352,7 +11482,9 @@ SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) {
} else if (VecInT.getSizeInBits() == VT.getSizeInBits() * 2) {
// If the input vector is too large, try to split it.
// We don't support having two input vectors that are too large.
- if (VecIn2.getNode())
+ // If the zero vector was used, we cannot split the vector,
+ // since we'd need 3 inputs.
+ if (UsesZeroVector || VecIn2.getNode())
return SDValue();
if (!TLI.isExtractSubvectorCheap(VT, VT.getVectorNumElements()))
@@ -11364,7 +11496,6 @@ SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) {
DAG.getConstant(VT.getVectorNumElements(), TLI.getVectorIdxTy()));
VecIn1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, VecIn1,
DAG.getConstant(0, TLI.getVectorIdxTy()));
- UsesZeroVector = false;
} else
return SDValue();
}
@@ -11465,14 +11596,12 @@ SDValue DAGCombiner::visitCONCAT_VECTORS(SDNode *N) {
unsigned NumElts = OpVT.getVectorNumElements();
if (ISD::UNDEF == Op.getOpcode())
- for (unsigned i = 0; i != NumElts; ++i)
- Opnds.push_back(DAG.getUNDEF(MinVT));
+ Opnds.append(NumElts, DAG.getUNDEF(MinVT));
if (ISD::BUILD_VECTOR == Op.getOpcode()) {
if (SVT.isFloatingPoint()) {
assert(SVT == OpVT.getScalarType() && "Concat vector type mismatch");
- for (unsigned i = 0; i != NumElts; ++i)
- Opnds.push_back(Op.getOperand(i));
+ Opnds.append(Op->op_begin(), Op->op_begin() + NumElts);
} else {
for (unsigned i = 0; i != NumElts; ++i)
Opnds.push_back(
@@ -11850,7 +11979,7 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) {
// We may have jumped through bitcasts, so the type of the
// BUILD_VECTOR may not match the type of the shuffle.
if (V->getValueType(0) != VT)
- NewBV = DAG.getNode(ISD::BITCAST, SDLoc(N), VT, NewBV);
+ NewBV = DAG.getNode(ISD::BITCAST, SDLoc(N), VT, NewBV);
return NewBV;
}
}
@@ -11872,6 +12001,81 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) {
return V;
}
+ // If this shuffle only has a single input that is a bitcasted shuffle,
+ // attempt to merge the 2 shuffles and suitably bitcast the inputs/output
+ // back to their original types.
+ if (N0.getOpcode() == ISD::BITCAST && N0.hasOneUse() &&
+ N1.getOpcode() == ISD::UNDEF && Level < AfterLegalizeVectorOps &&
+ TLI.isTypeLegal(VT)) {
+
+ // Peek through the bitcast only if there is one user.
+ SDValue BC0 = N0;
+ while (BC0.getOpcode() == ISD::BITCAST) {
+ if (!BC0.hasOneUse())
+ break;
+ BC0 = BC0.getOperand(0);
+ }
+
+ auto ScaleShuffleMask = [](ArrayRef<int> Mask, int Scale) {
+ if (Scale == 1)
+ return SmallVector<int, 8>(Mask.begin(), Mask.end());
+
+ SmallVector<int, 8> NewMask;
+ for (int M : Mask)
+ for (int s = 0; s != Scale; ++s)
+ NewMask.push_back(M < 0 ? -1 : Scale * M + s);
+ return NewMask;
+ };
+
+ if (BC0.getOpcode() == ISD::VECTOR_SHUFFLE && BC0.hasOneUse()) {
+ EVT SVT = VT.getScalarType();
+ EVT InnerVT = BC0->getValueType(0);
+ EVT InnerSVT = InnerVT.getScalarType();
+
+ // Determine which shuffle works with the smaller scalar type.
+ EVT ScaleVT = SVT.bitsLT(InnerSVT) ? VT : InnerVT;
+ EVT ScaleSVT = ScaleVT.getScalarType();
+
+ if (TLI.isTypeLegal(ScaleVT) &&
+ 0 == (InnerSVT.getSizeInBits() % ScaleSVT.getSizeInBits()) &&
+ 0 == (SVT.getSizeInBits() % ScaleSVT.getSizeInBits())) {
+
+ int InnerScale = InnerSVT.getSizeInBits() / ScaleSVT.getSizeInBits();
+ int OuterScale = SVT.getSizeInBits() / ScaleSVT.getSizeInBits();
+
+ // Scale the shuffle masks to the smaller scalar type.
+ ShuffleVectorSDNode *InnerSVN = cast<ShuffleVectorSDNode>(BC0);
+ SmallVector<int, 8> InnerMask =
+ ScaleShuffleMask(InnerSVN->getMask(), InnerScale);
+ SmallVector<int, 8> OuterMask =
+ ScaleShuffleMask(SVN->getMask(), OuterScale);
+
+ // Merge the shuffle masks.
+ SmallVector<int, 8> NewMask;
+ for (int M : OuterMask)
+ NewMask.push_back(M < 0 ? -1 : InnerMask[M]);
+
+ // Test for shuffle mask legality over both commutations.
+ SDValue SV0 = BC0->getOperand(0);
+ SDValue SV1 = BC0->getOperand(1);
+ bool LegalMask = TLI.isShuffleMaskLegal(NewMask, ScaleVT);
+ if (!LegalMask) {
+ std::swap(SV0, SV1);
+ ShuffleVectorSDNode::commuteMask(NewMask);
+ LegalMask = TLI.isShuffleMaskLegal(NewMask, ScaleVT);
+ }
+
+ if (LegalMask) {
+ SV0 = DAG.getNode(ISD::BITCAST, SDLoc(N), ScaleVT, SV0);
+ SV1 = DAG.getNode(ISD::BITCAST, SDLoc(N), ScaleVT, SV1);
+ return DAG.getNode(
+ ISD::BITCAST, SDLoc(N), VT,
+ DAG.getVectorShuffle(ScaleVT, SDLoc(N), SV0, SV1, NewMask));
+ }
+ }
+ }
+ }
+
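ScaleShuffleMask and the merge loop are straightforward to check outside the DAG: scaling expands each lane index into Scale consecutive sub-lane indices, and composing two shuffles is just indexing the inner mask with the outer one. A standalone mirror of the lambda above:

    #include <cassert>
    #include <vector>

    static std::vector<int> scaleShuffleMask(const std::vector<int> &Mask,
                                             int Scale) {
      if (Scale == 1)
        return Mask;
      std::vector<int> NewMask;
      for (int M : Mask)
        for (int s = 0; s != Scale; ++s)
          NewMask.push_back(M < 0 ? -1 : Scale * M + s);
      return NewMask;
    }

    int main() {
      // A v2i32 mask <1, -1> viewed as v4i16: each i32 lane covers two i16s.
      std::vector<int> Scaled = scaleShuffleMask({1, -1}, 2);
      assert((Scaled == std::vector<int>{2, 3, -1, -1}));
      // Composing shuffles: Outer[i] selects an element of the inner shuffle,
      // so the merged mask is Inner[Outer[i]], with -1 (undef) propagated.
      std::vector<int> Inner = {3, 2, 1, 0}, Outer = {0, 0, -1, 2};
      std::vector<int> Merged;
      for (int M : Outer)
        Merged.push_back(M < 0 ? -1 : Inner[M]);
      assert((Merged == std::vector<int>{3, 3, -1, 1}));
      return 0;
    }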
// Canonicalize shuffles according to rules:
// shuffle(A, shuffle(A, B)) -> shuffle(shuffle(A,B), A)
// shuffle(B, shuffle(A, B)) -> shuffle(shuffle(A,B), B)
@@ -11981,16 +12185,7 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) {
// Avoid introducing shuffles with illegal mask.
if (!TLI.isShuffleMaskLegal(Mask, VT)) {
- // Compute the commuted shuffle mask and test again.
- for (unsigned i = 0; i != NumElts; ++i) {
- int idx = Mask[i];
- if (idx < 0)
- continue;
- else if (idx < (int)NumElts)
- Mask[i] = idx + NumElts;
- else
- Mask[i] = idx - NumElts;
- }
+ ShuffleVectorSDNode::commuteMask(Mask);
if (!TLI.isShuffleMaskLegal(Mask, VT))
return SDValue();
@@ -12010,6 +12205,34 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) {
return SDValue();
}
+SDValue DAGCombiner::visitSCALAR_TO_VECTOR(SDNode *N) {
+ SDValue InVal = N->getOperand(0);
+ EVT VT = N->getValueType(0);
+
+ // Replace a SCALAR_TO_VECTOR(EXTRACT_VECTOR_ELT(V,C0)) pattern
+ // with a VECTOR_SHUFFLE.
+ if (InVal.getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
+ SDValue InVec = InVal->getOperand(0);
+ SDValue EltNo = InVal->getOperand(1);
+
+ // FIXME: We could support implicit truncation if the shuffle can be
+ // scaled to a smaller vector scalar type.
+ ConstantSDNode *C0 = dyn_cast<ConstantSDNode>(EltNo);
+ if (C0 && VT == InVec.getValueType() &&
+ VT.getScalarType() == InVal.getValueType()) {
+ SmallVector<int, 8> NewMask(VT.getVectorNumElements(), -1);
+ int Elt = C0->getZExtValue();
+ NewMask[0] = Elt;
+
+ if (TLI.isShuffleMaskLegal(NewMask, VT))
+ return DAG.getVectorShuffle(VT, SDLoc(N), InVec, DAG.getUNDEF(VT),
+ NewMask);
+ }
+ }
+
+ return SDValue();
+}
+
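The new combine can be pictured with plain arrays: broadcasting lane C0 of V into lane 0 of an otherwise-undefined result is exactly a shuffle of V with mask <C0, -1, ...>, which is also why the vector types must match and no implicit truncation is allowed. A small model, with undef lanes represented as 0:

    // SCALAR_TO_VECTOR(EXTRACT_VECTOR_ELT(V, 2)) modeled against
    // shuffle(V, undef, <2, -1, -1, -1>) for a 4-lane vector.
    #include <cassert>

    int main() {
      const int V[4] = {10, 20, 30, 40};
      const int Mask[4] = {2, -1, -1, -1};
      int ViaExtract[4] = {V[2], 0, 0, 0};  // scalar_to_vector of lane 2
      int ViaShuffle[4];
      for (int i = 0; i < 4; ++i)
        ViaShuffle[i] = Mask[i] < 0 ? 0 : V[Mask[i]];
      for (int i = 0; i < 4; ++i)
        assert(ViaExtract[i] == ViaShuffle[i]);
      return 0;
    }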
SDValue DAGCombiner::visitINSERT_SUBVECTOR(SDNode *N) {
SDValue N0 = N->getOperand(0);
SDValue N2 = N->getOperand(2);
@@ -12043,44 +12266,51 @@ SDValue DAGCombiner::visitINSERT_SUBVECTOR(SDNode *N) {
/// vector_shuffle V, Zero, <0, 4, 2, 4>
SDValue DAGCombiner::XformToShuffleWithZero(SDNode *N) {
EVT VT = N->getValueType(0);
- SDLoc dl(N);
SDValue LHS = N->getOperand(0);
SDValue RHS = N->getOperand(1);
- if (N->getOpcode() == ISD::AND) {
- if (RHS.getOpcode() == ISD::BITCAST)
- RHS = RHS.getOperand(0);
- if (RHS.getOpcode() == ISD::BUILD_VECTOR) {
- SmallVector<int, 8> Indices;
- unsigned NumElts = RHS.getNumOperands();
- for (unsigned i = 0; i != NumElts; ++i) {
- SDValue Elt = RHS.getOperand(i);
- if (!isa<ConstantSDNode>(Elt))
- return SDValue();
+ SDLoc dl(N);
- if (cast<ConstantSDNode>(Elt)->isAllOnesValue())
- Indices.push_back(i);
- else if (cast<ConstantSDNode>(Elt)->isNullValue())
- Indices.push_back(NumElts+i);
- else
- return SDValue();
- }
+ // Make sure we're not running after operation legalization where it
+ // may have custom lowered the vector shuffles.
+ if (LegalOperations)
+ return SDValue();
+
+ if (N->getOpcode() != ISD::AND)
+ return SDValue();
- // Let's see if the target supports this vector_shuffle and make sure
- // we're not running after operation legalization where it may have
- // custom lowered the vector shuffles.
- EVT RVT = RHS.getValueType();
- if (LegalOperations || !TLI.isVectorClearMaskLegal(Indices, RVT))
+ if (RHS.getOpcode() == ISD::BITCAST)
+ RHS = RHS.getOperand(0);
+
+ if (RHS.getOpcode() == ISD::BUILD_VECTOR) {
+ SmallVector<int, 8> Indices;
+ unsigned NumElts = RHS.getNumOperands();
+
+ for (unsigned i = 0; i != NumElts; ++i) {
+ SDValue Elt = RHS.getOperand(i);
+ if (!isa<ConstantSDNode>(Elt))
return SDValue();
- // Return the new VECTOR_SHUFFLE node.
- EVT EltVT = RVT.getVectorElementType();
- SmallVector<SDValue,8> ZeroOps(RVT.getVectorNumElements(),
- DAG.getConstant(0, EltVT));
- SDValue Zero = DAG.getNode(ISD::BUILD_VECTOR, SDLoc(N), RVT, ZeroOps);
- LHS = DAG.getNode(ISD::BITCAST, dl, RVT, LHS);
- SDValue Shuf = DAG.getVectorShuffle(RVT, dl, LHS, Zero, &Indices[0]);
- return DAG.getNode(ISD::BITCAST, dl, VT, Shuf);
+ if (cast<ConstantSDNode>(Elt)->isAllOnesValue())
+ Indices.push_back(i);
+ else if (cast<ConstantSDNode>(Elt)->isNullValue())
+ Indices.push_back(NumElts+i);
+ else
+ return SDValue();
}
+
+ // Let's see if the target supports this vector_shuffle.
+ EVT RVT = RHS.getValueType();
+ if (!TLI.isVectorClearMaskLegal(Indices, RVT))
+ return SDValue();
+
+ // Return the new VECTOR_SHUFFLE node.
+ EVT EltVT = RVT.getVectorElementType();
+ SmallVector<SDValue,8> ZeroOps(RVT.getVectorNumElements(),
+ DAG.getConstant(0, EltVT));
+ SDValue Zero = DAG.getNode(ISD::BUILD_VECTOR, SDLoc(N), RVT, ZeroOps);
+ LHS = DAG.getNode(ISD::BITCAST, dl, RVT, LHS);
+ SDValue Shuf = DAG.getVectorShuffle(RVT, dl, LHS, Zero, &Indices[0]);
+ return DAG.getNode(ISD::BITCAST, dl, VT, Shuf);
}
return SDValue();
@@ -12093,8 +12323,9 @@ SDValue DAGCombiner::SimplifyVBinOp(SDNode *N) {
SDValue LHS = N->getOperand(0);
SDValue RHS = N->getOperand(1);
- SDValue Shuffle = XformToShuffleWithZero(N);
- if (Shuffle.getNode()) return Shuffle;
+
+ if (SDValue Shuffle = XformToShuffleWithZero(N))
+ return Shuffle;
// If the LHS and RHS are BUILD_VECTOR nodes, see if we can constant fold
// this operation.
@@ -12172,38 +12403,6 @@ SDValue DAGCombiner::SimplifyVBinOp(SDNode *N) {
return SDValue();
}
-/// Visit a binary vector operation, like FABS/FNEG.
-SDValue DAGCombiner::SimplifyVUnaryOp(SDNode *N) {
- assert(N->getValueType(0).isVector() &&
- "SimplifyVUnaryOp only works on vectors!");
-
- SDValue N0 = N->getOperand(0);
-
- if (N0.getOpcode() != ISD::BUILD_VECTOR)
- return SDValue();
-
- // Operand is a BUILD_VECTOR node, see if we can constant fold it.
- SmallVector<SDValue, 8> Ops;
- for (unsigned i = 0, e = N0.getNumOperands(); i != e; ++i) {
- SDValue Op = N0.getOperand(i);
- if (Op.getOpcode() != ISD::UNDEF &&
- Op.getOpcode() != ISD::ConstantFP)
- break;
- EVT EltVT = Op.getValueType();
- SDValue FoldOp = DAG.getNode(N->getOpcode(), SDLoc(N0), EltVT, Op);
- if (FoldOp.getOpcode() != ISD::UNDEF &&
- FoldOp.getOpcode() != ISD::ConstantFP)
- break;
- Ops.push_back(FoldOp);
- AddToWorklist(FoldOp.getNode());
- }
-
- if (Ops.size() != N0.getNumOperands())
- return SDValue();
-
- return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(N), N0.getValueType(), Ops);
-}
-
SDValue DAGCombiner::SimplifySelect(SDLoc DL, SDValue N0,
SDValue N1, SDValue N2){
assert(N0.getOpcode() ==ISD::SETCC && "First argument must be a SetCC node!");
diff --git a/lib/CodeGen/SelectionDAG/FastISel.cpp b/lib/CodeGen/SelectionDAG/FastISel.cpp
index 1df4a1d..223a149 100644
--- a/lib/CodeGen/SelectionDAG/FastISel.cpp
+++ b/lib/CodeGen/SelectionDAG/FastISel.cpp
@@ -62,6 +62,7 @@
#include "llvm/IR/Operator.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetLowering.h"
#include "llvm/Target/TargetMachine.h"
@@ -497,7 +498,7 @@ bool FastISel::selectGetElementPtr(const User *I) {
OI != E; ++OI) {
const Value *Idx = *OI;
if (auto *StTy = dyn_cast<StructType>(Ty)) {
- unsigned Field = cast<ConstantInt>(Idx)->getZExtValue();
+ uint64_t Field = cast<ConstantInt>(Idx)->getZExtValue();
if (Field) {
// N = N + Offset
TotalOffs += DL.getStructLayout(StTy)->getElementOffset(Field);
@@ -518,8 +519,8 @@ bool FastISel::selectGetElementPtr(const User *I) {
if (CI->isZero())
continue;
// N = N + Offset
- TotalOffs +=
- DL.getTypeAllocSize(Ty) * cast<ConstantInt>(CI)->getSExtValue();
+ uint64_t IdxN = CI->getValue().sextOrTrunc(64).getSExtValue();
+ TotalOffs += DL.getTypeAllocSize(Ty) * IdxN;
if (TotalOffs >= MaxOffs) {
N = fastEmit_ri_(VT, ISD::ADD, N, NIsKill, TotalOffs, VT);
if (!N) // Unhandled operand. Halt "fast" selection and bail.
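Two overflow hazards are fixed in this GEP path: struct field numbers now use uint64_t, and array indices are normalized with sextOrTrunc(64), since APInt::getSExtValue() asserts when the value does not fit in 64 bits (think of a GEP indexed by an i128 constant). A loose analogue using the GCC/Clang __int128 extension, not the real APInt API:

    #include <cassert>
    #include <cstdint>

    // Miniature of CI->getValue().sextOrTrunc(64).getSExtValue().
    static int64_t sextOrTrunc64(__int128 v) {
      return (int64_t)v;  // keep the low 64 bits (two's complement wrap)
    }

    int main() {
      __int128 wide = ((__int128)1 << 90) + 7; // would assert in getSExtValue()
      assert(sextOrTrunc64(wide) == 7);        // only the low 64 bits survive
      assert(sextOrTrunc64(-5) == -5);         // in-range values are unchanged
      return 0;
    }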
@@ -801,7 +802,8 @@ bool FastISel::selectPatchpoint(const CallInst *I) {
return false;
// Push the register mask info.
- Ops.push_back(MachineOperand::CreateRegMask(TRI.getCallPreservedMask(CC)));
+ Ops.push_back(MachineOperand::CreateRegMask(
+ TRI.getCallPreservedMask(*FuncInfo.MF, CC)));
// Add scratch registers as implicit def and early clobber.
const MCPhysReg *ScratchRegs = TLI.getScratchRegisters(CC);
diff --git a/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp b/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp
index 7e72dc6..291b583 100644
--- a/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp
+++ b/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp
@@ -31,6 +31,7 @@
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
+#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetFrameLowering.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetLowering.h"
diff --git a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
index 61c0a6f..ece38f3 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
@@ -1442,13 +1442,27 @@ SDValue SelectionDAGLegalize::ExpandExtractFromVectorThroughStack(SDValue Op) {
Idx = DAG.getZExtOrTrunc(Idx, dl, TLI.getPointerTy());
StackPtr = DAG.getNode(ISD::ADD, dl, Idx.getValueType(), Idx, StackPtr);
+ SDValue NewLoad;
+
if (Op.getValueType().isVector())
- return DAG.getLoad(Op.getValueType(), dl, Ch, StackPtr,MachinePointerInfo(),
- false, false, false, 0);
- return DAG.getExtLoad(ISD::EXTLOAD, dl, Op.getValueType(), Ch, StackPtr,
- MachinePointerInfo(),
- Vec.getValueType().getVectorElementType(),
- false, false, false, 0);
+ NewLoad = DAG.getLoad(Op.getValueType(), dl, Ch, StackPtr,
+ MachinePointerInfo(), false, false, false, 0);
+ else
+ NewLoad = DAG.getExtLoad(
+ ISD::EXTLOAD, dl, Op.getValueType(), Ch, StackPtr, MachinePointerInfo(),
+ Vec.getValueType().getVectorElementType(), false, false, false, 0);
+
+ // Replace the chain going out of the store, by the one out of the load.
+ DAG.ReplaceAllUsesOfValueWith(Ch, SDValue(NewLoad.getNode(), 1));
+
+ // We introduced a cycle though, so update the load's operands, making sure
+ // to use the original store's chain as an incoming chain.
+ SmallVector<SDValue, 6> NewLoadOperands(NewLoad->op_begin(),
+ NewLoad->op_end());
+ NewLoadOperands[0] = Ch;
+ NewLoad =
+ SDValue(DAG.UpdateNodeOperands(NewLoad.getNode(), NewLoadOperands), 0);
+ return NewLoad;
}
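The subtle step here is that ReplaceAllUsesOfValueWith rewires every use of the store's output chain, including the chain operand of the load just created, leaving the load depending on itself; the UpdateNodeOperands call then restores the original chain as the load's input. The same two steps in a toy node graph (Node and replaceAllUses are invented for the sketch):

    #include <assert.h>
    #include <vector>

    struct Node { Node *chainIn = nullptr; };

    // ReplaceAllUsesOfValueWith in miniature: retarget every edge from -> to.
    static void replaceAllUses(std::vector<Node *> &nodes, Node *from,
                               Node *to) {
      for (Node *n : nodes)
        if (n->chainIn == from)
          n->chainIn = to;
    }

    int main() {
      Node store, load, user;
      load.chainIn = &store;   // the new load is chained after the store
      user.chainIn = &store;   // some later node also used the store's chain
      std::vector<Node *> nodes = {&load, &user};
      replaceAllUses(nodes, &store, &load);
      assert(load.chainIn == &load);   // oops: the load depends on itself
      load.chainIn = &store;           // UpdateNodeOperands: break the cycle
      assert(user.chainIn == &load && load.chainIn == &store);
      return 0;
    }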
SDValue SelectionDAGLegalize::ExpandInsertToVectorThroughStack(SDValue Op) {
@@ -2817,132 +2831,8 @@ SDValue SelectionDAGLegalize::ExpandBitCount(unsigned Opc, SDValue Op,
std::pair <SDValue, SDValue> SelectionDAGLegalize::ExpandAtomic(SDNode *Node) {
unsigned Opc = Node->getOpcode();
MVT VT = cast<AtomicSDNode>(Node)->getMemoryVT().getSimpleVT();
- RTLIB::Libcall LC;
-
- switch (Opc) {
- default:
- llvm_unreachable("Unhandled atomic intrinsic Expand!");
- case ISD::ATOMIC_SWAP:
- switch (VT.SimpleTy) {
- default: llvm_unreachable("Unexpected value type for atomic!");
- case MVT::i8: LC = RTLIB::SYNC_LOCK_TEST_AND_SET_1; break;
- case MVT::i16: LC = RTLIB::SYNC_LOCK_TEST_AND_SET_2; break;
- case MVT::i32: LC = RTLIB::SYNC_LOCK_TEST_AND_SET_4; break;
- case MVT::i64: LC = RTLIB::SYNC_LOCK_TEST_AND_SET_8; break;
- case MVT::i128:LC = RTLIB::SYNC_LOCK_TEST_AND_SET_16;break;
- }
- break;
- case ISD::ATOMIC_CMP_SWAP:
- switch (VT.SimpleTy) {
- default: llvm_unreachable("Unexpected value type for atomic!");
- case MVT::i8: LC = RTLIB::SYNC_VAL_COMPARE_AND_SWAP_1; break;
- case MVT::i16: LC = RTLIB::SYNC_VAL_COMPARE_AND_SWAP_2; break;
- case MVT::i32: LC = RTLIB::SYNC_VAL_COMPARE_AND_SWAP_4; break;
- case MVT::i64: LC = RTLIB::SYNC_VAL_COMPARE_AND_SWAP_8; break;
- case MVT::i128:LC = RTLIB::SYNC_VAL_COMPARE_AND_SWAP_16;break;
- }
- break;
- case ISD::ATOMIC_LOAD_ADD:
- switch (VT.SimpleTy) {
- default: llvm_unreachable("Unexpected value type for atomic!");
- case MVT::i8: LC = RTLIB::SYNC_FETCH_AND_ADD_1; break;
- case MVT::i16: LC = RTLIB::SYNC_FETCH_AND_ADD_2; break;
- case MVT::i32: LC = RTLIB::SYNC_FETCH_AND_ADD_4; break;
- case MVT::i64: LC = RTLIB::SYNC_FETCH_AND_ADD_8; break;
- case MVT::i128:LC = RTLIB::SYNC_FETCH_AND_ADD_16;break;
- }
- break;
- case ISD::ATOMIC_LOAD_SUB:
- switch (VT.SimpleTy) {
- default: llvm_unreachable("Unexpected value type for atomic!");
- case MVT::i8: LC = RTLIB::SYNC_FETCH_AND_SUB_1; break;
- case MVT::i16: LC = RTLIB::SYNC_FETCH_AND_SUB_2; break;
- case MVT::i32: LC = RTLIB::SYNC_FETCH_AND_SUB_4; break;
- case MVT::i64: LC = RTLIB::SYNC_FETCH_AND_SUB_8; break;
- case MVT::i128:LC = RTLIB::SYNC_FETCH_AND_SUB_16;break;
- }
- break;
- case ISD::ATOMIC_LOAD_AND:
- switch (VT.SimpleTy) {
- default: llvm_unreachable("Unexpected value type for atomic!");
- case MVT::i8: LC = RTLIB::SYNC_FETCH_AND_AND_1; break;
- case MVT::i16: LC = RTLIB::SYNC_FETCH_AND_AND_2; break;
- case MVT::i32: LC = RTLIB::SYNC_FETCH_AND_AND_4; break;
- case MVT::i64: LC = RTLIB::SYNC_FETCH_AND_AND_8; break;
- case MVT::i128:LC = RTLIB::SYNC_FETCH_AND_AND_16;break;
- }
- break;
- case ISD::ATOMIC_LOAD_OR:
- switch (VT.SimpleTy) {
- default: llvm_unreachable("Unexpected value type for atomic!");
- case MVT::i8: LC = RTLIB::SYNC_FETCH_AND_OR_1; break;
- case MVT::i16: LC = RTLIB::SYNC_FETCH_AND_OR_2; break;
- case MVT::i32: LC = RTLIB::SYNC_FETCH_AND_OR_4; break;
- case MVT::i64: LC = RTLIB::SYNC_FETCH_AND_OR_8; break;
- case MVT::i128:LC = RTLIB::SYNC_FETCH_AND_OR_16;break;
- }
- break;
- case ISD::ATOMIC_LOAD_XOR:
- switch (VT.SimpleTy) {
- default: llvm_unreachable("Unexpected value type for atomic!");
- case MVT::i8: LC = RTLIB::SYNC_FETCH_AND_XOR_1; break;
- case MVT::i16: LC = RTLIB::SYNC_FETCH_AND_XOR_2; break;
- case MVT::i32: LC = RTLIB::SYNC_FETCH_AND_XOR_4; break;
- case MVT::i64: LC = RTLIB::SYNC_FETCH_AND_XOR_8; break;
- case MVT::i128:LC = RTLIB::SYNC_FETCH_AND_XOR_16;break;
- }
- break;
- case ISD::ATOMIC_LOAD_NAND:
- switch (VT.SimpleTy) {
- default: llvm_unreachable("Unexpected value type for atomic!");
- case MVT::i8: LC = RTLIB::SYNC_FETCH_AND_NAND_1; break;
- case MVT::i16: LC = RTLIB::SYNC_FETCH_AND_NAND_2; break;
- case MVT::i32: LC = RTLIB::SYNC_FETCH_AND_NAND_4; break;
- case MVT::i64: LC = RTLIB::SYNC_FETCH_AND_NAND_8; break;
- case MVT::i128:LC = RTLIB::SYNC_FETCH_AND_NAND_16;break;
- }
- break;
- case ISD::ATOMIC_LOAD_MAX:
- switch (VT.SimpleTy) {
- default: llvm_unreachable("Unexpected value type for atomic!");
- case MVT::i8: LC = RTLIB::SYNC_FETCH_AND_MAX_1; break;
- case MVT::i16: LC = RTLIB::SYNC_FETCH_AND_MAX_2; break;
- case MVT::i32: LC = RTLIB::SYNC_FETCH_AND_MAX_4; break;
- case MVT::i64: LC = RTLIB::SYNC_FETCH_AND_MAX_8; break;
- case MVT::i128:LC = RTLIB::SYNC_FETCH_AND_MAX_16;break;
- }
- break;
- case ISD::ATOMIC_LOAD_UMAX:
- switch (VT.SimpleTy) {
- default: llvm_unreachable("Unexpected value type for atomic!");
- case MVT::i8: LC = RTLIB::SYNC_FETCH_AND_UMAX_1; break;
- case MVT::i16: LC = RTLIB::SYNC_FETCH_AND_UMAX_2; break;
- case MVT::i32: LC = RTLIB::SYNC_FETCH_AND_UMAX_4; break;
- case MVT::i64: LC = RTLIB::SYNC_FETCH_AND_UMAX_8; break;
- case MVT::i128:LC = RTLIB::SYNC_FETCH_AND_UMAX_16;break;
- }
- break;
- case ISD::ATOMIC_LOAD_MIN:
- switch (VT.SimpleTy) {
- default: llvm_unreachable("Unexpected value type for atomic!");
- case MVT::i8: LC = RTLIB::SYNC_FETCH_AND_MIN_1; break;
- case MVT::i16: LC = RTLIB::SYNC_FETCH_AND_MIN_2; break;
- case MVT::i32: LC = RTLIB::SYNC_FETCH_AND_MIN_4; break;
- case MVT::i64: LC = RTLIB::SYNC_FETCH_AND_MIN_8; break;
- case MVT::i128:LC = RTLIB::SYNC_FETCH_AND_MIN_16;break;
- }
- break;
- case ISD::ATOMIC_LOAD_UMIN:
- switch (VT.SimpleTy) {
- default: llvm_unreachable("Unexpected value type for atomic!");
- case MVT::i8: LC = RTLIB::SYNC_FETCH_AND_UMIN_1; break;
- case MVT::i16: LC = RTLIB::SYNC_FETCH_AND_UMIN_2; break;
- case MVT::i32: LC = RTLIB::SYNC_FETCH_AND_UMIN_4; break;
- case MVT::i64: LC = RTLIB::SYNC_FETCH_AND_UMIN_8; break;
- case MVT::i128:LC = RTLIB::SYNC_FETCH_AND_UMIN_16;break;
- }
- break;
- }
+ RTLIB::Libcall LC = RTLIB::getATOMIC(Opc, VT);
+ assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unexpected atomic op or value type!");
return ExpandChainLibCall(LC, Node, false);
}
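// [Editorial sketch, not part of this patch] The new RTLIB::getATOMIC(Opc, VT)
// helper replaces the per-opcode/per-width switches above. A minimal sketch of
// the table-lookup shape such a helper can take (name and layout assumed here
// for illustration only):
static RTLIB::Libcall getATOMICSketch(unsigned Opc, MVT VT) {
  // Map the memory width to a table index: i8 -> 0, i16 -> 1, ..., i128 -> 4.
  unsigned ModeN;
  switch (VT.SimpleTy) {
  case MVT::i8:   ModeN = 0; break;
  case MVT::i16:  ModeN = 1; break;
  case MVT::i32:  ModeN = 2; break;
  case MVT::i64:  ModeN = 3; break;
  case MVT::i128: ModeN = 4; break;
  default:        return RTLIB::UNKNOWN_LIBCALL;
  }
  switch (Opc) {
  case ISD::ATOMIC_SWAP: {
    static const RTLIB::Libcall LC[5] = {
        RTLIB::SYNC_LOCK_TEST_AND_SET_1, RTLIB::SYNC_LOCK_TEST_AND_SET_2,
        RTLIB::SYNC_LOCK_TEST_AND_SET_4, RTLIB::SYNC_LOCK_TEST_AND_SET_8,
        RTLIB::SYNC_LOCK_TEST_AND_SET_16};
    return LC[ModeN];
  }
  // ... one five-entry table per remaining ISD::ATOMIC_* opcode ...
  default:
    return RTLIB::UNKNOWN_LIBCALL;
  }
}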
diff --git a/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
index 5507c70..25e80b9 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
@@ -1116,7 +1116,6 @@ SDValue DAGTypeLegalizer::PromoteIntOp_STORE(StoreSDNode *N, unsigned OpNo){
SDValue DAGTypeLegalizer::PromoteIntOp_MSTORE(MaskedStoreSDNode *N, unsigned OpNo){
- assert(OpNo == 2 && "Only know how to promote the mask!");
SDValue DataOp = N->getValue();
EVT DataVT = DataOp.getValueType();
SDValue Mask = N->getMask();
@@ -1127,7 +1126,8 @@ SDValue DAGTypeLegalizer::PromoteIntOp_MSTORE(MaskedStoreSDNode *N, unsigned OpN
if (!TLI.isTypeLegal(DataVT)) {
if (getTypeAction(DataVT) == TargetLowering::TypePromoteInteger) {
DataOp = GetPromotedInteger(DataOp);
- Mask = PromoteTargetBoolean(Mask, DataOp.getValueType());
+ if (!TLI.isTypeLegal(MaskVT))
+ Mask = PromoteTargetBoolean(Mask, DataOp.getValueType());
TruncateStore = true;
}
else {
@@ -1323,92 +1323,8 @@ void DAGTypeLegalizer::ExpandIntegerResult(SDNode *N, unsigned ResNo) {
std::pair <SDValue, SDValue> DAGTypeLegalizer::ExpandAtomic(SDNode *Node) {
unsigned Opc = Node->getOpcode();
MVT VT = cast<AtomicSDNode>(Node)->getMemoryVT().getSimpleVT();
- RTLIB::Libcall LC;
-
- switch (Opc) {
- default:
- llvm_unreachable("Unhandled atomic intrinsic Expand!");
- case ISD::ATOMIC_SWAP:
- switch (VT.SimpleTy) {
- default: llvm_unreachable("Unexpected value type for atomic!");
- case MVT::i8: LC = RTLIB::SYNC_LOCK_TEST_AND_SET_1; break;
- case MVT::i16: LC = RTLIB::SYNC_LOCK_TEST_AND_SET_2; break;
- case MVT::i32: LC = RTLIB::SYNC_LOCK_TEST_AND_SET_4; break;
- case MVT::i64: LC = RTLIB::SYNC_LOCK_TEST_AND_SET_8; break;
- case MVT::i128:LC = RTLIB::SYNC_LOCK_TEST_AND_SET_16;break;
- }
- break;
- case ISD::ATOMIC_CMP_SWAP:
- switch (VT.SimpleTy) {
- default: llvm_unreachable("Unexpected value type for atomic!");
- case MVT::i8: LC = RTLIB::SYNC_VAL_COMPARE_AND_SWAP_1; break;
- case MVT::i16: LC = RTLIB::SYNC_VAL_COMPARE_AND_SWAP_2; break;
- case MVT::i32: LC = RTLIB::SYNC_VAL_COMPARE_AND_SWAP_4; break;
- case MVT::i64: LC = RTLIB::SYNC_VAL_COMPARE_AND_SWAP_8; break;
- case MVT::i128:LC = RTLIB::SYNC_VAL_COMPARE_AND_SWAP_16;break;
- }
- break;
- case ISD::ATOMIC_LOAD_ADD:
- switch (VT.SimpleTy) {
- default: llvm_unreachable("Unexpected value type for atomic!");
- case MVT::i8: LC = RTLIB::SYNC_FETCH_AND_ADD_1; break;
- case MVT::i16: LC = RTLIB::SYNC_FETCH_AND_ADD_2; break;
- case MVT::i32: LC = RTLIB::SYNC_FETCH_AND_ADD_4; break;
- case MVT::i64: LC = RTLIB::SYNC_FETCH_AND_ADD_8; break;
- case MVT::i128:LC = RTLIB::SYNC_FETCH_AND_ADD_16;break;
- }
- break;
- case ISD::ATOMIC_LOAD_SUB:
- switch (VT.SimpleTy) {
- default: llvm_unreachable("Unexpected value type for atomic!");
- case MVT::i8: LC = RTLIB::SYNC_FETCH_AND_SUB_1; break;
- case MVT::i16: LC = RTLIB::SYNC_FETCH_AND_SUB_2; break;
- case MVT::i32: LC = RTLIB::SYNC_FETCH_AND_SUB_4; break;
- case MVT::i64: LC = RTLIB::SYNC_FETCH_AND_SUB_8; break;
- case MVT::i128:LC = RTLIB::SYNC_FETCH_AND_SUB_16;break;
- }
- break;
- case ISD::ATOMIC_LOAD_AND:
- switch (VT.SimpleTy) {
- default: llvm_unreachable("Unexpected value type for atomic!");
- case MVT::i8: LC = RTLIB::SYNC_FETCH_AND_AND_1; break;
- case MVT::i16: LC = RTLIB::SYNC_FETCH_AND_AND_2; break;
- case MVT::i32: LC = RTLIB::SYNC_FETCH_AND_AND_4; break;
- case MVT::i64: LC = RTLIB::SYNC_FETCH_AND_AND_8; break;
- case MVT::i128:LC = RTLIB::SYNC_FETCH_AND_AND_16;break;
- }
- break;
- case ISD::ATOMIC_LOAD_OR:
- switch (VT.SimpleTy) {
- default: llvm_unreachable("Unexpected value type for atomic!");
- case MVT::i8: LC = RTLIB::SYNC_FETCH_AND_OR_1; break;
- case MVT::i16: LC = RTLIB::SYNC_FETCH_AND_OR_2; break;
- case MVT::i32: LC = RTLIB::SYNC_FETCH_AND_OR_4; break;
- case MVT::i64: LC = RTLIB::SYNC_FETCH_AND_OR_8; break;
- case MVT::i128:LC = RTLIB::SYNC_FETCH_AND_OR_16;break;
- }
- break;
- case ISD::ATOMIC_LOAD_XOR:
- switch (VT.SimpleTy) {
- default: llvm_unreachable("Unexpected value type for atomic!");
- case MVT::i8: LC = RTLIB::SYNC_FETCH_AND_XOR_1; break;
- case MVT::i16: LC = RTLIB::SYNC_FETCH_AND_XOR_2; break;
- case MVT::i32: LC = RTLIB::SYNC_FETCH_AND_XOR_4; break;
- case MVT::i64: LC = RTLIB::SYNC_FETCH_AND_XOR_8; break;
- case MVT::i128:LC = RTLIB::SYNC_FETCH_AND_XOR_16;break;
- }
- break;
- case ISD::ATOMIC_LOAD_NAND:
- switch (VT.SimpleTy) {
- default: llvm_unreachable("Unexpected value type for atomic!");
- case MVT::i8: LC = RTLIB::SYNC_FETCH_AND_NAND_1; break;
- case MVT::i16: LC = RTLIB::SYNC_FETCH_AND_NAND_2; break;
- case MVT::i32: LC = RTLIB::SYNC_FETCH_AND_NAND_4; break;
- case MVT::i64: LC = RTLIB::SYNC_FETCH_AND_NAND_8; break;
- case MVT::i128:LC = RTLIB::SYNC_FETCH_AND_NAND_16;break;
- }
- break;
- }
+ RTLIB::Libcall LC = RTLIB::getATOMIC(Opc, VT);
+ assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unexpected atomic op or value type!");
return ExpandChainLibCall(LC, Node, false);
}
@@ -1417,12 +1333,19 @@ std::pair <SDValue, SDValue> DAGTypeLegalizer::ExpandAtomic(SDNode *Node) {
/// and the shift amount is a constant 'Amt'. Expand the operation.
void DAGTypeLegalizer::ExpandShiftByConstant(SDNode *N, unsigned Amt,
SDValue &Lo, SDValue &Hi) {
- assert(Amt && "Expected zero shifts to be already optimized away.");
SDLoc DL(N);
// Expand the incoming operand to be shifted, so that we have its parts
SDValue InL, InH;
GetExpandedInteger(N->getOperand(0), InL, InH);
+ // Though Amt shouldn't usually be 0, it's possible, e.g. when legalization
+ // has split a vector shift such as <op1, op2> SHL <0, 2>.
+ if (!Amt) {
+ Lo = InL;
+ Hi = InH;
+ return;
+ }
+
EVT NVT = InL.getValueType();
unsigned VTBits = N->getValueType(0).getSizeInBits();
unsigned NVTBits = NVT.getSizeInBits();
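// [Editorial note] The zero-amount case arises when a vector shift with
// per-element constant amounts is legalized lane by lane; e.g. in IR:
//   %r = shl <2 x i128> %v, <i128 0, i128 2>
// expanding the lane shifted by 0 reaches ExpandShiftByConstant with Amt == 0,
// and returning the operand's parts unchanged is the correct expansion.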
diff --git a/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
index 63671f7..f7e4557 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
@@ -2553,6 +2553,16 @@ SDValue DAGTypeLegalizer::WidenVecRes_VSETCC(SDNode *N) {
assert(InVT.isVector() && "can not widen non-vector type");
EVT WidenInVT = EVT::getVectorVT(*DAG.getContext(),
InVT.getVectorElementType(), WidenNumElts);
+
+ // The input and output types often differ here, and it could be that while
+ // we'd prefer to widen the result type, the input operands have been split.
+ // In this case, we also need to split the result of this node as well.
+ if (getTypeAction(InVT) == TargetLowering::TypeSplitVector) {
+ SDValue SplitVSetCC = SplitVecOp_VSETCC(N);
+ SDValue Res = ModifyToType(SplitVSetCC, WidenVT);
+ return Res;
+ }
+
InOp1 = GetWidenedVector(InOp1);
SDValue InOp2 = GetWidenedVector(N->getOperand(1));
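// [Editorial note] When the input's type action is a split rather than a
// widen, GetWidenedVector would not apply, so the setcc is first lowered
// through the existing split path and the split result is then converted to
// the desired widened result type with ModifyToType.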
diff --git a/lib/CodeGen/SelectionDAG/ResourcePriorityQueue.cpp b/lib/CodeGen/SelectionDAG/ResourcePriorityQueue.cpp
index db38b76..6303422 100644
--- a/lib/CodeGen/SelectionDAG/ResourcePriorityQueue.cpp
+++ b/lib/CodeGen/SelectionDAG/ResourcePriorityQueue.cpp
@@ -47,7 +47,7 @@ ResourcePriorityQueue::ResourcePriorityQueue(SelectionDAGISel *IS)
TRI = STI.getRegisterInfo();
TLI = IS->TLI;
TII = STI.getInstrInfo();
- ResourcesModel = TII->CreateTargetScheduleState(STI);
+ ResourcesModel.reset(TII->CreateTargetScheduleState(STI));
// This hard requirement could be relaxed, but for now
// do not let it proceed.
assert(ResourcesModel && "Unimplemented CreateTargetScheduleState.");
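// [Editorial note, assumption] The reset() call implies ResourcesModel is now
// held in a smart pointer such as std::unique_ptr, so assigning a new model
// frees any previous one and takes ownership of the result. The pattern, in
// isolation:
//   std::unique_ptr<DFAPacketizer> Model;                    // assumed type
//   Model.reset(TII->CreateTargetScheduleState(STI));        // Model owns it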
@@ -637,17 +637,3 @@ void ResourcePriorityQueue::remove(SUnit *SU) {
Queue.pop_back();
}
-
-
-#ifdef NDEBUG
-void ResourcePriorityQueue::dump(ScheduleDAG *DAG) const {}
-#else
-void ResourcePriorityQueue::dump(ScheduleDAG *DAG) const {
- ResourcePriorityQueue q = *this;
- while (!q.empty()) {
- SUnit *su = q.pop();
- dbgs() << "Height " << su->getHeight() << ": ";
- su->dump(DAG);
- }
-}
-#endif
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index 9466f4d..b52f648 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -196,6 +196,22 @@ bool ISD::isBuildVectorOfConstantSDNodes(const SDNode *N) {
return true;
}
+/// \brief Return true if the specified node is a BUILD_VECTOR node of
+/// all ConstantFPSDNode or undef.
+bool ISD::isBuildVectorOfConstantFPSDNodes(const SDNode *N) {
+ if (N->getOpcode() != ISD::BUILD_VECTOR)
+ return false;
+
+ for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
+ SDValue Op = N->getOperand(i);
+ if (Op.getOpcode() == ISD::UNDEF)
+ continue;
+ if (!isa<ConstantFPSDNode>(Op))
+ return false;
+ }
+ return true;
+}
+
/// isScalarToVector - Return true if the specified node is a
/// ISD::SCALAR_TO_VECTOR node or a BUILD_VECTOR node where only the low
/// element is not an undef.
@@ -1446,13 +1462,7 @@ SDValue SelectionDAG::getCondCode(ISD::CondCode Cond) {
// N2 to point at N1.
static void commuteShuffle(SDValue &N1, SDValue &N2, SmallVectorImpl<int> &M) {
std::swap(N1, N2);
- int NElts = M.size();
- for (int i = 0; i != NElts; ++i) {
- if (M[i] >= NElts)
- M[i] -= NElts;
- else if (M[i] >= 0)
- M[i] += NElts;
- }
+ ShuffleVectorSDNode::commuteMask(M);
}
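// [Editorial note] Worked example of commuteMask for a 4-element mask:
// <0, 5, 2, 7> (lanes 0/2 read N1, lanes 1/3 read N2) becomes <4, 1, 6, 3>,
// exactly what the removed inline loop computed: indices < NElts gain NElts,
// indices >= NElts lose NElts, and sentinel -1 entries stay -1.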
SDValue SelectionDAG::getVectorShuffle(EVT VT, SDLoc dl, SDValue N1,
@@ -1625,19 +1635,8 @@ SDValue SelectionDAG::getVectorShuffle(EVT VT, SDLoc dl, SDValue N1,
SDValue SelectionDAG::getCommutedVectorShuffle(const ShuffleVectorSDNode &SV) {
MVT VT = SV.getSimpleValueType(0);
- unsigned NumElems = VT.getVectorNumElements();
- SmallVector<int, 8> MaskVec;
-
- for (unsigned i = 0; i != NumElems; ++i) {
- int Idx = SV.getMaskElt(i);
- if (Idx >= 0) {
- if (Idx < (int)NumElems)
- Idx += NumElems;
- else
- Idx -= NumElems;
- }
- MaskVec.push_back(Idx);
- }
+ SmallVector<int, 8> MaskVec(SV.getMask().begin(), SV.getMask().end());
+ ShuffleVectorSDNode::commuteMask(MaskVec);
SDValue Op0 = SV.getOperand(0);
SDValue Op1 = SV.getOperand(1);
@@ -2844,7 +2843,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL,
}
}
- // Constant fold unary operations with a vector integer operand.
+ // Constant fold unary operations with a vector integer or float operand.
if (BuildVectorSDNode *BV = dyn_cast<BuildVectorSDNode>(Operand.getNode())) {
if (BV->isConstant()) {
switch (Opcode) {
@@ -2852,18 +2851,25 @@ SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL,
// FIXME: Entirely reasonable to perform folding of other unary
// operations here as the need arises.
break;
+ case ISD::FNEG:
+ case ISD::FABS:
+ case ISD::FP_EXTEND:
+ case ISD::TRUNCATE:
case ISD::UINT_TO_FP:
case ISD::SINT_TO_FP: {
+ // Let the above scalar folding handle the folding of each element.
SmallVector<SDValue, 8> Ops;
for (int i = 0, e = VT.getVectorNumElements(); i != e; ++i) {
SDValue OpN = BV->getOperand(i);
- // Let the above scalar folding handle the conversion of each
- // element.
- OpN = getNode(ISD::SINT_TO_FP, DL, VT.getVectorElementType(),
- OpN);
+ OpN = getNode(Opcode, DL, VT.getVectorElementType(), OpN);
+ if (OpN.getOpcode() != ISD::UNDEF &&
+ OpN.getOpcode() != ISD::Constant &&
+ OpN.getOpcode() != ISD::ConstantFP)
+ break;
Ops.push_back(OpN);
}
- return getNode(ISD::BUILD_VECTOR, DL, VT, Ops);
+ if (Ops.size() == VT.getVectorNumElements())
+ return getNode(ISD::BUILD_VECTOR, DL, VT, Ops);
}
}
}
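// [Editorial note] The Ops.size() check guards the partial-fold case: if any
// element fails to fold to UNDEF/Constant/ConstantFP the loop breaks early,
// Ops ends up short of the full element count, and no BUILD_VECTOR is formed,
// so lowering falls through to the generic node-creation path.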
@@ -5418,17 +5424,9 @@ UpdateNodeOperands(SDNode *N, ArrayRef<SDValue> Ops) {
assert(N->getNumOperands() == NumOps &&
"Update with wrong number of operands");
- // Check to see if there is no change.
- bool AnyChange = false;
- for (unsigned i = 0; i != NumOps; ++i) {
- if (Ops[i] != N->getOperand(i)) {
- AnyChange = true;
- break;
- }
- }
-
- // No operands changed, just return the input node.
- if (!AnyChange) return N;
+ // If no operands changed just return the input node.
+ if (Ops.empty() || std::equal(Ops.begin(), Ops.end(), N->op_begin()))
+ return N;
// See if the modified node already exists.
void *InsertPos = nullptr;
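// [Editorial note] std::equal compares Ops element-wise against the node's
// existing operands; the assert above guarantees both ranges have the same
// length, so the single-range overload is safe, and the Ops.empty() test
// merely short-circuits the trivial zero-operand case.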
@@ -6673,8 +6671,8 @@ unsigned SelectionDAG::InferPtrAlignment(SDValue Ptr) const {
if (TLI->isGAPlusOffset(Ptr.getNode(), GV, GVOffset)) {
unsigned PtrWidth = TLI->getPointerTypeSizeInBits(GV->getType());
APInt KnownZero(PtrWidth, 0), KnownOne(PtrWidth, 0);
- llvm::computeKnownBits(const_cast<GlobalValue*>(GV), KnownZero, KnownOne,
- TLI->getDataLayout());
+ llvm::computeKnownBits(const_cast<GlobalValue *>(GV), KnownZero, KnownOne,
+ *TLI->getDataLayout());
unsigned AlignBits = KnownZero.countTrailingOnes();
unsigned Align = AlignBits ? 1 << std::min(31U, AlignBits) : 0;
if (Align)
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
index 097b618..6c14e79 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -1016,6 +1016,24 @@ void SelectionDAGBuilder::resolveDanglingDebugInfo(const Value *V,
}
}
+/// getCopyFromRegs - If there was a virtual register allocated for the value
+/// V, emit CopyFromReg of the specified type Ty. Otherwise return an empty
+/// SDValue().
+SDValue SelectionDAGBuilder::getCopyFromRegs(const Value *V, Type *Ty) {
+ DenseMap<const Value *, unsigned>::iterator It = FuncInfo.ValueMap.find(V);
+ SDValue res;
+
+ if (It != FuncInfo.ValueMap.end()) {
+ unsigned InReg = It->second;
+ RegsForValue RFV(*DAG.getContext(), DAG.getTargetLoweringInfo(), InReg,
+ Ty);
+ SDValue Chain = DAG.getEntryNode();
+ res = RFV.getCopyFromRegs(DAG, FuncInfo, getCurSDLoc(), Chain, nullptr, V);
+ resolveDanglingDebugInfo(V, res);
+ }
+
+ return res;
+}
+
/// getValue - Return an SDValue for the given Value.
SDValue SelectionDAGBuilder::getValue(const Value *V) {
// If we already have an SDValue for this value, use it. It's important
@@ -1026,15 +1044,9 @@ SDValue SelectionDAGBuilder::getValue(const Value *V) {
// If there's a virtual register allocated and initialized for this
// value, use it.
- DenseMap<const Value *, unsigned>::iterator It = FuncInfo.ValueMap.find(V);
- if (It != FuncInfo.ValueMap.end()) {
- unsigned InReg = It->second;
- RegsForValue RFV(*DAG.getContext(), DAG.getTargetLoweringInfo(), InReg,
- V->getType());
- SDValue Chain = DAG.getEntryNode();
- N = RFV.getCopyFromRegs(DAG, FuncInfo, getCurSDLoc(), Chain, nullptr, V);
- resolveDanglingDebugInfo(V, N);
- return N;
+ SDValue copyFromReg = getCopyFromRegs(V, V->getType());
+ if (copyFromReg.getNode()) {
+ return copyFromReg;
}
// Otherwise create a new SDValue and remember it.
@@ -1573,19 +1585,13 @@ void SelectionDAGBuilder::visitBr(const BranchInst &I) {
// Update machine-CFG edges.
MachineBasicBlock *Succ0MBB = FuncInfo.MBBMap[I.getSuccessor(0)];
- // Figure out which block is immediately after the current one.
- MachineBasicBlock *NextBlock = nullptr;
- MachineFunction::iterator BBI = BrMBB;
- if (++BBI != FuncInfo.MF->end())
- NextBlock = BBI;
-
if (I.isUnconditional()) {
// Update machine-CFG edges.
BrMBB->addSuccessor(Succ0MBB);
// If this is not a fall-through branch or optimizations are switched off,
// emit the branch.
- if (Succ0MBB != NextBlock || TM.getOptLevel() == CodeGenOpt::None)
+ if (Succ0MBB != NextBlock(BrMBB) || TM.getOptLevel() == CodeGenOpt::None)
DAG.setRoot(DAG.getNode(ISD::BR, getCurSDLoc(),
MVT::Other, getControlRoot(),
DAG.getBasicBlock(Succ0MBB)));
@@ -1682,7 +1688,7 @@ void SelectionDAGBuilder::visitSwitchCase(CaseBlock &CB,
assert(CB.CC == ISD::SETLE && "Can handle only LE ranges now");
const APInt& Low = cast<ConstantInt>(CB.CmpLHS)->getValue();
- const APInt& High = cast<ConstantInt>(CB.CmpRHS)->getValue();
+ const APInt& High = cast<ConstantInt>(CB.CmpRHS)->getValue();
SDValue CmpOp = getValue(CB.CmpMHS);
EVT VT = CmpOp.getValueType();
@@ -1705,16 +1711,9 @@ void SelectionDAGBuilder::visitSwitchCase(CaseBlock &CB,
if (CB.TrueBB != CB.FalseBB)
addSuccessorWithWeight(SwitchBB, CB.FalseBB, CB.FalseWeight);
- // Set NextBlock to be the MBB immediately after the current one, if any.
- // This is used to avoid emitting unnecessary branches to the next block.
- MachineBasicBlock *NextBlock = nullptr;
- MachineFunction::iterator BBI = SwitchBB;
- if (++BBI != FuncInfo.MF->end())
- NextBlock = BBI;
-
// If the lhs block is the next block, invert the condition so that we can
// fall through to the lhs instead of the rhs block.
- if (CB.TrueBB == NextBlock) {
+ if (CB.TrueBB == NextBlock(SwitchBB)) {
std::swap(CB.TrueBB, CB.FalseBB);
SDValue True = DAG.getConstant(1, Cond.getValueType());
Cond = DAG.getNode(ISD::XOR, dl, Cond.getValueType(), Cond, True);
@@ -1781,19 +1780,12 @@ void SelectionDAGBuilder::visitJumpTableHeader(JumpTable &JT,
Sub.getValueType()),
Sub, DAG.getConstant(JTH.Last - JTH.First, VT), ISD::SETUGT);
- // Set NextBlock to be the MBB immediately after the current one, if any.
- // This is used to avoid emitting unnecessary branches to the next block.
- MachineBasicBlock *NextBlock = nullptr;
- MachineFunction::iterator BBI = SwitchBB;
-
- if (++BBI != FuncInfo.MF->end())
- NextBlock = BBI;
-
SDValue BrCond = DAG.getNode(ISD::BRCOND, getCurSDLoc(),
MVT::Other, CopyTo, CMP,
DAG.getBasicBlock(JT.Default));
- if (JT.MBB != NextBlock)
+ // Avoid emitting unnecessary branches to the next block.
+ if (JT.MBB != NextBlock(SwitchBB))
BrCond = DAG.getNode(ISD::BR, getCurSDLoc(), MVT::Other, BrCond,
DAG.getBasicBlock(JT.MBB));
@@ -1922,13 +1914,6 @@ void SelectionDAGBuilder::visitBitTestHeader(BitTestBlock &B,
SDValue CopyTo = DAG.getCopyToReg(getControlRoot(), getCurSDLoc(),
B.Reg, Sub);
- // Set NextBlock to be the MBB immediately after the current one, if any.
- // This is used to avoid emitting unnecessary branches to the next block.
- MachineBasicBlock *NextBlock = nullptr;
- MachineFunction::iterator BBI = SwitchBB;
- if (++BBI != FuncInfo.MF->end())
- NextBlock = BBI;
-
MachineBasicBlock* MBB = B.Cases[0].ThisBB;
addSuccessorWithWeight(SwitchBB, B.Default);
@@ -1938,7 +1923,8 @@ void SelectionDAGBuilder::visitBitTestHeader(BitTestBlock &B,
MVT::Other, CopyTo, RangeCmp,
DAG.getBasicBlock(B.Default));
- if (MBB != NextBlock)
+ // Avoid emitting unnecessary branches to the next block.
+ if (MBB != NextBlock(SwitchBB))
BrRange = DAG.getNode(ISD::BR, getCurSDLoc(), MVT::Other, CopyTo,
DAG.getBasicBlock(MBB));
@@ -1991,14 +1977,8 @@ void SelectionDAGBuilder::visitBitTestCase(BitTestBlock &BB,
MVT::Other, getControlRoot(),
Cmp, DAG.getBasicBlock(B.TargetBB));
- // Set NextBlock to be the MBB immediately after the current one, if any.
- // This is used to avoid emitting unnecessary branches to the next block.
- MachineBasicBlock *NextBlock = nullptr;
- MachineFunction::iterator BBI = SwitchBB;
- if (++BBI != FuncInfo.MF->end())
- NextBlock = BBI;
-
- if (NextMBB != NextBlock)
+ // Avoid emitting unnecessary branches to the next block.
+ if (NextMBB != NextBlock(SwitchBB))
BrAnd = DAG.getNode(ISD::BR, getCurSDLoc(), MVT::Other, BrAnd,
DAG.getBasicBlock(NextMBB));
@@ -2027,13 +2007,20 @@ void SelectionDAGBuilder::visitInvoke(const InvokeInst &I) {
case Intrinsic::experimental_patchpoint_i64:
visitPatchpoint(&I, LandingPad);
break;
+ case Intrinsic::experimental_gc_statepoint:
+ LowerStatepoint(ImmutableStatepoint(&I), LandingPad);
+ break;
}
} else
LowerCallTo(&I, getValue(Callee), false, LandingPad);
// If the value of the invoke is used outside of its defining block, make it
// available as a virtual register.
- CopyToExportRegsIfNeeded(&I);
+ // We already took care of the exported value for the statepoint instruction
+ // during call to the LowerStatepoint.
+ if (!isStatepoint(I)) {
+ CopyToExportRegsIfNeeded(&I);
+ }
// Update successor info
addSuccessorWithWeight(InvokeMBB, Return);
@@ -2128,11 +2115,10 @@ bool SelectionDAGBuilder::handleSmallSwitchRange(CaseRec& CR,
MachineFunction *CurMF = FuncInfo.MF;
// Figure out which block is immediately after the current one.
- MachineBasicBlock *NextBlock = nullptr;
+ MachineBasicBlock *NextMBB = nullptr;
MachineFunction::iterator BBI = CR.CaseBB;
-
if (++BBI != FuncInfo.MF->end())
- NextBlock = BBI;
+ NextMBB = BBI;
BranchProbabilityInfo *BPI = FuncInfo.BPI;
// If any two of the cases has the same destination, and if one value
@@ -2146,8 +2132,8 @@ bool SelectionDAGBuilder::handleSmallSwitchRange(CaseRec& CR,
Case &Big = *(CR.Range.second-1);
if (Small.Low == Small.High && Big.Low == Big.High && Small.BB == Big.BB) {
- const APInt& SmallValue = cast<ConstantInt>(Small.Low)->getValue();
- const APInt& BigValue = cast<ConstantInt>(Big.Low)->getValue();
+ const APInt& SmallValue = Small.Low->getValue();
+ const APInt& BigValue = Big.Low->getValue();
// Check that there is only one bit different.
if (BigValue.countPopulation() == SmallValue.countPopulation() + 1 &&
@@ -2205,13 +2191,12 @@ bool SelectionDAGBuilder::handleSmallSwitchRange(CaseRec& CR,
}
// Rearrange the case blocks so that the last one falls through if possible.
Case &BackCase = *(CR.Range.second-1);
- if (Size > 1 &&
- NextBlock && Default != NextBlock && BackCase.BB != NextBlock) {
- // The last case block won't fall through into 'NextBlock' if we emit the
+ if (Size > 1 && NextMBB && Default != NextMBB && BackCase.BB != NextMBB) {
+ // The last case block won't fall through into 'NextMBB' if we emit the
// branches in this order. See if rearranging a case value would help.
// We start at the bottom as it's the case with the least weight.
for (Case *I = &*(CR.Range.second-2), *E = &*CR.Range.first-1; I != E; --I)
- if (I->BB == NextBlock) {
+ if (I->BB == NextMBB) {
std::swap(*I, BackCase);
break;
}
@@ -2287,8 +2272,8 @@ bool SelectionDAGBuilder::handleJTSwitchCase(CaseRec &CR,
Case& FrontCase = *CR.Range.first;
Case& BackCase = *(CR.Range.second-1);
- const APInt &First = cast<ConstantInt>(FrontCase.Low)->getValue();
- const APInt &Last = cast<ConstantInt>(BackCase.High)->getValue();
+ const APInt &First = FrontCase.Low->getValue();
+ const APInt &Last = BackCase.High->getValue();
APInt TSize(First.getBitWidth(), 0);
for (CaseItr I = CR.Range.first, E = CR.Range.second; I != E; ++I)
@@ -2338,8 +2323,8 @@ bool SelectionDAGBuilder::handleJTSwitchCase(CaseRec &CR,
std::vector<MachineBasicBlock*> DestBBs;
APInt TEI = First;
for (CaseItr I = CR.Range.first, E = CR.Range.second; I != E; ++TEI) {
- const APInt &Low = cast<ConstantInt>(I->Low)->getValue();
- const APInt &High = cast<ConstantInt>(I->High)->getValue();
+ const APInt &Low = I->Low->getValue();
+ const APInt &High = I->High->getValue();
if (Low.sle(TEI) && TEI.sle(High)) {
DestBBs.push_back(I->BB);
@@ -2352,26 +2337,19 @@ bool SelectionDAGBuilder::handleJTSwitchCase(CaseRec &CR,
// Calculate weight for each unique destination in CR.
DenseMap<MachineBasicBlock*, uint32_t> DestWeights;
- if (FuncInfo.BPI)
- for (CaseItr I = CR.Range.first, E = CR.Range.second; I != E; ++I) {
- DenseMap<MachineBasicBlock*, uint32_t>::iterator Itr =
- DestWeights.find(I->BB);
- if (Itr != DestWeights.end())
- Itr->second += I->ExtraWeight;
- else
- DestWeights[I->BB] = I->ExtraWeight;
- }
+ if (FuncInfo.BPI) {
+ for (CaseItr I = CR.Range.first, E = CR.Range.second; I != E; ++I)
+ DestWeights[I->BB] += I->ExtraWeight;
+ }
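// [Editorial note] DenseMap::operator[] value-initializes a missing entry
// (zero for uint32_t), so the former find-then-insert-or-update dance
// collapses to a single +=.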
// Update successor info. Add one edge to each unique successor.
BitVector SuccsHandled(CR.CaseBB->getParent()->getNumBlockIDs());
- for (std::vector<MachineBasicBlock*>::iterator I = DestBBs.begin(),
- E = DestBBs.end(); I != E; ++I) {
- if (!SuccsHandled[(*I)->getNumber()]) {
- SuccsHandled[(*I)->getNumber()] = true;
- DenseMap<MachineBasicBlock*, uint32_t>::iterator Itr =
- DestWeights.find(*I);
- addSuccessorWithWeight(JumpTableBB, *I,
- Itr != DestWeights.end() ? Itr->second : 0);
+ for (MachineBasicBlock *DestBB : DestBBs) {
+ if (!SuccsHandled[DestBB->getNumber()]) {
+ SuccsHandled[DestBB->getNumber()] = true;
+ auto I = DestWeights.find(DestBB);
+ addSuccessorWithWeight(JumpTableBB, DestBB,
+ I != DestWeights.end() ? I->second : 0);
}
}
@@ -2403,8 +2381,8 @@ bool SelectionDAGBuilder::handleBTSplitSwitchCase(CaseRec& CR,
// Size is the number of Cases represented by this range.
unsigned Size = CR.Range.second - CR.Range.first;
- const APInt &First = cast<ConstantInt>(FrontCase.Low)->getValue();
- const APInt &Last = cast<ConstantInt>(BackCase.High)->getValue();
+ const APInt &First = FrontCase.Low->getValue();
+ const APInt &Last = BackCase.High->getValue();
double FMetric = 0;
CaseItr Pivot = CR.Range.first + Size/2;
@@ -2423,8 +2401,8 @@ bool SelectionDAGBuilder::handleBTSplitSwitchCase(CaseRec& CR,
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
for (CaseItr I = CR.Range.first, J=I+1, E = CR.Range.second;
J!=E; ++I, ++J) {
- const APInt &LEnd = cast<ConstantInt>(I->High)->getValue();
- const APInt &RBegin = cast<ConstantInt>(J->Low)->getValue();
+ const APInt &LEnd = I->High->getValue();
+ const APInt &RBegin = J->Low->getValue();
APInt Range = ComputeRange(LEnd, RBegin);
assert((Range - 2ULL).isNonNegative() &&
"Invalid case distance");
@@ -2479,7 +2457,7 @@ void SelectionDAGBuilder::splitSwitchCase(CaseRec &CR, CaseItr Pivot,
CaseRange LHSR(CR.Range.first, Pivot);
CaseRange RHSR(Pivot, CR.Range.second);
- const Constant *C = Pivot->Low;
+ const ConstantInt *C = Pivot->Low;
MachineBasicBlock *FalseBB = nullptr, *TrueBB = nullptr;
// We know that we branch to the LHS if the Value being switched on is
@@ -2489,8 +2467,7 @@ void SelectionDAGBuilder::splitSwitchCase(CaseRec &CR, CaseItr Pivot,
// Pivot's Value, then we can branch directly to the LHS's Target,
// rather than creating a leaf node for it.
if ((LHSR.second - LHSR.first) == 1 && LHSR.first->High == CR.GE &&
- cast<ConstantInt>(C)->getValue() ==
- (cast<ConstantInt>(CR.GE)->getValue() + 1LL)) {
+ C->getValue() == (CR.GE->getValue() + 1LL)) {
TrueBB = LHSR.first->BB;
} else {
TrueBB = CurMF->CreateMachineBasicBlock(LLVMBB);
@@ -2506,8 +2483,7 @@ void SelectionDAGBuilder::splitSwitchCase(CaseRec &CR, CaseItr Pivot,
// is CR.LT - 1, then we can branch directly to the target block for
// the current Case Value, rather than emitting a RHS leaf node for it.
if ((RHSR.second - RHSR.first) == 1 && CR.LT &&
- cast<ConstantInt>(RHSR.first->Low)->getValue() ==
- (cast<ConstantInt>(CR.LT)->getValue() - 1LL)) {
+ RHSR.first->Low->getValue() == (CR.LT->getValue() - 1LL)) {
FalseBB = RHSR.first->BB;
} else {
FalseBB = CurMF->CreateMachineBasicBlock(LLVMBB);
@@ -2571,8 +2547,8 @@ bool SelectionDAGBuilder::handleBitTestsSwitchCase(CaseRec& CR,
<< "Total number of comparisons: " << numCmps << '\n');
// Compute span of values.
- const APInt& minValue = cast<ConstantInt>(FrontCase.Low)->getValue();
- const APInt& maxValue = cast<ConstantInt>(BackCase.High)->getValue();
+ const APInt& minValue = FrontCase.Low->getValue();
+ const APInt& maxValue = BackCase.High->getValue();
APInt cmpRange = maxValue - minValue;
DEBUG(dbgs() << "Compare range: " << cmpRange << '\n'
@@ -2612,8 +2588,8 @@ bool SelectionDAGBuilder::handleBitTestsSwitchCase(CaseRec& CR,
count++;
}
- const APInt& lowValue = cast<ConstantInt>(I->Low)->getValue();
- const APInt& highValue = cast<ConstantInt>(I->High)->getValue();
+ const APInt& lowValue = I->Low->getValue();
+ const APInt& highValue = I->High->getValue();
uint64_t lo = (lowValue - lowBound).getZExtValue();
uint64_t hi = (highValue - lowBound).getZExtValue();
@@ -2663,45 +2639,42 @@ bool SelectionDAGBuilder::handleBitTestsSwitchCase(CaseRec& CR,
return true;
}
-/// Clusterify - Transform simple list of Cases into list of CaseRange's
-void SelectionDAGBuilder::Clusterify(CaseVector& Cases,
- const SwitchInst& SI) {
+void SelectionDAGBuilder::Clusterify(CaseVector &Cases, const SwitchInst *SI) {
BranchProbabilityInfo *BPI = FuncInfo.BPI;
- // Start with "simple" cases.
- for (SwitchInst::ConstCaseIt i : SI.cases()) {
- const BasicBlock *SuccBB = i.getCaseSuccessor();
- MachineBasicBlock *SMBB = FuncInfo.MBBMap[SuccBB];
-
- uint32_t ExtraWeight =
- BPI ? BPI->getEdgeWeight(SI.getParent(), i.getSuccessorIndex()) : 0;
-
- Cases.push_back(Case(i.getCaseValue(), i.getCaseValue(),
- SMBB, ExtraWeight));
- }
- std::sort(Cases.begin(), Cases.end(), CaseCmp());
-
- // Merge case into clusters
- if (Cases.size() >= 2)
- // Must recompute end() each iteration because it may be
- // invalidated by erase if we hold on to it
- for (CaseItr I = Cases.begin(), J = std::next(Cases.begin());
- J != Cases.end(); ) {
- const APInt& nextValue = cast<ConstantInt>(J->Low)->getValue();
- const APInt& currentValue = cast<ConstantInt>(I->High)->getValue();
- MachineBasicBlock* nextBB = J->BB;
- MachineBasicBlock* currentBB = I->BB;
-
- // If the two neighboring cases go to the same destination, merge them
- // into a single case.
- if ((nextValue - currentValue == 1) && (currentBB == nextBB)) {
- I->High = J->High;
- I->ExtraWeight += J->ExtraWeight;
- J = Cases.erase(J);
- } else {
- I = J++;
- }
+
+ // Extract cases from the switch and sort them.
+ typedef std::pair<const ConstantInt*, unsigned> CasePair;
+ std::vector<CasePair> Sorted;
+ Sorted.reserve(SI->getNumCases());
+ for (auto I : SI->cases())
+ Sorted.push_back(std::make_pair(I.getCaseValue(), I.getSuccessorIndex()));
+ std::sort(Sorted.begin(), Sorted.end(), [](CasePair a, CasePair b) {
+ return a.first->getValue().slt(b.first->getValue());
+ });
+
+ // Merge adjacent cases with the same destination, build Cases vector.
+ assert(Cases.empty() && "Cases should be empty before Clusterify");
+ Cases.reserve(SI->getNumCases());
+ MachineBasicBlock *PreviousSucc = nullptr;
+ for (CasePair &CP : Sorted) {
+ const ConstantInt *CaseVal = CP.first;
+ unsigned SuccIndex = CP.second;
+ MachineBasicBlock *Succ = FuncInfo.MBBMap[SI->getSuccessor(SuccIndex)];
+ uint32_t Weight = BPI ? BPI->getEdgeWeight(SI->getParent(), SuccIndex) : 0;
+
+ if (PreviousSucc == Succ &&
+ (CaseVal->getValue() - Cases.back().High->getValue()) == 1) {
+ // If this case has the same successor and its value immediately follows
+ // the previous cluster's high value, merge it into that cluster.
+ Cases.back().High = CaseVal;
+ Cases.back().ExtraWeight += Weight;
+ } else {
+ Cases.push_back(Case(CaseVal, CaseVal, Succ, Weight));
}
+ PreviousSucc = Succ;
+ }
+
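// [Editorial note] Worked example of the new clustering: for
//   switch i32 %x: 1 -> A, 2 -> A, 3 -> B, 5 -> B
// sorting and then merging adjacent values with the same successor yields
//   Cases = { [1,2] -> A, [3,3] -> B, [5,5] -> B };
// 3 and 5 share a successor but are not neighbouring values, so they remain
// separate clusters.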
DEBUG({
size_t numCmps = 0;
for (auto &I : Cases)
@@ -2729,16 +2702,10 @@ void SelectionDAGBuilder::UpdateSplitBlock(MachineBasicBlock *First,
void SelectionDAGBuilder::visitSwitch(const SwitchInst &SI) {
MachineBasicBlock *SwitchMBB = FuncInfo.MBB;
- // Figure out which block is immediately after the current one.
- MachineBasicBlock *NextBlock = nullptr;
- if (SwitchMBB + 1 != FuncInfo.MF->end())
- NextBlock = SwitchMBB + 1;
-
-
// Create a vector of Cases, sorted so that we can efficiently create a binary
// search tree from them.
CaseVector Cases;
- Clusterify(Cases, SI);
+ Clusterify(Cases, &SI);
// Get the default destination MBB.
MachineBasicBlock *Default = FuncInfo.MBBMap[SI.getDefaultDest()];
@@ -2775,7 +2742,7 @@ void SelectionDAGBuilder::visitSwitch(const SwitchInst &SI) {
SwitchMBB->addSuccessor(Default);
// If this is not a fall-through branch, emit the branch.
- if (Default != NextBlock) {
+ if (Default != NextBlock(SwitchMBB)) {
DAG.setRoot(DAG.getNode(ISD::BR, getCurSDLoc(), MVT::Other,
getControlRoot(), DAG.getBasicBlock(Default)));
}
@@ -3429,30 +3396,21 @@ void SelectionDAGBuilder::visitGetElementPtr(const User &I) {
Ty = StTy->getElementType(Field);
} else {
Ty = cast<SequentialType>(Ty)->getElementType();
+ MVT PtrTy = DAG.getTargetLoweringInfo().getPointerTy(AS);
+ unsigned PtrSize = PtrTy.getSizeInBits();
+ APInt ElementSize(PtrSize, DL->getTypeAllocSize(Ty));
// If this is a constant subscript, handle it quickly.
- const TargetLowering &TLI = DAG.getTargetLoweringInfo();
- if (const ConstantInt *CI = dyn_cast<ConstantInt>(Idx)) {
- if (CI->isZero()) continue;
- uint64_t Offs =
- DL->getTypeAllocSize(Ty)*cast<ConstantInt>(CI)->getSExtValue();
- SDValue OffsVal;
- EVT PTy = TLI.getPointerTy(AS);
- unsigned PtrBits = PTy.getSizeInBits();
- if (PtrBits < 64)
- OffsVal = DAG.getNode(ISD::TRUNCATE, getCurSDLoc(), PTy,
- DAG.getConstant(Offs, MVT::i64));
- else
- OffsVal = DAG.getConstant(Offs, PTy);
-
- N = DAG.getNode(ISD::ADD, getCurSDLoc(), N.getValueType(), N,
- OffsVal);
+ if (const auto *CI = dyn_cast<ConstantInt>(Idx)) {
+ if (CI->isZero())
+ continue;
+ APInt Offs = ElementSize * CI->getValue().sextOrTrunc(PtrSize);
+ SDValue OffsVal = DAG.getConstant(Offs, PtrTy);
+ N = DAG.getNode(ISD::ADD, getCurSDLoc(), N.getValueType(), N, OffsVal);
continue;
}
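// [Editorial note] Doing the multiply in APInt keeps the arithmetic at
// pointer width with defined wrap-around. Example on a 32-bit target with
// 12-byte elements and an i64 index of -7:
//   Offs = APInt(32, 12) * sextOrTrunc(-7, 32) = -84 = 0xFFFFFFAC,
// emitted directly as a pointer-width constant, where the old code built a
// 64-bit constant and then truncated it.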
// N = N + Idx * ElementSize;
- APInt ElementSize =
- APInt(TLI.getPointerSizeInBits(AS), DL->getTypeAllocSize(Ty));
SDValue IdxN = getValue(Idx);
// If the index is smaller or larger than intptr_t, truncate or extend
@@ -3988,6 +3946,93 @@ getF32Constant(SelectionDAG &DAG, unsigned Flt) {
MVT::f32);
}
+static SDValue getLimitedPrecisionExp2(SDValue t0, SDLoc dl,
+ SelectionDAG &DAG) {
+ // IntegerPartOfX = (int32_t)t0;
+ SDValue IntegerPartOfX = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::i32, t0);
+
+ // FractionalPartOfX = t0 - (float)IntegerPartOfX;
+ SDValue t1 = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::f32, IntegerPartOfX);
+ SDValue X = DAG.getNode(ISD::FSUB, dl, MVT::f32, t0, t1);
+
+ // IntegerPartOfX <<= 23;
+ IntegerPartOfX = DAG.getNode(
+ ISD::SHL, dl, MVT::i32, IntegerPartOfX,
+ DAG.getConstant(23, DAG.getTargetLoweringInfo().getPointerTy()));
+
+ SDValue TwoToFractionalPartOfX;
+ if (LimitFloatPrecision <= 6) {
+ // For floating-point precision of 6:
+ //
+ // TwoToFractionalPartOfX =
+ // 0.997535578f +
+ // (0.735607626f + 0.252464424f * x) * x;
+ //
+ // error 0.0144103317, which is 6 bits
+ SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
+ getF32Constant(DAG, 0x3e814304));
+ SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2,
+ getF32Constant(DAG, 0x3f3c50c8));
+ SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X);
+ TwoToFractionalPartOfX = DAG.getNode(ISD::FADD, dl, MVT::f32, t4,
+ getF32Constant(DAG, 0x3f7f5e7e));
+ } else if (LimitFloatPrecision <= 12) {
+ // For floating-point precision of 12:
+ //
+ // TwoToFractionalPartOfX =
+ // 0.999892986f +
+ // (0.696457318f +
+ // (0.224338339f + 0.792043434e-1f * x) * x) * x;
+ //
+ // error 0.000107046256, which is 13 to 14 bits
+ SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
+ getF32Constant(DAG, 0x3da235e3));
+ SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2,
+ getF32Constant(DAG, 0x3e65b8f3));
+ SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X);
+ SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4,
+ getF32Constant(DAG, 0x3f324b07));
+ SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X);
+ TwoToFractionalPartOfX = DAG.getNode(ISD::FADD, dl, MVT::f32, t6,
+ getF32Constant(DAG, 0x3f7ff8fd));
+ } else { // LimitFloatPrecision <= 18
+ // For floating-point precision of 18:
+ //
+ // TwoToFractionalPartOfX =
+ // 0.999999982f +
+ // (0.693148872f +
+ // (0.240227044f +
+ // (0.554906021e-1f +
+ // (0.961591928e-2f +
+ // (0.136028312e-2f + 0.157059148e-3f *x)*x)*x)*x)*x)*x;
+ // error 2.47208000*10^(-7), which is better than 18 bits
+ SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
+ getF32Constant(DAG, 0x3924b03e));
+ SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2,
+ getF32Constant(DAG, 0x3ab24b87));
+ SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X);
+ SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4,
+ getF32Constant(DAG, 0x3c1d8c17));
+ SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X);
+ SDValue t7 = DAG.getNode(ISD::FADD, dl, MVT::f32, t6,
+ getF32Constant(DAG, 0x3d634a1d));
+ SDValue t8 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t7, X);
+ SDValue t9 = DAG.getNode(ISD::FADD, dl, MVT::f32, t8,
+ getF32Constant(DAG, 0x3e75fe14));
+ SDValue t10 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t9, X);
+ SDValue t11 = DAG.getNode(ISD::FADD, dl, MVT::f32, t10,
+ getF32Constant(DAG, 0x3f317234));
+ SDValue t12 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t11, X);
+ TwoToFractionalPartOfX = DAG.getNode(ISD::FADD, dl, MVT::f32, t12,
+ getF32Constant(DAG, 0x3f800000));
+ }
+
+ // Add the exponent into the result in integer domain.
+ SDValue t13 = DAG.getNode(ISD::BITCAST, dl, MVT::i32, TwoToFractionalPartOfX);
+ return DAG.getNode(ISD::BITCAST, dl, MVT::f32,
+ DAG.getNode(ISD::ADD, dl, MVT::i32, t13, IntegerPartOfX));
+}
+
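// [Editorial sketch, not part of this patch] The scalar computation the
// helper emits as DAG nodes, shown for the 6-bit-precision path: 2^x is split
// into 2^IntPart * 2^FracPart, the polynomial approximates 2^FracPart, and
// IntPart is folded straight into the float's exponent bits.
#include <cstdint>
#include <cstring>
static float Exp2Limited6(float X) {
  int32_t IntPart = (int32_t)X;        // FP_TO_SINT: IntegerPartOfX
  float Frac = X - (float)IntPart;     // FSUB: FractionalPartOfX
  // 0.997535578f + (0.735607626f + 0.252464424f * x) * x; error ~6 bits.
  float Poly = 0.997535578f + (0.735607626f + 0.252464424f * Frac) * Frac;
  int32_t Bits;
  std::memcpy(&Bits, &Poly, sizeof(Bits)); // BITCAST f32 -> i32
  Bits += IntPart << 23;                   // add exponent in integer domain
  float Res;
  std::memcpy(&Res, &Bits, sizeof(Res));   // BITCAST i32 -> f32
  return Res;
}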
/// expandExp - Lower an exp intrinsic. Handles the special sequences for
/// limited-precision mode.
static SDValue expandExp(SDLoc dl, SDValue Op, SelectionDAG &DAG,
@@ -3999,92 +4044,10 @@ static SDValue expandExp(SDLoc dl, SDValue Op, SelectionDAG &DAG,
// final result:
//
// #define LOG2OFe 1.4426950f
- // IntegerPartOfX = ((int32_t)(X * LOG2OFe));
+ // t0 = Op * LOG2OFe;
SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, Op,
getF32Constant(DAG, 0x3fb8aa3b));
- SDValue IntegerPartOfX = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::i32, t0);
-
- // FractionalPartOfX = (X * LOG2OFe) - (float)IntegerPartOfX;
- SDValue t1 = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::f32, IntegerPartOfX);
- SDValue X = DAG.getNode(ISD::FSUB, dl, MVT::f32, t0, t1);
-
- // IntegerPartOfX <<= 23;
- IntegerPartOfX = DAG.getNode(ISD::SHL, dl, MVT::i32, IntegerPartOfX,
- DAG.getConstant(23, TLI.getPointerTy()));
-
- SDValue TwoToFracPartOfX;
- if (LimitFloatPrecision <= 6) {
- // For floating-point precision of 6:
- //
- // TwoToFractionalPartOfX =
- // 0.997535578f +
- // (0.735607626f + 0.252464424f * x) * x;
- //
- // error 0.0144103317, which is 6 bits
- SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
- getF32Constant(DAG, 0x3e814304));
- SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2,
- getF32Constant(DAG, 0x3f3c50c8));
- SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X);
- TwoToFracPartOfX = DAG.getNode(ISD::FADD, dl, MVT::f32, t4,
- getF32Constant(DAG, 0x3f7f5e7e));
- } else if (LimitFloatPrecision <= 12) {
- // For floating-point precision of 12:
- //
- // TwoToFractionalPartOfX =
- // 0.999892986f +
- // (0.696457318f +
- // (0.224338339f + 0.792043434e-1f * x) * x) * x;
- //
- // 0.000107046256 error, which is 13 to 14 bits
- SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
- getF32Constant(DAG, 0x3da235e3));
- SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2,
- getF32Constant(DAG, 0x3e65b8f3));
- SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X);
- SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4,
- getF32Constant(DAG, 0x3f324b07));
- SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X);
- TwoToFracPartOfX = DAG.getNode(ISD::FADD, dl, MVT::f32, t6,
- getF32Constant(DAG, 0x3f7ff8fd));
- } else { // LimitFloatPrecision <= 18
- // For floating-point precision of 18:
- //
- // TwoToFractionalPartOfX =
- // 0.999999982f +
- // (0.693148872f +
- // (0.240227044f +
- // (0.554906021e-1f +
- // (0.961591928e-2f +
- // (0.136028312e-2f + 0.157059148e-3f *x)*x)*x)*x)*x)*x;
- //
- // error 2.47208000*10^(-7), which is better than 18 bits
- SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
- getF32Constant(DAG, 0x3924b03e));
- SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2,
- getF32Constant(DAG, 0x3ab24b87));
- SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X);
- SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4,
- getF32Constant(DAG, 0x3c1d8c17));
- SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X);
- SDValue t7 = DAG.getNode(ISD::FADD, dl, MVT::f32, t6,
- getF32Constant(DAG, 0x3d634a1d));
- SDValue t8 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t7, X);
- SDValue t9 = DAG.getNode(ISD::FADD, dl, MVT::f32, t8,
- getF32Constant(DAG, 0x3e75fe14));
- SDValue t10 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t9, X);
- SDValue t11 = DAG.getNode(ISD::FADD, dl, MVT::f32, t10,
- getF32Constant(DAG, 0x3f317234));
- SDValue t12 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t11, X);
- TwoToFracPartOfX = DAG.getNode(ISD::FADD, dl, MVT::f32, t12,
- getF32Constant(DAG, 0x3f800000));
- }
-
- // Add the exponent into the result in integer domain.
- SDValue t13 = DAG.getNode(ISD::BITCAST, dl, MVT::i32, TwoToFracPartOfX);
- return DAG.getNode(ISD::BITCAST, dl, MVT::f32,
- DAG.getNode(ISD::ADD, dl, MVT::i32,
- t13, IntegerPartOfX));
+ return getLimitedPrecisionExp2(t0, dl, DAG);
}
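// [Editorial note] This relies on the identity e^x = 2^(x * log2(e)): the
// whole limited-precision expansion reduces to one FMUL by LOG2OFe followed
// by the shared exp2 helper. expandPow's 10^x case below does the same with
// LOG2OF10.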
// No special expansion.
@@ -4375,91 +4338,8 @@ static SDValue expandLog10(SDLoc dl, SDValue Op, SelectionDAG &DAG,
static SDValue expandExp2(SDLoc dl, SDValue Op, SelectionDAG &DAG,
const TargetLowering &TLI) {
if (Op.getValueType() == MVT::f32 &&
- LimitFloatPrecision > 0 && LimitFloatPrecision <= 18) {
- SDValue IntegerPartOfX = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::i32, Op);
-
- // FractionalPartOfX = x - (float)IntegerPartOfX;
- SDValue t1 = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::f32, IntegerPartOfX);
- SDValue X = DAG.getNode(ISD::FSUB, dl, MVT::f32, Op, t1);
-
- // IntegerPartOfX <<= 23;
- IntegerPartOfX = DAG.getNode(ISD::SHL, dl, MVT::i32, IntegerPartOfX,
- DAG.getConstant(23, TLI.getPointerTy()));
-
- SDValue TwoToFractionalPartOfX;
- if (LimitFloatPrecision <= 6) {
- // For floating-point precision of 6:
- //
- // TwoToFractionalPartOfX =
- // 0.997535578f +
- // (0.735607626f + 0.252464424f * x) * x;
- //
- // error 0.0144103317, which is 6 bits
- SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
- getF32Constant(DAG, 0x3e814304));
- SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2,
- getF32Constant(DAG, 0x3f3c50c8));
- SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X);
- TwoToFractionalPartOfX = DAG.getNode(ISD::FADD, dl, MVT::f32, t4,
- getF32Constant(DAG, 0x3f7f5e7e));
- } else if (LimitFloatPrecision <= 12) {
- // For floating-point precision of 12:
- //
- // TwoToFractionalPartOfX =
- // 0.999892986f +
- // (0.696457318f +
- // (0.224338339f + 0.792043434e-1f * x) * x) * x;
- //
- // error 0.000107046256, which is 13 to 14 bits
- SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
- getF32Constant(DAG, 0x3da235e3));
- SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2,
- getF32Constant(DAG, 0x3e65b8f3));
- SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X);
- SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4,
- getF32Constant(DAG, 0x3f324b07));
- SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X);
- TwoToFractionalPartOfX = DAG.getNode(ISD::FADD, dl, MVT::f32, t6,
- getF32Constant(DAG, 0x3f7ff8fd));
- } else { // LimitFloatPrecision <= 18
- // For floating-point precision of 18:
- //
- // TwoToFractionalPartOfX =
- // 0.999999982f +
- // (0.693148872f +
- // (0.240227044f +
- // (0.554906021e-1f +
- // (0.961591928e-2f +
- // (0.136028312e-2f + 0.157059148e-3f *x)*x)*x)*x)*x)*x;
- // error 2.47208000*10^(-7), which is better than 18 bits
- SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
- getF32Constant(DAG, 0x3924b03e));
- SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2,
- getF32Constant(DAG, 0x3ab24b87));
- SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X);
- SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4,
- getF32Constant(DAG, 0x3c1d8c17));
- SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X);
- SDValue t7 = DAG.getNode(ISD::FADD, dl, MVT::f32, t6,
- getF32Constant(DAG, 0x3d634a1d));
- SDValue t8 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t7, X);
- SDValue t9 = DAG.getNode(ISD::FADD, dl, MVT::f32, t8,
- getF32Constant(DAG, 0x3e75fe14));
- SDValue t10 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t9, X);
- SDValue t11 = DAG.getNode(ISD::FADD, dl, MVT::f32, t10,
- getF32Constant(DAG, 0x3f317234));
- SDValue t12 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t11, X);
- TwoToFractionalPartOfX = DAG.getNode(ISD::FADD, dl, MVT::f32, t12,
- getF32Constant(DAG, 0x3f800000));
- }
-
- // Add the exponent into the result in integer domain.
- SDValue t13 = DAG.getNode(ISD::BITCAST, dl, MVT::i32,
- TwoToFractionalPartOfX);
- return DAG.getNode(ISD::BITCAST, dl, MVT::f32,
- DAG.getNode(ISD::ADD, dl, MVT::i32,
- t13, IntegerPartOfX));
- }
+ LimitFloatPrecision > 0 && LimitFloatPrecision <= 18)
+ return getLimitedPrecisionExp2(Op, dl, DAG);
// No special expansion.
return DAG.getNode(ISD::FEXP2, dl, Op.getValueType(), Op);
@@ -4483,90 +4363,10 @@ static SDValue expandPow(SDLoc dl, SDValue LHS, SDValue RHS,
// final result:
//
// #define LOG2OF10 3.3219281f
- // IntegerPartOfX = (int32_t)(x * LOG2OF10);
+ // t0 = Op * LOG2OF10;
SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, RHS,
getF32Constant(DAG, 0x40549a78));
- SDValue IntegerPartOfX = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::i32, t0);
-
- // FractionalPartOfX = x - (float)IntegerPartOfX;
- SDValue t1 = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::f32, IntegerPartOfX);
- SDValue X = DAG.getNode(ISD::FSUB, dl, MVT::f32, t0, t1);
-
- // IntegerPartOfX <<= 23;
- IntegerPartOfX = DAG.getNode(ISD::SHL, dl, MVT::i32, IntegerPartOfX,
- DAG.getConstant(23, TLI.getPointerTy()));
-
- SDValue TwoToFractionalPartOfX;
- if (LimitFloatPrecision <= 6) {
- // For floating-point precision of 6:
- //
- // twoToFractionalPartOfX =
- // 0.997535578f +
- // (0.735607626f + 0.252464424f * x) * x;
- //
- // error 0.0144103317, which is 6 bits
- SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
- getF32Constant(DAG, 0x3e814304));
- SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2,
- getF32Constant(DAG, 0x3f3c50c8));
- SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X);
- TwoToFractionalPartOfX = DAG.getNode(ISD::FADD, dl, MVT::f32, t4,
- getF32Constant(DAG, 0x3f7f5e7e));
- } else if (LimitFloatPrecision <= 12) {
- // For floating-point precision of 12:
- //
- // TwoToFractionalPartOfX =
- // 0.999892986f +
- // (0.696457318f +
- // (0.224338339f + 0.792043434e-1f * x) * x) * x;
- //
- // error 0.000107046256, which is 13 to 14 bits
- SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
- getF32Constant(DAG, 0x3da235e3));
- SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2,
- getF32Constant(DAG, 0x3e65b8f3));
- SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X);
- SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4,
- getF32Constant(DAG, 0x3f324b07));
- SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X);
- TwoToFractionalPartOfX = DAG.getNode(ISD::FADD, dl, MVT::f32, t6,
- getF32Constant(DAG, 0x3f7ff8fd));
- } else { // LimitFloatPrecision <= 18
- // For floating-point precision of 18:
- //
- // TwoToFractionalPartOfX =
- // 0.999999982f +
- // (0.693148872f +
- // (0.240227044f +
- // (0.554906021e-1f +
- // (0.961591928e-2f +
- // (0.136028312e-2f + 0.157059148e-3f *x)*x)*x)*x)*x)*x;
- // error 2.47208000*10^(-7), which is better than 18 bits
- SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
- getF32Constant(DAG, 0x3924b03e));
- SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2,
- getF32Constant(DAG, 0x3ab24b87));
- SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X);
- SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4,
- getF32Constant(DAG, 0x3c1d8c17));
- SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X);
- SDValue t7 = DAG.getNode(ISD::FADD, dl, MVT::f32, t6,
- getF32Constant(DAG, 0x3d634a1d));
- SDValue t8 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t7, X);
- SDValue t9 = DAG.getNode(ISD::FADD, dl, MVT::f32, t8,
- getF32Constant(DAG, 0x3e75fe14));
- SDValue t10 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t9, X);
- SDValue t11 = DAG.getNode(ISD::FADD, dl, MVT::f32, t10,
- getF32Constant(DAG, 0x3f317234));
- SDValue t12 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t11, X);
- TwoToFractionalPartOfX = DAG.getNode(ISD::FADD, dl, MVT::f32, t12,
- getF32Constant(DAG, 0x3f800000));
- }
-
- SDValue t13 = DAG.getNode(ISD::BITCAST, dl,MVT::i32,TwoToFractionalPartOfX);
- return DAG.getNode(ISD::BITCAST, dl, MVT::f32,
- DAG.getNode(ISD::ADD, dl, MVT::i32,
- t13, IntegerPartOfX));
+ return getLimitedPrecisionExp2(t0, dl, DAG);
}
// No special expansion.
@@ -5114,34 +4914,6 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
setValue(&I, Res);
return nullptr;
}
- case Intrinsic::x86_avx_vinsertf128_pd_256:
- case Intrinsic::x86_avx_vinsertf128_ps_256:
- case Intrinsic::x86_avx_vinsertf128_si_256:
- case Intrinsic::x86_avx2_vinserti128: {
- EVT DestVT = TLI.getValueType(I.getType());
- EVT ElVT = TLI.getValueType(I.getArgOperand(1)->getType());
- uint64_t Idx = (cast<ConstantInt>(I.getArgOperand(2))->getZExtValue() & 1) *
- ElVT.getVectorNumElements();
- Res =
- DAG.getNode(ISD::INSERT_SUBVECTOR, sdl, DestVT,
- getValue(I.getArgOperand(0)), getValue(I.getArgOperand(1)),
- DAG.getConstant(Idx, TLI.getVectorIdxTy()));
- setValue(&I, Res);
- return nullptr;
- }
- case Intrinsic::x86_avx_vextractf128_pd_256:
- case Intrinsic::x86_avx_vextractf128_ps_256:
- case Intrinsic::x86_avx_vextractf128_si_256:
- case Intrinsic::x86_avx2_vextracti128: {
- EVT DestVT = TLI.getValueType(I.getType());
- uint64_t Idx = (cast<ConstantInt>(I.getArgOperand(1))->getZExtValue() & 1) *
- DestVT.getVectorNumElements();
- Res = DAG.getNode(ISD::EXTRACT_SUBVECTOR, sdl, DestVT,
- getValue(I.getArgOperand(0)),
- DAG.getConstant(Idx, TLI.getVectorIdxTy()));
- setValue(&I, Res);
- return nullptr;
- }
case Intrinsic::convertff:
case Intrinsic::convertfsi:
case Intrinsic::convertfui:
@@ -5539,7 +5311,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
return nullptr;
SmallVector<Value *, 4> Allocas;
- GetUnderlyingObjects(I.getArgOperand(1), Allocas, DL);
+ GetUnderlyingObjects(I.getArgOperand(1), Allocas, *DL);
for (SmallVectorImpl<Value*>::iterator Object = Allocas.begin(),
E = Allocas.end(); Object != E; ++Object) {
@@ -5618,45 +5390,47 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
case Intrinsic::instrprof_increment:
llvm_unreachable("instrprof failed to lower an increment");
- case Intrinsic::frameallocate: {
+ case Intrinsic::frameescape: {
MachineFunction &MF = DAG.getMachineFunction();
const TargetInstrInfo *TII = DAG.getSubtarget().getInstrInfo();
- // Do the allocation and map it as a normal value.
- // FIXME: Maybe we should add this to the alloca map so that we don't have
- // to register allocate it?
- uint64_t Size = cast<ConstantInt>(I.getArgOperand(0))->getZExtValue();
- int Alloc = MF.getFrameInfo()->CreateFrameAllocation(Size);
- MVT PtrVT = TLI.getPointerTy(0);
- SDValue FIVal = DAG.getFrameIndex(Alloc, PtrVT);
- setValue(&I, FIVal);
-
- // Directly emit a FRAME_ALLOC machine instr. Label assignment emission is
- // the same on all targets.
- MCSymbol *FrameAllocSym =
- MF.getMMI().getContext().getOrCreateFrameAllocSymbol(MF.getName());
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, dl,
- TII->get(TargetOpcode::FRAME_ALLOC))
- .addSym(FrameAllocSym)
- .addFrameIndex(Alloc);
+ // Directly emit some FRAME_ALLOC machine instrs. Label assignment emission
+ // is the same on all targets.
+ for (unsigned Idx = 0, E = I.getNumArgOperands(); Idx < E; ++Idx) {
+ AllocaInst *Slot =
+ cast<AllocaInst>(I.getArgOperand(Idx)->stripPointerCasts());
+ assert(FuncInfo.StaticAllocaMap.count(Slot) &&
+ "can only escape static allocas");
+ int FI = FuncInfo.StaticAllocaMap[Slot];
+ MCSymbol *FrameAllocSym =
+ MF.getMMI().getContext().getOrCreateFrameAllocSymbol(MF.getName(),
+ Idx);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, dl,
+ TII->get(TargetOpcode::FRAME_ALLOC))
+ .addSym(FrameAllocSym)
+ .addFrameIndex(FI);
+ }
return nullptr;
}
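// [Editorial note] frameescape now tags each escaped alloca with its argument
// index, and framerecover (below) passes a matching i32 %idx, so both sides
// derive the same per-slot FRAME_ALLOC symbol from the function name plus
// index.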
case Intrinsic::framerecover: {
- // i8* @llvm.framerecover(i8* %fn, i8* %fp)
+ // i8* @llvm.framerecover(i8* %fn, i8* %fp, i32 %idx)
MachineFunction &MF = DAG.getMachineFunction();
MVT PtrVT = TLI.getPointerTy(0);
// Get the symbol that defines the frame offset.
- Function *Fn = cast<Function>(I.getArgOperand(0)->stripPointerCasts());
+ auto *Fn = cast<Function>(I.getArgOperand(0)->stripPointerCasts());
+ auto *Idx = cast<ConstantInt>(I.getArgOperand(2));
+ unsigned IdxVal = unsigned(Idx->getLimitedValue(INT_MAX));
MCSymbol *FrameAllocSym =
- MF.getMMI().getContext().getOrCreateFrameAllocSymbol(Fn->getName());
+ MF.getMMI().getContext().getOrCreateFrameAllocSymbol(Fn->getName(),
+ IdxVal);
// Create a TargetExternalSymbol for the label to avoid any target lowering
// that would make this PC relative.
StringRef Name = FrameAllocSym->getName();
- assert(Name.size() == strlen(Name.data()) && "not null terminated");
+ assert(Name.data()[Name.size()] == '\0' && "not null terminated");
SDValue OffsetSym = DAG.getTargetExternalSymbol(Name.data(), PtrVT);
SDValue OffsetVal =
DAG.getNode(ISD::FRAME_ALLOC_RECOVER, sdl, PtrVT, OffsetSym);
@@ -5672,6 +5446,16 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
case Intrinsic::eh_begincatch:
case Intrinsic::eh_endcatch:
llvm_unreachable("begin/end catch intrinsics not lowered in codegen");
+ case Intrinsic::eh_unwindhelp: {
+ AllocaInst *Slot =
+ cast<AllocaInst>(I.getArgOperand(0)->stripPointerCasts());
+ assert(FuncInfo.StaticAllocaMap.count(Slot) &&
+ "can only use static allocas with llvm.eh.unwindhelp");
+ int FI = FuncInfo.StaticAllocaMap[Slot];
+ // TODO: Save this in the not-yet-existent WinEHFuncInfo struct.
+ (void)FI;
+ return nullptr;
+ }
}
}
@@ -5805,9 +5589,8 @@ static SDValue getMemCmpLoad(const Value *PtrVal, MVT LoadVT,
LoadInput = ConstantExpr::getBitCast(const_cast<Constant *>(LoadInput),
PointerType::getUnqual(LoadTy));
- if (const Constant *LoadCst =
- ConstantFoldLoadFromConstPtr(const_cast<Constant *>(LoadInput),
- Builder.DL))
+ if (const Constant *LoadCst = ConstantFoldLoadFromConstPtr(
+ const_cast<Constant *>(LoadInput), *Builder.DL))
return Builder.getValue(LoadCst);
}
@@ -6748,10 +6531,15 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) {
// Memory output, or 'other' output (e.g. 'X' constraint).
assert(OpInfo.isIndirect && "Memory output must be indirect operand");
+ unsigned ConstraintID =
+ TLI.getInlineAsmMemConstraint(OpInfo.ConstraintCode);
+ assert(ConstraintID != InlineAsm::Constraint_Unknown &&
+ "Failed to convert memory constraint code to constraint id.");
+
// Add information to the INLINEASM node to know about this output.
unsigned OpFlags = InlineAsm::getFlagWord(InlineAsm::Kind_Mem, 1);
- AsmNodeOperands.push_back(DAG.getTargetConstant(OpFlags,
- TLI.getPointerTy()));
+ OpFlags = InlineAsm::getFlagWordForMem(OpFlags, ConstraintID);
+ AsmNodeOperands.push_back(DAG.getTargetConstant(OpFlags, MVT::i32));
AsmNodeOperands.push_back(OpInfo.CallOperand);
break;
}
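// [Editorial note] The memory-constraint kind is now encoded into the
// INLINEASM flag word itself via getFlagWordForMem, and flag-word constants
// are uniformly i32 rather than pointer-sized; the input-operand path below
// mirrors this.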
@@ -6855,6 +6643,7 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) {
"Unexpected number of operands");
// Add information to the INLINEASM node to know about this input.
// See InlineAsm.h isUseOperandTiedToDef.
+ OpFlag = InlineAsm::convertMemFlagWordToMatchingFlagWord(OpFlag);
OpFlag = InlineAsm::getFlagWordForMatchingOp(OpFlag,
OpInfo.getMatchedOperand());
AsmNodeOperands.push_back(DAG.getTargetConstant(OpFlag,
@@ -6894,10 +6683,15 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) {
assert(InOperandVal.getValueType() == TLI.getPointerTy() &&
"Memory operands expect pointer values");
+ unsigned ConstraintID =
+ TLI.getInlineAsmMemConstraint(OpInfo.ConstraintCode);
+ assert(ConstraintID != InlineAsm::Constraint_Unknown &&
+ "Failed to convert memory constraint code to constraint id.");
+
// Add information to the INLINEASM node to know about this input.
unsigned ResOpType = InlineAsm::getFlagWord(InlineAsm::Kind_Mem, 1);
- AsmNodeOperands.push_back(DAG.getTargetConstant(ResOpType,
- TLI.getPointerTy()));
+ ResOpType = InlineAsm::getFlagWordForMem(ResOpType, ConstraintID);
+ AsmNodeOperands.push_back(DAG.getTargetConstant(ResOpType, MVT::i32));
AsmNodeOperands.push_back(InOperandVal);
break;
}
@@ -7901,8 +7695,8 @@ SelectionDAGBuilder::HandlePHINodesInSuccessorBlocks(const BasicBlock *LLVMBB) {
SmallPtrSet<MachineBasicBlock *, 4> SuccsHandled;
- // Check successor nodes' PHI nodes that expect a constant to be available
- // from this block.
+ // Check PHI nodes in successors that expect a value to be available from this
+ // block.
for (unsigned succ = 0, e = TI->getNumSuccessors(); succ != e; ++succ) {
const BasicBlock *SuccBB = TI->getSuccessor(succ);
if (!isa<PHINode>(SuccBB->begin())) continue;
@@ -7989,3 +7783,10 @@ AddSuccessorMBB(const BasicBlock *BB,
SuccMBB, BranchProbabilityInfo::getBranchWeightStackProtector(IsLikely));
return SuccMBB;
}
+
+MachineBasicBlock *SelectionDAGBuilder::NextBlock(MachineBasicBlock *MBB) {
+ MachineFunction::iterator I = MBB;
+ if (++I == FuncInfo.MF->end())
+ return nullptr;
+ return I;
+}
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
index ad7411f..30240d8 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
+++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
@@ -137,19 +137,19 @@ private:
/// Case - A struct to record the Value for a switch case, and the
/// case's target basic block.
struct Case {
- const Constant *Low;
- const Constant *High;
+ const ConstantInt *Low;
+ const ConstantInt *High;
MachineBasicBlock* BB;
uint32_t ExtraWeight;
Case() : Low(nullptr), High(nullptr), BB(nullptr), ExtraWeight(0) { }
- Case(const Constant *low, const Constant *high, MachineBasicBlock *bb,
+ Case(const ConstantInt *low, const ConstantInt *high, MachineBasicBlock *bb,
uint32_t extraweight) : Low(low), High(high), BB(bb),
ExtraWeight(extraweight) { }
APInt size() const {
- const APInt &rHigh = cast<ConstantInt>(High)->getValue();
- const APInt &rLow = cast<ConstantInt>(Low)->getValue();
+ const APInt &rHigh = High->getValue();
+ const APInt &rLow = Low->getValue();
return (rHigh - rLow + 1ULL);
}
};
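
As a quick sanity check of size(), a standalone APInt computation for a hypothetical inclusive range (a sketch, not part of the patch):

    #include "llvm/ADT/APInt.h"
    using llvm::APInt;
    // The inclusive range [Low, High] covers High - Low + 1 case values.
    APInt caseRangeSize(const APInt &Low, const APInt &High) {
      return High - Low + 1;
    }
    // caseRangeSize(APInt(32, 3), APInt(32, 7)) yields 5.
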
@@ -173,7 +173,7 @@ private:
/// CaseRec - A struct with ctor used in lowering switches to a binary tree
/// of conditional branches.
struct CaseRec {
- CaseRec(MachineBasicBlock *bb, const Constant *lt, const Constant *ge,
+ CaseRec(MachineBasicBlock *bb, const ConstantInt *lt, const ConstantInt *ge,
CaseRange r) :
CaseBB(bb), LT(lt), GE(ge), Range(r) {}
@@ -181,8 +181,8 @@ private:
MachineBasicBlock *CaseBB;
/// LT, GE - If nonzero, we know the current case value must be less-than or
/// greater-than-or-equal-to these Constants.
- const Constant *LT;
- const Constant *GE;
+ const ConstantInt *LT;
+ const ConstantInt *GE;
/// Range - A pair of iterators representing the range of case values to be
/// processed at this point in the binary search tree.
CaseRange Range;
@@ -190,24 +190,15 @@ private:
typedef std::vector<CaseRec> CaseRecVector;
- /// The comparison function for sorting the switch case values in the vector.
- /// WARNING: Case ranges should be disjoint!
- struct CaseCmp {
- bool operator()(const Case &C1, const Case &C2) {
- assert(isa<ConstantInt>(C1.Low) && isa<ConstantInt>(C2.High));
- const ConstantInt* CI1 = cast<const ConstantInt>(C1.Low);
- const ConstantInt* CI2 = cast<const ConstantInt>(C2.High);
- return CI1->getValue().slt(CI2->getValue());
- }
- };
-
struct CaseBitsCmp {
bool operator()(const CaseBits &C1, const CaseBits &C2) {
return C1.Bits > C2.Bits;
}
};
- void Clusterify(CaseVector &Cases, const SwitchInst &SI);
+ /// Populate Cases with the cases in SI, clustering adjacent cases with the
+ /// same destination together.
+ void Clusterify(CaseVector &Cases, const SwitchInst *SI);
/// CaseBlock - This structure is used to communicate between
/// SelectionDAGBuilder and SDISel for the code generation of additional basic
@@ -606,6 +597,10 @@ public:
void visit(unsigned Opcode, const User &I);
+ /// getCopyFromRegs - If there was a virtual register allocated for the value V,
+ /// emit a CopyFromReg of the specified type Ty; otherwise return an empty SDValue().
+ SDValue getCopyFromRegs(const Value *V, Type *Ty);
+
// resolveDanglingDebugInfo - if we saw an earlier dbg_value referring to V,
// generate the debug data structures now that we've seen its definition.
void resolveDanglingDebugInfo(const Value *V, SDValue Val);
@@ -622,8 +617,7 @@ public:
void removeValue(const Value *V) {
// This is to support a hack in lowerCallFromStatepoint;
// it should be removed when the hack is resolved
- if (NodeMap.count(V))
- NodeMap.erase(V);
+ NodeMap.erase(V);
}
void setUnusedArgValue(const Value *V, SDValue NewN) {
@@ -662,7 +656,9 @@ public:
void UpdateSplitBlock(MachineBasicBlock *First, MachineBasicBlock *Last);
// This function is responsible for the whole statepoint lowering process.
- void LowerStatepoint(ImmutableStatepoint Statepoint);
+ // It uniformly handles invoke and call statepoints.
+ void LowerStatepoint(ImmutableStatepoint Statepoint,
+ MachineBasicBlock *LandingPad = nullptr);
private:
std::pair<SDValue, SDValue> lowerInvokable(
TargetLowering::CallLoweringInfo &CLI,
@@ -830,6 +826,9 @@ private:
bool EmitFuncArgumentDbgValue(const Value *V, MDNode *Variable, MDNode *Expr,
int64_t Offset, bool IsIndirect,
const SDValue &N);
+
+ /// Return the next block after MBB, or nullptr if there is none.
+ MachineBasicBlock *NextBlock(MachineBasicBlock *MBB);
};
} // end namespace llvm
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
index 17eff94..5898da4 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
@@ -95,6 +95,7 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const {
case ISD::GLOBAL_OFFSET_TABLE: return "GLOBAL_OFFSET_TABLE";
case ISD::RETURNADDR: return "RETURNADDR";
case ISD::FRAMEADDR: return "FRAMEADDR";
+ case ISD::FRAME_ALLOC_RECOVER: return "FRAME_ALLOC_RECOVER";
case ISD::READ_REGISTER: return "READ_REGISTER";
case ISD::WRITE_REGISTER: return "WRITE_REGISTER";
case ISD::FRAME_TO_ARGS_OFFSET: return "FRAME_TO_ARGS_OFFSET";
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
index 5e867cf..4d2af3f 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
@@ -168,14 +168,13 @@ static cl::opt<bool>
EnableFastISelVerbose("fast-isel-verbose", cl::Hidden,
cl::desc("Enable verbose messages in the \"fast\" "
"instruction selector"));
-static cl::opt<bool>
-EnableFastISelAbort("fast-isel-abort", cl::Hidden,
- cl::desc("Enable abort calls when \"fast\" instruction selection "
- "fails to lower an instruction"));
-static cl::opt<bool>
-EnableFastISelAbortArgs("fast-isel-abort-args", cl::Hidden,
- cl::desc("Enable abort calls when \"fast\" instruction selection "
- "fails to lower a formal argument"));
+static cl::opt<int> EnableFastISelAbort(
+ "fast-isel-abort", cl::Hidden,
+ cl::desc("Enable abort calls when \"fast\" instruction selection "
+ "fails to lower an instruction: 0 disable the abort, 1 will "
+ "abort but for args, calls and terminators, 2 will also "
+ "abort for argument lowering, and 3 will never fallback "
+ "to SelectionDAG."));
static cl::opt<bool>
UseMBPI("use-mbpi",
@@ -293,7 +292,8 @@ namespace llvm {
const TargetLowering *TLI = IS->TLI;
const TargetSubtargetInfo &ST = IS->MF->getSubtarget();
- if (OptLevel == CodeGenOpt::None || ST.useMachineScheduler() ||
+ if (OptLevel == CodeGenOpt::None ||
+ (ST.enableMachineScheduler() && ST.enableMachineSchedDefaultSched()) ||
TLI->getSchedulingPreference() == Sched::Source)
return createSourceListDAGScheduler(IS, OptLevel);
if (TLI->getSchedulingPreference() == Sched::RegPressure)
@@ -416,7 +416,7 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) {
assert((!EnableFastISelVerbose || TM.Options.EnableFastISel) &&
"-fast-isel-verbose requires -fast-isel");
assert((!EnableFastISelAbort || TM.Options.EnableFastISel) &&
- "-fast-isel-abort requires -fast-isel");
+ "-fast-isel-abort > 0 requires -fast-isel");
const Function &Fn = *mf.getFunction();
MF = &mf;
@@ -595,9 +595,8 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) {
void SelectionDAGISel::SelectBasicBlock(BasicBlock::const_iterator Begin,
BasicBlock::const_iterator End,
bool &HadTailCall) {
- // Lower all of the non-terminator instructions. If a call is emitted
- // as a tail call, cease emitting nodes for this block. Terminators
- // are handled below.
+ // Lower the instructions. If a call is emitted as a tail call, cease emitting
+ // nodes for this block.
for (BasicBlock::const_iterator I = Begin; I != End && !SDB->HasTailCall; ++I)
SDB->visit(*I);
@@ -1182,8 +1181,8 @@ void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) {
if (!FastIS->lowerArguments()) {
// Fast isel failed to lower these arguments
++NumFastIselFailLowerArguments;
- if (EnableFastISelAbortArgs)
- llvm_unreachable("FastISel didn't lower all arguments");
+ if (EnableFastISelAbort > 1)
+ report_fatal_error("FastISel didn't lower all arguments");
// Use SelectionDAG argument lowering
LowerArguments(Fn);
@@ -1252,6 +1251,10 @@ void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) {
dbgs() << "FastISel missed call: ";
Inst->dump();
}
+ if (EnableFastISelAbort > 2)
+ // FastISel selector couldn't handle something and bailed.
+ // For the purpose of debugging, just abort.
+ report_fatal_error("FastISel didn't select the entire block");
if (!Inst->getType()->isVoidTy() && !Inst->use_empty()) {
unsigned &R = FuncInfo->ValueMap[Inst];
@@ -1279,24 +1282,24 @@ void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) {
continue;
}
- if (isa<TerminatorInst>(Inst) && !isa<BranchInst>(Inst)) {
- // Don't abort, and use a different message for terminator misses.
- NumFastIselFailures += NumFastIselRemaining;
- if (EnableFastISelVerbose || EnableFastISelAbort) {
+ bool ShouldAbort = EnableFastISelAbort;
+ if (EnableFastISelVerbose || EnableFastISelAbort) {
+ if (isa<TerminatorInst>(Inst)) {
+ // Use a different message for terminator misses.
dbgs() << "FastISel missed terminator: ";
- Inst->dump();
- }
- } else {
- NumFastIselFailures += NumFastIselRemaining;
- if (EnableFastISelVerbose || EnableFastISelAbort) {
+ // Don't abort for terminators unless the abort level is really high
+ ShouldAbort = (EnableFastISelAbort > 2);
+ } else {
dbgs() << "FastISel miss: ";
- Inst->dump();
}
- if (EnableFastISelAbort)
- // The "fast" selector couldn't handle something and bailed.
- // For the purpose of debugging, just abort.
- llvm_unreachable("FastISel didn't select the entire block");
+ Inst->dump();
}
+ if (ShouldAbort)
+ // FastISel selector couldn't handle something and bailed.
+ // For the purpose of debugging, just abort.
+ report_fatal_error("FastISel didn't select the entire block");
+
+ NumFastIselFailures += NumFastIselRemaining;
break;
}
@@ -1775,9 +1778,23 @@ SelectInlineAsmMemoryOperands(std::vector<SDValue> &Ops) {
} else {
assert(InlineAsm::getNumOperandRegisters(Flags) == 1 &&
"Memory operand with multiple values?");
+
+ unsigned TiedToOperand;
+ if (InlineAsm::isUseOperandTiedToDef(Flags, TiedToOperand)) {
+ // We need the constraint ID from the operand this is tied to.
+ unsigned CurOp = InlineAsm::Op_FirstOperand;
+ Flags = cast<ConstantSDNode>(InOps[CurOp])->getZExtValue();
+ for (; TiedToOperand; --TiedToOperand) {
+ CurOp += InlineAsm::getNumOperandRegisters(Flags)+1;
+ Flags = cast<ConstantSDNode>(InOps[CurOp])->getZExtValue();
+ }
+ }
+
// Otherwise, this is a memory operand. Ask the target to select it.
std::vector<SDValue> SelOps;
- if (SelectInlineAsmMemoryOperand(InOps[i+1], 'm', SelOps))
+ if (SelectInlineAsmMemoryOperand(InOps[i+1],
+ InlineAsm::getMemoryConstraintID(Flags),
+ SelOps))
report_fatal_error("Could not match memory address. Inline asm"
" failure!");
@@ -1933,7 +1950,7 @@ SDNode *SelectionDAGISel::Select_INLINEASM(SDNode *N) {
std::vector<SDValue> Ops(N->op_begin(), N->op_end());
SelectInlineAsmMemoryOperands(Ops);
- EVT VTs[] = { MVT::Other, MVT::Glue };
+ const EVT VTs[] = {MVT::Other, MVT::Glue};
SDValue New = CurDAG->getNode(ISD::INLINEASM, SDLoc(N), VTs, Ops);
New->setNodeId(-1);
return New.getNode();
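
The tied-operand search above relies on the INLINEASM operand layout: each group is one flag-word constant followed by getNumOperandRegisters(Flags) values. A standalone sketch of that walk (N stands for the INLINEASM node; an assumption, not from the patch):

    // Visit each operand group's flag word in order.
    unsigned CurOp = InlineAsm::Op_FirstOperand;
    while (CurOp < N->getNumOperands()) {
      unsigned Flags = cast<ConstantSDNode>(N->getOperand(CurOp))->getZExtValue();
      // Skip the flag word itself plus the operands it describes.
      // (A trailing glue operand, if present, would need to be excluded.)
      CurOp += InlineAsm::getNumOperandRegisters(Flags) + 1;
    }
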
diff --git a/lib/CodeGen/SelectionDAG/StatepointLowering.cpp b/lib/CodeGen/SelectionDAG/StatepointLowering.cpp
index 1271f6b..3cc7a98 100644
--- a/lib/CodeGen/SelectionDAG/StatepointLowering.cpp
+++ b/lib/CodeGen/SelectionDAG/StatepointLowering.cpp
@@ -224,6 +224,7 @@ static void removeDuplicatesGCPtrs(SmallVectorImpl<const Value *> &Bases,
/// call node. Also update NodeMap so that getValue(statepoint) will
/// reference lowered call result
static SDNode *lowerCallFromStatepoint(ImmutableStatepoint StatepointSite,
+ MachineBasicBlock *LandingPad,
SelectionDAGBuilder &Builder) {
ImmutableCallSite CS(StatepointSite.getCallSite());
@@ -245,15 +246,29 @@ static SDNode *lowerCallFromStatepoint(ImmutableStatepoint StatepointSite,
Tmp->setTailCall(CS.isTailCall());
Tmp->setCallingConv(CS.getCallingConv());
Tmp->setAttributes(CS.getAttributes());
- Builder.LowerCallTo(Tmp, Builder.getValue(ActualCallee), false);
+ Builder.LowerCallTo(Tmp, Builder.getValue(ActualCallee), false, LandingPad);
// Handle the return value of the call, if any.
const bool HasDef = !Tmp->getType()->isVoidTy();
if (HasDef) {
- // The value of the statepoint itself will be the value of call itself.
- // We'll replace the actually call node shortly. gc_result will grab
- // this value.
- Builder.setValue(CS.getInstruction(), Builder.getValue(Tmp));
+ if (CS.isInvoke()) {
+ // For invokes the result value will be used in a different basic block,
+ // so we need to export it now. But the statepoint call has a different
+ // type than the actual call, which means the standard exporting mechanism
+ // would create a register of the wrong type. Instead, create a register
+ // of the correct type and save the value into it manually.
+ // TODO: To eliminate this problem we could remove gc.result intrinsics
+ // completely and make the statepoint call return a tuple.
+ unsigned reg = Builder.FuncInfo.CreateRegs(Tmp->getType());
+ Builder.CopyValueToVirtualRegister(Tmp, reg);
+ Builder.FuncInfo.ValueMap[CS.getInstruction()] = reg;
+ }
+ else {
+ // The value of the statepoint itself will be the value of the call itself.
+ // We'll replace the actual call node shortly. gc_result will grab
+ // this value.
+ Builder.setValue(CS.getInstruction(), Builder.getValue(Tmp));
+ }
} else {
// The token value is never used from here on, just generate a poison value
Builder.setValue(CS.getInstruction(), Builder.DAG.getIntPtrConstant(-1));
@@ -267,6 +282,15 @@ static SDNode *lowerCallFromStatepoint(ImmutableStatepoint StatepointSite,
// Search for the call node
// The following code is essentially reverse engineering X86's
// LowerCallTo.
+ // We expect the DAG to have the following form:
+ // ch = eh_label (only in case of invoke statepoint)
+ // ch, glue = callseq_start ch
+ // ch, glue = X86::Call ch, glue
+ // ch, glue = callseq_end ch, glue
+ // ch = eh_label ch (only in case of invoke statepoint)
+ //
+ // The DAG root will be either the last eh_label or the callseq_end.
+
SDNode *CallNode = nullptr;
// We just emitted a call, so it should be the last thing generated
@@ -276,8 +300,11 @@ static SDNode *lowerCallFromStatepoint(ImmutableStatepoint StatepointSite,
SDNode *CallEnd = Chain.getNode();
int Sanity = 0;
while (CallEnd->getOpcode() != ISD::CALLSEQ_END) {
- CallEnd = CallEnd->getGluedNode();
- assert(CallEnd && "Can not find call node");
+ assert(CallEnd->getNumOperands() >= 1 &&
+ CallEnd->getOperand(0).getValueType() == MVT::Other);
+
+ CallEnd = CallEnd->getOperand(0).getNode();
+
assert(Sanity < 20 && "should have found call end already");
Sanity++;
}
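
In effect the loop above now walks chain edges from the DAG root back to the callseq_end, instead of relying on glue. Condensed to its core (same logic as above):

    SDNode *N = Builder.DAG.getRoot().getNode();
    while (N->getOpcode() != ISD::CALLSEQ_END)
      N = N->getOperand(0).getNode(); // operand 0 is the MVT::Other chain
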
@@ -506,7 +533,9 @@ void SelectionDAGBuilder::visitStatepoint(const CallInst &CI) {
LowerStatepoint(ImmutableStatepoint(&CI));
}
-void SelectionDAGBuilder::LowerStatepoint(ImmutableStatepoint ISP) {
+void
+SelectionDAGBuilder::LowerStatepoint(ImmutableStatepoint ISP,
+ MachineBasicBlock *LandingPad/*=nullptr*/) {
// The basic scheme here is that information about both the original call and
// the safepoint is encoded in the CallInst. We create a temporary call and
// lower it, then reverse engineer the calling sequence.
@@ -542,13 +571,12 @@ void SelectionDAGBuilder::LowerStatepoint(ImmutableStatepoint ISP) {
}
#endif
-
// Lower statepoint vmstate and gcstate arguments
SmallVector<SDValue, 10> LoweredArgs;
lowerStatepointMetaArgs(LoweredArgs, ISP, *this);
// Get call node, we will replace it later with statepoint
- SDNode *CallNode = lowerCallFromStatepoint(ISP, *this);
+ SDNode *CallNode = lowerCallFromStatepoint(ISP, LandingPad, *this);
// Construct the actual STATEPOINT node with all the appropriate arguments
// and return values.
@@ -634,7 +662,24 @@ void SelectionDAGBuilder::visitGCResult(const CallInst &CI) {
assert(isStatepoint(I) &&
"first argument must be a statepoint token");
- setValue(&CI, getValue(I));
+ if (isa<InvokeInst>(I)) {
+ // For invokes we should have stored the call result in a virtual register.
+ // We cannot use the default getValue() functionality to copy the value
+ // from this register because the statepoint and the actual call can have
+ // different return types, so getValue() would use a CopyFromReg of the
+ // wrong type, which is always i32 in our case.
+ PointerType *CalleeType = cast<PointerType>(
+ ImmutableStatepoint(I).actualCallee()->getType());
+ Type *RetTy = cast<FunctionType>(
+ CalleeType->getElementType())->getReturnType();
+ SDValue CopyFromReg = getCopyFromRegs(I, RetTy);
+
+ assert(CopyFromReg.getNode());
+ setValue(&CI, CopyFromReg);
+ }
+ else {
+ setValue(&CI, getValue(I));
+ }
}
void SelectionDAGBuilder::visitGCRelocate(const CallInst &CI) {
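
Taken together, the invoke paths in lowerCallFromStatepoint and visitGCResult form a manual export/import of a value whose type the generic NodeMap machinery would get wrong. Condensed to its two halves (same calls as above):

    // Producer: stash the call result in a vreg of the *actual* return type.
    unsigned Reg = Builder.FuncInfo.CreateRegs(Tmp->getType());
    Builder.CopyValueToVirtualRegister(Tmp, Reg);
    Builder.FuncInfo.ValueMap[CS.getInstruction()] = Reg;
    // Consumer: read it back with the real return type, not the statepoint's.
    SDValue V = getCopyFromRegs(I, RetTy);
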
diff --git a/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/lib/CodeGen/SelectionDAG/TargetLowering.cpp
index 0a3c926..ddbf0b2 100644
--- a/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -96,18 +96,21 @@ TargetLowering::makeLibCall(SelectionDAG &DAG,
for (unsigned i = 0; i != NumOps; ++i) {
Entry.Node = Ops[i];
Entry.Ty = Entry.Node.getValueType().getTypeForEVT(*DAG.getContext());
- Entry.isSExt = isSigned;
- Entry.isZExt = !isSigned;
+ Entry.isSExt = shouldSignExtendTypeInLibCall(Ops[i].getValueType(), isSigned);
+ Entry.isZExt = !shouldSignExtendTypeInLibCall(Ops[i].getValueType(), isSigned);
Args.push_back(Entry);
}
+ if (LC == RTLIB::UNKNOWN_LIBCALL)
+ report_fatal_error("Unsupported library call operation!");
SDValue Callee = DAG.getExternalSymbol(getLibcallName(LC), getPointerTy());
Type *RetTy = RetVT.getTypeForEVT(*DAG.getContext());
TargetLowering::CallLoweringInfo CLI(DAG);
+ bool signExtend = shouldSignExtendTypeInLibCall(RetVT, isSigned);
CLI.setDebugLoc(dl).setChain(DAG.getEntryNode())
.setCallee(getLibcallCallingConv(LC), RetTy, Callee, std::move(Args), 0)
.setNoReturn(doesNotReturn).setDiscardResult(!isReturnValueUsed)
- .setSExtResult(isSigned).setZExtResult(!isSigned);
+ .setSExtResult(signExtend).setZExtResult(!signExtend);
return LowerCallTo(CLI);
}
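
The new hook lets a target decide per-type how libcall arguments and results are extended. A hypothetical override (MyTargetLowering is illustrative; the default presumably just returns IsSigned):

    bool MyTargetLowering::shouldSignExtendTypeInLibCall(EVT Type,
                                                         bool IsSigned) const {
      // Suppose this target's ABI sign-extends all sub-word libcall values.
      if (Type == MVT::i8 || Type == MVT::i16)
        return true;
      return IsSigned;
    }
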
diff --git a/lib/CodeGen/ShadowStackGCLowering.cpp b/lib/CodeGen/ShadowStackGCLowering.cpp
index f6393a5..66a6a3c 100644
--- a/lib/CodeGen/ShadowStackGCLowering.cpp
+++ b/lib/CodeGen/ShadowStackGCLowering.cpp
@@ -53,10 +53,10 @@ private:
Type *GetConcreteStackEntryType(Function &F);
void CollectRoots(Function &F);
static GetElementPtrInst *CreateGEP(LLVMContext &Context, IRBuilder<> &B,
- Value *BasePtr, int Idx1,
+ Type *Ty, Value *BasePtr, int Idx1,
const char *Name);
static GetElementPtrInst *CreateGEP(LLVMContext &Context, IRBuilder<> &B,
- Value *BasePtr, int Idx1, int Idx2,
+ Type *Ty, Value *BasePtr, int Idx1, int Idx2,
const char *Name);
};
}
@@ -343,13 +343,14 @@ void ShadowStackGCLowering::CollectRoots(Function &F) {
}
GetElementPtrInst *ShadowStackGCLowering::CreateGEP(LLVMContext &Context,
- IRBuilder<> &B, Value *BasePtr,
- int Idx, int Idx2,
- const char *Name) {
+ IRBuilder<> &B, Type *Ty,
+ Value *BasePtr, int Idx,
+ int Idx2,
+ const char *Name) {
Value *Indices[] = {ConstantInt::get(Type::getInt32Ty(Context), 0),
ConstantInt::get(Type::getInt32Ty(Context), Idx),
ConstantInt::get(Type::getInt32Ty(Context), Idx2)};
- Value *Val = B.CreateGEP(BasePtr, Indices, Name);
+ Value *Val = B.CreateGEP(Ty, BasePtr, Indices, Name);
assert(isa<GetElementPtrInst>(Val) && "Unexpected folded constant");
@@ -357,11 +358,11 @@ GetElementPtrInst *ShadowStackGCLowering::CreateGEP(LLVMContext &Context,
}
GetElementPtrInst *ShadowStackGCLowering::CreateGEP(LLVMContext &Context,
- IRBuilder<> &B, Value *BasePtr,
+ IRBuilder<> &B, Type *Ty, Value *BasePtr,
int Idx, const char *Name) {
Value *Indices[] = {ConstantInt::get(Type::getInt32Ty(Context), 0),
ConstantInt::get(Type::getInt32Ty(Context), Idx)};
- Value *Val = B.CreateGEP(BasePtr, Indices, Name);
+ Value *Val = B.CreateGEP(Ty, BasePtr, Indices, Name);
assert(isa<GetElementPtrInst>(Val) && "Unexpected folded constant");
@@ -402,14 +403,15 @@ bool ShadowStackGCLowering::runOnFunction(Function &F) {
// Initialize the map pointer and load the current head of the shadow stack.
Instruction *CurrentHead = AtEntry.CreateLoad(Head, "gc_currhead");
- Instruction *EntryMapPtr =
- CreateGEP(Context, AtEntry, StackEntry, 0, 1, "gc_frame.map");
+ Instruction *EntryMapPtr = CreateGEP(Context, AtEntry, ConcreteStackEntryTy,
+ StackEntry, 0, 1, "gc_frame.map");
AtEntry.CreateStore(FrameMap, EntryMapPtr);
// After all the allocas...
for (unsigned I = 0, E = Roots.size(); I != E; ++I) {
// For each root, find the corresponding slot in the aggregate...
- Value *SlotPtr = CreateGEP(Context, AtEntry, StackEntry, 1 + I, "gc_root");
+ Value *SlotPtr = CreateGEP(Context, AtEntry, ConcreteStackEntryTy,
+ StackEntry, 1 + I, "gc_root");
// And use it in lieu of the alloca.
AllocaInst *OriginalAlloca = Roots[I].second;
@@ -426,10 +428,10 @@ bool ShadowStackGCLowering::runOnFunction(Function &F) {
AtEntry.SetInsertPoint(IP->getParent(), IP);
// Push the entry onto the shadow stack.
- Instruction *EntryNextPtr =
- CreateGEP(Context, AtEntry, StackEntry, 0, 0, "gc_frame.next");
- Instruction *NewHeadVal =
- CreateGEP(Context, AtEntry, StackEntry, 0, "gc_newhead");
+ Instruction *EntryNextPtr = CreateGEP(Context, AtEntry, ConcreteStackEntryTy,
+ StackEntry, 0, 0, "gc_frame.next");
+ Instruction *NewHeadVal = CreateGEP(Context, AtEntry, ConcreteStackEntryTy,
+ StackEntry, 0, "gc_newhead");
AtEntry.CreateStore(CurrentHead, EntryNextPtr);
AtEntry.CreateStore(NewHeadVal, Head);
@@ -439,7 +441,8 @@ bool ShadowStackGCLowering::runOnFunction(Function &F) {
// Pop the entry from the shadow stack. Don't reuse CurrentHead from
// AtEntry, since that would make the value live for the entire function.
Instruction *EntryNextPtr2 =
- CreateGEP(Context, *AtExit, StackEntry, 0, 0, "gc_frame.next");
+ CreateGEP(Context, *AtExit, ConcreteStackEntryTy, StackEntry, 0, 0,
+ "gc_frame.next");
Value *SavedHead = AtExit->CreateLoad(EntryNextPtr2, "gc_savedhead");
AtExit->CreateStore(SavedHead, Head);
}
diff --git a/lib/CodeGen/SjLjEHPrepare.cpp b/lib/CodeGen/SjLjEHPrepare.cpp
index 35e4292..2335a88 100644
--- a/lib/CodeGen/SjLjEHPrepare.cpp
+++ b/lib/CodeGen/SjLjEHPrepare.cpp
@@ -128,7 +128,8 @@ void SjLjEHPrepare::insertCallSiteStore(Instruction *I, int Number) {
Value *Zero = ConstantInt::get(Int32Ty, 0);
Value *One = ConstantInt::get(Int32Ty, 1);
Value *Idxs[2] = { Zero, One };
- Value *CallSite = Builder.CreateGEP(FuncCtx, Idxs, "call_site");
+ Value *CallSite =
+ Builder.CreateGEP(FunctionContextTy, FuncCtx, Idxs, "call_site");
// Insert a store of the call-site number
ConstantInt *CallSiteNoC =
diff --git a/lib/CodeGen/SlotIndexes.cpp b/lib/CodeGen/SlotIndexes.cpp
index d46621d..025ae70 100644
--- a/lib/CodeGen/SlotIndexes.cpp
+++ b/lib/CodeGen/SlotIndexes.cpp
@@ -127,7 +127,7 @@ void SlotIndexes::renumberIndexes() {
void SlotIndexes::renumberIndexes(IndexList::iterator curItr) {
// Number indexes with half the default spacing so we can catch up quickly.
const unsigned Space = SlotIndex::InstrDist/2;
- assert((Space & 3) == 0 && "InstrDist must be a multiple of 2*NUM");
+ static_assert((Space & 3) == 0, "InstrDist must be a multiple of 2*NUM");
IndexList::iterator startItr = std::prev(curItr);
unsigned index = startItr->getIndex();
diff --git a/lib/CodeGen/StackColoring.cpp b/lib/CodeGen/StackColoring.cpp
index faf94b6..7572803 100644
--- a/lib/CodeGen/StackColoring.cpp
+++ b/lib/CodeGen/StackColoring.cpp
@@ -364,7 +364,7 @@ void StackColoring::calculateLocalLiveness() {
}
}
- BBSet = NextBBSet;
+ BBSet = std::move(NextBBSet);
}// while changed.
}
diff --git a/lib/CodeGen/StackMapLivenessAnalysis.cpp b/lib/CodeGen/StackMapLivenessAnalysis.cpp
index 767f43a..d88be57 100644
--- a/lib/CodeGen/StackMapLivenessAnalysis.cpp
+++ b/lib/CodeGen/StackMapLivenessAnalysis.cpp
@@ -14,24 +14,24 @@
//===----------------------------------------------------------------------===//
#include "llvm/ADT/Statistic.h"
+#include "llvm/CodeGen/LivePhysRegs.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFunctionAnalysis.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/Passes.h"
-#include "llvm/CodeGen/StackMapLivenessAnalysis.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetSubtargetInfo.h"
using namespace llvm;
#define DEBUG_TYPE "stackmaps"
-namespace llvm {
-cl::opt<bool> EnablePatchPointLiveness("enable-patchpoint-liveness",
- cl::Hidden, cl::init(true),
- cl::desc("Enable PatchPoint Liveness Analysis Pass"));
-}
+static cl::opt<bool> EnablePatchPointLiveness(
+ "enable-patchpoint-liveness", cl::Hidden, cl::init(true),
+ cl::desc("Enable PatchPoint Liveness Analysis Pass"));
STATISTIC(NumStackMapFuncVisited, "Number of functions visited");
STATISTIC(NumStackMapFuncSkipped, "Number of functions skipped");
@@ -39,6 +39,46 @@ STATISTIC(NumBBsVisited, "Number of basic blocks visited");
STATISTIC(NumBBsHaveNoStackmap, "Number of basic blocks with no stackmap");
STATISTIC(NumStackMaps, "Number of StackMaps visited");
+namespace {
+/// \brief This pass calculates the liveness information for each basic block in
+/// a function and attaches the register live-out information to a patchpoint
+/// intrinsic if present.
+///
+/// This pass can be disabled via the -enable-patchpoint-liveness=false flag.
+/// The pass skips functions that don't have any patchpoint intrinsics. The
+/// information provided by this pass is optional and not required by the
+/// aforementioned intrinsic to function.
+class StackMapLiveness : public MachineFunctionPass {
+ MachineFunction *MF;
+ const TargetRegisterInfo *TRI;
+ LivePhysRegs LiveRegs;
+
+public:
+ static char ID;
+
+ /// \brief Default construct and initialize the pass.
+ StackMapLiveness();
+
+ /// \brief Tell the pass manager which passes we depend on and what
+ /// information we preserve.
+ void getAnalysisUsage(AnalysisUsage &AU) const override;
+
+ /// \brief Calculate the liveness information for the given machine function.
+ bool runOnMachineFunction(MachineFunction &MF) override;
+
+private:
+ /// \brief Performs the actual liveness calculation for the function.
+ bool calculateLiveness();
+
+ /// \brief Add the current register live set to the instruction.
+ void addLiveOutSetToMI(MachineInstr &MI);
+
+ /// \brief Create a register mask and initialize it with the registers from
+ /// the register live set.
+ uint32_t *createRegisterMask() const;
+};
+} // namespace
+
char StackMapLiveness::ID = 0;
char &llvm::StackMapLivenessID = StackMapLiveness::ID;
INITIALIZE_PASS(StackMapLiveness, "stackmap-liveness",
@@ -60,18 +100,18 @@ void StackMapLiveness::getAnalysisUsage(AnalysisUsage &AU) const {
}
/// Calculate the liveness information for the given machine function.
-bool StackMapLiveness::runOnMachineFunction(MachineFunction &_MF) {
+bool StackMapLiveness::runOnMachineFunction(MachineFunction &MF) {
if (!EnablePatchPointLiveness)
return false;
- DEBUG(dbgs() << "********** COMPUTING STACKMAP LIVENESS: "
- << _MF.getName() << " **********\n");
- MF = &_MF;
- TRI = MF->getSubtarget().getRegisterInfo();
+ DEBUG(dbgs() << "********** COMPUTING STACKMAP LIVENESS: " << MF.getName()
+ << " **********\n");
+ this->MF = &MF;
+ TRI = MF.getSubtarget().getRegisterInfo();
++NumStackMapFuncVisited;
// Skip this function if there are no patchpoints to process.
- if (!MF->getFrameInfo()->hasPatchPoint()) {
+ if (!MF.getFrameInfo()->hasPatchPoint()) {
++NumStackMapFuncSkipped;
return false;
}
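
Moving the class into an anonymous namespace (and out of the now-deleted header) follows the standard pattern for file-local machine passes; a minimal skeleton of the same shape, with hypothetical names and headers omitted:

    namespace {
    class MyLiveness : public MachineFunctionPass {
    public:
      static char ID;
      MyLiveness() : MachineFunctionPass(ID) {}
      bool runOnMachineFunction(MachineFunction &MF) override { return false; }
    };
    } // namespace
    char MyLiveness::ID = 0;
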
diff --git a/lib/CodeGen/StackMaps.cpp b/lib/CodeGen/StackMaps.cpp
index 5d46419..aa18dea 100644
--- a/lib/CodeGen/StackMaps.cpp
+++ b/lib/CodeGen/StackMaps.cpp
@@ -19,8 +19,6 @@
#include "llvm/MC/MCSectionMachO.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/Support/CommandLine.h"
-#include "llvm/Support/Debug.h"
-#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetOpcodes.h"
#include "llvm/Target/TargetRegisterInfo.h"
@@ -76,10 +74,21 @@ StackMaps::StackMaps(AsmPrinter &AP) : AP(AP) {
llvm_unreachable("Unsupported stackmap version!");
}
+/// Go up the super-register chain until we hit a valid dwarf register number.
+static unsigned getDwarfRegNum(unsigned Reg, const TargetRegisterInfo *TRI) {
+ int RegNo = TRI->getDwarfRegNum(Reg, false);
+ for (MCSuperRegIterator SR(Reg, TRI); SR.isValid() && RegNo < 0; ++SR)
+ RegNo = TRI->getDwarfRegNum(*SR, false);
+
+ assert(RegNo >= 0 && "Invalid Dwarf register number.");
+ return (unsigned) RegNo;
+}
+
MachineInstr::const_mop_iterator
StackMaps::parseOperand(MachineInstr::const_mop_iterator MOI,
MachineInstr::const_mop_iterator MOE,
LocationVec &Locs, LiveOutVec &LiveOuts) const {
+ const TargetRegisterInfo *TRI = AP.MF->getSubtarget().getRegisterInfo();
if (MOI->isImm()) {
switch (MOI->getImm()) {
default: llvm_unreachable("Unrecognized operand type.");
@@ -89,7 +98,8 @@ StackMaps::parseOperand(MachineInstr::const_mop_iterator MOI,
Size /= 8;
unsigned Reg = (++MOI)->getReg();
int64_t Imm = (++MOI)->getImm();
- Locs.push_back(Location(StackMaps::Location::Direct, Size, Reg, Imm));
+ Locs.push_back(Location(StackMaps::Location::Direct, Size,
+ getDwarfRegNum(Reg, TRI), Imm));
break;
}
case StackMaps::IndirectMemRefOp: {
@@ -97,7 +107,8 @@ StackMaps::parseOperand(MachineInstr::const_mop_iterator MOI,
assert(Size > 0 && "Need a valid size for indirect memory locations.");
unsigned Reg = (++MOI)->getReg();
int64_t Imm = (++MOI)->getImm();
- Locs.push_back(Location(StackMaps::Location::Indirect, Size, Reg, Imm));
+ Locs.push_back(Location(StackMaps::Location::Indirect, Size,
+ getDwarfRegNum(Reg, TRI), Imm));
break;
}
case StackMaps::ConstantOp: {
@@ -122,12 +133,18 @@ StackMaps::parseOperand(MachineInstr::const_mop_iterator MOI,
assert(TargetRegisterInfo::isPhysicalRegister(MOI->getReg()) &&
"Virtreg operands should have been rewritten before now.");
- const TargetRegisterClass *RC =
- AP.TM.getSubtargetImpl()->getRegisterInfo()->getMinimalPhysRegClass(
- MOI->getReg());
+ const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(MOI->getReg());
assert(!MOI->getSubReg() && "Physical subreg still around.");
+
+ unsigned Offset = 0;
+ unsigned RegNo = getDwarfRegNum(MOI->getReg(), TRI);
+ unsigned LLVMRegNo = TRI->getLLVMRegNum(RegNo, false);
+ unsigned SubRegIdx = TRI->getSubRegIndex(LLVMRegNo, MOI->getReg());
+ if (SubRegIdx)
+ Offset = TRI->getSubRegIdxOffset(SubRegIdx);
+
Locs.push_back(
- Location(Location::Register, RC->getSize(), MOI->getReg(), 0));
+ Location(Location::Register, RC->getSize(), RegNo, Offset));
return ++MOI;
}
@@ -137,14 +154,74 @@ StackMaps::parseOperand(MachineInstr::const_mop_iterator MOI,
return ++MOI;
}
-/// Go up the super-register chain until we hit a valid dwarf register number.
-static unsigned getDwarfRegNum(unsigned Reg, const TargetRegisterInfo *TRI) {
- int RegNo = TRI->getDwarfRegNum(Reg, false);
- for (MCSuperRegIterator SR(Reg, TRI); SR.isValid() && RegNo < 0; ++SR)
- RegNo = TRI->getDwarfRegNum(*SR, false);
+void StackMaps::print(raw_ostream &OS) {
+ const TargetRegisterInfo *TRI =
+ AP.MF ? AP.MF->getSubtarget().getRegisterInfo() : nullptr;
+ OS << WSMP << "callsites:\n";
+ for (const auto &CSI : CSInfos) {
+ const LocationVec &CSLocs = CSI.Locations;
+ const LiveOutVec &LiveOuts = CSI.LiveOuts;
- assert(RegNo >= 0 && "Invalid Dwarf register number.");
- return (unsigned) RegNo;
+ OS << WSMP << "callsite " << CSI.ID << "\n";
+ OS << WSMP << " has " << CSLocs.size() << " locations\n";
+
+ unsigned OperIdx = 0;
+ for (const auto &Loc : CSLocs) {
+ OS << WSMP << " Loc " << OperIdx << ": ";
+ switch (Loc.LocType) {
+ case Location::Unprocessed:
+ OS << "<Unprocessed operand>";
+ break;
+ case Location::Register:
+ OS << "Register ";
+ if (TRI)
+ OS << TRI->getName(Loc.Reg);
+ else
+ OS << Loc.Reg;
+ break;
+ case Location::Direct:
+ OS << "Direct ";
+ if (TRI)
+ OS << TRI->getName(Loc.Reg);
+ else
+ OS << Loc.Reg;
+ if (Loc.Offset)
+ OS << " + " << Loc.Offset;
+ break;
+ case Location::Indirect:
+ OS << "Indirect ";
+ if (TRI)
+ OS << TRI->getName(Loc.Reg);
+ else
+ OS << Loc.Reg;
+ OS << "+" << Loc.Offset;
+ break;
+ case Location::Constant:
+ OS << "Constant " << Loc.Offset;
+ break;
+ case Location::ConstantIndex:
+ OS << "Constant Index " << Loc.Offset;
+ break;
+ }
+ OS << " [encoding: .byte " << Loc.LocType << ", .byte " << Loc.Size
+ << ", .short " << Loc.Reg << ", .int " << Loc.Offset << "]\n";
+ OperIdx++;
+ }
+
+ OS << WSMP << " has " << LiveOuts.size() << " live-out registers\n";
+
+ OperIdx = 0;
+ for (const auto &LO : LiveOuts) {
+ OS << WSMP << " LO " << OperIdx << ": ";
+ if (TRI)
+ OS << TRI->getName(LO.Reg);
+ else
+ OS << LO.Reg;
+ OS << " [encoding: .short " << LO.RegNo << ", .byte 0, .byte "
+ << LO.Size << "]\n";
+ OperIdx++;
+ }
+ }
}
/// Create a live-out register record for the given register Reg.
@@ -160,7 +237,7 @@ StackMaps::createLiveOutReg(unsigned Reg, const TargetRegisterInfo *TRI) const {
StackMaps::LiveOutVec
StackMaps::parseRegisterLiveOutMask(const uint32_t *Mask) const {
assert(Mask && "No register mask specified");
- const TargetRegisterInfo *TRI = AP.TM.getSubtargetImpl()->getRegisterInfo();
+ const TargetRegisterInfo *TRI = AP.MF->getSubtarget().getRegisterInfo();
LiveOutVec LiveOuts;
// Create a LiveOutReg for each bit that is set in the register mask.
@@ -383,16 +460,13 @@ void StackMaps::emitConstantPoolEntries(MCStreamer &OS) {
/// 0x3, Indirect, [Reg + Offset] (spilled value)
/// 0x4, Constant, Offset (small constant)
/// 0x5, ConstIndex, Constants[Offset] (large constant)
-void StackMaps::emitCallsiteEntries(MCStreamer &OS,
- const TargetRegisterInfo *TRI) {
+void StackMaps::emitCallsiteEntries(MCStreamer &OS) {
+ DEBUG(print(dbgs()));
// Callsite entries.
- DEBUG(dbgs() << WSMP << "callsites:\n");
for (const auto &CSI : CSInfos) {
const LocationVec &CSLocs = CSI.Locations;
const LiveOutVec &LiveOuts = CSI.LiveOuts;
- DEBUG(dbgs() << WSMP << "callsite " << CSI.ID << "\n");
-
// Verify stack map entry. It's better to communicate a problem to the
// runtime than crash in case of in-process compilation. Currently, we do
// simple overflow checks, but we may eventually communicate other
@@ -413,83 +487,20 @@ void StackMaps::emitCallsiteEntries(MCStreamer &OS,
// Reserved for flags.
OS.EmitIntValue(0, 2);
-
- DEBUG(dbgs() << WSMP << " has " << CSLocs.size() << " locations\n");
-
OS.EmitIntValue(CSLocs.size(), 2);
- unsigned OperIdx = 0;
for (const auto &Loc : CSLocs) {
- unsigned RegNo = 0;
- int Offset = Loc.Offset;
- if(Loc.Reg) {
- RegNo = getDwarfRegNum(Loc.Reg, TRI);
-
- // If this is a register location, put the subregister byte offset in
- // the location offset.
- if (Loc.LocType == Location::Register) {
- assert(!Loc.Offset && "Register location should have zero offset");
- unsigned LLVMRegNo = TRI->getLLVMRegNum(RegNo, false);
- unsigned SubRegIdx = TRI->getSubRegIndex(LLVMRegNo, Loc.Reg);
- if (SubRegIdx)
- Offset = TRI->getSubRegIdxOffset(SubRegIdx);
- }
- }
- else {
- assert(Loc.LocType != Location::Register &&
- "Missing location register");
- }
-
- DEBUG(dbgs() << WSMP << " Loc " << OperIdx << ": ";
- switch (Loc.LocType) {
- case Location::Unprocessed:
- dbgs() << "<Unprocessed operand>";
- break;
- case Location::Register:
- dbgs() << "Register " << TRI->getName(Loc.Reg);
- break;
- case Location::Direct:
- dbgs() << "Direct " << TRI->getName(Loc.Reg);
- if (Loc.Offset)
- dbgs() << " + " << Loc.Offset;
- break;
- case Location::Indirect:
- dbgs() << "Indirect " << TRI->getName(Loc.Reg)
- << " + " << Loc.Offset;
- break;
- case Location::Constant:
- dbgs() << "Constant " << Loc.Offset;
- break;
- case Location::ConstantIndex:
- dbgs() << "Constant Index " << Loc.Offset;
- break;
- }
- dbgs() << " [encoding: .byte " << Loc.LocType
- << ", .byte " << Loc.Size
- << ", .short " << RegNo
- << ", .int " << Offset << "]\n";
- );
-
OS.EmitIntValue(Loc.LocType, 1);
OS.EmitIntValue(Loc.Size, 1);
- OS.EmitIntValue(RegNo, 2);
- OS.EmitIntValue(Offset, 4);
- OperIdx++;
+ OS.EmitIntValue(Loc.Reg, 2);
+ OS.EmitIntValue(Loc.Offset, 4);
}
- DEBUG(dbgs() << WSMP << " has " << LiveOuts.size()
- << " live-out registers\n");
-
// Num live-out registers and padding to align to 4 byte.
OS.EmitIntValue(0, 2);
OS.EmitIntValue(LiveOuts.size(), 2);
- OperIdx = 0;
for (const auto &LO : LiveOuts) {
- DEBUG(dbgs() << WSMP << " LO " << OperIdx << ": "
- << TRI->getName(LO.Reg)
- << " [encoding: .short " << LO.RegNo
- << ", .byte 0, .byte " << LO.Size << "]\n");
OS.EmitIntValue(LO.RegNo, 2);
OS.EmitIntValue(0, 1);
OS.EmitIntValue(LO.Size, 1);
@@ -512,7 +523,6 @@ void StackMaps::serializeToStackMapSection() {
MCContext &OutContext = AP.OutStreamer.getContext();
MCStreamer &OS = AP.OutStreamer;
- const TargetRegisterInfo *TRI = AP.TM.getSubtargetImpl()->getRegisterInfo();
// Create the section.
const MCSection *StackMapSection =
@@ -527,7 +537,7 @@ void StackMaps::serializeToStackMapSection() {
emitStackmapHeader(OS);
emitFunctionFrameRecords(OS);
emitConstantPoolEntries(OS);
- emitCallsiteEntries(OS, TRI);
+ emitCallsiteEntries(OS);
OS.AddBlankLine();
// Clean up.
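
For reference, each location record emitted above occupies eight bytes; expressed as a struct (a reading aid only, assuming the layout implied by the .byte/.byte/.short/.int sequence):

    struct StackMapLocationRecord {
      uint8_t Type;         // Loc.LocType: 0x1 Register ... 0x5 ConstantIndex
      uint8_t Size;         // Loc.Size in bytes
      uint16_t DwarfRegNum; // Loc.Reg, already a Dwarf number after this patch
      int32_t Offset;       // Loc.Offset
    };
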
diff --git a/lib/CodeGen/StackSlotColoring.cpp b/lib/CodeGen/StackSlotColoring.cpp
index cc72e5e..a5a175f 100644
--- a/lib/CodeGen/StackSlotColoring.cpp
+++ b/lib/CodeGen/StackSlotColoring.cpp
@@ -184,10 +184,18 @@ void StackSlotColoring::InitializeSlots() {
UsedColors.resize(LastFI);
Assignments.resize(LastFI);
+ typedef std::iterator_traits<LiveStacks::iterator>::value_type Pair;
+ SmallVector<Pair *, 16> Intervals;
+ Intervals.reserve(LS->getNumIntervals());
+ for (auto &I : *LS)
+ Intervals.push_back(&I);
+ std::sort(Intervals.begin(), Intervals.end(),
+ [](Pair *LHS, Pair *RHS) { return LHS->first < RHS->first; });
+
// Gather all spill slots into a list.
DEBUG(dbgs() << "Spill slot intervals:\n");
- for (LiveStacks::iterator i = LS->begin(), e = LS->end(); i != e; ++i) {
- LiveInterval &li = i->second;
+ for (auto *I : Intervals) {
+ LiveInterval &li = I->second;
DEBUG(li.dump());
int FI = TargetRegisterInfo::stackSlot2Index(li.reg);
if (MFI->isDeadObjectIndex(FI))
diff --git a/lib/CodeGen/TargetInstrInfo.cpp b/lib/CodeGen/TargetInstrInfo.cpp
index 2566c1f..38725b5 100644
--- a/lib/CodeGen/TargetInstrInfo.cpp
+++ b/lib/CodeGen/TargetInstrInfo.cpp
@@ -285,21 +285,20 @@ bool TargetInstrInfo::hasStoreToStackSlot(const MachineInstr *MI,
bool TargetInstrInfo::getStackSlotRange(const TargetRegisterClass *RC,
unsigned SubIdx, unsigned &Size,
unsigned &Offset,
- const TargetMachine *TM) const {
+ const MachineFunction &MF) const {
if (!SubIdx) {
Size = RC->getSize();
Offset = 0;
return true;
}
- unsigned BitSize =
- TM->getSubtargetImpl()->getRegisterInfo()->getSubRegIdxSize(SubIdx);
+ const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
+ unsigned BitSize = TRI->getSubRegIdxSize(SubIdx);
// Convert bit size to byte size to be consistent with
// MCRegisterClass::getSize().
if (BitSize % 8)
return false;
- int BitOffset =
- TM->getSubtargetImpl()->getRegisterInfo()->getSubRegIdxOffset(SubIdx);
+ int BitOffset = TRI->getSubRegIdxOffset(SubIdx);
if (BitOffset < 0 || BitOffset % 8)
return false;
@@ -308,7 +307,7 @@ bool TargetInstrInfo::getStackSlotRange(const TargetRegisterClass *RC,
assert(RC->getSize() >= (Offset + Size) && "bad subregister range");
- if (!TM->getDataLayout()->isLittleEndian()) {
+ if (!MF.getTarget().getDataLayout()->isLittleEndian()) {
Offset = RC->getSize() - (Offset + Size);
}
return true;
@@ -377,16 +376,13 @@ void TargetInstrInfo::getNoopForMachoTarget(MCInst &NopInst) const {
llvm_unreachable("Not a MachO target");
}
-bool TargetInstrInfo::
-canFoldMemoryOperand(const MachineInstr *MI,
- const SmallVectorImpl<unsigned> &Ops) const {
+bool TargetInstrInfo::canFoldMemoryOperand(const MachineInstr *MI,
+ ArrayRef<unsigned> Ops) const {
return MI->isCopy() && Ops.size() == 1 && canFoldCopy(MI, Ops[0]);
}
-static MachineInstr* foldPatchpoint(MachineFunction &MF,
- MachineInstr *MI,
- const SmallVectorImpl<unsigned> &Ops,
- int FrameIndex,
+static MachineInstr *foldPatchpoint(MachineFunction &MF, MachineInstr *MI,
+ ArrayRef<unsigned> Ops, int FrameIndex,
const TargetInstrInfo &TII) {
unsigned StartIdx = 0;
switch (MI->getOpcode()) {
@@ -405,9 +401,8 @@ static MachineInstr* foldPatchpoint(MachineFunction &MF,
// Return false if any operands requested for folding are not foldable (not
// part of the stackmap's live values).
- for (SmallVectorImpl<unsigned>::const_iterator I = Ops.begin(), E = Ops.end();
- I != E; ++I) {
- if (*I < StartIdx)
+ for (unsigned Op : Ops) {
+ if (Op < StartIdx)
return nullptr;
}
@@ -427,8 +422,8 @@ static MachineInstr* foldPatchpoint(MachineFunction &MF,
// Compute the spill slot size and offset.
const TargetRegisterClass *RC =
MF.getRegInfo().getRegClass(MO.getReg());
- bool Valid = TII.getStackSlotRange(RC, MO.getSubReg(), SpillSize,
- SpillOffset, &MF.getTarget());
+ bool Valid =
+ TII.getStackSlotRange(RC, MO.getSubReg(), SpillSize, SpillOffset, MF);
if (!Valid)
report_fatal_error("cannot spill patchpoint subregister operand");
MIB.addImm(StackMaps::IndirectMemRefOp);
@@ -448,10 +443,9 @@ static MachineInstr* foldPatchpoint(MachineFunction &MF,
/// operand folded, otherwise NULL is returned. The client is responsible for
/// removing the old instruction and adding the new one in the instruction
/// stream.
-MachineInstr*
-TargetInstrInfo::foldMemoryOperand(MachineBasicBlock::iterator MI,
- const SmallVectorImpl<unsigned> &Ops,
- int FI) const {
+MachineInstr *TargetInstrInfo::foldMemoryOperand(MachineBasicBlock::iterator MI,
+ ArrayRef<unsigned> Ops,
+ int FI) const {
unsigned Flags = 0;
for (unsigned i = 0, e = Ops.size(); i != e; ++i)
if (MI->getOperand(Ops[i]).isDef())
@@ -517,10 +511,9 @@ TargetInstrInfo::foldMemoryOperand(MachineBasicBlock::iterator MI,
/// foldMemoryOperand - Same as the previous version except it allows folding
/// of any load and store from / to any address, not just from a specific
/// stack slot.
-MachineInstr*
-TargetInstrInfo::foldMemoryOperand(MachineBasicBlock::iterator MI,
- const SmallVectorImpl<unsigned> &Ops,
- MachineInstr* LoadMI) const {
+MachineInstr *TargetInstrInfo::foldMemoryOperand(MachineBasicBlock::iterator MI,
+ ArrayRef<unsigned> Ops,
+ MachineInstr *LoadMI) const {
assert(LoadMI->canFoldAsLoad() && "LoadMI isn't foldable!");
#ifndef NDEBUG
for (unsigned i = 0, e = Ops.size(); i != e; ++i)
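
Switching to ArrayRef<unsigned> lets callers hand over whatever container they already have, without copies; for instance (illustrative call sites, not from the patch):

    SmallVector<unsigned, 2> OpsVec = {0, 1};
    TII->foldMemoryOperand(MI, OpsVec, FI); // from a SmallVector
    TII->foldMemoryOperand(MI, {2}, FI);    // from an initializer list
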
diff --git a/lib/CodeGen/TargetLoweringBase.cpp b/lib/CodeGen/TargetLoweringBase.cpp
index 9048a44..58a6d52 100644
--- a/lib/CodeGen/TargetLoweringBase.cpp
+++ b/lib/CodeGen/TargetLoweringBase.cpp
@@ -664,6 +664,44 @@ RTLIB::Libcall RTLIB::getUINTTOFP(EVT OpVT, EVT RetVT) {
return UNKNOWN_LIBCALL;
}
+RTLIB::Libcall RTLIB::getATOMIC(unsigned Opc, MVT VT) {
+#define OP_TO_LIBCALL(Name, Enum) \
+ case Name: \
+ switch (VT.SimpleTy) { \
+ default: \
+ return UNKNOWN_LIBCALL; \
+ case MVT::i8: \
+ return Enum##_1; \
+ case MVT::i16: \
+ return Enum##_2; \
+ case MVT::i32: \
+ return Enum##_4; \
+ case MVT::i64: \
+ return Enum##_8; \
+ case MVT::i128: \
+ return Enum##_16; \
+ }
+
+ switch (Opc) {
+ OP_TO_LIBCALL(ISD::ATOMIC_SWAP, SYNC_LOCK_TEST_AND_SET)
+ OP_TO_LIBCALL(ISD::ATOMIC_CMP_SWAP, SYNC_VAL_COMPARE_AND_SWAP)
+ OP_TO_LIBCALL(ISD::ATOMIC_LOAD_ADD, SYNC_FETCH_AND_ADD)
+ OP_TO_LIBCALL(ISD::ATOMIC_LOAD_SUB, SYNC_FETCH_AND_SUB)
+ OP_TO_LIBCALL(ISD::ATOMIC_LOAD_AND, SYNC_FETCH_AND_AND)
+ OP_TO_LIBCALL(ISD::ATOMIC_LOAD_OR, SYNC_FETCH_AND_OR)
+ OP_TO_LIBCALL(ISD::ATOMIC_LOAD_XOR, SYNC_FETCH_AND_XOR)
+ OP_TO_LIBCALL(ISD::ATOMIC_LOAD_NAND, SYNC_FETCH_AND_NAND)
+ OP_TO_LIBCALL(ISD::ATOMIC_LOAD_MAX, SYNC_FETCH_AND_MAX)
+ OP_TO_LIBCALL(ISD::ATOMIC_LOAD_UMAX, SYNC_FETCH_AND_UMAX)
+ OP_TO_LIBCALL(ISD::ATOMIC_LOAD_MIN, SYNC_FETCH_AND_MIN)
+ OP_TO_LIBCALL(ISD::ATOMIC_LOAD_UMIN, SYNC_FETCH_AND_UMIN)
+ }
+
+#undef OP_TO_LIBCALL
+
+ return UNKNOWN_LIBCALL;
+}
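
A hypothetical use of the new mapping during legalization, picking the sized __sync_* libcall for a 32-bit atomic add:

    RTLIB::Libcall LC = RTLIB::getATOMIC(ISD::ATOMIC_LOAD_ADD, MVT::i32);
    // LC is RTLIB::SYNC_FETCH_AND_ADD_4; unsupported widths yield
    // UNKNOWN_LIBCALL, which makeLibCall now reports as a fatal error.
    assert(LC != RTLIB::UNKNOWN_LIBCALL);
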
+
/// InitCmpLibcallCCs - Set default comparison libcall CC.
///
static void InitCmpLibcallCCs(ISD::CondCode *CCs) {
@@ -695,12 +733,11 @@ static void InitCmpLibcallCCs(ISD::CondCode *CCs) {
}
/// NOTE: The TargetMachine owns TLOF.
-TargetLoweringBase::TargetLoweringBase(const TargetMachine &tm)
- : TM(tm), DL(TM.getDataLayout()) {
+TargetLoweringBase::TargetLoweringBase(const TargetMachine &tm) : TM(tm) {
initActions();
// Perform these initializations only once.
- IsLittleEndian = DL->isLittleEndian();
+ IsLittleEndian = getDataLayout()->isLittleEndian();
MaxStoresPerMemset = MaxStoresPerMemcpy = MaxStoresPerMemmove = 8;
MaxStoresPerMemsetOptSize = MaxStoresPerMemcpyOptSize
= MaxStoresPerMemmoveOptSize = 4;
@@ -792,58 +829,21 @@ void TargetLoweringBase::initActions() {
setOperationAction(ISD::ConstantFP, MVT::f128, Expand);
// These library functions default to expand.
- setOperationAction(ISD::FLOG , MVT::f16, Expand);
- setOperationAction(ISD::FLOG2, MVT::f16, Expand);
- setOperationAction(ISD::FLOG10, MVT::f16, Expand);
- setOperationAction(ISD::FEXP , MVT::f16, Expand);
- setOperationAction(ISD::FEXP2, MVT::f16, Expand);
- setOperationAction(ISD::FFLOOR, MVT::f16, Expand);
- setOperationAction(ISD::FMINNUM, MVT::f16, Expand);
- setOperationAction(ISD::FMAXNUM, MVT::f16, Expand);
- setOperationAction(ISD::FNEARBYINT, MVT::f16, Expand);
- setOperationAction(ISD::FCEIL, MVT::f16, Expand);
- setOperationAction(ISD::FRINT, MVT::f16, Expand);
- setOperationAction(ISD::FTRUNC, MVT::f16, Expand);
- setOperationAction(ISD::FROUND, MVT::f16, Expand);
- setOperationAction(ISD::FLOG , MVT::f32, Expand);
- setOperationAction(ISD::FLOG2, MVT::f32, Expand);
- setOperationAction(ISD::FLOG10, MVT::f32, Expand);
- setOperationAction(ISD::FEXP , MVT::f32, Expand);
- setOperationAction(ISD::FEXP2, MVT::f32, Expand);
- setOperationAction(ISD::FFLOOR, MVT::f32, Expand);
- setOperationAction(ISD::FMINNUM, MVT::f32, Expand);
- setOperationAction(ISD::FMAXNUM, MVT::f32, Expand);
- setOperationAction(ISD::FNEARBYINT, MVT::f32, Expand);
- setOperationAction(ISD::FCEIL, MVT::f32, Expand);
- setOperationAction(ISD::FRINT, MVT::f32, Expand);
- setOperationAction(ISD::FTRUNC, MVT::f32, Expand);
- setOperationAction(ISD::FROUND, MVT::f32, Expand);
- setOperationAction(ISD::FLOG , MVT::f64, Expand);
- setOperationAction(ISD::FLOG2, MVT::f64, Expand);
- setOperationAction(ISD::FLOG10, MVT::f64, Expand);
- setOperationAction(ISD::FEXP , MVT::f64, Expand);
- setOperationAction(ISD::FEXP2, MVT::f64, Expand);
- setOperationAction(ISD::FFLOOR, MVT::f64, Expand);
- setOperationAction(ISD::FMINNUM, MVT::f64, Expand);
- setOperationAction(ISD::FMAXNUM, MVT::f64, Expand);
- setOperationAction(ISD::FNEARBYINT, MVT::f64, Expand);
- setOperationAction(ISD::FCEIL, MVT::f64, Expand);
- setOperationAction(ISD::FRINT, MVT::f64, Expand);
- setOperationAction(ISD::FTRUNC, MVT::f64, Expand);
- setOperationAction(ISD::FROUND, MVT::f64, Expand);
- setOperationAction(ISD::FLOG , MVT::f128, Expand);
- setOperationAction(ISD::FLOG2, MVT::f128, Expand);
- setOperationAction(ISD::FLOG10, MVT::f128, Expand);
- setOperationAction(ISD::FEXP , MVT::f128, Expand);
- setOperationAction(ISD::FEXP2, MVT::f128, Expand);
- setOperationAction(ISD::FFLOOR, MVT::f128, Expand);
- setOperationAction(ISD::FMINNUM, MVT::f128, Expand);
- setOperationAction(ISD::FMAXNUM, MVT::f128, Expand);
- setOperationAction(ISD::FNEARBYINT, MVT::f128, Expand);
- setOperationAction(ISD::FCEIL, MVT::f128, Expand);
- setOperationAction(ISD::FRINT, MVT::f128, Expand);
- setOperationAction(ISD::FTRUNC, MVT::f128, Expand);
- setOperationAction(ISD::FROUND, MVT::f128, Expand);
+ for (MVT VT : {MVT::f32, MVT::f64, MVT::f128}) {
+ setOperationAction(ISD::FLOG , VT, Expand);
+ setOperationAction(ISD::FLOG2, VT, Expand);
+ setOperationAction(ISD::FLOG10, VT, Expand);
+ setOperationAction(ISD::FEXP , VT, Expand);
+ setOperationAction(ISD::FEXP2, VT, Expand);
+ setOperationAction(ISD::FFLOOR, VT, Expand);
+ setOperationAction(ISD::FMINNUM, VT, Expand);
+ setOperationAction(ISD::FMAXNUM, VT, Expand);
+ setOperationAction(ISD::FNEARBYINT, VT, Expand);
+ setOperationAction(ISD::FCEIL, VT, Expand);
+ setOperationAction(ISD::FRINT, VT, Expand);
+ setOperationAction(ISD::FTRUNC, VT, Expand);
+ setOperationAction(ISD::FROUND, VT, Expand);
+ }
// Default ISD::TRAP to expand (which turns it into abort).
setOperationAction(ISD::TRAP, MVT::Other, Expand);
@@ -859,7 +859,7 @@ MVT TargetLoweringBase::getPointerTy(uint32_t AS) const {
}
unsigned TargetLoweringBase::getPointerSizeInBits(uint32_t AS) const {
- return DL->getPointerSizeInBits(AS);
+ return getDataLayout()->getPointerSizeInBits(AS);
}
unsigned TargetLoweringBase::getPointerTypeSizeInBits(Type *Ty) const {
@@ -868,7 +868,7 @@ unsigned TargetLoweringBase::getPointerTypeSizeInBits(Type *Ty) const {
}
MVT TargetLoweringBase::getScalarShiftAmountTy(EVT LHSTy) const {
- return MVT::getIntegerVT(8*DL->getPointerSize(0));
+ return MVT::getIntegerVT(8 * getDataLayout()->getPointerSize(0));
}
EVT TargetLoweringBase::getShiftAmountTy(EVT LHSTy) const {
@@ -1144,6 +1144,10 @@ TargetLoweringBase::emitPatchPoint(MachineInstr *MI,
/// findRepresentativeClass - Return the largest legal super-reg register class
/// of the register class for the specified type and its associated "cost".
+// This function is in TargetLowering because it uses RegClassForVT which would
+// need to be moved to TargetRegisterInfo and would necessitate moving
+// isTypeLegal over as well - a massive change that would just require
+// TargetLowering having a TargetRegisterInfo class member that it would use.
std::pair<const TargetRegisterClass *, uint8_t>
TargetLoweringBase::findRepresentativeClass(const TargetRegisterInfo *TRI,
MVT VT) const {
@@ -1498,7 +1502,7 @@ void llvm::GetReturnInfo(Type* ReturnType, AttributeSet attr,
/// function arguments in the caller parameter area. This is the actual
/// alignment, not its logarithm.
unsigned TargetLoweringBase::getByValTypeAlignment(Type *Ty) const {
- return DL->getABITypeAlignment(Ty);
+ return getDataLayout()->getABITypeAlignment(Ty);
}
//===----------------------------------------------------------------------===//
diff --git a/lib/CodeGen/TargetLoweringObjectFileImpl.cpp b/lib/CodeGen/TargetLoweringObjectFileImpl.cpp
index c1b34f7..bcf2aa7 100644
--- a/lib/CodeGen/TargetLoweringObjectFileImpl.cpp
+++ b/lib/CodeGen/TargetLoweringObjectFileImpl.cpp
@@ -31,6 +31,7 @@
#include "llvm/MC/MCSectionMachO.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSymbol.h"
+#include "llvm/MC/MCValue.h"
#include "llvm/Support/Dwarf.h"
#include "llvm/Support/ELF.h"
#include "llvm/Support/ErrorHandling.h"
@@ -244,22 +245,9 @@ static StringRef getSectionPrefixForGlobal(SectionKind Kind) {
return ".data.rel.ro";
}
-const MCSection *TargetLoweringObjectFileELF::
-SelectSectionForGlobal(const GlobalValue *GV, SectionKind Kind,
- Mangler &Mang, const TargetMachine &TM) const {
- unsigned Flags = getELFSectionFlags(Kind);
-
- // If we have -ffunction-section or -fdata-section then we should emit the
- // global value to a uniqued section specifically for it.
- bool EmitUniqueSection = false;
- if (!(Flags & ELF::SHF_MERGE) && !Kind.isCommon()) {
- if (Kind.isText())
- EmitUniqueSection = TM.getFunctionSections();
- else
- EmitUniqueSection = TM.getDataSections();
- }
- EmitUniqueSection |= GV->hasComdat();
-
+static const MCSectionELF *selectELFSectionForGlobal(
+ MCContext &Ctx, const GlobalValue *GV, SectionKind Kind, Mangler &Mang,
+ const TargetMachine &TM, bool EmitUniqueSection, unsigned Flags) {
unsigned EntrySize = 0;
if (Kind.isMergeableCString()) {
if (Kind.isMergeable2ByteCString()) {
@@ -309,9 +297,29 @@ SelectSectionForGlobal(const GlobalValue *GV, SectionKind Kind,
Name.push_back('.');
TM.getNameWithPrefix(Name, GV, Mang, true);
}
- return getContext().getELFSection(Name, getELFSectionType(Name, Kind), Flags,
- EntrySize, Group,
- EmitUniqueSection && !UniqueSectionNames);
+ return Ctx.getELFSection(Name, getELFSectionType(Name, Kind), Flags,
+ EntrySize, Group,
+ EmitUniqueSection && !UniqueSectionNames);
+}
+
+const MCSection *TargetLoweringObjectFileELF::SelectSectionForGlobal(
+ const GlobalValue *GV, SectionKind Kind, Mangler &Mang,
+ const TargetMachine &TM) const {
+ unsigned Flags = getELFSectionFlags(Kind);
+
+ // If we have -ffunction-sections or -fdata-sections then we should emit the
+ // global value to a uniqued section specifically for it.
+ bool EmitUniqueSection = false;
+ if (!(Flags & ELF::SHF_MERGE) && !Kind.isCommon()) {
+ if (Kind.isText())
+ EmitUniqueSection = TM.getFunctionSections();
+ else
+ EmitUniqueSection = TM.getDataSections();
+ }
+ EmitUniqueSection |= GV->hasComdat();
+
+ return selectELFSectionForGlobal(getContext(), GV, Kind, Mang, TM,
+ EmitUniqueSection, Flags);
}
const MCSection *TargetLoweringObjectFileELF::getSectionForJumpTable(
@@ -323,7 +331,8 @@ const MCSection *TargetLoweringObjectFileELF::getSectionForJumpTable(
if (!EmitUniqueSection)
return ReadOnlySection;
- return SelectSectionForGlobal(&F, SectionKind::getReadOnly(), Mang, TM);
+ return selectELFSectionForGlobal(getContext(), &F, SectionKind::getReadOnly(),
+ Mang, TM, EmitUniqueSection, ELF::SHF_ALLOC);
}
bool TargetLoweringObjectFileELF::shouldPutJumpTableInFunctionSection(
@@ -423,6 +432,11 @@ TargetLoweringObjectFileELF::InitializeELF(bool UseInitArray_) {
// MachO
//===----------------------------------------------------------------------===//
+TargetLoweringObjectFileMachO::TargetLoweringObjectFileMachO()
+ : TargetLoweringObjectFile() {
+ SupportIndirectSymViaGOTPCRel = true;
+}
+
/// getDepLibFromLinkerOpt - Extract the dependent library name from a linker
/// option string. Returns StringRef() if the option does not specify a library.
StringRef TargetLoweringObjectFileMachO::
@@ -697,6 +711,66 @@ MCSymbol *TargetLoweringObjectFileMachO::getCFIPersonalitySymbol(
return SSym;
}
+const MCExpr *TargetLoweringObjectFileMachO::getIndirectSymViaGOTPCRel(
+ const MCSymbol *Sym, const MCValue &MV, int64_t Offset,
+ MachineModuleInfo *MMI, MCStreamer &Streamer) const {
+ // Although MachO 32-bit targets do not explicitly have a GOTPCREL relocation
+ // as 64-bit targets do, we replace the GOT equivalent by accessing the final symbol
+ // through a non_lazy_ptr stub instead. One advantage is that it allows the
+ // computation of deltas to final external symbols. Example:
+ //
+ // _extgotequiv:
+ // .long _extfoo
+ //
+ // _delta:
+ // .long _extgotequiv-_delta
+ //
+ // is transformed to:
+ //
+ // _delta:
+ // .long L_extfoo$non_lazy_ptr-(_delta+0)
+ //
+ // .section __IMPORT,__pointers,non_lazy_symbol_pointers
+ // L_extfoo$non_lazy_ptr:
+ // .indirect_symbol _extfoo
+ // .long 0
+ //
+ MachineModuleInfoMachO &MachOMMI =
+ MMI->getObjFileInfo<MachineModuleInfoMachO>();
+ MCContext &Ctx = getContext();
+
+ // The offset must consider the original displacement from the base symbol
+ // since 32-bit targets don't have a GOTPCREL to fold the PC displacement.
+ Offset = -MV.getConstant();
+ const MCSymbol *BaseSym = &MV.getSymB()->getSymbol();
+
+ // Access the final symbol via sym$non_lazy_ptr and generate the appropriate
+ // non_lazy_ptr stubs.
+ SmallString<128> Name;
+ StringRef Suffix = "$non_lazy_ptr";
+ Name += DL->getPrivateGlobalPrefix();
+ Name += Sym->getName();
+ Name += Suffix;
+ MCSymbol *Stub = Ctx.GetOrCreateSymbol(Name);
+
+ MachineModuleInfoImpl::StubValueTy &StubSym = MachOMMI.getGVStubEntry(Stub);
+ if (!StubSym.getPointer())
+ StubSym = MachineModuleInfoImpl::
+ StubValueTy(const_cast<MCSymbol *>(Sym), true /* access indirectly */);
+
+ const MCExpr *BSymExpr =
+ MCSymbolRefExpr::Create(BaseSym, MCSymbolRefExpr::VK_None, Ctx);
+ const MCExpr *LHS =
+ MCSymbolRefExpr::Create(Stub, MCSymbolRefExpr::VK_None, Ctx);
+
+ if (!Offset)
+ return MCBinaryExpr::CreateSub(LHS, BSymExpr, Ctx);
+
+ const MCExpr *RHS =
+ MCBinaryExpr::CreateAdd(BSymExpr, MCConstantExpr::Create(Offset, Ctx), Ctx);
+ return MCBinaryExpr::CreateSub(LHS, RHS, Ctx);
+}
+
//===----------------------------------------------------------------------===//
// COFF
//===----------------------------------------------------------------------===//
@@ -853,6 +927,11 @@ SelectSectionForGlobal(const GlobalValue *GV, SectionKind Kind,
StringRef COMDATSymName = Sym->getName();
return getContext().getCOFFSection(Name, Characteristics, Kind,
COMDATSymName, Selection);
+ } else {
+ SmallString<256> TmpData;
+ getNameWithPrefix(TmpData, GV, /*CannotUsePrivateLabel=*/true, Mang, TM);
+ return getContext().getCOFFSection(Name, Characteristics, Kind, TmpData,
+ Selection);
}
}
@@ -874,6 +953,42 @@ SelectSectionForGlobal(const GlobalValue *GV, SectionKind Kind,
return DataSection;
}
+void TargetLoweringObjectFileCOFF::getNameWithPrefix(
+ SmallVectorImpl<char> &OutName, const GlobalValue *GV,
+ bool CannotUsePrivateLabel, Mangler &Mang, const TargetMachine &TM) const {
+ if (GV->hasPrivateLinkage() &&
+ ((isa<Function>(GV) && TM.getFunctionSections()) ||
+ (isa<GlobalVariable>(GV) && TM.getDataSections())))
+ CannotUsePrivateLabel = true;
+
+ Mang.getNameWithPrefix(OutName, GV, CannotUsePrivateLabel);
+}
+
+const MCSection *TargetLoweringObjectFileCOFF::getSectionForJumpTable(
+ const Function &F, Mangler &Mang, const TargetMachine &TM) const {
+ // If the function can be removed, produce a unique section so that
+ // the table doesn't prevent the removal.
+ const Comdat *C = F.getComdat();
+ bool EmitUniqueSection = TM.getFunctionSections() || C;
+ if (!EmitUniqueSection)
+ return ReadOnlySection;
+
+ // FIXME: we should produce a symbol for F instead.
+ if (F.hasPrivateLinkage())
+ return ReadOnlySection;
+
+ MCSymbol *Sym = TM.getSymbol(&F, Mang);
+ StringRef COMDATSymName = Sym->getName();
+
+ SectionKind Kind = SectionKind::getReadOnly();
+ const char *Name = getCOFFSectionNameForUniqueGlobal(Kind);
+ unsigned Characteristics = getCOFFSectionFlags(Kind);
+ Characteristics |= COFF::IMAGE_SCN_LNK_COMDAT;
+
+ return getContext().getCOFFSection(Name, Characteristics, Kind, COMDATSymName,
+ COFF::IMAGE_COMDAT_SELECT_ASSOCIATIVE);
+}
+
StringRef TargetLoweringObjectFileCOFF::
getDepLibFromLinkerOpt(StringRef LinkerOption) const {
const char *LibCmd = "/DEFAULTLIB:";
diff --git a/lib/CodeGen/TwoAddressInstructionPass.cpp b/lib/CodeGen/TwoAddressInstructionPass.cpp
index 1bbe6e1..57daeab 100644
--- a/lib/CodeGen/TwoAddressInstructionPass.cpp
+++ b/lib/CodeGen/TwoAddressInstructionPass.cpp
@@ -45,6 +45,7 @@
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetRegisterInfo.h"
@@ -102,6 +103,8 @@ class TwoAddressInstructionPass : public MachineFunctionPass {
bool sink3AddrInstruction(MachineInstr *MI, unsigned Reg,
MachineBasicBlock::iterator OldPos);
+ bool isRevCopyChain(unsigned FromReg, unsigned ToReg, int Maxlen);
+
bool noUseAfterLastDef(unsigned Reg, unsigned Dist, unsigned &LastDef);
bool isProfitableToCommute(unsigned regA, unsigned regB, unsigned regC,
@@ -309,6 +312,45 @@ sink3AddrInstruction(MachineInstr *MI, unsigned SavedReg,
return true;
}
+/// getSingleDef -- Return the MachineInstr* if it is the single def of Reg
+/// in the current BB.
+static MachineInstr *getSingleDef(unsigned Reg, MachineBasicBlock *BB,
+ const MachineRegisterInfo *MRI) {
+ MachineInstr *Ret = nullptr;
+ for (MachineInstr &DefMI : MRI->def_instructions(Reg)) {
+ if (DefMI.getParent() != BB || DefMI.isDebugValue())
+ continue;
+ if (!Ret)
+ Ret = &DefMI;
+ else if (Ret != &DefMI)
+ return nullptr;
+ }
+ return Ret;
+}
+
+/// Check if there is a reversed copy chain from FromReg to ToReg:
+/// %Tmp1 = copy %Tmp2;
+/// %FromReg = copy %Tmp1;
+/// %ToReg = add %FromReg ...
+/// %Tmp2 = copy %ToReg;
+/// Maxlen specifies the maximum length of the copy chain the function
+/// can walk through.
+bool TwoAddressInstructionPass::isRevCopyChain(unsigned FromReg, unsigned ToReg,
+ int Maxlen) {
+ unsigned TmpReg = FromReg;
+ for (int i = 0; i < Maxlen; i++) {
+ MachineInstr *Def = getSingleDef(TmpReg, MBB, MRI);
+ if (!Def || !Def->isCopy())
+ return false;
+
+ TmpReg = Def->getOperand(1).getReg();
+
+ if (TmpReg == ToReg)
+ return true;
+ }
+ return false;
+}
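
  A sketch of a chain this walk accepts (virtual register numbers are
  hypothetical), starting from FromReg = %2 and reaching ToReg = %0 within
  Maxlen = 2 hops:

    %0 = ADD ...        ; ToReg
    %1 = COPY %0
    %2 = COPY %1        ; FromReg: the walk follows %2 -> %1 -> %0, returns true
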
+
/// noUseAfterLastDef - Return true if there are no intervening uses between the
/// last instruction in the MBB that defines the specified register and the
/// two-address instruction which is being processed. It also returns the last
@@ -574,6 +616,27 @@ isProfitableToCommute(unsigned regA, unsigned regB, unsigned regC,
if (!noUseAfterLastDef(regB, Dist, LastDefB))
return true;
+ // Look for situation like this:
+ // %reg101 = MOV %reg100
+ // %reg102 = ...
+ // %reg103 = ADD %reg102, %reg101
+ // ... = %reg103 ...
+ // %reg100 = MOV %reg103
+ // If there is a reversed copy chain from reg101 to reg103, commute the ADD
+ // to eliminate an otherwise unavoidable copy.
+  // FIXME:
+  // We can extend the logic further: If a pair of operands in an instruction
+  // has been merged, the instruction could be regarded as a virtual copy, and
+  // the virtual copy could also be used to construct a copy chain.
+  // To more generally minimize register copies, ideally the logic of the
+  // two-address instruction pass should be integrated with the register
+  // allocation pass, where the interference graph is available.
+ if (isRevCopyChain(regC, regA, 3))
+ return true;
+
+ if (isRevCopyChain(regB, regA, 3))
+ return false;
+
// Since there are no intervening uses for both registers, then commute
// if the def of regC is closer. Its live interval is shorter.
return LastDefB && LastDefC && LastDefC > LastDefB;
diff --git a/lib/CodeGen/VirtRegMap.cpp b/lib/CodeGen/VirtRegMap.cpp
index 7d3b0ce..d9adfdf 100644
--- a/lib/CodeGen/VirtRegMap.cpp
+++ b/lib/CodeGen/VirtRegMap.cpp
@@ -286,7 +286,7 @@ void VirtRegRewriter::addMBBLiveIns() {
}
void VirtRegRewriter::rewrite() {
- bool NoSubRegLiveness = !MRI->tracksSubRegLiveness();
+ bool NoSubRegLiveness = !MRI->subRegLivenessEnabled();
SmallVector<unsigned, 8> SuperDeads;
SmallVector<unsigned, 8> SuperDefs;
SmallVector<unsigned, 8> SuperKills;
diff --git a/lib/CodeGen/WinEHPrepare.cpp b/lib/CodeGen/WinEHPrepare.cpp
index 6f712a9..ab0f96e 100644
--- a/lib/CodeGen/WinEHPrepare.cpp
+++ b/lib/CodeGen/WinEHPrepare.cpp
@@ -16,6 +16,8 @@
#include "llvm/CodeGen/Passes.h"
#include "llvm/ADT/MapVector.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/TinyPtrVector.h"
#include "llvm/Analysis/LibCallSemantics.h"
#include "llvm/IR/Function.h"
@@ -25,6 +27,10 @@
#include "llvm/IR/Module.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/Pass.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Cloning.h"
#include "llvm/Transforms/Utils/Local.h"
#include <memory>
@@ -36,25 +42,31 @@ using namespace llvm::PatternMatch;
namespace {
-struct HandlerAllocas {
- TinyPtrVector<AllocaInst *> Allocas;
- int ParentFrameAllocationIndex;
-};
-
// This map is used to model frame variable usage during outlining, to
// construct a structure type to hold the frame variables in a frame
// allocation block, and to remap the frame variable allocas (including
// spill locations as needed) to GEPs that get the variable from the
// frame allocation structure.
-typedef MapVector<AllocaInst *, HandlerAllocas> FrameVarInfoMap;
+typedef MapVector<Value *, TinyPtrVector<AllocaInst *>> FrameVarInfoMap;
-class WinEHPrepare : public FunctionPass {
- std::unique_ptr<FunctionPass> DwarfPrepare;
+typedef SmallSet<BasicBlock *, 4> VisitedBlockSet;
+
+enum ActionType { Catch, Cleanup };
+
+class LandingPadActions;
+class ActionHandler;
+class CatchHandler;
+class CleanupHandler;
+class LandingPadMap;
+typedef DenseMap<const BasicBlock *, CatchHandler *> CatchHandlerMapTy;
+typedef DenseMap<const BasicBlock *, CleanupHandler *> CleanupHandlerMapTy;
+
+class WinEHPrepare : public FunctionPass {
public:
static char ID; // Pass identification, replacement for typeid.
WinEHPrepare(const TargetMachine *TM = nullptr)
- : FunctionPass(ID), DwarfPrepare(createDwarfEHPass(TM)) {}
+ : FunctionPass(ID) {}
bool runOnFunction(Function &Fn) override;
@@ -67,11 +79,24 @@ public:
}
private:
- bool prepareCPPEHHandlers(Function &F,
- SmallVectorImpl<LandingPadInst *> &LPads);
- bool outlineCatchHandler(Function *SrcFn, Constant *SelectorType,
- LandingPadInst *LPad, CallInst *&EHAlloc,
- AllocaInst *&EHObjPtr, FrameVarInfoMap &VarInfo);
+ bool prepareExceptionHandlers(Function &F,
+ SmallVectorImpl<LandingPadInst *> &LPads);
+ bool outlineHandler(ActionHandler *Action, Function *SrcFn,
+ LandingPadInst *LPad, BasicBlock *StartBB,
+ FrameVarInfoMap &VarInfo);
+
+ void mapLandingPadBlocks(LandingPadInst *LPad, LandingPadActions &Actions);
+ CatchHandler *findCatchHandler(BasicBlock *BB, BasicBlock *&NextBB,
+ VisitedBlockSet &VisitedBlocks);
+ CleanupHandler *findCleanupHandler(BasicBlock *StartBB, BasicBlock *EndBB);
+
+ void processSEHCatchHandler(CatchHandler *Handler, BasicBlock *StartBB);
+
+ // All fields are reset by runOnFunction.
+ EHPersonality Personality;
+ CatchHandlerMapTy CatchHandlerMap;
+ CleanupHandlerMapTy CleanupHandlerMap;
+ DenseMap<const LandingPadInst *, LandingPadMap> LPadMaps;
};
class WinEHFrameVariableMaterializer : public ValueMaterializer {
@@ -87,34 +112,218 @@ private:
IRBuilder<> Builder;
};
-class WinEHCatchDirector : public CloningDirector {
+class LandingPadMap {
+public:
+ LandingPadMap() : OriginLPad(nullptr) {}
+ void mapLandingPad(const LandingPadInst *LPad);
+
+ bool isInitialized() { return OriginLPad != nullptr; }
+
+ bool mapIfEHPtrLoad(const LoadInst *Load) {
+ return mapIfEHLoad(Load, EHPtrStores, EHPtrStoreAddrs);
+ }
+ bool mapIfSelectorLoad(const LoadInst *Load) {
+ return mapIfEHLoad(Load, SelectorStores, SelectorStoreAddrs);
+ }
+
+ bool isLandingPadSpecificInst(const Instruction *Inst) const;
+
+ void remapSelector(ValueToValueMapTy &VMap, Value *MappedValue) const;
+
+private:
+ bool mapIfEHLoad(const LoadInst *Load,
+ SmallVectorImpl<const StoreInst *> &Stores,
+ SmallVectorImpl<const Value *> &StoreAddrs);
+
+ const LandingPadInst *OriginLPad;
+ // We will normally only see one of each of these instructions, but
+  // if more than one occurs for some reason, we can handle that.
+ TinyPtrVector<const ExtractValueInst *> ExtractedEHPtrs;
+ TinyPtrVector<const ExtractValueInst *> ExtractedSelectors;
+
+ // In optimized code, there will typically be at most one instance of
+ // each of the following, but in unoptimized IR it is not uncommon
+ // for the values to be stored, loaded and then stored again. In that
+ // case we will create a second entry for each store and store address.
+ SmallVector<const StoreInst *, 2> EHPtrStores;
+ SmallVector<const StoreInst *, 2> SelectorStores;
+ SmallVector<const Value *, 2> EHPtrStoreAddrs;
+ SmallVector<const Value *, 2> SelectorStoreAddrs;
+};
+
+class WinEHCloningDirectorBase : public CloningDirector {
public:
- WinEHCatchDirector(LandingPadInst *LPI, Function *CatchFn, Value *Selector,
- Value *EHObj, FrameVarInfoMap &VarInfo)
- : LPI(LPI), CurrentSelector(Selector->stripPointerCasts()), EHObj(EHObj),
- Materializer(CatchFn, VarInfo),
- SelectorIDType(Type::getInt32Ty(LPI->getContext())),
- Int8PtrType(Type::getInt8PtrTy(LPI->getContext())) {}
+ WinEHCloningDirectorBase(Function *HandlerFn,
+ FrameVarInfoMap &VarInfo,
+ LandingPadMap &LPadMap)
+ : Materializer(HandlerFn, VarInfo),
+ SelectorIDType(Type::getInt32Ty(HandlerFn->getContext())),
+ Int8PtrType(Type::getInt8PtrTy(HandlerFn->getContext())),
+ LPadMap(LPadMap) {}
CloningAction handleInstruction(ValueToValueMapTy &VMap,
const Instruction *Inst,
BasicBlock *NewBB) override;
+ virtual CloningAction handleBeginCatch(ValueToValueMapTy &VMap,
+ const Instruction *Inst,
+ BasicBlock *NewBB) = 0;
+ virtual CloningAction handleEndCatch(ValueToValueMapTy &VMap,
+ const Instruction *Inst,
+ BasicBlock *NewBB) = 0;
+ virtual CloningAction handleTypeIdFor(ValueToValueMapTy &VMap,
+ const Instruction *Inst,
+ BasicBlock *NewBB) = 0;
+ virtual CloningAction handleInvoke(ValueToValueMapTy &VMap,
+ const InvokeInst *Invoke,
+ BasicBlock *NewBB) = 0;
+ virtual CloningAction handleResume(ValueToValueMapTy &VMap,
+ const ResumeInst *Resume,
+ BasicBlock *NewBB) = 0;
+
ValueMaterializer *getValueMaterializer() override { return &Materializer; }
-private:
- LandingPadInst *LPI;
- Value *CurrentSelector;
- Value *EHObj;
+protected:
WinEHFrameVariableMaterializer Materializer;
Type *SelectorIDType;
Type *Int8PtrType;
+ LandingPadMap &LPadMap;
+};
+
+class WinEHCatchDirector : public WinEHCloningDirectorBase {
+public:
+ WinEHCatchDirector(Function *CatchFn, Value *Selector,
+ FrameVarInfoMap &VarInfo, LandingPadMap &LPadMap)
+ : WinEHCloningDirectorBase(CatchFn, VarInfo, LPadMap),
+ CurrentSelector(Selector->stripPointerCasts()),
+ ExceptionObjectVar(nullptr) {}
+
+ CloningAction handleBeginCatch(ValueToValueMapTy &VMap,
+ const Instruction *Inst,
+ BasicBlock *NewBB) override;
+ CloningAction handleEndCatch(ValueToValueMapTy &VMap, const Instruction *Inst,
+ BasicBlock *NewBB) override;
+ CloningAction handleTypeIdFor(ValueToValueMapTy &VMap,
+ const Instruction *Inst,
+ BasicBlock *NewBB) override;
+ CloningAction handleInvoke(ValueToValueMapTy &VMap, const InvokeInst *Invoke,
+ BasicBlock *NewBB) override;
+ CloningAction handleResume(ValueToValueMapTy &VMap, const ResumeInst *Resume,
+ BasicBlock *NewBB) override;
+
+ const Value *getExceptionVar() { return ExceptionObjectVar; }
+ TinyPtrVector<BasicBlock *> &getReturnTargets() { return ReturnTargets; }
+
+private:
+ Value *CurrentSelector;
- const Value *ExtractedEHPtr;
- const Value *ExtractedSelector;
- const Value *EHPtrStoreAddr;
- const Value *SelectorStoreAddr;
+ const Value *ExceptionObjectVar;
+ TinyPtrVector<BasicBlock *> ReturnTargets;
};
+
+class WinEHCleanupDirector : public WinEHCloningDirectorBase {
+public:
+ WinEHCleanupDirector(Function *CleanupFn,
+ FrameVarInfoMap &VarInfo, LandingPadMap &LPadMap)
+ : WinEHCloningDirectorBase(CleanupFn, VarInfo, LPadMap) {}
+
+ CloningAction handleBeginCatch(ValueToValueMapTy &VMap,
+ const Instruction *Inst,
+ BasicBlock *NewBB) override;
+ CloningAction handleEndCatch(ValueToValueMapTy &VMap, const Instruction *Inst,
+ BasicBlock *NewBB) override;
+ CloningAction handleTypeIdFor(ValueToValueMapTy &VMap,
+ const Instruction *Inst,
+ BasicBlock *NewBB) override;
+ CloningAction handleInvoke(ValueToValueMapTy &VMap, const InvokeInst *Invoke,
+ BasicBlock *NewBB) override;
+ CloningAction handleResume(ValueToValueMapTy &VMap, const ResumeInst *Resume,
+ BasicBlock *NewBB) override;
+};
+
+class ActionHandler {
+public:
+ ActionHandler(BasicBlock *BB, ActionType Type)
+ : StartBB(BB), Type(Type), HandlerBlockOrFunc(nullptr) {}
+
+ ActionType getType() const { return Type; }
+ BasicBlock *getStartBlock() const { return StartBB; }
+
+ bool hasBeenProcessed() { return HandlerBlockOrFunc != nullptr; }
+
+ void setHandlerBlockOrFunc(Constant *F) { HandlerBlockOrFunc = F; }
+ Constant *getHandlerBlockOrFunc() { return HandlerBlockOrFunc; }
+
+private:
+ BasicBlock *StartBB;
+ ActionType Type;
+
+ // Can be either a BlockAddress or a Function depending on the EH personality.
+ Constant *HandlerBlockOrFunc;
+};
+
+class CatchHandler : public ActionHandler {
+public:
+ CatchHandler(BasicBlock *BB, Constant *Selector, BasicBlock *NextBB)
+ : ActionHandler(BB, ActionType::Catch), Selector(Selector),
+ NextBB(NextBB), ExceptionObjectVar(nullptr) {}
+
+ // Method for support type inquiry through isa, cast, and dyn_cast:
+ static inline bool classof(const ActionHandler *H) {
+ return H->getType() == ActionType::Catch;
+ }
+
+ Constant *getSelector() const { return Selector; }
+ BasicBlock *getNextBB() const { return NextBB; }
+
+ const Value *getExceptionVar() { return ExceptionObjectVar; }
+ TinyPtrVector<BasicBlock *> &getReturnTargets() { return ReturnTargets; }
+
+ void setExceptionVar(const Value *Val) { ExceptionObjectVar = Val; }
+ void setReturnTargets(TinyPtrVector<BasicBlock *> &Targets) {
+ ReturnTargets = Targets;
+ }
+
+private:
+ Constant *Selector;
+ BasicBlock *NextBB;
+ const Value *ExceptionObjectVar;
+ TinyPtrVector<BasicBlock *> ReturnTargets;
+};
+
+class CleanupHandler : public ActionHandler {
+public:
+ CleanupHandler(BasicBlock *BB) : ActionHandler(BB, ActionType::Cleanup) {}
+
+ // Method for support type inquiry through isa, cast, and dyn_cast:
+ static inline bool classof(const ActionHandler *H) {
+ return H->getType() == ActionType::Cleanup;
+ }
+};
+
+class LandingPadActions {
+public:
+ LandingPadActions() : HasCleanupHandlers(false) {}
+
+ void insertCatchHandler(CatchHandler *Action) { Actions.push_back(Action); }
+ void insertCleanupHandler(CleanupHandler *Action) {
+ Actions.push_back(Action);
+ HasCleanupHandlers = true;
+ }
+
+ bool includesCleanup() const { return HasCleanupHandlers; }
+
+ SmallVectorImpl<ActionHandler *>::iterator begin() { return Actions.begin(); }
+ SmallVectorImpl<ActionHandler *>::iterator end() { return Actions.end(); }
+
+private:
+ // Note that this class does not own the ActionHandler objects in this vector.
+ // The ActionHandlers are owned by the CatchHandlerMap and CleanupHandlerMap
+ // in the WinEHPrepare class.
+ SmallVector<ActionHandler *, 4> Actions;
+ bool HasCleanupHandlers;
+};
+
} // end anonymous namespace
char WinEHPrepare::ID = 0;
@@ -125,10 +334,10 @@ FunctionPass *llvm::createWinEHPass(const TargetMachine *TM) {
return new WinEHPrepare(TM);
}
-static bool isMSVCPersonality(EHPersonality Pers) {
- return Pers == EHPersonality::MSVC_Win64SEH ||
- Pers == EHPersonality::MSVC_CXX;
-}
+// FIXME: Remove this once the backend can handle the prepared IR.
+static cl::opt<bool>
+SEHPrepare("sehprepare", cl::Hidden,
+ cl::desc("Prepare functions with SEH personalities"));
bool WinEHPrepare::runOnFunction(Function &Fn) {
SmallVector<LandingPadInst *, 4> LPads;
@@ -145,60 +354,67 @@ bool WinEHPrepare::runOnFunction(Function &Fn) {
return false;
// Classify the personality to see what kind of preparation we need.
- EHPersonality Pers = classifyEHPersonality(LPads.back()->getPersonalityFn());
-
- // Delegate through to the DWARF pass if this is unrecognized.
- if (!isMSVCPersonality(Pers))
- return DwarfPrepare->runOnFunction(Fn);
+ Personality = classifyEHPersonality(LPads.back()->getPersonalityFn());
- // FIXME: This only returns true if the C++ EH handlers were outlined.
- // When that code is complete, it should always return whatever
- // prepareCPPEHHandlers returns.
- if (Pers == EHPersonality::MSVC_CXX && prepareCPPEHHandlers(Fn, LPads))
- return true;
-
- // FIXME: SEH Cleanups are unimplemented. Replace them with unreachable.
- if (Resumes.empty())
+ // Do nothing if this is not an MSVC personality.
+ if (!isMSVCEHPersonality(Personality))
return false;
- for (ResumeInst *Resume : Resumes) {
- IRBuilder<>(Resume).CreateUnreachable();
- Resume->eraseFromParent();
+ if (isAsynchronousEHPersonality(Personality) && !SEHPrepare) {
+ // Replace all resume instructions with unreachable.
+ // FIXME: Remove this once the backend can handle the prepared IR.
+ for (ResumeInst *Resume : Resumes) {
+ IRBuilder<>(Resume).CreateUnreachable();
+ Resume->eraseFromParent();
+ }
+ return true;
}
+ // If there were any landing pads, prepareExceptionHandlers will make changes.
+ prepareExceptionHandlers(Fn, LPads);
return true;
}
bool WinEHPrepare::doFinalization(Module &M) {
- return DwarfPrepare->doFinalization(M);
+ return false;
}
-void WinEHPrepare::getAnalysisUsage(AnalysisUsage &AU) const {
- DwarfPrepare->getAnalysisUsage(AU);
-}
+void WinEHPrepare::getAnalysisUsage(AnalysisUsage &AU) const {}
-bool WinEHPrepare::prepareCPPEHHandlers(
+bool WinEHPrepare::prepareExceptionHandlers(
Function &F, SmallVectorImpl<LandingPadInst *> &LPads) {
// These containers are used to re-map frame variables that are used in
// outlined catch and cleanup handlers. They will be populated as the
// handlers are outlined.
FrameVarInfoMap FrameVarInfo;
- SmallVector<CallInst *, 4> HandlerAllocs;
- SmallVector<AllocaInst *, 4> HandlerEHObjPtrs;
bool HandlersOutlined = false;
+ Module *M = F.getParent();
+ LLVMContext &Context = M->getContext();
+
+ // Create a new function to receive the handler contents.
+ PointerType *Int8PtrType = Type::getInt8PtrTy(Context);
+ Type *Int32Type = Type::getInt32Ty(Context);
+ Function *ActionIntrin = Intrinsic::getDeclaration(M, Intrinsic::eh_actions);
+
for (LandingPadInst *LPad : LPads) {
// Look for evidence that this landingpad has already been processed.
bool LPadHasActionList = false;
BasicBlock *LPadBB = LPad->getParent();
- for (Instruction &Inst : LPadBB->getInstList()) {
- // FIXME: Make this an intrinsic.
- if (auto *Call = dyn_cast<CallInst>(&Inst))
- if (Call->getCalledFunction()->getName() == "llvm.eh.actions") {
+ for (Instruction &Inst : *LPadBB) {
+ if (auto *IntrinCall = dyn_cast<IntrinsicInst>(&Inst)) {
+ if (IntrinCall->getIntrinsicID() == Intrinsic::eh_actions) {
LPadHasActionList = true;
break;
}
+ }
+ // FIXME: This is here to help with the development of nested landing pad
+ // outlining. It should be removed when that is finished.
+ if (isa<UnreachableInst>(Inst)) {
+ LPadHasActionList = true;
+ break;
+ }
}
// If we've already outlined the handlers for this landingpad,
@@ -206,177 +422,244 @@ bool WinEHPrepare::prepareCPPEHHandlers(
if (LPadHasActionList)
continue;
- for (unsigned Idx = 0, NumClauses = LPad->getNumClauses(); Idx < NumClauses;
- ++Idx) {
- if (LPad->isCatch(Idx)) {
- // Create a new instance of the handler data structure in the
- // HandlerData vector.
- CallInst *EHAlloc = nullptr;
- AllocaInst *EHObjPtr = nullptr;
- bool Outlined = outlineCatchHandler(&F, LPad->getClause(Idx), LPad,
- EHAlloc, EHObjPtr, FrameVarInfo);
- if (Outlined) {
+ LandingPadActions Actions;
+ mapLandingPadBlocks(LPad, Actions);
+
+ for (ActionHandler *Action : Actions) {
+ if (Action->hasBeenProcessed())
+ continue;
+ BasicBlock *StartBB = Action->getStartBlock();
+
+ // SEH doesn't do any outlining for catches. Instead, pass the handler
+ // basic block addr to llvm.eh.actions and list the block as a return
+ // target.
+ if (isAsynchronousEHPersonality(Personality)) {
+ if (auto *CatchAction = dyn_cast<CatchHandler>(Action)) {
+ processSEHCatchHandler(CatchAction, StartBB);
HandlersOutlined = true;
- // These values must be resolved after all handlers have been
- // outlined.
- if (EHAlloc)
- HandlerAllocs.push_back(EHAlloc);
- if (EHObjPtr)
- HandlerEHObjPtrs.push_back(EHObjPtr);
+ continue;
}
- } // End if (isCatch)
- } // End for each clause
- } // End for each landingpad
+ }
+
+ if (outlineHandler(Action, &F, LPad, StartBB, FrameVarInfo)) {
+ HandlersOutlined = true;
+ }
+ } // End for each Action
+
+ // FIXME: We need a guard against partially outlined functions.
+ if (!HandlersOutlined)
+ continue;
+
+ // Replace the landing pad with a new llvm.eh.action based landing pad.
+ BasicBlock *NewLPadBB = BasicBlock::Create(Context, "lpad", &F, LPadBB);
+ assert(!isa<PHINode>(LPadBB->begin()));
+ Instruction *NewLPad = LPad->clone();
+ NewLPadBB->getInstList().push_back(NewLPad);
+ while (!pred_empty(LPadBB)) {
+ auto *pred = *pred_begin(LPadBB);
+ InvokeInst *Invoke = cast<InvokeInst>(pred->getTerminator());
+ Invoke->setUnwindDest(NewLPadBB);
+ }
+
+ // Replace uses of the old lpad in phis with this block and delete the old
+ // block.
+ LPadBB->replaceSuccessorsPhiUsesWith(NewLPadBB);
+ LPadBB->getTerminator()->eraseFromParent();
+ new UnreachableInst(LPadBB->getContext(), LPadBB);
+
+ // Add a call to describe the actions for this landing pad.
+ std::vector<Value *> ActionArgs;
+ for (ActionHandler *Action : Actions) {
+ // Action codes from docs are: 0 cleanup, 1 catch.
+ if (auto *CatchAction = dyn_cast<CatchHandler>(Action)) {
+ ActionArgs.push_back(ConstantInt::get(Int32Type, 1));
+ ActionArgs.push_back(CatchAction->getSelector());
+ Value *EHObj = const_cast<Value *>(CatchAction->getExceptionVar());
+ if (EHObj)
+ ActionArgs.push_back(EHObj);
+ else
+ ActionArgs.push_back(ConstantPointerNull::get(Int8PtrType));
+ } else {
+ ActionArgs.push_back(ConstantInt::get(Int32Type, 0));
+ }
+ ActionArgs.push_back(Action->getHandlerBlockOrFunc());
+ }
+ CallInst *Recover =
+ CallInst::Create(ActionIntrin, ActionArgs, "recover", NewLPadBB);
+
+ // Add an indirect branch listing possible successors of the catch handlers.
+ IndirectBrInst *Branch = IndirectBrInst::Create(Recover, 0, NewLPadBB);
+ for (ActionHandler *Action : Actions) {
+ if (auto *CatchAction = dyn_cast<CatchHandler>(Action)) {
+ for (auto *Target : CatchAction->getReturnTargets()) {
+ Branch->addDestination(Target);
+ }
+ }
+ }
+ } // End for each landingpad
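
  The net effect on a landing pad, as a rough hand-written sketch (type info,
  handler names, slots, and the continuation label are hypothetical; the
  argument encoding follows the action codes noted above):

    lpad:
      %vals = landingpad { i8*, i32 }
              personality i8* bitcast (i32 (...)* @__CxxFrameHandler3 to i8*)
              catch i8* bitcast (i8** @typeinfo to i8*)
      %recover = call i8* (...)* @llvm.eh.actions(
          i32 1, i8* bitcast (i8** @typeinfo to i8*), i8** %e.addr,
          i8* (i8*, i8*)* @parent.catch,
          i32 0, void (i8*, i8*)* @parent.cleanup)
      indirectbr i8* %recover, [label %try.cont]
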
// If nothing got outlined, there is no more processing to be done.
if (!HandlersOutlined)
return false;
- // FIXME: We will replace the landingpad bodies with llvm.eh.actions
- // calls and indirect branches here and then delete blocks
- // which are no longer reachable. That will get rid of the
- // handlers that we have outlined. There is code below
- // that looks for allocas with no uses in the parent function.
- // That will only happen after the pruning is implemented.
-
- // Remap the frame variables.
- SmallVector<Type *, 2> StructTys;
- StructTys.push_back(Type::getInt32Ty(F.getContext())); // EH state
- StructTys.push_back(Type::getInt8PtrTy(F.getContext())); // EH object
-
- // Start the index at two since we always have the above fields at 0 and 1.
- int Idx = 2;
-
- // FIXME: Sort the FrameVarInfo vector by the ParentAlloca size and alignment
- // and add padding as necessary to provide the proper alignment.
-
- // Map the alloca instructions to the corresponding index in the
- // frame allocation structure. If any alloca is used only in a single
- // handler and is not used in the parent frame after outlining, it will
- // be assigned an index of -1, meaning the handler can keep its
- // "temporary" alloca and the original alloca can be erased from the
- // parent function. If we later encounter this alloca in a second
- // handler, we will assign it a place in the frame allocation structure
- // at that time. Since the instruction replacement doesn't happen until
- // all the entries in the HandlerData have been processed this isn't a
- // problem.
- for (auto &VarInfoEntry : FrameVarInfo) {
- AllocaInst *ParentAlloca = VarInfoEntry.first;
- HandlerAllocas &AllocaInfo = VarInfoEntry.second;
-
- // If the instruction still has uses in the parent function or if it is
- // referenced by more than one handler, add it to the frame allocation
- // structure.
- if (ParentAlloca->getNumUses() != 0 || AllocaInfo.Allocas.size() > 1) {
- Type *VarTy = ParentAlloca->getAllocatedType();
- StructTys.push_back(VarTy);
- AllocaInfo.ParentFrameAllocationIndex = Idx++;
- } else {
- // If the variable is not used in the parent frame and it is only used
- // in one handler, the alloca can be removed from the parent frame
- // and the handler will keep its "temporary" alloca to define the value.
- // An element index of -1 is used to indicate this condition.
- AllocaInfo.ParentFrameAllocationIndex = -1;
- }
- }
+ // Delete any blocks that were only used by handlers that were outlined above.
+ removeUnreachableBlocks(F);
- // Having filled the StructTys vector and assigned an index to each element,
- // we can now create the structure.
- StructType *EHDataStructTy = StructType::create(
- F.getContext(), StructTys, "struct." + F.getName().str() + ".ehdata");
- IRBuilder<> Builder(F.getParent()->getContext());
-
- // Create a frame allocation.
- Module *M = F.getParent();
- LLVMContext &Context = M->getContext();
BasicBlock *Entry = &F.getEntryBlock();
+ IRBuilder<> Builder(F.getParent()->getContext());
Builder.SetInsertPoint(Entry->getFirstInsertionPt());
- Function *FrameAllocFn =
- Intrinsic::getDeclaration(M, Intrinsic::frameallocate);
- uint64_t EHAllocSize = M->getDataLayout()->getTypeAllocSize(EHDataStructTy);
- Value *FrameAllocArgs[] = {
- ConstantInt::get(Type::getInt32Ty(Context), EHAllocSize)};
- CallInst *FrameAlloc =
- Builder.CreateCall(FrameAllocFn, FrameAllocArgs, "frame.alloc");
-
- Value *FrameEHData = Builder.CreateBitCast(
- FrameAlloc, EHDataStructTy->getPointerTo(), "eh.data");
-
- // Now visit each handler that is using the structure and bitcast its EHAlloc
- // value to be a pointer to the frame alloc structure.
- DenseMap<Function *, Value *> EHDataMap;
- for (CallInst *EHAlloc : HandlerAllocs) {
- // The EHAlloc has no uses at this time, so we need to just insert the
- // cast before the next instruction. There is always a next instruction.
- BasicBlock::iterator II = EHAlloc;
- ++II;
- Builder.SetInsertPoint(cast<Instruction>(II));
- Value *EHData = Builder.CreateBitCast(
- EHAlloc, EHDataStructTy->getPointerTo(), "eh.data");
- EHDataMap[EHAlloc->getParent()->getParent()] = EHData;
- }
- // Next, replace the place-holder EHObjPtr allocas with GEP instructions
- // that pull the EHObjPtr from the frame alloc structure
- for (AllocaInst *EHObjPtr : HandlerEHObjPtrs) {
- Value *EHData = EHDataMap[EHObjPtr->getParent()->getParent()];
- Builder.SetInsertPoint(EHObjPtr);
- Value *ElementPtr = Builder.CreateConstInBoundsGEP2_32(EHData, 0, 1);
- EHObjPtr->replaceAllUsesWith(ElementPtr);
- EHObjPtr->removeFromParent();
- ElementPtr->takeName(EHObjPtr);
- delete EHObjPtr;
- }
+ Function *FrameEscapeFn =
+ Intrinsic::getDeclaration(M, Intrinsic::frameescape);
+ Function *RecoverFrameFn =
+ Intrinsic::getDeclaration(M, Intrinsic::framerecover);
// Finally, replace all of the temporary allocas for frame variables used in
- // the outlined handlers and the original frame allocas with GEP instructions
- // that get the equivalent pointer from the frame allocation struct.
+ // the outlined handlers with calls to llvm.framerecover.
+ BasicBlock::iterator II = Entry->getFirstInsertionPt();
+ Instruction *AllocaInsertPt = II;
+ SmallVector<Value *, 8> AllocasToEscape;
for (auto &VarInfoEntry : FrameVarInfo) {
- AllocaInst *ParentAlloca = VarInfoEntry.first;
- HandlerAllocas &AllocaInfo = VarInfoEntry.second;
- int Idx = AllocaInfo.ParentFrameAllocationIndex;
-
- // If we have an index of -1 for this instruction, it means it isn't used
- // outside of this handler. In that case, we just keep the "temporary"
- // alloca in the handler and erase the original alloca from the parent.
- if (Idx == -1) {
+ Value *ParentVal = VarInfoEntry.first;
+ TinyPtrVector<AllocaInst *> &Allocas = VarInfoEntry.second;
+
+ // If the mapped value isn't already an alloca, we need to spill it if it
+ // is a computed value or copy it if it is an argument.
+ AllocaInst *ParentAlloca = dyn_cast<AllocaInst>(ParentVal);
+ if (!ParentAlloca) {
+ if (auto *Arg = dyn_cast<Argument>(ParentVal)) {
+ // Lower this argument to a copy and then demote that to the stack.
+ // We can't just use the argument location because the handler needs
+ // it to be in the frame allocation block.
+        // Use 'select i1 true, %arg, undef' to simulate a 'no-op' instruction.
+ Value *TrueValue = ConstantInt::getTrue(Context);
+ Value *UndefValue = UndefValue::get(Arg->getType());
+ Instruction *SI =
+ SelectInst::Create(TrueValue, Arg, UndefValue,
+ Arg->getName() + ".tmp", AllocaInsertPt);
+ Arg->replaceAllUsesWith(SI);
+ // Reset the select operand, because it was clobbered by the RAUW above.
+ SI->setOperand(1, Arg);
+ ParentAlloca = DemoteRegToStack(*SI, true, SI);
+ } else if (auto *PN = dyn_cast<PHINode>(ParentVal)) {
+ ParentAlloca = DemotePHIToStack(PN, AllocaInsertPt);
+ } else {
+ Instruction *ParentInst = cast<Instruction>(ParentVal);
+ // FIXME: This is a work-around to temporarily handle the case where an
+ // instruction that is only used in handlers is not sunk.
+ // Without uses, DemoteRegToStack would just eliminate the value.
+ // This will fail if ParentInst is an invoke.
+ if (ParentInst->getNumUses() == 0) {
+ BasicBlock::iterator InsertPt = ParentInst;
+ ++InsertPt;
+ ParentAlloca =
+ new AllocaInst(ParentInst->getType(), nullptr,
+ ParentInst->getName() + ".reg2mem", InsertPt);
+ new StoreInst(ParentInst, ParentAlloca, InsertPt);
+ } else {
+ ParentAlloca = DemoteRegToStack(*ParentInst, true, ParentInst);
+ }
+ }
+ }
+
+ // If the parent alloca is no longer used and only one of the handlers used
+ // it, erase the parent and leave the copy in the outlined handler.
+ if (ParentAlloca->getNumUses() == 0 && Allocas.size() == 1) {
ParentAlloca->eraseFromParent();
- } else {
- // Otherwise, we replace the parent alloca and all outlined allocas
- // which map to it with GEP instructions.
-
- // First replace the original alloca.
- Builder.SetInsertPoint(ParentAlloca);
- Builder.SetCurrentDebugLocation(ParentAlloca->getDebugLoc());
- Value *ElementPtr =
- Builder.CreateConstInBoundsGEP2_32(FrameEHData, 0, Idx);
- ParentAlloca->replaceAllUsesWith(ElementPtr);
- ParentAlloca->removeFromParent();
- ElementPtr->takeName(ParentAlloca);
- delete ParentAlloca;
-
- // Next replace all outlined allocas that are mapped to it.
- for (AllocaInst *TempAlloca : AllocaInfo.Allocas) {
- Value *EHData = EHDataMap[TempAlloca->getParent()->getParent()];
- // FIXME: Sink this GEP into the blocks where it is used.
- Builder.SetInsertPoint(TempAlloca);
- Builder.SetCurrentDebugLocation(TempAlloca->getDebugLoc());
- ElementPtr = Builder.CreateConstInBoundsGEP2_32(EHData, 0, Idx);
- TempAlloca->replaceAllUsesWith(ElementPtr);
- TempAlloca->removeFromParent();
- ElementPtr->takeName(TempAlloca);
- delete TempAlloca;
+ continue;
+ }
+
+ // Add this alloca to the list of things to escape.
+ AllocasToEscape.push_back(ParentAlloca);
+
+ // Next replace all outlined allocas that are mapped to it.
+ for (AllocaInst *TempAlloca : Allocas) {
+ Function *HandlerFn = TempAlloca->getParent()->getParent();
+ // FIXME: Sink this GEP into the blocks where it is used.
+ Builder.SetInsertPoint(TempAlloca);
+ Builder.SetCurrentDebugLocation(TempAlloca->getDebugLoc());
+ Value *RecoverArgs[] = {
+ Builder.CreateBitCast(&F, Int8PtrType, ""),
+ &(HandlerFn->getArgumentList().back()),
+ llvm::ConstantInt::get(Int32Type, AllocasToEscape.size() - 1)};
+ Value *RecoveredAlloca = Builder.CreateCall(RecoverFrameFn, RecoverArgs);
+ // Add a pointer bitcast if the alloca wasn't an i8.
+ if (RecoveredAlloca->getType() != TempAlloca->getType()) {
+ RecoveredAlloca->setName(Twine(TempAlloca->getName()) + ".i8");
+ RecoveredAlloca =
+ Builder.CreateBitCast(RecoveredAlloca, TempAlloca->getType());
}
- } // end else of if (Idx == -1)
- } // End for each FrameVarInfo entry.
+ TempAlloca->replaceAllUsesWith(RecoveredAlloca);
+ TempAlloca->removeFromParent();
+ RecoveredAlloca->takeName(TempAlloca);
+ delete TempAlloca;
+ }
+ } // End for each FrameVarInfo entry.
+
+ // Insert 'call void (...)* @llvm.frameescape(...)' at the end of the entry
+ // block.
+ Builder.SetInsertPoint(&F.getEntryBlock().back());
+ Builder.CreateCall(FrameEscapeFn, AllocasToEscape);
+
+ // Insert an alloca for the EH state in the entry block. On x86, we will also
+ // insert stores to update the EH state, but on other ISAs, the runtime does
+ // it for us.
+ // FIXME: This record is different on x86.
+ Type *UnwindHelpTy = Type::getInt64Ty(Context);
+ AllocaInst *UnwindHelp =
+ new AllocaInst(UnwindHelpTy, "unwindhelp", &F.getEntryBlock().front());
+ Builder.CreateStore(llvm::ConstantInt::get(UnwindHelpTy, -2), UnwindHelp);
+ Function *UnwindHelpFn =
+ Intrinsic::getDeclaration(M, Intrinsic::eh_unwindhelp);
+ Builder.CreateCall(UnwindHelpFn,
+ Builder.CreateBitCast(UnwindHelp, Int8PtrType));
+
+ // Clean up the handler action maps we created for this function
+ DeleteContainerSeconds(CatchHandlerMap);
+ CatchHandlerMap.clear();
+ DeleteContainerSeconds(CleanupHandlerMap);
+ CleanupHandlerMap.clear();
return HandlersOutlined;
}
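
  Taken together, the frame-sharing protocol set up above looks roughly like
  this (function, slot, and argument names are hypothetical):

    ; In the parent, at the end of the entry block:
    call void (...)* @llvm.frameescape(i32* %state, i8** %e.addr)

    ; In an outlined handler, where %fp is the handler's second argument:
    %raw = call i8* @llvm.framerecover(
        i8* bitcast (void ()* @parent to i8*), i8* %fp, i32 1)
    %e.addr.rec = bitcast i8* %raw to i8**   ; recovered view of escaped slot 1
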
-bool WinEHPrepare::outlineCatchHandler(Function *SrcFn, Constant *SelectorType,
- LandingPadInst *LPad, CallInst *&EHAlloc,
- AllocaInst *&EHObjPtr,
- FrameVarInfoMap &VarInfo) {
+// This function examines a block to determine whether it ends with a
+// conditional branch to a catch handler based on a selector comparison.
+// It is used by both WinEHPrepare::findSelectorComparison() and
+// WinEHCleanupDirector::handleTypeIdFor().
+static bool isSelectorDispatch(BasicBlock *BB, BasicBlock *&CatchHandler,
+ Constant *&Selector, BasicBlock *&NextBB) {
+ ICmpInst::Predicate Pred;
+ BasicBlock *TBB, *FBB;
+ Value *LHS, *RHS;
+
+ if (!match(BB->getTerminator(),
+ m_Br(m_ICmp(Pred, m_Value(LHS), m_Value(RHS)), TBB, FBB)))
+ return false;
+
+ if (!match(LHS,
+ m_Intrinsic<Intrinsic::eh_typeid_for>(m_Constant(Selector))) &&
+ !match(RHS, m_Intrinsic<Intrinsic::eh_typeid_for>(m_Constant(Selector))))
+ return false;
+
+ if (Pred == CmpInst::ICMP_EQ) {
+ CatchHandler = TBB;
+ NextBB = FBB;
+ return true;
+ }
+
+ if (Pred == CmpInst::ICMP_NE) {
+ CatchHandler = FBB;
+ NextBB = TBB;
+ return true;
+ }
+
+ return false;
+}
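
  The shape being matched is the usual selector-dispatch idiom the front end
  emits at a landing pad (illustrative IR; names hypothetical):

    %typeid = call i32 @llvm.eh.typeid.for(i8* bitcast (i8** @typeinfo to i8*))
    %matches = icmp eq i32 %sel, %typeid
    br i1 %matches, label %catch, label %eh.next  ; ICMP_NE swaps the targets
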
+
+bool WinEHPrepare::outlineHandler(ActionHandler *Action, Function *SrcFn,
+ LandingPadInst *LPad, BasicBlock *StartBB,
+ FrameVarInfoMap &VarInfo) {
Module *M = SrcFn->getParent();
LLVMContext &Context = M->getContext();
@@ -385,133 +668,241 @@ bool WinEHPrepare::outlineCatchHandler(Function *SrcFn, Constant *SelectorType,
std::vector<Type *> ArgTys;
ArgTys.push_back(Int8PtrType);
ArgTys.push_back(Int8PtrType);
- FunctionType *FnType = FunctionType::get(Int8PtrType, ArgTys, false);
- Function *CatchHandler = Function::Create(
- FnType, GlobalVariable::ExternalLinkage, SrcFn->getName() + ".catch", M);
+ Function *Handler;
+ if (Action->getType() == Catch) {
+ FunctionType *FnType = FunctionType::get(Int8PtrType, ArgTys, false);
+ Handler = Function::Create(FnType, GlobalVariable::InternalLinkage,
+ SrcFn->getName() + ".catch", M);
+ } else {
+ FunctionType *FnType =
+ FunctionType::get(Type::getVoidTy(Context), ArgTys, false);
+ Handler = Function::Create(FnType, GlobalVariable::InternalLinkage,
+ SrcFn->getName() + ".cleanup", M);
+ }
// Generate a standard prolog to setup the frame recovery structure.
IRBuilder<> Builder(Context);
- BasicBlock *Entry = BasicBlock::Create(Context, "catch.entry");
- CatchHandler->getBasicBlockList().push_front(Entry);
+ BasicBlock *Entry = BasicBlock::Create(Context, "entry");
+ Handler->getBasicBlockList().push_front(Entry);
Builder.SetInsertPoint(Entry);
Builder.SetCurrentDebugLocation(LPad->getDebugLoc());
- // The outlined handler will be called with the parent's frame pointer as
- // its second argument. To enable the handler to access variables from
- // the parent frame, we use that pointer to get locate a special block
- // of memory that was allocated using llvm.eh.allocateframe for this
- // purpose. During the outlining process we will determine which frame
- // variables are used in handlers and create a structure that maps these
- // variables into the frame allocation block.
- //
- // The frame allocation block also contains an exception state variable
- // used by the runtime and a pointer to the exception object pointer
- // which will be filled in by the runtime for use in the handler.
- Function *RecoverFrameFn =
- Intrinsic::getDeclaration(M, Intrinsic::framerecover);
- Value *RecoverArgs[] = {Builder.CreateBitCast(SrcFn, Int8PtrType, ""),
- &(CatchHandler->getArgumentList().back())};
- EHAlloc = Builder.CreateCall(RecoverFrameFn, RecoverArgs, "eh.alloc");
-
- // This alloca is only temporary. We'll be replacing it once we know all the
- // frame variables that need to go in the frame allocation structure.
- EHObjPtr = Builder.CreateAlloca(Int8PtrType, 0, "eh.obj.ptr");
-
- // This will give us a raw pointer to the exception object, which
- // corresponds to the formal parameter of the catch statement. If the
- // handler uses this object, we will generate code during the outlining
- // process to cast the pointer to the appropriate type and deference it
- // as necessary. The un-outlined landing pad code represents the
- // exception object as the result of the llvm.eh.begincatch call.
- Value *EHObj = Builder.CreateLoad(EHObjPtr, false, "eh.obj");
+ std::unique_ptr<WinEHCloningDirectorBase> Director;
ValueToValueMapTy VMap;
- // FIXME: Map other values referenced in the filter handler.
-
- WinEHCatchDirector Director(LPad, CatchHandler, SelectorType, EHObj, VarInfo);
+ LandingPadMap &LPadMap = LPadMaps[LPad];
+ if (!LPadMap.isInitialized())
+ LPadMap.mapLandingPad(LPad);
+ if (auto *CatchAction = dyn_cast<CatchHandler>(Action)) {
+ Constant *Sel = CatchAction->getSelector();
+ Director.reset(new WinEHCatchDirector(Handler, Sel, VarInfo, LPadMap));
+ LPadMap.remapSelector(VMap, ConstantInt::get(Type::getInt32Ty(Context), 1));
+ } else {
+ Director.reset(new WinEHCleanupDirector(Handler, VarInfo, LPadMap));
+ }
SmallVector<ReturnInst *, 8> Returns;
- ClonedCodeInfo InlinedFunctionInfo;
+ ClonedCodeInfo OutlinedFunctionInfo;
+
+ // If the start block contains PHI nodes, we need to map them.
+ BasicBlock::iterator II = StartBB->begin();
+ while (auto *PN = dyn_cast<PHINode>(II)) {
+ bool Mapped = false;
+ // Look for PHI values that we have already mapped (such as the selector).
+ for (Value *Val : PN->incoming_values()) {
+ if (VMap.count(Val)) {
+ VMap[PN] = VMap[Val];
+ Mapped = true;
+ }
+ }
+ // If we didn't find a match for this value, map it as an undef.
+ if (!Mapped) {
+ VMap[PN] = UndefValue::get(PN->getType());
+ }
+ ++II;
+ }
- BasicBlock::iterator II = LPad;
+ // Skip over PHIs and, if applicable, landingpad instructions.
+ II = StartBB->getFirstInsertionPt();
- CloneAndPruneIntoFromInst(CatchHandler, SrcFn, ++II, VMap,
+ CloneAndPruneIntoFromInst(Handler, SrcFn, II, VMap,
/*ModuleLevelChanges=*/false, Returns, "",
- &InlinedFunctionInfo,
- SrcFn->getParent()->getDataLayout(), &Director);
+ &OutlinedFunctionInfo, Director.get());
// Move all the instructions in the first cloned block into our entry block.
BasicBlock *FirstClonedBB = std::next(Function::iterator(Entry));
Entry->getInstList().splice(Entry->end(), FirstClonedBB->getInstList());
FirstClonedBB->eraseFromParent();
+ if (auto *CatchAction = dyn_cast<CatchHandler>(Action)) {
+ WinEHCatchDirector *CatchDirector =
+ reinterpret_cast<WinEHCatchDirector *>(Director.get());
+ CatchAction->setExceptionVar(CatchDirector->getExceptionVar());
+ CatchAction->setReturnTargets(CatchDirector->getReturnTargets());
+ }
+
+ Action->setHandlerBlockOrFunc(Handler);
+
return true;
}
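
  For orientation, the handlers outlined here take the shape below (function
  and argument names and the continuation label are hypothetical):

    ; Catch handlers return the address of the block to resume at:
    define internal i8* @parent.catch(i8* %exn, i8* %fp) {
      ...
      ret i8* blockaddress(@parent, %try.cont)
    }

    ; Cleanup handlers have nothing to return:
    define internal void @parent.cleanup(i8* %exn, i8* %fp) {
      ...
      ret void
    }
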
-CloningDirector::CloningAction WinEHCatchDirector::handleInstruction(
- ValueToValueMapTy &VMap, const Instruction *Inst, BasicBlock *NewBB) {
- // Intercept instructions which extract values from the landing pad aggregate.
- if (auto *Extract = dyn_cast<ExtractValueInst>(Inst)) {
- if (Extract->getAggregateOperand() == LPI) {
- assert(Extract->getNumIndices() == 1 &&
- "Unexpected operation: extracting both landing pad values");
- assert((*(Extract->idx_begin()) == 0 || *(Extract->idx_begin()) == 1) &&
- "Unexpected operation: extracting an unknown landing pad element");
-
- if (*(Extract->idx_begin()) == 0) {
- // Element 0 doesn't directly corresponds to anything in the WinEH
- // scheme.
- // It will be stored to a memory location, then later loaded and finally
- // the loaded value will be used as the argument to an
- // llvm.eh.begincatch
- // call. We're tracking it here so that we can skip the store and load.
- ExtractedEHPtr = Inst;
- } else {
- // Element 1 corresponds to the filter selector. We'll map it to 1 for
- // matching purposes, but it will also probably be stored to memory and
- // reloaded, so we need to track the instuction so that we can map the
- // loaded value too.
- VMap[Inst] = ConstantInt::get(SelectorIDType, 1);
- ExtractedSelector = Inst;
- }
-
- // Tell the caller not to clone this instruction.
- return CloningDirector::SkipInstruction;
- }
- // Other extract value instructions just get cloned.
- return CloningDirector::CloneInstruction;
+/// Pass the handler basic block to llvm.eh.actions and list it as a possible
+/// indirectbr target. If this BB does not end in a selector dispatch, it is
+/// treated as a catch-all and split after the landingpad.
+void WinEHPrepare::processSEHCatchHandler(CatchHandler *CatchAction,
+ BasicBlock *StartBB) {
+ BasicBlock *HandlerBB;
+ BasicBlock *NextBB;
+ Constant *Selector;
+ bool Res = isSelectorDispatch(StartBB, HandlerBB, Selector, NextBB);
+ if (Res) {
+ // If this was EH dispatch, this must be a conditional branch to the handler
+ // block.
+ // FIXME: Handle instructions in the dispatch block. Currently we drop them,
+ // leading to crashes if some optimization hoists stuff here.
+ assert(CatchAction->getSelector() && HandlerBB &&
+ "expected catch EH dispatch");
+ } else {
+ // This must be a catch-all. Split the block after the landingpad.
+ assert(CatchAction->getSelector()->isNullValue() && "expected catch-all");
+ HandlerBB =
+ StartBB->splitBasicBlock(StartBB->getFirstInsertionPt(), "catch.all");
}
+ CatchAction->setHandlerBlockOrFunc(BlockAddress::get(HandlerBB));
+ TinyPtrVector<BasicBlock *> Targets(HandlerBB);
+ CatchAction->setReturnTargets(Targets);
+}
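
  In the SEH case no outlining happens; the action entry simply carries a
  blockaddress into the parent function (illustrative sketch for a catch-all,
  where both the selector and the exception object slot are null):

    %recover = call i8* (...)* @llvm.eh.actions(
        i32 1, i8* null, i8* null, i8* blockaddress(@parent, %catch.all))
    indirectbr i8* %recover, [label %catch.all]
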
- if (auto *Store = dyn_cast<StoreInst>(Inst)) {
- // Look for and suppress stores of the extracted landingpad values.
- const Value *StoredValue = Store->getValueOperand();
- if (StoredValue == ExtractedEHPtr) {
- EHPtrStoreAddr = Store->getPointerOperand();
- return CloningDirector::SkipInstruction;
+void LandingPadMap::mapLandingPad(const LandingPadInst *LPad) {
+ // Each instance of this class should only ever be used to map a single
+ // landing pad.
+ assert(OriginLPad == nullptr || OriginLPad == LPad);
+
+ // If the landing pad has already been mapped, there's nothing more to do.
+ if (OriginLPad == LPad)
+ return;
+
+ OriginLPad = LPad;
+
+ // The landingpad instruction returns an aggregate value. Typically, its
+ // value will be passed to a pair of extract value instructions and the
+ // results of those extracts are often passed to store instructions.
+ // In unoptimized code the stored value will often be loaded and then stored
+ // again.
+ for (auto *U : LPad->users()) {
+ const ExtractValueInst *Extract = dyn_cast<ExtractValueInst>(U);
+ if (!Extract)
+ continue;
+ assert(Extract->getNumIndices() == 1 &&
+ "Unexpected operation: extracting both landing pad values");
+ unsigned int Idx = *(Extract->idx_begin());
+ assert((Idx == 0 || Idx == 1) &&
+ "Unexpected operation: extracting an unknown landing pad element");
+ if (Idx == 0) {
+      // Element 0 doesn't directly correspond to anything in the WinEH
+      // scheme. It will be stored to a memory location, then later loaded,
+      // and finally the loaded value will be used as the argument to an
+      // llvm.eh.begincatch call. We're tracking it here so that we can skip
+      // the store and load.
+ ExtractedEHPtrs.push_back(Extract);
+ } else if (Idx == 1) {
+ // Element 1 corresponds to the filter selector. We'll map it to 1 for
+ // matching purposes, but it will also probably be stored to memory and
+      // reloaded, so we need to track the instruction so that we can map the
+ // loaded value too.
+ ExtractedSelectors.push_back(Extract);
}
- if (StoredValue == ExtractedSelector) {
- SelectorStoreAddr = Store->getPointerOperand();
- return CloningDirector::SkipInstruction;
+
+ // Look for stores of the extracted values.
+ for (auto *EU : Extract->users()) {
+ if (auto *Store = dyn_cast<StoreInst>(EU)) {
+ if (Idx == 1) {
+ SelectorStores.push_back(Store);
+ SelectorStoreAddrs.push_back(Store->getPointerOperand());
+ } else {
+ EHPtrStores.push_back(Store);
+ EHPtrStoreAddrs.push_back(Store->getPointerOperand());
+ }
+ }
}
+ }
+}
- // Any other store just gets cloned.
- return CloningDirector::CloneInstruction;
+bool LandingPadMap::isLandingPadSpecificInst(const Instruction *Inst) const {
+ if (Inst == OriginLPad)
+ return true;
+ for (auto *Extract : ExtractedEHPtrs) {
+ if (Inst == Extract)
+ return true;
+ }
+ for (auto *Extract : ExtractedSelectors) {
+ if (Inst == Extract)
+ return true;
+ }
+ for (auto *Store : EHPtrStores) {
+ if (Inst == Store)
+ return true;
+ }
+ for (auto *Store : SelectorStores) {
+ if (Inst == Store)
+ return true;
+ }
+
+ return false;
+}
+
+void LandingPadMap::remapSelector(ValueToValueMapTy &VMap,
+ Value *MappedValue) const {
+ // Remap all selector extract instructions to the specified value.
+ for (auto *Extract : ExtractedSelectors)
+ VMap[Extract] = MappedValue;
+}
+
+bool LandingPadMap::mapIfEHLoad(const LoadInst *Load,
+ SmallVectorImpl<const StoreInst *> &Stores,
+ SmallVectorImpl<const Value *> &StoreAddrs) {
+ // This makes the assumption that a store we've previously seen dominates
+ // this load instruction. That might seem like a rather huge assumption,
+  // but given the way that landingpads are constructed it's fairly safe.
+ // FIXME: Add debug/assert code that verifies this.
+ const Value *LoadAddr = Load->getPointerOperand();
+ for (auto *StoreAddr : StoreAddrs) {
+ if (LoadAddr == StoreAddr) {
+ // Handle the common debug scenario where this loaded value is stored
+ // to a different location.
+ for (auto *U : Load->users()) {
+ if (auto *Store = dyn_cast<StoreInst>(U)) {
+ Stores.push_back(Store);
+ StoreAddrs.push_back(Store->getPointerOperand());
+ }
+ }
+ return true;
+ }
}
+ return false;
+}
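
  The "debug scenario" handled above is the unoptimized spill/reload pattern
  (illustrative IR; slot names hypothetical):

    %sel = extractvalue { i8*, i32 } %vals, 1
    store i32 %sel, i32* %sel.slot            ; recorded by mapLandingPad
    %sel.reload = load i32, i32* %sel.slot    ; matched here by address...
    store i32 %sel.reload, i32* %sel.slot2    ; ...and the second slot is added
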
+
+CloningDirector::CloningAction WinEHCloningDirectorBase::handleInstruction(
+ ValueToValueMapTy &VMap, const Instruction *Inst, BasicBlock *NewBB) {
+ // If this is one of the boilerplate landing pad instructions, skip it.
+ // The instruction will have already been remapped in VMap.
+ if (LPadMap.isLandingPadSpecificInst(Inst))
+ return CloningDirector::SkipInstruction;
if (auto *Load = dyn_cast<LoadInst>(Inst)) {
// Look for loads of (previously suppressed) landingpad values.
- // The EHPtr load can be ignored (it should only be used as
- // an argument to llvm.eh.begincatch), but the selector value
- // needs to be mapped to a constant value of 1 to be used to
- // simplify the branching to always flow to the current handler.
- const Value *LoadAddr = Load->getPointerOperand();
- if (LoadAddr == EHPtrStoreAddr) {
- VMap[Inst] = UndefValue::get(Int8PtrType);
+ // The EHPtr load can be mapped to an undef value as it should only be used
+ // as an argument to llvm.eh.begincatch, but the selector value needs to be
+ // mapped to a constant value of 1. This value will be used to simplify the
+ // branching to always flow to the current handler.
+ if (LPadMap.mapIfSelectorLoad(Load)) {
+ VMap[Inst] = ConstantInt::get(SelectorIDType, 1);
return CloningDirector::SkipInstruction;
}
- if (LoadAddr == SelectorStoreAddr) {
- VMap[Inst] = ConstantInt::get(SelectorIDType, 1);
+ if (LPadMap.mapIfEHPtrLoad(Load)) {
+ VMap[Inst] = UndefValue::get(Int8PtrType);
return CloningDirector::SkipInstruction;
}
@@ -519,108 +910,576 @@ CloningDirector::CloningAction WinEHCatchDirector::handleInstruction(
return CloningDirector::CloneInstruction;
}
- if (match(Inst, m_Intrinsic<Intrinsic::eh_begincatch>())) {
- // The argument to the call is some form of the first element of the
- // landingpad aggregate value, but that doesn't matter. It isn't used
- // here.
- // The return value of this instruction, however, is used to access the
- // EH object pointer. We have generated an instruction to get that value
- // from the EH alloc block, so we can just map to that here.
- VMap[Inst] = EHObj;
- return CloningDirector::SkipInstruction;
- }
- if (match(Inst, m_Intrinsic<Intrinsic::eh_endcatch>())) {
- auto *IntrinCall = dyn_cast<IntrinsicInst>(Inst);
- // It might be interesting to track whether or not we are inside a catch
- // function, but that might make the algorithm more brittle than it needs
- // to be.
-
- // The end catch call can occur in one of two places: either in a
- // landingpad
- // block that is part of the catch handlers exception mechanism, or at the
- // end of the catch block. If it occurs in a landing pad, we must skip it
- // and continue so that the landing pad gets cloned.
- // FIXME: This case isn't fully supported yet and shouldn't turn up in any
- // of the test cases until it is.
- if (IntrinCall->getParent()->isLandingPad())
- return CloningDirector::SkipInstruction;
-
- // If an end catch occurs anywhere else the next instruction should be an
- // unconditional branch instruction that we want to replace with a return
- // to the the address of the branch target.
- const BasicBlock *EndCatchBB = IntrinCall->getParent();
- const TerminatorInst *Terminator = EndCatchBB->getTerminator();
- const BranchInst *Branch = dyn_cast<BranchInst>(Terminator);
- assert(Branch && Branch->isUnconditional());
- assert(std::next(BasicBlock::const_iterator(IntrinCall)) ==
- BasicBlock::const_iterator(Branch));
-
- ReturnInst::Create(NewBB->getContext(),
- BlockAddress::get(Branch->getSuccessor(0)), NewBB);
-
- // We just added a terminator to the cloned block.
- // Tell the caller to stop processing the current basic block so that
- // the branch instruction will be skipped.
+ // Nested landing pads will be cloned as stubs, with just the
+ // landingpad instruction and an unreachable instruction. When
+ // all landingpads have been outlined, we'll replace this with the
+ // llvm.eh.actions call and indirect branch created when the
+ // landing pad was outlined.
+ if (auto *NestedLPad = dyn_cast<LandingPadInst>(Inst)) {
+ Instruction *NewInst = NestedLPad->clone();
+ if (NestedLPad->hasName())
+ NewInst->setName(NestedLPad->getName());
+ // FIXME: Store this mapping somewhere else also.
+ VMap[NestedLPad] = NewInst;
+ BasicBlock::InstListType &InstList = NewBB->getInstList();
+ InstList.push_back(NewInst);
+ InstList.push_back(new UnreachableInst(NewBB->getContext()));
return CloningDirector::StopCloningBB;
}
- if (match(Inst, m_Intrinsic<Intrinsic::eh_typeid_for>())) {
- auto *IntrinCall = dyn_cast<IntrinsicInst>(Inst);
- Value *Selector = IntrinCall->getArgOperand(0)->stripPointerCasts();
- // This causes a replacement that will collapse the landing pad CFG based
- // on the filter function we intend to match.
- if (Selector == CurrentSelector)
- VMap[Inst] = ConstantInt::get(SelectorIDType, 1);
- else
- VMap[Inst] = ConstantInt::get(SelectorIDType, 0);
- // Tell the caller not to clone this instruction.
- return CloningDirector::SkipInstruction;
- }
+
+ if (auto *Invoke = dyn_cast<InvokeInst>(Inst))
+ return handleInvoke(VMap, Invoke, NewBB);
+
+ if (auto *Resume = dyn_cast<ResumeInst>(Inst))
+ return handleResume(VMap, Resume, NewBB);
+
+ if (match(Inst, m_Intrinsic<Intrinsic::eh_begincatch>()))
+ return handleBeginCatch(VMap, Inst, NewBB);
+ if (match(Inst, m_Intrinsic<Intrinsic::eh_endcatch>()))
+ return handleEndCatch(VMap, Inst, NewBB);
+ if (match(Inst, m_Intrinsic<Intrinsic::eh_typeid_for>()))
+ return handleTypeIdFor(VMap, Inst, NewBB);
// Continue with the default cloning behavior.
return CloningDirector::CloneInstruction;
}
+CloningDirector::CloningAction WinEHCatchDirector::handleBeginCatch(
+ ValueToValueMapTy &VMap, const Instruction *Inst, BasicBlock *NewBB) {
+ // The argument to the call is some form of the first element of the
+ // landingpad aggregate value, but that doesn't matter. It isn't used
+ // here.
+ // The second argument is an outparameter where the exception object will be
+ // stored. Typically the exception object is a scalar, but it can be an
+ // aggregate when catching by value.
+ // FIXME: Leave something behind to indicate where the exception object lives
+ // for this handler. Should it be part of llvm.eh.actions?
+ assert(ExceptionObjectVar == nullptr && "Multiple calls to "
+ "llvm.eh.begincatch found while "
+ "outlining catch handler.");
+ ExceptionObjectVar = Inst->getOperand(1)->stripPointerCasts();
+ return CloningDirector::SkipInstruction;
+}
+
+CloningDirector::CloningAction
+WinEHCatchDirector::handleEndCatch(ValueToValueMapTy &VMap,
+ const Instruction *Inst, BasicBlock *NewBB) {
+ auto *IntrinCall = dyn_cast<IntrinsicInst>(Inst);
+ // It might be interesting to track whether or not we are inside a catch
+ // function, but that might make the algorithm more brittle than it needs
+ // to be.
+
+ // The end catch call can occur in one of two places: either in a
+  // landingpad block that is part of the catch handler's exception mechanism,
+ // or at the end of the catch block. If it occurs in a landing pad, we must
+ // skip it and continue so that the landing pad gets cloned.
+ // FIXME: This case isn't fully supported yet and shouldn't turn up in any
+ // of the test cases until it is.
+ if (IntrinCall->getParent()->isLandingPad())
+ return CloningDirector::SkipInstruction;
+
+  // If an end catch occurs anywhere else, the next instruction should be an
+  // unconditional branch instruction that we want to replace with a return
+  // to the address of the branch target.
+ const BasicBlock *EndCatchBB = IntrinCall->getParent();
+ const TerminatorInst *Terminator = EndCatchBB->getTerminator();
+ const BranchInst *Branch = dyn_cast<BranchInst>(Terminator);
+ assert(Branch && Branch->isUnconditional());
+ assert(std::next(BasicBlock::const_iterator(IntrinCall)) ==
+ BasicBlock::const_iterator(Branch));
+
+ BasicBlock *ContinueLabel = Branch->getSuccessor(0);
+ ReturnInst::Create(NewBB->getContext(), BlockAddress::get(ContinueLabel),
+ NewBB);
+ ReturnTargets.push_back(ContinueLabel);
+
+ // We just added a terminator to the cloned block.
+ // Tell the caller to stop processing the current basic block so that
+ // the branch instruction will be skipped.
+ return CloningDirector::StopCloningBB;
+}
+
+CloningDirector::CloningAction WinEHCatchDirector::handleTypeIdFor(
+ ValueToValueMapTy &VMap, const Instruction *Inst, BasicBlock *NewBB) {
+ auto *IntrinCall = dyn_cast<IntrinsicInst>(Inst);
+ Value *Selector = IntrinCall->getArgOperand(0)->stripPointerCasts();
+ // This causes a replacement that will collapse the landing pad CFG based
+ // on the filter function we intend to match.
+ if (Selector == CurrentSelector)
+ VMap[Inst] = ConstantInt::get(SelectorIDType, 1);
+ else
+ VMap[Inst] = ConstantInt::get(SelectorIDType, 0);
+ // Tell the caller not to clone this instruction.
+ return CloningDirector::SkipInstruction;
+}
+
+CloningDirector::CloningAction
+WinEHCatchDirector::handleInvoke(ValueToValueMapTy &VMap,
+ const InvokeInst *Invoke, BasicBlock *NewBB) {
+ return CloningDirector::CloneInstruction;
+}
+
+CloningDirector::CloningAction
+WinEHCatchDirector::handleResume(ValueToValueMapTy &VMap,
+ const ResumeInst *Resume, BasicBlock *NewBB) {
+ // Resume instructions shouldn't be reachable from catch handlers.
+ // We still need to handle them, but any such block will be pruned.
+ BasicBlock::InstListType &InstList = NewBB->getInstList();
+ InstList.push_back(new UnreachableInst(NewBB->getContext()));
+ return CloningDirector::StopCloningBB;
+}
+
+CloningDirector::CloningAction WinEHCleanupDirector::handleBeginCatch(
+ ValueToValueMapTy &VMap, const Instruction *Inst, BasicBlock *NewBB) {
+ // Catch blocks within cleanup handlers will always be unreachable.
+ // We'll insert an unreachable instruction now, but it will be pruned
+ // before the cloning process is complete.
+ BasicBlock::InstListType &InstList = NewBB->getInstList();
+ InstList.push_back(new UnreachableInst(NewBB->getContext()));
+ return CloningDirector::StopCloningBB;
+}
+
+CloningDirector::CloningAction WinEHCleanupDirector::handleEndCatch(
+ ValueToValueMapTy &VMap, const Instruction *Inst, BasicBlock *NewBB) {
+ // Catch blocks within cleanup handlers will always be unreachable.
+ // We'll insert an unreachable instruction now, but it will be pruned
+ // before the cloning process is complete.
+ BasicBlock::InstListType &InstList = NewBB->getInstList();
+ InstList.push_back(new UnreachableInst(NewBB->getContext()));
+ return CloningDirector::StopCloningBB;
+}
+
+CloningDirector::CloningAction WinEHCleanupDirector::handleTypeIdFor(
+ ValueToValueMapTy &VMap, const Instruction *Inst, BasicBlock *NewBB) {
+ // If we encounter a selector comparison while cloning a cleanup handler,
+ // we want to stop cloning immediately. Anything after the dispatch
+ // will be outlined into a different handler.
+ BasicBlock *CatchHandler;
+ Constant *Selector;
+ BasicBlock *NextBB;
+ if (isSelectorDispatch(const_cast<BasicBlock *>(Inst->getParent()),
+ CatchHandler, Selector, NextBB)) {
+ ReturnInst::Create(NewBB->getContext(), nullptr, NewBB);
+ return CloningDirector::StopCloningBB;
+ }
+ // If llvm.eh.typeid.for is called for any other reason, it can be ignored.
+ VMap[Inst] = ConstantInt::get(SelectorIDType, 0);
+ return CloningDirector::SkipInstruction;
+}
+
+CloningDirector::CloningAction WinEHCleanupDirector::handleInvoke(
+ ValueToValueMapTy &VMap, const InvokeInst *Invoke, BasicBlock *NewBB) {
+ // All invokes in cleanup handlers can be replaced with calls.
+ SmallVector<Value *, 16> CallArgs(Invoke->op_begin(), Invoke->op_end() - 3);
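+ // (The last three operands of an invoke are its normal destination, unwind
+ // destination, and callee, so op_end() - 3 leaves just the call arguments.)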
+ // Insert a normal call instruction...
+ CallInst *NewCall =
+ CallInst::Create(const_cast<Value *>(Invoke->getCalledValue()), CallArgs,
+ Invoke->getName(), NewBB);
+ NewCall->setCallingConv(Invoke->getCallingConv());
+ NewCall->setAttributes(Invoke->getAttributes());
+ NewCall->setDebugLoc(Invoke->getDebugLoc());
+ VMap[Invoke] = NewCall;
+
+ // Insert an unconditional branch to the normal destination.
+ BranchInst::Create(Invoke->getNormalDest(), NewBB);
+
+ // The unwind destination won't be cloned into the new function, so
+ // we don't need to clean up its phi nodes.
+
+ // We just added a terminator to the cloned block.
+ // Tell the caller to stop processing the current basic block.
+ return CloningDirector::StopCloningBB;
+}
+
+CloningDirector::CloningAction WinEHCleanupDirector::handleResume(
+ ValueToValueMapTy &VMap, const ResumeInst *Resume, BasicBlock *NewBB) {
+ ReturnInst::Create(NewBB->getContext(), nullptr, NewBB);
+
+ // We just added a terminator to the cloned block.
+ // Tell the caller to stop processing the current basic block so that
+ // the resume instruction will be skipped.
+ return CloningDirector::StopCloningBB;
+}
+
WinEHFrameVariableMaterializer::WinEHFrameVariableMaterializer(
Function *OutlinedFn, FrameVarInfoMap &FrameVarInfo)
: FrameVarInfo(FrameVarInfo), Builder(OutlinedFn->getContext()) {
Builder.SetInsertPoint(&OutlinedFn->getEntryBlock());
- // FIXME: Do something with the FrameVarMapped so that it is shared across the
- // function.
}
Value *WinEHFrameVariableMaterializer::materializeValueFor(Value *V) {
- // If we're asked to materialize an alloca variable, we temporarily
- // create a matching alloca in the outlined function. When all the
- // outlining is complete, we'll collect these into a structure and
- // replace these temporary allocas with GEPs referencing the frame
- // allocation block.
+ // If we're asked to materialize a value that is an instruction, we
+ // temporarily create an alloca in the outlined function and add this
+ // to the FrameVarInfo map. When all the outlining is complete, we'll
+ // collect these into a structure, spilling non-alloca values in the
+ // parent frame as necessary, and replace these temporary allocas with
+ // GEPs referencing the frame allocation block.
+
+ // If the value is an alloca, the mapping is direct.
if (auto *AV = dyn_cast<AllocaInst>(V)) {
- AllocaInst *NewAlloca = Builder.CreateAlloca(
- AV->getAllocatedType(), AV->getArraySize(), AV->getName());
- FrameVarInfo[AV].Allocas.push_back(NewAlloca);
+ AllocaInst *NewAlloca = cast<AllocaInst>(AV->clone());
+ Builder.Insert(NewAlloca, AV->getName());
+ FrameVarInfo[AV].push_back(NewAlloca);
return NewAlloca;
}
-// FIXME: Do PHI nodes need special handling?
+ // For other types of instructions or arguments, we need an alloca based on
+ // the value's type and a load of the alloca. The alloca will be replaced
+ // by a GEP, but the load will stay. In the parent function, the value will
+ // be spilled to a location in the frame allocation block.
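+ // For example (illustrative), a live i32 value %v would be materialized as:
+ //   %eh.temp.alloca = alloca i32   ; later replaced by a frame GEP
+ //   %v.reload = load i32* %eh.temp.alloca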
+ if (isa<Instruction>(V) || isa<Argument>(V)) {
+ AllocaInst *NewAlloca =
+ Builder.CreateAlloca(V->getType(), nullptr, "eh.temp.alloca");
+ FrameVarInfo[V].push_back(NewAlloca);
+ LoadInst *NewLoad = Builder.CreateLoad(NewAlloca, V->getName() + ".reload");
+ return NewLoad;
+ }
-// FIXME: Are there other cases we can handle better? GEP, ExtractValue, etc.
+ // Don't materialize other values.
+ return nullptr;
+}
-// FIXME: This doesn't work during cloning because it finds an instruction
-// in the use list that isn't yet part of a basic block.
-#if 0
- // If we're asked to remap some other instruction, we'll need to
- // spill it to an alloca variable in the parent function and add a
- // temporary alloca in the outlined function to be processed as
- // described above.
- Instruction *Inst = dyn_cast<Instruction>(V);
- if (Inst) {
- AllocaInst *Spill = DemoteRegToStack(*Inst, true);
- AllocaInst *NewAlloca = Builder.CreateAlloca(Spill->getAllocatedType(),
- Spill->getArraySize());
- FrameVarMap[AV] = NewAlloca;
- return NewAlloca;
+// This function maps the catch and cleanup handlers that are reachable from the
+// specified landing pad. The landing pad sequence will have this basic shape:
+//
+// <cleanup handler>
+// <selector comparison>
+// <catch handler>
+// <cleanup handler>
+// <selector comparison>
+// <catch handler>
+// <cleanup handler>
+// ...
+//
+// Any of the cleanup slots may be absent. The cleanup slots may be occupied by
+// arbitrary control flow, but all paths through the cleanup code must
+// eventually reach the next selector comparison, and no path can skip to a
+// different selector comparison, though some paths may terminate abnormally.
+// Therefore, we will use a depth-first search from the start of any given
+// cleanup block and stop searching when we find the next selector comparison.
+//
+// If the landingpad instruction does not have a catch clause, we will assume
+// that any instructions other than selector comparisons and catch handlers can
+// be ignored. In practice, these will only be the boilerplate instructions.
+//
+// The catch handlers may also contain arbitrary control flow, but we are only
+// interested in where each catch handler starts, so we don't need to actually
+// follow the flow within them. The start of each catch handler can be located
+// from its compare instruction, and the handler itself can be skipped during
+// the search by following the opposite branch of that compare.
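+//
+// For reference, a landing pad with this shape might look like the following
+// (illustrative IR; names and types are hypothetical):
+//
+//   %lp = landingpad { i8*, i32 } personality i8* bitcast (...)
+//            cleanup
+//            catch i8* bitcast (i8** @_ZTIi to i8*)
+//
+// with each catch clause reached through a compare of the selector value
+// against the result of llvm.eh.typeid.for.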
+void WinEHPrepare::mapLandingPadBlocks(LandingPadInst *LPad,
+ LandingPadActions &Actions) {
+ unsigned int NumClauses = LPad->getNumClauses();
+ unsigned int HandlersFound = 0;
+ BasicBlock *BB = LPad->getParent();
+
+ DEBUG(dbgs() << "Mapping landing pad: " << BB->getName() << "\n");
+
+ if (NumClauses == 0) {
+ // This landing pad contains only cleanup code.
+ CleanupHandler *Action = new CleanupHandler(BB);
+ CleanupHandlerMap[BB] = Action;
+ Actions.insertCleanupHandler(Action);
+ DEBUG(dbgs() << " Assuming cleanup code in block " << BB->getName()
+ << "\n");
+ assert(LPad->isCleanup());
+ return;
+ }
+
+ VisitedBlockSet VisitedBlocks;
+
+ while (HandlersFound != NumClauses) {
+ BasicBlock *NextBB = nullptr;
+
+ // See if the clause we're looking for is a catch-all.
+ // If so, the catch begins immediately.
+ if (isa<ConstantPointerNull>(LPad->getClause(HandlersFound))) {
+ // The catch-all must occur last.
+ assert(HandlersFound == NumClauses - 1);
+
+ // For C++ EH, check if there is any interesting cleanup code before we
+ // begin the catch. This is important because cleanups cannot rethrow
+ // exceptions but code called from catches can. For SEH, it isn't
+ // important if some finally code before a catch-all is executed out of
+ // line or after recovering from the exception.
+ if (Personality == EHPersonality::MSVC_CXX) {
+ if (auto *CleanupAction = findCleanupHandler(BB, BB)) {
+ // Add a cleanup entry to the list
+ Actions.insertCleanupHandler(CleanupAction);
+ DEBUG(dbgs() << " Found cleanup code in block "
+ << CleanupAction->getStartBlock()->getName() << "\n");
+ }
+ }
+
+ // Add the catch handler to the action list.
+ CatchHandler *Action =
+ new CatchHandler(BB, LPad->getClause(HandlersFound), nullptr);
+ CatchHandlerMap[BB] = Action;
+ Actions.insertCatchHandler(Action);
+ DEBUG(dbgs() << " Catch all handler at block " << BB->getName() << "\n");
+ ++HandlersFound;
+
+ // Once we reach a catch-all, don't expect to hit a resume instruction.
+ BB = nullptr;
+ break;
+ }
+
+ CatchHandler *CatchAction = findCatchHandler(BB, NextBB, VisitedBlocks);
+ assert(CatchAction && "Failed to find a catch handler");
+
+ // See if there is any interesting code executed before the dispatch.
+ if (auto *CleanupAction =
+ findCleanupHandler(BB, CatchAction->getStartBlock())) {
+ // Add a cleanup entry to the list
+ Actions.insertCleanupHandler(CleanupAction);
+ DEBUG(dbgs() << " Found cleanup code in block "
+ << CleanupAction->getStartBlock()->getName() << "\n");
+ }
+
+ ++HandlersFound;
+
+ // Add the catch handler to the action list.
+ Actions.insertCatchHandler(CatchAction);
+ DEBUG(dbgs() << " Found catch dispatch in block "
+ << CatchAction->getStartBlock()->getName() << "\n");
+
+ // Move on to the block after the catch handler.
+ BB = NextBB;
+ }
+
+ // If we didn't wind up in a catch-all, see if there is any interesting code
+ // executed before the resume.
+ if (auto *CleanupAction = findCleanupHandler(BB, BB)) {
+ // Add a cleanup entry to the list
+ Actions.insertCleanupHandler(CleanupAction);
+ DEBUG(dbgs() << " Found cleanup code in block "
+ << CleanupAction->getStartBlock()->getName() << "\n");
+ }
+
+ // It's possible that some optimization moved code into a landingpad that
+ // wasn't previously being used for cleanup. If that happens, we need to
+ // execute that extra code from a cleanup handler.
+ if (Actions.includesCleanup() && !LPad->isCleanup())
+ LPad->setCleanup(true);
+}
+
+// Starting with the input block, this function searches for the next block
+// that terminates with a branch whose condition is based on a selector
+// comparison. This may be the input block itself. See the mapLandingPadBlocks
+// comments for a discussion of control flow assumptions.
+//
+CatchHandler *WinEHPrepare::findCatchHandler(BasicBlock *BB,
+ BasicBlock *&NextBB,
+ VisitedBlockSet &VisitedBlocks) {
+ // If we've already found a catch handler for this block, use it.
+ // Call count() first to avoid creating a null entry for blocks
+ // we haven't seen before.
+ if (CatchHandlerMap.count(BB) && CatchHandlerMap[BB] != nullptr) {
+ CatchHandler *Action = cast<CatchHandler>(CatchHandlerMap[BB]);
+ NextBB = Action->getNextBB();
+ return Action;
}
-#endif
+ // VisitedBlocks applies only to the current search. We still
+ // need to consider blocks that we've visited while mapping other
+ // landing pads.
+ VisitedBlocks.insert(BB);
+
+ BasicBlock *CatchBlock = nullptr;
+ Constant *Selector = nullptr;
+
+ // If this is the first time we've visited this block from any landing pad,
+ // look to see if it is a selector dispatch block.
+ if (!CatchHandlerMap.count(BB)) {
+ if (isSelectorDispatch(BB, CatchBlock, Selector, NextBB)) {
+ CatchHandler *Action = new CatchHandler(BB, Selector, NextBB);
+ CatchHandlerMap[BB] = Action;
+ return Action;
+ }
+ }
+
+ // Visit each successor, looking for the dispatch.
+ // FIXME: We expect to find the dispatch quickly, so this will probably
+ // work better as a breadth-first search.
+ for (BasicBlock *Succ : successors(BB)) {
+ if (VisitedBlocks.count(Succ))
+ continue;
+
+ CatchHandler *Action = findCatchHandler(Succ, NextBB, VisitedBlocks);
+ if (Action)
+ return Action;
+ }
+ return nullptr;
+}
+
+// This helper factors out code repeated within findCleanupHandler.
+static CleanupHandler *
+createCleanupHandler(CleanupHandlerMapTy &CleanupHandlerMap, BasicBlock *BB) {
+ CleanupHandler *Action = new CleanupHandler(BB);
+ CleanupHandlerMap[BB] = Action;
+ return Action;
+}
+
+// This function searches starting with the input block for the next block that
+// contains code that is not part of a catch handler and would not be eliminated
+// during handler outlining.
+//
+CleanupHandler *WinEHPrepare::findCleanupHandler(BasicBlock *StartBB,
+ BasicBlock *EndBB) {
+ // Here we will skip over the following:
+ //
+ //   Landing pad prolog
+ //   Unconditional branches
+ //   Selector dispatch
+ //   Resume pattern
+ //
+ // Anything else marks the start of an interesting block.
+
+ BasicBlock *BB = StartBB;
+ // Anything other than an unconditional branch will kick us out of this loop
+ // one way or another.
+ while (BB) {
+ // If we've already scanned this block, don't scan it again. If it is
+ // a cleanup block, there will be an action in the CleanupHandlerMap.
+ // If we've scanned it and it is not a cleanup block, there will be a
+ // nullptr in the CleanupHandlerMap. If we have not scanned it, there will
+ // be no entry in the CleanupHandlerMap. We must call count() first to
+ // avoid creating a null entry for blocks we haven't scanned.
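+ // In short, the cache states described above are:
+ //   no entry       -> block not yet scanned
+ //   entry == null  -> scanned, not cleanup code
+ //   entry != null  -> scanned, start of cleanup code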
+ if (CleanupHandlerMap.count(BB)) {
+ if (auto *Action = CleanupHandlerMap[BB]) {
+ return cast<CleanupHandler>(Action);
+ } else {
+ // Here we handle the case where the cleanup handler map contains a
+ // value for this block but the value is a nullptr. This means that
+ // we have previously analyzed the block and determined that it did
+ // not contain any cleanup code. Based on the earlier analysis, we
+ // know that the block must end in either an unconditional branch, a
+ // resume, or a conditional branch that is predicated on a comparison
+ // with a selector. Either the resume or the selector dispatch
+ // would terminate the search for cleanup code, so the unconditional
+ // branch is the only case for which we might need to continue
+ // searching.
+ if (BB == EndBB)
+ return nullptr;
+ BasicBlock *SuccBB;
+ if (!match(BB->getTerminator(), m_UnconditionalBr(SuccBB)))
+ return nullptr;
+ BB = SuccBB;
+ continue;
+ }
+ }
+
+ // Create an entry in the cleanup handler map for this block. Initially
+ // we create an entry that says this isn't a cleanup block. If we find
+ // cleanup code, the caller will replace this entry.
+ CleanupHandlerMap[BB] = nullptr;
+
+ TerminatorInst *Terminator = BB->getTerminator();
+
+ // Landing pad blocks have extra instructions we need to accept.
+ LandingPadMap *LPadMap = nullptr;
+ if (BB->isLandingPad()) {
+ LandingPadInst *LPad = BB->getLandingPadInst();
+ LPadMap = &LPadMaps[LPad];
+ if (!LPadMap->isInitialized())
+ LPadMap->mapLandingPad(LPad);
+ }
+
+ // Look for the bare resume pattern:
+ // %exn2 = load i8** %exn.slot
+ // %sel2 = load i32* %ehselector.slot
+ // %lpad.val1 = insertvalue { i8*, i32 } undef, i8* %exn2, 0
+ // %lpad.val2 = insertvalue { i8*, i32 } %lpad.val1, i32 %sel2, 1
+ // resume { i8*, i32 } %lpad.val2
+ if (auto *Resume = dyn_cast<ResumeInst>(Terminator)) {
+ InsertValueInst *Insert1 = nullptr;
+ InsertValueInst *Insert2 = nullptr;
+ Value *ResumeVal = Resume->getOperand(0);
+ // If there is only one landingpad, we may use the lpad directly with no
+ // insertions.
+ if (isa<LandingPadInst>(ResumeVal))
+ return nullptr;
+ if (!isa<PHINode>(ResumeVal)) {
+ Insert2 = dyn_cast<InsertValueInst>(ResumeVal);
+ if (!Insert2)
+ return createCleanupHandler(CleanupHandlerMap, BB);
+ Insert1 = dyn_cast<InsertValueInst>(Insert2->getAggregateOperand());
+ if (!Insert1)
+ return createCleanupHandler(CleanupHandlerMap, BB);
+ }
+ for (BasicBlock::iterator II = BB->getFirstNonPHIOrDbg(), IE = BB->end();
+ II != IE; ++II) {
+ Instruction *Inst = II;
+ if (LPadMap && LPadMap->isLandingPadSpecificInst(Inst))
+ continue;
+ if (Inst == Insert1 || Inst == Insert2 || Inst == Resume)
+ continue;
+ if (!Inst->hasOneUse() ||
+ (Inst->user_back() != Insert1 && Inst->user_back() != Insert2)) {
+ return createCleanupHandler(CleanupHandlerMap, BB);
+ }
+ }
+ return nullptr;
+ }
+
+ BranchInst *Branch = dyn_cast<BranchInst>(Terminator);
+ if (Branch) {
+ if (Branch->isConditional()) {
+ // Look for the selector dispatch.
+ // %sel = load i32* %ehselector.slot
+ // %2 = call i32 @llvm.eh.typeid.for(i8* bitcast (i8** @_ZTIf to i8*))
+ // %matches = icmp eq i32 %sel12, %2
+ // br i1 %matches, label %catch14, label %eh.resume
+ CmpInst *Compare = dyn_cast<CmpInst>(Branch->getCondition());
+ if (!Compare || !Compare->isEquality())
+ return createCleanupHandler(CleanupHandlerMap, BB);
+ for (BasicBlock::iterator II = BB->getFirstNonPHIOrDbg(),
+ IE = BB->end();
+ II != IE; ++II) {
+ Instruction *Inst = II;
+ if (LPadMap && LPadMap->isLandingPadSpecificInst(Inst))
+ continue;
+ if (Inst == Compare || Inst == Branch)
+ continue;
+ if (!Inst->hasOneUse() || (Inst->user_back() != Compare))
+ return createCleanupHandler(CleanupHandlerMap, BB);
+ if (match(Inst, m_Intrinsic<Intrinsic::eh_typeid_for>()))
+ continue;
+ if (!isa<LoadInst>(Inst))
+ return createCleanupHandler(CleanupHandlerMap, BB);
+ }
+ // The selector dispatch block should always terminate our search.
+ assert(BB == EndBB);
+ return nullptr;
+ } else {
+ // Look for empty blocks with unconditional branches.
+ for (BasicBlock::iterator II = BB->getFirstNonPHIOrDbg(),
+ IE = BB->end();
+ II != IE; ++II) {
+ Instruction *Inst = II;
+ if (LPadMap && LPadMap->isLandingPadSpecificInst(Inst))
+ continue;
+ if (Inst == Branch)
+ continue;
+ if (match(Inst, m_Intrinsic<Intrinsic::eh_endcatch>()))
+ continue;
+ // Anything else makes this interesting cleanup code.
+ return createCleanupHandler(CleanupHandlerMap, BB);
+ }
+ if (BB == EndBB)
+ return nullptr;
+ // The branch was unconditional.
+ BB = Branch->getSuccessor(0);
+ continue;
+ } // End else of if branch was conditional
+ } // End if Branch
+
+ // Anything else makes this interesting cleanup code.
+ return createCleanupHandler(CleanupHandlerMap, BB);
+ }
return nullptr;
}
diff --git a/lib/DebugInfo/DWARF/DWARFDebugFrame.cpp b/lib/DebugInfo/DWARF/DWARFDebugFrame.cpp
index 7d77290..9f56214 100644
--- a/lib/DebugInfo/DWARF/DWARFDebugFrame.cpp
+++ b/lib/DebugInfo/DWARF/DWARFDebugFrame.cpp
@@ -9,6 +9,7 @@
#include "llvm/DebugInfo/DWARF/DWARFDebugFrame.h"
#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/DataTypes.h"
diff --git a/lib/DebugInfo/DWARF/DWARFDebugLoc.cpp b/lib/DebugInfo/DWARF/DWARFDebugLoc.cpp
index fdb6dd2..cd6fbef 100644
--- a/lib/DebugInfo/DWARF/DWARFDebugLoc.cpp
+++ b/lib/DebugInfo/DWARF/DWARFDebugLoc.cpp
@@ -67,8 +67,7 @@ void DWARFDebugLoc::parse(DataExtractor data, unsigned AddressSize) {
// A single location description describing the location of the object...
StringRef str = data.getData().substr(Offset, Bytes);
Offset += Bytes;
- E.Loc.reserve(str.size());
- std::copy(str.begin(), str.end(), std::back_inserter(E.Loc));
+ E.Loc.append(str.begin(), str.end());
Loc.Entries.push_back(std::move(E));
}
}
diff --git a/lib/DebugInfo/DWARF/DWARFFormValue.cpp b/lib/DebugInfo/DWARF/DWARFFormValue.cpp
index 45bd197..6946f83 100644
--- a/lib/DebugInfo/DWARF/DWARFFormValue.cpp
+++ b/lib/DebugInfo/DWARF/DWARFFormValue.cpp
@@ -18,6 +18,7 @@
#include "llvm/Support/Format.h"
#include "llvm/Support/raw_ostream.h"
#include <cassert>
+#include <climits>
using namespace llvm;
using namespace dwarf;
using namespace syntax;
@@ -557,6 +558,24 @@ Optional<uint64_t> DWARFFormValue::getAsUnsignedConstant() const {
return Value.uval;
}
+Optional<int64_t> DWARFFormValue::getAsSignedConstant() const {
+ if ((!isFormClass(FC_Constant) && !isFormClass(FC_Flag)) ||
+ (Form == DW_FORM_udata && uint64_t(LLONG_MAX) < Value.uval))
+ return None;
+ switch (Form) {
+ case DW_FORM_data4:
+ return int32_t(Value.uval);
+ case DW_FORM_data2:
+ return int16_t(Value.uval);
+ case DW_FORM_data1:
+ return int8_t(Value.uval);
+ case DW_FORM_sdata:
+ case DW_FORM_data8:
+ default:
+ return Value.sval;
+ }
+}
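+// Example for the function above (illustrative): a DW_FORM_data1 value of
+// 0xFF sign-extends through the int8_t cast to -1, while a DW_FORM_udata
+// value greater than LLONG_MAX has no int64_t representation and yields None.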
+
Optional<ArrayRef<uint8_t>> DWARFFormValue::getAsBlock() const {
if (!isFormClass(FC_Block) && !isFormClass(FC_Exprloc))
return None;
diff --git a/lib/DebugInfo/PDB/CMakeLists.txt b/lib/DebugInfo/PDB/CMakeLists.txt
index 87e357e..ed8c674 100644
--- a/lib/DebugInfo/PDB/CMakeLists.txt
+++ b/lib/DebugInfo/PDB/CMakeLists.txt
@@ -73,4 +73,4 @@ add_llvm_library(LLVMDebugInfoPDB
${LIBPDB_ADDITIONAL_HEADER_DIRS}
)
-target_link_libraries(LLVMDebugInfoPDB ${cmake_2_8_12_INTERFACE} "${LIBPDB_ADDITIONAL_LIBRARIES}")
+target_link_libraries(LLVMDebugInfoPDB INTERFACE "${LIBPDB_ADDITIONAL_LIBRARIES}")
diff --git a/lib/DebugInfo/PDB/DIA/DIASession.cpp b/lib/DebugInfo/PDB/DIA/DIASession.cpp
index 24791f2..4966bea 100644
--- a/lib/DebugInfo/PDB/DIA/DIASession.cpp
+++ b/lib/DebugInfo/PDB/DIA/DIASession.cpp
@@ -23,28 +23,45 @@ namespace {}
DIASession::DIASession(CComPtr<IDiaSession> DiaSession) : Session(DiaSession) {}
-DIASession *DIASession::createFromPdb(StringRef Path) {
- CComPtr<IDiaDataSource> DataSource;
- CComPtr<IDiaSession> Session;
+PDB_ErrorCode DIASession::createFromPdb(StringRef Path,
+ std::unique_ptr<IPDBSession> &Session) {
+ CComPtr<IDiaDataSource> DiaDataSource;
+ CComPtr<IDiaSession> DiaSession;
// We assume that CoInitializeEx has already been called by the executable.
- HRESULT Result = ::CoCreateInstance(CLSID_DiaSource, nullptr,
- CLSCTX_INPROC_SERVER, IID_IDiaDataSource,
- reinterpret_cast<LPVOID *>(&DataSource));
+ HRESULT Result = ::CoCreateInstance(
+ CLSID_DiaSource, nullptr, CLSCTX_INPROC_SERVER, IID_IDiaDataSource,
+ reinterpret_cast<LPVOID *>(&DiaDataSource));
if (FAILED(Result))
- return nullptr;
+ return PDB_ErrorCode::NoPdbImpl;
llvm::SmallVector<UTF16, 128> Path16;
if (!llvm::convertUTF8ToUTF16String(Path, Path16))
- return nullptr;
+ return PDB_ErrorCode::InvalidPath;
const wchar_t *Path16Str = reinterpret_cast<const wchar_t*>(Path16.data());
- if (FAILED(DataSource->loadDataFromPdb(Path16Str)))
- return nullptr;
-
- if (FAILED(DataSource->openSession(&Session)))
- return nullptr;
- return new DIASession(Session);
+ if (FAILED(Result = DiaDataSource->loadDataFromPdb(Path16Str))) {
+ if (Result == E_PDB_NOT_FOUND)
+ return PDB_ErrorCode::InvalidPath;
+ else if (Result == E_PDB_FORMAT)
+ return PDB_ErrorCode::InvalidFileFormat;
+ else if (Result == E_INVALIDARG)
+ return PDB_ErrorCode::InvalidParameter;
+ else if (Result == E_UNEXPECTED)
+ return PDB_ErrorCode::AlreadyLoaded;
+ else
+ return PDB_ErrorCode::UnknownError;
+ }
+
+ if (FAILED(Result = DiaDataSource->openSession(&DiaSession))) {
+ if (Result == E_OUTOFMEMORY)
+ return PDB_ErrorCode::NoMemory;
+ else
+ return PDB_ErrorCode::UnknownError;
+ }
+
+ Session.reset(new DIASession(DiaSession));
+ return PDB_ErrorCode::Success;
}
uint64_t DIASession::getLoadAddress() const {
diff --git a/lib/DebugInfo/PDB/PDB.cpp b/lib/DebugInfo/PDB/PDB.cpp
index aa84c28..a07396d 100644
--- a/lib/DebugInfo/PDB/PDB.cpp
+++ b/lib/DebugInfo/PDB/PDB.cpp
@@ -20,11 +20,11 @@
using namespace llvm;
-std::unique_ptr<IPDBSession> llvm::createPDBReader(PDB_ReaderType Type,
- StringRef Path) {
+PDB_ErrorCode llvm::createPDBReader(PDB_ReaderType Type, StringRef Path,
+ std::unique_ptr<IPDBSession> &Session) {
// Create the correct concrete instance type based on the value of Type.
#if HAVE_DIA_SDK
- return std::unique_ptr<DIASession>(DIASession::createFromPdb(Path));
+ return DIASession::createFromPdb(Path, Session);
#endif
- return nullptr;
+ return PDB_ErrorCode::NoPdbImpl;
}
diff --git a/lib/DebugInfo/PDB/PDBExtras.cpp b/lib/DebugInfo/PDB/PDBExtras.cpp
index 1002b2e..4b9437c 100644
--- a/lib/DebugInfo/PDB/PDBExtras.cpp
+++ b/lib/DebugInfo/PDB/PDBExtras.cpp
@@ -247,27 +247,21 @@ raw_ostream &llvm::operator<<(raw_ostream &OS, const PDB_SymType &Tag) {
return OS;
}
-raw_ostream &llvm::operator<<(raw_ostream &OS, const PDB_BuiltinType &Type) {
+raw_ostream &llvm::operator<<(raw_ostream &OS, const PDB_MemberAccess &Access) {
+ switch (Access) {
+ CASE_OUTPUT_ENUM_CLASS_STR(PDB_MemberAccess, Public, "public", OS)
+ CASE_OUTPUT_ENUM_CLASS_STR(PDB_MemberAccess, Protected, "protected", OS)
+ CASE_OUTPUT_ENUM_CLASS_STR(PDB_MemberAccess, Private, "private", OS)
+ }
+ return OS;
+}
+
+raw_ostream &llvm::operator<<(raw_ostream &OS, const PDB_UdtType &Type) {
switch (Type) {
- CASE_OUTPUT_ENUM_CLASS_STR(PDB_BuiltinType, Void, "void", OS)
- CASE_OUTPUT_ENUM_CLASS_STR(PDB_BuiltinType, Char, "char", OS)
- CASE_OUTPUT_ENUM_CLASS_STR(PDB_BuiltinType, WCharT, "wchar_t", OS)
- CASE_OUTPUT_ENUM_CLASS_STR(PDB_BuiltinType, Int, "int", OS)
- CASE_OUTPUT_ENUM_CLASS_STR(PDB_BuiltinType, UInt, "uint", OS)
- CASE_OUTPUT_ENUM_CLASS_STR(PDB_BuiltinType, Float, "float", OS)
- CASE_OUTPUT_ENUM_CLASS_STR(PDB_BuiltinType, BCD, "BCD", OS)
- CASE_OUTPUT_ENUM_CLASS_STR(PDB_BuiltinType, Bool, "bool", OS)
- CASE_OUTPUT_ENUM_CLASS_STR(PDB_BuiltinType, Long, "long", OS)
- CASE_OUTPUT_ENUM_CLASS_STR(PDB_BuiltinType, ULong, "ulong", OS)
- CASE_OUTPUT_ENUM_CLASS_STR(PDB_BuiltinType, Currency, "CURRENCY", OS)
- CASE_OUTPUT_ENUM_CLASS_STR(PDB_BuiltinType, Date, "DATE", OS)
- CASE_OUTPUT_ENUM_CLASS_STR(PDB_BuiltinType, Variant, "VARIANT", OS)
- CASE_OUTPUT_ENUM_CLASS_STR(PDB_BuiltinType, Complex, "complex", OS)
- CASE_OUTPUT_ENUM_CLASS_STR(PDB_BuiltinType, Bitfield, "bitfield", OS)
- CASE_OUTPUT_ENUM_CLASS_STR(PDB_BuiltinType, BSTR, "BSTR", OS)
- CASE_OUTPUT_ENUM_CLASS_STR(PDB_BuiltinType, HResult, "HRESULT", OS)
- default:
- break;
+ CASE_OUTPUT_ENUM_CLASS_STR(PDB_UdtType, Class, "class", OS)
+ CASE_OUTPUT_ENUM_CLASS_STR(PDB_UdtType, Struct, "struct", OS)
+ CASE_OUTPUT_ENUM_CLASS_STR(PDB_UdtType, Interface, "interface", OS)
+ CASE_OUTPUT_ENUM_CLASS_STR(PDB_UdtType, Union, "union", OS)
}
return OS;
}
@@ -309,7 +303,7 @@ raw_ostream &llvm::operator<<(raw_ostream &OS, const Variant &Value) {
OS << Value.Int64;
break;
case PDB_VariantType::Int8:
- OS << Value.Int8;
+ OS << static_cast<int>(Value.Int8);
break;
case PDB_VariantType::Single:
OS << Value.Single;
@@ -324,12 +318,11 @@ raw_ostream &llvm::operator<<(raw_ostream &OS, const Variant &Value) {
OS << Value.UInt64;
break;
case PDB_VariantType::UInt8:
- OS << Value.UInt8;
+ OS << static_cast<unsigned>(Value.UInt8);
break;
default:
OS << Value.Type;
}
- OS << " {" << Value.Type << "}";
return OS;
}
diff --git a/lib/DebugInfo/PDB/PDBSymDumper.cpp b/lib/DebugInfo/PDB/PDBSymDumper.cpp
index 0f29c74..121e2d1 100644
--- a/lib/DebugInfo/PDB/PDBSymDumper.cpp
+++ b/lib/DebugInfo/PDB/PDBSymDumper.cpp
@@ -21,157 +21,126 @@ PDBSymDumper::PDBSymDumper(bool ShouldRequireImpl)
PDBSymDumper::~PDBSymDumper() {}
-void PDBSymDumper::dump(const PDBSymbolAnnotation &Symbol, raw_ostream &OS,
- int Indent) {
+void PDBSymDumper::dump(const PDBSymbolAnnotation &Symbol) {
PDB_SYMDUMP_UNREACHABLE(PDBSymbolAnnotation)
}
-void PDBSymDumper::dump(const PDBSymbolBlock &Symbol, raw_ostream &OS,
- int Indent) {
+void PDBSymDumper::dump(const PDBSymbolBlock &Symbol) {
PDB_SYMDUMP_UNREACHABLE(PDBSymbolBlock)
}
-void PDBSymDumper::dump(const PDBSymbolCompiland &Symbol, raw_ostream &OS,
- int Indent) {
+void PDBSymDumper::dump(const PDBSymbolCompiland &Symbol) {
PDB_SYMDUMP_UNREACHABLE(PDBSymbolCompiland)
}
-void PDBSymDumper::dump(const PDBSymbolCompilandDetails &Symbol,
- raw_ostream &OS, int Indent) {
+void PDBSymDumper::dump(const PDBSymbolCompilandDetails &Symbol) {
PDB_SYMDUMP_UNREACHABLE(PDBSymbolCompilandDetails)
}
-void PDBSymDumper::dump(const PDBSymbolCompilandEnv &Symbol, raw_ostream &OS,
- int Indent) {
+void PDBSymDumper::dump(const PDBSymbolCompilandEnv &Symbol) {
PDB_SYMDUMP_UNREACHABLE(PDBSymbolCompilandEnv)
}
-void PDBSymDumper::dump(const PDBSymbolCustom &Symbol, raw_ostream &OS,
- int Indent) {
+void PDBSymDumper::dump(const PDBSymbolCustom &Symbol) {
PDB_SYMDUMP_UNREACHABLE(PDBSymbolCustom)
}
-void PDBSymDumper::dump(const PDBSymbolData &Symbol, raw_ostream &OS,
- int Indent) {
+void PDBSymDumper::dump(const PDBSymbolData &Symbol) {
PDB_SYMDUMP_UNREACHABLE(PDBSymbolData)
}
-void PDBSymDumper::dump(const PDBSymbolExe &Symbol, raw_ostream &OS,
- int Indent) {
+void PDBSymDumper::dump(const PDBSymbolExe &Symbol) {
PDB_SYMDUMP_UNREACHABLE(PDBSymbolExe)
}
-void PDBSymDumper::dump(const PDBSymbolFunc &Symbol, raw_ostream &OS,
- int Indent) {
+void PDBSymDumper::dump(const PDBSymbolFunc &Symbol) {
PDB_SYMDUMP_UNREACHABLE(PDBSymbolFunc)
}
-void PDBSymDumper::dump(const PDBSymbolFuncDebugEnd &Symbol, raw_ostream &OS,
- int Indent) {
+void PDBSymDumper::dump(const PDBSymbolFuncDebugEnd &Symbol) {
PDB_SYMDUMP_UNREACHABLE(PDBSymbolFuncDebugEnd)
}
-void PDBSymDumper::dump(const PDBSymbolFuncDebugStart &Symbol, raw_ostream &OS,
- int Indent) {
+void PDBSymDumper::dump(const PDBSymbolFuncDebugStart &Symbol) {
PDB_SYMDUMP_UNREACHABLE(PDBSymbolFuncDebugStart)
}
-void PDBSymDumper::dump(const PDBSymbolLabel &Symbol, raw_ostream &OS,
- int Indent) {
+void PDBSymDumper::dump(const PDBSymbolLabel &Symbol) {
PDB_SYMDUMP_UNREACHABLE(PDBSymbolLabel)
}
-void PDBSymDumper::dump(const PDBSymbolPublicSymbol &Symbol, raw_ostream &OS,
- int Indent) {
+void PDBSymDumper::dump(const PDBSymbolPublicSymbol &Symbol) {
PDB_SYMDUMP_UNREACHABLE(PDBSymbolPublicSymbol)
}
-void PDBSymDumper::dump(const PDBSymbolThunk &Symbol, raw_ostream &OS,
- int Indent) {
+void PDBSymDumper::dump(const PDBSymbolThunk &Symbol) {
PDB_SYMDUMP_UNREACHABLE(PDBSymbolThunk)
}
-void PDBSymDumper::dump(const PDBSymbolTypeArray &Symbol, raw_ostream &OS,
- int Indent) {
+void PDBSymDumper::dump(const PDBSymbolTypeArray &Symbol) {
PDB_SYMDUMP_UNREACHABLE(PDBSymbolTypeArray)
}
-void PDBSymDumper::dump(const PDBSymbolTypeBaseClass &Symbol, raw_ostream &OS,
- int Indent) {
+void PDBSymDumper::dump(const PDBSymbolTypeBaseClass &Symbol) {
PDB_SYMDUMP_UNREACHABLE(PDBSymbolTypeBaseClass)
}
-void PDBSymDumper::dump(const PDBSymbolTypeBuiltin &Symbol, raw_ostream &OS,
- int Indent) {
+void PDBSymDumper::dump(const PDBSymbolTypeBuiltin &Symbol) {
PDB_SYMDUMP_UNREACHABLE(PDBSymbolTypeBuiltin)
}
-void PDBSymDumper::dump(const PDBSymbolTypeCustom &Symbol, raw_ostream &OS,
- int Indent) {
+void PDBSymDumper::dump(const PDBSymbolTypeCustom &Symbol) {
PDB_SYMDUMP_UNREACHABLE(PDBSymbolTypeCustom)
}
-void PDBSymDumper::dump(const PDBSymbolTypeDimension &Symbol, raw_ostream &OS,
- int Indent) {
+void PDBSymDumper::dump(const PDBSymbolTypeDimension &Symbol) {
PDB_SYMDUMP_UNREACHABLE(PDBSymbolTypeDimension)
}
-void PDBSymDumper::dump(const PDBSymbolTypeEnum &Symbol, raw_ostream &OS,
- int Indent) {
+void PDBSymDumper::dump(const PDBSymbolTypeEnum &Symbol) {
PDB_SYMDUMP_UNREACHABLE(PDBSymbolTypeEnum)
}
-void PDBSymDumper::dump(const PDBSymbolTypeFriend &Symbol, raw_ostream &OS,
- int Indent) {
+void PDBSymDumper::dump(const PDBSymbolTypeFriend &Symbol) {
PDB_SYMDUMP_UNREACHABLE(PDBSymbolTypeFriend)
}
-void PDBSymDumper::dump(const PDBSymbolTypeFunctionArg &Symbol, raw_ostream &OS,
- int Indent) {
+void PDBSymDumper::dump(const PDBSymbolTypeFunctionArg &Symbol) {
PDB_SYMDUMP_UNREACHABLE(PDBSymbolTypeFunctionArg)
}
-void PDBSymDumper::dump(const PDBSymbolTypeFunctionSig &Symbol, raw_ostream &OS,
- int Indent) {
+void PDBSymDumper::dump(const PDBSymbolTypeFunctionSig &Symbol) {
PDB_SYMDUMP_UNREACHABLE(PDBSymbolTypeFunctionSig)
}
-void PDBSymDumper::dump(const PDBSymbolTypeManaged &Symbol, raw_ostream &OS,
- int Indent) {
+void PDBSymDumper::dump(const PDBSymbolTypeManaged &Symbol) {
PDB_SYMDUMP_UNREACHABLE(PDBSymbolTypeManaged)
}
-void PDBSymDumper::dump(const PDBSymbolTypePointer &Symbol, raw_ostream &OS,
- int Indent) {
+void PDBSymDumper::dump(const PDBSymbolTypePointer &Symbol) {
PDB_SYMDUMP_UNREACHABLE(PDBSymbolTypePointer)
}
-void PDBSymDumper::dump(const PDBSymbolTypeTypedef &Symbol, raw_ostream &OS,
- int Indent) {
+void PDBSymDumper::dump(const PDBSymbolTypeTypedef &Symbol) {
PDB_SYMDUMP_UNREACHABLE(PDBSymbolTypeTypedef)
}
-void PDBSymDumper::dump(const PDBSymbolTypeUDT &Symbol, raw_ostream &OS,
- int Indent) {
+void PDBSymDumper::dump(const PDBSymbolTypeUDT &Symbol) {
PDB_SYMDUMP_UNREACHABLE(PDBSymbolTypeUDT)
}
-void PDBSymDumper::dump(const PDBSymbolTypeVTable &Symbol, raw_ostream &OS,
- int Indent) {
+void PDBSymDumper::dump(const PDBSymbolTypeVTable &Symbol) {
PDB_SYMDUMP_UNREACHABLE(PDBSymbolTypeVTable)
}
-void PDBSymDumper::dump(const PDBSymbolTypeVTableShape &Symbol, raw_ostream &OS,
- int Indent) {
+void PDBSymDumper::dump(const PDBSymbolTypeVTableShape &Symbol) {
PDB_SYMDUMP_UNREACHABLE(PDBSymbolTypeVTableShape)
}
-void PDBSymDumper::dump(const PDBSymbolUnknown &Symbol, raw_ostream &OS,
- int Indent) {
+void PDBSymDumper::dump(const PDBSymbolUnknown &Symbol) {
PDB_SYMDUMP_UNREACHABLE(PDBSymbolUnknown)
}
-void PDBSymDumper::dump(const PDBSymbolUsingNamespace &Symbol, raw_ostream &OS,
- int Indent) {
+void PDBSymDumper::dump(const PDBSymbolUsingNamespace &Symbol) {
PDB_SYMDUMP_UNREACHABLE(PDBSymbolUsingNamespace)
}
diff --git a/lib/DebugInfo/PDB/PDBSymbolAnnotation.cpp b/lib/DebugInfo/PDB/PDBSymbolAnnotation.cpp
index 4c76e3b..a782cad 100644
--- a/lib/DebugInfo/PDB/PDBSymbolAnnotation.cpp
+++ b/lib/DebugInfo/PDB/PDBSymbolAnnotation.cpp
@@ -19,7 +19,6 @@ PDBSymbolAnnotation::PDBSymbolAnnotation(const IPDBSession &PDBSession,
std::unique_ptr<IPDBRawSymbol> Symbol)
: PDBSymbol(PDBSession, std::move(Symbol)) {}
-void PDBSymbolAnnotation::dump(raw_ostream &OS, int Indent,
- PDBSymDumper &Dumper) const {
- Dumper.dump(*this, OS, Indent);
+void PDBSymbolAnnotation::dump(PDBSymDumper &Dumper) const {
+ Dumper.dump(*this);
}
diff --git a/lib/DebugInfo/PDB/PDBSymbolBlock.cpp b/lib/DebugInfo/PDB/PDBSymbolBlock.cpp
index bb159d5..46b0ea5 100644
--- a/lib/DebugInfo/PDB/PDBSymbolBlock.cpp
+++ b/lib/DebugInfo/PDB/PDBSymbolBlock.cpp
@@ -20,7 +20,4 @@ PDBSymbolBlock::PDBSymbolBlock(const IPDBSession &PDBSession,
std::unique_ptr<IPDBRawSymbol> Symbol)
: PDBSymbol(PDBSession, std::move(Symbol)) {}
-void PDBSymbolBlock::dump(raw_ostream &OS, int Indent,
- PDBSymDumper &Dumper) const {
- Dumper.dump(*this, OS, Indent);
-}
+void PDBSymbolBlock::dump(PDBSymDumper &Dumper) const { Dumper.dump(*this); }
diff --git a/lib/DebugInfo/PDB/PDBSymbolCompiland.cpp b/lib/DebugInfo/PDB/PDBSymbolCompiland.cpp
index 0c9b190..7436914 100644
--- a/lib/DebugInfo/PDB/PDBSymbolCompiland.cpp
+++ b/lib/DebugInfo/PDB/PDBSymbolCompiland.cpp
@@ -19,7 +19,6 @@ PDBSymbolCompiland::PDBSymbolCompiland(const IPDBSession &PDBSession,
std::unique_ptr<IPDBRawSymbol> Symbol)
: PDBSymbol(PDBSession, std::move(Symbol)) {}
-void PDBSymbolCompiland::dump(raw_ostream &OS, int Indent,
- PDBSymDumper &Dumper) const {
- Dumper.dump(*this, OS, Indent);
+void PDBSymbolCompiland::dump(PDBSymDumper &Dumper) const {
+ Dumper.dump(*this);
}
diff --git a/lib/DebugInfo/PDB/PDBSymbolCompilandDetails.cpp b/lib/DebugInfo/PDB/PDBSymbolCompilandDetails.cpp
index 208d68f..7b351a0 100644
--- a/lib/DebugInfo/PDB/PDBSymbolCompilandDetails.cpp
+++ b/lib/DebugInfo/PDB/PDBSymbolCompilandDetails.cpp
@@ -20,7 +20,6 @@ PDBSymbolCompilandDetails::PDBSymbolCompilandDetails(
const IPDBSession &PDBSession, std::unique_ptr<IPDBRawSymbol> Symbol)
: PDBSymbol(PDBSession, std::move(Symbol)) {}
-void PDBSymbolCompilandDetails::dump(raw_ostream &OS, int Indent,
- PDBSymDumper &Dumper) const {
- Dumper.dump(*this, OS, Indent);
+void PDBSymbolCompilandDetails::dump(PDBSymDumper &Dumper) const {
+ Dumper.dump(*this);
}
diff --git a/lib/DebugInfo/PDB/PDBSymbolCompilandEnv.cpp b/lib/DebugInfo/PDB/PDBSymbolCompilandEnv.cpp
index c54b8fb..e863ccf 100644
--- a/lib/DebugInfo/PDB/PDBSymbolCompilandEnv.cpp
+++ b/lib/DebugInfo/PDB/PDBSymbolCompilandEnv.cpp
@@ -26,7 +26,6 @@ std::string PDBSymbolCompilandEnv::getValue() const {
return std::string();
}
-void PDBSymbolCompilandEnv::dump(raw_ostream &OS, int Indent,
- PDBSymDumper &Dumper) const {
- Dumper.dump(*this, OS, Indent);
+void PDBSymbolCompilandEnv::dump(PDBSymDumper &Dumper) const {
+ Dumper.dump(*this);
}
diff --git a/lib/DebugInfo/PDB/PDBSymbolCustom.cpp b/lib/DebugInfo/PDB/PDBSymbolCustom.cpp
index 1b6b50b..45faa0b 100644
--- a/lib/DebugInfo/PDB/PDBSymbolCustom.cpp
+++ b/lib/DebugInfo/PDB/PDBSymbolCustom.cpp
@@ -25,7 +25,4 @@ void PDBSymbolCustom::getDataBytes(llvm::SmallVector<uint8_t, 32> &bytes) {
RawSymbol->getDataBytes(bytes);
}
-void PDBSymbolCustom::dump(raw_ostream &OS, int Indent,
- PDBSymDumper &Dumper) const {
- Dumper.dump(*this, OS, Indent);
-}
\ No newline at end of file
+void PDBSymbolCustom::dump(PDBSymDumper &Dumper) const { Dumper.dump(*this); }
\ No newline at end of file
diff --git a/lib/DebugInfo/PDB/PDBSymbolData.cpp b/lib/DebugInfo/PDB/PDBSymbolData.cpp
index 6bf7e0f..60dcbc1 100644
--- a/lib/DebugInfo/PDB/PDBSymbolData.cpp
+++ b/lib/DebugInfo/PDB/PDBSymbolData.cpp
@@ -24,7 +24,4 @@ std::unique_ptr<PDBSymbol> PDBSymbolData::getType() const {
return Session.getSymbolById(getTypeId());
}
-void PDBSymbolData::dump(raw_ostream &OS, int Indent,
- PDBSymDumper &Dumper) const {
- Dumper.dump(*this, OS, Indent);
-}
\ No newline at end of file
+void PDBSymbolData::dump(PDBSymDumper &Dumper) const { Dumper.dump(*this); }
\ No newline at end of file
diff --git a/lib/DebugInfo/PDB/PDBSymbolExe.cpp b/lib/DebugInfo/PDB/PDBSymbolExe.cpp
index ef09193..c9e34ea 100644
--- a/lib/DebugInfo/PDB/PDBSymbolExe.cpp
+++ b/lib/DebugInfo/PDB/PDBSymbolExe.cpp
@@ -19,7 +19,4 @@ PDBSymbolExe::PDBSymbolExe(const IPDBSession &PDBSession,
std::unique_ptr<IPDBRawSymbol> Symbol)
: PDBSymbol(PDBSession, std::move(Symbol)) {}
-void PDBSymbolExe::dump(raw_ostream &OS, int Indent,
- PDBSymDumper &Dumper) const {
- Dumper.dump(*this, OS, Indent);
-}
+void PDBSymbolExe::dump(PDBSymDumper &Dumper) const { Dumper.dump(*this); }
diff --git a/lib/DebugInfo/PDB/PDBSymbolFunc.cpp b/lib/DebugInfo/PDB/PDBSymbolFunc.cpp
index e2d859f..b14af07 100644
--- a/lib/DebugInfo/PDB/PDBSymbolFunc.cpp
+++ b/lib/DebugInfo/PDB/PDBSymbolFunc.cpp
@@ -98,7 +98,4 @@ std::unique_ptr<PDBSymbolTypeUDT> PDBSymbolFunc::getClassParent() const {
return Session.getConcreteSymbolById<PDBSymbolTypeUDT>(getClassParentId());
}
-void PDBSymbolFunc::dump(raw_ostream &OS, int Indent,
- PDBSymDumper &Dumper) const {
- Dumper.dump(*this, OS, Indent);
-}
+void PDBSymbolFunc::dump(PDBSymDumper &Dumper) const { Dumper.dump(*this); }
diff --git a/lib/DebugInfo/PDB/PDBSymbolFuncDebugEnd.cpp b/lib/DebugInfo/PDB/PDBSymbolFuncDebugEnd.cpp
index c207488..8e559b3 100644
--- a/lib/DebugInfo/PDB/PDBSymbolFuncDebugEnd.cpp
+++ b/lib/DebugInfo/PDB/PDBSymbolFuncDebugEnd.cpp
@@ -20,7 +20,6 @@ PDBSymbolFuncDebugEnd::PDBSymbolFuncDebugEnd(
const IPDBSession &PDBSession, std::unique_ptr<IPDBRawSymbol> Symbol)
: PDBSymbol(PDBSession, std::move(Symbol)) {}
-void PDBSymbolFuncDebugEnd::dump(raw_ostream &OS, int Indent,
- PDBSymDumper &Dumper) const {
- Dumper.dump(*this, OS, Indent);
+void PDBSymbolFuncDebugEnd::dump(PDBSymDumper &Dumper) const {
+ Dumper.dump(*this);
}
diff --git a/lib/DebugInfo/PDB/PDBSymbolFuncDebugStart.cpp b/lib/DebugInfo/PDB/PDBSymbolFuncDebugStart.cpp
index 83df22e..ff4254f 100644
--- a/lib/DebugInfo/PDB/PDBSymbolFuncDebugStart.cpp
+++ b/lib/DebugInfo/PDB/PDBSymbolFuncDebugStart.cpp
@@ -20,7 +20,6 @@ PDBSymbolFuncDebugStart::PDBSymbolFuncDebugStart(
const IPDBSession &PDBSession, std::unique_ptr<IPDBRawSymbol> Symbol)
: PDBSymbol(PDBSession, std::move(Symbol)) {}
-void PDBSymbolFuncDebugStart::dump(raw_ostream &OS, int Indent,
- PDBSymDumper &Dumper) const {
- Dumper.dump(*this, OS, Indent);
+void PDBSymbolFuncDebugStart::dump(PDBSymDumper &Dumper) const {
+ Dumper.dump(*this);
}
diff --git a/lib/DebugInfo/PDB/PDBSymbolLabel.cpp b/lib/DebugInfo/PDB/PDBSymbolLabel.cpp
index ce569e2..f39dee8 100644
--- a/lib/DebugInfo/PDB/PDBSymbolLabel.cpp
+++ b/lib/DebugInfo/PDB/PDBSymbolLabel.cpp
@@ -19,7 +19,4 @@ PDBSymbolLabel::PDBSymbolLabel(const IPDBSession &PDBSession,
std::unique_ptr<IPDBRawSymbol> Symbol)
: PDBSymbol(PDBSession, std::move(Symbol)) {}
-void PDBSymbolLabel::dump(raw_ostream &OS, int Indent,
- PDBSymDumper &Dumper) const {
- Dumper.dump(*this, OS, Indent);
-}
+void PDBSymbolLabel::dump(PDBSymDumper &Dumper) const { Dumper.dump(*this); }
diff --git a/lib/DebugInfo/PDB/PDBSymbolPublicSymbol.cpp b/lib/DebugInfo/PDB/PDBSymbolPublicSymbol.cpp
index a7f156c..bd6fe89 100644
--- a/lib/DebugInfo/PDB/PDBSymbolPublicSymbol.cpp
+++ b/lib/DebugInfo/PDB/PDBSymbolPublicSymbol.cpp
@@ -20,7 +20,6 @@ PDBSymbolPublicSymbol::PDBSymbolPublicSymbol(
const IPDBSession &PDBSession, std::unique_ptr<IPDBRawSymbol> Symbol)
: PDBSymbol(PDBSession, std::move(Symbol)) {}
-void PDBSymbolPublicSymbol::dump(raw_ostream &OS, int Indent,
- PDBSymDumper &Dumper) const {
- Dumper.dump(*this, OS, Indent);
+void PDBSymbolPublicSymbol::dump(PDBSymDumper &Dumper) const {
+ Dumper.dump(*this);
}
diff --git a/lib/DebugInfo/PDB/PDBSymbolThunk.cpp b/lib/DebugInfo/PDB/PDBSymbolThunk.cpp
index edade83..733eb5f 100644
--- a/lib/DebugInfo/PDB/PDBSymbolThunk.cpp
+++ b/lib/DebugInfo/PDB/PDBSymbolThunk.cpp
@@ -19,7 +19,4 @@ PDBSymbolThunk::PDBSymbolThunk(const IPDBSession &PDBSession,
std::unique_ptr<IPDBRawSymbol> Symbol)
: PDBSymbol(PDBSession, std::move(Symbol)) {}
-void PDBSymbolThunk::dump(raw_ostream &OS, int Indent,
- PDBSymDumper &Dumper) const {
- Dumper.dump(*this, OS, Indent);
-}
+void PDBSymbolThunk::dump(PDBSymDumper &Dumper) const { Dumper.dump(*this); }
diff --git a/lib/DebugInfo/PDB/PDBSymbolTypeArray.cpp b/lib/DebugInfo/PDB/PDBSymbolTypeArray.cpp
index ffe6c80..1980965 100644
--- a/lib/DebugInfo/PDB/PDBSymbolTypeArray.cpp
+++ b/lib/DebugInfo/PDB/PDBSymbolTypeArray.cpp
@@ -24,7 +24,6 @@ std::unique_ptr<PDBSymbol> PDBSymbolTypeArray::getElementType() const {
return Session.getSymbolById(getTypeId());
}
-void PDBSymbolTypeArray::dump(raw_ostream &OS, int Indent,
- PDBSymDumper &Dumper) const {
- Dumper.dump(*this, OS, Indent);
+void PDBSymbolTypeArray::dump(PDBSymDumper &Dumper) const {
+ Dumper.dump(*this);
}
diff --git a/lib/DebugInfo/PDB/PDBSymbolTypeBaseClass.cpp b/lib/DebugInfo/PDB/PDBSymbolTypeBaseClass.cpp
index c44cc52..c1f0d2f 100644
--- a/lib/DebugInfo/PDB/PDBSymbolTypeBaseClass.cpp
+++ b/lib/DebugInfo/PDB/PDBSymbolTypeBaseClass.cpp
@@ -20,7 +20,6 @@ PDBSymbolTypeBaseClass::PDBSymbolTypeBaseClass(
const IPDBSession &PDBSession, std::unique_ptr<IPDBRawSymbol> Symbol)
: PDBSymbol(PDBSession, std::move(Symbol)) {}
-void PDBSymbolTypeBaseClass::dump(raw_ostream &OS, int Indent,
- PDBSymDumper &Dumper) const {
- Dumper.dump(*this, OS, Indent);
+void PDBSymbolTypeBaseClass::dump(PDBSymDumper &Dumper) const {
+ Dumper.dump(*this);
}
diff --git a/lib/DebugInfo/PDB/PDBSymbolTypeBuiltin.cpp b/lib/DebugInfo/PDB/PDBSymbolTypeBuiltin.cpp
index f0c94c7..b302b66 100644
--- a/lib/DebugInfo/PDB/PDBSymbolTypeBuiltin.cpp
+++ b/lib/DebugInfo/PDB/PDBSymbolTypeBuiltin.cpp
@@ -19,7 +19,6 @@ PDBSymbolTypeBuiltin::PDBSymbolTypeBuiltin(
const IPDBSession &PDBSession, std::unique_ptr<IPDBRawSymbol> Symbol)
: PDBSymbol(PDBSession, std::move(Symbol)) {}
-void PDBSymbolTypeBuiltin::dump(raw_ostream &OS, int Indent,
- PDBSymDumper &Dumper) const {
- Dumper.dump(*this, OS, Indent);
+void PDBSymbolTypeBuiltin::dump(PDBSymDumper &Dumper) const {
+ Dumper.dump(*this);
}
diff --git a/lib/DebugInfo/PDB/PDBSymbolTypeCustom.cpp b/lib/DebugInfo/PDB/PDBSymbolTypeCustom.cpp
index 0fa8f45..cc391f1 100644
--- a/lib/DebugInfo/PDB/PDBSymbolTypeCustom.cpp
+++ b/lib/DebugInfo/PDB/PDBSymbolTypeCustom.cpp
@@ -20,7 +20,6 @@ PDBSymbolTypeCustom::PDBSymbolTypeCustom(const IPDBSession &PDBSession,
std::unique_ptr<IPDBRawSymbol> Symbol)
: PDBSymbol(PDBSession, std::move(Symbol)) {}
-void PDBSymbolTypeCustom::dump(raw_ostream &OS, int Indent,
- PDBSymDumper &Dumper) const {
- Dumper.dump(*this, OS, Indent);
+void PDBSymbolTypeCustom::dump(PDBSymDumper &Dumper) const {
+ Dumper.dump(*this);
}
diff --git a/lib/DebugInfo/PDB/PDBSymbolTypeDimension.cpp b/lib/DebugInfo/PDB/PDBSymbolTypeDimension.cpp
index 47fb08d..1e19d0b 100644
--- a/lib/DebugInfo/PDB/PDBSymbolTypeDimension.cpp
+++ b/lib/DebugInfo/PDB/PDBSymbolTypeDimension.cpp
@@ -21,7 +21,6 @@ PDBSymbolTypeDimension::PDBSymbolTypeDimension(
const IPDBSession &PDBSession, std::unique_ptr<IPDBRawSymbol> Symbol)
: PDBSymbol(PDBSession, std::move(Symbol)) {}
-void PDBSymbolTypeDimension::dump(raw_ostream &OS, int Indent,
- PDBSymDumper &Dumper) const {
- Dumper.dump(*this, OS, Indent);
+void PDBSymbolTypeDimension::dump(PDBSymDumper &Dumper) const {
+ Dumper.dump(*this);
}
diff --git a/lib/DebugInfo/PDB/PDBSymbolTypeEnum.cpp b/lib/DebugInfo/PDB/PDBSymbolTypeEnum.cpp
index 121d41e..8dd26a3 100644
--- a/lib/DebugInfo/PDB/PDBSymbolTypeEnum.cpp
+++ b/lib/DebugInfo/PDB/PDBSymbolTypeEnum.cpp
@@ -9,7 +9,10 @@
#include "llvm/DebugInfo/PDB/PDBSymbolTypeEnum.h"
+#include "llvm/DebugInfo/PDB/IPDBSession.h"
#include "llvm/DebugInfo/PDB/PDBSymDumper.h"
+#include "llvm/DebugInfo/PDB/PDBSymbolTypeBuiltin.h"
+#include "llvm/DebugInfo/PDB/PDBSymbolTypeUDT.h"
#include <utility>
@@ -19,7 +22,13 @@ PDBSymbolTypeEnum::PDBSymbolTypeEnum(const IPDBSession &PDBSession,
std::unique_ptr<IPDBRawSymbol> Symbol)
: PDBSymbol(PDBSession, std::move(Symbol)) {}
-void PDBSymbolTypeEnum::dump(raw_ostream &OS, int Indent,
- PDBSymDumper &Dumper) const {
- Dumper.dump(*this, OS, Indent);
+std::unique_ptr<PDBSymbolTypeUDT> PDBSymbolTypeEnum::getClassParent() const {
+ return Session.getConcreteSymbolById<PDBSymbolTypeUDT>(getClassParentId());
}
+
+std::unique_ptr<PDBSymbolTypeBuiltin>
+PDBSymbolTypeEnum::getUnderlyingType() const {
+ return Session.getConcreteSymbolById<PDBSymbolTypeBuiltin>(getTypeId());
+}
+
+void PDBSymbolTypeEnum::dump(PDBSymDumper &Dumper) const { Dumper.dump(*this); }
diff --git a/lib/DebugInfo/PDB/PDBSymbolTypeFriend.cpp b/lib/DebugInfo/PDB/PDBSymbolTypeFriend.cpp
index b2bf72e..d332660 100644
--- a/lib/DebugInfo/PDB/PDBSymbolTypeFriend.cpp
+++ b/lib/DebugInfo/PDB/PDBSymbolTypeFriend.cpp
@@ -20,7 +20,6 @@ PDBSymbolTypeFriend::PDBSymbolTypeFriend(const IPDBSession &PDBSession,
std::unique_ptr<IPDBRawSymbol> Symbol)
: PDBSymbol(PDBSession, std::move(Symbol)) {}
-void PDBSymbolTypeFriend::dump(raw_ostream &OS, int Indent,
- PDBSymDumper &Dumper) const {
- Dumper.dump(*this, OS, Indent);
+void PDBSymbolTypeFriend::dump(PDBSymDumper &Dumper) const {
+ Dumper.dump(*this);
}
diff --git a/lib/DebugInfo/PDB/PDBSymbolTypeFunctionArg.cpp b/lib/DebugInfo/PDB/PDBSymbolTypeFunctionArg.cpp
index f394c04..f8f71ea 100644
--- a/lib/DebugInfo/PDB/PDBSymbolTypeFunctionArg.cpp
+++ b/lib/DebugInfo/PDB/PDBSymbolTypeFunctionArg.cpp
@@ -19,7 +19,6 @@ PDBSymbolTypeFunctionArg::PDBSymbolTypeFunctionArg(
const IPDBSession &PDBSession, std::unique_ptr<IPDBRawSymbol> Symbol)
: PDBSymbol(PDBSession, std::move(Symbol)) {}
-void PDBSymbolTypeFunctionArg::dump(raw_ostream &OS, int Indent,
- PDBSymDumper &Dumper) const {
- Dumper.dump(*this, OS, Indent);
+void PDBSymbolTypeFunctionArg::dump(PDBSymDumper &Dumper) const {
+ Dumper.dump(*this);
}
diff --git a/lib/DebugInfo/PDB/PDBSymbolTypeFunctionSig.cpp b/lib/DebugInfo/PDB/PDBSymbolTypeFunctionSig.cpp
index 1ba397b..8018206 100644
--- a/lib/DebugInfo/PDB/PDBSymbolTypeFunctionSig.cpp
+++ b/lib/DebugInfo/PDB/PDBSymbolTypeFunctionSig.cpp
@@ -83,7 +83,6 @@ std::unique_ptr<PDBSymbol> PDBSymbolTypeFunctionSig::getClassParent() const {
return Session.getSymbolById(ClassId);
}
-void PDBSymbolTypeFunctionSig::dump(raw_ostream &OS, int Indent,
- PDBSymDumper &Dumper) const {
- Dumper.dump(*this, OS, Indent);
+void PDBSymbolTypeFunctionSig::dump(PDBSymDumper &Dumper) const {
+ Dumper.dump(*this);
}
diff --git a/lib/DebugInfo/PDB/PDBSymbolTypeManaged.cpp b/lib/DebugInfo/PDB/PDBSymbolTypeManaged.cpp
index e04fb66..a7fac30 100644
--- a/lib/DebugInfo/PDB/PDBSymbolTypeManaged.cpp
+++ b/lib/DebugInfo/PDB/PDBSymbolTypeManaged.cpp
@@ -20,7 +20,6 @@ PDBSymbolTypeManaged::PDBSymbolTypeManaged(
const IPDBSession &PDBSession, std::unique_ptr<IPDBRawSymbol> Symbol)
: PDBSymbol(PDBSession, std::move(Symbol)) {}
-void PDBSymbolTypeManaged::dump(raw_ostream &OS, int Indent,
- PDBSymDumper &Dumper) const {
- Dumper.dump(*this, OS, Indent);
+void PDBSymbolTypeManaged::dump(PDBSymDumper &Dumper) const {
+ Dumper.dump(*this);
}
diff --git a/lib/DebugInfo/PDB/PDBSymbolTypePointer.cpp b/lib/DebugInfo/PDB/PDBSymbolTypePointer.cpp
index d274bf5..082ed83 100644
--- a/lib/DebugInfo/PDB/PDBSymbolTypePointer.cpp
+++ b/lib/DebugInfo/PDB/PDBSymbolTypePointer.cpp
@@ -24,7 +24,6 @@ std::unique_ptr<PDBSymbol> PDBSymbolTypePointer::getPointeeType() const {
return Session.getSymbolById(getTypeId());
}
-void PDBSymbolTypePointer::dump(raw_ostream &OS, int Indent,
- PDBSymDumper &Dumper) const {
- Dumper.dump(*this, OS, Indent);
+void PDBSymbolTypePointer::dump(PDBSymDumper &Dumper) const {
+ Dumper.dump(*this);
}
diff --git a/lib/DebugInfo/PDB/PDBSymbolTypeTypedef.cpp b/lib/DebugInfo/PDB/PDBSymbolTypeTypedef.cpp
index 12e3ead..5a42699 100644
--- a/lib/DebugInfo/PDB/PDBSymbolTypeTypedef.cpp
+++ b/lib/DebugInfo/PDB/PDBSymbolTypeTypedef.cpp
@@ -19,7 +19,6 @@ PDBSymbolTypeTypedef::PDBSymbolTypeTypedef(
const IPDBSession &PDBSession, std::unique_ptr<IPDBRawSymbol> Symbol)
: PDBSymbol(PDBSession, std::move(Symbol)) {}
-void PDBSymbolTypeTypedef::dump(raw_ostream &OS, int Indent,
- PDBSymDumper &Dumper) const {
- Dumper.dump(*this, OS, Indent);
+void PDBSymbolTypeTypedef::dump(PDBSymDumper &Dumper) const {
+ Dumper.dump(*this);
}
diff --git a/lib/DebugInfo/PDB/PDBSymbolTypeUDT.cpp b/lib/DebugInfo/PDB/PDBSymbolTypeUDT.cpp
index 8a72368..2b5da29 100644
--- a/lib/DebugInfo/PDB/PDBSymbolTypeUDT.cpp
+++ b/lib/DebugInfo/PDB/PDBSymbolTypeUDT.cpp
@@ -19,7 +19,4 @@ PDBSymbolTypeUDT::PDBSymbolTypeUDT(const IPDBSession &PDBSession,
std::unique_ptr<IPDBRawSymbol> Symbol)
: PDBSymbol(PDBSession, std::move(Symbol)) {}
-void PDBSymbolTypeUDT::dump(raw_ostream &OS, int Indent,
- PDBSymDumper &Dumper) const {
- Dumper.dump(*this, OS, Indent);
-}
+void PDBSymbolTypeUDT::dump(PDBSymDumper &Dumper) const { Dumper.dump(*this); }
diff --git a/lib/DebugInfo/PDB/PDBSymbolTypeVTable.cpp b/lib/DebugInfo/PDB/PDBSymbolTypeVTable.cpp
index a100526..b465d02 100644
--- a/lib/DebugInfo/PDB/PDBSymbolTypeVTable.cpp
+++ b/lib/DebugInfo/PDB/PDBSymbolTypeVTable.cpp
@@ -19,7 +19,6 @@ PDBSymbolTypeVTable::PDBSymbolTypeVTable(const IPDBSession &PDBSession,
std::unique_ptr<IPDBRawSymbol> Symbol)
: PDBSymbol(PDBSession, std::move(Symbol)) {}
-void PDBSymbolTypeVTable::dump(raw_ostream &OS, int Indent,
- PDBSymDumper &Dumper) const {
- Dumper.dump(*this, OS, Indent);
+void PDBSymbolTypeVTable::dump(PDBSymDumper &Dumper) const {
+ Dumper.dump(*this);
}
diff --git a/lib/DebugInfo/PDB/PDBSymbolTypeVTableShape.cpp b/lib/DebugInfo/PDB/PDBSymbolTypeVTableShape.cpp
index 6aaa668..16052f1 100644
--- a/lib/DebugInfo/PDB/PDBSymbolTypeVTableShape.cpp
+++ b/lib/DebugInfo/PDB/PDBSymbolTypeVTableShape.cpp
@@ -20,7 +20,6 @@ PDBSymbolTypeVTableShape::PDBSymbolTypeVTableShape(
const IPDBSession &PDBSession, std::unique_ptr<IPDBRawSymbol> Symbol)
: PDBSymbol(PDBSession, std::move(Symbol)) {}
-void PDBSymbolTypeVTableShape::dump(raw_ostream &OS, int Indent,
- PDBSymDumper &Dumper) const {
- Dumper.dump(*this, OS, Indent);
+void PDBSymbolTypeVTableShape::dump(PDBSymDumper &Dumper) const {
+ Dumper.dump(*this);
}
diff --git a/lib/DebugInfo/PDB/PDBSymbolUnknown.cpp b/lib/DebugInfo/PDB/PDBSymbolUnknown.cpp
index 9cfb88a..48dc115 100644
--- a/lib/DebugInfo/PDB/PDBSymbolUnknown.cpp
+++ b/lib/DebugInfo/PDB/PDBSymbolUnknown.cpp
@@ -20,7 +20,4 @@ PDBSymbolUnknown::PDBSymbolUnknown(const IPDBSession &PDBSession,
std::unique_ptr<IPDBRawSymbol> Symbol)
: PDBSymbol(PDBSession, std::move(Symbol)) {}
-void PDBSymbolUnknown::dump(raw_ostream &OS, int Indent,
- PDBSymDumper &Dumper) const {
- Dumper.dump(*this, OS, Indent);
-}
+void PDBSymbolUnknown::dump(PDBSymDumper &Dumper) const { Dumper.dump(*this); }
diff --git a/lib/DebugInfo/PDB/PDBSymbolUsingNamespace.cpp b/lib/DebugInfo/PDB/PDBSymbolUsingNamespace.cpp
index 9176dfb..6cf13de 100644
--- a/lib/DebugInfo/PDB/PDBSymbolUsingNamespace.cpp
+++ b/lib/DebugInfo/PDB/PDBSymbolUsingNamespace.cpp
@@ -20,7 +20,6 @@ PDBSymbolUsingNamespace::PDBSymbolUsingNamespace(
const IPDBSession &PDBSession, std::unique_ptr<IPDBRawSymbol> Symbol)
: PDBSymbol(PDBSession, std::move(Symbol)) {}
-void PDBSymbolUsingNamespace::dump(raw_ostream &OS, int Indent,
- PDBSymDumper &Dumper) const {
- Dumper.dump(*this, OS, Indent);
+void PDBSymbolUsingNamespace::dump(PDBSymDumper &Dumper) const {
+ Dumper.dump(*this);
}
diff --git a/lib/ExecutionEngine/ExecutionEngine.cpp b/lib/ExecutionEngine/ExecutionEngine.cpp
index 12e0e6a..c586ba7 100644
--- a/lib/ExecutionEngine/ExecutionEngine.cpp
+++ b/lib/ExecutionEngine/ExecutionEngine.cpp
@@ -13,6 +13,7 @@
//===----------------------------------------------------------------------===//
#include "llvm/ExecutionEngine/ExecutionEngine.h"
+#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ExecutionEngine/GenericValue.h"
@@ -399,33 +400,12 @@ int ExecutionEngine::runFunctionAsMain(Function *Fn,
return runFunction(Fn, GVArgs).IntVal.getZExtValue();
}
-EngineBuilder::EngineBuilder() {
- InitEngine();
-}
+EngineBuilder::EngineBuilder() : EngineBuilder(nullptr) {}
EngineBuilder::EngineBuilder(std::unique_ptr<Module> M)
- : M(std::move(M)), MCJMM(nullptr) {
- InitEngine();
-}
-
-EngineBuilder::~EngineBuilder() {}
-
-EngineBuilder &EngineBuilder::setMCJITMemoryManager(
- std::unique_ptr<RTDyldMemoryManager> mcjmm) {
- MCJMM = std::move(mcjmm);
- return *this;
-}
-
-void EngineBuilder::InitEngine() {
- WhichEngine = EngineKind::Either;
- ErrorStr = nullptr;
- OptLevel = CodeGenOpt::Default;
- MCJMM = nullptr;
- Options = TargetOptions();
- RelocModel = Reloc::Default;
- CMModel = CodeModel::JITDefault;
- UseOrcMCJITReplacement = false;
-
+ : M(std::move(M)), WhichEngine(EngineKind::Either), ErrorStr(nullptr),
+ OptLevel(CodeGenOpt::Default), MCJMM(nullptr), RelocModel(Reloc::Default),
+ CMModel(CodeModel::JITDefault), UseOrcMCJITReplacement(false) {
// IR module verification is enabled by default in debug builds, and disabled
// by default in release builds.
#ifndef NDEBUG
@@ -435,6 +415,14 @@ void EngineBuilder::InitEngine() {
#endif
}
+EngineBuilder::~EngineBuilder() = default;
+
+EngineBuilder &EngineBuilder::setMCJITMemoryManager(
+ std::unique_ptr<RTDyldMemoryManager> mcjmm) {
+ MCJMM = std::move(mcjmm);
+ return *this;
+}
+
ExecutionEngine *EngineBuilder::create(TargetMachine *TM) {
std::unique_ptr<TargetMachine> TheTM(TM); // Take ownership.
diff --git a/lib/ExecutionEngine/Interpreter/CMakeLists.txt b/lib/ExecutionEngine/Interpreter/CMakeLists.txt
index 1aac3ac..4dbc2df 100644
--- a/lib/ExecutionEngine/Interpreter/CMakeLists.txt
+++ b/lib/ExecutionEngine/Interpreter/CMakeLists.txt
@@ -13,7 +13,7 @@ add_llvm_library(LLVMInterpreter
)
if( LLVM_ENABLE_FFI )
- target_link_libraries( LLVMInterpreter ${cmake_2_8_12_PRIVATE} ${FFI_LIBRARY_PATH} )
+ target_link_libraries( LLVMInterpreter PRIVATE ${FFI_LIBRARY_PATH} )
endif()
add_dependencies(LLVMInterpreter intrinsics_gen)
diff --git a/lib/ExecutionEngine/Interpreter/Execution.cpp b/lib/ExecutionEngine/Interpreter/Execution.cpp
index 93bb2d1..2e8eb16 100644
--- a/lib/ExecutionEngine/Interpreter/Execution.cpp
+++ b/lib/ExecutionEngine/Interpreter/Execution.cpp
@@ -23,6 +23,7 @@
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
+#include "llvm/Support/raw_ostream.h"
#include <algorithm>
#include <cmath>
using namespace llvm;
@@ -464,14 +465,14 @@ static GenericValue executeFCMP_OGT(GenericValue Src1, GenericValue Src2,
return Dest; \
}
-#define IMPLEMENT_VECTOR_UNORDERED(TY, X,Y, _FUNC) \
- if (TY->isVectorTy()) { \
- GenericValue DestMask = Dest; \
- Dest = _FUNC(Src1, Src2, Ty); \
- for( size_t _i=0; _i<Src1.AggregateVal.size(); _i++) \
- if (DestMask.AggregateVal[_i].IntVal == true) \
- Dest.AggregateVal[_i].IntVal = APInt(1,true); \
- return Dest; \
+#define IMPLEMENT_VECTOR_UNORDERED(TY, X, Y, FUNC) \
+ if (TY->isVectorTy()) { \
+ GenericValue DestMask = Dest; \
+ Dest = FUNC(Src1, Src2, Ty); \
+ for (size_t _i = 0; _i < Src1.AggregateVal.size(); _i++) \
+ if (DestMask.AggregateVal[_i].IntVal == true) \
+ Dest.AggregateVal[_i].IntVal = APInt(1, true); \
+ return Dest; \
}
static GenericValue executeFCMP_UEQ(GenericValue Src1, GenericValue Src2,
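For context on what the reformatted IMPLEMENT_VECTOR_UNORDERED macro computes: an unordered FP comparison is true whenever either operand is NaN, so the interpreter runs the ordered element-wise compare and then forces the lanes flagged by the previously computed NaN mask (DestMask) back to true. The scalar equivalent, as a sketch:

  #include <cmath>

  // Unordered greater-than: NaN in either operand wins, otherwise fall
  // back to the ordered comparison -- the per-lane logic of the macro.
  static bool fcmpUGT(double A, double B) {
    if (std::isnan(A) || std::isnan(B))
      return true;                 // the DestMask lanes in the macro
    return A > B;                  // the ordered FUNC result
  }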
diff --git a/lib/ExecutionEngine/MCJIT/MCJIT.cpp b/lib/ExecutionEngine/MCJIT/MCJIT.cpp
index e500d3d..20b8553 100644
--- a/lib/ExecutionEngine/MCJIT/MCJIT.cpp
+++ b/lib/ExecutionEngine/MCJIT/MCJIT.cpp
@@ -137,8 +137,7 @@ std::unique_ptr<MemoryBuffer> MCJIT::emitObject(Module *M) {
legacy::PassManager PM;
- M->setDataLayout(TM->getDataLayout());
- PM.add(new DataLayoutPass());
+ M->setDataLayout(*TM->getDataLayout());
// The RuntimeDyld will take ownership of this shortly
SmallVector<char, 4096> ObjBufferSV;
@@ -258,7 +257,7 @@ uint64_t MCJIT::getExistingSymbolAddress(const std::string &Name) {
Mangler Mang(TM->getDataLayout());
SmallString<128> FullName;
Mang.getNameWithPrefix(FullName, Name);
- return Dyld.getSymbolLoadAddress(FullName);
+ return Dyld.getSymbol(FullName).getAddress();
}
Module *MCJIT::findModuleForSymbol(const std::string &Name,
@@ -384,7 +383,7 @@ void *MCJIT::getPointerToFunction(Function *F) {
//
// This is the accessor for the target address, so make sure to check the
// load address of the symbol, not the local address.
- return (void*)Dyld.getSymbolLoadAddress(Name);
+ return (void*)Dyld.getSymbol(Name).getAddress();
}
void MCJIT::runStaticConstructorsDestructorsInModulePtrSet(
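The MCJIT calls above adopt the reworked RuntimeDyld interface introduced later in this patch: getSymbolLoadAddress(Name) becomes getSymbol(Name), returning a RuntimeDyld::SymbolInfo that carries the target address plus the new JITSymbolFlags. A usage fragment, assuming Dyld is a RuntimeDyld that has already loaded an object (<cstdint> and the RuntimeDyld header assumed):

  if (auto Sym = Dyld.getSymbol("my_function")) { // null-testable on failure
    uint64_t TargetAddr = Sym.getAddress(); // load address in the target,
    (void)TargetAddr;                       // not the local (host) mapping
  }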
diff --git a/lib/ExecutionEngine/Orc/IndirectionUtils.cpp b/lib/ExecutionEngine/Orc/IndirectionUtils.cpp
index 61c947f..8cf490f 100644
--- a/lib/ExecutionEngine/Orc/IndirectionUtils.cpp
+++ b/lib/ExecutionEngine/Orc/IndirectionUtils.cpp
@@ -7,6 +7,7 @@
//
//===----------------------------------------------------------------------===//
+#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/Triple.h"
#include "llvm/ExecutionEngine/Orc/CloneSubModule.h"
#include "llvm/ExecutionEngine/Orc/IndirectionUtils.h"
diff --git a/lib/ExecutionEngine/Orc/OrcMCJITReplacement.h b/lib/ExecutionEngine/Orc/OrcMCJITReplacement.h
index 1b7b161..00e39bb 100644
--- a/lib/ExecutionEngine/Orc/OrcMCJITReplacement.h
+++ b/lib/ExecutionEngine/Orc/OrcMCJITReplacement.h
@@ -133,8 +133,8 @@ public:
// If this module doesn't have a DataLayout attached then attach the
// default.
- if (!M->getDataLayout())
- M->setDataLayout(getDataLayout());
+ if (M->getDataLayout().isDefault())
+ M->setDataLayout(*getDataLayout());
Modules.push_back(std::move(M));
std::vector<Module *> Ms;
diff --git a/lib/ExecutionEngine/Orc/OrcTargetSupport.cpp b/lib/ExecutionEngine/Orc/OrcTargetSupport.cpp
index b5dda8e..6fe5301 100644
--- a/lib/ExecutionEngine/Orc/OrcTargetSupport.cpp
+++ b/lib/ExecutionEngine/Orc/OrcTargetSupport.cpp
@@ -39,7 +39,7 @@ template <typename OStream> void restoreX86Regs(OStream &OS) {
}
template <typename TargetT>
-uint64_t executeCompileCallback(JITCompileCallbackManagerBase<TargetT> *JCBM,
+uint64_t executeCompileCallback(JITCompileCallbackManagerBase *JCBM,
TargetAddress CallbackID) {
return JCBM->executeCompileCallback(CallbackID);
}
@@ -52,7 +52,8 @@ namespace orc {
const char* OrcX86_64::ResolverBlockName = "orc_resolver_block";
void OrcX86_64::insertResolverBlock(
- Module &M, JITCompileCallbackManagerBase<OrcX86_64> &JCBM) {
+ Module &M, JITCompileCallbackManagerBase &JCBM) {
+ const unsigned X86_64_TrampolineLength = 6;
auto CallbackPtr = executeCompileCallback<OrcX86_64>;
uint64_t CallbackAddr =
static_cast<uint64_t>(reinterpret_cast<uintptr_t>(CallbackPtr));
@@ -77,6 +78,7 @@ void OrcX86_64::insertResolverBlock(
AsmStream << " leaq jit_callback_manager_addr(%rip), %rdi\n"
<< " movq (%rdi), %rdi\n"
<< " movq " << ReturnAddrOffset << "(%rsp), %rsi\n"
+ << " subq $" << X86_64_TrampolineLength << ", %rsi\n"
<< " movabsq $" << CallbackAddr << ", %rax\n"
<< " callq *%rax\n"
<< " movq %rax, " << ReturnAddrOffset << "(%rsp)\n";
diff --git a/lib/ExecutionEngine/RuntimeDyld/Android.mk b/lib/ExecutionEngine/RuntimeDyld/Android.mk
index 76aae67..40fdd7c 100644
--- a/lib/ExecutionEngine/RuntimeDyld/Android.mk
+++ b/lib/ExecutionEngine/RuntimeDyld/Android.mk
@@ -8,6 +8,7 @@ LOCAL_SRC_FILES := \
RTDyldMemoryManager.cpp \
RuntimeDyldChecker.cpp \
RuntimeDyld.cpp \
+ RuntimeDyldCOFF.cpp \
RuntimeDyldELF.cpp \
RuntimeDyldMachO.cpp
diff --git a/lib/ExecutionEngine/RuntimeDyld/CMakeLists.txt b/lib/ExecutionEngine/RuntimeDyld/CMakeLists.txt
index 12bbcc6..e78408a 100644
--- a/lib/ExecutionEngine/RuntimeDyld/CMakeLists.txt
+++ b/lib/ExecutionEngine/RuntimeDyld/CMakeLists.txt
@@ -2,6 +2,7 @@ add_llvm_library(LLVMRuntimeDyld
RTDyldMemoryManager.cpp
RuntimeDyld.cpp
RuntimeDyldChecker.cpp
+ RuntimeDyldCOFF.cpp
RuntimeDyldELF.cpp
RuntimeDyldMachO.cpp
)
diff --git a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyld.cpp b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyld.cpp
index 54f1a1c..a0ed7cf 100644
--- a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyld.cpp
+++ b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyld.cpp
@@ -13,10 +13,12 @@
#include "llvm/ExecutionEngine/RuntimeDyld.h"
#include "RuntimeDyldCheckerImpl.h"
+#include "RuntimeDyldCOFF.h"
#include "RuntimeDyldELF.h"
#include "RuntimeDyldImpl.h"
#include "RuntimeDyldMachO.h"
#include "llvm/Object/ELFObjectFile.h"
+#include "llvm/Object/COFF.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/MutexGuard.h"
@@ -195,10 +197,13 @@ RuntimeDyldImpl::loadObjectImpl(const object::ObjectFile &Obj) {
<< " SID: " << SectionID << " Offset: "
<< format("%p", (uintptr_t)SectOffset)
<< " flags: " << Flags << "\n");
- SymbolInfo::Visibility Vis =
- (Flags & SymbolRef::SF_Exported) ?
- SymbolInfo::Default : SymbolInfo::Hidden;
- GlobalSymbolTable[Name] = SymbolInfo(SectionID, SectOffset, Vis);
+ JITSymbolFlags RTDyldSymFlags = JITSymbolFlags::None;
+ if (Flags & SymbolRef::SF_Weak)
+ RTDyldSymFlags |= JITSymbolFlags::Weak;
+ if (Flags & SymbolRef::SF_Exported)
+ RTDyldSymFlags |= JITSymbolFlags::Exported;
+ GlobalSymbolTable[Name] =
+ SymbolTableEntry(SectionID, SectOffset, RTDyldSymFlags);
}
}
}
@@ -264,6 +269,20 @@ static bool isRequiredForExecution(const SectionRef &Section) {
const ObjectFile *Obj = Section.getObject();
if (auto *ELFObj = dyn_cast<object::ELFObjectFileBase>(Obj))
return ELFObj->getSectionFlags(Section) & ELF::SHF_ALLOC;
+ if (auto *COFFObj = dyn_cast<object::COFFObjectFile>(Obj)) {
+ const coff_section *CoffSection = COFFObj->getCOFFSection(Section);
+ // Avoid loading zero-sized COFF sections.
+ // In PE files, VirtualSize gives the section size, and SizeOfRawData
+ // may be zero for sections with content. In Obj files, SizeOfRawData
+ // gives the section size, and VirtualSize is always zero. Hence
+ // the need to check for both cases below.
+ bool HasContent = (CoffSection->VirtualSize > 0)
+ || (CoffSection->SizeOfRawData > 0);
+ bool IsDiscardable = CoffSection->Characteristics &
+ (COFF::IMAGE_SCN_MEM_DISCARDABLE | COFF::IMAGE_SCN_LNK_INFO);
+ return HasContent && !IsDiscardable;
+ }
+
assert(isa<MachOObjectFile>(Obj));
return true;
}
@@ -273,6 +292,15 @@ static bool isReadOnlyData(const SectionRef &Section) {
if (auto *ELFObj = dyn_cast<object::ELFObjectFileBase>(Obj))
return !(ELFObj->getSectionFlags(Section) &
(ELF::SHF_WRITE | ELF::SHF_EXECINSTR));
+ if (auto *COFFObj = dyn_cast<object::COFFObjectFile>(Obj))
+ return ((COFFObj->getCOFFSection(Section)->Characteristics &
+ (COFF::IMAGE_SCN_CNT_INITIALIZED_DATA
+ | COFF::IMAGE_SCN_MEM_READ
+ | COFF::IMAGE_SCN_MEM_WRITE))
+ ==
+ (COFF::IMAGE_SCN_CNT_INITIALIZED_DATA
+ | COFF::IMAGE_SCN_MEM_READ));
+
assert(isa<MachOObjectFile>(Obj));
return false;
}
@@ -281,6 +309,9 @@ static bool isZeroInit(const SectionRef &Section) {
const ObjectFile *Obj = Section.getObject();
if (auto *ELFObj = dyn_cast<object::ELFObjectFileBase>(Obj))
return ELFObj->getSectionType(Section) == ELF::SHT_NOBITS;
+ if (auto *COFFObj = dyn_cast<object::COFFObjectFile>(Obj))
+ return COFFObj->getCOFFSection(Section)->Characteristics &
+ COFF::IMAGE_SCN_CNT_UNINITIALIZED_DATA;
auto *MachO = cast<MachOObjectFile>(Obj);
unsigned SectionType = MachO->getSectionType(Section);
@@ -497,12 +528,15 @@ void RuntimeDyldImpl::emitCommonSymbols(const ObjectFile &Obj,
Offset += AlignOffset;
}
uint32_t Flags = Sym.getFlags();
- SymbolInfo::Visibility Vis =
- (Flags & SymbolRef::SF_Exported) ?
- SymbolInfo::Default : SymbolInfo::Hidden;
+ JITSymbolFlags RTDyldSymFlags = JITSymbolFlags::None;
+ if (Flags & SymbolRef::SF_Weak)
+ RTDyldSymFlags |= JITSymbolFlags::Weak;
+ if (Flags & SymbolRef::SF_Exported)
+ RTDyldSymFlags |= JITSymbolFlags::Exported;
DEBUG(dbgs() << "Allocating common symbol " << Name << " address "
<< format("%p", Addr) << "\n");
- GlobalSymbolTable[Name] = SymbolInfo(SectionID, Offset, Vis);
+ GlobalSymbolTable[Name] =
+ SymbolTableEntry(SectionID, Offset, RTDyldSymFlags);
Offset += Size;
Addr += Size;
}
@@ -512,7 +546,6 @@ unsigned RuntimeDyldImpl::emitSection(const ObjectFile &Obj,
const SectionRef &Section, bool IsCode) {
StringRef data;
- Check(Section.getContents(data));
uint64_t Alignment64 = Section.getAlignment();
unsigned Alignment = (unsigned)Alignment64 & 0xffffffffL;
@@ -542,6 +575,7 @@ unsigned RuntimeDyldImpl::emitSection(const ObjectFile &Obj,
// Some sections, such as debug info, don't need to be loaded for execution.
// Leave those where they are.
if (IsRequired) {
+ Check(Section.getContents(data));
Allocate = DataSize + PaddingSize + StubBufSize;
Addr = IsCode ? MemMgr->allocateCodeSection(Allocate, Alignment, SectionID,
Name)
@@ -816,6 +850,15 @@ RuntimeDyld::RuntimeDyld(RTDyldMemoryManager *mm) {
RuntimeDyld::~RuntimeDyld() {}
+static std::unique_ptr<RuntimeDyldCOFF>
+createRuntimeDyldCOFF(Triple::ArchType Arch, RTDyldMemoryManager *MM,
+ bool ProcessAllSections, RuntimeDyldCheckerImpl *Checker) {
+ std::unique_ptr<RuntimeDyldCOFF> Dyld(RuntimeDyldCOFF::create(Arch, MM));
+ Dyld->setProcessAllSections(ProcessAllSections);
+ Dyld->setRuntimeDyldChecker(Checker);
+ return Dyld;
+}
+
static std::unique_ptr<RuntimeDyldELF>
createRuntimeDyldELF(RTDyldMemoryManager *MM, bool ProcessAllSections,
RuntimeDyldCheckerImpl *Checker) {
@@ -843,6 +886,10 @@ RuntimeDyld::loadObject(const ObjectFile &Obj) {
Dyld = createRuntimeDyldMachO(
static_cast<Triple::ArchType>(Obj.getArch()), MM,
ProcessAllSections, Checker);
+ else if (Obj.isCOFF())
+ Dyld = createRuntimeDyldCOFF(
+ static_cast<Triple::ArchType>(Obj.getArch()), MM,
+ ProcessAllSections, Checker);
else
report_fatal_error("Incompatible object format!");
}
@@ -853,22 +900,16 @@ RuntimeDyld::loadObject(const ObjectFile &Obj) {
return Dyld->loadObject(Obj);
}
-void *RuntimeDyld::getSymbolAddress(StringRef Name) const {
+void *RuntimeDyld::getSymbolLocalAddress(StringRef Name) const {
if (!Dyld)
return nullptr;
- return Dyld->getSymbolAddress(Name);
+ return Dyld->getSymbolLocalAddress(Name);
}
-uint64_t RuntimeDyld::getSymbolLoadAddress(StringRef Name) const {
+RuntimeDyld::SymbolInfo RuntimeDyld::getSymbol(StringRef Name) const {
if (!Dyld)
- return 0;
- return Dyld->getSymbolLoadAddress(Name);
-}
-
-uint64_t RuntimeDyld::getExportedSymbolLoadAddress(StringRef Name) const {
- if (!Dyld)
- return 0;
- return Dyld->getExportedSymbolLoadAddress(Name);
+ return nullptr;
+ return Dyld->getSymbol(Name);
}
void RuntimeDyld::resolveRelocations() { Dyld->resolveRelocations(); }
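The COFF arm of isReadOnlyData() above is a masked-equality test: read-only data means initialized contents plus read permission and, crucially, no write bit. A self-contained version of the check using the standard PE/COFF characteristic values (constants restated here for illustration):

  #include <cstdint>

  // PE/COFF section characteristic bits, per the PE/COFF specification.
  const uint32_t CNT_INITIALIZED_DATA = 0x00000040;
  const uint32_t MEM_READ             = 0x40000000;
  const uint32_t MEM_WRITE            = 0x80000000;

  bool isReadOnlyData(uint32_t Characteristics) {
    const uint32_t Mask = CNT_INITIALIZED_DATA | MEM_READ | MEM_WRITE;
    // .rdata (0x40000040) passes; .data (0xC0000040) fails on MEM_WRITE.
    return (Characteristics & Mask) == (CNT_INITIALIZED_DATA | MEM_READ);
  }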
diff --git a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldCOFF.cpp b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldCOFF.cpp
new file mode 100644
index 0000000..56bcb8e
--- /dev/null
+++ b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldCOFF.cpp
@@ -0,0 +1,85 @@
+//===-- RuntimeDyldCOFF.cpp - Run-time dynamic linker for MC-JIT -*- C++ -*-==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Implementation of COFF support for the MC-JIT runtime dynamic linker.
+//
+//===----------------------------------------------------------------------===//
+
+#include "RuntimeDyldCOFF.h"
+#include "Targets/RuntimeDyldCOFFX86_64.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/Triple.h"
+#include "llvm/Object/ObjectFile.h"
+
+using namespace llvm;
+using namespace llvm::object;
+
+#define DEBUG_TYPE "dyld"
+
+namespace {
+
+class LoadedCOFFObjectInfo : public RuntimeDyld::LoadedObjectInfo {
+public:
+ LoadedCOFFObjectInfo(RuntimeDyldImpl &RTDyld, unsigned BeginIdx,
+ unsigned EndIdx)
+ : RuntimeDyld::LoadedObjectInfo(RTDyld, BeginIdx, EndIdx) {}
+
+ OwningBinary<ObjectFile>
+ getObjectForDebug(const ObjectFile &Obj) const override {
+ return OwningBinary<ObjectFile>();
+ }
+};
+}
+
+namespace llvm {
+
+std::unique_ptr<RuntimeDyldCOFF>
+llvm::RuntimeDyldCOFF::create(Triple::ArchType Arch, RTDyldMemoryManager *MM) {
+ switch (Arch) {
+ default:
+ llvm_unreachable("Unsupported target for RuntimeDyldCOFF.");
+ break;
+ case Triple::x86_64:
+ return make_unique<RuntimeDyldCOFFX86_64>(MM);
+ }
+}
+
+std::unique_ptr<RuntimeDyld::LoadedObjectInfo>
+RuntimeDyldCOFF::loadObject(const object::ObjectFile &O) {
+ unsigned SectionStartIdx, SectionEndIdx;
+ std::tie(SectionStartIdx, SectionEndIdx) = loadObjectImpl(O);
+ return llvm::make_unique<LoadedCOFFObjectInfo>(*this, SectionStartIdx,
+ SectionEndIdx);
+}
+
+uint64_t RuntimeDyldCOFF::getSymbolOffset(const SymbolRef &Sym) {
+ uint64_t Address;
+ if (Sym.getAddress(Address))
+ return UnknownAddressOrSize;
+
+ if (Address == UnknownAddressOrSize)
+ return UnknownAddressOrSize;
+
+ const ObjectFile *Obj = Sym.getObject();
+ section_iterator SecI(Obj->section_end());
+ if (Sym.getSection(SecI))
+ return UnknownAddressOrSize;
+
+ if (SecI == Obj->section_end())
+ return UnknownAddressOrSize;
+
+ uint64_t SectionAddress = SecI->getAddress();
+ return Address - SectionAddress;
+}
+
+bool RuntimeDyldCOFF::isCompatibleFile(const object::ObjectFile &Obj) const {
+ return Obj.isCOFF();
+}
+
+} // namespace llvm
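getSymbolOffset() above normalizes a COFF symbol to a section-relative offset, returning UnknownAddressOrSize whenever any lookup step fails. The arithmetic core, as a sketch with invented addresses:

  #include <cstdint>

  uint64_t symbolOffset(uint64_t SymbolAddr, uint64_t SectionAddr) {
    // e.g. symbol at 0x2040 in a section at 0x2000 -> offset 0x40
    return SymbolAddr - SectionAddr;
  }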
diff --git a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldCOFF.h b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldCOFF.h
new file mode 100644
index 0000000..681a3e5
--- /dev/null
+++ b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldCOFF.h
@@ -0,0 +1,46 @@
+//===-- RuntimeDyldCOFF.h - Run-time dynamic linker for MC-JIT ---*- C++ -*-==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// COFF support for MC-JIT runtime dynamic linker.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_RUNTIME_DYLD_COFF_H
+#define LLVM_RUNTIME_DYLD_COFF_H
+
+#include "RuntimeDyldImpl.h"
+#include "llvm/ADT/DenseMap.h"
+
+#define DEBUG_TYPE "dyld"
+
+using namespace llvm;
+
+namespace llvm {
+
+// Common base class for COFF dynamic linker support.
+// Concrete subclasses for each target can be found in ./Targets.
+class RuntimeDyldCOFF : public RuntimeDyldImpl {
+
+public:
+ std::unique_ptr<RuntimeDyld::LoadedObjectInfo>
+ loadObject(const object::ObjectFile &Obj) override;
+ bool isCompatibleFile(const object::ObjectFile &Obj) const override;
+ static std::unique_ptr<RuntimeDyldCOFF> create(Triple::ArchType Arch,
+ RTDyldMemoryManager *MM);
+
+protected:
+ RuntimeDyldCOFF(RTDyldMemoryManager *MM) : RuntimeDyldImpl(MM) {}
+ uint64_t getSymbolOffset(const SymbolRef &Sym);
+};
+
+} // end namespace llvm
+
+#undef DEBUG_TYPE
+
+#endif
diff --git a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldChecker.cpp b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldChecker.cpp
index 976a434..c991408 100644
--- a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldChecker.cpp
+++ b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldChecker.cpp
@@ -310,7 +310,7 @@ private:
"");
uint64_t SymbolAddr = PCtx.IsInsideLoad
- ? Checker.getSymbolLinkerAddr(Symbol)
+ ? Checker.getSymbolLocalAddr(Symbol)
: Checker.getSymbolRemoteAddr(Symbol);
uint64_t NextPC = SymbolAddr + InstSize;
@@ -437,7 +437,7 @@ private:
// The value for the symbol depends on the context we're evaluating in:
// Inside a load this is the address in the linker's memory, outside a
// load it's the address in the target process's memory.
- uint64_t Value = PCtx.IsInsideLoad ? Checker.getSymbolLinkerAddr(Symbol)
+ uint64_t Value = PCtx.IsInsideLoad ? Checker.getSymbolLocalAddr(Symbol)
: Checker.getSymbolRemoteAddr(Symbol);
// Looks like a plain symbol reference.
@@ -727,17 +727,17 @@ bool RuntimeDyldCheckerImpl::checkAllRulesInBuffer(StringRef RulePrefix,
}
bool RuntimeDyldCheckerImpl::isSymbolValid(StringRef Symbol) const {
- return getRTDyld().getSymbolAddress(Symbol) != nullptr;
+ return getRTDyld().getSymbolLocalAddress(Symbol) != nullptr;
}
-uint64_t RuntimeDyldCheckerImpl::getSymbolLinkerAddr(StringRef Symbol) const {
+uint64_t RuntimeDyldCheckerImpl::getSymbolLocalAddr(StringRef Symbol) const {
return static_cast<uint64_t>(
- reinterpret_cast<uintptr_t>(getRTDyld().getSymbolAddress(Symbol)));
+ reinterpret_cast<uintptr_t>(getRTDyld().getSymbolLocalAddress(Symbol)));
}
uint64_t RuntimeDyldCheckerImpl::getSymbolRemoteAddr(StringRef Symbol) const {
- if (uint64_t InternalSymbolAddr = getRTDyld().getSymbolLoadAddress(Symbol))
- return InternalSymbolAddr;
+ if (auto InternalSymbol = getRTDyld().getSymbol(Symbol))
+ return InternalSymbol.getAddress();
return getRTDyld().MemMgr->getSymbolAddress(Symbol);
}
@@ -929,6 +929,6 @@ bool RuntimeDyldChecker::checkAllRulesInBuffer(StringRef RulePrefix,
std::pair<uint64_t, std::string>
RuntimeDyldChecker::getSectionAddr(StringRef FileName, StringRef SectionName,
- bool LinkerAddress) {
- return Impl->getSectionAddr(FileName, SectionName, LinkerAddress);
+ bool LocalAddress) {
+ return Impl->getSectionAddr(FileName, SectionName, LocalAddress);
}
diff --git a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldCheckerImpl.h b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldCheckerImpl.h
index de20c1e..e8d299a 100644
--- a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldCheckerImpl.h
+++ b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldCheckerImpl.h
@@ -42,7 +42,7 @@ private:
RuntimeDyldImpl &getRTDyld() const { return *RTDyld.Dyld; }
bool isSymbolValid(StringRef Symbol) const;
- uint64_t getSymbolLinkerAddr(StringRef Symbol) const;
+ uint64_t getSymbolLocalAddr(StringRef Symbol) const;
uint64_t getSymbolRemoteAddr(StringRef Symbol) const;
uint64_t readMemoryAtAddr(uint64_t Addr, unsigned Size) const;
diff --git a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp
index 0f3ca0f..6278170 100644
--- a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp
+++ b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp
@@ -1128,7 +1128,7 @@ relocation_iterator RuntimeDyldELF::processRelocationRef(
RangeOverflow = true;
}
}
- if (SymType == SymbolRef::ST_Unknown || RangeOverflow == true) {
+ if (SymType == SymbolRef::ST_Unknown || RangeOverflow) {
// It is an external symbol (SymbolRef::ST_Unknown) or the branch
// target is out of the 24-bit range.
StubMap::const_iterator i = Stubs.find(Value);
diff --git a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.h b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.h
index b4414b0..71260d0 100644
--- a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.h
+++ b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.h
@@ -20,16 +20,6 @@
using namespace llvm;
namespace llvm {
-namespace {
-// Helper for extensive error checking in debug builds.
-std::error_code Check(std::error_code Err) {
- if (Err) {
- report_fatal_error(Err.message());
- }
- return Err;
-}
-
-} // end anonymous namespace
class RuntimeDyldELF : public RuntimeDyldImpl {
diff --git a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldImpl.h b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldImpl.h
index f37a9a7..05060dd 100644
--- a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldImpl.h
+++ b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldImpl.h
@@ -36,6 +36,14 @@ using namespace llvm::object;
namespace llvm {
+// Helper for extensive error checking in debug builds.
+inline std::error_code Check(std::error_code Err) {
+ if (Err) {
+ report_fatal_error(Err.message());
+ }
+ return Err;
+}
+
class Twine;
/// SectionEntry - represents a section emitted into memory by the dynamic
@@ -156,27 +164,24 @@ public:
}
};
-/// @brief Symbol info for RuntimeDyld.
-class SymbolInfo {
+/// @brief Symbol table entry for RuntimeDyld.
+class SymbolTableEntry : public JITSymbolBase {
public:
- typedef enum { Hidden = 0, Default = 1 } Visibility;
-
- SymbolInfo() : Offset(0), SectionID(0), Vis(Hidden) {}
+ SymbolTableEntry()
+ : JITSymbolBase(JITSymbolFlags::None), Offset(0), SectionID(0) {}
- SymbolInfo(unsigned SectionID, uint64_t Offset, Visibility Vis)
- : Offset(Offset), SectionID(SectionID), Vis(Vis) {}
+ SymbolTableEntry(unsigned SectionID, uint64_t Offset, JITSymbolFlags Flags)
+ : JITSymbolBase(Flags), Offset(Offset), SectionID(SectionID) {}
unsigned getSectionID() const { return SectionID; }
uint64_t getOffset() const { return Offset; }
- Visibility getVisibility() const { return Vis; }
private:
uint64_t Offset;
- unsigned SectionID : 31;
- Visibility Vis : 1;
+ unsigned SectionID;
};
-typedef StringMap<SymbolInfo> RTDyldSymbolTable;
+typedef StringMap<SymbolTableEntry> RTDyldSymbolTable;
class RuntimeDyldImpl {
friend class RuntimeDyld::LoadedObjectInfo;
@@ -386,7 +391,7 @@ public:
virtual std::unique_ptr<RuntimeDyld::LoadedObjectInfo>
loadObject(const object::ObjectFile &Obj) = 0;
- uint8_t* getSymbolAddress(StringRef Name) const {
+ uint8_t* getSymbolLocalAddress(StringRef Name) const {
// FIXME: Just look up as a function for now. Overly simple of course.
// Work in progress.
RTDyldSymbolTable::const_iterator pos = GlobalSymbolTable.find(Name);
@@ -396,24 +401,16 @@ public:
return getSectionAddress(SymInfo.getSectionID()) + SymInfo.getOffset();
}
- uint64_t getSymbolLoadAddress(StringRef Name) const {
+ RuntimeDyld::SymbolInfo getSymbol(StringRef Name) const {
// FIXME: Just look up as a function for now. Overly simple of course.
// Work in progress.
RTDyldSymbolTable::const_iterator pos = GlobalSymbolTable.find(Name);
if (pos == GlobalSymbolTable.end())
- return 0;
- const auto &SymInfo = pos->second;
- return getSectionLoadAddress(SymInfo.getSectionID()) + SymInfo.getOffset();
- }
-
- uint64_t getExportedSymbolLoadAddress(StringRef Name) const {
- RTDyldSymbolTable::const_iterator pos = GlobalSymbolTable.find(Name);
- if (pos == GlobalSymbolTable.end())
- return 0;
- const auto &SymInfo = pos->second;
- if (SymInfo.getVisibility() == SymbolInfo::Hidden)
- return 0;
- return getSectionLoadAddress(SymInfo.getSectionID()) + SymInfo.getOffset();
+ return nullptr;
+ const auto &SymEntry = pos->second;
+ uint64_t TargetAddr =
+ getSectionLoadAddress(SymEntry.getSectionID()) + SymEntry.getOffset();
+ return RuntimeDyld::SymbolInfo(TargetAddr, SymEntry.getFlags());
}
void resolveRelocations();
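Note the design choice in SymbolTableEntry: it stores (SectionID, Offset, Flags), never a final address, since section load addresses are assigned late and differ between the local and target processes. getSymbol() therefore derives the target address on demand. A sketch of that derivation (names invented):

  #include <cstdint>

  struct Entry { unsigned SectionID; uint64_t Offset; };

  // Re-basing a section can never leave a stale cached symbol address,
  // because the address is always recomputed from the section table.
  uint64_t targetAddr(const Entry &E, const uint64_t *SectionLoadAddrs) {
    return SectionLoadAddrs[E.SectionID] + E.Offset;
  }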
diff --git a/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldCOFFX86_64.h b/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldCOFFX86_64.h
new file mode 100644
index 0000000..ce2f4a2
--- /dev/null
+++ b/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldCOFFX86_64.h
@@ -0,0 +1,214 @@
+//===-- RuntimeDyldCOFFX86_64.h --- COFF/X86_64 specific code ---*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// COFF x86_64 support for MC-JIT runtime dynamic linker.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_EXECUTIONENGINE_RUNTIMEDYLD_TARGETS_RUNTIMEDYLDCOFFX86_64_H
+#define LLVM_LIB_EXECUTIONENGINE_RUNTIMEDYLD_TARGETS_RUNTIMEDYLDCOFFX86_64_H
+
+#include "llvm/Object/COFF.h"
+#include "llvm/Support/COFF.h"
+#include "../RuntimeDyldCOFF.h"
+
+#define DEBUG_TYPE "dyld"
+
+namespace llvm {
+
+class RuntimeDyldCOFFX86_64 : public RuntimeDyldCOFF {
+
+private:
+ // When a module is loaded we save the SectionID of the unwind
+ // sections in a table until we receive a request to register all
+ // unregistered EH frame sections with the memory manager.
+ SmallVector<SID, 2> UnregisteredEHFrameSections;
+ SmallVector<SID, 2> RegisteredEHFrameSections;
+
+public:
+ RuntimeDyldCOFFX86_64(RTDyldMemoryManager *MM) : RuntimeDyldCOFF(MM) {}
+
+ unsigned getMaxStubSize() override {
+ return 6; // 2-byte jmp instruction + 32-bit relative address
+ }
+
+ // The target location for the relocation is described by RE.SectionID and
+ // RE.Offset. RE.SectionID can be used to find the SectionEntry. Each
+ // SectionEntry has three members describing its location.
+ // SectionEntry::Address is the address at which the section has been loaded
+ // into memory in the current (host) process. SectionEntry::LoadAddress is
+ // the address that the section will have in the target process.
+ // SectionEntry::ObjAddress is the address of the bits for this section in the
+ // original emitted object image (also in the current address space).
+ //
+ // Relocations will be applied as if the section were loaded at
+ // SectionEntry::LoadAddress, but they will be applied at an address based
+ // on SectionEntry::Address. SectionEntry::ObjAddress will be used to refer
+ // to Target memory contents if they are required for value calculations.
+ //
+ // The Value parameter here is the load address of the symbol for the
+ // relocation to be applied. For relocations which refer to symbols in the
+ // current object Value will be the LoadAddress of the section in which
+ // the symbol resides (RE.Addend provides additional information about the
+ // symbol location). For external symbols, Value will be the address of the
+ // symbol in the target address space.
+ void resolveRelocation(const RelocationEntry &RE, uint64_t Value) override {
+ const SectionEntry &Section = Sections[RE.SectionID];
+ uint8_t *Target = Section.Address + RE.Offset;
+
+ switch (RE.RelType) {
+
+ case COFF::IMAGE_REL_AMD64_REL32:
+ case COFF::IMAGE_REL_AMD64_REL32_1:
+ case COFF::IMAGE_REL_AMD64_REL32_2:
+ case COFF::IMAGE_REL_AMD64_REL32_3:
+ case COFF::IMAGE_REL_AMD64_REL32_4:
+ case COFF::IMAGE_REL_AMD64_REL32_5: {
+ uint32_t *TargetAddress = (uint32_t *)Target;
+ uint64_t FinalAddress = Section.LoadAddress + RE.Offset;
+ // Delta is the distance from the start of the reloc to the end of the
+ // instruction with the reloc.
+ uint64_t Delta = 4 + (RE.RelType - COFF::IMAGE_REL_AMD64_REL32);
+ Value -= FinalAddress + Delta;
+ uint64_t Result = Value + RE.Addend;
+ assert(((int64_t)Result <= INT32_MAX) && "Relocation overflow");
+ assert(((int64_t)Result >= INT32_MIN) && "Relocation underflow");
+ *TargetAddress = Result;
+ break;
+ }
+
+ case COFF::IMAGE_REL_AMD64_ADDR32NB: {
+ // Note ADDR32NB requires a well-established notion of
+ // image base. This address must be less than or equal
+ // to every section's load address, and all sections must be
+ // within a 32 bit offset from the base.
+ //
+ // For now we just set these to zero.
+ uint32_t *TargetAddress = (uint32_t *)Target;
+ *TargetAddress = 0;
+ break;
+ }
+
+ case COFF::IMAGE_REL_AMD64_ADDR64: {
+ uint64_t *TargetAddress = (uint64_t *)Target;
+ *TargetAddress = Value + RE.Addend;
+ break;
+ }
+
+ default:
+ llvm_unreachable("Relocation type not implemented yet!");
+ break;
+ }
+ }
+
+ relocation_iterator processRelocationRef(unsigned SectionID,
+ relocation_iterator RelI,
+ const ObjectFile &Obj,
+ ObjSectionToIDMap &ObjSectionToID,
+ StubMap &Stubs) override {
+ // Find the symbol referred to in the relocation, and
+ // get its section and offset.
+ //
+ // Insist for now that all symbols be resolvable within
+ // the scope of this object file.
+ symbol_iterator Symbol = RelI->getSymbol();
+ if (Symbol == Obj.symbol_end())
+ report_fatal_error("Unknown symbol in relocation");
+ unsigned TargetSectionID = 0;
+ uint64_t TargetOffset = UnknownAddressOrSize;
+ section_iterator SecI(Obj.section_end());
+ Symbol->getSection(SecI);
+ if (SecI == Obj.section_end())
+ report_fatal_error("Unknown section in relocation");
+ bool IsCode = SecI->isText();
+ TargetSectionID = findOrEmitSection(Obj, *SecI, IsCode, ObjSectionToID);
+ TargetOffset = getSymbolOffset(*Symbol);
+
+ // Determine the Addend used to adjust the relocation value.
+ uint64_t RelType;
+ Check(RelI->getType(RelType));
+ uint64_t Offset;
+ Check(RelI->getOffset(Offset));
+ uint64_t Addend = 0;
+ SectionEntry &Section = Sections[SectionID];
+ uintptr_t ObjTarget = Section.ObjAddress + Offset;
+
+ switch (RelType) {
+
+ case COFF::IMAGE_REL_AMD64_REL32:
+ case COFF::IMAGE_REL_AMD64_REL32_1:
+ case COFF::IMAGE_REL_AMD64_REL32_2:
+ case COFF::IMAGE_REL_AMD64_REL32_3:
+ case COFF::IMAGE_REL_AMD64_REL32_4:
+ case COFF::IMAGE_REL_AMD64_REL32_5:
+ case COFF::IMAGE_REL_AMD64_ADDR32NB: {
+ uint32_t *Displacement = (uint32_t *)ObjTarget;
+ Addend = *Displacement;
+ break;
+ }
+
+ case COFF::IMAGE_REL_AMD64_ADDR64: {
+ uint64_t *Displacement = (uint64_t *)ObjTarget;
+ Addend = *Displacement;
+ break;
+ }
+
+ default:
+ break;
+ }
+
+ StringRef TargetName;
+ Symbol->getName(TargetName);
+ DEBUG(dbgs() << "\t\tIn Section " << SectionID << " Offset " << Offset
+ << " RelType: " << RelType << " TargetName: " << TargetName
+ << " Addend " << Addend << "\n");
+
+ RelocationEntry RE(SectionID, Offset, RelType, TargetOffset + Addend);
+ addRelocationForSection(RE, TargetSectionID);
+
+ return ++RelI;
+ }
+
+ unsigned getStubAlignment() override { return 1; }
+ void registerEHFrames() override {
+ if (!MemMgr)
+ return;
+ for (auto const &EHFrameSID : UnregisteredEHFrameSections) {
+ uint8_t *EHFrameAddr = Sections[EHFrameSID].Address;
+ uint64_t EHFrameLoadAddr = Sections[EHFrameSID].LoadAddress;
+ size_t EHFrameSize = Sections[EHFrameSID].Size;
+ MemMgr->registerEHFrames(EHFrameAddr, EHFrameLoadAddr, EHFrameSize);
+ RegisteredEHFrameSections.push_back(EHFrameSID);
+ }
+ UnregisteredEHFrameSections.clear();
+ }
+ void deregisterEHFrames() override {
+ // Stub
+ }
+ void finalizeLoad(const ObjectFile &Obj,
+ ObjSectionToIDMap &SectionMap) override {
+ // Look for and record the EH frame section IDs.
+ for (const auto &SectionPair : SectionMap) {
+ const SectionRef &Section = SectionPair.first;
+ StringRef Name;
+ Check(Section.getName(Name));
+ // Note unwind info is split across .pdata and .xdata, so this
+ // may not be sufficiently general for all users.
+ if (Name == ".xdata") {
+ UnregisteredEHFrameSections.push_back(SectionPair.second);
+ }
+ }
+ }
+};
+
+} // end namespace llvm
+
+#undef DEBUG_TYPE
+
+#endif
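The REL32 family in resolveRelocation() encodes a PC-relative displacement: hardware adds the stored 32-bit value to the address of the next instruction, so the resolver subtracts FinalAddress + Delta, where Delta = 4 + (RelType - IMAGE_REL_AMD64_REL32) covers the 4 displacement bytes plus any trailing immediate bytes the _1.._5 variants imply. A worked sketch with invented addresses:

  #include <cstdint>

  // Stored displacement for plain IMAGE_REL_AMD64_REL32 (no trailing bytes).
  int32_t rel32(uint64_t Value, uint64_t Addend, uint64_t FinalAddress) {
    const uint64_t Delta = 4;   // 4 + (RelType - IMAGE_REL_AMD64_REL32)
    return (int32_t)(Value + Addend - (FinalAddress + Delta));
  }
  // Example: Value 0x5000, Addend 0, FinalAddress 0x4000 -> 0xFFC.
  // At run time, next-PC 0x4004 + 0xFFC lands back on the symbol at 0x5000.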
diff --git a/lib/Fuzzer/FuzzerDriver.cpp b/lib/Fuzzer/FuzzerDriver.cpp
index 1746afd..9ccd744 100644
--- a/lib/Fuzzer/FuzzerDriver.cpp
+++ b/lib/Fuzzer/FuzzerDriver.cpp
@@ -158,6 +158,7 @@ int FuzzerDriver(int argc, char **argv, UserCallback Callback) {
Options.DoCrossOver = Flags.cross_over;
Options.MutateDepth = Flags.mutate_depth;
Options.ExitOnFirst = Flags.exit_on_first;
+ Options.UseCounters = Flags.use_counters;
Options.UseFullCoverageSet = Flags.use_full_coverage_set;
Options.UseCoveragePairs = Flags.use_coverage_pairs;
Options.PreferSmallDuringInitialShuffle =
diff --git a/lib/Fuzzer/FuzzerFlags.def b/lib/Fuzzer/FuzzerFlags.def
index 068f245..08176af 100644
--- a/lib/Fuzzer/FuzzerFlags.def
+++ b/lib/Fuzzer/FuzzerFlags.def
@@ -32,6 +32,7 @@ FUZZER_FLAG(int, help, 0, "Print help.")
FUZZER_FLAG(
int, save_minimized_corpus, 0,
"If 1, the minimized corpus is saved into the first input directory")
+FUZZER_FLAG(int, use_counters, 0, "Use coverage counters")
FUZZER_FLAG(int, use_full_coverage_set, 0,
"Experimental: Maximize the number of different full"
" coverage sets as opposed to maximizing the total coverage."
diff --git a/lib/Fuzzer/FuzzerInternal.h b/lib/Fuzzer/FuzzerInternal.h
index 980b00e..e4e5eb7 100644
--- a/lib/Fuzzer/FuzzerInternal.h
+++ b/lib/Fuzzer/FuzzerInternal.h
@@ -48,6 +48,7 @@ class Fuzzer {
bool DoCrossOver = true;
int MutateDepth = 5;
bool ExitOnFirst = false;
+ bool UseCounters = false;
bool UseFullCoverageSet = false;
bool UseCoveragePairs = false;
int PreferSmallDuringInitialShuffle = -1;
@@ -95,6 +96,15 @@ class Fuzzer {
std::vector<Unit> Corpus;
std::unordered_set<uintptr_t> FullCoverageSets;
std::unordered_set<uint64_t> CoveragePairs;
+
+ // For UseCounters
+ std::vector<uint8_t> CounterBitmap;
+ size_t TotalBits() { // Slow. Call it only for printing stats.
+ size_t Res = 0;
+ for (auto x : CounterBitmap) Res += __builtin_popcount(x);
+ return Res;
+ }
+
UserCallback Callback;
FuzzingOptions Options;
system_clock::time_point ProcessStartTime = system_clock::now();
diff --git a/lib/Fuzzer/FuzzerLoop.cpp b/lib/Fuzzer/FuzzerLoop.cpp
index 70b63eb..563fbf4 100644
--- a/lib/Fuzzer/FuzzerLoop.cpp
+++ b/lib/Fuzzer/FuzzerLoop.cpp
@@ -138,17 +138,28 @@ size_t Fuzzer::RunOneMaximizeFullCoverageSet(const Unit &U) {
}
size_t Fuzzer::RunOneMaximizeTotalCoverage(const Unit &U) {
+ size_t NumCounters = __sanitizer_get_number_of_counters();
+ if (Options.UseCounters) {
+ CounterBitmap.resize(NumCounters);
+ __sanitizer_update_counter_bitset_and_clear_counters(0);
+ }
size_t OldCoverage = __sanitizer_get_total_unique_coverage();
Callback(U.data(), U.size());
size_t NewCoverage = __sanitizer_get_total_unique_coverage();
+ size_t NumNewBits = 0;
+ if (Options.UseCounters)
+ NumNewBits = __sanitizer_update_counter_bitset_and_clear_counters(
+ CounterBitmap.data());
+
if (!(TotalNumberOfRuns & (TotalNumberOfRuns - 1)) && Options.Verbosity) {
size_t Seconds = secondsSinceProcessStartUp();
std::cerr
<< "#" << TotalNumberOfRuns
<< "\tcov: " << NewCoverage
+ << "\tbits: " << TotalBits()
<< "\texec/s: " << (Seconds ? TotalNumberOfRuns / Seconds : 0) << "\n";
}
- if (NewCoverage > OldCoverage)
+ if (NewCoverage > OldCoverage || NumNewBits)
return NewCoverage;
return 0;
}
@@ -189,6 +200,7 @@ size_t Fuzzer::MutateAndTestOne(Unit *U) {
if (Options.Verbosity) {
std::cerr << "#" << TotalNumberOfRuns
<< "\tNEW: " << NewCoverage
+ << " B: " << TotalBits()
<< " L: " << U->size()
<< " S: " << Corpus.size()
<< " I: " << i
diff --git a/lib/Fuzzer/test/CMakeLists.txt b/lib/Fuzzer/test/CMakeLists.txt
index bed9cd8..08130c6 100644
--- a/lib/Fuzzer/test/CMakeLists.txt
+++ b/lib/Fuzzer/test/CMakeLists.txt
@@ -5,6 +5,7 @@
set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} -O0 -fsanitize-coverage=4")
set(Tests
+ CounterTest
FourIndependentBranchesTest
FullCoverageSetTest
InfiniteTest
diff --git a/lib/Fuzzer/test/CounterTest.cpp b/lib/Fuzzer/test/CounterTest.cpp
new file mode 100644
index 0000000..332ccfe
--- /dev/null
+++ b/lib/Fuzzer/test/CounterTest.cpp
@@ -0,0 +1,16 @@
+// Test for a fuzzer: must find the case where a particular basic block is
+// executed many times.
+#include <cstdint>
+#include <cstdlib>
+#include <iostream>
+
+extern "C" void TestOneInput(const uint8_t *Data, size_t Size) {
+ int Num = 0;
+ for (size_t i = 0; i < Size; i++)
+ if (Data[i] == 'A' + i)
+ Num++;
+ if (Num >= 4) {
+ std::cerr << "BINGO!\n";
+ exit(1);
+ }
+}
diff --git a/lib/Fuzzer/test/fuzzer.test b/lib/Fuzzer/test/fuzzer.test
index 1e42e72..45691f5 100644
--- a/lib/Fuzzer/test/fuzzer.test
+++ b/lib/Fuzzer/test/fuzzer.test
@@ -17,3 +17,6 @@ FullCoverageSetTest: BINGO
RUN: not ./LLVMFuzzer-FourIndependentBranchesTest -timeout=15 -seed=1 -use_coverage_pairs=1 2>&1 | FileCheck %s --check-prefix=FourIndependentBranchesTest
FourIndependentBranchesTest: BINGO
+
+RUN: not ./LLVMFuzzer-CounterTest -use_counters=1 -max_len=6 -seed=1 -timeout=15 2>&1 | FileCheck %s --check-prefix=CounterTest
+CounterTest: BINGO
diff --git a/lib/IR/AsmWriter.cpp b/lib/IR/AsmWriter.cpp
index de0e614..ae0beba 100644
--- a/lib/IR/AsmWriter.cpp
+++ b/lib/IR/AsmWriter.cpp
@@ -14,9 +14,9 @@
//
//===----------------------------------------------------------------------===//
-#include "AsmWriter.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/IR/AssemblyAnnotationWriter.h"
@@ -32,12 +32,14 @@
#include "llvm/IR/Module.h"
#include "llvm/IR/Operator.h"
#include "llvm/IR/TypeFinder.h"
+#include "llvm/IR/UseListOrder.h"
#include "llvm/IR/ValueSymbolTable.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/Dwarf.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/FormattedStream.h"
#include "llvm/Support/MathExtras.h"
+#include "llvm/Support/raw_ostream.h"
#include <algorithm>
#include <cctype>
using namespace llvm;
@@ -275,6 +277,15 @@ static const Module *getModuleFromVal(const Value *V) {
if (const GlobalValue *GV = dyn_cast<GlobalValue>(V))
return GV->getParent();
+
+ if (const auto *MAV = dyn_cast<MetadataAsValue>(V)) {
+ for (const User *U : MAV->users())
+ if (isa<Instruction>(U))
+ if (const Module *M = getModuleFromVal(U))
+ return M;
+ return nullptr;
+ }
+
return nullptr;
}
@@ -378,7 +389,29 @@ static void PrintLLVMName(raw_ostream &OS, const Value *V) {
}
-namespace llvm {
+namespace {
+class TypePrinting {
+ TypePrinting(const TypePrinting &) = delete;
+ void operator=(const TypePrinting&) = delete;
+public:
+
+ /// NamedTypes - The named types that are used by the current module.
+ TypeFinder NamedTypes;
+
+ /// NumberedTypes - The numbered types, along with their value.
+ DenseMap<StructType*, unsigned> NumberedTypes;
+
+ TypePrinting() {}
+ ~TypePrinting() {}
+
+ void incorporateTypes(const Module &M);
+
+ void print(Type *Ty, raw_ostream &OS);
+
+ void printStructBody(StructType *Ty, raw_ostream &OS);
+};
+} // namespace
void TypePrinting::incorporateTypes(const Module &M) {
NamedTypes.run(M, false);
@@ -508,6 +541,7 @@ void TypePrinting::printStructBody(StructType *STy, raw_ostream &OS) {
OS << '>';
}
+namespace {
//===----------------------------------------------------------------------===//
// SlotTracker Class: Enumerate slot numbers for unnamed values
//===----------------------------------------------------------------------===//
@@ -525,6 +559,7 @@ private:
/// TheFunction - The function for which we are holding slot numbers.
const Function* TheFunction;
bool FunctionProcessed;
+ bool ShouldInitializeAllMetadata;
/// mMap - The slot map for the module level data.
ValueMap mMap;
@@ -542,10 +577,20 @@ private:
DenseMap<AttributeSet, unsigned> asMap;
unsigned asNext;
public:
- /// Construct from a module
- explicit SlotTracker(const Module *M);
+ /// Construct from a module.
+ ///
+ /// If \c ShouldInitializeAllMetadata, initializes all metadata in all
+ /// functions, giving correct numbering for metadata referenced only from
+ /// within a function (even if no functions have been initialized).
+ explicit SlotTracker(const Module *M,
+ bool ShouldInitializeAllMetadata = false);
/// Construct from a function, starting out in incorp state.
- explicit SlotTracker(const Function *F);
+ ///
+ /// If \c ShouldInitializeAllMetadata, initializes all metadata in all
+ /// functions, giving correct numbering for metadata referenced only from
+ /// within a function (even if no functions have been initialized).
+ explicit SlotTracker(const Function *F,
+ bool ShouldInitializeAllMetadata = false);
/// Return the slot number of the specified value in its type
/// plane. If something is not in the SlotTracker, return -1.
@@ -606,11 +651,18 @@ private:
/// Add all of the functions arguments, basic blocks, and instructions.
void processFunction();
+ /// Add all of the metadata from a function.
+ void processFunctionMetadata(const Function &F);
+
+ /// Add all of the metadata from an instruction.
+ void processInstructionMetadata(const Instruction &I);
+
SlotTracker(const SlotTracker &) = delete;
void operator=(const SlotTracker &) = delete;
};
+} // namespace
-SlotTracker *createSlotTracker(const Module *M) {
+static SlotTracker *createSlotTracker(const Module *M) {
return new SlotTracker(M);
}
@@ -645,15 +697,18 @@ static SlotTracker *createSlotTracker(const Value *V) {
// Module level constructor. Causes the contents of the Module (sans functions)
// to be added to the slot table.
-SlotTracker::SlotTracker(const Module *M)
- : TheModule(M), TheFunction(nullptr), FunctionProcessed(false), mNext(0),
+SlotTracker::SlotTracker(const Module *M, bool ShouldInitializeAllMetadata)
+ : TheModule(M), TheFunction(nullptr), FunctionProcessed(false),
+ ShouldInitializeAllMetadata(ShouldInitializeAllMetadata), mNext(0),
fNext(0), mdnNext(0), asNext(0) {}
// Function level constructor. Causes the contents of the Module and the one
// function provided to be added to the slot table.
-SlotTracker::SlotTracker(const Function *F)
+SlotTracker::SlotTracker(const Function *F, bool ShouldInitializeAllMetadata)
: TheModule(F ? F->getParent() : nullptr), TheFunction(F),
- FunctionProcessed(false), mNext(0), fNext(0), mdnNext(0), asNext(0) {}
+ FunctionProcessed(false),
+ ShouldInitializeAllMetadata(ShouldInitializeAllMetadata), mNext(0),
+ fNext(0), mdnNext(0), asNext(0) {}
inline void SlotTracker::initialize() {
if (TheModule) {
@@ -692,6 +747,9 @@ void SlotTracker::processModule() {
// Add all the unnamed functions to the table.
CreateModuleSlot(I);
+ if (ShouldInitializeAllMetadata)
+ processFunctionMetadata(*I);
+
// Add all the function attributes to the table.
// FIXME: Add attributes of other objects?
AttributeSet FnAttrs = I->getAttributes().getFnAttributes();
@@ -715,46 +773,30 @@ void SlotTracker::processFunction() {
ST_DEBUG("Inserting Instructions:\n");
- SmallVector<std::pair<unsigned, MDNode *>, 4> MDForInst;
-
// Add all of the basic blocks and instructions with no names.
- for (Function::const_iterator BB = TheFunction->begin(),
- E = TheFunction->end(); BB != E; ++BB) {
- if (!BB->hasName())
- CreateFunctionSlot(BB);
-
- for (BasicBlock::const_iterator I = BB->begin(), E = BB->end(); I != E;
- ++I) {
- if (!I->getType()->isVoidTy() && !I->hasName())
- CreateFunctionSlot(I);
-
- // Intrinsics can directly use metadata. We allow direct calls to any
- // llvm.foo function here, because the target may not be linked into the
- // optimizer.
- if (const CallInst *CI = dyn_cast<CallInst>(I)) {
- if (Function *F = CI->getCalledFunction())
- if (F->isIntrinsic())
- for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i)
- if (auto *V = dyn_cast_or_null<MetadataAsValue>(I->getOperand(i)))
- if (MDNode *N = dyn_cast<MDNode>(V->getMetadata()))
- CreateMetadataSlot(N);
+ for (auto &BB : *TheFunction) {
+ if (!BB.hasName())
+ CreateFunctionSlot(&BB);
+
+ for (auto &I : BB) {
+ if (!I.getType()->isVoidTy() && !I.hasName())
+ CreateFunctionSlot(&I);
+
+ processInstructionMetadata(I);
+ // We allow direct calls to any llvm.foo function here, because the
+ // target may not be linked into the optimizer.
+ if (const CallInst *CI = dyn_cast<CallInst>(&I)) {
// Add all the call attributes to the table.
AttributeSet Attrs = CI->getAttributes().getFnAttributes();
if (Attrs.hasAttributes(AttributeSet::FunctionIndex))
CreateAttributeSetSlot(Attrs);
- } else if (const InvokeInst *II = dyn_cast<InvokeInst>(I)) {
+ } else if (const InvokeInst *II = dyn_cast<InvokeInst>(&I)) {
// Add all the call attributes to the table.
AttributeSet Attrs = II->getAttributes().getFnAttributes();
if (Attrs.hasAttributes(AttributeSet::FunctionIndex))
CreateAttributeSetSlot(Attrs);
}
-
- // Process metadata attached with this instruction.
- I->getAllMetadata(MDForInst);
- for (unsigned i = 0, e = MDForInst.size(); i != e; ++i)
- CreateMetadataSlot(MDForInst[i].second);
- MDForInst.clear();
}
}
@@ -763,6 +805,29 @@ void SlotTracker::processFunction() {
ST_DEBUG("end processFunction!\n");
}
+void SlotTracker::processFunctionMetadata(const Function &F) {
+ for (auto &BB : F)
+ for (auto &I : BB)
+ processInstructionMetadata(I);
+}
+
+void SlotTracker::processInstructionMetadata(const Instruction &I) {
+ // Process metadata used directly by intrinsics.
+ if (const CallInst *CI = dyn_cast<CallInst>(&I))
+ if (Function *F = CI->getCalledFunction())
+ if (F->isIntrinsic())
+ for (auto &Op : I.operands())
+ if (auto *V = dyn_cast_or_null<MetadataAsValue>(Op))
+ if (MDNode *N = dyn_cast<MDNode>(V->getMetadata()))
+ CreateMetadataSlot(N);
+
+ // Process metadata attached to this instruction.
+ SmallVector<std::pair<unsigned, MDNode *>, 4> MDs;
+ I.getAllMetadata(MDs);
+ for (auto &MD : MDs)
+ CreateMetadataSlot(MD.second);
+}
+
/// Clean up after incorporating a function. This is the only way to get out of
/// the function incorporation state that affects get*Slot/Create*Slot. Function
/// incorporation state is indicated by TheFunction != 0.
@@ -1010,7 +1075,7 @@ static void WriteConstantInternal(raw_ostream &Out, const Constant *CV,
(StrVal[1] >= '0' && StrVal[1] <= '9'))) {
// Reparse stringized version!
if (APFloat(APFloat::IEEEdouble, StrVal).convertToDouble() == Val) {
- Out << StrVal.str();
+ Out << StrVal;
return;
}
}
@@ -1223,6 +1288,14 @@ static void WriteConstantInternal(raw_ostream &Out, const Constant *CV,
Out << ' ' << getPredicateText(CE->getPredicate());
Out << " (";
+ if (const GEPOperator *GEP = dyn_cast<GEPOperator>(CE)) {
+ TypePrinter.print(
+ cast<PointerType>(GEP->getPointerOperandType()->getScalarType())
+ ->getElementType(),
+ Out);
+ Out << ", ";
+ }
+
for (User::const_op_iterator OI=CE->op_begin(); OI != CE->op_end(); ++OI) {
TypePrinter.print((*OI)->getType(), Out);
Out << ' ';
@@ -1285,8 +1358,52 @@ raw_ostream &operator<<(raw_ostream &OS, FieldSeparator &FS) {
}
return OS << FS.Sep;
}
+struct MDFieldPrinter {
+ raw_ostream &Out;
+ FieldSeparator FS;
+ TypePrinting *TypePrinter;
+ SlotTracker *Machine;
+ const Module *Context;
+
+ explicit MDFieldPrinter(raw_ostream &Out)
+ : Out(Out), TypePrinter(nullptr), Machine(nullptr), Context(nullptr) {}
+ MDFieldPrinter(raw_ostream &Out, TypePrinting *TypePrinter,
+ SlotTracker *Machine, const Module *Context)
+ : Out(Out), TypePrinter(TypePrinter), Machine(Machine), Context(Context) {
+ }
+ void printTag(const DebugNode *N);
+ void printString(StringRef Name, StringRef Value,
+ bool ShouldSkipEmpty = true);
+ void printMetadata(StringRef Name, const Metadata *MD,
+ bool ShouldSkipNull = true);
+ template <class IntTy>
+ void printInt(StringRef Name, IntTy Int, bool ShouldSkipZero = true);
+ void printBool(StringRef Name, bool Value);
+ void printDIFlags(StringRef Name, unsigned Flags);
+ template <class IntTy, class Stringifier>
+ void printDwarfEnum(StringRef Name, IntTy Value, Stringifier toString,
+ bool ShouldSkipZero = true);
+};
} // end namespace
+void MDFieldPrinter::printTag(const DebugNode *N) {
+ Out << FS << "tag: ";
+ if (const char *Tag = dwarf::TagString(N->getTag()))
+ Out << Tag;
+ else
+ Out << N->getTag();
+}
+
+void MDFieldPrinter::printString(StringRef Name, StringRef Value,
+ bool ShouldSkipEmpty) {
+ if (ShouldSkipEmpty && Value.empty())
+ return;
+
+ Out << FS << Name << ": \"";
+ PrintEscapedString(Value, Out);
+ Out << "\"";
+}
+
static void writeMetadataAsOperand(raw_ostream &Out, const Metadata *MD,
TypePrinting *TypePrinter,
SlotTracker *Machine,
@@ -1298,27 +1415,68 @@ static void writeMetadataAsOperand(raw_ostream &Out, const Metadata *MD,
WriteAsOperandInternal(Out, MD, TypePrinter, Machine, Context);
}
-static void writeTag(raw_ostream &Out, FieldSeparator &FS, const DebugNode *N) {
- Out << FS << "tag: ";
- if (const char *Tag = dwarf::TagString(N->getTag()))
- Out << Tag;
+void MDFieldPrinter::printMetadata(StringRef Name, const Metadata *MD,
+ bool ShouldSkipNull) {
+ if (ShouldSkipNull && !MD)
+ return;
+
+ Out << FS << Name << ": ";
+ writeMetadataAsOperand(Out, MD, TypePrinter, Machine, Context);
+}
+
+template <class IntTy>
+void MDFieldPrinter::printInt(StringRef Name, IntTy Int, bool ShouldSkipZero) {
+ if (ShouldSkipZero && !Int)
+ return;
+
+ Out << FS << Name << ": " << Int;
+}
+
+void MDFieldPrinter::printBool(StringRef Name, bool Value) {
+ Out << FS << Name << ": " << (Value ? "true" : "false");
+}
+
+void MDFieldPrinter::printDIFlags(StringRef Name, unsigned Flags) {
+ if (!Flags)
+ return;
+
+ Out << FS << Name << ": ";
+
+ SmallVector<unsigned, 8> SplitFlags;
+ unsigned Extra = DIDescriptor::splitFlags(Flags, SplitFlags);
+
+ FieldSeparator FlagsFS(" | ");
+ for (unsigned F : SplitFlags) {
+ const char *StringF = DIDescriptor::getFlagString(F);
+ assert(StringF && "Expected valid flag");
+ Out << FlagsFS << StringF;
+ }
+ if (Extra || SplitFlags.empty())
+ Out << FlagsFS << Extra;
+}
+
+template <class IntTy, class Stringifier>
+void MDFieldPrinter::printDwarfEnum(StringRef Name, IntTy Value,
+ Stringifier toString, bool ShouldSkipZero) {
+ if (!Value)
+ return;
+
+ Out << FS << Name << ": ";
+ if (const char *S = toString(Value))
+ Out << S;
else
- Out << N->getTag();
+ Out << Value;
}
static void writeGenericDebugNode(raw_ostream &Out, const GenericDebugNode *N,
TypePrinting *TypePrinter,
SlotTracker *Machine, const Module *Context) {
Out << "!GenericDebugNode(";
- FieldSeparator FS;
- writeTag(Out, FS, N);
- if (!N->getHeader().empty()) {
- Out << FS << "header: \"";
- PrintEscapedString(N->getHeader(), Out);
- Out << "\"";
- }
+ MDFieldPrinter Printer(Out, TypePrinter, Machine, Context);
+ Printer.printTag(N);
+ Printer.printString("header", N->getHeader());
if (N->getNumDwarfOperands()) {
- Out << FS << "operands: {";
+ Out << Printer.FS << "operands: {";
FieldSeparator IFS;
for (auto &I : N->dwarf_operands()) {
Out << IFS;
@@ -1333,111 +1491,64 @@ static void writeMDLocation(raw_ostream &Out, const MDLocation *DL,
TypePrinting *TypePrinter, SlotTracker *Machine,
const Module *Context) {
Out << "!MDLocation(";
- FieldSeparator FS;
+ MDFieldPrinter Printer(Out, TypePrinter, Machine, Context);
// Always output the line, since 0 is a relevant and important value for it.
- Out << FS << "line: " << DL->getLine();
- if (DL->getColumn())
- Out << FS << "column: " << DL->getColumn();
- Out << FS << "scope: ";
- WriteAsOperandInternal(Out, DL->getScope(), TypePrinter, Machine, Context);
- if (DL->getInlinedAt()) {
- Out << FS << "inlinedAt: ";
- WriteAsOperandInternal(Out, DL->getInlinedAt(), TypePrinter, Machine,
- Context);
- }
+ Printer.printInt("line", DL->getLine(), /* ShouldSkipZero */ false);
+ Printer.printInt("column", DL->getColumn());
+ Printer.printMetadata("scope", DL->getRawScope(), /* ShouldSkipNull */ false);
+ Printer.printMetadata("inlinedAt", DL->getRawInlinedAt());
Out << ")";
}
static void writeMDSubrange(raw_ostream &Out, const MDSubrange *N,
TypePrinting *, SlotTracker *, const Module *) {
Out << "!MDSubrange(";
- FieldSeparator FS;
- Out << FS << "count: " << N->getCount();
- if (N->getLo())
- Out << FS << "lowerBound: " << N->getLo();
+ MDFieldPrinter Printer(Out);
+ Printer.printInt("count", N->getCount(), /* ShouldSkipZero */ false);
+ Printer.printInt("lowerBound", N->getLo());
Out << ")";
}
static void writeMDEnumerator(raw_ostream &Out, const MDEnumerator *N,
TypePrinting *, SlotTracker *, const Module *) {
Out << "!MDEnumerator(";
- FieldSeparator FS;
- Out << FS << "name: \"" << N->getName() << "\"";
- Out << FS << "value: " << N->getValue();
+ MDFieldPrinter Printer(Out);
+ Printer.printString("name", N->getName(), /* ShouldSkipEmpty */ false);
+ Printer.printInt("value", N->getValue(), /* ShouldSkipZero */ false);
Out << ")";
}
static void writeMDBasicType(raw_ostream &Out, const MDBasicType *N,
TypePrinting *, SlotTracker *, const Module *) {
Out << "!MDBasicType(";
- FieldSeparator FS;
- writeTag(Out, FS, N);
- if (!N->getName().empty())
- Out << FS << "name: \"" << N->getName() << "\"";
- if (N->getSizeInBits())
- Out << FS << "size: " << N->getSizeInBits();
- if (N->getAlignInBits())
- Out << FS << "align: " << N->getAlignInBits();
- if (unsigned Encoding = N->getEncoding()) {
- Out << FS << "encoding: ";
- if (const char *S = dwarf::AttributeEncodingString(Encoding))
- Out << S;
- else
- Out << Encoding;
- }
+ MDFieldPrinter Printer(Out);
+ if (N->getTag() != dwarf::DW_TAG_base_type)
+ Printer.printTag(N);
+ Printer.printString("name", N->getName());
+ Printer.printInt("size", N->getSizeInBits());
+ Printer.printInt("align", N->getAlignInBits());
+ Printer.printDwarfEnum("encoding", N->getEncoding(),
+ dwarf::AttributeEncodingString);
Out << ")";
}
-static void writeDIFlags(raw_ostream &Out, unsigned Flags) {
- SmallVector<unsigned, 8> SplitFlags;
- unsigned Extra = DIDescriptor::splitFlags(Flags, SplitFlags);
-
- FieldSeparator FS(" | ");
- for (unsigned F : SplitFlags) {
- const char *StringF = DIDescriptor::getFlagString(F);
- assert(StringF && "Expected valid flag");
- Out << FS << StringF;
- }
- if (Extra || SplitFlags.empty())
- Out << FS << Extra;
-}
-
static void writeMDDerivedType(raw_ostream &Out, const MDDerivedType *N,
TypePrinting *TypePrinter, SlotTracker *Machine,
const Module *Context) {
Out << "!MDDerivedType(";
- FieldSeparator FS;
- writeTag(Out, FS, N);
- if (!N->getName().empty())
- Out << FS << "name: \"" << N->getName() << "\"";
- if (N->getFile()) {
- Out << FS << "file: ";
- writeMetadataAsOperand(Out, N->getFile(), TypePrinter, Machine,
- Context);
- }
- if (N->getLine())
- Out << FS << "line: " << N->getLine();
- if (N->getScope()) {
- Out << FS << "scope: ";
- writeMetadataAsOperand(Out, N->getScope(), TypePrinter, Machine, Context);
- }
- Out << FS << "baseType: ";
- writeMetadataAsOperand(Out, N->getBaseType(), TypePrinter, Machine, Context);
- if (N->getSizeInBits())
- Out << FS << "size: " << N->getSizeInBits();
- if (N->getAlignInBits())
- Out << FS << "align: " << N->getAlignInBits();
- if (N->getOffsetInBits())
- Out << FS << "offset: " << N->getOffsetInBits();
- if (auto Flags = N->getFlags()) {
- Out << FS << "flags: ";
- writeDIFlags(Out, Flags);
- }
- if (N->getExtraData()) {
- Out << FS << "extraData: ";
- writeMetadataAsOperand(Out, N->getExtraData(), TypePrinter, Machine,
- Context);
- }
+ MDFieldPrinter Printer(Out, TypePrinter, Machine, Context);
+ Printer.printTag(N);
+ Printer.printString("name", N->getName());
+ Printer.printMetadata("scope", N->getScope());
+ Printer.printMetadata("file", N->getFile());
+ Printer.printInt("line", N->getLine());
+ Printer.printMetadata("baseType", N->getBaseType(),
+ /* ShouldSkipNull */ false);
+ Printer.printInt("size", N->getSizeInBits());
+ Printer.printInt("align", N->getAlignInBits());
+ Printer.printInt("offset", N->getOffsetInBits());
+ Printer.printDIFlags("flags", N->getFlags());
+ Printer.printMetadata("extraData", N->getExtraData());
Out << ")";
}
@@ -1445,61 +1556,23 @@ static void writeMDCompositeType(raw_ostream &Out, const MDCompositeType *N,
TypePrinting *TypePrinter,
SlotTracker *Machine, const Module *Context) {
Out << "!MDCompositeType(";
- FieldSeparator FS;
- writeTag(Out, FS, N);
- if (!N->getName().empty())
- Out << FS << "name: \"" << N->getName() << "\"";
- if (N->getFile()) {
- Out << FS << "file: ";
- writeMetadataAsOperand(Out, N->getFile(), TypePrinter, Machine,
- Context);
- }
- if (N->getLine())
- Out << FS << "line: " << N->getLine();
- if (N->getScope()) {
- Out << FS << "scope: ";
- writeMetadataAsOperand(Out, N->getScope(), TypePrinter, Machine, Context);
- }
- if (N->getBaseType()) {
- Out << FS << "baseType: ";
- writeMetadataAsOperand(Out, N->getBaseType(), TypePrinter, Machine,
- Context);
- }
- if (N->getSizeInBits())
- Out << FS << "size: " << N->getSizeInBits();
- if (N->getAlignInBits())
- Out << FS << "align: " << N->getAlignInBits();
- if (N->getOffsetInBits())
- Out << FS << "offset: " << N->getOffsetInBits();
- if (auto Flags = N->getFlags()) {
- Out << FS << "flags: ";
- writeDIFlags(Out, Flags);
- }
- if (N->getElements()) {
- Out << FS << "elements: ";
- writeMetadataAsOperand(Out, N->getElements(), TypePrinter, Machine,
- Context);
- }
- if (unsigned Lang = N->getRuntimeLang()) {
- Out << FS << "runtimeLang: ";
- if (const char *S = dwarf::LanguageString(Lang))
- Out << S;
- else
- Out << Lang;
- }
-
- if (N->getVTableHolder()) {
- Out << FS << "vtableHolder: ";
- writeMetadataAsOperand(Out, N->getVTableHolder(), TypePrinter, Machine,
- Context);
- }
- if (N->getTemplateParams()) {
- Out << FS << "templateParams: ";
- writeMetadataAsOperand(Out, N->getTemplateParams(), TypePrinter, Machine,
- Context);
- }
- if (!N->getIdentifier().empty())
- Out << FS << "identifier: \"" << N->getIdentifier() << "\"";
+ MDFieldPrinter Printer(Out, TypePrinter, Machine, Context);
+ Printer.printTag(N);
+ Printer.printString("name", N->getName());
+ Printer.printMetadata("scope", N->getScope());
+ Printer.printMetadata("file", N->getFile());
+ Printer.printInt("line", N->getLine());
+ Printer.printMetadata("baseType", N->getBaseType());
+ Printer.printInt("size", N->getSizeInBits());
+ Printer.printInt("align", N->getAlignInBits());
+ Printer.printInt("offset", N->getOffsetInBits());
+ Printer.printDIFlags("flags", N->getFlags());
+ Printer.printMetadata("elements", N->getElements());
+ Printer.printDwarfEnum("runtimeLang", N->getRuntimeLang(),
+ dwarf::LanguageString);
+ Printer.printMetadata("vtableHolder", N->getVTableHolder());
+ Printer.printMetadata("templateParams", N->getTemplateParams());
+ Printer.printString("identifier", N->getIdentifier());
Out << ")";
}
@@ -1507,22 +1580,20 @@ static void writeMDSubroutineType(raw_ostream &Out, const MDSubroutineType *N,
TypePrinting *TypePrinter,
SlotTracker *Machine, const Module *Context) {
Out << "!MDSubroutineType(";
- FieldSeparator FS;
- if (auto Flags = N->getFlags()) {
- Out << FS << "flags: ";
- writeDIFlags(Out, Flags);
- }
- Out << FS << "types: ";
- writeMetadataAsOperand(Out, N->getTypeArray(), TypePrinter, Machine, Context);
+ MDFieldPrinter Printer(Out, TypePrinter, Machine, Context);
+ Printer.printDIFlags("flags", N->getFlags());
+ Printer.printMetadata("types", N->getTypeArray(), /* ShouldSkipNull */ false);
Out << ")";
}
static void writeMDFile(raw_ostream &Out, const MDFile *N, TypePrinting *,
SlotTracker *, const Module *) {
Out << "!MDFile(";
- FieldSeparator FS;
- Out << FS << "filename: \"" << N->getFilename() << "\"";
- Out << FS << "directory: \"" << N->getDirectory() << "\"";
+ MDFieldPrinter Printer(Out);
+ Printer.printString("filename", N->getFilename(),
+ /* ShouldSkipEmpty */ false);
+ Printer.printString("directory", N->getDirectory(),
+ /* ShouldSkipEmpty */ false);
Out << ")";
}
@@ -1530,48 +1601,23 @@ static void writeMDCompileUnit(raw_ostream &Out, const MDCompileUnit *N,
TypePrinting *TypePrinter, SlotTracker *Machine,
const Module *Context) {
Out << "!MDCompileUnit(";
- FieldSeparator FS;
- Out << FS << "language: ";
- if (const char *Lang = dwarf::LanguageString(N->getSourceLanguage()))
- Out << Lang;
- else
- Out << N->getSourceLanguage();
- Out << FS << "file: ";
- writeMetadataAsOperand(Out, N->getFile(), TypePrinter, Machine, Context);
- if (!N->getProducer().empty())
- Out << FS << "producer: \"" << N->getProducer() << "\"";
- Out << FS << "isOptimized: " << (N->isOptimized() ? "true" : "false");
- if (!N->getFlags().empty())
- Out << FS << "flags: \"" << N->getFlags() << "\"";
- Out << FS << "runtimeVersion: " << N->getRuntimeVersion();
- if (!N->getSplitDebugFilename().empty())
- Out << FS << "splitDebugFilename: \"" << N->getSplitDebugFilename() << "\"";
- Out << FS << "emissionKind: " << N->getEmissionKind();
- if (N->getEnumTypes()) {
- Out << FS << "enums: ";
- writeMetadataAsOperand(Out, N->getEnumTypes(), TypePrinter, Machine,
- Context);
- }
- if (N->getRetainedTypes()) {
- Out << FS << "retainedTypes: ";
- writeMetadataAsOperand(Out, N->getRetainedTypes(), TypePrinter, Machine,
- Context);
- }
- if (N->getSubprograms()) {
- Out << FS << "subprograms: ";
- writeMetadataAsOperand(Out, N->getSubprograms(), TypePrinter, Machine,
- Context);
- }
- if (N->getGlobalVariables()) {
- Out << FS << "globals: ";
- writeMetadataAsOperand(Out, N->getGlobalVariables(), TypePrinter, Machine,
- Context);
- }
- if (N->getImportedEntities()) {
- Out << FS << "imports: ";
- writeMetadataAsOperand(Out, N->getImportedEntities(), TypePrinter, Machine,
- Context);
- }
+ MDFieldPrinter Printer(Out, TypePrinter, Machine, Context);
+ Printer.printDwarfEnum("language", N->getSourceLanguage(),
+ dwarf::LanguageString, /* ShouldSkipZero */ false);
+ Printer.printMetadata("file", N->getFile(), /* ShouldSkipNull */ false);
+ Printer.printString("producer", N->getProducer());
+ Printer.printBool("isOptimized", N->isOptimized());
+ Printer.printString("flags", N->getFlags());
+ Printer.printInt("runtimeVersion", N->getRuntimeVersion(),
+ /* ShouldSkipZero */ false);
+ Printer.printString("splitDebugFilename", N->getSplitDebugFilename());
+ Printer.printInt("emissionKind", N->getEmissionKind(),
+ /* ShouldSkipZero */ false);
+ Printer.printMetadata("enums", N->getEnumTypes());
+ Printer.printMetadata("retainedTypes", N->getRetainedTypes());
+ Printer.printMetadata("subprograms", N->getSubprograms());
+ Printer.printMetadata("globals", N->getGlobalVariables());
+ Printer.printMetadata("imports", N->getImportedEntities());
Out << ")";
}
@@ -1579,67 +1625,26 @@ static void writeMDSubprogram(raw_ostream &Out, const MDSubprogram *N,
TypePrinting *TypePrinter, SlotTracker *Machine,
const Module *Context) {
Out << "!MDSubprogram(";
- FieldSeparator FS;
- Out << FS << "scope: ";
- writeMetadataAsOperand(Out, N->getScope(), TypePrinter, Machine, Context);
- Out << FS << "name: \"" << N->getName() << "\"";
- if (!N->getLinkageName().empty())
- Out << FS << "linkageName: \"" << N->getLinkageName() << "\"";
- if (N->getFile()) {
- Out << FS << "file: ";
- writeMetadataAsOperand(Out, N->getFile(), TypePrinter, Machine,
- Context);
- }
- if (N->getLine())
- Out << FS << "line: " << N->getLine();
- if (N->getType()) {
- Out << FS << "type: ";
- writeMetadataAsOperand(Out, N->getType(), TypePrinter, Machine,
- Context);
- }
- Out << FS << "isLocal: " << (N->isLocalToUnit() ? "true" : "false");
- Out << FS << "isDefinition: " << (N->isDefinition() ? "true" : "false");
- if (N->getScopeLine())
- Out << FS << "scopeLine: " << N->getScopeLine();
- if (N->getContainingType()) {
- Out << FS << "containingType: ";
- writeMetadataAsOperand(Out, N->getContainingType(), TypePrinter, Machine,
- Context);
- }
- if (unsigned V = N->getVirtuality()) {
- Out << FS << "virtuality: ";
- if (const char *S = dwarf::VirtualityString(V))
- Out << S;
- else
- Out << V;
- }
- if (N->getVirtualIndex())
- Out << FS << "virtualIndex: " << N->getVirtualIndex();
- if (auto Flags = N->getFlags()) {
- Out << FS << "flags: ";
- writeDIFlags(Out, Flags);
- }
- Out << FS << "isOptimized: " << (N->isOptimized() ? "true" : "false");
- if (N->getFunction()) {
- Out << FS << "function: ";
- writeMetadataAsOperand(Out, N->getFunction(), TypePrinter, Machine,
- Context);
- }
- if (N->getTemplateParams()) {
- Out << FS << "templateParams: ";
- writeMetadataAsOperand(Out, N->getTemplateParams(), TypePrinter, Machine,
- Context);
- }
- if (N->getDeclaration()) {
- Out << FS << "declaration: ";
- writeMetadataAsOperand(Out, N->getDeclaration(), TypePrinter, Machine,
- Context);
- }
- if (N->getVariables()) {
- Out << FS << "variables: ";
- writeMetadataAsOperand(Out, N->getVariables(), TypePrinter, Machine,
- Context);
- }
+ MDFieldPrinter Printer(Out, TypePrinter, Machine, Context);
+ Printer.printString("name", N->getName());
+ Printer.printString("linkageName", N->getLinkageName());
+ Printer.printMetadata("scope", N->getScope(), /* ShouldSkipNull */ false);
+ Printer.printMetadata("file", N->getFile());
+ Printer.printInt("line", N->getLine());
+ Printer.printMetadata("type", N->getType());
+ Printer.printBool("isLocal", N->isLocalToUnit());
+ Printer.printBool("isDefinition", N->isDefinition());
+ Printer.printInt("scopeLine", N->getScopeLine());
+ Printer.printMetadata("containingType", N->getContainingType());
+ Printer.printDwarfEnum("virtuality", N->getVirtuality(),
+ dwarf::VirtualityString);
+ Printer.printInt("virtualIndex", N->getVirtualIndex());
+ Printer.printDIFlags("flags", N->getFlags());
+ Printer.printBool("isOptimized", N->isOptimized());
+ Printer.printMetadata("function", N->getFunction());
+ Printer.printMetadata("templateParams", N->getTemplateParams());
+ Printer.printMetadata("declaration", N->getDeclaration());
+ Printer.printMetadata("variables", N->getVariables());
Out << ")";
}
@@ -1647,18 +1652,11 @@ static void writeMDLexicalBlock(raw_ostream &Out, const MDLexicalBlock *N,
TypePrinting *TypePrinter, SlotTracker *Machine,
const Module *Context) {
Out << "!MDLexicalBlock(";
- FieldSeparator FS;
- Out << FS << "scope: ";
- writeMetadataAsOperand(Out, N->getScope(), TypePrinter, Machine, Context);
- if (N->getFile()) {
- Out << FS << "file: ";
- writeMetadataAsOperand(Out, N->getFile(), TypePrinter, Machine,
- Context);
- }
- if (N->getLine())
- Out << FS << "line: " << N->getLine();
- if (N->getColumn())
- Out << FS << "column: " << N->getColumn();
+ MDFieldPrinter Printer(Out, TypePrinter, Machine, Context);
+ Printer.printMetadata("scope", N->getScope(), /* ShouldSkipNull */ false);
+ Printer.printMetadata("file", N->getFile());
+ Printer.printInt("line", N->getLine());
+ Printer.printInt("column", N->getColumn());
Out << ")";
}
@@ -1668,15 +1666,11 @@ static void writeMDLexicalBlockFile(raw_ostream &Out,
SlotTracker *Machine,
const Module *Context) {
Out << "!MDLexicalBlockFile(";
- FieldSeparator FS;
- Out << FS << "scope: ";
- writeMetadataAsOperand(Out, N->getScope(), TypePrinter, Machine, Context);
- if (N->getFile()) {
- Out << FS << "file: ";
- writeMetadataAsOperand(Out, N->getFile(), TypePrinter, Machine,
- Context);
- }
- Out << FS << "discriminator: " << N->getDiscriminator();
+ MDFieldPrinter Printer(Out, TypePrinter, Machine, Context);
+ Printer.printMetadata("scope", N->getScope(), /* ShouldSkipNull */ false);
+ Printer.printMetadata("file", N->getFile());
+ Printer.printInt("discriminator", N->getDiscriminator(),
+ /* ShouldSkipZero */ false);
Out << ")";
}
@@ -1684,17 +1678,11 @@ static void writeMDNamespace(raw_ostream &Out, const MDNamespace *N,
TypePrinting *TypePrinter, SlotTracker *Machine,
const Module *Context) {
Out << "!MDNamespace(";
- FieldSeparator FS;
- Out << FS << "scope: ";
- writeMetadataAsOperand(Out, N->getScope(), TypePrinter, Machine, Context);
- if (N->getFile()) {
- Out << FS << "file: ";
- writeMetadataAsOperand(Out, N->getFile(), TypePrinter, Machine, Context);
- }
- if (!N->getName().empty())
- Out << FS << "name: \"" << N->getName() << "\"";
- if (N->getLine())
- Out << FS << "line: " << N->getLine();
+ MDFieldPrinter Printer(Out, TypePrinter, Machine, Context);
+ Printer.printString("name", N->getName());
+ Printer.printMetadata("scope", N->getScope(), /* ShouldSkipNull */ false);
+ Printer.printMetadata("file", N->getFile());
+ Printer.printInt("line", N->getLine());
Out << ")";
}
@@ -1704,10 +1692,9 @@ static void writeMDTemplateTypeParameter(raw_ostream &Out,
SlotTracker *Machine,
const Module *Context) {
Out << "!MDTemplateTypeParameter(";
- FieldSeparator FS;
- Out << FS << "name: \"" << N->getName() << "\"";
- Out << FS << "type: ";
- writeMetadataAsOperand(Out, N->getType(), TypePrinter, Machine, Context);
+ MDFieldPrinter Printer(Out, TypePrinter, Machine, Context);
+ Printer.printString("name", N->getName());
+ Printer.printMetadata("type", N->getType(), /* ShouldSkipNull */ false);
Out << ")";
}
@@ -1717,13 +1704,12 @@ static void writeMDTemplateValueParameter(raw_ostream &Out,
SlotTracker *Machine,
const Module *Context) {
Out << "!MDTemplateValueParameter(";
- FieldSeparator FS;
- writeTag(Out, FS, N);
- Out << FS << "name: \"" << N->getName() << "\"";
- Out << FS << "type: ";
- writeMetadataAsOperand(Out, N->getType(), TypePrinter, Machine, Context);
- Out << FS << "value: ";
- writeMetadataAsOperand(Out, N->getValue(), TypePrinter, Machine, Context);
+ MDFieldPrinter Printer(Out, TypePrinter, Machine, Context);
+ if (N->getTag() != dwarf::DW_TAG_template_value_parameter)
+ Printer.printTag(N);
+ Printer.printString("name", N->getName());
+ Printer.printMetadata("type", N->getType());
+ Printer.printMetadata("value", N->getValue(), /* ShouldSkipNull */ false);
Out << ")";
}
@@ -1731,36 +1717,17 @@ static void writeMDGlobalVariable(raw_ostream &Out, const MDGlobalVariable *N,
TypePrinting *TypePrinter,
SlotTracker *Machine, const Module *Context) {
Out << "!MDGlobalVariable(";
- FieldSeparator FS;
- Out << FS << "scope: ";
- writeMetadataAsOperand(Out, N->getScope(), TypePrinter, Machine, Context);
- Out << FS << "name: \"" << N->getName() << "\"";
- if (!N->getLinkageName().empty())
- Out << FS << "linkageName: \"" << N->getLinkageName() << "\"";
- if (N->getFile()) {
- Out << FS << "file: ";
- writeMetadataAsOperand(Out, N->getFile(), TypePrinter, Machine,
- Context);
- }
- if (N->getLine())
- Out << FS << "line: " << N->getLine();
- if (N->getType()) {
- Out << FS << "type: ";
- writeMetadataAsOperand(Out, N->getType(), TypePrinter, Machine,
- Context);
- }
- Out << FS << "isLocal: " << (N->isLocalToUnit() ? "true" : "false");
- Out << FS << "isDefinition: " << (N->isDefinition() ? "true" : "false");
- if (N->getVariable()) {
- Out << FS << "variable: ";
- writeMetadataAsOperand(Out, N->getVariable(), TypePrinter, Machine,
- Context);
- }
- if (N->getStaticDataMemberDeclaration()) {
- Out << FS << "declaration: ";
- writeMetadataAsOperand(Out, N->getStaticDataMemberDeclaration(),
- TypePrinter, Machine, Context);
- }
+ MDFieldPrinter Printer(Out, TypePrinter, Machine, Context);
+ Printer.printString("name", N->getName());
+ Printer.printString("linkageName", N->getLinkageName());
+ Printer.printMetadata("scope", N->getScope(), /* ShouldSkipNull */ false);
+ Printer.printMetadata("file", N->getFile());
+ Printer.printInt("line", N->getLine());
+ Printer.printMetadata("type", N->getType());
+ Printer.printBool("isLocal", N->isLocalToUnit());
+ Printer.printBool("isDefinition", N->isDefinition());
+ Printer.printMetadata("variable", N->getVariable());
+ Printer.printMetadata("declaration", N->getStaticDataMemberDeclaration());
Out << ")";
}
@@ -1768,34 +1735,18 @@ static void writeMDLocalVariable(raw_ostream &Out, const MDLocalVariable *N,
TypePrinting *TypePrinter,
SlotTracker *Machine, const Module *Context) {
Out << "!MDLocalVariable(";
- FieldSeparator FS;
- writeTag(Out, FS, N);
- Out << FS << "scope: ";
- writeMetadataAsOperand(Out, N->getScope(), TypePrinter, Machine, Context);
- Out << FS << "name: \"" << N->getName() << "\"";
- if (N->getFile()) {
- Out << FS << "file: ";
- writeMetadataAsOperand(Out, N->getFile(), TypePrinter, Machine,
- Context);
- }
- if (N->getLine())
- Out << FS << "line: " << N->getLine();
- if (N->getType()) {
- Out << FS << "type: ";
- writeMetadataAsOperand(Out, N->getType(), TypePrinter, Machine,
- Context);
- }
- if (N->getTag() == dwarf::DW_TAG_arg_variable || N->getArg())
- Out << FS << "arg: " << N->getArg();
- if (auto Flags = N->getFlags()) {
- Out << FS << "flags: ";
- writeDIFlags(Out, Flags);
- }
- if (N->getInlinedAt()) {
- Out << FS << "inlinedAt: ";
- writeMetadataAsOperand(Out, N->getInlinedAt(), TypePrinter, Machine,
- Context);
- }
+ MDFieldPrinter Printer(Out, TypePrinter, Machine, Context);
+ Printer.printTag(N);
+ Printer.printString("name", N->getName());
+ Printer.printInt("arg", N->getArg(),
+ /* ShouldSkipZero */
+ N->getTag() == dwarf::DW_TAG_auto_variable);
+ Printer.printMetadata("scope", N->getScope(), /* ShouldSkipNull */ false);
+ Printer.printMetadata("file", N->getFile());
+ Printer.printInt("line", N->getLine());
+ Printer.printMetadata("type", N->getType());
+ Printer.printDIFlags("flags", N->getFlags());
+ Printer.printMetadata("inlinedAt", N->getInlinedAt());
Out << ")";
}
@@ -1824,24 +1775,14 @@ static void writeMDObjCProperty(raw_ostream &Out, const MDObjCProperty *N,
TypePrinting *TypePrinter, SlotTracker *Machine,
const Module *Context) {
Out << "!MDObjCProperty(";
- FieldSeparator FS;
- Out << FS << "name: \"" << N->getName() << "\"";
- if (N->getFile()) {
- Out << FS << "file: ";
- writeMetadataAsOperand(Out, N->getFile(), TypePrinter, Machine, Context);
- }
- if (N->getLine())
- Out << FS << "line: " << N->getLine();
- if (!N->getSetterName().empty())
- Out << FS << "setter: \"" << N->getSetterName() << "\"";
- if (!N->getGetterName().empty())
- Out << FS << "getter: \"" << N->getGetterName() << "\"";
- if (N->getAttributes())
- Out << FS << "attributes: " << N->getAttributes();
- if (N->getType()) {
- Out << FS << "type: ";
- writeMetadataAsOperand(Out, N->getType(), TypePrinter, Machine, Context);
- }
+ MDFieldPrinter Printer(Out, TypePrinter, Machine, Context);
+ Printer.printString("name", N->getName());
+ Printer.printMetadata("file", N->getFile());
+ Printer.printInt("line", N->getLine());
+ Printer.printString("setter", N->getSetterName());
+ Printer.printString("getter", N->getGetterName());
+ Printer.printInt("attributes", N->getAttributes());
+ Printer.printMetadata("type", N->getType());
Out << ")";
}
@@ -1849,17 +1790,12 @@ static void writeMDImportedEntity(raw_ostream &Out, const MDImportedEntity *N,
TypePrinting *TypePrinter,
SlotTracker *Machine, const Module *Context) {
Out << "!MDImportedEntity(";
- FieldSeparator FS;
- writeTag(Out, FS, N);
- Out << FS << "scope: ";
- writeMetadataAsOperand(Out, N->getScope(), TypePrinter, Machine, Context);
- if (N->getEntity()) {
- Out << FS << "entity: ";
- writeMetadataAsOperand(Out, N->getEntity(), TypePrinter, Machine, Context);
- }
- if (N->getLine())
- Out << FS << "line: " << N->getLine();
- Out << FS << "name: \"" << N->getName() << "\"";
+ MDFieldPrinter Printer(Out, TypePrinter, Machine, Context);
+ Printer.printTag(N);
+ Printer.printString("name", N->getName());
+ Printer.printMetadata("scope", N->getScope(), /* ShouldSkipNull */ false);
+ Printer.printMetadata("entity", N->getEntity());
+ Printer.printInt("line", N->getLine());
Out << ")";
}
@@ -1868,10 +1804,10 @@ static void WriteMDNodeBodyInternal(raw_ostream &Out, const MDNode *Node,
TypePrinting *TypePrinter,
SlotTracker *Machine,
const Module *Context) {
- assert(!Node->isTemporary() && "Unexpected forward declaration");
-
if (Node->isDistinct())
Out << "distinct ";
+ else if (Node->isTemporary())
+ Out << "<temporary!> "; // Handle broken code.
switch (Node->getMetadataID()) {
default:
@@ -1998,6 +1934,64 @@ static void WriteAsOperandInternal(raw_ostream &Out, const Metadata *MD,
WriteAsOperandInternal(Out, V->getValue(), TypePrinter, Machine, Context);
}
+namespace {
+class AssemblyWriter {
+ formatted_raw_ostream &Out;
+ const Module *TheModule;
+ std::unique_ptr<SlotTracker> ModuleSlotTracker;
+ SlotTracker &Machine;
+ TypePrinting TypePrinter;
+ AssemblyAnnotationWriter *AnnotationWriter;
+ SetVector<const Comdat *> Comdats;
+ UseListOrderStack UseListOrders;
+
+public:
+ /// Construct an AssemblyWriter with an external SlotTracker
+ AssemblyWriter(formatted_raw_ostream &o, SlotTracker &Mac,
+ const Module *M, AssemblyAnnotationWriter *AAW);
+
+ /// Construct an AssemblyWriter with an internally allocated SlotTracker
+ AssemblyWriter(formatted_raw_ostream &o, const Module *M,
+ AssemblyAnnotationWriter *AAW);
+
+ void printMDNodeBody(const MDNode *MD);
+ void printNamedMDNode(const NamedMDNode *NMD);
+
+ void printModule(const Module *M);
+
+ void writeOperand(const Value *Op, bool PrintType);
+ void writeParamOperand(const Value *Operand, AttributeSet Attrs,unsigned Idx);
+ void writeAtomic(AtomicOrdering Ordering, SynchronizationScope SynchScope);
+ void writeAtomicCmpXchg(AtomicOrdering SuccessOrdering,
+ AtomicOrdering FailureOrdering,
+ SynchronizationScope SynchScope);
+
+ void writeAllMDNodes();
+ void writeMDNode(unsigned Slot, const MDNode *Node);
+ void writeAllAttributeGroups();
+
+ void printTypeIdentities();
+ void printGlobal(const GlobalVariable *GV);
+ void printAlias(const GlobalAlias *GV);
+ void printComdat(const Comdat *C);
+ void printFunction(const Function *F);
+ void printArgument(const Argument *FA, AttributeSet Attrs, unsigned Idx);
+ void printBasicBlock(const BasicBlock *BB);
+ void printInstructionLine(const Instruction &I);
+ void printInstruction(const Instruction &I);
+
+ void printUseListOrder(const UseListOrder &Order);
+ void printUseLists(const Function *F);
+
+private:
+ void init();
+
+ // printInfoComment - Print a little comment after the instruction indicating
+ // which slot it occupies.
+ void printInfoComment(const Value &V);
+};
+} // namespace
+
void AssemblyWriter::init() {
if (!TheModule)
return;
@@ -2025,8 +2019,6 @@ AssemblyWriter::AssemblyWriter(formatted_raw_ostream &o, const Module *M,
init();
}
-AssemblyWriter::~AssemblyWriter() { }
-
void AssemblyWriter::writeOperand(const Value *Operand, bool PrintType) {
if (!Operand) {
Out << "<null operand!>";
@@ -2876,7 +2868,13 @@ void AssemblyWriter::printInstruction(const Instruction &I) {
if (AI->isUsedWithInAlloca())
Out << "inalloca ";
TypePrinter.print(AI->getAllocatedType(), Out);
- if (!AI->getArraySize() || AI->isArrayAllocation()) {
+
+ // Explicitly write the array size if the code is broken, if it's an array
+ // allocation, or if the type is not canonical for scalar allocations. The
+ // latter case prevents the type from mutating when round-tripping through
+ // assembly.
+ if (!AI->getArraySize() || AI->isArrayAllocation() ||
+ !AI->getArraySize()->getType()->isIntegerTy(32)) {
Out << ", ";
writeOperand(AI->getArraySize(), true);
}
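
A standalone restatement of the new alloca condition (hypothetical helper, not the LLVM API); the IR forms in the comments are illustrative:

#include <cstdio>

// Keep the explicit ", <ty> N" suffix whenever dropping it could change the
// instruction on re-parse:
//   %a = alloca i32          ; omitted size re-parses as the default i32 1
//   %b = alloca i32, i64 1   ; an i64 size must stay explicit, or it would
//                            ; mutate to i32 on a round-trip
bool mustPrintArraySize(bool Broken, bool IsArrayAllocation, bool SizeIsInt32) {
  return Broken || IsArrayAllocation || !SizeIsInt32;
}

int main() {
  std::printf("%d\n", mustPrintArraySize(false, false, true));  // 0: omit
  std::printf("%d\n", mustPrintArraySize(false, false, false)); // 1: print
}
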
@@ -2898,6 +2896,15 @@ void AssemblyWriter::printInstruction(const Instruction &I) {
Out << ", ";
TypePrinter.print(I.getType(), Out);
} else if (Operand) { // Print the normal way.
+ if (const auto *GEP = dyn_cast<GetElementPtrInst>(&I)) {
+ Out << ' ';
+ TypePrinter.print(GEP->getSourceElementType(), Out);
+ Out << ',';
+ } else if (const auto *LI = dyn_cast<LoadInst>(&I)) {
+ Out << ' ';
+ TypePrinter.print(LI->getType(), Out);
+ Out << ',';
+ }
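
A sketch of what the two new branches change in the printed text; the IR forms are illustrative of this revision's syntax, where the element type is spelled out before the pointer operand instead of being implied by it:

#include <cstdio>

// Illustrative before/after of the printed forms (assumed syntax).
int main() {
  std::puts("%v = load i32, i32* %p");                 // was: load i32* %p
  std::puts("%q = getelementptr i32, i32* %p, i64 1"); // was: getelementptr i32* %p, i64 1
}
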
// PrintAllTypes - Instructions who have operands of all the same type
// omit the type from all but the first operand. If the instruction has
@@ -2974,29 +2981,6 @@ void AssemblyWriter::printInstruction(const Instruction &I) {
printInfoComment(I);
}
-static void WriteMDNodeComment(const MDNode *Node,
- formatted_raw_ostream &Out) {
- if (Node->getNumOperands() < 1)
- return;
-
- Metadata *Op = Node->getOperand(0);
- if (!Op || !isa<MDString>(Op))
- return;
-
- DIDescriptor Desc(Node);
- if (!Desc.Verify())
- return;
-
- unsigned Tag = Desc.getTag();
- Out.PadToColumn(50);
- if (dwarf::TagString(Tag)) {
- Out << "; ";
- Desc.print(Out);
- } else if (Tag == dwarf::DW_TAG_user_base) {
- Out << "; [ DW_TAG_user_base ]";
- }
-}
-
void AssemblyWriter::writeMDNode(unsigned Slot, const MDNode *Node) {
Out << '!' << Slot << " = ";
printMDNodeBody(Node);
@@ -3017,7 +3001,6 @@ void AssemblyWriter::writeAllMDNodes() {
void AssemblyWriter::printMDNodeBody(const MDNode *Node) {
WriteMDNodeBodyInternal(Out, Node, &TypePrinter, &Machine, TheModule);
- WriteMDNodeComment(Node, Out);
}
void AssemblyWriter::writeAllAttributeGroups() {
@@ -3034,8 +3017,6 @@ void AssemblyWriter::writeAllAttributeGroups() {
<< I->first.getAsString(AttributeSet::FunctionIndex, true) << " }\n";
}
-} // namespace llvm
-
void AssemblyWriter::printUseListOrder(const UseListOrder &Order) {
bool IsInFunction = Machine.getFunction();
if (IsInFunction)
@@ -3130,11 +3111,24 @@ void Type::print(raw_ostream &OS) const {
}
}
+static bool isReferencingMDNode(const Instruction &I) {
+ if (const auto *CI = dyn_cast<CallInst>(&I))
+ if (Function *F = CI->getCalledFunction())
+ if (F->isIntrinsic())
+ for (auto &Op : I.operands())
+ if (auto *V = dyn_cast_or_null<MetadataAsValue>(Op))
+ if (isa<MDNode>(V->getMetadata()))
+ return true;
+ return false;
+}
+
void Value::print(raw_ostream &ROS) const {
formatted_raw_ostream OS(ROS);
if (const Instruction *I = dyn_cast<Instruction>(this)) {
const Function *F = I->getParent() ? I->getParent()->getParent() : nullptr;
- SlotTracker SlotTable(F);
+ SlotTracker SlotTable(
+ F,
+ /* ShouldInitializeAllMetadata */ isReferencingMDNode(*I));
AssemblyWriter W(OS, SlotTable, getModuleFromVal(I), nullptr);
W.printInstruction(*I);
} else if (const BasicBlock *BB = dyn_cast<BasicBlock>(this)) {
@@ -3142,7 +3136,8 @@ void Value::print(raw_ostream &ROS) const {
AssemblyWriter W(OS, SlotTable, getModuleFromVal(BB), nullptr);
W.printBasicBlock(BB);
} else if (const GlobalValue *GV = dyn_cast<GlobalValue>(this)) {
- SlotTracker SlotTable(GV->getParent());
+ SlotTracker SlotTable(GV->getParent(),
+ /* ShouldInitializeAllMetadata */ isa<Function>(GV));
AssemblyWriter W(OS, SlotTable, GV->getParent(), nullptr);
if (const GlobalVariable *V = dyn_cast<GlobalVariable>(GV))
W.printGlobal(V);
@@ -3151,7 +3146,7 @@ void Value::print(raw_ostream &ROS) const {
else
W.printAlias(cast<GlobalAlias>(GV));
} else if (const MetadataAsValue *V = dyn_cast<MetadataAsValue>(this)) {
- V->getMetadata()->print(ROS);
+ V->getMetadata()->print(ROS, getModuleFromVal(V));
} else if (const Constant *C = dyn_cast<Constant>(this)) {
TypePrinting TypePrinter;
TypePrinter.print(C->getType(), OS);
@@ -3167,8 +3162,9 @@ void Value::print(raw_ostream &ROS) const {
void Value::printAsOperand(raw_ostream &O, bool PrintType, const Module *M) const {
// Fast path: Don't construct and populate a TypePrinting object if we
// won't be needing any types printed.
- if (!PrintType && ((!isa<Constant>(this) && !isa<MetadataAsValue>(this)) ||
- hasName() || isa<GlobalValue>(this))) {
+ bool IsMetadata = isa<MetadataAsValue>(this);
+ if (!PrintType && ((!isa<Constant>(this) && !IsMetadata) || hasName() ||
+ isa<GlobalValue>(this))) {
WriteAsOperandInternal(O, this, nullptr, nullptr, M);
return;
}
@@ -3184,33 +3180,35 @@ void Value::printAsOperand(raw_ostream &O, bool PrintType, const Module *M) cons
O << ' ';
}
- WriteAsOperandInternal(O, this, &TypePrinter, nullptr, M);
+ SlotTracker Machine(M, /* ShouldInitializeAllMetadata */ IsMetadata);
+ WriteAsOperandInternal(O, this, &TypePrinter, &Machine, M);
}
-void Metadata::print(raw_ostream &ROS) const {
+static void printMetadataImpl(raw_ostream &ROS, const Metadata &MD,
+ const Module *M, bool OnlyAsOperand) {
formatted_raw_ostream OS(ROS);
- if (auto *N = dyn_cast<MDNode>(this)) {
- SlotTracker SlotTable(static_cast<Function *>(nullptr));
- AssemblyWriter W(OS, SlotTable, nullptr, nullptr);
- W.printMDNodeBody(N);
+ auto *N = dyn_cast<MDNode>(&MD);
+ TypePrinting TypePrinter;
+ SlotTracker Machine(M, /* ShouldInitializeAllMetadata */ N);
+ if (M)
+ TypePrinter.incorporateTypes(*M);
+
+ WriteAsOperandInternal(OS, &MD, &TypePrinter, &Machine, M,
+ /* FromValue */ true);
+ if (OnlyAsOperand || !N)
return;
- }
- printAsOperand(OS);
+
+ OS << " = ";
+ WriteMDNodeBodyInternal(OS, N, &TypePrinter, &Machine, M);
}
-void Metadata::printAsOperand(raw_ostream &ROS, bool PrintType,
- const Module *M) const {
- formatted_raw_ostream OS(ROS);
+void Metadata::printAsOperand(raw_ostream &OS, const Module *M) const {
+ printMetadataImpl(OS, *this, M, /* OnlyAsOperand */ true);
+}
- std::unique_ptr<TypePrinting> TypePrinter;
- if (PrintType) {
- TypePrinter.reset(new TypePrinting);
- if (M)
- TypePrinter->incorporateTypes(*M);
- }
- WriteAsOperandInternal(OS, this, TypePrinter.get(), nullptr, M,
- /* FromValue */ true);
+void Metadata::print(raw_ostream &OS, const Module *M) const {
+ printMetadataImpl(OS, *this, M, /* OnlyAsOperand */ false);
}
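
A minimal mirror of printMetadataImpl's control flow; the strings are stand-ins for the real writers, not actual output of this revision:

#include <iostream>

// The operand form is always emitted; a full print of a node then appends
// " = " and the body.
void printMetadata(std::ostream &OS, bool OnlyAsOperand, bool IsNode) {
  OS << "!5"; // WriteAsOperandInternal
  if (OnlyAsOperand || !IsNode) {
    OS << '\n';
    return;
  }
  OS << " = !MDLocation(line: 2, scope: !1)\n"; // WriteMDNodeBodyInternal
}

int main() {
  printMetadata(std::cout, /*OnlyAsOperand=*/true, /*IsNode=*/true);  // !5
  printMetadata(std::cout, /*OnlyAsOperand=*/false, /*IsNode=*/true); // !5 = ...
}
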
// Value::dump - allow easy printing of Values from the debugger.
@@ -3234,7 +3232,10 @@ LLVM_DUMP_METHOD
void NamedMDNode::dump() const { print(dbgs()); }
LLVM_DUMP_METHOD
-void Metadata::dump() const {
- print(dbgs());
+void Metadata::dump() const { dump(nullptr); }
+
+LLVM_DUMP_METHOD
+void Metadata::dump(const Module *M) const {
+ print(dbgs(), M);
dbgs() << '\n';
}
diff --git a/lib/IR/AsmWriter.h b/lib/IR/AsmWriter.h
deleted file mode 100644
index 7716fa6..0000000
--- a/lib/IR/AsmWriter.h
+++ /dev/null
@@ -1,129 +0,0 @@
-//===-- llvm/IR/AsmWriter.h - Printing LLVM IR as an assembly file - C++ --===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This files defines the interface for the AssemblyWriter class used to print
-// LLVM IR and various helper classes that are used in printing.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_LIB_IR_ASMWRITER_H
-#define LLVM_LIB_IR_ASMWRITER_H
-
-#include "llvm/ADT/DenseMap.h"
-#include "llvm/ADT/SetVector.h"
-#include "llvm/IR/Attributes.h"
-#include "llvm/IR/Instructions.h"
-#include "llvm/IR/TypeFinder.h"
-#include "llvm/IR/UseListOrder.h"
-#include "llvm/Support/FormattedStream.h"
-
-namespace llvm {
-
-class BasicBlock;
-class Function;
-class GlobalValue;
-class Comdat;
-class Module;
-class NamedMDNode;
-class Value;
-class SlotTracker;
-
-/// Create a new SlotTracker for a Module
-SlotTracker *createSlotTracker(const Module *M);
-
-//===----------------------------------------------------------------------===//
-// TypePrinting Class: Type printing machinery
-//===----------------------------------------------------------------------===//
-
-class TypePrinting {
- TypePrinting(const TypePrinting &) = delete;
- void operator=(const TypePrinting&) = delete;
-public:
-
- /// NamedTypes - The named types that are used by the current module.
- TypeFinder NamedTypes;
-
- /// NumberedTypes - The numbered types, along with their value.
- DenseMap<StructType*, unsigned> NumberedTypes;
-
-
- TypePrinting() {}
- ~TypePrinting() {}
-
- void incorporateTypes(const Module &M);
-
- void print(Type *Ty, raw_ostream &OS);
-
- void printStructBody(StructType *Ty, raw_ostream &OS);
-};
-
-class AssemblyWriter {
-protected:
- formatted_raw_ostream &Out;
- const Module *TheModule;
-
-private:
- std::unique_ptr<SlotTracker> ModuleSlotTracker;
- SlotTracker &Machine;
- TypePrinting TypePrinter;
- AssemblyAnnotationWriter *AnnotationWriter;
- SetVector<const Comdat *> Comdats;
- UseListOrderStack UseListOrders;
-
-public:
- /// Construct an AssemblyWriter with an external SlotTracker
- AssemblyWriter(formatted_raw_ostream &o, SlotTracker &Mac,
- const Module *M, AssemblyAnnotationWriter *AAW);
-
- /// Construct an AssemblyWriter with an internally allocated SlotTracker
- AssemblyWriter(formatted_raw_ostream &o, const Module *M,
- AssemblyAnnotationWriter *AAW);
-
- virtual ~AssemblyWriter();
-
- void printMDNodeBody(const MDNode *MD);
- void printNamedMDNode(const NamedMDNode *NMD);
-
- void printModule(const Module *M);
-
- void writeOperand(const Value *Op, bool PrintType);
- void writeParamOperand(const Value *Operand, AttributeSet Attrs,unsigned Idx);
- void writeAtomic(AtomicOrdering Ordering, SynchronizationScope SynchScope);
- void writeAtomicCmpXchg(AtomicOrdering SuccessOrdering,
- AtomicOrdering FailureOrdering,
- SynchronizationScope SynchScope);
-
- void writeAllMDNodes();
- void writeMDNode(unsigned Slot, const MDNode *Node);
- void writeAllAttributeGroups();
-
- void printTypeIdentities();
- void printGlobal(const GlobalVariable *GV);
- void printAlias(const GlobalAlias *GV);
- void printComdat(const Comdat *C);
- void printFunction(const Function *F);
- void printArgument(const Argument *FA, AttributeSet Attrs, unsigned Idx);
- void printBasicBlock(const BasicBlock *BB);
- void printInstructionLine(const Instruction &I);
- void printInstruction(const Instruction &I);
-
- void printUseListOrder(const UseListOrder &Order);
- void printUseLists(const Function *F);
-
-private:
- void init();
-
- // printInfoComment - Print a little comment after the instruction indicating
- // which slot it occupies.
- void printInfoComment(const Value &V);
-};
-
-} // namespace llvm
-
-#endif
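
With this header deleted, AssemblyWriter lives only inside AsmWriter.cpp, in the unnamed namespace added earlier in the patch. A minimal sketch of that internalization pattern, with illustrative names:

#include <cstdio>

// Declare the class in an unnamed namespace (internal linkage, no header
// needed), then define members at file scope; unnamed-namespace names stay
// visible throughout the translation unit, as with AssemblyWriter::init().
namespace {
class Writer {
public:
  void print();
};
} // namespace

void Writer::print() { std::puts("internal writer"); }

int main() { Writer().print(); }
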
diff --git a/lib/IR/AutoUpgrade.cpp b/lib/IR/AutoUpgrade.cpp
index 0da7784..d2dfeaa 100644
--- a/lib/IR/AutoUpgrade.cpp
+++ b/lib/IR/AutoUpgrade.cpp
@@ -7,7 +7,9 @@
//
//===----------------------------------------------------------------------===//
//
-// This file implements the auto-upgrade helper functions
+// This file implements the auto-upgrade helper functions.
+// This is where deprecated IR intrinsics and other IR features are updated to
+// current specifications.
//
//===----------------------------------------------------------------------===//
@@ -156,6 +158,14 @@ static bool UpgradeIntrinsicFunction1(Function *F, Function *&NewFn) {
Name.startswith("x86.avx2.pcmpeq.") ||
Name.startswith("x86.avx2.pcmpgt.") ||
Name.startswith("x86.avx.vpermil.") ||
+ Name == "x86.avx.vinsertf128.pd.256" ||
+ Name == "x86.avx.vinsertf128.ps.256" ||
+ Name == "x86.avx.vinsertf128.si.256" ||
+ Name == "x86.avx2.vinserti128" ||
+ Name == "x86.avx.vextractf128.pd.256" ||
+ Name == "x86.avx.vextractf128.ps.256" ||
+ Name == "x86.avx.vextractf128.si.256" ||
+ Name == "x86.avx2.vextracti128" ||
Name == "x86.avx.movnt.dq.256" ||
Name == "x86.avx.movnt.pd.256" ||
Name == "x86.avx.movnt.ps.256" ||
@@ -171,6 +181,15 @@ static bool UpgradeIntrinsicFunction1(Function *F, Function *&NewFn) {
Name == "x86.sse2.psrl.dq.bs" ||
Name == "x86.avx2.psll.dq.bs" ||
Name == "x86.avx2.psrl.dq.bs" ||
+ Name == "x86.sse41.pblendw" ||
+ Name == "x86.sse41.blendpd" ||
+ Name == "x86.sse41.blendps" ||
+ Name == "x86.avx.blend.pd.256" ||
+ Name == "x86.avx.blend.ps.256" ||
+ Name == "x86.avx2.pblendw" ||
+ Name == "x86.avx2.pblendd.128" ||
+ Name == "x86.avx2.pblendd.256" ||
+ Name == "x86.avx2.vbroadcasti128" ||
(Name.startswith("x86.xop.vpcom") && F->arg_size() == 2)) {
NewFn = nullptr;
return true;
@@ -184,17 +203,8 @@ static bool UpgradeIntrinsicFunction1(Function *F, Function *&NewFn) {
if (Name == "x86.sse41.ptestnzc")
return UpgradeSSE41Function(F, Intrinsic::x86_sse41_ptestnzc, NewFn);
}
- // Several blend and other instructions with maskes used the wrong number of
+ // Several blend and other instructions with masks used the wrong number of
// bits.
- if (Name == "x86.sse41.pblendw")
- return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_sse41_pblendw,
- NewFn);
- if (Name == "x86.sse41.blendpd")
- return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_sse41_blendpd,
- NewFn);
- if (Name == "x86.sse41.blendps")
- return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_sse41_blendps,
- NewFn);
if (Name == "x86.sse41.insertps")
return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_sse41_insertps,
NewFn);
@@ -207,24 +217,9 @@ static bool UpgradeIntrinsicFunction1(Function *F, Function *&NewFn) {
if (Name == "x86.sse41.mpsadbw")
return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_sse41_mpsadbw,
NewFn);
- if (Name == "x86.avx.blend.pd.256")
- return UpgradeX86IntrinsicsWith8BitMask(
- F, Intrinsic::x86_avx_blend_pd_256, NewFn);
- if (Name == "x86.avx.blend.ps.256")
- return UpgradeX86IntrinsicsWith8BitMask(
- F, Intrinsic::x86_avx_blend_ps_256, NewFn);
if (Name == "x86.avx.dp.ps.256")
return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_avx_dp_ps_256,
NewFn);
- if (Name == "x86.avx2.pblendw")
- return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_avx2_pblendw,
- NewFn);
- if (Name == "x86.avx2.pblendd.128")
- return UpgradeX86IntrinsicsWith8BitMask(
- F, Intrinsic::x86_avx2_pblendd_128, NewFn);
- if (Name == "x86.avx2.pblendd.256")
- return UpgradeX86IntrinsicsWith8BitMask(
- F, Intrinsic::x86_avx2_pblendd_256, NewFn);
if (Name == "x86.avx2.mpsadbw")
return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_avx2_mpsadbw,
NewFn);
@@ -569,6 +564,15 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
for (unsigned I = 0; I < EltNum; ++I)
Rep = Builder.CreateInsertElement(Rep, Load,
ConstantInt::get(I32Ty, I));
+ } else if (Name == "llvm.x86.avx2.vbroadcasti128") {
+ // Replace vbroadcasts with a vector shuffle.
+ Value *Op = Builder.CreatePointerCast(
+ CI->getArgOperand(0),
+ PointerType::getUnqual(VectorType::get(Type::getInt64Ty(C), 2)));
+ Value *Load = Builder.CreateLoad(Op);
+ const int Idxs[4] = { 0, 1, 0, 1 };
+ Rep = Builder.CreateShuffleVector(Load, UndefValue::get(Load->getType()),
+ Idxs);
} else if (Name == "llvm.x86.sse2.psll.dq") {
// 128-bit shift left specified in bits.
unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
@@ -609,6 +613,94 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
Rep = UpgradeX86PSRLDQIntrinsics(Builder, C, CI->getArgOperand(0), 2,
Shift);
+ } else if (Name == "llvm.x86.sse41.pblendw" ||
+ Name == "llvm.x86.sse41.blendpd" ||
+ Name == "llvm.x86.sse41.blendps" ||
+ Name == "llvm.x86.avx.blend.pd.256" ||
+ Name == "llvm.x86.avx.blend.ps.256" ||
+ Name == "llvm.x86.avx2.pblendw" ||
+ Name == "llvm.x86.avx2.pblendd.128" ||
+ Name == "llvm.x86.avx2.pblendd.256") {
+ Value *Op0 = CI->getArgOperand(0);
+ Value *Op1 = CI->getArgOperand(1);
+ unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
+ VectorType *VecTy = cast<VectorType>(CI->getType());
+ unsigned NumElts = VecTy->getNumElements();
+
+ SmallVector<Constant*, 16> Idxs;
+ for (unsigned i = 0; i != NumElts; ++i) {
+ unsigned Idx = ((Imm >> (i%8)) & 1) ? i + NumElts : i;
+ Idxs.push_back(Builder.getInt32(Idx));
+ }
+
+ Rep = Builder.CreateShuffleVector(Op0, Op1, ConstantVector::get(Idxs));
+ } else if (Name == "llvm.x86.avx.vinsertf128.pd.256" ||
+ Name == "llvm.x86.avx.vinsertf128.ps.256" ||
+ Name == "llvm.x86.avx.vinsertf128.si.256" ||
+ Name == "llvm.x86.avx2.vinserti128") {
+ Value *Op0 = CI->getArgOperand(0);
+ Value *Op1 = CI->getArgOperand(1);
+ unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
+ VectorType *VecTy = cast<VectorType>(CI->getType());
+ unsigned NumElts = VecTy->getNumElements();
+
+ // Mask off the high bits of the immediate value; hardware ignores those.
+ Imm = Imm & 1;
+
+ // Extend the second operand into a vector that is twice as big.
+ Value *UndefV = UndefValue::get(Op1->getType());
+ SmallVector<Constant*, 8> Idxs;
+ for (unsigned i = 0; i != NumElts; ++i) {
+ Idxs.push_back(Builder.getInt32(i));
+ }
+ Rep = Builder.CreateShuffleVector(Op1, UndefV, ConstantVector::get(Idxs));
+
+ // Insert the second operand into the first operand.
+
+ // Note that there is no guarantee that instruction lowering will actually
+ // produce a vinsertf128 instruction for the created shuffles. In
+ // particular, the 0 immediate case involves no lane changes, so it can
+ // be handled as a blend.
+
+ // Example of shuffle mask for 32-bit elements:
+ // Imm = 1 <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11>
+ // Imm = 0 <i32 8, i32 9, i32 10, i32 11, i32 4, i32 5, i32 6, i32 7 >
+
+ SmallVector<Constant*, 8> Idxs2;
+ // The low half of the result is either the low half of the 1st operand
+ // or the low half of the 2nd operand (the inserted vector).
+ for (unsigned i = 0; i != NumElts / 2; ++i) {
+ unsigned Idx = Imm ? i : (i + NumElts);
+ Idxs2.push_back(Builder.getInt32(Idx));
+ }
+ // The high half of the result is either the low half of the 2nd operand
+ // (the inserted vector) or the high half of the 1st operand.
+ for (unsigned i = NumElts / 2; i != NumElts; ++i) {
+ unsigned Idx = Imm ? (i + NumElts / 2) : i;
+ Idxs2.push_back(Builder.getInt32(Idx));
+ }
+ Rep = Builder.CreateShuffleVector(Op0, Rep, ConstantVector::get(Idxs2));
+ } else if (Name == "llvm.x86.avx.vextractf128.pd.256" ||
+ Name == "llvm.x86.avx.vextractf128.ps.256" ||
+ Name == "llvm.x86.avx.vextractf128.si.256" ||
+ Name == "llvm.x86.avx2.vextracti128") {
+ Value *Op0 = CI->getArgOperand(0);
+ unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
+ VectorType *VecTy = cast<VectorType>(CI->getType());
+ unsigned NumElts = VecTy->getNumElements();
+
+ // Mask off the high bits of the immediate value; hardware ignores those.
+ Imm = Imm & 1;
+
+ // Get indexes for either the high half or low half of the input vector.
+ SmallVector<Constant*, 4> Idxs(NumElts);
+ for (unsigned i = 0; i != NumElts; ++i) {
+ unsigned Idx = Imm ? (i + NumElts) : i;
+ Idxs[i] = Builder.getInt32(Idx);
+ }
+
+ Value *UndefV = UndefValue::get(Op0->getType());
+ Rep = Builder.CreateShuffleVector(Op0, UndefV, ConstantVector::get(Idxs));
} else {
bool PD128 = false, PD256 = false, PS128 = false, PS256 = false;
if (Name == "llvm.x86.avx.vpermil.pd.256")
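
The three upgrades in this hunk all lower to shufflevector, so the mask arithmetic can be checked standalone. In the sketch below, indices at or past NumElts select from the second shuffle operand (as in IR shufflevector semantics), insert128Mask mirrors Idxs2 (applied after the narrow operand has been widened), and the expected outputs reproduce the examples in the comments above:

#include <cstdio>
#include <vector>

std::vector<unsigned> blendMask(unsigned Imm, unsigned NumElts) {
  std::vector<unsigned> Idxs;
  for (unsigned i = 0; i != NumElts; ++i)
    Idxs.push_back(((Imm >> (i % 8)) & 1) ? i + NumElts : i);
  return Idxs;
}

std::vector<unsigned> insert128Mask(unsigned Imm, unsigned NumElts) {
  Imm &= 1; // hardware ignores the high immediate bits
  std::vector<unsigned> Idxs;
  for (unsigned i = 0; i != NumElts / 2; ++i)       // low half of the result
    Idxs.push_back(Imm ? i : i + NumElts);
  for (unsigned i = NumElts / 2; i != NumElts; ++i) // high half of the result
    Idxs.push_back(Imm ? i + NumElts / 2 : i);
  return Idxs;
}

std::vector<unsigned> extract128Mask(unsigned Imm, unsigned NumElts) {
  Imm &= 1;
  std::vector<unsigned> Idxs;
  for (unsigned i = 0; i != NumElts; ++i)
    Idxs.push_back(Imm ? i + NumElts : i);
  return Idxs;
}

static void dump(const char *Name, const std::vector<unsigned> &Idxs) {
  std::printf("%s:", Name);
  for (unsigned I : Idxs)
    std::printf(" %u", I);
  std::printf("\n");
}

int main() {
  dump("blend 0xAA", blendMask(0xAA, 8));   // 0 9 2 11 4 13 6 15
  dump("vinsert 1", insert128Mask(1, 8));   // 0 1 2 3 8 9 10 11
  dump("vinsert 0", insert128Mask(0, 8));   // 8 9 10 11 4 5 6 7
  dump("vextract 1", extract128Mask(1, 4)); // 4 5 6 7 (high 128 bits)
}
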
@@ -739,19 +831,11 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
return;
}
- case Intrinsic::x86_sse41_pblendw:
- case Intrinsic::x86_sse41_blendpd:
- case Intrinsic::x86_sse41_blendps:
case Intrinsic::x86_sse41_insertps:
case Intrinsic::x86_sse41_dppd:
case Intrinsic::x86_sse41_dpps:
case Intrinsic::x86_sse41_mpsadbw:
- case Intrinsic::x86_avx_blend_pd_256:
- case Intrinsic::x86_avx_blend_ps_256:
case Intrinsic::x86_avx_dp_ps_256:
- case Intrinsic::x86_avx2_pblendw:
- case Intrinsic::x86_avx2_pblendd_128:
- case Intrinsic::x86_avx2_pblendd_256:
case Intrinsic::x86_avx2_mpsadbw: {
// Need to truncate the last argument from i32 to i8 -- this argument models
// an inherently 8-bit immediate operand to these x86 instructions.
diff --git a/lib/IR/BasicBlock.cpp b/lib/IR/BasicBlock.cpp
index b3b3cbf..fe38385 100644
--- a/lib/IR/BasicBlock.cpp
+++ b/lib/IR/BasicBlock.cpp
@@ -29,10 +29,6 @@ ValueSymbolTable *BasicBlock::getValueSymbolTable() {
return nullptr;
}
-const DataLayout *BasicBlock::getDataLayout() const {
- return getParent()->getDataLayout();
-}
-
LLVMContext &BasicBlock::getContext() const {
return getType()->getContext();
}
@@ -102,14 +98,14 @@ void BasicBlock::eraseFromParent() {
getParent()->getBasicBlockList().erase(this);
}
-/// moveBefore - Unlink this basic block from its current function and
+/// Unlink this basic block from its current function and
/// insert it into the function that MovePos lives in, right before MovePos.
void BasicBlock::moveBefore(BasicBlock *MovePos) {
MovePos->getParent()->getBasicBlockList().splice(MovePos,
getParent()->getBasicBlockList(), this);
}
-/// moveAfter - Unlink this basic block from its current function and
+/// Unlink this basic block from its current function and
/// insert it into the function that MovePos lives in, right after MovePos.
void BasicBlock::moveAfter(BasicBlock *MovePos) {
Function::iterator I = MovePos;
@@ -117,6 +113,9 @@ void BasicBlock::moveAfter(BasicBlock *MovePos) {
getParent()->getBasicBlockList(), this);
}
+const Module *BasicBlock::getModule() const {
+ return getParent()->getParent();
+}
TerminatorInst *BasicBlock::getTerminator() {
if (InstList.empty()) return nullptr;
@@ -210,7 +209,7 @@ void BasicBlock::dropAllReferences() {
I->dropAllReferences();
}
-/// getSinglePredecessor - If this basic block has a single predecessor block,
+/// If this basic block has a single predecessor block,
/// return the block, otherwise return a null pointer.
BasicBlock *BasicBlock::getSinglePredecessor() {
pred_iterator PI = pred_begin(this), E = pred_end(this);
@@ -220,7 +219,7 @@ BasicBlock *BasicBlock::getSinglePredecessor() {
return (PI == E) ? ThePred : nullptr /*multiple preds*/;
}
-/// getUniquePredecessor - If this basic block has a unique predecessor block,
+/// If this basic block has a unique predecessor block,
/// return the block, otherwise return a null pointer.
/// Note that unique predecessor doesn't mean single edge, there can be
/// multiple edges from the unique predecessor to this block (for example
@@ -253,7 +252,7 @@ BasicBlock *BasicBlock::getUniqueSuccessor() {
return SuccBB;
}
-/// removePredecessor - This method is used to notify a BasicBlock that the
+/// This method is used to notify a BasicBlock that the
/// specified Predecessor of the block is no longer able to reach it. This is
/// actually not used to update the Predecessor list, but is actually used to
/// update the PHI nodes that reside in the block. Note that this should be
@@ -330,7 +329,7 @@ void BasicBlock::removePredecessor(BasicBlock *Pred,
}
-/// splitBasicBlock - This splits a basic block into two at the specified
+/// This splits a basic block into two at the specified
/// instruction. Note that all instructions BEFORE the specified iterator stay
/// as part of the original basic block, an unconditional branch is added to
/// the new BB, and the rest of the instructions in the BB are moved to the new
@@ -401,14 +400,13 @@ void BasicBlock::replaceSuccessorsPhiUsesWith(BasicBlock *New) {
}
}
-/// isLandingPad - Return true if this basic block is a landing pad. I.e., it's
+/// Return true if this basic block is a landing pad. I.e., it's
/// the destination of the 'unwind' edge of an invoke instruction.
bool BasicBlock::isLandingPad() const {
return isa<LandingPadInst>(getFirstNonPHI());
}
-/// getLandingPadInst() - Return the landingpad instruction associated with
-/// the landing pad.
+/// Return the landingpad instruction associated with the landing pad.
LandingPadInst *BasicBlock::getLandingPadInst() {
return dyn_cast<LandingPadInst>(getFirstNonPHI());
}
diff --git a/lib/IR/ConstantFold.cpp b/lib/IR/ConstantFold.cpp
index a915d28..d97d2c4 100644
--- a/lib/IR/ConstantFold.cpp
+++ b/lib/IR/ConstantFold.cpp
@@ -1120,27 +1120,18 @@ Constant *llvm::ConstantFoldBinaryInstruction(unsigned Opcode,
return ConstantInt::get(CI1->getContext(), C1V | C2V);
case Instruction::Xor:
return ConstantInt::get(CI1->getContext(), C1V ^ C2V);
- case Instruction::Shl: {
- uint32_t shiftAmt = C2V.getZExtValue();
- if (shiftAmt < C1V.getBitWidth())
- return ConstantInt::get(CI1->getContext(), C1V.shl(shiftAmt));
- else
- return UndefValue::get(C1->getType()); // too big shift is undef
- }
- case Instruction::LShr: {
- uint32_t shiftAmt = C2V.getZExtValue();
- if (shiftAmt < C1V.getBitWidth())
- return ConstantInt::get(CI1->getContext(), C1V.lshr(shiftAmt));
- else
- return UndefValue::get(C1->getType()); // too big shift is undef
- }
- case Instruction::AShr: {
- uint32_t shiftAmt = C2V.getZExtValue();
- if (shiftAmt < C1V.getBitWidth())
- return ConstantInt::get(CI1->getContext(), C1V.ashr(shiftAmt));
- else
- return UndefValue::get(C1->getType()); // too big shift is undef
- }
+ case Instruction::Shl:
+ if (C2V.ult(C1V.getBitWidth()))
+ return ConstantInt::get(CI1->getContext(), C1V.shl(C2V));
+ return UndefValue::get(C1->getType()); // too big shift is undef
+ case Instruction::LShr:
+ if (C2V.ult(C1V.getBitWidth()))
+ return ConstantInt::get(CI1->getContext(), C1V.lshr(C2V));
+ return UndefValue::get(C1->getType()); // too big shift is undef
+ case Instruction::AShr:
+ if (C2V.ult(C1V.getBitWidth()))
+ return ConstantInt::get(CI1->getContext(), C1V.ashr(C2V));
+ return UndefValue::get(C1->getType()); // too big shift is undef
}
}
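
A standalone restatement of the folding rule, with plain integers standing in for APInt: a shift by at least the bit width is undefined, so the folder yields undef, modeled here as nullopt. The APInt::ult comparison in the hunk above additionally copes with shift amounts too wide for getZExtValue's 64-bit result, which this sketch does not attempt to show.

#include <cstdint>
#include <cstdio>
#include <optional>

std::optional<uint32_t> foldShl(uint32_t V, uint64_t Amt) {
  if (Amt < 32)
    return V << Amt;
  return std::nullopt; // too-big shift is undef
}

int main() {
  std::printf("%u\n", *foldShl(1, 4));                  // 16
  std::printf("%d\n", (int)foldShl(1, 40).has_value()); // 0: undef
}
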
@@ -1327,7 +1318,7 @@ static FCmpInst::Predicate evaluateFCmpRelation(Constant *V1, Constant *V2) {
if (!isa<ConstantExpr>(V1)) {
if (!isa<ConstantExpr>(V2)) {
- // We distilled thisUse the standard constant folder for a few cases
+ // Simple case, use the standard constant folder.
ConstantInt *R = nullptr;
R = dyn_cast<ConstantInt>(
ConstantExpr::getFCmp(FCmpInst::FCMP_OEQ, V1, V2));
@@ -1665,15 +1656,22 @@ Constant *llvm::ConstantFoldCompareInstruction(unsigned short pred,
// Handle some degenerate cases first
if (isa<UndefValue>(C1) || isa<UndefValue>(C2)) {
+ CmpInst::Predicate Predicate = CmpInst::Predicate(pred);
+ bool isIntegerPredicate = ICmpInst::isIntPredicate(Predicate);
// For EQ and NE, we can always pick a value for the undef to make the
// predicate pass or fail, so we can return undef.
- // Also, if both operands are undef, we can return undef.
- if (ICmpInst::isEquality(ICmpInst::Predicate(pred)) ||
- (isa<UndefValue>(C1) && isa<UndefValue>(C2)))
+ // Also, if both operands are undef, we can return undef for int comparison.
+ if (ICmpInst::isEquality(Predicate) || (isIntegerPredicate && C1 == C2))
return UndefValue::get(ResultTy);
- // Otherwise, pick the same value as the non-undef operand, and fold
- // it to true or false.
- return ConstantInt::get(ResultTy, CmpInst::isTrueWhenEqual(pred));
+
+ // Otherwise, for integer compare, pick the same value as the non-undef
+ // operand, and fold it to true or false.
+ if (isIntegerPredicate)
+ return ConstantInt::get(ResultTy, CmpInst::isTrueWhenEqual(pred));
+
+ // Choosing NaN for the undef will always make unordered comparison succeed
+ // and ordered comparison fail.
+ return ConstantInt::get(ResultTy, CmpInst::isUnordered(Predicate));
}
// icmp eq/ne(null,GV) -> false/true
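
Why NaN is the right stand-in for an undef fcmp operand: every ordered comparison involving NaN is false and every unordered one is true, so the compare folds to a constant either way. A standalone check:

#include <cmath>
#include <cstdio>

int main() {
  double NaN = std::nan("");
  std::printf("olt: %d\n", NaN < 1.0 ? 1 : 0);     // 0: ordered fails
  std::printf("ult: %d\n", !(NaN >= 1.0) ? 1 : 0); // 1: unordered succeeds
}
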
@@ -1789,7 +1787,10 @@ Constant *llvm::ConstantFoldCompareInstruction(unsigned short pred,
return ConstantVector::get(ResElts);
}
- if (C1->getType()->isFloatingPointTy()) {
+ if (C1->getType()->isFloatingPointTy() &&
+ // Only call evaluateFCmpRelation if we have a constant expr to avoid
+ // infinite recursive loop
+ (isa<ConstantExpr>(C1) || isa<ConstantExpr>(C2))) {
int Result = -1; // -1 = unknown, 0 = known false, 1 = known true.
switch (evaluateFCmpRelation(C1, C2)) {
default: llvm_unreachable("Unknown relation!");
diff --git a/lib/IR/ConstantRange.cpp b/lib/IR/ConstantRange.cpp
index f8e9ba4..91095cf 100644
--- a/lib/IR/ConstantRange.cpp
+++ b/lib/IR/ConstantRange.cpp
@@ -49,14 +49,15 @@ ConstantRange::ConstantRange(APIntMoveTy L, APIntMoveTy U)
"Lower == Upper, but they aren't min or max value!");
}
-ConstantRange ConstantRange::makeICmpRegion(unsigned Pred,
- const ConstantRange &CR) {
+ConstantRange ConstantRange::makeAllowedICmpRegion(CmpInst::Predicate Pred,
+ const ConstantRange &CR) {
if (CR.isEmptySet())
return CR;
uint32_t W = CR.getBitWidth();
switch (Pred) {
- default: llvm_unreachable("Invalid ICmp predicate to makeICmpRegion()");
+ default:
+ llvm_unreachable("Invalid ICmp predicate to makeAllowedICmpRegion()");
case CmpInst::ICMP_EQ:
return CR;
case CmpInst::ICMP_NE:
@@ -114,6 +115,16 @@ ConstantRange ConstantRange::makeICmpRegion(unsigned Pred,
}
}
+ConstantRange ConstantRange::makeSatisfyingICmpRegion(CmpInst::Predicate Pred,
+ const ConstantRange &CR) {
+ // Follows from De Morgan's laws:
+ //
+ // ~(~A union ~B) == A intersect B.
+ //
+ return makeAllowedICmpRegion(CmpInst::getInversePredicate(Pred), CR)
+ .inverse();
+}
+
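
The identity can be brute-forced on a small domain. With Pred = ult and CR = [5, 9) over 4-bit unsigned values, the satisfying region (x ult y for all y in CR) is exactly the complement of the allowed region of the inverse predicate (x uge y for some y in CR):

#include <cstdio>

int main() {
  const unsigned Lo = 5, Hi = 9; // CR = [5, 9)
  for (unsigned X = 0; X < 16; ++X) {
    bool SatisfiesAll = true;
    bool InverseAllowed = false;
    for (unsigned Y = Lo; Y < Hi; ++Y) {
      SatisfiesAll = SatisfiesAll && X < Y;    // x ult y for every y
      InverseAllowed = InverseAllowed || X >= Y; // x uge y for some y
    }
    if (SatisfiesAll == InverseAllowed) {
      std::puts("identity fails");
      return 1;
    }
  }
  std::puts("identity holds for every x"); // satisfying == ~allowed(inverse)
  return 0;
}
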
/// isFullSet - Return true if this set contains all of the elements possible
/// for this data-type
bool ConstantRange::isFullSet() const {
@@ -587,6 +598,13 @@ ConstantRange::multiply(const ConstantRange &Other) const {
if (isEmptySet() || Other.isEmptySet())
return ConstantRange(getBitWidth(), /*isFullSet=*/false);
+ // Multiplication is signedness-independent. However different ranges can be
+ // obtained depending on how the input ranges are treated. These different
+ // ranges are all conservatively correct, but one might be better than the
+ // other. We calculate two ranges; one treating the inputs as unsigned
+ // and the other signed, then return the smallest of these ranges.
+
+ // Unsigned range first.
APInt this_min = getUnsignedMin().zext(getBitWidth() * 2);
APInt this_max = getUnsignedMax().zext(getBitWidth() * 2);
APInt Other_min = Other.getUnsignedMin().zext(getBitWidth() * 2);
@@ -594,7 +612,26 @@ ConstantRange::multiply(const ConstantRange &Other) const {
ConstantRange Result_zext = ConstantRange(this_min * Other_min,
this_max * Other_max + 1);
- return Result_zext.truncate(getBitWidth());
+ ConstantRange UR = Result_zext.truncate(getBitWidth());
+
+ // Now the signed range. Because we could be dealing with negative numbers
+ // here, the lower bound is the smallest of the four products of the signed
+ // bounds of the two ranges; for example:
+ // [-1,4) * [-2,3) = min(-1*-2, -1*2, 3*-2, 3*2) = -6.
+ // Similarly for the upper bound, swapping min for max.
+
+ this_min = getSignedMin().sext(getBitWidth() * 2);
+ this_max = getSignedMax().sext(getBitWidth() * 2);
+ Other_min = Other.getSignedMin().sext(getBitWidth() * 2);
+ Other_max = Other.getSignedMax().sext(getBitWidth() * 2);
+
+ auto L = {this_min * Other_min, this_min * Other_max,
+ this_max * Other_min, this_max * Other_max};
+ auto Compare = [](const APInt &A, const APInt &B) { return A.slt(B); };
+ ConstantRange Result_sext(std::min(L, Compare), std::max(L, Compare) + 1);
+ ConstantRange SR = Result_sext.truncate(getBitWidth());
+
+ return UR.getSetSize().ult(SR.getSetSize()) ? UR : SR;
}
ConstantRange
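
The signed half of that computation, checked standalone on the example from the comment: the result bounds are the min and max over the four corner products of the signed endpoint values.

#include <algorithm>
#include <cstdio>
#include <initializer_list>

int main() {
  long ThisMin = -1, ThisMax = 3;   // [-1, 4) has signed min -1, max 3
  long OtherMin = -2, OtherMax = 2; // [-2, 3) has signed min -2, max 2
  auto L = {ThisMin * OtherMin, ThisMin * OtherMax,
            ThisMax * OtherMin, ThisMax * OtherMax};
  std::printf("[%ld, %ld)\n", std::min(L), std::max(L) + 1); // [-6, 7)
}
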
diff --git a/lib/IR/Constants.cpp b/lib/IR/Constants.cpp
index 0bf61a7..e51a396 100644
--- a/lib/IR/Constants.cpp
+++ b/lib/IR/Constants.cpp
@@ -1215,11 +1215,9 @@ ConstantExpr::getWithOperandReplaced(unsigned OpNo, Constant *Op) const {
Constant *ConstantExpr::getWithOperands(ArrayRef<Constant *> Ops, Type *Ty,
bool OnlyIfReduced) const {
assert(Ops.size() == getNumOperands() && "Operand count mismatch!");
- bool AnyChange = Ty != getType();
- for (unsigned i = 0; i != Ops.size(); ++i)
- AnyChange |= Ops[i] != getOperand(i);
- if (!AnyChange) // No operands changed, return self.
+ // If no operands changed return self.
+ if (Ty == getType() && std::equal(Ops.begin(), Ops.end(), op_begin()))
return const_cast<ConstantExpr*>(this);
Type *OnlyIfReducedTy = OnlyIfReduced ? Ty : nullptr;
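
The rewrite trades a hand-rolled any-changed loop for std::equal; the three-iterator form assumes equal lengths, which the assert at the top of the function guarantees. In miniature:

#include <algorithm>
#include <cstdio>
#include <vector>

// "Did any operand change?" as a single range comparison.
int main() {
  std::vector<int> Old{1, 2, 3}, New{1, 2, 3};
  bool Same = std::equal(New.begin(), New.end(), Old.begin());
  std::printf("%s\n", Same ? "return self" : "rebuild"); // return self
}
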
@@ -2971,10 +2969,7 @@ void ConstantExpr::replaceUsesOfWithOnConstant(Value *From, Value *ToV,
}
Instruction *ConstantExpr::getAsInstruction() {
- SmallVector<Value*,4> ValueOperands;
- for (op_iterator I = op_begin(), E = op_end(); I != E; ++I)
- ValueOperands.push_back(cast<Value>(I));
-
+ SmallVector<Value *, 4> ValueOperands(op_begin(), op_end());
ArrayRef<Value*> Ops(ValueOperands);
switch (getOpcode()) {
@@ -3006,12 +3001,14 @@ Instruction *ConstantExpr::getAsInstruction() {
case Instruction::ShuffleVector:
return new ShuffleVectorInst(Ops[0], Ops[1], Ops[2]);
- case Instruction::GetElementPtr:
- if (cast<GEPOperator>(this)->isInBounds())
- return GetElementPtrInst::CreateInBounds(Ops[0], Ops.slice(1));
- else
- return GetElementPtrInst::Create(Ops[0], Ops.slice(1));
-
+ case Instruction::GetElementPtr: {
+ const auto *GO = cast<GEPOperator>(this);
+ if (GO->isInBounds())
+ return GetElementPtrInst::CreateInBounds(GO->getSourceElementType(),
+ Ops[0], Ops.slice(1));
+ return GetElementPtrInst::Create(GO->getSourceElementType(), Ops[0],
+ Ops.slice(1));
+ }
case Instruction::ICmp:
case Instruction::FCmp:
return CmpInst::Create((Instruction::OtherOps)getOpcode(),
diff --git a/lib/IR/Core.cpp b/lib/IR/Core.cpp
index f007616..613147e 100644
--- a/lib/IR/Core.cpp
+++ b/lib/IR/Core.cpp
@@ -2506,7 +2506,7 @@ LLVMValueRef LLVMBuildGEP(LLVMBuilderRef B, LLVMValueRef Pointer,
LLVMValueRef *Indices, unsigned NumIndices,
const char *Name) {
ArrayRef<Value *> IdxList(unwrap(Indices), NumIndices);
- return wrap(unwrap(B)->CreateGEP(unwrap(Pointer), IdxList, Name));
+ return wrap(unwrap(B)->CreateGEP(nullptr, unwrap(Pointer), IdxList, Name));
}
LLVMValueRef LLVMBuildInBoundsGEP(LLVMBuilderRef B, LLVMValueRef Pointer,
diff --git a/lib/IR/DIBuilder.cpp b/lib/IR/DIBuilder.cpp
index 2cb27ca..9677de4 100644
--- a/lib/IR/DIBuilder.cpp
+++ b/lib/IR/DIBuilder.cpp
@@ -121,18 +121,10 @@ void DIBuilder::finalize() {
}
/// If N is a compile unit, return NULL; otherwise return N.
-static MDNode *getNonCompileUnitScope(MDNode *N) {
- if (DIDescriptor(N).isCompileUnit())
+static MDScope *getNonCompileUnitScope(MDNode *N) {
+ if (!N || isa<MDCompileUnit>(N))
return nullptr;
- return N;
-}
-
-static MDNode *createFilePathPair(LLVMContext &VMContext, StringRef Filename,
- StringRef Directory) {
- assert(!Filename.empty() && "Unable to create file without name");
- Metadata *Pair[] = {MDString::get(VMContext, Filename),
- MDString::get(VMContext, Directory)};
- return MDNode::get(VMContext, Pair);
+ return cast<MDScope>(N);
}
DICompileUnit DIBuilder::createCompileUnit(unsigned Lang, StringRef Filename,
@@ -157,22 +149,12 @@ DICompileUnit DIBuilder::createCompileUnit(unsigned Lang, StringRef Filename,
TempGVs = MDTuple::getTemporary(VMContext, None).release();
TempImportedModules = MDTuple::getTemporary(VMContext, None).release();
- Metadata *Elts[] = {HeaderBuilder::get(dwarf::DW_TAG_compile_unit)
- .concat(Lang)
- .concat(Producer)
- .concat(isOptimized)
- .concat(Flags)
- .concat(RunTimeVer)
- .concat(SplitName)
- .concat(Kind)
- .get(VMContext),
- createFilePathPair(VMContext, Filename, Directory),
- TempEnumTypes, TempRetainTypes, TempSubprograms, TempGVs,
- TempImportedModules};
-
// TODO: Switch to getDistinct(). We never want to merge compile units based
// on contents.
- MDNode *CUNode = MDNode::get(VMContext, Elts);
+ MDNode *CUNode = MDCompileUnit::get(
+ VMContext, Lang, MDFile::get(VMContext, Filename, Directory), Producer,
+ isOptimized, Flags, RunTimeVer, SplitName, Kind, TempEnumTypes,
+ TempRetainTypes, TempSubprograms, TempGVs, TempImportedModules);
// Create a named metadata so that it is easier to find cu in a module.
// Note that we only generate this when the caller wants to actually
@@ -192,11 +174,7 @@ static DIImportedEntity
createImportedModule(LLVMContext &C, dwarf::Tag Tag, DIScope Context,
Metadata *NS, unsigned Line, StringRef Name,
SmallVectorImpl<TrackingMDNodeRef> &AllImportedModules) {
- const MDNode *R;
- Metadata *Elts[] = {HeaderBuilder::get(Tag).concat(Line).concat(Name).get(C),
- Context, NS};
- R = MDNode::get(C, Elts);
- DIImportedEntity M(R);
+ DIImportedEntity M = MDImportedEntity::get(C, Tag, Context, NS, Line, Name);
assert(M.Verify() && "Imported module should be valid");
AllImportedModules.emplace_back(M.get());
return M;
@@ -236,39 +214,17 @@ DIImportedEntity DIBuilder::createImportedDeclaration(DIScope Context,
}
DIFile DIBuilder::createFile(StringRef Filename, StringRef Directory) {
- Metadata *Elts[] = {
- HeaderBuilder::get(dwarf::DW_TAG_file_type).get(VMContext),
- createFilePathPair(VMContext, Filename, Directory)};
- return DIFile(MDNode::get(VMContext, Elts));
+ return MDFile::get(VMContext, Filename, Directory);
}
DIEnumerator DIBuilder::createEnumerator(StringRef Name, int64_t Val) {
assert(!Name.empty() && "Unable to create enumerator without name");
- Metadata *Elts[] = {HeaderBuilder::get(dwarf::DW_TAG_enumerator)
- .concat(Name)
- .concat(Val)
- .get(VMContext)};
- return DIEnumerator(MDNode::get(VMContext, Elts));
+ return MDEnumerator::get(VMContext, Val, Name);
}
DIBasicType DIBuilder::createUnspecifiedType(StringRef Name) {
assert(!Name.empty() && "Unable to create type without name");
- // Unspecified types are encoded in DIBasicType format. Line number, filename,
- // size, alignment, offset and flags are always empty here.
- Metadata *Elts[] = {
- HeaderBuilder::get(dwarf::DW_TAG_unspecified_type)
- .concat(Name)
- .concat(0)
- .concat(0)
- .concat(0)
- .concat(0)
- .concat(0)
- .concat(0)
- .get(VMContext),
- nullptr, // Filename
- nullptr // Unused
- };
- return DIBasicType(MDNode::get(VMContext, Elts));
+ return MDBasicType::get(VMContext, dwarf::DW_TAG_unspecified_type, Name);
}
DIBasicType DIBuilder::createNullPtrType() {
@@ -279,142 +235,61 @@ DIBasicType
DIBuilder::createBasicType(StringRef Name, uint64_t SizeInBits,
uint64_t AlignInBits, unsigned Encoding) {
assert(!Name.empty() && "Unable to create type without name");
- // Basic types are encoded in DIBasicType format. Line number, filename,
- // offset and flags are always empty here.
- Metadata *Elts[] = {
- HeaderBuilder::get(dwarf::DW_TAG_base_type)
- .concat(Name)
- .concat(0) // Line
- .concat(SizeInBits)
- .concat(AlignInBits)
- .concat(0) // Offset
- .concat(0) // Flags
- .concat(Encoding)
- .get(VMContext),
- nullptr, // Filename
- nullptr // Unused
- };
- return DIBasicType(MDNode::get(VMContext, Elts));
+ return MDBasicType::get(VMContext, dwarf::DW_TAG_base_type, Name, SizeInBits,
+ AlignInBits, Encoding);
}
DIDerivedType DIBuilder::createQualifiedType(unsigned Tag, DIType FromTy) {
- // Qualified types are encoded in DIDerivedType format.
- Metadata *Elts[] = {HeaderBuilder::get(Tag)
- .concat(StringRef()) // Name
- .concat(0) // Line
- .concat(0) // Size
- .concat(0) // Align
- .concat(0) // Offset
- .concat(0) // Flags
- .get(VMContext),
- nullptr, // Filename
- nullptr, // Unused
- FromTy.getRef()};
- return DIDerivedType(MDNode::get(VMContext, Elts));
+ return MDDerivedType::get(VMContext, Tag, "", nullptr, 0, nullptr,
+ FromTy.getRef(), 0, 0, 0, 0);
}
DIDerivedType
DIBuilder::createPointerType(DIType PointeeTy, uint64_t SizeInBits,
uint64_t AlignInBits, StringRef Name) {
- // Pointer types are encoded in DIDerivedType format.
- Metadata *Elts[] = {HeaderBuilder::get(dwarf::DW_TAG_pointer_type)
- .concat(Name)
- .concat(0) // Line
- .concat(SizeInBits)
- .concat(AlignInBits)
- .concat(0) // Offset
- .concat(0) // Flags
- .get(VMContext),
- nullptr, // Filename
- nullptr, // Unused
- PointeeTy.getRef()};
- return DIDerivedType(MDNode::get(VMContext, Elts));
+ // FIXME: Why is there a name here?
+ return MDDerivedType::get(VMContext, dwarf::DW_TAG_pointer_type, Name,
+ nullptr, 0, nullptr, PointeeTy.getRef(), SizeInBits,
+ AlignInBits, 0, 0);
}
DIDerivedType
DIBuilder::createMemberPointerType(DIType PointeeTy, DIType Base,
uint64_t SizeInBits, uint64_t AlignInBits) {
- // Pointer types are encoded in DIDerivedType format.
- Metadata *Elts[] = {HeaderBuilder::get(dwarf::DW_TAG_ptr_to_member_type)
- .concat(StringRef())
- .concat(0) // Line
- .concat(SizeInBits) // Size
- .concat(AlignInBits) // Align
- .concat(0) // Offset
- .concat(0) // Flags
- .get(VMContext),
- nullptr, // Filename
- nullptr, // Unused
- PointeeTy.getRef(), Base.getRef()};
- return DIDerivedType(MDNode::get(VMContext, Elts));
+ return MDDerivedType::get(VMContext, dwarf::DW_TAG_ptr_to_member_type, "",
+ nullptr, 0, nullptr, PointeeTy.getRef(), SizeInBits,
+ AlignInBits, 0, 0, Base.getRef());
}
DIDerivedType DIBuilder::createReferenceType(unsigned Tag, DIType RTy) {
assert(RTy.isType() && "Unable to create reference type");
- // References are encoded in DIDerivedType format.
- Metadata *Elts[] = {HeaderBuilder::get(Tag)
- .concat(StringRef()) // Name
- .concat(0) // Line
- .concat(0) // Size
- .concat(0) // Align
- .concat(0) // Offset
- .concat(0) // Flags
- .get(VMContext),
- nullptr, // Filename
- nullptr, // TheCU,
- RTy.getRef()};
- return DIDerivedType(MDNode::get(VMContext, Elts));
+ return MDDerivedType::get(VMContext, Tag, "", nullptr, 0, nullptr,
+ RTy.getRef(), 0, 0, 0, 0);
}
DIDerivedType DIBuilder::createTypedef(DIType Ty, StringRef Name, DIFile File,
unsigned LineNo, DIDescriptor Context) {
- // typedefs are encoded in DIDerivedType format.
- Metadata *Elts[] = {HeaderBuilder::get(dwarf::DW_TAG_typedef)
- .concat(Name)
- .concat(LineNo)
- .concat(0) // Size
- .concat(0) // Align
- .concat(0) // Offset
- .concat(0) // Flags
- .get(VMContext),
- File.getFileNode(),
- DIScope(getNonCompileUnitScope(Context)).getRef(),
- Ty.getRef()};
- return DIDerivedType(MDNode::get(VMContext, Elts));
+ return MDDerivedType::get(VMContext, dwarf::DW_TAG_typedef, Name,
+ File.getFileNode(), LineNo,
+ DIScope(getNonCompileUnitScope(Context)).getRef(),
+ Ty.getRef(), 0, 0, 0, 0);
}
DIDerivedType DIBuilder::createFriend(DIType Ty, DIType FriendTy) {
// Friends are encoded in DIDerivedType format.
assert(Ty.isType() && "Invalid type!");
assert(FriendTy.isType() && "Invalid friend type!");
- Metadata *Elts[] = {HeaderBuilder::get(dwarf::DW_TAG_friend)
- .concat(StringRef()) // Name
- .concat(0) // Line
- .concat(0) // Size
- .concat(0) // Align
- .concat(0) // Offset
- .concat(0) // Flags
- .get(VMContext),
- nullptr, Ty.getRef(), FriendTy.getRef()};
- return DIDerivedType(MDNode::get(VMContext, Elts));
+ return MDDerivedType::get(VMContext, dwarf::DW_TAG_friend, "", nullptr, 0,
+ Ty.getRef(), FriendTy.getRef(), 0, 0, 0, 0);
}
DIDerivedType DIBuilder::createInheritance(DIType Ty, DIType BaseTy,
uint64_t BaseOffset,
unsigned Flags) {
assert(Ty.isType() && "Unable to create inheritance");
- // TAG_inheritance is encoded in DIDerivedType format.
- Metadata *Elts[] = {HeaderBuilder::get(dwarf::DW_TAG_inheritance)
- .concat(StringRef()) // Name
- .concat(0) // Line
- .concat(0) // Size
- .concat(0) // Align
- .concat(BaseOffset)
- .concat(Flags)
- .get(VMContext),
- nullptr, Ty.getRef(), BaseTy.getRef()};
- auto R = DIDerivedType(MDNode::get(VMContext, Elts));
- return R;
+ return MDDerivedType::get(VMContext, dwarf::DW_TAG_inheritance, "", nullptr,
+ 0, Ty.getRef(), BaseTy.getRef(), 0, 0, BaseOffset,
+ Flags);
}
DIDerivedType DIBuilder::createMemberType(DIDescriptor Scope, StringRef Name,
@@ -423,22 +298,13 @@ DIDerivedType DIBuilder::createMemberType(DIDescriptor Scope, StringRef Name,
uint64_t AlignInBits,
uint64_t OffsetInBits, unsigned Flags,
DIType Ty) {
- // TAG_member is encoded in DIDerivedType format.
- Metadata *Elts[] = {HeaderBuilder::get(dwarf::DW_TAG_member)
- .concat(Name)
- .concat(LineNumber)
- .concat(SizeInBits)
- .concat(AlignInBits)
- .concat(OffsetInBits)
- .concat(Flags)
- .get(VMContext),
- File.getFileNode(),
- DIScope(getNonCompileUnitScope(Scope)).getRef(),
- Ty.getRef()};
- return DIDerivedType(MDNode::get(VMContext, Elts));
-}
-
-static Metadata *getConstantOrNull(Constant *C) {
+ return MDDerivedType::get(
+ VMContext, dwarf::DW_TAG_member, Name, File, LineNumber,
+ DIScope(getNonCompileUnitScope(Scope)).getRef(), Ty.getRef(), SizeInBits,
+ AlignInBits, OffsetInBits, Flags);
+}
+
+static ConstantAsMetadata *getConstantOrNull(Constant *C) {
if (C)
return ConstantAsMetadata::get(C);
return nullptr;
@@ -451,18 +317,10 @@ DIDerivedType DIBuilder::createStaticMemberType(DIDescriptor Scope,
llvm::Constant *Val) {
// TAG_member is encoded in DIDerivedType format.
Flags |= DIDescriptor::FlagStaticMember;
- Metadata *Elts[] = {HeaderBuilder::get(dwarf::DW_TAG_member)
- .concat(Name)
- .concat(LineNumber)
- .concat(0) // Size
- .concat(0) // Align
- .concat(0) // Offset
- .concat(Flags)
- .get(VMContext),
- File.getFileNode(),
- DIScope(getNonCompileUnitScope(Scope)).getRef(),
- Ty.getRef(), getConstantOrNull(Val)};
- return DIDerivedType(MDNode::get(VMContext, Elts));
+ return MDDerivedType::get(
+ VMContext, dwarf::DW_TAG_member, Name, File, LineNumber,
+ DIScope(getNonCompileUnitScope(Scope)).getRef(), Ty.getRef(), 0, 0, 0,
+ Flags, getConstantOrNull(Val));
}
DIDerivedType DIBuilder::createObjCIVar(StringRef Name, DIFile File,
@@ -471,33 +329,18 @@ DIDerivedType DIBuilder::createObjCIVar(StringRef Name, DIFile File,
uint64_t AlignInBits,
uint64_t OffsetInBits, unsigned Flags,
DIType Ty, MDNode *PropertyNode) {
- // TAG_member is encoded in DIDerivedType format.
- Metadata *Elts[] = {HeaderBuilder::get(dwarf::DW_TAG_member)
- .concat(Name)
- .concat(LineNumber)
- .concat(SizeInBits)
- .concat(AlignInBits)
- .concat(OffsetInBits)
- .concat(Flags)
- .get(VMContext),
- File.getFileNode(), getNonCompileUnitScope(File), Ty,
- PropertyNode};
- return DIDerivedType(MDNode::get(VMContext, Elts));
+ return MDDerivedType::get(VMContext, dwarf::DW_TAG_member, Name, File,
+ LineNumber, getNonCompileUnitScope(File),
+ Ty.getRef(), SizeInBits, AlignInBits, OffsetInBits,
+ Flags, PropertyNode);
}
DIObjCProperty
DIBuilder::createObjCProperty(StringRef Name, DIFile File, unsigned LineNumber,
StringRef GetterName, StringRef SetterName,
unsigned PropertyAttributes, DIType Ty) {
- Metadata *Elts[] = {HeaderBuilder::get(dwarf::DW_TAG_APPLE_property)
- .concat(Name)
- .concat(LineNumber)
- .concat(GetterName)
- .concat(SetterName)
- .concat(PropertyAttributes)
- .get(VMContext),
- File, Ty};
- return DIObjCProperty(MDNode::get(VMContext, Elts));
+ return MDObjCProperty::get(VMContext, Name, File, LineNumber, GetterName,
+ SetterName, PropertyAttributes, Ty);
}
DITemplateTypeParameter
@@ -505,13 +348,7 @@ DIBuilder::createTemplateTypeParameter(DIDescriptor Context, StringRef Name,
DIType Ty) {
assert(!DIScope(getNonCompileUnitScope(Context)).getRef() &&
"Expected compile unit");
- Metadata *Elts[] = {HeaderBuilder::get(dwarf::DW_TAG_template_type_parameter)
- .concat(Name)
- .concat(0)
- .concat(0)
- .get(VMContext),
- nullptr, Ty.getRef(), nullptr};
- return DITemplateTypeParameter(MDNode::get(VMContext, Elts));
+ return MDTemplateTypeParameter::get(VMContext, Name, Ty.getRef());
}
static DITemplateValueParameter
@@ -520,10 +357,7 @@ createTemplateValueParameterHelper(LLVMContext &VMContext, unsigned Tag,
DIType Ty, Metadata *MD) {
assert(!DIScope(getNonCompileUnitScope(Context)).getRef() &&
"Expected compile unit");
- Metadata *Elts[] = {
- HeaderBuilder::get(Tag).concat(Name).concat(0).concat(0).get(VMContext),
- nullptr, Ty.getRef(), MD, nullptr};
- return DITemplateValueParameter(MDNode::get(VMContext, Elts));
+ return MDTemplateValueParameter::get(VMContext, Tag, Name, Ty.getRef(), MD);
}
DITemplateValueParameter
@@ -563,23 +397,11 @@ DICompositeType DIBuilder::createClassType(DIDescriptor Context, StringRef Name,
assert((!Context || Context.isScope() || Context.isType()) &&
"createClassType should be called with a valid Context");
// TAG_class_type is encoded in DICompositeType format.
- Metadata *Elts[] = {
- HeaderBuilder::get(dwarf::DW_TAG_class_type)
- .concat(Name)
- .concat(LineNumber)
- .concat(SizeInBits)
- .concat(AlignInBits)
- .concat(OffsetInBits)
- .concat(Flags)
- .concat(0)
- .get(VMContext),
- File.getFileNode(), DIScope(getNonCompileUnitScope(Context)).getRef(),
- DerivedFrom.getRef(), Elements, VTableHolder.getRef(), TemplateParams,
- UniqueIdentifier.empty() ? nullptr
- : MDString::get(VMContext, UniqueIdentifier)};
- DICompositeType R(MDNode::get(VMContext, Elts));
- assert(R.isCompositeType() &&
- "createClassType should return a DICompositeType");
+ DICompositeType R = MDCompositeType::get(
+ VMContext, dwarf::DW_TAG_class_type, Name, File, LineNumber,
+ DIScope(getNonCompileUnitScope(Context)).getRef(), DerivedFrom.getRef(),
+ SizeInBits, AlignInBits, OffsetInBits, Flags, Elements, 0,
+ VTableHolder.getRef(), TemplateParams, UniqueIdentifier);
if (!UniqueIdentifier.empty())
retainType(R);
trackIfUnresolved(R);
@@ -596,24 +418,11 @@ DICompositeType DIBuilder::createStructType(DIDescriptor Context,
unsigned RunTimeLang,
DIType VTableHolder,
StringRef UniqueIdentifier) {
- // TAG_structure_type is encoded in DICompositeType format.
- Metadata *Elts[] = {
- HeaderBuilder::get(dwarf::DW_TAG_structure_type)
- .concat(Name)
- .concat(LineNumber)
- .concat(SizeInBits)
- .concat(AlignInBits)
- .concat(0)
- .concat(Flags)
- .concat(RunTimeLang)
- .get(VMContext),
- File.getFileNode(), DIScope(getNonCompileUnitScope(Context)).getRef(),
- DerivedFrom.getRef(), Elements, VTableHolder.getRef(), nullptr,
- UniqueIdentifier.empty() ? nullptr
- : MDString::get(VMContext, UniqueIdentifier)};
- DICompositeType R(MDNode::get(VMContext, Elts));
- assert(R.isCompositeType() &&
- "createStructType should return a DICompositeType");
+ DICompositeType R = MDCompositeType::get(
+ VMContext, dwarf::DW_TAG_structure_type, Name, File, LineNumber,
+ DIScope(getNonCompileUnitScope(Context)).getRef(), DerivedFrom.getRef(),
+ SizeInBits, AlignInBits, 0, Flags, Elements, RunTimeLang,
+ VTableHolder.getRef(), nullptr, UniqueIdentifier);
if (!UniqueIdentifier.empty())
retainType(R);
trackIfUnresolved(R);
@@ -627,22 +436,11 @@ DICompositeType DIBuilder::createUnionType(DIDescriptor Scope, StringRef Name,
DIArray Elements,
unsigned RunTimeLang,
StringRef UniqueIdentifier) {
- // TAG_union_type is encoded in DICompositeType format.
- Metadata *Elts[] = {
- HeaderBuilder::get(dwarf::DW_TAG_union_type)
- .concat(Name)
- .concat(LineNumber)
- .concat(SizeInBits)
- .concat(AlignInBits)
- .concat(0) // Offset
- .concat(Flags)
- .concat(RunTimeLang)
- .get(VMContext),
- File.getFileNode(), DIScope(getNonCompileUnitScope(Scope)).getRef(),
- nullptr, Elements, nullptr, nullptr,
- UniqueIdentifier.empty() ? nullptr
- : MDString::get(VMContext, UniqueIdentifier)};
- DICompositeType R(MDNode::get(VMContext, Elts));
+ DICompositeType R = MDCompositeType::get(
+ VMContext, dwarf::DW_TAG_union_type, Name, File, LineNumber,
+ DIScope(getNonCompileUnitScope(Scope)).getRef(), nullptr, SizeInBits,
+ AlignInBits, 0, Flags, Elements, RunTimeLang, nullptr, nullptr,
+ UniqueIdentifier);
if (!UniqueIdentifier.empty())
retainType(R);
trackIfUnresolved(R);
@@ -652,43 +450,18 @@ DICompositeType DIBuilder::createUnionType(DIDescriptor Scope, StringRef Name,
DISubroutineType DIBuilder::createSubroutineType(DIFile File,
DITypeArray ParameterTypes,
unsigned Flags) {
- // TAG_subroutine_type is encoded in DICompositeType format.
- Metadata *Elts[] = {
- HeaderBuilder::get(dwarf::DW_TAG_subroutine_type)
- .concat(StringRef())
- .concat(0) // Line
- .concat(0) // Size
- .concat(0) // Align
- .concat(0) // Offset
- .concat(Flags) // Flags
- .concat(0)
- .get(VMContext),
- nullptr, nullptr, nullptr, ParameterTypes, nullptr, nullptr,
- nullptr // Type Identifer
- };
- return DISubroutineType(MDNode::get(VMContext, Elts));
+ return MDSubroutineType::get(VMContext, Flags, ParameterTypes);
}
DICompositeType DIBuilder::createEnumerationType(
DIDescriptor Scope, StringRef Name, DIFile File, unsigned LineNumber,
uint64_t SizeInBits, uint64_t AlignInBits, DIArray Elements,
DIType UnderlyingType, StringRef UniqueIdentifier) {
- // TAG_enumeration_type is encoded in DICompositeType format.
- Metadata *Elts[] = {
- HeaderBuilder::get(dwarf::DW_TAG_enumeration_type)
- .concat(Name)
- .concat(LineNumber)
- .concat(SizeInBits)
- .concat(AlignInBits)
- .concat(0) // Offset
- .concat(0) // Flags
- .concat(0)
- .get(VMContext),
- File.getFileNode(), DIScope(getNonCompileUnitScope(Scope)).getRef(),
- UnderlyingType.getRef(), Elements, nullptr, nullptr,
- UniqueIdentifier.empty() ? nullptr
- : MDString::get(VMContext, UniqueIdentifier)};
- DICompositeType CTy(MDNode::get(VMContext, Elts));
+ DICompositeType CTy = MDCompositeType::get(
+ VMContext, dwarf::DW_TAG_enumeration_type, Name, File, LineNumber,
+ DIScope(getNonCompileUnitScope(Scope)).getRef(), UnderlyingType.getRef(),
+ SizeInBits, AlignInBits, 0, 0, Elements, 0, nullptr, nullptr,
+ UniqueIdentifier);
AllEnumTypes.push_back(CTy);
if (!UniqueIdentifier.empty())
retainType(CTy);
@@ -698,85 +471,38 @@ DICompositeType DIBuilder::createEnumerationType(
DICompositeType DIBuilder::createArrayType(uint64_t Size, uint64_t AlignInBits,
DIType Ty, DIArray Subscripts) {
- // TAG_array_type is encoded in DICompositeType format.
- Metadata *Elts[] = {
- HeaderBuilder::get(dwarf::DW_TAG_array_type)
- .concat(StringRef())
- .concat(0) // Line
- .concat(Size)
- .concat(AlignInBits)
- .concat(0) // Offset
- .concat(0) // Flags
- .concat(0)
- .get(VMContext),
- nullptr, // Filename/Directory,
- nullptr, // Unused
- Ty.getRef(), Subscripts, nullptr, nullptr,
- nullptr // Type Identifer
- };
- DICompositeType R(MDNode::get(VMContext, Elts));
+ auto *R = MDCompositeType::get(VMContext, dwarf::DW_TAG_array_type, "",
+ nullptr, 0, nullptr, Ty.getRef(), Size,
+ AlignInBits, 0, 0, Subscripts, 0, nullptr);
trackIfUnresolved(R);
return R;
}
DICompositeType DIBuilder::createVectorType(uint64_t Size, uint64_t AlignInBits,
DIType Ty, DIArray Subscripts) {
- // A vector is an array type with the FlagVector flag applied.
- Metadata *Elts[] = {
- HeaderBuilder::get(dwarf::DW_TAG_array_type)
- .concat("")
- .concat(0) // Line
- .concat(Size)
- .concat(AlignInBits)
- .concat(0) // Offset
- .concat(DIType::FlagVector)
- .concat(0)
- .get(VMContext),
- nullptr, // Filename/Directory,
- nullptr, // Unused
- Ty.getRef(), Subscripts, nullptr, nullptr,
- nullptr // Type Identifer
- };
- DICompositeType R(MDNode::get(VMContext, Elts));
+ auto *R = MDCompositeType::get(
+ VMContext, dwarf::DW_TAG_array_type, "", nullptr, 0, nullptr, Ty.getRef(),
+ Size, AlignInBits, 0, DIType::FlagVector, Subscripts, 0, nullptr);
trackIfUnresolved(R);
return R;
}
-static HeaderBuilder setTypeFlagsInHeader(StringRef Header,
- unsigned FlagsToSet) {
- DIHeaderFieldIterator I(Header);
- std::advance(I, 6);
-
- unsigned Flags;
- if (I->getAsInteger(0, Flags))
- Flags = 0;
- Flags |= FlagsToSet;
-
- return HeaderBuilder()
- .concat(I.getPrefix())
- .concat(Flags)
- .concat(I.getSuffix());
-}
-
static DIType createTypeWithFlags(LLVMContext &Context, DIType Ty,
unsigned FlagsToSet) {
- SmallVector<Metadata *, 9> Elts;
- MDNode *N = Ty;
- assert(N && "Unexpected input DIType!");
- // Update header field.
- Elts.push_back(setTypeFlagsInHeader(Ty.getHeader(), FlagsToSet).get(Context));
- Elts.append(N->op_begin() + 1, N->op_end());
-
- return DIType(MDNode::get(Context, Elts));
+ TempMDType NewTy = cast<MDType>(static_cast<MDNode *>(Ty))->clone();
+ NewTy->setFlags(NewTy->getFlags() | FlagsToSet);
+ return MDNode::replaceWithUniqued(std::move(NewTy));
}
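A sketch of the clone-modify-reunique pattern createTypeWithFlags now relies on (Ty is a hypothetical uniqued MDType pointer):

// Uniqued nodes must not be mutated in place, so clone to a temporary,
// edit the copy, then fold it back into the uniquing tables.
TempMDType Tmp = Ty->clone();
Tmp->setFlags(Tmp->getFlags() | DIType::FlagArtificial);
MDType *Uniqued = MDNode::replaceWithUniqued(std::move(Tmp));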
DIType DIBuilder::createArtificialType(DIType Ty) {
+ // FIXME: Restrict this to the nodes where it's valid.
if (Ty.isArtificial())
return Ty;
return createTypeWithFlags(VMContext, Ty, DIType::FlagArtificial);
}
DIType DIBuilder::createObjectPointerType(DIType Ty) {
+ // FIXME: Restrict this to the nodes where it's valid.
if (Ty.isObjectPointer())
return Ty;
unsigned Flags = DIType::FlagObjectPointer | DIType::FlagArtificial;
@@ -794,26 +520,13 @@ DIBuilder::createForwardDecl(unsigned Tag, StringRef Name, DIDescriptor Scope,
DIFile F, unsigned Line, unsigned RuntimeLang,
uint64_t SizeInBits, uint64_t AlignInBits,
StringRef UniqueIdentifier) {
- // Create a temporary MDNode.
- Metadata *Elts[] = {
- HeaderBuilder::get(Tag)
- .concat(Name)
- .concat(Line)
- .concat(SizeInBits)
- .concat(AlignInBits)
- .concat(0) // Offset
- .concat(DIDescriptor::FlagFwdDecl)
- .concat(RuntimeLang)
- .get(VMContext),
- F.getFileNode(), DIScope(getNonCompileUnitScope(Scope)).getRef(), nullptr,
- DIArray(), nullptr,
- nullptr, // TemplateParams
- UniqueIdentifier.empty() ? nullptr
- : MDString::get(VMContext, UniqueIdentifier)};
- MDNode *Node = MDNode::get(VMContext, Elts);
- DICompositeType RetTy(Node);
- assert(RetTy.isCompositeType() &&
- "createForwardDecl result should be a DIType");
+ // FIXME: Define in terms of createReplaceableCompositeType() by calling
+ // replaceWithUniqued().
+ DICompositeType RetTy = MDCompositeType::get(
+ VMContext, Tag, Name, F.getFileNode(), Line,
+ DIScope(getNonCompileUnitScope(Scope)).getRef(), nullptr, SizeInBits,
+ AlignInBits, 0, DIDescriptor::FlagFwdDecl, nullptr, RuntimeLang, nullptr,
+ nullptr, UniqueIdentifier);
if (!UniqueIdentifier.empty())
retainType(RetTy);
trackIfUnresolved(RetTy);
@@ -824,25 +537,12 @@ DICompositeType DIBuilder::createReplaceableCompositeType(
unsigned Tag, StringRef Name, DIDescriptor Scope, DIFile F, unsigned Line,
unsigned RuntimeLang, uint64_t SizeInBits, uint64_t AlignInBits,
unsigned Flags, StringRef UniqueIdentifier) {
- // Create a temporary MDNode.
- Metadata *Elts[] = {
- HeaderBuilder::get(Tag)
- .concat(Name)
- .concat(Line)
- .concat(SizeInBits)
- .concat(AlignInBits)
- .concat(0) // Offset
- .concat(Flags)
- .concat(RuntimeLang)
- .get(VMContext),
- F.getFileNode(), DIScope(getNonCompileUnitScope(Scope)).getRef(), nullptr,
- DIArray(), nullptr,
- nullptr, // TemplateParams
- UniqueIdentifier.empty() ? nullptr
- : MDString::get(VMContext, UniqueIdentifier)};
- DICompositeType RetTy(MDNode::getTemporary(VMContext, Elts).release());
- assert(RetTy.isCompositeType() &&
- "createReplaceableForwardDecl result should be a DIType");
+ DICompositeType RetTy =
+ MDCompositeType::getTemporary(
+ VMContext, Tag, Name, F.getFileNode(), Line,
+ DIScope(getNonCompileUnitScope(Scope)).getRef(), nullptr, SizeInBits,
+ AlignInBits, 0, Flags, nullptr, RuntimeLang,
+ nullptr, nullptr, UniqueIdentifier).release();
if (!UniqueIdentifier.empty())
retainType(RetTy);
trackIfUnresolved(RetTy);
@@ -865,62 +565,39 @@ DITypeArray DIBuilder::getOrCreateTypeArray(ArrayRef<Metadata *> Elements) {
}
DISubrange DIBuilder::getOrCreateSubrange(int64_t Lo, int64_t Count) {
- Metadata *Elts[] = {HeaderBuilder::get(dwarf::DW_TAG_subrange_type)
- .concat(Lo)
- .concat(Count)
- .get(VMContext)};
-
- return DISubrange(MDNode::get(VMContext, Elts));
+ return MDSubrange::get(VMContext, Count, Lo);
}
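Worth noting when reading the subrange conversion above: the argument order flips relative to the old header, which stored Lo before Count. A sketch:

// The old header concatenated (Lo, Count); MDSubrange::get takes Count first.
DISubrange TenElems = MDSubrange::get(VMContext, /*Count=*/10, /*Lo=*/0);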
-static DIGlobalVariable createGlobalVariableHelper(
- LLVMContext &VMContext, DIDescriptor Context, StringRef Name,
- StringRef LinkageName, DIFile F, unsigned LineNumber, DITypeRef Ty,
- bool isLocalToUnit, Constant *Val, MDNode *Decl, bool isDefinition,
- std::function<MDNode *(ArrayRef<Metadata *>)> CreateFunc) {
-
+static void checkGlobalVariableScope(DIDescriptor Context) {
MDNode *TheCtx = getNonCompileUnitScope(Context);
if (DIScope(TheCtx).isCompositeType()) {
assert(!DICompositeType(TheCtx).getIdentifier() &&
"Context of a global variable should not be a type with identifier");
}
-
- Metadata *Elts[] = {HeaderBuilder::get(dwarf::DW_TAG_variable)
- .concat(Name)
- .concat(Name)
- .concat(LinkageName)
- .concat(LineNumber)
- .concat(isLocalToUnit)
- .concat(isDefinition)
- .get(VMContext),
- TheCtx, F, Ty, getConstantOrNull(Val),
- DIDescriptor(Decl)};
-
- return DIGlobalVariable(CreateFunc(Elts));
}
DIGlobalVariable DIBuilder::createGlobalVariable(
DIDescriptor Context, StringRef Name, StringRef LinkageName, DIFile F,
unsigned LineNumber, DITypeRef Ty, bool isLocalToUnit, Constant *Val,
MDNode *Decl) {
- return createGlobalVariableHelper(
- VMContext, Context, Name, LinkageName, F, LineNumber, Ty, isLocalToUnit,
- Val, Decl, true, [&](ArrayRef<Metadata *> Elts) -> MDNode *{
- MDNode *Node = MDNode::get(VMContext, Elts);
- AllGVs.push_back(Node);
- return Node;
- });
+ checkGlobalVariableScope(Context);
+
+ auto *N = MDGlobalVariable::get(VMContext, Context, Name, LinkageName, F,
+ LineNumber, Ty, isLocalToUnit, true,
+ getConstantOrNull(Val), Decl);
+ AllGVs.push_back(N);
+ return N;
}
DIGlobalVariable DIBuilder::createTempGlobalVariableFwdDecl(
DIDescriptor Context, StringRef Name, StringRef LinkageName, DIFile F,
unsigned LineNumber, DITypeRef Ty, bool isLocalToUnit, Constant *Val,
MDNode *Decl) {
- return createGlobalVariableHelper(VMContext, Context, Name, LinkageName, F,
- LineNumber, Ty, isLocalToUnit, Val, Decl,
- false, [&](ArrayRef<Metadata *> Elts) {
- return MDNode::getTemporary(VMContext, Elts).release();
- });
+ checkGlobalVariableScope(Context);
+
+ return MDGlobalVariable::getTemporary(VMContext, Context, Name, LinkageName,
+ F, LineNumber, Ty, isLocalToUnit, false,
+ getConstantOrNull(Val), Decl).release();
}
DIVariable DIBuilder::createLocalVariable(unsigned Tag, DIDescriptor Scope,
@@ -928,16 +605,17 @@ DIVariable DIBuilder::createLocalVariable(unsigned Tag, DIDescriptor Scope,
unsigned LineNo, DITypeRef Ty,
bool AlwaysPreserve, unsigned Flags,
unsigned ArgNo) {
+ // FIXME: Why getNonCompileUnitScope()?
+ // FIXME: Why is "!Context" okay here?
+ // FIXME: Why doesn't this check for a subprogram or lexical block (AFAICT
+ // the only valid scopes)?
DIDescriptor Context(getNonCompileUnitScope(Scope));
assert((!Context || Context.isScope()) &&
"createLocalVariable should be called with a valid Context");
- Metadata *Elts[] = {HeaderBuilder::get(Tag)
- .concat(Name)
- .concat(LineNo | (ArgNo << 24))
- .concat(Flags)
- .get(VMContext),
- getNonCompileUnitScope(Scope), File, Ty};
- MDNode *Node = MDNode::get(VMContext, Elts);
+
+ auto *Node =
+ MDLocalVariable::get(VMContext, Tag, getNonCompileUnitScope(Scope), Name,
+ File, LineNo, Ty, ArgNo, Flags);
if (AlwaysPreserve) {
// The optimizer may remove local variables. If there is an interest
// in preserving variable info in such situations, stash it in a
@@ -946,18 +624,11 @@ DIVariable DIBuilder::createLocalVariable(unsigned Tag, DIDescriptor Scope,
assert(Fn && "Missing subprogram for local variable");
PreservedVariables[Fn].emplace_back(Node);
}
- DIVariable RetVar(Node);
- assert(RetVar.isVariable() &&
- "createLocalVariable should return a valid DIVariable");
- return RetVar;
+ return Node;
}
DIExpression DIBuilder::createExpression(ArrayRef<uint64_t> Addr) {
- auto Header = HeaderBuilder::get(DW_TAG_expression);
- for (uint64_t I : Addr)
- Header.concat(I);
- Metadata *Elts[] = {Header.get(VMContext)};
- return DIExpression(MDNode::get(VMContext, Elts));
+ return MDExpression::get(VMContext, Addr);
}
DIExpression DIBuilder::createExpression(ArrayRef<int64_t> Signed) {
@@ -966,10 +637,10 @@ DIExpression DIBuilder::createExpression(ArrayRef<int64_t> Signed) {
return createExpression(Addr);
}
-DIExpression DIBuilder::createBitPieceExpression(unsigned OffsetInBits,
- unsigned SizeInBits) {
- int64_t Addr[] = {dwarf::DW_OP_bit_piece, OffsetInBits, SizeInBits};
- return createExpression(Addr);
+DIExpression DIBuilder::createBitPieceExpression(unsigned OffsetInBytes,
+ unsigned SizeInBytes) {
+ uint64_t Addr[] = {dwarf::DW_OP_bit_piece, OffsetInBytes, SizeInBytes};
+ return MDExpression::get(VMContext, Addr);
}
DISubprogram DIBuilder::createFunction(DIScopeRef Context, StringRef Name,
@@ -987,38 +658,6 @@ DISubprogram DIBuilder::createFunction(DIScopeRef Context, StringRef Name,
Flags, isOptimized, Fn, TParams, Decl);
}
-static DISubprogram createFunctionHelper(
- LLVMContext &VMContext, DIDescriptor Context, StringRef Name,
- StringRef LinkageName, DIFile File, unsigned LineNo, DICompositeType Ty,
- bool isLocalToUnit, bool isDefinition, unsigned ScopeLine, unsigned Flags,
- bool isOptimized, Function *Fn, MDNode *TParams, MDNode *Decl, MDNode *Vars,
- std::function<MDNode *(ArrayRef<Metadata *>)> CreateFunc) {
- assert(Ty.getTag() == dwarf::DW_TAG_subroutine_type &&
- "function types should be subroutines");
- Metadata *Elts[] = {HeaderBuilder::get(dwarf::DW_TAG_subprogram)
- .concat(Name)
- .concat(Name)
- .concat(LinkageName)
- .concat(LineNo)
- .concat(isLocalToUnit)
- .concat(isDefinition)
- .concat(0)
- .concat(0)
- .concat(Flags)
- .concat(isOptimized)
- .concat(ScopeLine)
- .get(VMContext),
- File.getFileNode(),
- DIScope(getNonCompileUnitScope(Context)).getRef(), Ty,
- nullptr, getConstantOrNull(Fn), TParams, Decl, Vars};
-
- DISubprogram S(CreateFunc(Elts));
- assert(S.isSubprogram() &&
- "createFunction should return a valid DISubprogram");
- return S;
-}
-
-
DISubprogram DIBuilder::createFunction(DIDescriptor Context, StringRef Name,
StringRef LinkageName, DIFile File,
unsigned LineNo, DICompositeType Ty,
@@ -1026,19 +665,18 @@ DISubprogram DIBuilder::createFunction(DIDescriptor Context, StringRef Name,
unsigned ScopeLine, unsigned Flags,
bool isOptimized, Function *Fn,
MDNode *TParams, MDNode *Decl) {
- return createFunctionHelper(VMContext, Context, Name, LinkageName, File,
- LineNo, Ty, isLocalToUnit, isDefinition,
- ScopeLine, Flags, isOptimized, Fn, TParams, Decl,
- MDNode::getTemporary(VMContext, None).release(),
- [&](ArrayRef<Metadata *> Elts) -> MDNode *{
- MDNode *Node = MDNode::get(VMContext, Elts);
- // Create a named metadata so that we
- // do not lose this mdnode.
- if (isDefinition)
- AllSubprograms.push_back(Node);
- trackIfUnresolved(Node);
- return Node;
- });
+ assert(Ty.getTag() == dwarf::DW_TAG_subroutine_type &&
+ "function types should be subroutines");
+ auto *Node = MDSubprogram::get(
+ VMContext, DIScope(getNonCompileUnitScope(Context)).getRef(), Name,
+ LinkageName, File.getFileNode(), LineNo, Ty, isLocalToUnit, isDefinition,
+ ScopeLine, nullptr, 0, 0, Flags, isOptimized, getConstantOrNull(Fn),
+ TParams, Decl, MDNode::getTemporary(VMContext, None).release());
+
+ if (isDefinition)
+ AllSubprograms.push_back(Node);
+ trackIfUnresolved(Node);
+ return Node;
}
DISubprogram
@@ -1049,12 +687,11 @@ DIBuilder::createTempFunctionFwdDecl(DIDescriptor Context, StringRef Name,
unsigned ScopeLine, unsigned Flags,
bool isOptimized, Function *Fn,
MDNode *TParams, MDNode *Decl) {
- return createFunctionHelper(VMContext, Context, Name, LinkageName, File,
- LineNo, Ty, isLocalToUnit, isDefinition,
- ScopeLine, Flags, isOptimized, Fn, TParams, Decl,
- nullptr, [&](ArrayRef<Metadata *> Elts) {
- return MDNode::getTemporary(VMContext, Elts).release();
- });
+ return MDSubprogram::getTemporary(
+ VMContext, DIScope(getNonCompileUnitScope(Context)).getRef(), Name,
+ LinkageName, File.getFileNode(), LineNo, Ty, isLocalToUnit,
+ isDefinition, ScopeLine, nullptr, 0, 0, Flags, isOptimized,
+ getConstantOrNull(Fn), TParams, Decl, nullptr).release();
}
DISubprogram DIBuilder::createMethod(DIDescriptor Context, StringRef Name,
@@ -1070,24 +707,13 @@ DISubprogram DIBuilder::createMethod(DIDescriptor Context, StringRef Name,
assert(getNonCompileUnitScope(Context) &&
"Methods should have both a Context and a context that isn't "
"the compile unit.");
- Metadata *Elts[] = {HeaderBuilder::get(dwarf::DW_TAG_subprogram)
- .concat(Name)
- .concat(Name)
- .concat(LinkageName)
- .concat(LineNo)
- .concat(isLocalToUnit)
- .concat(isDefinition)
- .concat(VK)
- .concat(VIndex)
- .concat(Flags)
- .concat(isOptimized)
- .concat(LineNo)
- // FIXME: Do we want to use different scope/lines?
- .get(VMContext),
- F.getFileNode(), DIScope(Context).getRef(), Ty,
- VTableHolder.getRef(), getConstantOrNull(Fn), TParam,
- nullptr, nullptr};
- MDNode *Node = MDNode::get(VMContext, Elts);
+ // FIXME: Do we want to use different scope/lines?
+ auto *Node = MDSubprogram::get(
+ VMContext, DIScope(Context).getRef(), Name, LinkageName, F.getFileNode(),
+ LineNo, Ty, isLocalToUnit, isDefinition, LineNo, VTableHolder.getRef(),
+ VK, VIndex, Flags, isOptimized, getConstantOrNull(Fn), TParam, nullptr,
+ nullptr);
+
if (isDefinition)
AllSubprograms.push_back(Node);
DISubprogram S(Node);
@@ -1098,12 +724,8 @@ DISubprogram DIBuilder::createMethod(DIDescriptor Context, StringRef Name,
DINameSpace DIBuilder::createNameSpace(DIDescriptor Scope, StringRef Name,
DIFile File, unsigned LineNo) {
- Metadata *Elts[] = {HeaderBuilder::get(dwarf::DW_TAG_namespace)
- .concat(Name)
- .concat(LineNo)
- .get(VMContext),
- File.getFileNode(), getNonCompileUnitScope(Scope)};
- DINameSpace R(MDNode::get(VMContext, Elts));
+ DINameSpace R = MDNamespace::get(VMContext, getNonCompileUnitScope(Scope),
+ File.getFileNode(), Name, LineNo);
assert(R.Verify() &&
"createNameSpace should return a verifiable DINameSpace");
return R;
@@ -1112,11 +734,8 @@ DINameSpace DIBuilder::createNameSpace(DIDescriptor Scope, StringRef Name,
DILexicalBlockFile DIBuilder::createLexicalBlockFile(DIDescriptor Scope,
DIFile File,
unsigned Discriminator) {
- Metadata *Elts[] = {HeaderBuilder::get(dwarf::DW_TAG_lexical_block)
- .concat(Discriminator)
- .get(VMContext),
- File.getFileNode(), Scope};
- DILexicalBlockFile R(MDNode::get(VMContext, Elts));
+ DILexicalBlockFile R = MDLexicalBlockFile::get(
+ VMContext, Scope, File.getFileNode(), Discriminator);
assert(
R.Verify() &&
"createLexicalBlockFile should return a verifiable DILexicalBlockFile");
@@ -1125,22 +744,10 @@ DILexicalBlockFile DIBuilder::createLexicalBlockFile(DIDescriptor Scope,
DILexicalBlock DIBuilder::createLexicalBlock(DIDescriptor Scope, DIFile File,
unsigned Line, unsigned Col) {
- // FIXME: This isn't thread safe nor the right way to defeat MDNode uniquing.
- // I believe the right way is to have a self-referential element in the node.
- // Also: why do we bother with line/column - they're not used and the
- // documentation (SourceLevelDebugging.rst) claims the line/col are necessary
- // for uniquing, yet then we have this other solution (because line/col were
- // inadequate) anyway. Remove all 3 and replace them with a self-reference.
-
- // Defeat MDNode uniquing for lexical blocks by using unique id.
- static unsigned int unique_id = 0;
- Metadata *Elts[] = {HeaderBuilder::get(dwarf::DW_TAG_lexical_block)
- .concat(Line)
- .concat(Col)
- .concat(unique_id++)
- .get(VMContext),
- File.getFileNode(), getNonCompileUnitScope(Scope)};
- DILexicalBlock R(MDNode::get(VMContext, Elts));
+ // Make these distinct, to avoid merging two lexical blocks on the same
+ // file/line/column.
+ DILexicalBlock R = MDLexicalBlock::getDistinct(
+ VMContext, getNonCompileUnitScope(Scope), File.getFileNode(), Line, Col);
assert(R.Verify() &&
"createLexicalBlock should return a verifiable DILexicalBlock");
return R;
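The lexical-block conversion above replaces the old unique-id hack with distinct nodes; a sketch of the resulting behavior (Ctx, Scope, and File are hypothetical):

// Two blocks created at the same scope/file/line/column remain separate
// nodes, because getDistinct() bypasses uniquing entirely.
auto *A = MDLexicalBlock::getDistinct(Ctx, Scope, File, /*Line=*/7, /*Col=*/3);
auto *B = MDLexicalBlock::getDistinct(Ctx, Scope, File, /*Line=*/7, /*Col=*/3);
assert(A != B && "distinct nodes never merge");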
diff --git a/lib/IR/DataLayout.cpp b/lib/IR/DataLayout.cpp
index 9c1dee0..4d867ef 100644
--- a/lib/IR/DataLayout.cpp
+++ b/lib/IR/DataLayout.cpp
@@ -33,11 +33,6 @@
#include <cstdlib>
using namespace llvm;
-// Handle the Pass registration stuff necessary to use DataLayout's.
-
-INITIALIZE_PASS(DataLayoutPass, "datalayout", "Data Layout", false, true)
-char DataLayoutPass::ID = 0;
-
//===----------------------------------------------------------------------===//
// Support for StructLayout
//===----------------------------------------------------------------------===//
@@ -155,8 +150,8 @@ DataLayout::InvalidPointerElem = { 0U, 0U, 0U, ~0U };
const char *DataLayout::getManglingComponent(const Triple &T) {
if (T.isOSBinFormatMachO())
return "-m:o";
- if (T.isOSWindows() && T.getArch() == Triple::x86 && T.isOSBinFormatCOFF())
- return "-m:w";
+ if (T.isOSWindows() && T.isOSBinFormatCOFF())
+ return T.getArch() == Triple::x86 ? "-m:x" : "-m:w";
return "-m:e";
}
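A sketch of the effect of the new x86 Windows/COFF case (the triple is hypothetical):

// x86 Windows/COFF now gets its own mangling component; other Windows/COFF
// targets keep "-m:w", Mach-O keeps "-m:o", and everything else gets "-m:e".
Triple T("i686-pc-windows-msvc");
const char *Mangling = DataLayout::getManglingComponent(T); // "-m:x"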
@@ -221,6 +216,7 @@ static unsigned inBytes(unsigned Bits) {
}
void DataLayout::parseSpecifier(StringRef Desc) {
+ StringRepresentation = Desc;
while (!Desc.empty()) {
// Split at '-'.
std::pair<StringRef, StringRef> Split = split(Desc, '-');
@@ -259,6 +255,8 @@ void DataLayout::parseSpecifier(StringRef Desc) {
"Missing size specification for pointer in datalayout string");
Split = split(Rest, ':');
unsigned PointerMemSize = inBytes(getInt(Tok));
+ if (!PointerMemSize)
+ report_fatal_error("Invalid pointer size of 0 bytes");
// ABI alignment.
if (Rest.empty())
@@ -266,12 +264,18 @@ void DataLayout::parseSpecifier(StringRef Desc) {
"Missing alignment specification for pointer in datalayout string");
Split = split(Rest, ':');
unsigned PointerABIAlign = inBytes(getInt(Tok));
+ if (!isPowerOf2_64(PointerABIAlign))
+ report_fatal_error(
+ "Pointer ABI alignment must be a power of 2");
// Preferred alignment.
unsigned PointerPrefAlign = PointerABIAlign;
if (!Rest.empty()) {
Split = split(Rest, ':');
PointerPrefAlign = inBytes(getInt(Tok));
+ if (!isPowerOf2_64(PointerPrefAlign))
+ report_fatal_error(
+ "Pointer preferred alignment must be a power of 2");
}
setPointerAlignment(AddrSpace, PointerABIAlign, PointerPrefAlign,
@@ -304,6 +308,9 @@ void DataLayout::parseSpecifier(StringRef Desc) {
"Missing alignment specification in datalayout string");
Split = split(Rest, ':');
unsigned ABIAlign = inBytes(getInt(Tok));
+ if (AlignType != AGGREGATE_ALIGN && !ABIAlign)
+ report_fatal_error(
+ "ABI alignment specification must be >0 for non-aggregate types");
// Preferred alignment.
unsigned PrefAlign = ABIAlign;
@@ -352,7 +359,10 @@ void DataLayout::parseSpecifier(StringRef Desc) {
ManglingMode = MM_Mips;
break;
case 'w':
- ManglingMode = MM_WINCOFF;
+ ManglingMode = MM_WinCOFF;
+ break;
+ case 'x':
+ ManglingMode = MM_WinCOFFX86;
break;
}
break;
@@ -367,13 +377,7 @@ DataLayout::DataLayout(const Module *M) : LayoutMap(nullptr) {
init(M);
}
-void DataLayout::init(const Module *M) {
- const DataLayout *Other = M->getDataLayout();
- if (Other)
- *this = *Other;
- else
- reset("");
-}
+void DataLayout::init(const Module *M) { *this = M->getDataLayout(); }
bool DataLayout::operator==(const DataLayout &Other) const {
bool Ret = BigEndian == Other.BigEndian &&
@@ -381,7 +385,7 @@ bool DataLayout::operator==(const DataLayout &Other) const {
ManglingMode == Other.ManglingMode &&
LegalIntWidths == Other.LegalIntWidths &&
Alignments == Other.Alignments && Pointers == Other.Pointers;
- assert(Ret == (getStringRepresentation() == Other.getStringRepresentation()));
+ // Note: getStringRepresentation() might differ; it is not canonicalized.
return Ret;
}
@@ -394,6 +398,10 @@ DataLayout::setAlignment(AlignTypeEnum align_type, unsigned abi_align,
report_fatal_error("Invalid ABI alignment, must be a 16bit integer");
if (!isUInt<16>(pref_align))
report_fatal_error("Invalid preferred alignment, must be a 16bit integer");
+ if (abi_align != 0 && !isPowerOf2_64(abi_align))
+ report_fatal_error("Invalid ABI alignment, must be a power of 2");
+ if (pref_align != 0 && !isPowerOf2_64(pref_align))
+ report_fatal_error("Invalid preferred alignment, must be a power of 2");
if (pref_align < abi_align)
report_fatal_error(
@@ -474,9 +482,7 @@ unsigned DataLayout::getAlignmentInfo(AlignTypeEnum AlignType,
// If we didn't find an integer alignment, fall back on most conservative.
if (AlignType == INTEGER_ALIGN) {
BestMatchIdx = LargestInt;
- } else {
- assert(AlignType == VECTOR_ALIGN && "Unknown alignment type!");
-
+ } else if (AlignType == VECTOR_ALIGN) {
// By default, use natural alignment for vector types. This is consistent
// with what clang and llvm-gcc do.
unsigned Align = getTypeAllocSize(cast<VectorType>(Ty)->getElementType());
@@ -489,6 +495,19 @@ unsigned DataLayout::getAlignmentInfo(AlignTypeEnum AlignType,
}
}
+ // If we still couldn't find a reasonable default alignment, fall back
+ // to a simple heuristic that the alignment is the first power of two
+ // greater-or-equal to the store size of the type. This is a reasonable
+ // approximation of reality, and if the user wanted something less
+ // conservative, they should have specified it explicitly in the data
+ // layout.
+ if (BestMatchIdx == -1) {
+ unsigned Align = getTypeStoreSize(Ty);
+ if (Align & (Align-1))
+ Align = NextPowerOf2(Align);
+ return Align;
+ }
+
// Since we got a "best match" index, just return it.
return ABIInfo ? Alignments[BestMatchIdx].ABIAlign
: Alignments[BestMatchIdx].PrefAlign;
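A worked example of the new fallback above, assuming a hypothetical type whose store size is 6 bytes:

unsigned Align = 6;            // getTypeStoreSize(Ty) for the example type
if (Align & (Align - 1))       // 6 & 5 == 4: nonzero, so not a power of two
  Align = NextPowerOf2(Align); // rounds up to 8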
@@ -552,68 +571,6 @@ const StructLayout *DataLayout::getStructLayout(StructType *Ty) const {
return L;
}
-std::string DataLayout::getStringRepresentation() const {
- std::string Result;
- raw_string_ostream OS(Result);
-
- OS << (BigEndian ? "E" : "e");
-
- switch (ManglingMode) {
- case MM_None:
- break;
- case MM_ELF:
- OS << "-m:e";
- break;
- case MM_MachO:
- OS << "-m:o";
- break;
- case MM_WINCOFF:
- OS << "-m:w";
- break;
- case MM_Mips:
- OS << "-m:m";
- break;
- }
-
- for (const PointerAlignElem &PI : Pointers) {
- // Skip default.
- if (PI.AddressSpace == 0 && PI.ABIAlign == 8 && PI.PrefAlign == 8 &&
- PI.TypeByteWidth == 8)
- continue;
-
- OS << "-p";
- if (PI.AddressSpace) {
- OS << PI.AddressSpace;
- }
- OS << ":" << PI.TypeByteWidth*8 << ':' << PI.ABIAlign*8;
- if (PI.PrefAlign != PI.ABIAlign)
- OS << ':' << PI.PrefAlign*8;
- }
-
- for (const LayoutAlignElem &AI : Alignments) {
- if (std::find(std::begin(DefaultAlignments), std::end(DefaultAlignments),
- AI) != std::end(DefaultAlignments))
- continue;
- OS << '-' << (char)AI.AlignType;
- if (AI.TypeBitWidth)
- OS << AI.TypeBitWidth;
- OS << ':' << AI.ABIAlign*8;
- if (AI.ABIAlign != AI.PrefAlign)
- OS << ':' << AI.PrefAlign*8;
- }
-
- if (!LegalIntWidths.empty()) {
- OS << "-n" << (unsigned)LegalIntWidths[0];
-
- for (unsigned i = 1, e = LegalIntWidths.size(); i != e; ++i)
- OS << ':' << (unsigned)LegalIntWidths[i];
- }
-
- if (StackNaturalAlign)
- OS << "-S" << StackNaturalAlign*8;
-
- return OS.str();
-}
unsigned DataLayout::getPointerABIAlignment(unsigned AS) const {
PointersTy::const_iterator I = findPointerLowerBound(AS);
@@ -829,18 +786,3 @@ unsigned DataLayout::getPreferredAlignmentLog(const GlobalVariable *GV) const {
return Log2_32(getPreferredAlignment(GV));
}
-DataLayoutPass::DataLayoutPass() : ImmutablePass(ID), DL("") {
- initializeDataLayoutPassPass(*PassRegistry::getPassRegistry());
-}
-
-DataLayoutPass::~DataLayoutPass() {}
-
-bool DataLayoutPass::doInitialization(Module &M) {
- DL.init(&M);
- return false;
-}
-
-bool DataLayoutPass::doFinalization(Module &M) {
- DL.reset("");
- return false;
-}
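With DataLayoutPass gone, clients read the layout straight off the Module; a minimal sketch (M is a hypothetical Module):

// No pass registration or initialization needed anymore.
const DataLayout &DL = M.getDataLayout();
unsigned PtrBytes = DL.getPointerSize(); // e.g. 8 on a 64-bit target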
diff --git a/lib/IR/DebugInfo.cpp b/lib/IR/DebugInfo.cpp
index 6590661..9a6b953 100644
--- a/lib/IR/DebugInfo.cpp
+++ b/lib/IR/DebugInfo.cpp
@@ -92,7 +92,7 @@ bool DIDescriptor::Verify() const {
DIObjCProperty(DbgNode).Verify() ||
DITemplateTypeParameter(DbgNode).Verify() ||
DITemplateValueParameter(DbgNode).Verify() ||
- DIImportedEntity(DbgNode).Verify() || DIExpression(DbgNode).Verify());
+ DIImportedEntity(DbgNode).Verify());
}
static Metadata *getField(const MDNode *DbgNode, unsigned Elt) {
@@ -155,21 +155,6 @@ Function *DIDescriptor::getFunctionField(unsigned Elt) const {
return dyn_cast_or_null<Function>(getConstantField(Elt));
}
-void DIDescriptor::replaceFunctionField(unsigned Elt, Function *F) {
- if (!DbgNode)
- return;
-
- if (Elt < DbgNode->getNumOperands()) {
- MDNode *Node = const_cast<MDNode *>(DbgNode);
- Node->replaceOperandWith(Elt, F ? ConstantAsMetadata::get(F) : nullptr);
- }
-}
-
-static unsigned DIVariableInlinedAtIndex = 4;
-MDNode *DIVariable::getInlinedAt() const {
- return getNodeField(DbgNode, DIVariableInlinedAtIndex);
-}
-
/// \brief Return the size reported by the variable's type.
unsigned DIVariable::getSizeInBits(const DITypeIdentifierMap &Map) {
DIType Ty = getType().resolve(Map);
@@ -183,13 +168,6 @@ unsigned DIVariable::getSizeInBits(const DITypeIdentifierMap &Map) {
return Ty.getSizeInBits();
}
-uint64_t DIExpression::getElement(unsigned Idx) const {
- unsigned I = Idx + 1;
- assert(I < getNumHeaderFields() &&
- "non-existing complex address element requested");
- return getHeaderFieldAs<int64_t>(I);
-}
-
bool DIExpression::isBitPiece() const {
unsigned N = getNumElements();
return N >= 3 && getElement(N-3) == dwarf::DW_OP_bit_piece;
@@ -205,206 +183,40 @@ uint64_t DIExpression::getBitPieceSize() const {
return getElement(getNumElements()-1);
}
-DIExpression::iterator DIExpression::begin() const {
- return DIExpression::iterator(*this);
-}
-
-DIExpression::iterator DIExpression::end() const {
- return DIExpression::iterator();
-}
-
-DIExpression::Operand DIExpression::Operand::getNext() const {
+DIExpression::iterator DIExpression::Operand::getNext() const {
iterator it(I);
- return *(++it);
-}
-
-//===----------------------------------------------------------------------===//
-// Predicates
-//===----------------------------------------------------------------------===//
-
-bool DIDescriptor::isSubroutineType() const {
- return DbgNode && getTag() == dwarf::DW_TAG_subroutine_type;
-}
-
-bool DIDescriptor::isBasicType() const {
- if (!DbgNode)
- return false;
- switch (getTag()) {
- case dwarf::DW_TAG_base_type:
- case dwarf::DW_TAG_unspecified_type:
- return true;
- default:
- return false;
- }
-}
-
-bool DIDescriptor::isDerivedType() const {
- if (!DbgNode)
- return false;
- switch (getTag()) {
- case dwarf::DW_TAG_typedef:
- case dwarf::DW_TAG_pointer_type:
- case dwarf::DW_TAG_ptr_to_member_type:
- case dwarf::DW_TAG_reference_type:
- case dwarf::DW_TAG_rvalue_reference_type:
- case dwarf::DW_TAG_const_type:
- case dwarf::DW_TAG_volatile_type:
- case dwarf::DW_TAG_restrict_type:
- case dwarf::DW_TAG_member:
- case dwarf::DW_TAG_inheritance:
- case dwarf::DW_TAG_friend:
- return true;
- default:
- // CompositeTypes are currently modelled as DerivedTypes.
- return isCompositeType();
- }
-}
-
-bool DIDescriptor::isCompositeType() const {
- if (!DbgNode)
- return false;
- switch (getTag()) {
- case dwarf::DW_TAG_array_type:
- case dwarf::DW_TAG_structure_type:
- case dwarf::DW_TAG_union_type:
- case dwarf::DW_TAG_enumeration_type:
- case dwarf::DW_TAG_subroutine_type:
- case dwarf::DW_TAG_class_type:
- return true;
- default:
- return false;
- }
-}
-
-bool DIDescriptor::isVariable() const {
- if (!DbgNode)
- return false;
- switch (getTag()) {
- case dwarf::DW_TAG_auto_variable:
- case dwarf::DW_TAG_arg_variable:
- return true;
- default:
- return false;
- }
-}
-
-bool DIDescriptor::isType() const {
- return isBasicType() || isCompositeType() || isDerivedType();
-}
-
-bool DIDescriptor::isSubprogram() const {
- return DbgNode && getTag() == dwarf::DW_TAG_subprogram;
-}
-
-bool DIDescriptor::isGlobalVariable() const {
- return DbgNode && getTag() == dwarf::DW_TAG_variable;
-}
-
-bool DIDescriptor::isScope() const {
- if (!DbgNode)
- return false;
- switch (getTag()) {
- case dwarf::DW_TAG_compile_unit:
- case dwarf::DW_TAG_lexical_block:
- case dwarf::DW_TAG_subprogram:
- case dwarf::DW_TAG_namespace:
- case dwarf::DW_TAG_file_type:
- return true;
- default:
- break;
- }
- return isType();
-}
-
-bool DIDescriptor::isTemplateTypeParameter() const {
- return DbgNode && getTag() == dwarf::DW_TAG_template_type_parameter;
-}
-
-bool DIDescriptor::isTemplateValueParameter() const {
- return DbgNode && (getTag() == dwarf::DW_TAG_template_value_parameter ||
- getTag() == dwarf::DW_TAG_GNU_template_template_param ||
- getTag() == dwarf::DW_TAG_GNU_template_parameter_pack);
-}
-
-bool DIDescriptor::isCompileUnit() const {
- return DbgNode && getTag() == dwarf::DW_TAG_compile_unit;
-}
-
-bool DIDescriptor::isFile() const {
- return DbgNode && getTag() == dwarf::DW_TAG_file_type;
-}
-
-bool DIDescriptor::isNameSpace() const {
- return DbgNode && getTag() == dwarf::DW_TAG_namespace;
-}
-
-bool DIDescriptor::isLexicalBlockFile() const {
- return DbgNode && getTag() == dwarf::DW_TAG_lexical_block &&
- DbgNode->getNumOperands() == 3 && getNumHeaderFields() == 2;
-}
-
-bool DIDescriptor::isLexicalBlock() const {
- // FIXME: There are always exactly 4 header fields in DILexicalBlock, but
- // something relies on this returning true for DILexicalBlockFile.
- return DbgNode && getTag() == dwarf::DW_TAG_lexical_block &&
- DbgNode->getNumOperands() == 3 &&
- (getNumHeaderFields() == 2 || getNumHeaderFields() == 4);
-}
-
-bool DIDescriptor::isSubrange() const {
- return DbgNode && getTag() == dwarf::DW_TAG_subrange_type;
-}
-
-bool DIDescriptor::isEnumerator() const {
- return DbgNode && getTag() == dwarf::DW_TAG_enumerator;
-}
-
-bool DIDescriptor::isObjCProperty() const {
- return DbgNode && getTag() == dwarf::DW_TAG_APPLE_property;
-}
-
-bool DIDescriptor::isImportedEntity() const {
- return DbgNode && (getTag() == dwarf::DW_TAG_imported_module ||
- getTag() == dwarf::DW_TAG_imported_declaration);
-}
-
-bool DIDescriptor::isExpression() const {
- return DbgNode && (getTag() == dwarf::DW_TAG_expression);
+ return ++it;
}
//===----------------------------------------------------------------------===//
// Simple Descriptor Constructors and other Methods
//===----------------------------------------------------------------------===//
-void DIDescriptor::replaceAllUsesWith(LLVMContext &VMContext, DIDescriptor D) {
-
+void DIDescriptor::replaceAllUsesWith(LLVMContext &, DIDescriptor D) {
assert(DbgNode && "Trying to replace an unverified type!");
+ assert(DbgNode->isTemporary() && "Expected temporary node");
+ TempMDNode Temp(get());
// Since we use a TrackingVH for the node, it's easy for clients to manufacture
// legitimate situations where they want to replaceAllUsesWith() on something
// which, due to uniquing, has merged with the source. We shield clients from
// this detail by allowing a value to be replaced with replaceAllUsesWith()
// itself.
- const MDNode *DN = D;
- if (DbgNode == DN) {
- SmallVector<Metadata *, 10> Ops(DbgNode->op_begin(), DbgNode->op_end());
- DN = MDNode::get(VMContext, Ops);
+ if (Temp.get() == D.get()) {
+ DbgNode = MDNode::replaceWithUniqued(std::move(Temp));
+ return;
}
- assert(DbgNode->isTemporary() && "Expected temporary node");
- auto *Node = const_cast<MDNode *>(DbgNode);
- Node->replaceAllUsesWith(const_cast<MDNode *>(DN));
- MDNode::deleteTemporary(Node);
- DbgNode = DN;
+ Temp->replaceAllUsesWith(D.get());
+ DbgNode = D.get();
}
void DIDescriptor::replaceAllUsesWith(MDNode *D) {
assert(DbgNode && "Trying to replace an unverified type!");
assert(DbgNode != D && "This replacement should always happen");
assert(DbgNode->isTemporary() && "Expected temporary node");
- auto *Node = const_cast<MDNode *>(DbgNode);
+ TempMDNode Node(get());
Node->replaceAllUsesWith(D);
- MDNode::deleteTemporary(Node);
}
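A sketch of why the TempMDNode change above removes the manual cleanup (Temporary and Replacement are hypothetical nodes):

TempMDNode Node(Temporary);            // owning handle around the temporary
Node->replaceAllUsesWith(Replacement);
// Node's destructor calls deleteTemporary(); no explicit call needed.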
bool DICompileUnit::Verify() const {
@@ -413,31 +225,10 @@ bool DICompileUnit::Verify() const {
// Don't bother verifying the compilation directory or producer string
// as those could be empty.
- if (getFilename().empty())
- return false;
-
- return DbgNode->getNumOperands() == 7 && getNumHeaderFields() == 8;
-}
-
-bool DIObjCProperty::Verify() const {
- if (!isObjCProperty())
- return false;
-
- // Don't worry about the rest of the strings for now.
- return DbgNode->getNumOperands() == 3 && getNumHeaderFields() == 6;
-}
-
-/// \brief Check if a field at position Elt of a MDNode is a MDNode.
-static bool fieldIsMDNode(const MDNode *DbgNode, unsigned Elt) {
- Metadata *Fld = getField(DbgNode, Elt);
- return !Fld || isa<MDNode>(Fld);
+ return !getFilename().empty();
}
-/// \brief Check if a field at position Elt of a MDNode is a MDString.
-static bool fieldIsMDString(const MDNode *DbgNode, unsigned Elt) {
- Metadata *Fld = getField(DbgNode, Elt);
- return !Fld || isa<MDString>(Fld);
-}
+bool DIObjCProperty::Verify() const { return isObjCProperty(); }
/// \brief Check if a value can be a reference to a type.
static bool isTypeRef(const Metadata *MD) {
@@ -445,14 +236,7 @@ static bool isTypeRef(const Metadata *MD) {
return true;
if (auto *S = dyn_cast<MDString>(MD))
return !S->getString().empty();
- if (auto *N = dyn_cast<MDNode>(MD))
- return DIType(N).isType();
- return false;
-}
-
-/// \brief Check if referenced field might be a type.
-static bool fieldIsTypeRef(const MDNode *DbgNode, unsigned Elt) {
- return isTypeRef(dyn_cast_or_null<Metadata>(getField(DbgNode, Elt)));
+ return isa<MDType>(MD);
}
/// \brief Check if a value can be a ScopeRef.
@@ -461,14 +245,7 @@ static bool isScopeRef(const Metadata *MD) {
return true;
if (auto *S = dyn_cast<MDString>(MD))
return !S->getString().empty();
- if (auto *N = dyn_cast<MDNode>(MD))
- return DIScope(N).isScope();
- return false;
-}
-
-/// \brief Check if a field at position Elt of a MDNode can be a ScopeRef.
-static bool fieldIsScopeRef(const MDNode *DbgNode, unsigned Elt) {
- return isScopeRef(dyn_cast_or_null<Metadata>(getField(DbgNode, Elt)));
+ return isa<MDScope>(MD);
}
#ifndef NDEBUG
@@ -483,92 +260,81 @@ static bool isDescriptorRef(const Metadata *MD) {
#endif
bool DIType::Verify() const {
- if (!isType())
+ auto *N = dyn_cast_or_null<MDType>(DbgNode);
+ if (!N)
return false;
- // Make sure Context @ field 2 is MDNode.
- if (!fieldIsScopeRef(DbgNode, 2))
- return false;
-
- // FIXME: Sink this into the various subclass verifies.
- uint16_t Tag = getTag();
- if (!isBasicType() && Tag != dwarf::DW_TAG_const_type &&
- Tag != dwarf::DW_TAG_volatile_type && Tag != dwarf::DW_TAG_pointer_type &&
- Tag != dwarf::DW_TAG_ptr_to_member_type &&
- Tag != dwarf::DW_TAG_reference_type &&
- Tag != dwarf::DW_TAG_rvalue_reference_type &&
- Tag != dwarf::DW_TAG_restrict_type && Tag != dwarf::DW_TAG_array_type &&
- Tag != dwarf::DW_TAG_enumeration_type &&
- Tag != dwarf::DW_TAG_subroutine_type &&
- Tag != dwarf::DW_TAG_inheritance && Tag != dwarf::DW_TAG_friend &&
- getFilename().empty())
+ if (!isScopeRef(N->getScope()))
return false;
// DIType is abstract; it should be a BasicType, a DerivedType or
// a CompositeType.
if (isBasicType())
return DIBasicType(DbgNode).Verify();
- else if (isCompositeType())
+
+ // FIXME: Sink this into the various subclass verifies.
+ if (getFilename().empty()) {
+ // Check whether the filename is allowed to be empty.
+ uint16_t Tag = getTag();
+ if (Tag != dwarf::DW_TAG_const_type && Tag != dwarf::DW_TAG_volatile_type &&
+ Tag != dwarf::DW_TAG_pointer_type &&
+ Tag != dwarf::DW_TAG_ptr_to_member_type &&
+ Tag != dwarf::DW_TAG_reference_type &&
+ Tag != dwarf::DW_TAG_rvalue_reference_type &&
+ Tag != dwarf::DW_TAG_restrict_type && Tag != dwarf::DW_TAG_array_type &&
+ Tag != dwarf::DW_TAG_enumeration_type &&
+ Tag != dwarf::DW_TAG_subroutine_type &&
+ Tag != dwarf::DW_TAG_inheritance && Tag != dwarf::DW_TAG_friend &&
+ Tag != dwarf::DW_TAG_structure_type && Tag != dwarf::DW_TAG_member &&
+ Tag != dwarf::DW_TAG_typedef)
+ return false;
+ }
+
+ if (isCompositeType())
return DICompositeType(DbgNode).Verify();
- else if (isDerivedType())
+ if (isDerivedType())
return DIDerivedType(DbgNode).Verify();
- else
- return false;
+ return false;
}
bool DIBasicType::Verify() const {
- return isBasicType() && DbgNode->getNumOperands() == 3 &&
- getNumHeaderFields() == 8;
+ return dyn_cast_or_null<MDBasicType>(DbgNode);
}
bool DIDerivedType::Verify() const {
- // Make sure DerivedFrom @ field 3 is TypeRef.
- if (!fieldIsTypeRef(DbgNode, 3))
+ auto *N = dyn_cast_or_null<MDDerivedTypeBase>(DbgNode);
+ if (!N)
return false;
- if (getTag() == dwarf::DW_TAG_ptr_to_member_type)
- // Make sure ClassType @ field 4 is a TypeRef.
- if (!fieldIsTypeRef(DbgNode, 4))
+ if (getTag() == dwarf::DW_TAG_ptr_to_member_type) {
+ auto *D = dyn_cast<MDDerivedType>(N);
+ if (!D)
return false;
-
- return isDerivedType() && DbgNode->getNumOperands() >= 4 &&
- DbgNode->getNumOperands() <= 8 && getNumHeaderFields() >= 7 &&
- getNumHeaderFields() <= 8;
+ if (!isTypeRef(D->getExtraData()))
+ return false;
+ }
+ return isTypeRef(N->getBaseType());
}
bool DICompositeType::Verify() const {
- if (!isCompositeType())
- return false;
-
- // Make sure DerivedFrom @ field 3 and ContainingType @ field 5 are TypeRef.
- if (!fieldIsTypeRef(DbgNode, 3))
- return false;
- if (!fieldIsTypeRef(DbgNode, 5))
- return false;
-
- // Make sure the type identifier at field 7 is MDString, it can be null.
- if (!fieldIsMDString(DbgNode, 7))
- return false;
-
- // A subroutine type can't be both & and &&.
- if (isLValueReference() && isRValueReference())
- return false;
-
- return DbgNode->getNumOperands() == 8 && getNumHeaderFields() == 8;
+ auto *N = dyn_cast_or_null<MDCompositeTypeBase>(DbgNode);
+ return N && isTypeRef(N->getBaseType()) && isTypeRef(N->getVTableHolder()) &&
+ !(isLValueReference() && isRValueReference());
}
bool DISubprogram::Verify() const {
- if (!isSubprogram())
+ auto *N = dyn_cast_or_null<MDSubprogram>(DbgNode);
+ if (!N)
return false;
- // Make sure context @ field 2 is a ScopeRef and type @ field 3 is a MDNode.
- if (!fieldIsScopeRef(DbgNode, 2))
+ if (!isScopeRef(N->getScope()))
return false;
- if (!fieldIsMDNode(DbgNode, 3))
- return false;
- // Containing type @ field 4.
- if (!fieldIsTypeRef(DbgNode, 4))
+
+ if (auto *Op = N->getType())
+ if (!isa<MDNode>(Op))
+ return false;
+
+ if (!isTypeRef(getContainingType()))
return false;
- // A subprogram can't be both & and &&.
if (isLValueReference() && isRValueReference())
return false;
@@ -603,164 +369,78 @@ bool DISubprogram::Verify() const {
}
}
}
- return DbgNode->getNumOperands() == 9 && getNumHeaderFields() == 12;
+
+ return true;
}
bool DIGlobalVariable::Verify() const {
- if (!isGlobalVariable())
- return false;
+ auto *N = dyn_cast_or_null<MDGlobalVariable>(DbgNode);
- if (getDisplayName().empty())
- return false;
- // Make sure context @ field 1 is an MDNode.
- if (!fieldIsMDNode(DbgNode, 1))
- return false;
- // Make sure that type @ field 3 is a DITypeRef.
- if (!fieldIsTypeRef(DbgNode, 3))
- return false;
- // Make sure StaticDataMemberDeclaration @ field 5 is MDNode.
- if (!fieldIsMDNode(DbgNode, 5))
- return false;
-
- return DbgNode->getNumOperands() == 6 && getNumHeaderFields() == 7;
-}
-
-bool DIVariable::Verify() const {
- if (!isVariable())
+ if (!N)
return false;
- // Make sure context @ field 1 is an MDNode.
- if (!fieldIsMDNode(DbgNode, 1))
- return false;
- // Make sure that type @ field 3 is a DITypeRef.
- if (!fieldIsTypeRef(DbgNode, 3))
+ if (N->getDisplayName().empty())
return false;
- // Check the number of header fields, which is common between complex and
- // simple variables.
- if (getNumHeaderFields() != 4)
- return false;
+ if (auto *Op = N->getScope())
+ if (!isa<MDNode>(Op))
+ return false;
- // Variable without an inline location.
- if (DbgNode->getNumOperands() == 4)
- return true;
+ if (auto *Op = N->getStaticDataMemberDeclaration())
+ if (!isa<MDNode>(Op))
+ return false;
- // Variable with an inline location.
- return getInlinedAt() != nullptr && DbgNode->getNumOperands() == 5;
+ return isTypeRef(N->getType());
}
-bool DIExpression::Verify() const {
- // Empty DIExpressions may be represented as a nullptr.
- if (!DbgNode)
- return true;
+bool DIVariable::Verify() const {
+ auto *N = dyn_cast_or_null<MDLocalVariable>(DbgNode);
- if (!(isExpression() && DbgNode->getNumOperands() == 1))
+ if (!N)
return false;
- for (auto Op : *this)
- switch (Op) {
- case DW_OP_bit_piece:
- // Must be the last element of the expression.
- return std::distance(Op.getBase(), DIHeaderFieldIterator()) == 3;
- case DW_OP_plus:
- if (std::distance(Op.getBase(), DIHeaderFieldIterator()) < 2)
- return false;
- break;
- case DW_OP_deref:
- break;
- default:
- // Other operators are not yet supported by the backend.
+ if (auto *Op = N->getScope())
+ if (!isa<MDNode>(Op))
return false;
- }
- return true;
+
+ return isTypeRef(N->getType());
}
bool DILocation::Verify() const {
- return DbgNode && isa<MDLocation>(DbgNode);
+ return dyn_cast_or_null<MDLocation>(DbgNode);
}
-
bool DINameSpace::Verify() const {
- if (!isNameSpace())
- return false;
- return DbgNode->getNumOperands() == 3 && getNumHeaderFields() == 3;
-}
-
-MDNode *DIFile::getFileNode() const { return getNodeField(DbgNode, 1); }
-
-bool DIFile::Verify() const {
- return isFile() && DbgNode->getNumOperands() == 2;
+ return dyn_cast_or_null<MDNamespace>(DbgNode);
}
-
+bool DIFile::Verify() const { return dyn_cast_or_null<MDFile>(DbgNode); }
bool DIEnumerator::Verify() const {
- return isEnumerator() && DbgNode->getNumOperands() == 1 &&
- getNumHeaderFields() == 3;
+ return dyn_cast_or_null<MDEnumerator>(DbgNode);
}
-
bool DISubrange::Verify() const {
- return isSubrange() && DbgNode->getNumOperands() == 1 &&
- getNumHeaderFields() == 3;
+ return dyn_cast_or_null<MDSubrange>(DbgNode);
}
-
bool DILexicalBlock::Verify() const {
- return isLexicalBlock() && DbgNode->getNumOperands() == 3 &&
- getNumHeaderFields() == 4;
+ return dyn_cast_or_null<MDLexicalBlock>(DbgNode);
}
-
bool DILexicalBlockFile::Verify() const {
- return isLexicalBlockFile() && DbgNode->getNumOperands() == 3 &&
- getNumHeaderFields() == 2;
+ return dyn_cast_or_null<MDLexicalBlockFile>(DbgNode);
}
-
bool DITemplateTypeParameter::Verify() const {
- return isTemplateTypeParameter() && DbgNode->getNumOperands() == 4 &&
- getNumHeaderFields() == 4;
+ return dyn_cast_or_null<MDTemplateTypeParameter>(DbgNode);
}
-
bool DITemplateValueParameter::Verify() const {
- return isTemplateValueParameter() && DbgNode->getNumOperands() == 5 &&
- getNumHeaderFields() == 4;
+ return dyn_cast_or_null<MDTemplateValueParameter>(DbgNode);
}
-
bool DIImportedEntity::Verify() const {
- return isImportedEntity() && DbgNode->getNumOperands() == 3 &&
- getNumHeaderFields() == 3;
-}
-
-MDNode *DIDerivedType::getObjCProperty() const {
- return getNodeField(DbgNode, 4);
+ return dyn_cast_or_null<MDImportedEntity>(DbgNode);
}
-MDString *DICompositeType::getIdentifier() const {
- return cast_or_null<MDString>(getField(DbgNode, 7));
-}
-
-#ifndef NDEBUG
-static void VerifySubsetOf(const MDNode *LHS, const MDNode *RHS) {
- for (unsigned i = 0; i != LHS->getNumOperands(); ++i) {
- // Skip the 'empty' list (that's a single i32 0, rather than truly empty).
- if (i == 0 && mdconst::hasa<ConstantInt>(LHS->getOperand(i)))
- continue;
- const MDNode *E = cast<MDNode>(LHS->getOperand(i));
- bool found = false;
- for (unsigned j = 0; !found && j != RHS->getNumOperands(); ++j)
- found = (E == cast<MDNode>(RHS->getOperand(j)));
- assert(found && "Losing a member during member list replacement");
- }
-}
-#endif
-
void DICompositeType::setArraysHelper(MDNode *Elements, MDNode *TParams) {
- TrackingMDNodeRef N(*this);
- if (Elements) {
-#ifndef NDEBUG
- // Check that the new list of members contains all the old members as well.
- if (const MDNode *El = cast_or_null<MDNode>(N->getOperand(4)))
- VerifySubsetOf(El, Elements);
-#endif
- N->replaceOperandWith(4, Elements);
- }
+ TypedTrackingMDRef<MDCompositeTypeBase> N(get());
+ if (Elements)
+ N->replaceElements(cast<MDTuple>(Elements));
if (TParams)
- N->replaceOperandWith(6, TParams);
+ N->replaceTemplateParams(cast<MDTuple>(TParams));
DbgNode = N;
}
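setArraysHelper now mutates through a TypedTrackingMDRef so that, if the mutation causes the node to be re-uniqued, the final DbgNode assignment picks up whatever node now represents the type. A standalone mock of that tracking behavior (the real class lives in llvm/IR/TrackingMDRef.h; forwarding is simulated here with an explicit ReplacedBy link):

#include <cstdio>

// A node that may be replaced by a fresh node when mutated, the way
// uniqued metadata can be re-uniqued in LLVM.
struct Node {
  int Elements;
  Node *ReplacedBy;
  explicit Node(int E) : Elements(E), ReplacedBy(nullptr) {}
};

// Mock tracking ref: get() follows any replacement chain.
struct TrackingRef {
  Node *N;
  explicit TrackingRef(Node *N) : N(N) {}
  Node *get() {
    while (N->ReplacedBy)
      N = N->ReplacedBy;
    return N;
  }
};

// A mutation that "re-uniques": the old node forwards to a new one.
// (Allocations are leaked; this is only a demo.)
static void replaceElements(Node *N, int NewElements) {
  N->ReplacedBy = new Node(NewElements);
}

int main() {
  Node *CT = new Node(2);
  TrackingRef Ref(CT);           // like TypedTrackingMDRef<...> N(get());
  replaceElements(Ref.get(), 5); // may repoint the underlying node
  std::printf("%d\n", Ref.get()->Elements); // prints 5: DbgNode = N sees it
}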
@@ -774,8 +454,8 @@ DIScopeRef DIScope::getRef() const {
}
void DICompositeType::setContainingType(DICompositeType ContainingType) {
- TrackingMDNodeRef N(*this);
- N->replaceOperandWith(5, ContainingType.getRef());
+ TypedTrackingMDRef<MDCompositeTypeBase> N(get());
+ N->replaceVTableHolder(ContainingType.getRef());
DbgNode = N;
}
@@ -788,6 +468,13 @@ bool DIVariable::isInlinedFnArgument(const Function *CurFn) {
return !DISubprogram(getContext()).describes(CurFn);
}
+Function *DISubprogram::getFunction() const {
+ if (auto *N = get())
+ if (auto *C = dyn_cast_or_null<ConstantAsMetadata>(N->getFunction()))
+ return dyn_cast<Function>(C->getValue());
+ return nullptr;
+}
+
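The new DISubprogram::getFunction() is the usual unwrap for value-backed metadata: IR constants appear in metadata graphs wrapped in ConstantAsMetadata, so reaching the Function takes two casts. The same shape factored out for an arbitrary operand (real LLVM types as of this revision, shown as a fragment rather than a standalone program):

#include "llvm/IR/Function.h"
#include "llvm/IR/Metadata.h"
#include "llvm/Support/Casting.h"
using namespace llvm;

// Metadata -> ConstantAsMetadata -> Function, nullptr on any mismatch;
// mirrors DISubprogram::getFunction() above.
static Function *unwrapFunction(Metadata *MD) {
  if (auto *C = dyn_cast_or_null<ConstantAsMetadata>(MD))
    return dyn_cast<Function>(C->getValue());
  return nullptr;
}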
bool DISubprogram::describes(const Function *F) {
assert(F && "Invalid function");
if (F == getFunction())
@@ -800,16 +487,8 @@ bool DISubprogram::describes(const Function *F) {
return false;
}
-MDNode *DISubprogram::getVariablesNodes() const {
- return getNodeField(DbgNode, 8);
-}
-
-DIArray DISubprogram::getVariables() const {
- return DIArray(getNodeField(DbgNode, 8));
-}
-
-Metadata *DITemplateValueParameter::getValue() const {
- return DbgNode->getOperand(3);
+GlobalVariable *DIGlobalVariable::getGlobal() const {
+ return dyn_cast_or_null<GlobalVariable>(getConstant());
}
DIScopeRef DIScope::getContext() const {
@@ -847,66 +526,25 @@ StringRef DIScope::getName() const {
}
StringRef DIScope::getFilename() const {
- if (!DbgNode)
- return StringRef();
- return ::getStringField(getNodeField(DbgNode, 1), 0);
+ if (auto *N = get())
+ return ::getStringField(dyn_cast_or_null<MDNode>(N->getFile()), 0);
+ return "";
}
StringRef DIScope::getDirectory() const {
- if (!DbgNode)
- return StringRef();
- return ::getStringField(getNodeField(DbgNode, 1), 1);
-}
-
-DIArray DICompileUnit::getEnumTypes() const {
- if (!DbgNode || DbgNode->getNumOperands() < 7)
- return DIArray();
-
- return DIArray(getNodeField(DbgNode, 2));
-}
-
-DIArray DICompileUnit::getRetainedTypes() const {
- if (!DbgNode || DbgNode->getNumOperands() < 7)
- return DIArray();
-
- return DIArray(getNodeField(DbgNode, 3));
-}
-
-DIArray DICompileUnit::getSubprograms() const {
- if (!DbgNode || DbgNode->getNumOperands() < 7)
- return DIArray();
-
- return DIArray(getNodeField(DbgNode, 4));
-}
-
-DIArray DICompileUnit::getGlobalVariables() const {
- if (!DbgNode || DbgNode->getNumOperands() < 7)
- return DIArray();
-
- return DIArray(getNodeField(DbgNode, 5));
-}
-
-DIArray DICompileUnit::getImportedEntities() const {
- if (!DbgNode || DbgNode->getNumOperands() < 7)
- return DIArray();
-
- return DIArray(getNodeField(DbgNode, 6));
+ if (auto *N = get())
+ return ::getStringField(dyn_cast_or_null<MDNode>(N->getFile()), 1);
+ return "";
}
void DICompileUnit::replaceSubprograms(DIArray Subprograms) {
assert(Verify() && "Expected compile unit");
- if (Subprograms == getSubprograms())
- return;
-
- const_cast<MDNode *>(DbgNode)->replaceOperandWith(4, Subprograms);
+ get()->replaceSubprograms(cast_or_null<MDTuple>(Subprograms.get()));
}
void DICompileUnit::replaceGlobalVariables(DIArray GlobalVariables) {
assert(Verify() && "Expected compile unit");
- if (GlobalVariables == getGlobalVariables())
- return;
-
- const_cast<MDNode *>(DbgNode)->replaceOperandWith(5, GlobalVariables);
+ get()->replaceGlobalVariables(cast_or_null<MDTuple>(GlobalVariables.get()));
}
DILocation DILocation::copyWithNewScope(LLVMContext &Ctx,
@@ -927,31 +565,13 @@ unsigned DILocation::computeNewDiscriminator(LLVMContext &Ctx) {
DIVariable llvm::createInlinedVariable(MDNode *DV, MDNode *InlinedScope,
LLVMContext &VMContext) {
assert(DIVariable(DV).Verify() && "Expected a DIVariable");
- if (!InlinedScope)
- return cleanseInlinedVariable(DV, VMContext);
-
- // Insert inlined scope.
- SmallVector<Metadata *, 8> Elts(DV->op_begin(),
- DV->op_begin() + DIVariableInlinedAtIndex);
- Elts.push_back(InlinedScope);
-
- DIVariable Inlined(MDNode::get(VMContext, Elts));
- assert(Inlined.Verify() && "Expected to create a DIVariable");
- return Inlined;
+ return cast<MDLocalVariable>(DV)
+ ->withInline(cast_or_null<MDLocation>(InlinedScope));
}
DIVariable llvm::cleanseInlinedVariable(MDNode *DV, LLVMContext &VMContext) {
assert(DIVariable(DV).Verify() && "Expected a DIVariable");
- if (!DIVariable(DV).getInlinedAt())
- return DIVariable(DV);
-
- // Remove inlined scope.
- SmallVector<Metadata *, 8> Elts(DV->op_begin(),
- DV->op_begin() + DIVariableInlinedAtIndex);
-
- DIVariable Cleansed(MDNode::get(VMContext, Elts));
- assert(Cleansed.Verify() && "Expected to create a DIVariable");
- return Cleansed;
+ return cast<MDLocalVariable>(DV)->withoutInline();
}
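Both helpers now defer to MDLocalVariable, which owns the inlined-at link, instead of rebuilding operand lists by hand. A sketch of the round trip using the two calls from this hunk (assuming, per the diff, that withInline and withoutInline return the rewritten MDLocalVariable; fragment, not a standalone program):

#include "llvm/IR/DebugInfoMetadata.h"
using namespace llvm;

static MDLocalVariable *roundTrip(MDLocalVariable *Var, MDLocation *InlineLoc) {
  auto *Inlined = Var->withInline(InlineLoc); // attach the inlined-at scope
  return Inlined->withoutInline();            // strip it off again
}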
DISubprogram llvm::getDISubprogram(const MDNode *Scope) {
@@ -1075,6 +695,8 @@ void DebugInfoFinder::processModule(const Module &M) {
DIArray Imports = CU.getImportedEntities();
for (unsigned i = 0, e = Imports.getNumElements(); i != e; ++i) {
DIImportedEntity Import = DIImportedEntity(Imports.getElement(i));
+ if (!Import)
+ continue;
DIDescriptor Entity = Import.getEntity().resolve(TypeIdentifierMap);
if (Entity.isType())
processType(DIType(Entity));
@@ -1267,220 +889,9 @@ void DIDescriptor::dump() const {
}
void DIDescriptor::print(raw_ostream &OS) const {
- if (!DbgNode)
- return;
-
- if (const char *Tag = dwarf::TagString(getTag()))
- OS << "[ " << Tag << " ]";
-
- if (this->isSubrange()) {
- DISubrange(DbgNode).printInternal(OS);
- } else if (this->isCompileUnit()) {
- DICompileUnit(DbgNode).printInternal(OS);
- } else if (this->isFile()) {
- DIFile(DbgNode).printInternal(OS);
- } else if (this->isEnumerator()) {
- DIEnumerator(DbgNode).printInternal(OS);
- } else if (this->isBasicType()) {
- DIType(DbgNode).printInternal(OS);
- } else if (this->isDerivedType()) {
- DIDerivedType(DbgNode).printInternal(OS);
- } else if (this->isCompositeType()) {
- DICompositeType(DbgNode).printInternal(OS);
- } else if (this->isSubprogram()) {
- DISubprogram(DbgNode).printInternal(OS);
- } else if (this->isGlobalVariable()) {
- DIGlobalVariable(DbgNode).printInternal(OS);
- } else if (this->isVariable()) {
- DIVariable(DbgNode).printInternal(OS);
- } else if (this->isObjCProperty()) {
- DIObjCProperty(DbgNode).printInternal(OS);
- } else if (this->isNameSpace()) {
- DINameSpace(DbgNode).printInternal(OS);
- } else if (this->isScope()) {
- DIScope(DbgNode).printInternal(OS);
- } else if (this->isExpression()) {
- DIExpression(DbgNode).printInternal(OS);
- }
-}
-
-void DISubrange::printInternal(raw_ostream &OS) const {
- int64_t Count = getCount();
- if (Count != -1)
- OS << " [" << getLo() << ", " << Count - 1 << ']';
- else
- OS << " [unbounded]";
-}
-
-void DIScope::printInternal(raw_ostream &OS) const {
- OS << " [" << getDirectory() << "/" << getFilename() << ']';
-}
-
-void DICompileUnit::printInternal(raw_ostream &OS) const {
- DIScope::printInternal(OS);
- OS << " [";
- unsigned Lang = getLanguage();
- if (const char *LangStr = dwarf::LanguageString(Lang))
- OS << LangStr;
- else
- (OS << "lang 0x").write_hex(Lang);
- OS << ']';
-}
-
-void DIEnumerator::printInternal(raw_ostream &OS) const {
- OS << " [" << getName() << " :: " << getEnumValue() << ']';
-}
-
-void DIType::printInternal(raw_ostream &OS) const {
- if (!DbgNode)
+ if (!get())
return;
-
- StringRef Res = getName();
- if (!Res.empty())
- OS << " [" << Res << "]";
-
- // TODO: Print context?
-
- OS << " [line " << getLineNumber() << ", size " << getSizeInBits()
- << ", align " << getAlignInBits() << ", offset " << getOffsetInBits();
- if (isBasicType())
- if (const char *Enc =
- dwarf::AttributeEncodingString(DIBasicType(DbgNode).getEncoding()))
- OS << ", enc " << Enc;
- OS << "]";
-
- if (isPrivate())
- OS << " [private]";
- else if (isProtected())
- OS << " [protected]";
- else if (isPublic())
- OS << " [public]";
-
- if (isArtificial())
- OS << " [artificial]";
-
- if (isForwardDecl())
- OS << " [decl]";
- else if (getTag() == dwarf::DW_TAG_structure_type ||
- getTag() == dwarf::DW_TAG_union_type ||
- getTag() == dwarf::DW_TAG_enumeration_type ||
- getTag() == dwarf::DW_TAG_class_type)
- OS << " [def]";
- if (isVector())
- OS << " [vector]";
- if (isStaticMember())
- OS << " [static]";
-
- if (isLValueReference())
- OS << " [reference]";
-
- if (isRValueReference())
- OS << " [rvalue reference]";
-}
-
-void DIDerivedType::printInternal(raw_ostream &OS) const {
- DIType::printInternal(OS);
- OS << " [from " << getTypeDerivedFrom().getName() << ']';
-}
-
-void DICompositeType::printInternal(raw_ostream &OS) const {
- DIType::printInternal(OS);
- DIArray A = getElements();
- OS << " [" << A.getNumElements() << " elements]";
-}
-
-void DINameSpace::printInternal(raw_ostream &OS) const {
- StringRef Name = getName();
- if (!Name.empty())
- OS << " [" << Name << ']';
-
- OS << " [line " << getLineNumber() << ']';
-}
-
-void DISubprogram::printInternal(raw_ostream &OS) const {
- // TODO : Print context
- OS << " [line " << getLineNumber() << ']';
-
- if (isLocalToUnit())
- OS << " [local]";
-
- if (isDefinition())
- OS << " [def]";
-
- if (getScopeLineNumber() != getLineNumber())
- OS << " [scope " << getScopeLineNumber() << "]";
-
- if (isPrivate())
- OS << " [private]";
- else if (isProtected())
- OS << " [protected]";
- else if (isPublic())
- OS << " [public]";
-
- if (isLValueReference())
- OS << " [reference]";
-
- if (isRValueReference())
- OS << " [rvalue reference]";
-
- StringRef Res = getName();
- if (!Res.empty())
- OS << " [" << Res << ']';
-}
-
-void DIGlobalVariable::printInternal(raw_ostream &OS) const {
- StringRef Res = getName();
- if (!Res.empty())
- OS << " [" << Res << ']';
-
- OS << " [line " << getLineNumber() << ']';
-
- // TODO : Print context
-
- if (isLocalToUnit())
- OS << " [local]";
-
- if (isDefinition())
- OS << " [def]";
-}
-
-void DIVariable::printInternal(raw_ostream &OS) const {
- StringRef Res = getName();
- if (!Res.empty())
- OS << " [" << Res << ']';
-
- OS << " [line " << getLineNumber() << ']';
-}
-
-void DIExpression::printInternal(raw_ostream &OS) const {
- for (auto Op : *this) {
- OS << " [" << OperationEncodingString(Op);
- switch (Op) {
- case DW_OP_plus: {
- OS << " " << Op.getArg(1);
- break;
- }
- case DW_OP_bit_piece: {
- OS << " offset=" << Op.getArg(1) << ", size=" << Op.getArg(2);
- break;
- }
- case DW_OP_deref:
- // No arguments.
- break;
- default:
- llvm_unreachable("unhandled operation");
- }
- OS << "]";
- }
-}
-
-void DIObjCProperty::printInternal(raw_ostream &OS) const {
- StringRef Name = getObjCPropertyName();
- if (!Name.empty())
- OS << " [" << Name << ']';
-
- OS << " [line " << getLineNumber() << ", properties " << getUnsignedField(6)
- << ']';
+ get()->print(OS);
}
static void printDebugLoc(DebugLoc DL, raw_ostream &CommentOS,
diff --git a/lib/IR/DiagnosticInfo.cpp b/lib/IR/DiagnosticInfo.cpp
index cfb699a..5608589 100644
--- a/lib/IR/DiagnosticInfo.cpp
+++ b/lib/IR/DiagnosticInfo.cpp
@@ -129,7 +129,7 @@ void DiagnosticInfoSampleProfile::print(DiagnosticPrinter &DP) const {
}
bool DiagnosticInfoOptimizationBase::isLocationAvailable() const {
- return getDebugLoc().isUnknown() == false;
+ return !getDebugLoc().isUnknown();
}
void DiagnosticInfoOptimizationBase::getLocation(StringRef *Filename,
diff --git a/lib/IR/GCOV.cpp b/lib/IR/GCOV.cpp
index 08f44e0..7010ceb 100644
--- a/lib/IR/GCOV.cpp
+++ b/lib/IR/GCOV.cpp
@@ -19,6 +19,7 @@
#include "llvm/Support/Format.h"
#include "llvm/Support/MemoryObject.h"
#include "llvm/Support/Path.h"
+#include "llvm/Support/raw_ostream.h"
#include <algorithm>
#include <system_error>
using namespace llvm;
@@ -302,10 +303,12 @@ bool GCOVFunction::readGCDA(GCOVBuffer &Buff, GCOV::GCOVVersion Version) {
// required to combine the edge counts that are contained in the GCDA file.
for (uint32_t BlockNo = 0; Count > 0; ++BlockNo) {
// The last block is always reserved for exit block
- if (BlockNo >= Blocks.size() - 1) {
+ if (BlockNo >= Blocks.size()) {
errs() << "Unexpected number of edges (in " << Name << ").\n";
return false;
}
+ if (BlockNo == Blocks.size() - 1)
+ errs() << "(" << Name << ") has arcs from exit block.\n";
GCOVBlock &Block = *Blocks[BlockNo];
for (size_t EdgeNo = 0, End = Block.getNumDstEdges(); EdgeNo < End;
++EdgeNo) {
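The bounds change above loosens a hard error into a diagnostic: an index equal to Blocks.size() - 1 (the reserved exit block) is now merely reported, and only an index past the whole list rejects the file. A standalone sketch of the revised logic:

#include <cstdio>
#include <vector>

static bool checkBlock(size_t BlockNo, const std::vector<int> &Blocks,
                       const char *Name) {
  if (BlockNo >= Blocks.size()) {
    std::fprintf(stderr, "Unexpected number of edges (in %s).\n", Name);
    return false; // hard error: index past the block list
  }
  if (BlockNo == Blocks.size() - 1)
    std::fprintf(stderr, "(%s) has arcs from exit block.\n", Name); // warn only
  return true;
}

int main() {
  std::vector<int> Blocks(4);
  std::printf("%d %d\n", checkBlock(3, Blocks, "f"),  // 1: exit block, warned
              checkBlock(4, Blocks, "f"));            // 0: out of range
}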
@@ -443,6 +446,7 @@ static uint32_t branchDiv(uint64_t Numerator, uint64_t Divisor) {
return Res;
}
+namespace {
struct formatBranchInfo {
formatBranchInfo(const GCOVOptions &Options, uint64_t Count, uint64_t Total)
: Options(Options), Count(Count), Total(Total) {}
@@ -466,7 +470,6 @@ static raw_ostream &operator<<(raw_ostream &OS, const formatBranchInfo &FBI) {
return OS;
}
-namespace {
class LineConsumer {
std::unique_ptr<MemoryBuffer> Buffer;
StringRef Remaining;
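Moving the `namespace {` opener above formatBranchInfo gives the struct internal linkage, so an identically named type in another translation unit cannot collide with it, while everything inside this file keeps using it unchanged. A standalone illustration:

#include <cstdio>

namespace {
struct formatBranchInfo { // internal linkage: invisible to other TUs
  unsigned Count, Total;
};
} // end anonymous namespace

int main() {
  formatBranchInfo FBI{3, 4}; // still usable as before within this TU
  std::printf("%u/%u\n", FBI.Count, FBI.Total);
}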
diff --git a/lib/IR/Globals.cpp b/lib/IR/Globals.cpp
index 54197d9..5a6adb3 100644
--- a/lib/IR/Globals.cpp
+++ b/lib/IR/Globals.cpp
@@ -42,10 +42,6 @@ void GlobalValue::Dematerialize() {
getParent()->Dematerialize(this);
}
-const DataLayout *GlobalValue::getDataLayout() const {
- return getParent()->getDataLayout();
-}
-
/// Override destroyConstant to make sure it doesn't get called on
/// GlobalValue's because they shouldn't be treated like other constants.
void GlobalValue::destroyConstant() {
diff --git a/lib/IR/InlineAsm.cpp b/lib/IR/InlineAsm.cpp
index 5b73561..b456d9f 100644
--- a/lib/IR/InlineAsm.cpp
+++ b/lib/IR/InlineAsm.cpp
@@ -75,7 +75,7 @@ bool InlineAsm::ConstraintInfo::Parse(StringRef Str,
ConstraintCodeVector *pCodes = &Codes;
// Initialize
- isMultipleAlternative = (multipleAlternativeCount > 1 ? true : false);
+ isMultipleAlternative = multipleAlternativeCount > 1;
if (isMultipleAlternative) {
multipleAlternatives.resize(multipleAlternativeCount);
pCodes = &multipleAlternatives[0].Codes;
diff --git a/lib/IR/Instruction.cpp b/lib/IR/Instruction.cpp
index 92c6e9f..7d9bd7e 100644
--- a/lib/IR/Instruction.cpp
+++ b/lib/IR/Instruction.cpp
@@ -32,10 +32,6 @@ Instruction::Instruction(Type *ty, unsigned it, Use *Ops, unsigned NumOps,
}
}
-const DataLayout *Instruction::getDataLayout() const {
- return getParent()->getDataLayout();
-}
-
Instruction::Instruction(Type *ty, unsigned it, Use *Ops, unsigned NumOps,
BasicBlock *InsertAtEnd)
: User(ty, Value::InstructionVal + it, Ops, NumOps), Parent(nullptr) {
@@ -58,6 +54,10 @@ void Instruction::setParent(BasicBlock *P) {
Parent = P;
}
+const Module *Instruction::getModule() const {
+ return getParent()->getModule();
+}
+
void Instruction::removeFromParent() {
getParent()->getInstList().remove(this);
}
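getModule() is a pure parent-chain walk, which is what lets it replace the deleted getDataLayout() helpers once callers go through the module. A standalone sketch of the chain with stand-in types (names mirror LLVM's, but these are not the real classes):

#include <cstdio>

struct Module {};
struct Function {
  const Module *M;
  const Module *getParent() const { return M; }
};
struct BasicBlock {
  const Function *F;
  const Module *getModule() const { return F->getParent(); }
};
struct Instruction {
  const BasicBlock *BB;
  const BasicBlock *getParent() const { return BB; }
  // Instruction -> BasicBlock -> Function -> Module, as in the hunk above.
  const Module *getModule() const { return getParent()->getModule(); }
};

int main() {
  Module M;
  Function F{&M};
  BasicBlock BB{&F};
  Instruction I{&BB};
  std::printf("%p\n", static_cast<const void *>(I.getModule()));
}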
diff --git a/lib/IR/Instructions.cpp b/lib/IR/Instructions.cpp
index 7136923..af2aeb9 100644
--- a/lib/IR/Instructions.cpp
+++ b/lib/IR/Instructions.cpp
@@ -841,41 +841,19 @@ static Value *getAISize(LLVMContext &Context, Value *Amt) {
return Amt;
}
-AllocaInst::AllocaInst(Type *Ty, Value *ArraySize,
- const Twine &Name, Instruction *InsertBefore)
- : UnaryInstruction(PointerType::getUnqual(Ty), Alloca,
- getAISize(Ty->getContext(), ArraySize), InsertBefore) {
- setAlignment(0);
- assert(!Ty->isVoidTy() && "Cannot allocate void!");
- setName(Name);
-}
+AllocaInst::AllocaInst(Type *Ty, const Twine &Name, Instruction *InsertBefore)
+ : AllocaInst(Ty, /*ArraySize=*/nullptr, Name, InsertBefore) {}
-AllocaInst::AllocaInst(Type *Ty, Value *ArraySize,
- const Twine &Name, BasicBlock *InsertAtEnd)
- : UnaryInstruction(PointerType::getUnqual(Ty), Alloca,
- getAISize(Ty->getContext(), ArraySize), InsertAtEnd) {
- setAlignment(0);
- assert(!Ty->isVoidTy() && "Cannot allocate void!");
- setName(Name);
-}
+AllocaInst::AllocaInst(Type *Ty, const Twine &Name, BasicBlock *InsertAtEnd)
+ : AllocaInst(Ty, /*ArraySize=*/nullptr, Name, InsertAtEnd) {}
-AllocaInst::AllocaInst(Type *Ty, const Twine &Name,
+AllocaInst::AllocaInst(Type *Ty, Value *ArraySize, const Twine &Name,
Instruction *InsertBefore)
- : UnaryInstruction(PointerType::getUnqual(Ty), Alloca,
- getAISize(Ty->getContext(), nullptr), InsertBefore) {
- setAlignment(0);
- assert(!Ty->isVoidTy() && "Cannot allocate void!");
- setName(Name);
-}
+ : AllocaInst(Ty, ArraySize, /*Align=*/0, Name, InsertBefore) {}
-AllocaInst::AllocaInst(Type *Ty, const Twine &Name,
+AllocaInst::AllocaInst(Type *Ty, Value *ArraySize, const Twine &Name,
BasicBlock *InsertAtEnd)
- : UnaryInstruction(PointerType::getUnqual(Ty), Alloca,
- getAISize(Ty->getContext(), nullptr), InsertAtEnd) {
- setAlignment(0);
- assert(!Ty->isVoidTy() && "Cannot allocate void!");
- setName(Name);
-}
+ : AllocaInst(Ty, ArraySize, /*Align=*/0, Name, InsertAtEnd) {}
AllocaInst::AllocaInst(Type *Ty, Value *ArraySize, unsigned Align,
const Twine &Name, Instruction *InsertBefore)
@@ -942,67 +920,27 @@ void LoadInst::AssertOK() {
}
LoadInst::LoadInst(Value *Ptr, const Twine &Name, Instruction *InsertBef)
- : UnaryInstruction(cast<PointerType>(Ptr->getType())->getElementType(),
- Load, Ptr, InsertBef) {
- setVolatile(false);
- setAlignment(0);
- setAtomic(NotAtomic);
- AssertOK();
- setName(Name);
-}
+ : LoadInst(Ptr, Name, /*isVolatile=*/false, InsertBef) {}
LoadInst::LoadInst(Value *Ptr, const Twine &Name, BasicBlock *InsertAE)
- : UnaryInstruction(cast<PointerType>(Ptr->getType())->getElementType(),
- Load, Ptr, InsertAE) {
- setVolatile(false);
- setAlignment(0);
- setAtomic(NotAtomic);
- AssertOK();
- setName(Name);
-}
+ : LoadInst(Ptr, Name, /*isVolatile=*/false, InsertAE) {}
LoadInst::LoadInst(Value *Ptr, const Twine &Name, bool isVolatile,
Instruction *InsertBef)
- : UnaryInstruction(cast<PointerType>(Ptr->getType())->getElementType(),
- Load, Ptr, InsertBef) {
- setVolatile(isVolatile);
- setAlignment(0);
- setAtomic(NotAtomic);
- AssertOK();
- setName(Name);
-}
+ : LoadInst(Ptr, Name, isVolatile, /*Align=*/0, InsertBef) {}
LoadInst::LoadInst(Value *Ptr, const Twine &Name, bool isVolatile,
BasicBlock *InsertAE)
- : UnaryInstruction(cast<PointerType>(Ptr->getType())->getElementType(),
- Load, Ptr, InsertAE) {
- setVolatile(isVolatile);
- setAlignment(0);
- setAtomic(NotAtomic);
- AssertOK();
- setName(Name);
-}
+ : LoadInst(Ptr, Name, isVolatile, /*Align=*/0, InsertAE) {}
-LoadInst::LoadInst(Value *Ptr, const Twine &Name, bool isVolatile,
+LoadInst::LoadInst(Value *Ptr, const Twine &Name, bool isVolatile,
unsigned Align, Instruction *InsertBef)
- : UnaryInstruction(cast<PointerType>(Ptr->getType())->getElementType(),
- Load, Ptr, InsertBef) {
- setVolatile(isVolatile);
- setAlignment(Align);
- setAtomic(NotAtomic);
- AssertOK();
- setName(Name);
-}
+ : LoadInst(Ptr, Name, isVolatile, Align, NotAtomic, CrossThread,
+ InsertBef) {}
-LoadInst::LoadInst(Value *Ptr, const Twine &Name, bool isVolatile,
+LoadInst::LoadInst(Value *Ptr, const Twine &Name, bool isVolatile,
unsigned Align, BasicBlock *InsertAE)
- : UnaryInstruction(cast<PointerType>(Ptr->getType())->getElementType(),
- Load, Ptr, InsertAE) {
- setVolatile(isVolatile);
- setAlignment(Align);
- setAtomic(NotAtomic);
- AssertOK();
- setName(Name);
+ : LoadInst(Ptr, Name, isVolatile, Align, NotAtomic, CrossThread, InsertAE) {
}
LoadInst::LoadInst(Value *Ptr, const Twine &Name, bool isVolatile,
@@ -1097,60 +1035,29 @@ void StoreInst::AssertOK() {
"Alignment required for atomic store");
}
-
StoreInst::StoreInst(Value *val, Value *addr, Instruction *InsertBefore)
- : Instruction(Type::getVoidTy(val->getContext()), Store,
- OperandTraits<StoreInst>::op_begin(this),
- OperandTraits<StoreInst>::operands(this),
- InsertBefore) {
- Op<0>() = val;
- Op<1>() = addr;
- setVolatile(false);
- setAlignment(0);
- setAtomic(NotAtomic);
- AssertOK();
-}
+ : StoreInst(val, addr, /*isVolatile=*/false, InsertBefore) {}
StoreInst::StoreInst(Value *val, Value *addr, BasicBlock *InsertAtEnd)
- : Instruction(Type::getVoidTy(val->getContext()), Store,
- OperandTraits<StoreInst>::op_begin(this),
- OperandTraits<StoreInst>::operands(this),
- InsertAtEnd) {
- Op<0>() = val;
- Op<1>() = addr;
- setVolatile(false);
- setAlignment(0);
- setAtomic(NotAtomic);
- AssertOK();
-}
+ : StoreInst(val, addr, /*isVolatile=*/false, InsertAtEnd) {}
StoreInst::StoreInst(Value *val, Value *addr, bool isVolatile,
Instruction *InsertBefore)
- : Instruction(Type::getVoidTy(val->getContext()), Store,
- OperandTraits<StoreInst>::op_begin(this),
- OperandTraits<StoreInst>::operands(this),
- InsertBefore) {
- Op<0>() = val;
- Op<1>() = addr;
- setVolatile(isVolatile);
- setAlignment(0);
- setAtomic(NotAtomic);
- AssertOK();
-}
+ : StoreInst(val, addr, isVolatile, /*Align=*/0, InsertBefore) {}
StoreInst::StoreInst(Value *val, Value *addr, bool isVolatile,
- unsigned Align, Instruction *InsertBefore)
- : Instruction(Type::getVoidTy(val->getContext()), Store,
- OperandTraits<StoreInst>::op_begin(this),
- OperandTraits<StoreInst>::operands(this),
- InsertBefore) {
- Op<0>() = val;
- Op<1>() = addr;
- setVolatile(isVolatile);
- setAlignment(Align);
- setAtomic(NotAtomic);
- AssertOK();
-}
+ BasicBlock *InsertAtEnd)
+ : StoreInst(val, addr, isVolatile, /*Align=*/0, InsertAtEnd) {}
+
+StoreInst::StoreInst(Value *val, Value *addr, bool isVolatile, unsigned Align,
+ Instruction *InsertBefore)
+ : StoreInst(val, addr, isVolatile, Align, NotAtomic, CrossThread,
+ InsertBefore) {}
+
+StoreInst::StoreInst(Value *val, Value *addr, bool isVolatile, unsigned Align,
+ BasicBlock *InsertAtEnd)
+ : StoreInst(val, addr, isVolatile, Align, NotAtomic, CrossThread,
+ InsertAtEnd) {}
StoreInst::StoreInst(Value *val, Value *addr, bool isVolatile,
unsigned Align, AtomicOrdering Order,
@@ -1169,34 +1076,6 @@ StoreInst::StoreInst(Value *val, Value *addr, bool isVolatile,
}
StoreInst::StoreInst(Value *val, Value *addr, bool isVolatile,
- BasicBlock *InsertAtEnd)
- : Instruction(Type::getVoidTy(val->getContext()), Store,
- OperandTraits<StoreInst>::op_begin(this),
- OperandTraits<StoreInst>::operands(this),
- InsertAtEnd) {
- Op<0>() = val;
- Op<1>() = addr;
- setVolatile(isVolatile);
- setAlignment(0);
- setAtomic(NotAtomic);
- AssertOK();
-}
-
-StoreInst::StoreInst(Value *val, Value *addr, bool isVolatile,
- unsigned Align, BasicBlock *InsertAtEnd)
- : Instruction(Type::getVoidTy(val->getContext()), Store,
- OperandTraits<StoreInst>::op_begin(this),
- OperandTraits<StoreInst>::operands(this),
- InsertAtEnd) {
- Op<0>() = val;
- Op<1>() = addr;
- setVolatile(isVolatile);
- setAlignment(Align);
- setAtomic(NotAtomic);
- AssertOK();
-}
-
-StoreInst::StoreInst(Value *val, Value *addr, bool isVolatile,
unsigned Align, AtomicOrdering Order,
SynchronizationScope SynchScope,
BasicBlock *InsertAtEnd)
@@ -2169,21 +2048,15 @@ bool CastInst::isNoopCast(Type *IntPtrTy) const {
return isNoopCast(getOpcode(), getOperand(0)->getType(), getType(), IntPtrTy);
}
-bool CastInst::isNoopCast(const DataLayout *DL) const {
- if (!DL) {
- // Assume maximum pointer size.
- return isNoopCast(Type::getInt64Ty(getContext()));
- }
-
+bool CastInst::isNoopCast(const DataLayout &DL) const {
Type *PtrOpTy = nullptr;
if (getOpcode() == Instruction::PtrToInt)
PtrOpTy = getOperand(0)->getType();
else if (getOpcode() == Instruction::IntToPtr)
PtrOpTy = getType();
- Type *IntPtrTy = PtrOpTy
- ? DL->getIntPtrType(PtrOpTy)
- : DL->getIntPtrType(getContext(), 0);
+ Type *IntPtrTy =
+ PtrOpTy ? DL.getIntPtrType(PtrOpTy) : DL.getIntPtrType(getContext(), 0);
return isNoopCast(getOpcode(), getOperand(0)->getType(), getType(), IntPtrTy);
}
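Switching isNoopCast from `const DataLayout *` to `const DataLayout &` deletes the "no layout, assume maximum pointer size" branch: once a Module always owns a DataLayout (see the Module.cpp hunk later in this diff), every callee can rely on it. A standalone before/after sketch (stand-in DataLayout, hypothetical helpers):

#include <cstdio>

struct DataLayout { unsigned PtrBits; };

// Before: nullable pointer, so the callee invents a fallback.
static unsigned ptrBitsOld(const DataLayout *DL) {
  return DL ? DL->PtrBits : 64; // "assume maximum pointer size"
}

// After: the reference makes the contract explicit and the branch vanishes.
static unsigned ptrBitsNew(const DataLayout &DL) { return DL.PtrBits; }

int main() {
  DataLayout DL{32};
  std::printf("%u %u %u\n", ptrBitsOld(nullptr), ptrBitsOld(&DL),
              ptrBitsNew(DL));
}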
@@ -2656,44 +2529,38 @@ bool CastInst::isCastable(Type *SrcTy, Type *DestTy) {
// Run through the possibilities ...
if (DestTy->isIntegerTy()) { // Casting to integral
- if (SrcTy->isIntegerTy()) { // Casting from integral
+ if (SrcTy->isIntegerTy()) // Casting from integral
return true;
- } else if (SrcTy->isFloatingPointTy()) { // Casting from floating pt
+ if (SrcTy->isFloatingPointTy()) // Casting from floating pt
return true;
- } else if (SrcTy->isVectorTy()) { // Casting from vector
+ if (SrcTy->isVectorTy()) // Casting from vector
return DestBits == SrcBits;
- } else { // Casting from something else
- return SrcTy->isPointerTy();
- }
- } else if (DestTy->isFloatingPointTy()) { // Casting to floating pt
- if (SrcTy->isIntegerTy()) { // Casting from integral
+ // Casting from something else
+ return SrcTy->isPointerTy();
+ }
+ if (DestTy->isFloatingPointTy()) { // Casting to floating pt
+ if (SrcTy->isIntegerTy()) // Casting from integral
return true;
- } else if (SrcTy->isFloatingPointTy()) { // Casting from floating pt
+ if (SrcTy->isFloatingPointTy()) // Casting from floating pt
return true;
- } else if (SrcTy->isVectorTy()) { // Casting from vector
+ if (SrcTy->isVectorTy()) // Casting from vector
return DestBits == SrcBits;
- } else { // Casting from something else
- return false;
- }
- } else if (DestTy->isVectorTy()) { // Casting to vector
+ // Casting from something else
+ return false;
+ }
+ if (DestTy->isVectorTy()) // Casting to vector
return DestBits == SrcBits;
- } else if (DestTy->isPointerTy()) { // Casting to pointer
- if (SrcTy->isPointerTy()) { // Casting from pointer
+ if (DestTy->isPointerTy()) { // Casting to pointer
+ if (SrcTy->isPointerTy()) // Casting from pointer
return true;
- } else if (SrcTy->isIntegerTy()) { // Casting from integral
- return true;
- } else { // Casting from something else
- return false;
- }
- } else if (DestTy->isX86_MMXTy()) {
- if (SrcTy->isVectorTy()) {
+ return SrcTy->isIntegerTy(); // Casting from integral
+ }
+ if (DestTy->isX86_MMXTy()) {
+ if (SrcTy->isVectorTy())
return DestBits == SrcBits; // 64-bit vector to MMX
- } else {
- return false;
- }
- } else { // Casting to something else
return false;
- }
+ } // Casting to something else
+ return false;
}
bool CastInst::isBitCastable(Type *SrcTy, Type *DestTy) {
@@ -2737,13 +2604,13 @@ bool CastInst::isBitCastable(Type *SrcTy, Type *DestTy) {
}
bool CastInst::isBitOrNoopPointerCastable(Type *SrcTy, Type *DestTy,
- const DataLayout *DL) {
+ const DataLayout &DL) {
if (auto *PtrTy = dyn_cast<PointerType>(SrcTy))
if (auto *IntTy = dyn_cast<IntegerType>(DestTy))
- return DL && IntTy->getBitWidth() == DL->getPointerTypeSizeInBits(PtrTy);
+ return IntTy->getBitWidth() == DL.getPointerTypeSizeInBits(PtrTy);
if (auto *PtrTy = dyn_cast<PointerType>(DestTy))
if (auto *IntTy = dyn_cast<IntegerType>(SrcTy))
- return DL && IntTy->getBitWidth() == DL->getPointerTypeSizeInBits(PtrTy);
+ return IntTy->getBitWidth() == DL.getPointerTypeSizeInBits(PtrTy);
return isBitCastable(SrcTy, DestTy);
}
diff --git a/lib/IR/LLVMContextImpl.h b/lib/IR/LLVMContextImpl.h
index 4631246..e380665 100644
--- a/lib/IR/LLVMContextImpl.h
+++ b/lib/IR/LLVMContextImpl.h
@@ -240,12 +240,12 @@ template <> struct MDNodeKeyImpl<MDLocation> {
: Line(Line), Column(Column), Scope(Scope), InlinedAt(InlinedAt) {}
MDNodeKeyImpl(const MDLocation *L)
- : Line(L->getLine()), Column(L->getColumn()), Scope(L->getScope()),
- InlinedAt(L->getInlinedAt()) {}
+ : Line(L->getLine()), Column(L->getColumn()), Scope(L->getRawScope()),
+ InlinedAt(L->getRawInlinedAt()) {}
bool isKeyOf(const MDLocation *RHS) const {
return Line == RHS->getLine() && Column == RHS->getColumn() &&
- Scope == RHS->getScope() && InlinedAt == RHS->getInlinedAt();
+ Scope == RHS->getRawScope() && InlinedAt == RHS->getRawInlinedAt();
}
unsigned getHashValue() const {
return hash_combine(Line, Column, Scope, InlinedAt);
diff --git a/lib/IR/LegacyPassManager.cpp b/lib/IR/LegacyPassManager.cpp
index fa8d50e..9a365d1 100644
--- a/lib/IR/LegacyPassManager.cpp
+++ b/lib/IR/LegacyPassManager.cpp
@@ -652,7 +652,7 @@ void PMTopLevelManager::schedulePass(Pass *P) {
// are already checked are still available.
checkAnalysis = true;
} else
- // Do not schedule this analysis. Lower level analsyis
+ // Do not schedule this analysis. Lower level analysis
// passes are run on the fly.
delete AnalysisPass;
}
diff --git a/lib/IR/Mangler.cpp b/lib/IR/Mangler.cpp
index 5eeb797..a0e1b25 100644
--- a/lib/IR/Mangler.cpp
+++ b/lib/IR/Mangler.cpp
@@ -73,7 +73,7 @@ static bool hasByteCountSuffix(CallingConv::ID CC) {
/// Microsoft fastcall and stdcall functions require a suffix on their name
/// indicating the number of words of arguments they take.
static void addByteCountSuffix(raw_ostream &OS, const Function *F,
- const DataLayout &TD) {
+ const DataLayout &DL) {
// Calculate arguments size total.
unsigned ArgWords = 0;
for (Function::const_arg_iterator AI = F->arg_begin(), AE = F->arg_end();
@@ -83,8 +83,8 @@ static void addByteCountSuffix(raw_ostream &OS, const Function *F,
if (AI->hasByValOrInAllocaAttr())
Ty = cast<PointerType>(Ty)->getElementType();
// Size should be aligned to pointer size.
- unsigned PtrSize = TD.getPointerSize();
- ArgWords += RoundUpToAlignment(TD.getTypeAllocSize(Ty), PtrSize);
+ unsigned PtrSize = DL.getPointerSize();
+ ArgWords += RoundUpToAlignment(DL.getTypeAllocSize(Ty), PtrSize);
}
OS << '@' << ArgWords;
diff --git a/lib/IR/Module.cpp b/lib/IR/Module.cpp
index b0abe8c..3e8f91f 100644
--- a/lib/IR/Module.cpp
+++ b/lib/IR/Module.cpp
@@ -365,31 +365,11 @@ void Module::addModuleFlag(MDNode *Node) {
void Module::setDataLayout(StringRef Desc) {
DL.reset(Desc);
-
- if (Desc.empty()) {
- DataLayoutStr = "";
- } else {
- DataLayoutStr = DL.getStringRepresentation();
- // DataLayoutStr is now equivalent to Desc, but since the representation
- // is not unique, they may not be identical.
- }
}
-void Module::setDataLayout(const DataLayout *Other) {
- if (!Other) {
- DataLayoutStr = "";
- DL.reset("");
- } else {
- DL = *Other;
- DataLayoutStr = DL.getStringRepresentation();
- }
-}
+void Module::setDataLayout(const DataLayout &Other) { DL = Other; }
-const DataLayout *Module::getDataLayout() const {
- if (DataLayoutStr.empty())
- return nullptr;
- return &DL;
-}
+const DataLayout &Module::getDataLayout() const { return DL; }
//===----------------------------------------------------------------------===//
// Methods to control the materialization of GlobalValues in the Module.
@@ -433,6 +413,12 @@ std::error_code Module::materializeAllPermanently() {
return std::error_code();
}
+std::error_code Module::materializeMetadata() {
+ if (!Materializer)
+ return std::error_code();
+ return Materializer->materializeMetadata();
+}
+
//===----------------------------------------------------------------------===//
// Other module related stuff.
//
diff --git a/lib/IR/TypeFinder.cpp b/lib/IR/TypeFinder.cpp
index e2fb8f8..1d2b808 100644
--- a/lib/IR/TypeFinder.cpp
+++ b/lib/IR/TypeFinder.cpp
@@ -68,7 +68,7 @@ void TypeFinder::run(const Module &M, bool onlyNamed) {
// instructions with this loop.)
for (User::const_op_iterator OI = I.op_begin(), OE = I.op_end();
OI != OE; ++OI)
- if (!isa<Instruction>(OI))
+ if (*OI && !isa<Instruction>(OI))
incorporateValue(*OI);
// Incorporate types hiding in metadata.
diff --git a/lib/IR/Value.cpp b/lib/IR/Value.cpp
index 7d205f9..78bfca4 100644
--- a/lib/IR/Value.cpp
+++ b/lib/IR/Value.cpp
@@ -32,6 +32,7 @@
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/ManagedStatic.h"
+#include "llvm/Support/raw_ostream.h"
#include <algorithm>
using namespace llvm;
@@ -69,15 +70,13 @@ Value::~Value() {
#ifndef NDEBUG // Only in -g mode...
// Check to make sure that there are no uses of this value that are still
// around when the value is destroyed. If there are, then we have a dangling
- // reference and something is wrong. This code is here to print out what is
- // still being referenced. The value in question should be printed as
- // a <badref>
+ // reference and something is wrong. This code is here to print out where
+ // the value is still being referenced.
//
if (!use_empty()) {
dbgs() << "While deleting: " << *VTy << " %" << getName() << "\n";
- for (use_iterator I = use_begin(), E = use_end(); I != E; ++I)
- dbgs() << "Use still stuck around after Def is destroyed:"
- << **I << "\n";
+ for (auto *U : users())
+ dbgs() << "Use still stuck around after Def is destroyed:" << *U << "\n";
}
#endif
assert(use_empty() && "Uses remain when a value is destroyed!");
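The cleanup above replaces a use_iterator loop, whose dereference yields a Use that needs a second dereference to reach the user, with a range-based loop over users() that hands back the user directly. A standalone sketch of the shape:

#include <cstdio>
#include <vector>

struct User { const char *Name; };

struct Value {
  std::vector<User *> Users;
  const std::vector<User *> &users() const { return Users; }
};

int main() {
  User A{"a"}, B{"b"};
  Value V{{&A, &B}};
  for (auto *U : V.users()) // U is the user itself, no **I double dereference
    std::printf("Use still stuck around after Def is destroyed: %s\n", U->Name);
}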
@@ -482,7 +481,7 @@ Value *Value::stripInBoundsOffsets() {
///
/// Test if V is always a pointer to allocated and suitably aligned memory for
/// a simple load or store.
-static bool isDereferenceablePointer(const Value *V, const DataLayout *DL,
+static bool isDereferenceablePointer(const Value *V, const DataLayout &DL,
SmallPtrSetImpl<const Value *> &Visited) {
// Note that it is not safe to speculate into a malloc'd region because
// malloc may return null.
@@ -497,17 +496,14 @@ static bool isDereferenceablePointer(const Value *V, const DataLayout *DL,
// to a type of smaller size (or the same size), and the alignment
// is at least as large as for the resulting pointer type, then
// we can look through the bitcast.
- if (DL)
- if (const BitCastOperator *BC = dyn_cast<BitCastOperator>(V)) {
- Type *STy = BC->getSrcTy()->getPointerElementType(),
- *DTy = BC->getDestTy()->getPointerElementType();
- if (STy->isSized() && DTy->isSized() &&
- (DL->getTypeStoreSize(STy) >=
- DL->getTypeStoreSize(DTy)) &&
- (DL->getABITypeAlignment(STy) >=
- DL->getABITypeAlignment(DTy)))
- return isDereferenceablePointer(BC->getOperand(0), DL, Visited);
- }
+ if (const BitCastOperator *BC = dyn_cast<BitCastOperator>(V)) {
+ Type *STy = BC->getSrcTy()->getPointerElementType(),
+ *DTy = BC->getDestTy()->getPointerElementType();
+ if (STy->isSized() && DTy->isSized() &&
+ (DL.getTypeStoreSize(STy) >= DL.getTypeStoreSize(DTy)) &&
+ (DL.getABITypeAlignment(STy) >= DL.getABITypeAlignment(DTy)))
+ return isDereferenceablePointer(BC->getOperand(0), DL, Visited);
+ }
// Global variables which can't collapse to null are ok.
if (const GlobalVariable *GV = dyn_cast<GlobalVariable>(V))
@@ -520,7 +516,7 @@ static bool isDereferenceablePointer(const Value *V, const DataLayout *DL,
return true;
else if (uint64_t Bytes = A->getDereferenceableBytes()) {
Type *Ty = V->getType()->getPointerElementType();
- if (Ty->isSized() && DL && DL->getTypeStoreSize(Ty) <= Bytes)
+ if (Ty->isSized() && DL.getTypeStoreSize(Ty) <= Bytes)
return true;
}
@@ -532,7 +528,7 @@ static bool isDereferenceablePointer(const Value *V, const DataLayout *DL,
if (ImmutableCallSite CS = V) {
if (uint64_t Bytes = CS.getDereferenceableBytes(0)) {
Type *Ty = V->getType()->getPointerElementType();
- if (Ty->isSized() && DL && DL->getTypeStoreSize(Ty) <= Bytes)
+ if (Ty->isSized() && DL.getTypeStoreSize(Ty) <= Bytes)
return true;
}
}
@@ -586,15 +582,15 @@ static bool isDereferenceablePointer(const Value *V, const DataLayout *DL,
return false;
}
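isDereferenceablePointer recurses through bitcasts and GEPs, and the Visited set it threads through is what keeps cyclic def-use chains (for example through PHIs) from recursing forever: each value is examined at most once. A standalone sketch of that guard, with std::set standing in for SmallPtrSetImpl:

#include <cstdio>
#include <set>

struct Value { const Value *Next; };

static bool walk(const Value *V, std::set<const Value *> &Visited) {
  if (!Visited.insert(V).second)
    return false; // already seen: break the cycle
  if (!V->Next)
    return true;  // reached a root
  return walk(V->Next, Visited);
}

int main() {
  Value A{nullptr}, B{&A}, C{&B}; // straight chain
  Value X{nullptr}, Y{&X};
  X.Next = &Y;                    // two-node cycle
  std::set<const Value *> V1, V2;
  std::printf("%d %d\n", walk(&C, V1), walk(&X, V2)); // 1 0
}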
-bool Value::isDereferenceablePointer(const DataLayout *DL) const {
+bool Value::isDereferenceablePointer(const DataLayout &DL) const {
// When dereferenceability information is provided by a dereferenceable
// attribute, we know exactly how many bytes are dereferenceable. If we can
// determine the exact offset to the attributed variable, we can use that
// information here.
Type *Ty = getType()->getPointerElementType();
- if (Ty->isSized() && DL) {
- APInt Offset(DL->getTypeStoreSizeInBits(getType()), 0);
- const Value *BV = stripAndAccumulateInBoundsConstantOffsets(*DL, Offset);
+ if (Ty->isSized()) {
+ APInt Offset(DL.getTypeStoreSizeInBits(getType()), 0);
+ const Value *BV = stripAndAccumulateInBoundsConstantOffsets(DL, Offset);
APInt DerefBytes(Offset.getBitWidth(), 0);
if (const Argument *A = dyn_cast<Argument>(BV))
@@ -603,7 +599,7 @@ bool Value::isDereferenceablePointer(const DataLayout *DL) const {
DerefBytes = CS.getDereferenceableBytes(0);
if (DerefBytes.getBoolValue() && Offset.isNonNegative()) {
- if (DerefBytes.uge(Offset + DL->getTypeStoreSize(Ty)))
+ if (DerefBytes.uge(Offset + DL.getTypeStoreSize(Ty)))
return true;
}
}
diff --git a/lib/IR/Verifier.cpp b/lib/IR/Verifier.cpp
index d01e138..fcf48c4 100644
--- a/lib/IR/Verifier.cpp
+++ b/lib/IR/Verifier.cpp
@@ -78,7 +78,7 @@
#include <cstdarg>
using namespace llvm;
-static cl::opt<bool> VerifyDebugInfo("verify-debug-info", cl::init(false));
+static cl::opt<bool> VerifyDebugInfo("verify-debug-info", cl::init(true));
namespace {
struct VerifierSupport {
@@ -87,11 +87,13 @@ struct VerifierSupport {
/// \brief Track the brokenness of the module while recursively visiting.
bool Broken;
+ bool EverBroken;
explicit VerifierSupport(raw_ostream &OS)
- : OS(OS), M(nullptr), Broken(false) {}
+ : OS(OS), M(nullptr), Broken(false), EverBroken(false) {}
- void WriteValue(const Value *V) {
+private:
+ void Write(const Value *V) {
if (!V)
return;
if (isa<Instruction>(V)) {
@@ -102,81 +104,61 @@ struct VerifierSupport {
}
}
- void WriteMetadata(const Metadata *MD) {
+ void Write(const Metadata *MD) {
if (!MD)
return;
- MD->printAsOperand(OS, true, M);
+ MD->print(OS, M);
+ OS << '\n';
+ }
+
+ void Write(const NamedMDNode *NMD) {
+ if (!NMD)
+ return;
+ NMD->print(OS);
OS << '\n';
}
- void WriteType(Type *T) {
+ void Write(Type *T) {
if (!T)
return;
OS << ' ' << *T;
}
- void WriteComdat(const Comdat *C) {
+ void Write(const Comdat *C) {
if (!C)
return;
OS << *C;
}
- // CheckFailed - A check failed, so print out the condition and the message
- // that failed. This provides a nice place to put a breakpoint if you want
- // to see why something is not correct.
- void CheckFailed(const Twine &Message, const Value *V1 = nullptr,
- const Value *V2 = nullptr, const Value *V3 = nullptr,
- const Value *V4 = nullptr) {
- OS << Message.str() << "\n";
- WriteValue(V1);
- WriteValue(V2);
- WriteValue(V3);
- WriteValue(V4);
- Broken = true;
- }
-
- void CheckFailed(const Twine &Message, const Metadata *V1, const Metadata *V2,
- const Metadata *V3 = nullptr, const Metadata *V4 = nullptr) {
- OS << Message.str() << "\n";
- WriteMetadata(V1);
- WriteMetadata(V2);
- WriteMetadata(V3);
- WriteMetadata(V4);
- Broken = true;
- }
-
- void CheckFailed(const Twine &Message, const Metadata *V1,
- const Value *V2 = nullptr) {
- OS << Message.str() << "\n";
- WriteMetadata(V1);
- WriteValue(V2);
- Broken = true;
- }
-
- void CheckFailed(const Twine &Message, const Value *V1, Type *T2,
- const Value *V3 = nullptr) {
- OS << Message.str() << "\n";
- WriteValue(V1);
- WriteType(T2);
- WriteValue(V3);
- Broken = true;
- }
-
- void CheckFailed(const Twine &Message, Type *T1, Type *T2 = nullptr,
- Type *T3 = nullptr) {
- OS << Message.str() << "\n";
- WriteType(T1);
- WriteType(T2);
- WriteType(T3);
- Broken = true;
- }
-
- void CheckFailed(const Twine &Message, const Comdat *C) {
- OS << Message.str() << "\n";
- WriteComdat(C);
- Broken = true;
+ template <typename T1, typename... Ts>
+ void WriteTs(const T1 &V1, const Ts &... Vs) {
+ Write(V1);
+ WriteTs(Vs...);
+ }
+
+ template <typename... Ts> void WriteTs() {}
+
+public:
+  /// \brief A check failed, so print out the condition and the message.
+ ///
+ /// This provides a nice place to put a breakpoint if you want to see why
+ /// something is not correct.
+ void CheckFailed(const Twine &Message) {
+ OS << Message << '\n';
+ EverBroken = Broken = true;
+ }
+
+ /// \brief A check failed (with values to print).
+ ///
+ /// This calls the Message-only version so that the above is easier to set a
+ /// breakpoint on.
+ template <typename T1, typename... Ts>
+ void CheckFailed(const Twine &Message, const T1 &V1, const Ts &... Vs) {
+ CheckFailed(Message);
+ WriteTs(V1, Vs...);
}
};
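The block above collapses six fixed-arity CheckFailed overloads into one Write() overload per printable kind plus a recursive variadic WriteTs() that peels off one argument at a time; CheckFailed(Message) stays argument-free so it remains the easy place for a breakpoint. A standalone sketch of the dispatch:

#include <cstdio>
#include <string>

// One Write() overload per kind, as in VerifierSupport.
static void Write(int V) { std::printf("  int: %d\n", V); }
static void Write(const char *S) { std::printf("  str: %s\n", S); }

template <typename... Ts> static void WriteTs() {} // base case: nothing left

template <typename T1, typename... Ts>
static void WriteTs(const T1 &V1, const Ts &...Vs) {
  Write(V1);      // static dispatch on the first argument's type
  WriteTs(Vs...); // recurse on the remainder
}

static void CheckFailed(const std::string &Message) { // breakpoint here
  std::printf("%s\n", Message.c_str());
}

template <typename T1, typename... Ts>
static void CheckFailed(const std::string &Message, const T1 &V1,
                        const Ts &...Vs) {
  CheckFailed(Message);
  WriteTs(V1, Vs...);
}

int main() { CheckFailed("broken module", 42, "some value"); }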
+
class Verifier : public InstVisitor<Verifier>, VerifierSupport {
friend class InstVisitor<Verifier>;
@@ -198,14 +180,18 @@ class Verifier : public InstVisitor<Verifier>, VerifierSupport {
/// personality function.
const Value *PersonalityFn;
- /// \brief Whether we've seen a call to @llvm.frameallocate in this function
+ /// \brief Whether we've seen a call to @llvm.frameescape in this function
/// already.
- bool SawFrameAllocate;
+ bool SawFrameEscape;
+
+ /// Stores the count of how many objects were passed to llvm.frameescape for a
+ /// given function and the largest index passed to llvm.framerecover.
+ DenseMap<Function *, std::pair<unsigned, unsigned>> FrameEscapeInfo;
public:
- explicit Verifier(raw_ostream &OS = dbgs())
+ explicit Verifier(raw_ostream &OS)
: VerifierSupport(OS), Context(nullptr), PersonalityFn(nullptr),
- SawFrameAllocate(false) {}
+ SawFrameEscape(false) {}
bool verify(const Function &F) {
M = F.getParent();
@@ -240,7 +226,7 @@ public:
visit(const_cast<Function &>(F));
InstsInThisBlock.clear();
PersonalityFn = nullptr;
- SawFrameAllocate = false;
+ SawFrameEscape = false;
return !Broken;
}
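FrameEscapeInfo makes the frameescape/framerecover contract checkable across function boundaries: per function it records how many objects llvm.frameescape published and the highest index llvm.framerecover requested, reconciled once after all functions are visited (see verifyFrameRecoverIndices below). A standalone sketch of that bookkeeping, with std::map and std::string standing in for DenseMap and Function*:

#include <algorithm>
#include <cstdio>
#include <map>
#include <string>
#include <utility>

static std::map<std::string, std::pair<unsigned, unsigned>> FrameEscapeInfo;

static void sawEscape(const std::string &F, unsigned NumObjects) {
  FrameEscapeInfo[F].first = NumObjects; // objects escaped by F
}

static void sawRecover(const std::string &F, unsigned Index) {
  unsigned &Max = FrameEscapeInfo[F].second;
  Max = std::max(Max, Index + 1); // store a count so the compare is direct
}

static bool verifyFrameRecoverIndices() {
  bool OK = true;
  for (const auto &KV : FrameEscapeInfo)
    if (KV.second.second > KV.second.first) { // recovered more than escaped
      std::printf("bad llvm.framerecover index in %s\n", KV.first.c_str());
      OK = false;
    }
  return OK;
}

int main() {
  sawEscape("f", 2);  // f escapes two objects (indices 0 and 1)
  sawRecover("f", 3); // index 3 was never escaped
  std::printf("%d\n", verifyFrameRecoverIndices()); // 0
}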
@@ -259,6 +245,10 @@ public:
visitFunction(*I);
}
+ // Now that we've visited every function, verify that we never asked to
+ // recover a frame index that wasn't escaped.
+ verifyFrameRecoverIndices();
+
for (Module::const_global_iterator I = M.global_begin(), E = M.global_end();
I != E; ++I)
visitGlobalVariable(*I);
@@ -278,6 +268,9 @@ public:
visitModuleFlags(M);
visitModuleIdents(M);
+ // Verify debug info last.
+ verifyDebugInfo();
+
return !Broken;
}
@@ -347,6 +340,8 @@ private:
void visitUserOp1(Instruction &I);
void visitUserOp2(Instruction &I) { visitUserOp1(I); }
void visitIntrinsicFunctionCall(Intrinsic::ID ID, CallInst &CI);
+ template <class DbgIntrinsicTy>
+ void visitDbgIntrinsic(StringRef Kind, DbgIntrinsicTy &DII);
void visitAtomicCmpXchgInst(AtomicCmpXchgInst &CXI);
void visitAtomicRMWInst(AtomicRMWInst &RMWI);
void visitFenceInst(FenceInst &FI);
@@ -373,18 +368,9 @@ private:
void VerifyConstantExprBitcastType(const ConstantExpr *CE);
void VerifyStatepoint(ImmutableCallSite CS);
-};
-class DebugInfoVerifier : public VerifierSupport {
-public:
- explicit DebugInfoVerifier(raw_ostream &OS = dbgs()) : VerifierSupport(OS) {}
+ void verifyFrameRecoverIndices();
- bool verify(const Module &M) {
- this->M = &M;
- verifyDebugInfo();
- return !Broken;
- }
-
-private:
+ // Module-level debug info verification...
void verifyDebugInfo();
void processInstructions(DebugInfoFinder &Finder);
void processCallInst(DebugInfoFinder &Finder, const CallInst &CI);
@@ -392,66 +378,58 @@ private:
} // End anonymous namespace
// Assert - We know that cond should be true, if not print an error message.
-#define Assert(C, M) \
- do { if (!(C)) { CheckFailed(M); return; } } while (0)
-#define Assert1(C, M, V1) \
- do { if (!(C)) { CheckFailed(M, V1); return; } } while (0)
-#define Assert2(C, M, V1, V2) \
- do { if (!(C)) { CheckFailed(M, V1, V2); return; } } while (0)
-#define Assert3(C, M, V1, V2, V3) \
- do { if (!(C)) { CheckFailed(M, V1, V2, V3); return; } } while (0)
-#define Assert4(C, M, V1, V2, V3, V4) \
- do { if (!(C)) { CheckFailed(M, V1, V2, V3, V4); return; } } while (0)
+#define Assert(C, ...) \
+ do { if (!(C)) { CheckFailed(__VA_ARGS__); return; } } while (0)
void Verifier::visit(Instruction &I) {
for (unsigned i = 0, e = I.getNumOperands(); i != e; ++i)
- Assert1(I.getOperand(i) != nullptr, "Operand is null", &I);
+ Assert(I.getOperand(i) != nullptr, "Operand is null", &I);
InstVisitor<Verifier>::visit(I);
}
void Verifier::visitGlobalValue(const GlobalValue &GV) {
- Assert1(!GV.isDeclaration() || GV.hasExternalLinkage() ||
- GV.hasExternalWeakLinkage(),
- "Global is external, but doesn't have external or weak linkage!",
- &GV);
+ Assert(!GV.isDeclaration() || GV.hasExternalLinkage() ||
+ GV.hasExternalWeakLinkage(),
+ "Global is external, but doesn't have external or weak linkage!", &GV);
- Assert1(GV.getAlignment() <= Value::MaximumAlignment,
- "huge alignment values are unsupported", &GV);
- Assert1(!GV.hasAppendingLinkage() || isa<GlobalVariable>(GV),
- "Only global variables can have appending linkage!", &GV);
+ Assert(GV.getAlignment() <= Value::MaximumAlignment,
+ "huge alignment values are unsupported", &GV);
+ Assert(!GV.hasAppendingLinkage() || isa<GlobalVariable>(GV),
+ "Only global variables can have appending linkage!", &GV);
if (GV.hasAppendingLinkage()) {
const GlobalVariable *GVar = dyn_cast<GlobalVariable>(&GV);
- Assert1(GVar && GVar->getType()->getElementType()->isArrayTy(),
- "Only global arrays can have appending linkage!", GVar);
+ Assert(GVar && GVar->getType()->getElementType()->isArrayTy(),
+ "Only global arrays can have appending linkage!", GVar);
}
}
void Verifier::visitGlobalVariable(const GlobalVariable &GV) {
if (GV.hasInitializer()) {
- Assert1(GV.getInitializer()->getType() == GV.getType()->getElementType(),
- "Global variable initializer type does not match global "
- "variable type!", &GV);
+ Assert(GV.getInitializer()->getType() == GV.getType()->getElementType(),
+ "Global variable initializer type does not match global "
+ "variable type!",
+ &GV);
// If the global has common linkage, it must have a zero initializer and
// cannot be constant.
if (GV.hasCommonLinkage()) {
- Assert1(GV.getInitializer()->isNullValue(),
- "'common' global must have a zero initializer!", &GV);
- Assert1(!GV.isConstant(), "'common' global may not be marked constant!",
- &GV);
- Assert1(!GV.hasComdat(), "'common' global may not be in a Comdat!", &GV);
+ Assert(GV.getInitializer()->isNullValue(),
+ "'common' global must have a zero initializer!", &GV);
+ Assert(!GV.isConstant(), "'common' global may not be marked constant!",
+ &GV);
+ Assert(!GV.hasComdat(), "'common' global may not be in a Comdat!", &GV);
}
} else {
- Assert1(GV.hasExternalLinkage() || GV.hasExternalWeakLinkage(),
- "invalid linkage type for global declaration", &GV);
+ Assert(GV.hasExternalLinkage() || GV.hasExternalWeakLinkage(),
+ "invalid linkage type for global declaration", &GV);
}
if (GV.hasName() && (GV.getName() == "llvm.global_ctors" ||
GV.getName() == "llvm.global_dtors")) {
- Assert1(!GV.hasInitializer() || GV.hasAppendingLinkage(),
- "invalid linkage for intrinsic global variable", &GV);
+ Assert(!GV.hasInitializer() || GV.hasAppendingLinkage(),
+ "invalid linkage for intrinsic global variable", &GV);
// Don't worry about emitting an error for it not being an array,
// visitGlobalValue will complain on appending non-array.
if (ArrayType *ATy = dyn_cast<ArrayType>(GV.getType()->getElementType())) {
@@ -459,48 +437,48 @@ void Verifier::visitGlobalVariable(const GlobalVariable &GV) {
PointerType *FuncPtrTy =
FunctionType::get(Type::getVoidTy(*Context), false)->getPointerTo();
// FIXME: Reject the 2-field form in LLVM 4.0.
- Assert1(STy && (STy->getNumElements() == 2 ||
- STy->getNumElements() == 3) &&
- STy->getTypeAtIndex(0u)->isIntegerTy(32) &&
- STy->getTypeAtIndex(1) == FuncPtrTy,
- "wrong type for intrinsic global variable", &GV);
+ Assert(STy &&
+ (STy->getNumElements() == 2 || STy->getNumElements() == 3) &&
+ STy->getTypeAtIndex(0u)->isIntegerTy(32) &&
+ STy->getTypeAtIndex(1) == FuncPtrTy,
+ "wrong type for intrinsic global variable", &GV);
if (STy->getNumElements() == 3) {
Type *ETy = STy->getTypeAtIndex(2);
- Assert1(ETy->isPointerTy() &&
- cast<PointerType>(ETy)->getElementType()->isIntegerTy(8),
- "wrong type for intrinsic global variable", &GV);
+ Assert(ETy->isPointerTy() &&
+ cast<PointerType>(ETy)->getElementType()->isIntegerTy(8),
+ "wrong type for intrinsic global variable", &GV);
}
}
}
if (GV.hasName() && (GV.getName() == "llvm.used" ||
GV.getName() == "llvm.compiler.used")) {
- Assert1(!GV.hasInitializer() || GV.hasAppendingLinkage(),
- "invalid linkage for intrinsic global variable", &GV);
+ Assert(!GV.hasInitializer() || GV.hasAppendingLinkage(),
+ "invalid linkage for intrinsic global variable", &GV);
Type *GVType = GV.getType()->getElementType();
if (ArrayType *ATy = dyn_cast<ArrayType>(GVType)) {
PointerType *PTy = dyn_cast<PointerType>(ATy->getElementType());
- Assert1(PTy, "wrong type for intrinsic global variable", &GV);
+ Assert(PTy, "wrong type for intrinsic global variable", &GV);
if (GV.hasInitializer()) {
const Constant *Init = GV.getInitializer();
const ConstantArray *InitArray = dyn_cast<ConstantArray>(Init);
- Assert1(InitArray, "wrong initalizer for intrinsic global variable",
- Init);
+ Assert(InitArray, "wrong initalizer for intrinsic global variable",
+ Init);
for (unsigned i = 0, e = InitArray->getNumOperands(); i != e; ++i) {
Value *V = Init->getOperand(i)->stripPointerCastsNoFollowAliases();
- Assert1(
- isa<GlobalVariable>(V) || isa<Function>(V) || isa<GlobalAlias>(V),
- "invalid llvm.used member", V);
- Assert1(V->hasName(), "members of llvm.used must be named", V);
+ Assert(isa<GlobalVariable>(V) || isa<Function>(V) ||
+ isa<GlobalAlias>(V),
+ "invalid llvm.used member", V);
+ Assert(V->hasName(), "members of llvm.used must be named", V);
}
}
}
}
- Assert1(!GV.hasDLLImportStorageClass() ||
- (GV.isDeclaration() && GV.hasExternalLinkage()) ||
- GV.hasAvailableExternallyLinkage(),
- "Global is marked as dllimport, but not external", &GV);
+ Assert(!GV.hasDLLImportStorageClass() ||
+ (GV.isDeclaration() && GV.hasExternalLinkage()) ||
+ GV.hasAvailableExternallyLinkage(),
+ "Global is marked as dllimport, but not external", &GV);
if (!GV.hasInitializer()) {
visitGlobalValue(GV);
@@ -540,13 +518,13 @@ void Verifier::visitAliaseeSubExpr(const GlobalAlias &GA, const Constant &C) {
void Verifier::visitAliaseeSubExpr(SmallPtrSetImpl<const GlobalAlias*> &Visited,
const GlobalAlias &GA, const Constant &C) {
if (const auto *GV = dyn_cast<GlobalValue>(&C)) {
- Assert1(!GV->isDeclaration(), "Alias must point to a definition", &GA);
+ Assert(!GV->isDeclaration(), "Alias must point to a definition", &GA);
if (const auto *GA2 = dyn_cast<GlobalAlias>(GV)) {
- Assert1(Visited.insert(GA2).second, "Aliases cannot form a cycle", &GA);
+ Assert(Visited.insert(GA2).second, "Aliases cannot form a cycle", &GA);
- Assert1(!GA2->mayBeOverridden(), "Alias cannot point to a weak alias",
- &GA);
+ Assert(!GA2->mayBeOverridden(), "Alias cannot point to a weak alias",
+ &GA);
} else {
// Only continue verifying subexpressions of GlobalAliases.
// Do not recurse into global initializers.
@@ -567,19 +545,18 @@ void Verifier::visitAliaseeSubExpr(SmallPtrSetImpl<const GlobalAlias*> &Visited,
}
void Verifier::visitGlobalAlias(const GlobalAlias &GA) {
- Assert1(!GA.getName().empty(),
- "Alias name cannot be empty!", &GA);
- Assert1(GlobalAlias::isValidLinkage(GA.getLinkage()),
- "Alias should have private, internal, linkonce, weak, linkonce_odr, "
- "weak_odr, or external linkage!",
- &GA);
+ Assert(!GA.getName().empty(), "Alias name cannot be empty!", &GA);
+ Assert(GlobalAlias::isValidLinkage(GA.getLinkage()),
+ "Alias should have private, internal, linkonce, weak, linkonce_odr, "
+ "weak_odr, or external linkage!",
+ &GA);
const Constant *Aliasee = GA.getAliasee();
- Assert1(Aliasee, "Aliasee cannot be NULL!", &GA);
- Assert1(GA.getType() == Aliasee->getType(),
- "Alias and aliasee types should match!", &GA);
+ Assert(Aliasee, "Aliasee cannot be NULL!", &GA);
+ Assert(GA.getType() == Aliasee->getType(),
+ "Alias and aliasee types should match!", &GA);
- Assert1(isa<GlobalValue>(Aliasee) || isa<ConstantExpr>(Aliasee),
- "Aliasee should be either GlobalValue or ConstantExpr", &GA);
+ Assert(isa<GlobalValue>(Aliasee) || isa<ConstantExpr>(Aliasee),
+ "Aliasee should be either GlobalValue or ConstantExpr", &GA);
visitAliaseeSubExpr(GA, *Aliasee);
@@ -592,6 +569,10 @@ void Verifier::visitNamedMDNode(const NamedMDNode &NMD) {
if (!MD)
continue;
+ if (NMD.getName() == "llvm.dbg.cu") {
+ Assert(isa<MDCompileUnit>(MD), "invalid compile unit", &NMD, MD);
+ }
+
visitMDNode(*MD);
}
}
@@ -618,8 +599,8 @@ void Verifier::visitMDNode(const MDNode &MD) {
Metadata *Op = MD.getOperand(i);
if (!Op)
continue;
- Assert2(!isa<LocalAsMetadata>(Op), "Invalid operand for global metadata!",
- &MD, Op);
+ Assert(!isa<LocalAsMetadata>(Op), "Invalid operand for global metadata!",
+ &MD, Op);
if (auto *N = dyn_cast<MDNode>(Op)) {
visitMDNode(*N);
continue;
@@ -631,26 +612,26 @@ void Verifier::visitMDNode(const MDNode &MD) {
}
// Check these last, so we diagnose problems in operands first.
- Assert1(!MD.isTemporary(), "Expected no forward declarations!", &MD);
- Assert1(MD.isResolved(), "All nodes should be resolved!", &MD);
+ Assert(!MD.isTemporary(), "Expected no forward declarations!", &MD);
+ Assert(MD.isResolved(), "All nodes should be resolved!", &MD);
}
void Verifier::visitValueAsMetadata(const ValueAsMetadata &MD, Function *F) {
- Assert1(MD.getValue(), "Expected valid value", &MD);
- Assert2(!MD.getValue()->getType()->isMetadataTy(),
- "Unexpected metadata round-trip through values", &MD, MD.getValue());
+ Assert(MD.getValue(), "Expected valid value", &MD);
+ Assert(!MD.getValue()->getType()->isMetadataTy(),
+ "Unexpected metadata round-trip through values", &MD, MD.getValue());
auto *L = dyn_cast<LocalAsMetadata>(&MD);
if (!L)
return;
- Assert1(F, "function-local metadata used outside a function", L);
+ Assert(F, "function-local metadata used outside a function", L);
// If this was an instruction, bb, or argument, verify that it is in the
// function that we expect.
Function *ActualF = nullptr;
if (Instruction *I = dyn_cast<Instruction>(L->getValue())) {
- Assert2(I->getParent(), "function-local metadata not in basic block", L, I);
+ Assert(I->getParent(), "function-local metadata not in basic block", L, I);
ActualF = I->getParent()->getParent();
} else if (BasicBlock *BB = dyn_cast<BasicBlock>(L->getValue()))
ActualF = BB->getParent();
@@ -658,7 +639,7 @@ void Verifier::visitValueAsMetadata(const ValueAsMetadata &MD, Function *F) {
ActualF = A->getParent();
assert(ActualF && "Unimplemented function local metadata case!");
- Assert1(ActualF == F, "function-local metadata used in wrong function", L);
+ Assert(ActualF == F, "function-local metadata used in wrong function", L);
}
void Verifier::visitMetadataAsValue(const MetadataAsValue &MDV, Function *F) {
@@ -678,126 +659,126 @@ void Verifier::visitMetadataAsValue(const MetadataAsValue &MDV, Function *F) {
}
void Verifier::visitMDLocation(const MDLocation &N) {
- Assert1(N.getScope(), "location requires a valid scope", &N);
- if (auto *IA = N.getInlinedAt())
- Assert2(isa<MDLocation>(IA), "inlined-at should be a location", &N, IA);
+ Assert(N.getRawScope() && isa<MDLocalScope>(N.getRawScope()),
+ "location requires a valid scope", &N, N.getRawScope());
+ if (auto *IA = N.getRawInlinedAt())
+ Assert(isa<MDLocation>(IA), "inlined-at should be a location", &N, IA);
}
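The tightened location check requires the scope operand to be present and to be an MDLocalScope (a subprogram, lexical block, or similar), not merely any MDNode. A sketch with fields abbreviated:

  !0 = !MDSubprogram(name: "f")                  ; an MDLocalScope
  !1 = !MDLocation(line: 7, column: 3, scope: !0, inlinedAt: !2)
  !2 = !MDLocation(line: 30, scope: !0)          ; inlined-at must itself be a location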
void Verifier::visitGenericDebugNode(const GenericDebugNode &N) {
- Assert1(N.getTag(), "invalid tag", &N);
+ Assert(N.getTag(), "invalid tag", &N);
}
void Verifier::visitMDSubrange(const MDSubrange &N) {
- Assert1(N.getTag() == dwarf::DW_TAG_subrange_type, "invalid tag", &N);
+ Assert(N.getTag() == dwarf::DW_TAG_subrange_type, "invalid tag", &N);
}
void Verifier::visitMDEnumerator(const MDEnumerator &N) {
- Assert1(N.getTag() == dwarf::DW_TAG_enumerator, "invalid tag", &N);
+ Assert(N.getTag() == dwarf::DW_TAG_enumerator, "invalid tag", &N);
}
void Verifier::visitMDBasicType(const MDBasicType &N) {
- Assert1(N.getTag() == dwarf::DW_TAG_base_type ||
- N.getTag() == dwarf::DW_TAG_unspecified_type,
- "invalid tag", &N);
+ Assert(N.getTag() == dwarf::DW_TAG_base_type ||
+ N.getTag() == dwarf::DW_TAG_unspecified_type,
+ "invalid tag", &N);
}
void Verifier::visitMDDerivedType(const MDDerivedType &N) {
- Assert1(N.getTag() == dwarf::DW_TAG_typedef ||
- N.getTag() == dwarf::DW_TAG_pointer_type ||
- N.getTag() == dwarf::DW_TAG_ptr_to_member_type ||
- N.getTag() == dwarf::DW_TAG_reference_type ||
- N.getTag() == dwarf::DW_TAG_rvalue_reference_type ||
- N.getTag() == dwarf::DW_TAG_const_type ||
- N.getTag() == dwarf::DW_TAG_volatile_type ||
- N.getTag() == dwarf::DW_TAG_restrict_type ||
- N.getTag() == dwarf::DW_TAG_member ||
- N.getTag() == dwarf::DW_TAG_inheritance ||
- N.getTag() == dwarf::DW_TAG_friend,
- "invalid tag", &N);
+ Assert(N.getTag() == dwarf::DW_TAG_typedef ||
+ N.getTag() == dwarf::DW_TAG_pointer_type ||
+ N.getTag() == dwarf::DW_TAG_ptr_to_member_type ||
+ N.getTag() == dwarf::DW_TAG_reference_type ||
+ N.getTag() == dwarf::DW_TAG_rvalue_reference_type ||
+ N.getTag() == dwarf::DW_TAG_const_type ||
+ N.getTag() == dwarf::DW_TAG_volatile_type ||
+ N.getTag() == dwarf::DW_TAG_restrict_type ||
+ N.getTag() == dwarf::DW_TAG_member ||
+ N.getTag() == dwarf::DW_TAG_inheritance ||
+ N.getTag() == dwarf::DW_TAG_friend,
+ "invalid tag", &N);
}
void Verifier::visitMDCompositeType(const MDCompositeType &N) {
- Assert1(N.getTag() == dwarf::DW_TAG_array_type ||
- N.getTag() == dwarf::DW_TAG_structure_type ||
- N.getTag() == dwarf::DW_TAG_union_type ||
- N.getTag() == dwarf::DW_TAG_enumeration_type ||
- N.getTag() == dwarf::DW_TAG_subroutine_type ||
- N.getTag() == dwarf::DW_TAG_class_type,
- "invalid tag", &N);
+ Assert(N.getTag() == dwarf::DW_TAG_array_type ||
+ N.getTag() == dwarf::DW_TAG_structure_type ||
+ N.getTag() == dwarf::DW_TAG_union_type ||
+ N.getTag() == dwarf::DW_TAG_enumeration_type ||
+ N.getTag() == dwarf::DW_TAG_subroutine_type ||
+ N.getTag() == dwarf::DW_TAG_class_type,
+ "invalid tag", &N);
}
void Verifier::visitMDSubroutineType(const MDSubroutineType &N) {
- Assert1(N.getTag() == dwarf::DW_TAG_subroutine_type, "invalid tag", &N);
+ Assert(N.getTag() == dwarf::DW_TAG_subroutine_type, "invalid tag", &N);
}
void Verifier::visitMDFile(const MDFile &N) {
- Assert1(N.getTag() == dwarf::DW_TAG_file_type, "invalid tag", &N);
+ Assert(N.getTag() == dwarf::DW_TAG_file_type, "invalid tag", &N);
}
void Verifier::visitMDCompileUnit(const MDCompileUnit &N) {
- Assert1(N.getTag() == dwarf::DW_TAG_compile_unit, "invalid tag", &N);
+ Assert(N.getTag() == dwarf::DW_TAG_compile_unit, "invalid tag", &N);
}
void Verifier::visitMDSubprogram(const MDSubprogram &N) {
- Assert1(N.getTag() == dwarf::DW_TAG_subprogram, "invalid tag", &N);
+ Assert(N.getTag() == dwarf::DW_TAG_subprogram, "invalid tag", &N);
}
void Verifier::visitMDLexicalBlock(const MDLexicalBlock &N) {
- Assert1(N.getTag() == dwarf::DW_TAG_lexical_block, "invalid tag", &N);
+ Assert(N.getTag() == dwarf::DW_TAG_lexical_block, "invalid tag", &N);
}
void Verifier::visitMDLexicalBlockFile(const MDLexicalBlockFile &N) {
- Assert1(N.getTag() == dwarf::DW_TAG_lexical_block, "invalid tag", &N);
+ Assert(N.getTag() == dwarf::DW_TAG_lexical_block, "invalid tag", &N);
}
void Verifier::visitMDNamespace(const MDNamespace &N) {
- Assert1(N.getTag() == dwarf::DW_TAG_namespace, "invalid tag", &N);
+ Assert(N.getTag() == dwarf::DW_TAG_namespace, "invalid tag", &N);
}
void Verifier::visitMDTemplateTypeParameter(const MDTemplateTypeParameter &N) {
- Assert1(N.getTag() == dwarf::DW_TAG_template_type_parameter, "invalid tag",
- &N);
+ Assert(N.getTag() == dwarf::DW_TAG_template_type_parameter, "invalid tag",
+ &N);
}
void Verifier::visitMDTemplateValueParameter(
const MDTemplateValueParameter &N) {
- Assert1(N.getTag() == dwarf::DW_TAG_template_value_parameter ||
- N.getTag() == dwarf::DW_TAG_GNU_template_template_param ||
- N.getTag() == dwarf::DW_TAG_GNU_template_parameter_pack,
- "invalid tag", &N);
+ Assert(N.getTag() == dwarf::DW_TAG_template_value_parameter ||
+ N.getTag() == dwarf::DW_TAG_GNU_template_template_param ||
+ N.getTag() == dwarf::DW_TAG_GNU_template_parameter_pack,
+ "invalid tag", &N);
}
void Verifier::visitMDGlobalVariable(const MDGlobalVariable &N) {
- Assert1(N.getTag() == dwarf::DW_TAG_variable, "invalid tag", &N);
+ Assert(N.getTag() == dwarf::DW_TAG_variable, "invalid tag", &N);
}
void Verifier::visitMDLocalVariable(const MDLocalVariable &N) {
- Assert1(N.getTag() == dwarf::DW_TAG_auto_variable ||
- N.getTag() == dwarf::DW_TAG_arg_variable,
- "invalid tag", &N);
+ Assert(N.getTag() == dwarf::DW_TAG_auto_variable ||
+ N.getTag() == dwarf::DW_TAG_arg_variable,
+ "invalid tag", &N);
}
void Verifier::visitMDExpression(const MDExpression &N) {
- Assert1(N.getTag() == dwarf::DW_TAG_expression, "invalid tag", &N);
- Assert1(N.isValid(), "invalid expression", &N);
+ Assert(N.isValid(), "invalid expression", &N);
}
void Verifier::visitMDObjCProperty(const MDObjCProperty &N) {
- Assert1(N.getTag() == dwarf::DW_TAG_APPLE_property, "invalid tag", &N);
+ Assert(N.getTag() == dwarf::DW_TAG_APPLE_property, "invalid tag", &N);
}
void Verifier::visitMDImportedEntity(const MDImportedEntity &N) {
- Assert1(N.getTag() == dwarf::DW_TAG_imported_module ||
- N.getTag() == dwarf::DW_TAG_imported_declaration,
- "invalid tag", &N);
+ Assert(N.getTag() == dwarf::DW_TAG_imported_module ||
+ N.getTag() == dwarf::DW_TAG_imported_declaration,
+ "invalid tag", &N);
}
void Verifier::visitComdat(const Comdat &C) {
// The Module is invalid if the GlobalValue has private linkage. Entities
// with private linkage don't have entries in the symbol table.
if (const GlobalValue *GV = M->getNamedValue(C.getName()))
- Assert1(!GV->hasPrivateLinkage(), "comdat global value has private linkage",
- GV);
+ Assert(!GV->hasPrivateLinkage(), "comdat global value has private linkage",
+ GV);
}
void Verifier::visitModuleIdents(const Module &M) {
@@ -809,12 +790,12 @@ void Verifier::visitModuleIdents(const Module &M) {
// Scan each llvm.ident entry and make sure that this requirement is met.
for (unsigned i = 0, e = Idents->getNumOperands(); i != e; ++i) {
const MDNode *N = Idents->getOperand(i);
- Assert1(N->getNumOperands() == 1,
- "incorrect number of operands in llvm.ident metadata", N);
- Assert1(dyn_cast_or_null<MDString>(N->getOperand(0)),
- ("invalid value for llvm.ident metadata entry operand"
- "(the operand should be a string)"),
- N->getOperand(0));
+ Assert(N->getNumOperands() == 1,
+ "incorrect number of operands in llvm.ident metadata", N);
+ Assert(dyn_cast_or_null<MDString>(N->getOperand(0)),
+ ("invalid value for llvm.ident metadata entry operand"
+ "(the operand should be a string)"),
+ N->getOperand(0));
}
}
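Concretely, each llvm.ident entry is a one-operand node wrapping a single string, e.g.:

  !llvm.ident = !{!0}
  !0 = !{!"clang version 3.7.0"}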
@@ -857,22 +838,21 @@ Verifier::visitModuleFlag(const MDNode *Op,
SmallVectorImpl<const MDNode *> &Requirements) {
// Each module flag should have three arguments, the merge behavior (a
// constant int), the flag ID (an MDString), and the value.
- Assert1(Op->getNumOperands() == 3,
- "incorrect number of operands in module flag", Op);
+ Assert(Op->getNumOperands() == 3,
+ "incorrect number of operands in module flag", Op);
Module::ModFlagBehavior MFB;
if (!Module::isValidModFlagBehavior(Op->getOperand(0), MFB)) {
- Assert1(
+ Assert(
mdconst::dyn_extract_or_null<ConstantInt>(Op->getOperand(0)),
"invalid behavior operand in module flag (expected constant integer)",
Op->getOperand(0));
- Assert1(false,
- "invalid behavior operand in module flag (unexpected constant)",
- Op->getOperand(0));
+ Assert(false,
+ "invalid behavior operand in module flag (unexpected constant)",
+ Op->getOperand(0));
}
MDString *ID = dyn_cast_or_null<MDString>(Op->getOperand(1));
- Assert1(ID,
- "invalid ID operand in module flag (expected metadata string)",
- Op->getOperand(1));
+ Assert(ID, "invalid ID operand in module flag (expected metadata string)",
+ Op->getOperand(1));
// Sanity check the values for behaviors with additional requirements.
switch (MFB) {
@@ -886,13 +866,13 @@ Verifier::visitModuleFlag(const MDNode *Op,
// The value should itself be an MDNode with two operands, a flag ID (an
// MDString), and a value.
MDNode *Value = dyn_cast<MDNode>(Op->getOperand(2));
- Assert1(Value && Value->getNumOperands() == 2,
- "invalid value for 'require' module flag (expected metadata pair)",
- Op->getOperand(2));
- Assert1(isa<MDString>(Value->getOperand(0)),
- ("invalid value for 'require' module flag "
- "(first value operand should be a string)"),
- Value->getOperand(0));
+ Assert(Value && Value->getNumOperands() == 2,
+ "invalid value for 'require' module flag (expected metadata pair)",
+ Op->getOperand(2));
+ Assert(isa<MDString>(Value->getOperand(0)),
+ ("invalid value for 'require' module flag "
+ "(first value operand should be a string)"),
+ Value->getOperand(0));
// Append it to the list of requirements, to check once all module flags are
// scanned.
@@ -903,9 +883,10 @@ Verifier::visitModuleFlag(const MDNode *Op,
case Module::Append:
case Module::AppendUnique: {
// These behavior types require the operand be an MDNode.
- Assert1(isa<MDNode>(Op->getOperand(2)),
- "invalid value for 'append'-type module flag "
- "(expected a metadata node)", Op->getOperand(2));
+ Assert(isa<MDNode>(Op->getOperand(2)),
+ "invalid value for 'append'-type module flag "
+ "(expected a metadata node)",
+ Op->getOperand(2));
break;
}
}
@@ -913,9 +894,8 @@ Verifier::visitModuleFlag(const MDNode *Op,
// Unless this is a "requires" flag, check the ID is unique.
if (MFB != Module::Require) {
bool Inserted = SeenIDs.insert(std::make_pair(ID, Op)).second;
- Assert1(Inserted,
- "module flag identifiers must be unique (or of 'require' type)",
- ID);
+ Assert(Inserted,
+ "module flag identifiers must be unique (or of 'require' type)", ID);
}
}
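For reference, the three-operand shape these checks describe, in IR (behavior codes: 1 Error, 2 Warning, 3 Require, 4 Override, 5 Append, 6 AppendUnique):

  !llvm.module.flags = !{!0, !1}
  !0 = !{i32 1, !"wchar_size", i32 4}   ; Error behavior, unique string ID, value
  !1 = !{i32 3, !"req", !2}             ; Require: value must be a {string, value} pair
  !2 = !{!"wchar_size", i32 4}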
@@ -991,14 +971,15 @@ void Verifier::VerifyParameterAttrs(AttributeSet Attrs, unsigned Idx, Type *Ty,
VerifyAttributeTypes(Attrs, Idx, false, V);
if (isReturnValue)
- Assert1(!Attrs.hasAttribute(Idx, Attribute::ByVal) &&
- !Attrs.hasAttribute(Idx, Attribute::Nest) &&
- !Attrs.hasAttribute(Idx, Attribute::StructRet) &&
- !Attrs.hasAttribute(Idx, Attribute::NoCapture) &&
- !Attrs.hasAttribute(Idx, Attribute::Returned) &&
- !Attrs.hasAttribute(Idx, Attribute::InAlloca),
- "Attributes 'byval', 'inalloca', 'nest', 'sret', 'nocapture', and "
- "'returned' do not apply to return values!", V);
+ Assert(!Attrs.hasAttribute(Idx, Attribute::ByVal) &&
+ !Attrs.hasAttribute(Idx, Attribute::Nest) &&
+ !Attrs.hasAttribute(Idx, Attribute::StructRet) &&
+ !Attrs.hasAttribute(Idx, Attribute::NoCapture) &&
+ !Attrs.hasAttribute(Idx, Attribute::Returned) &&
+ !Attrs.hasAttribute(Idx, Attribute::InAlloca),
+ "Attributes 'byval', 'inalloca', 'nest', 'sret', 'nocapture', and "
+ "'returned' do not apply to return values!",
+ V);
// Check for mutually incompatible attributes. Only inreg is compatible with
// sret.
@@ -1008,45 +989,58 @@ void Verifier::VerifyParameterAttrs(AttributeSet Attrs, unsigned Idx, Type *Ty,
AttrCount += Attrs.hasAttribute(Idx, Attribute::StructRet) ||
Attrs.hasAttribute(Idx, Attribute::InReg);
AttrCount += Attrs.hasAttribute(Idx, Attribute::Nest);
- Assert1(AttrCount <= 1, "Attributes 'byval', 'inalloca', 'inreg', 'nest', "
- "and 'sret' are incompatible!", V);
-
- Assert1(!(Attrs.hasAttribute(Idx, Attribute::InAlloca) &&
- Attrs.hasAttribute(Idx, Attribute::ReadOnly)), "Attributes "
- "'inalloca and readonly' are incompatible!", V);
-
- Assert1(!(Attrs.hasAttribute(Idx, Attribute::StructRet) &&
- Attrs.hasAttribute(Idx, Attribute::Returned)), "Attributes "
- "'sret and returned' are incompatible!", V);
-
- Assert1(!(Attrs.hasAttribute(Idx, Attribute::ZExt) &&
- Attrs.hasAttribute(Idx, Attribute::SExt)), "Attributes "
- "'zeroext and signext' are incompatible!", V);
-
- Assert1(!(Attrs.hasAttribute(Idx, Attribute::ReadNone) &&
- Attrs.hasAttribute(Idx, Attribute::ReadOnly)), "Attributes "
- "'readnone and readonly' are incompatible!", V);
-
- Assert1(!(Attrs.hasAttribute(Idx, Attribute::NoInline) &&
- Attrs.hasAttribute(Idx, Attribute::AlwaysInline)), "Attributes "
- "'noinline and alwaysinline' are incompatible!", V);
-
- Assert1(!AttrBuilder(Attrs, Idx).
- hasAttributes(AttributeFuncs::typeIncompatible(Ty, Idx), Idx),
- "Wrong types for attribute: " +
- AttributeFuncs::typeIncompatible(Ty, Idx).getAsString(Idx), V);
+ Assert(AttrCount <= 1, "Attributes 'byval', 'inalloca', 'inreg', 'nest', "
+ "and 'sret' are incompatible!",
+ V);
+
+ Assert(!(Attrs.hasAttribute(Idx, Attribute::InAlloca) &&
+ Attrs.hasAttribute(Idx, Attribute::ReadOnly)),
+ "Attributes "
+ "'inalloca and readonly' are incompatible!",
+ V);
+
+ Assert(!(Attrs.hasAttribute(Idx, Attribute::StructRet) &&
+ Attrs.hasAttribute(Idx, Attribute::Returned)),
+ "Attributes "
+ "'sret and returned' are incompatible!",
+ V);
+
+ Assert(!(Attrs.hasAttribute(Idx, Attribute::ZExt) &&
+ Attrs.hasAttribute(Idx, Attribute::SExt)),
+ "Attributes "
+ "'zeroext and signext' are incompatible!",
+ V);
+
+ Assert(!(Attrs.hasAttribute(Idx, Attribute::ReadNone) &&
+ Attrs.hasAttribute(Idx, Attribute::ReadOnly)),
+ "Attributes "
+ "'readnone and readonly' are incompatible!",
+ V);
+
+ Assert(!(Attrs.hasAttribute(Idx, Attribute::NoInline) &&
+ Attrs.hasAttribute(Idx, Attribute::AlwaysInline)),
+ "Attributes "
+ "'noinline and alwaysinline' are incompatible!",
+ V);
+
+ Assert(!AttrBuilder(Attrs, Idx)
+ .hasAttributes(AttributeFuncs::typeIncompatible(Ty, Idx), Idx),
+ "Wrong types for attribute: " +
+ AttributeFuncs::typeIncompatible(Ty, Idx).getAsString(Idx),
+ V);
if (PointerType *PTy = dyn_cast<PointerType>(Ty)) {
- if (!PTy->getElementType()->isSized()) {
- Assert1(!Attrs.hasAttribute(Idx, Attribute::ByVal) &&
- !Attrs.hasAttribute(Idx, Attribute::InAlloca),
- "Attributes 'byval' and 'inalloca' do not support unsized types!",
- V);
+ SmallPtrSet<const Type*, 4> Visited;
+ if (!PTy->getElementType()->isSized(&Visited)) {
+ Assert(!Attrs.hasAttribute(Idx, Attribute::ByVal) &&
+ !Attrs.hasAttribute(Idx, Attribute::InAlloca),
+ "Attributes 'byval' and 'inalloca' do not support unsized types!",
+ V);
}
} else {
- Assert1(!Attrs.hasAttribute(Idx, Attribute::ByVal),
- "Attribute 'byval' only applies to parameters with pointer type!",
- V);
+ Assert(!Attrs.hasAttribute(Idx, Attribute::ByVal),
+ "Attribute 'byval' only applies to parameters with pointer type!",
+ V);
}
}
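A few declarations that trip the incompatibility checks above, as a sketch:

  declare void @a(i32* byval nest %p)          ; byval/inalloca/inreg/nest/sret exclude one another
  declare void @b(i32* inalloca readonly %p)   ; 'inalloca and readonly' are incompatible
  declare void @c(i32 byval %x)                ; byval requires a pointer parameter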
@@ -1078,28 +1072,30 @@ void Verifier::VerifyFunctionAttrs(FunctionType *FT, AttributeSet Attrs,
continue;
if (Attrs.hasAttribute(Idx, Attribute::Nest)) {
- Assert1(!SawNest, "More than one parameter has attribute nest!", V);
+ Assert(!SawNest, "More than one parameter has attribute nest!", V);
SawNest = true;
}
if (Attrs.hasAttribute(Idx, Attribute::Returned)) {
- Assert1(!SawReturned, "More than one parameter has attribute returned!",
- V);
- Assert1(Ty->canLosslesslyBitCastTo(FT->getReturnType()), "Incompatible "
- "argument and return types for 'returned' attribute", V);
+ Assert(!SawReturned, "More than one parameter has attribute returned!",
+ V);
+ Assert(Ty->canLosslesslyBitCastTo(FT->getReturnType()),
+ "Incompatible "
+ "argument and return types for 'returned' attribute",
+ V);
SawReturned = true;
}
if (Attrs.hasAttribute(Idx, Attribute::StructRet)) {
- Assert1(!SawSRet, "Cannot have multiple 'sret' parameters!", V);
- Assert1(Idx == 1 || Idx == 2,
- "Attribute 'sret' is not on first or second parameter!", V);
+ Assert(!SawSRet, "Cannot have multiple 'sret' parameters!", V);
+ Assert(Idx == 1 || Idx == 2,
+ "Attribute 'sret' is not on first or second parameter!", V);
SawSRet = true;
}
if (Attrs.hasAttribute(Idx, Attribute::InAlloca)) {
- Assert1(Idx == FT->getNumParams(),
- "inalloca isn't on the last parameter!", V);
+ Assert(Idx == FT->getNumParams(), "inalloca isn't on the last parameter!",
+ V);
}
}
@@ -1108,39 +1104,35 @@ void Verifier::VerifyFunctionAttrs(FunctionType *FT, AttributeSet Attrs,
VerifyAttributeTypes(Attrs, AttributeSet::FunctionIndex, true, V);
- Assert1(!(Attrs.hasAttribute(AttributeSet::FunctionIndex,
- Attribute::ReadNone) &&
- Attrs.hasAttribute(AttributeSet::FunctionIndex,
- Attribute::ReadOnly)),
- "Attributes 'readnone and readonly' are incompatible!", V);
+ Assert(
+ !(Attrs.hasAttribute(AttributeSet::FunctionIndex, Attribute::ReadNone) &&
+ Attrs.hasAttribute(AttributeSet::FunctionIndex, Attribute::ReadOnly)),
+ "Attributes 'readnone and readonly' are incompatible!", V);
- Assert1(!(Attrs.hasAttribute(AttributeSet::FunctionIndex,
- Attribute::NoInline) &&
- Attrs.hasAttribute(AttributeSet::FunctionIndex,
- Attribute::AlwaysInline)),
- "Attributes 'noinline and alwaysinline' are incompatible!", V);
+ Assert(
+ !(Attrs.hasAttribute(AttributeSet::FunctionIndex, Attribute::NoInline) &&
+ Attrs.hasAttribute(AttributeSet::FunctionIndex,
+ Attribute::AlwaysInline)),
+ "Attributes 'noinline and alwaysinline' are incompatible!", V);
if (Attrs.hasAttribute(AttributeSet::FunctionIndex,
Attribute::OptimizeNone)) {
- Assert1(Attrs.hasAttribute(AttributeSet::FunctionIndex,
- Attribute::NoInline),
- "Attribute 'optnone' requires 'noinline'!", V);
+ Assert(Attrs.hasAttribute(AttributeSet::FunctionIndex, Attribute::NoInline),
+ "Attribute 'optnone' requires 'noinline'!", V);
- Assert1(!Attrs.hasAttribute(AttributeSet::FunctionIndex,
- Attribute::OptimizeForSize),
- "Attributes 'optsize and optnone' are incompatible!", V);
+ Assert(!Attrs.hasAttribute(AttributeSet::FunctionIndex,
+ Attribute::OptimizeForSize),
+ "Attributes 'optsize and optnone' are incompatible!", V);
- Assert1(!Attrs.hasAttribute(AttributeSet::FunctionIndex,
- Attribute::MinSize),
- "Attributes 'minsize and optnone' are incompatible!", V);
+ Assert(!Attrs.hasAttribute(AttributeSet::FunctionIndex, Attribute::MinSize),
+ "Attributes 'minsize and optnone' are incompatible!", V);
}
if (Attrs.hasAttribute(AttributeSet::FunctionIndex,
Attribute::JumpTable)) {
const GlobalValue *GV = cast<GlobalValue>(V);
- Assert1(GV->hasUnnamedAddr(),
- "Attribute 'jumptable' requires 'unnamed_addr'", V);
-
+ Assert(GV->hasUnnamedAddr(),
+ "Attribute 'jumptable' requires 'unnamed_addr'", V);
}
}
@@ -1148,9 +1140,9 @@ void Verifier::VerifyConstantExprBitcastType(const ConstantExpr *CE) {
if (CE->getOpcode() != Instruction::BitCast)
return;
- Assert1(CastInst::castIsValid(Instruction::BitCast, CE->getOperand(0),
- CE->getType()),
- "Invalid bitcast", CE);
+ Assert(CastInst::castIsValid(Instruction::BitCast, CE->getOperand(0),
+ CE->getType()),
+ "Invalid bitcast", CE);
}
bool Verifier::VerifyAttributeCount(AttributeSet Attrs, unsigned Params) {
@@ -1175,84 +1167,86 @@ void Verifier::VerifyStatepoint(ImmutableCallSite CS) {
const Instruction &CI = *CS.getInstruction();
- Assert1(!CS.doesNotAccessMemory() &&
- !CS.onlyReadsMemory(),
- "gc.statepoint must read and write memory to preserve "
- "reordering restrictions required by safepoint semantics", &CI);
-
+ Assert(!CS.doesNotAccessMemory() && !CS.onlyReadsMemory(),
+ "gc.statepoint must read and write memory to preserve "
+ "reordering restrictions required by safepoint semantics",
+ &CI);
+
const Value *Target = CS.getArgument(0);
const PointerType *PT = dyn_cast<PointerType>(Target->getType());
- Assert2(PT && PT->getElementType()->isFunctionTy(),
- "gc.statepoint callee must be of function pointer type",
- &CI, Target);
+ Assert(PT && PT->getElementType()->isFunctionTy(),
+ "gc.statepoint callee must be of function pointer type", &CI, Target);
FunctionType *TargetFuncType = cast<FunctionType>(PT->getElementType());
const Value *NumCallArgsV = CS.getArgument(1);
- Assert1(isa<ConstantInt>(NumCallArgsV),
- "gc.statepoint number of arguments to underlying call "
- "must be constant integer", &CI);
+ Assert(isa<ConstantInt>(NumCallArgsV),
+ "gc.statepoint number of arguments to underlying call "
+ "must be constant integer",
+ &CI);
const int NumCallArgs = cast<ConstantInt>(NumCallArgsV)->getZExtValue();
- Assert1(NumCallArgs >= 0,
- "gc.statepoint number of arguments to underlying call "
- "must be positive", &CI);
+ Assert(NumCallArgs >= 0,
+ "gc.statepoint number of arguments to underlying call "
+ "must be positive",
+ &CI);
const int NumParams = (int)TargetFuncType->getNumParams();
if (TargetFuncType->isVarArg()) {
- Assert1(NumCallArgs >= NumParams,
- "gc.statepoint mismatch in number of vararg call args", &CI);
+ Assert(NumCallArgs >= NumParams,
+ "gc.statepoint mismatch in number of vararg call args", &CI);
// TODO: Remove this limitation
- Assert1(TargetFuncType->getReturnType()->isVoidTy(),
- "gc.statepoint doesn't support wrapping non-void "
- "vararg functions yet", &CI);
+ Assert(TargetFuncType->getReturnType()->isVoidTy(),
+ "gc.statepoint doesn't support wrapping non-void "
+ "vararg functions yet",
+ &CI);
} else
- Assert1(NumCallArgs == NumParams,
- "gc.statepoint mismatch in number of call args", &CI);
+ Assert(NumCallArgs == NumParams,
+ "gc.statepoint mismatch in number of call args", &CI);
const Value *Unused = CS.getArgument(2);
- Assert1(isa<ConstantInt>(Unused) &&
- cast<ConstantInt>(Unused)->isNullValue(),
- "gc.statepoint parameter #3 must be zero", &CI);
+ Assert(isa<ConstantInt>(Unused) && cast<ConstantInt>(Unused)->isNullValue(),
+ "gc.statepoint parameter #3 must be zero", &CI);
// Verify that the types of the call parameter arguments match
// the type of the wrapped callee.
for (int i = 0; i < NumParams; i++) {
Type *ParamType = TargetFuncType->getParamType(i);
Type *ArgType = CS.getArgument(3+i)->getType();
- Assert1(ArgType == ParamType,
- "gc.statepoint call argument does not match wrapped "
- "function type", &CI);
+ Assert(ArgType == ParamType,
+ "gc.statepoint call argument does not match wrapped "
+ "function type",
+ &CI);
}
const int EndCallArgsInx = 2+NumCallArgs;
const Value *NumDeoptArgsV = CS.getArgument(EndCallArgsInx+1);
- Assert1(isa<ConstantInt>(NumDeoptArgsV),
- "gc.statepoint number of deoptimization arguments "
- "must be constant integer", &CI);
+ Assert(isa<ConstantInt>(NumDeoptArgsV),
+ "gc.statepoint number of deoptimization arguments "
+ "must be constant integer",
+ &CI);
const int NumDeoptArgs = cast<ConstantInt>(NumDeoptArgsV)->getZExtValue();
- Assert1(NumDeoptArgs >= 0,
- "gc.statepoint number of deoptimization arguments "
- "must be positive", &CI);
+ Assert(NumDeoptArgs >= 0, "gc.statepoint number of deoptimization arguments "
+ "must be positive",
+ &CI);
+
+ Assert(4 + NumCallArgs + NumDeoptArgs <= (int)CS.arg_size(),
+ "gc.statepoint too few arguments according to length fields", &CI);
- Assert1(4 + NumCallArgs + NumDeoptArgs <= (int)CS.arg_size(),
- "gc.statepoint too few arguments according to length fields", &CI);
-
// Check that the only uses of this gc.statepoint are gc.result or
// gc.relocate calls which are tied to this statepoint and thus part
// of the same statepoint sequence
for (const User *U : CI.users()) {
const CallInst *Call = dyn_cast<const CallInst>(U);
- Assert2(Call, "illegal use of statepoint token", &CI, U);
+ Assert(Call, "illegal use of statepoint token", &CI, U);
if (!Call) continue;
- Assert2(isGCRelocate(Call) || isGCResult(Call),
- "gc.result or gc.relocate are the only value uses"
- "of a gc.statepoint", &CI, U);
+ Assert(isGCRelocate(Call) || isGCResult(Call),
+ "gc.result or gc.relocate are the only value uses"
+ "of a gc.statepoint",
+ &CI, U);
if (isGCResult(Call)) {
- Assert2(Call->getArgOperand(0) == &CI,
- "gc.result connected to wrong gc.statepoint",
- &CI, Call);
+ Assert(Call->getArgOperand(0) == &CI,
+ "gc.result connected to wrong gc.statepoint", &CI, Call);
} else if (isGCRelocate(Call)) {
- Assert2(Call->getArgOperand(0) == &CI,
- "gc.relocate connected to wrong gc.statepoint",
- &CI, Call);
+ Assert(Call->getArgOperand(0) == &CI,
+ "gc.relocate connected to wrong gc.statepoint", &CI, Call);
}
}
@@ -1266,6 +1260,19 @@ void Verifier::VerifyStatepoint(ImmutableCallSite CS) {
// about. See example statepoint.ll in the verifier subdirectory
}
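Pulling these asserts together, the operand layout they imply for the 3.7-era statepoint form (a reconstruction from the checks, not a spec quote):

  ; op 0        callee, a function pointer
  ; op 1        i32 N, number of wrapped call arguments (>= 0)
  ; op 2        i32, currently unused, must be 0
  ; op 3..2+N   the N call arguments, types matching the callee
  ; op 3+N      i32 M, number of deoptimization arguments (>= 0)
  ; op 4+N..    the M deopt arguments, then any gc pointers
  ; minimal well-formed call (N = 0, M = 0, so arg_size() = 4):
  %token = call i32 @llvm.experimental.gc.statepoint(void ()* @target, i32 0, i32 0, i32 0)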
+void Verifier::verifyFrameRecoverIndices() {
+ for (auto &Counts : FrameEscapeInfo) {
+ Function *F = Counts.first;
+ unsigned EscapedObjectCount = Counts.second.first;
+ unsigned MaxRecoveredIndex = Counts.second.second;
+ Assert(MaxRecoveredIndex <= EscapedObjectCount,
+ "all indices passed to llvm.framerecover must be less than the "
+ "number of arguments passed ot llvm.frameescape in the parent "
+ "function",
+ F);
+ }
+}
+
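A sketch of the pairing this new check polices (intrinsic call syntax abbreviated from memory, so treat it as an assumption):

  define void @parent() {
    %a = alloca i32
    %b = alloca i32
    call void (...) @llvm.frameescape(i32* %a, i32* %b)   ; 2 escaped objects
    ret void
  }
  ; elsewhere: indices 0 and 1 are recoverable, index 2 is rejected
  %p = call i8* @llvm.framerecover(i8* bitcast (void ()* @parent to i8*), i8* %fp, i32 1)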
// visitFunction - Verify that a function is ok.
//
void Verifier::visitFunction(const Function &F) {
@@ -1273,25 +1280,24 @@ void Verifier::visitFunction(const Function &F) {
FunctionType *FT = F.getFunctionType();
unsigned NumArgs = F.arg_size();
- Assert1(Context == &F.getContext(),
- "Function context does not match Module context!", &F);
+ Assert(Context == &F.getContext(),
+ "Function context does not match Module context!", &F);
- Assert1(!F.hasCommonLinkage(), "Functions may not have common linkage", &F);
- Assert2(FT->getNumParams() == NumArgs,
- "# formal arguments must match # of arguments for function type!",
- &F, FT);
- Assert1(F.getReturnType()->isFirstClassType() ||
- F.getReturnType()->isVoidTy() ||
- F.getReturnType()->isStructTy(),
- "Functions cannot return aggregate values!", &F);
+ Assert(!F.hasCommonLinkage(), "Functions may not have common linkage", &F);
+ Assert(FT->getNumParams() == NumArgs,
+ "# formal arguments must match # of arguments for function type!", &F,
+ FT);
+ Assert(F.getReturnType()->isFirstClassType() ||
+ F.getReturnType()->isVoidTy() || F.getReturnType()->isStructTy(),
+ "Functions cannot return aggregate values!", &F);
- Assert1(!F.hasStructRetAttr() || F.getReturnType()->isVoidTy(),
- "Invalid struct return type!", &F);
+ Assert(!F.hasStructRetAttr() || F.getReturnType()->isVoidTy(),
+ "Invalid struct return type!", &F);
AttributeSet Attrs = F.getAttributes();
- Assert1(VerifyAttributeCount(Attrs, FT->getNumParams()),
- "Attribute after last parameter!", &F);
+ Assert(VerifyAttributeCount(Attrs, FT->getNumParams()),
+ "Attribute after last parameter!", &F);
// Check function attributes.
VerifyFunctionAttrs(FT, Attrs, &F);
@@ -1299,9 +1305,8 @@ void Verifier::visitFunction(const Function &F) {
// On function declarations/definitions, we do not support the builtin
// attribute. We do not check this in VerifyFunctionAttrs since that is
// checking for Attributes that can/can not ever be on functions.
- Assert1(!Attrs.hasAttribute(AttributeSet::FunctionIndex,
- Attribute::Builtin),
- "Attribute 'builtin' can only be applied to a callsite.", &F);
+ Assert(!Attrs.hasAttribute(AttributeSet::FunctionIndex, Attribute::Builtin),
+ "Attribute 'builtin' can only be applied to a callsite.", &F);
// Check that this function meets the restrictions on this calling convention.
// Sometimes varargs is used for perfectly forwarding thunks, so some of these
@@ -1315,8 +1320,9 @@ void Verifier::visitFunction(const Function &F) {
case CallingConv::Intel_OCL_BI:
case CallingConv::PTX_Kernel:
case CallingConv::PTX_Device:
- Assert1(!F.isVarArg(), "Calling convention does not support varargs or "
- "perfect forwarding!", &F);
+ Assert(!F.isVarArg(), "Calling convention does not support varargs or "
+ "perfect forwarding!",
+ &F);
break;
}
@@ -1327,35 +1333,35 @@ void Verifier::visitFunction(const Function &F) {
unsigned i = 0;
for (Function::const_arg_iterator I = F.arg_begin(), E = F.arg_end(); I != E;
++I, ++i) {
- Assert2(I->getType() == FT->getParamType(i),
- "Argument value does not match function argument type!",
- I, FT->getParamType(i));
- Assert1(I->getType()->isFirstClassType(),
- "Function arguments must have first-class types!", I);
+ Assert(I->getType() == FT->getParamType(i),
+ "Argument value does not match function argument type!", I,
+ FT->getParamType(i));
+ Assert(I->getType()->isFirstClassType(),
+ "Function arguments must have first-class types!", I);
if (!isLLVMdotName)
- Assert2(!I->getType()->isMetadataTy(),
- "Function takes metadata but isn't an intrinsic", I, &F);
+ Assert(!I->getType()->isMetadataTy(),
+ "Function takes metadata but isn't an intrinsic", I, &F);
}
if (F.isMaterializable()) {
// Function has a body somewhere we can't see.
} else if (F.isDeclaration()) {
- Assert1(F.hasExternalLinkage() || F.hasExternalWeakLinkage(),
- "invalid linkage type for function declaration", &F);
+ Assert(F.hasExternalLinkage() || F.hasExternalWeakLinkage(),
+ "invalid linkage type for function declaration", &F);
} else {
// Verify that this function (which has a body) is not named "llvm.*". It
// is not legal to define intrinsics.
- Assert1(!isLLVMdotName, "llvm intrinsics cannot be defined!", &F);
+ Assert(!isLLVMdotName, "llvm intrinsics cannot be defined!", &F);
// Check the entry node
const BasicBlock *Entry = &F.getEntryBlock();
- Assert1(pred_empty(Entry),
- "Entry block to function must not have predecessors!", Entry);
+ Assert(pred_empty(Entry),
+ "Entry block to function must not have predecessors!", Entry);
// The address of the entry block cannot be taken, unless it is dead.
if (Entry->hasAddressTaken()) {
- Assert1(!BlockAddress::lookup(Entry)->isConstantUsed(),
- "blockaddress may not be used with the entry block!", Entry);
+ Assert(!BlockAddress::lookup(Entry)->isConstantUsed(),
+ "blockaddress may not be used with the entry block!", Entry);
}
}
@@ -1364,13 +1370,13 @@ void Verifier::visitFunction(const Function &F) {
if (F.getIntrinsicID()) {
const User *U;
if (F.hasAddressTaken(&U))
- Assert1(0, "Invalid user of intrinsic instruction!", U);
+ Assert(0, "Invalid user of intrinsic instruction!", U);
}
- Assert1(!F.hasDLLImportStorageClass() ||
- (F.isDeclaration() && F.hasExternalLinkage()) ||
- F.hasAvailableExternallyLinkage(),
- "Function is marked as dllimport, but not external.", &F);
+ Assert(!F.hasDLLImportStorageClass() ||
+ (F.isDeclaration() && F.hasExternalLinkage()) ||
+ F.hasAvailableExternallyLinkage(),
+ "Function is marked as dllimport, but not external.", &F);
}
// verifyBasicBlock - Verify that a basic block is well formed...
@@ -1379,7 +1385,7 @@ void Verifier::visitBasicBlock(BasicBlock &BB) {
InstsInThisBlock.clear();
// Ensure that basic blocks have terminators!
- Assert1(BB.getTerminator(), "Basic Block does not have terminator!", &BB);
+ Assert(BB.getTerminator(), "Basic Block does not have terminator!", &BB);
// Check constraints that this basic block imposes on all of the PHI nodes in
// it.
@@ -1390,12 +1396,14 @@ void Verifier::visitBasicBlock(BasicBlock &BB) {
PHINode *PN;
for (BasicBlock::iterator I = BB.begin(); (PN = dyn_cast<PHINode>(I));++I) {
// Ensure that PHI nodes have at least one entry!
- Assert1(PN->getNumIncomingValues() != 0,
- "PHI nodes must have at least one entry. If the block is dead, "
- "the PHI should be removed!", PN);
- Assert1(PN->getNumIncomingValues() == Preds.size(),
- "PHINode should have one entry for each predecessor of its "
- "parent basic block!", PN);
+ Assert(PN->getNumIncomingValues() != 0,
+ "PHI nodes must have at least one entry. If the block is dead, "
+ "the PHI should be removed!",
+ PN);
+ Assert(PN->getNumIncomingValues() == Preds.size(),
+ "PHINode should have one entry for each predecessor of its "
+ "parent basic block!",
+ PN);
// Get and sort all incoming values in the PHI node...
Values.clear();
@@ -1410,17 +1418,17 @@ void Verifier::visitBasicBlock(BasicBlock &BB) {
// particular basic block in this PHI node, that the incoming values are
// all identical.
//
- Assert4(i == 0 || Values[i].first != Values[i-1].first ||
- Values[i].second == Values[i-1].second,
- "PHI node has multiple entries for the same basic block with "
- "different incoming values!", PN, Values[i].first,
- Values[i].second, Values[i-1].second);
+ Assert(i == 0 || Values[i].first != Values[i - 1].first ||
+ Values[i].second == Values[i - 1].second,
+ "PHI node has multiple entries for the same basic block with "
+ "different incoming values!",
+ PN, Values[i].first, Values[i].second, Values[i - 1].second);
// Check to make sure that the predecessors and PHI node entries are
// matched up.
- Assert3(Values[i].first == Preds[i],
- "PHI node entries do not match predecessors!", PN,
- Values[i].first, Preds[i]);
+ Assert(Values[i].first == Preds[i],
+ "PHI node entries do not match predecessors!", PN,
+ Values[i].first, Preds[i]);
}
}
}
@@ -1434,15 +1442,15 @@ void Verifier::visitBasicBlock(BasicBlock &BB) {
void Verifier::visitTerminatorInst(TerminatorInst &I) {
// Ensure that terminators only exist at the end of the basic block.
- Assert1(&I == I.getParent()->getTerminator(),
- "Terminator found in the middle of a basic block!", I.getParent());
+ Assert(&I == I.getParent()->getTerminator(),
+ "Terminator found in the middle of a basic block!", I.getParent());
visitInstruction(I);
}
void Verifier::visitBranchInst(BranchInst &BI) {
if (BI.isConditional()) {
- Assert2(BI.getCondition()->getType()->isIntegerTy(1),
- "Branch condition is not 'i1' type!", &BI, BI.getCondition());
+ Assert(BI.getCondition()->getType()->isIntegerTy(1),
+ "Branch condition is not 'i1' type!", &BI, BI.getCondition());
}
visitTerminatorInst(BI);
}
@@ -1451,13 +1459,15 @@ void Verifier::visitReturnInst(ReturnInst &RI) {
Function *F = RI.getParent()->getParent();
unsigned N = RI.getNumOperands();
if (F->getReturnType()->isVoidTy())
- Assert2(N == 0,
- "Found return instr that returns non-void in Function of void "
- "return type!", &RI, F->getReturnType());
+ Assert(N == 0,
+ "Found return instr that returns non-void in Function of void "
+ "return type!",
+ &RI, F->getReturnType());
else
- Assert2(N == 1 && F->getReturnType() == RI.getOperand(0)->getType(),
- "Function return type does not match operand "
- "type of return inst!", &RI, F->getReturnType());
+ Assert(N == 1 && F->getReturnType() == RI.getOperand(0)->getType(),
+ "Function return type does not match operand "
+ "type of return inst!",
+ &RI, F->getReturnType());
// Check to make sure that the return value has necessary properties for
// terminators...
@@ -1470,32 +1480,32 @@ void Verifier::visitSwitchInst(SwitchInst &SI) {
Type *SwitchTy = SI.getCondition()->getType();
SmallPtrSet<ConstantInt*, 32> Constants;
for (SwitchInst::CaseIt i = SI.case_begin(), e = SI.case_end(); i != e; ++i) {
- Assert1(i.getCaseValue()->getType() == SwitchTy,
- "Switch constants must all be same type as switch value!", &SI);
- Assert2(Constants.insert(i.getCaseValue()).second,
- "Duplicate integer as switch case", &SI, i.getCaseValue());
+ Assert(i.getCaseValue()->getType() == SwitchTy,
+ "Switch constants must all be same type as switch value!", &SI);
+ Assert(Constants.insert(i.getCaseValue()).second,
+ "Duplicate integer as switch case", &SI, i.getCaseValue());
}
visitTerminatorInst(SI);
}
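For instance, the duplicate-case check rejects:

  switch i32 %x, label %default [
    i32 1, label %a
    i32 1, label %b    ; "Duplicate integer as switch case"
  ]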
void Verifier::visitIndirectBrInst(IndirectBrInst &BI) {
- Assert1(BI.getAddress()->getType()->isPointerTy(),
- "Indirectbr operand must have pointer type!", &BI);
+ Assert(BI.getAddress()->getType()->isPointerTy(),
+ "Indirectbr operand must have pointer type!", &BI);
for (unsigned i = 0, e = BI.getNumDestinations(); i != e; ++i)
- Assert1(BI.getDestination(i)->getType()->isLabelTy(),
- "Indirectbr destinations must all have pointer type!", &BI);
+ Assert(BI.getDestination(i)->getType()->isLabelTy(),
+ "Indirectbr destinations must all have pointer type!", &BI);
visitTerminatorInst(BI);
}
void Verifier::visitSelectInst(SelectInst &SI) {
- Assert1(!SelectInst::areInvalidOperands(SI.getOperand(0), SI.getOperand(1),
- SI.getOperand(2)),
- "Invalid operands for select instruction!", &SI);
+ Assert(!SelectInst::areInvalidOperands(SI.getOperand(0), SI.getOperand(1),
+ SI.getOperand(2)),
+ "Invalid operands for select instruction!", &SI);
- Assert1(SI.getTrueValue()->getType() == SI.getType(),
- "Select values must have same type as select instruction!", &SI);
+ Assert(SI.getTrueValue()->getType() == SI.getType(),
+ "Select values must have same type as select instruction!", &SI);
visitInstruction(SI);
}
@@ -1503,7 +1513,7 @@ void Verifier::visitSelectInst(SelectInst &SI) {
/// a pass, if any exist, it's an error.
///
void Verifier::visitUserOp1(Instruction &I) {
- Assert1(0, "User-defined operators should not live outside of a pass!", &I);
+ Assert(0, "User-defined operators should not live outside of a pass!", &I);
}
void Verifier::visitTruncInst(TruncInst &I) {
@@ -1515,11 +1525,11 @@ void Verifier::visitTruncInst(TruncInst &I) {
unsigned SrcBitSize = SrcTy->getScalarSizeInBits();
unsigned DestBitSize = DestTy->getScalarSizeInBits();
- Assert1(SrcTy->isIntOrIntVectorTy(), "Trunc only operates on integer", &I);
- Assert1(DestTy->isIntOrIntVectorTy(), "Trunc only produces integer", &I);
- Assert1(SrcTy->isVectorTy() == DestTy->isVectorTy(),
- "trunc source and destination must both be a vector or neither", &I);
- Assert1(SrcBitSize > DestBitSize,"DestTy too big for Trunc", &I);
+ Assert(SrcTy->isIntOrIntVectorTy(), "Trunc only operates on integer", &I);
+ Assert(DestTy->isIntOrIntVectorTy(), "Trunc only produces integer", &I);
+ Assert(SrcTy->isVectorTy() == DestTy->isVectorTy(),
+ "trunc source and destination must both be a vector or neither", &I);
+ Assert(SrcBitSize > DestBitSize, "DestTy too big for Trunc", &I);
visitInstruction(I);
}
@@ -1530,14 +1540,14 @@ void Verifier::visitZExtInst(ZExtInst &I) {
Type *DestTy = I.getType();
// Get the size of the types in bits, we'll need this later
- Assert1(SrcTy->isIntOrIntVectorTy(), "ZExt only operates on integer", &I);
- Assert1(DestTy->isIntOrIntVectorTy(), "ZExt only produces an integer", &I);
- Assert1(SrcTy->isVectorTy() == DestTy->isVectorTy(),
- "zext source and destination must both be a vector or neither", &I);
+ Assert(SrcTy->isIntOrIntVectorTy(), "ZExt only operates on integer", &I);
+ Assert(DestTy->isIntOrIntVectorTy(), "ZExt only produces an integer", &I);
+ Assert(SrcTy->isVectorTy() == DestTy->isVectorTy(),
+ "zext source and destination must both be a vector or neither", &I);
unsigned SrcBitSize = SrcTy->getScalarSizeInBits();
unsigned DestBitSize = DestTy->getScalarSizeInBits();
- Assert1(SrcBitSize < DestBitSize,"Type too small for ZExt", &I);
+ Assert(SrcBitSize < DestBitSize, "Type too small for ZExt", &I);
visitInstruction(I);
}
@@ -1551,11 +1561,11 @@ void Verifier::visitSExtInst(SExtInst &I) {
unsigned SrcBitSize = SrcTy->getScalarSizeInBits();
unsigned DestBitSize = DestTy->getScalarSizeInBits();
- Assert1(SrcTy->isIntOrIntVectorTy(), "SExt only operates on integer", &I);
- Assert1(DestTy->isIntOrIntVectorTy(), "SExt only produces an integer", &I);
- Assert1(SrcTy->isVectorTy() == DestTy->isVectorTy(),
- "sext source and destination must both be a vector or neither", &I);
- Assert1(SrcBitSize < DestBitSize,"Type too small for SExt", &I);
+ Assert(SrcTy->isIntOrIntVectorTy(), "SExt only operates on integer", &I);
+ Assert(DestTy->isIntOrIntVectorTy(), "SExt only produces an integer", &I);
+ Assert(SrcTy->isVectorTy() == DestTy->isVectorTy(),
+ "sext source and destination must both be a vector or neither", &I);
+ Assert(SrcBitSize < DestBitSize, "Type too small for SExt", &I);
visitInstruction(I);
}
@@ -1568,11 +1578,11 @@ void Verifier::visitFPTruncInst(FPTruncInst &I) {
unsigned SrcBitSize = SrcTy->getScalarSizeInBits();
unsigned DestBitSize = DestTy->getScalarSizeInBits();
- Assert1(SrcTy->isFPOrFPVectorTy(),"FPTrunc only operates on FP", &I);
- Assert1(DestTy->isFPOrFPVectorTy(),"FPTrunc only produces an FP", &I);
- Assert1(SrcTy->isVectorTy() == DestTy->isVectorTy(),
- "fptrunc source and destination must both be a vector or neither",&I);
- Assert1(SrcBitSize > DestBitSize,"DestTy too big for FPTrunc", &I);
+ Assert(SrcTy->isFPOrFPVectorTy(), "FPTrunc only operates on FP", &I);
+ Assert(DestTy->isFPOrFPVectorTy(), "FPTrunc only produces an FP", &I);
+ Assert(SrcTy->isVectorTy() == DestTy->isVectorTy(),
+ "fptrunc source and destination must both be a vector or neither", &I);
+ Assert(SrcBitSize > DestBitSize, "DestTy too big for FPTrunc", &I);
visitInstruction(I);
}
@@ -1586,11 +1596,11 @@ void Verifier::visitFPExtInst(FPExtInst &I) {
unsigned SrcBitSize = SrcTy->getScalarSizeInBits();
unsigned DestBitSize = DestTy->getScalarSizeInBits();
- Assert1(SrcTy->isFPOrFPVectorTy(),"FPExt only operates on FP", &I);
- Assert1(DestTy->isFPOrFPVectorTy(),"FPExt only produces an FP", &I);
- Assert1(SrcTy->isVectorTy() == DestTy->isVectorTy(),
- "fpext source and destination must both be a vector or neither", &I);
- Assert1(SrcBitSize < DestBitSize,"DestTy too small for FPExt", &I);
+ Assert(SrcTy->isFPOrFPVectorTy(), "FPExt only operates on FP", &I);
+ Assert(DestTy->isFPOrFPVectorTy(), "FPExt only produces an FP", &I);
+ Assert(SrcTy->isVectorTy() == DestTy->isVectorTy(),
+ "fpext source and destination must both be a vector or neither", &I);
+ Assert(SrcBitSize < DestBitSize, "DestTy too small for FPExt", &I);
visitInstruction(I);
}
@@ -1603,17 +1613,17 @@ void Verifier::visitUIToFPInst(UIToFPInst &I) {
bool SrcVec = SrcTy->isVectorTy();
bool DstVec = DestTy->isVectorTy();
- Assert1(SrcVec == DstVec,
- "UIToFP source and dest must both be vector or scalar", &I);
- Assert1(SrcTy->isIntOrIntVectorTy(),
- "UIToFP source must be integer or integer vector", &I);
- Assert1(DestTy->isFPOrFPVectorTy(),
- "UIToFP result must be FP or FP vector", &I);
+ Assert(SrcVec == DstVec,
+ "UIToFP source and dest must both be vector or scalar", &I);
+ Assert(SrcTy->isIntOrIntVectorTy(),
+ "UIToFP source must be integer or integer vector", &I);
+ Assert(DestTy->isFPOrFPVectorTy(), "UIToFP result must be FP or FP vector",
+ &I);
if (SrcVec && DstVec)
- Assert1(cast<VectorType>(SrcTy)->getNumElements() ==
- cast<VectorType>(DestTy)->getNumElements(),
- "UIToFP source and dest vector length mismatch", &I);
+ Assert(cast<VectorType>(SrcTy)->getNumElements() ==
+ cast<VectorType>(DestTy)->getNumElements(),
+ "UIToFP source and dest vector length mismatch", &I);
visitInstruction(I);
}
@@ -1626,17 +1636,17 @@ void Verifier::visitSIToFPInst(SIToFPInst &I) {
bool SrcVec = SrcTy->isVectorTy();
bool DstVec = DestTy->isVectorTy();
- Assert1(SrcVec == DstVec,
- "SIToFP source and dest must both be vector or scalar", &I);
- Assert1(SrcTy->isIntOrIntVectorTy(),
- "SIToFP source must be integer or integer vector", &I);
- Assert1(DestTy->isFPOrFPVectorTy(),
- "SIToFP result must be FP or FP vector", &I);
+ Assert(SrcVec == DstVec,
+ "SIToFP source and dest must both be vector or scalar", &I);
+ Assert(SrcTy->isIntOrIntVectorTy(),
+ "SIToFP source must be integer or integer vector", &I);
+ Assert(DestTy->isFPOrFPVectorTy(), "SIToFP result must be FP or FP vector",
+ &I);
if (SrcVec && DstVec)
- Assert1(cast<VectorType>(SrcTy)->getNumElements() ==
- cast<VectorType>(DestTy)->getNumElements(),
- "SIToFP source and dest vector length mismatch", &I);
+ Assert(cast<VectorType>(SrcTy)->getNumElements() ==
+ cast<VectorType>(DestTy)->getNumElements(),
+ "SIToFP source and dest vector length mismatch", &I);
visitInstruction(I);
}
@@ -1649,17 +1659,17 @@ void Verifier::visitFPToUIInst(FPToUIInst &I) {
bool SrcVec = SrcTy->isVectorTy();
bool DstVec = DestTy->isVectorTy();
- Assert1(SrcVec == DstVec,
- "FPToUI source and dest must both be vector or scalar", &I);
- Assert1(SrcTy->isFPOrFPVectorTy(), "FPToUI source must be FP or FP vector",
- &I);
- Assert1(DestTy->isIntOrIntVectorTy(),
- "FPToUI result must be integer or integer vector", &I);
+ Assert(SrcVec == DstVec,
+ "FPToUI source and dest must both be vector or scalar", &I);
+ Assert(SrcTy->isFPOrFPVectorTy(), "FPToUI source must be FP or FP vector",
+ &I);
+ Assert(DestTy->isIntOrIntVectorTy(),
+ "FPToUI result must be integer or integer vector", &I);
if (SrcVec && DstVec)
- Assert1(cast<VectorType>(SrcTy)->getNumElements() ==
- cast<VectorType>(DestTy)->getNumElements(),
- "FPToUI source and dest vector length mismatch", &I);
+ Assert(cast<VectorType>(SrcTy)->getNumElements() ==
+ cast<VectorType>(DestTy)->getNumElements(),
+ "FPToUI source and dest vector length mismatch", &I);
visitInstruction(I);
}
@@ -1672,17 +1682,17 @@ void Verifier::visitFPToSIInst(FPToSIInst &I) {
bool SrcVec = SrcTy->isVectorTy();
bool DstVec = DestTy->isVectorTy();
- Assert1(SrcVec == DstVec,
- "FPToSI source and dest must both be vector or scalar", &I);
- Assert1(SrcTy->isFPOrFPVectorTy(),
- "FPToSI source must be FP or FP vector", &I);
- Assert1(DestTy->isIntOrIntVectorTy(),
- "FPToSI result must be integer or integer vector", &I);
+ Assert(SrcVec == DstVec,
+ "FPToSI source and dest must both be vector or scalar", &I);
+ Assert(SrcTy->isFPOrFPVectorTy(), "FPToSI source must be FP or FP vector",
+ &I);
+ Assert(DestTy->isIntOrIntVectorTy(),
+ "FPToSI result must be integer or integer vector", &I);
if (SrcVec && DstVec)
- Assert1(cast<VectorType>(SrcTy)->getNumElements() ==
- cast<VectorType>(DestTy)->getNumElements(),
- "FPToSI source and dest vector length mismatch", &I);
+ Assert(cast<VectorType>(SrcTy)->getNumElements() ==
+ cast<VectorType>(DestTy)->getNumElements(),
+ "FPToSI source and dest vector length mismatch", &I);
visitInstruction(I);
}
@@ -1692,18 +1702,18 @@ void Verifier::visitPtrToIntInst(PtrToIntInst &I) {
Type *SrcTy = I.getOperand(0)->getType();
Type *DestTy = I.getType();
- Assert1(SrcTy->getScalarType()->isPointerTy(),
- "PtrToInt source must be pointer", &I);
- Assert1(DestTy->getScalarType()->isIntegerTy(),
- "PtrToInt result must be integral", &I);
- Assert1(SrcTy->isVectorTy() == DestTy->isVectorTy(),
- "PtrToInt type mismatch", &I);
+ Assert(SrcTy->getScalarType()->isPointerTy(),
+ "PtrToInt source must be pointer", &I);
+ Assert(DestTy->getScalarType()->isIntegerTy(),
+ "PtrToInt result must be integral", &I);
+ Assert(SrcTy->isVectorTy() == DestTy->isVectorTy(), "PtrToInt type mismatch",
+ &I);
if (SrcTy->isVectorTy()) {
VectorType *VSrc = dyn_cast<VectorType>(SrcTy);
VectorType *VDest = dyn_cast<VectorType>(DestTy);
- Assert1(VSrc->getNumElements() == VDest->getNumElements(),
- "PtrToInt Vector width mismatch", &I);
+ Assert(VSrc->getNumElements() == VDest->getNumElements(),
+ "PtrToInt Vector width mismatch", &I);
}
visitInstruction(I);
@@ -1714,23 +1724,23 @@ void Verifier::visitIntToPtrInst(IntToPtrInst &I) {
Type *SrcTy = I.getOperand(0)->getType();
Type *DestTy = I.getType();
- Assert1(SrcTy->getScalarType()->isIntegerTy(),
- "IntToPtr source must be an integral", &I);
- Assert1(DestTy->getScalarType()->isPointerTy(),
- "IntToPtr result must be a pointer",&I);
- Assert1(SrcTy->isVectorTy() == DestTy->isVectorTy(),
- "IntToPtr type mismatch", &I);
+ Assert(SrcTy->getScalarType()->isIntegerTy(),
+ "IntToPtr source must be an integral", &I);
+ Assert(DestTy->getScalarType()->isPointerTy(),
+ "IntToPtr result must be a pointer", &I);
+ Assert(SrcTy->isVectorTy() == DestTy->isVectorTy(), "IntToPtr type mismatch",
+ &I);
if (SrcTy->isVectorTy()) {
VectorType *VSrc = dyn_cast<VectorType>(SrcTy);
VectorType *VDest = dyn_cast<VectorType>(DestTy);
- Assert1(VSrc->getNumElements() == VDest->getNumElements(),
- "IntToPtr Vector width mismatch", &I);
+ Assert(VSrc->getNumElements() == VDest->getNumElements(),
+ "IntToPtr Vector width mismatch", &I);
}
visitInstruction(I);
}
void Verifier::visitBitCastInst(BitCastInst &I) {
- Assert1(
+ Assert(
CastInst::castIsValid(Instruction::BitCast, I.getOperand(0), I.getType()),
"Invalid bitcast", &I);
visitInstruction(I);
@@ -1740,15 +1750,15 @@ void Verifier::visitAddrSpaceCastInst(AddrSpaceCastInst &I) {
Type *SrcTy = I.getOperand(0)->getType();
Type *DestTy = I.getType();
- Assert1(SrcTy->isPtrOrPtrVectorTy(),
- "AddrSpaceCast source must be a pointer", &I);
- Assert1(DestTy->isPtrOrPtrVectorTy(),
- "AddrSpaceCast result must be a pointer", &I);
- Assert1(SrcTy->getPointerAddressSpace() != DestTy->getPointerAddressSpace(),
- "AddrSpaceCast must be between different address spaces", &I);
+ Assert(SrcTy->isPtrOrPtrVectorTy(), "AddrSpaceCast source must be a pointer",
+ &I);
+ Assert(DestTy->isPtrOrPtrVectorTy(), "AddrSpaceCast result must be a pointer",
+ &I);
+ Assert(SrcTy->getPointerAddressSpace() != DestTy->getPointerAddressSpace(),
+ "AddrSpaceCast must be between different address spaces", &I);
if (SrcTy->isVectorTy())
- Assert1(SrcTy->getVectorNumElements() == DestTy->getVectorNumElements(),
- "AddrSpaceCast vector pointer number of elements mismatch", &I);
+ Assert(SrcTy->getVectorNumElements() == DestTy->getVectorNumElements(),
+ "AddrSpaceCast vector pointer number of elements mismatch", &I);
visitInstruction(I);
}
@@ -1759,16 +1769,15 @@ void Verifier::visitPHINode(PHINode &PN) {
// This can be tested by checking whether the instruction before this is
// either nonexistent (because this is begin()) or is a PHI node. If not,
// then there is some other instruction before a PHI.
- Assert2(&PN == &PN.getParent()->front() ||
- isa<PHINode>(--BasicBlock::iterator(&PN)),
- "PHI nodes not grouped at top of basic block!",
- &PN, PN.getParent());
+ Assert(&PN == &PN.getParent()->front() ||
+ isa<PHINode>(--BasicBlock::iterator(&PN)),
+ "PHI nodes not grouped at top of basic block!", &PN, PN.getParent());
// Check that all of the values of the PHI node have the same type as the
// result, and that the incoming blocks are really basic blocks.
for (unsigned i = 0, e = PN.getNumIncomingValues(); i != e; ++i) {
- Assert1(PN.getType() == PN.getIncomingValue(i)->getType(),
- "PHI node operands are not the same type as the result!", &PN);
+ Assert(PN.getType() == PN.getIncomingValue(i)->getType(),
+ "PHI node operands are not the same type as the result!", &PN);
}
// All other PHI node constraints are checked in the visitBasicBlock method.
@@ -1779,32 +1788,32 @@ void Verifier::visitPHINode(PHINode &PN) {
void Verifier::VerifyCallSite(CallSite CS) {
Instruction *I = CS.getInstruction();
- Assert1(CS.getCalledValue()->getType()->isPointerTy(),
- "Called function must be a pointer!", I);
+ Assert(CS.getCalledValue()->getType()->isPointerTy(),
+ "Called function must be a pointer!", I);
PointerType *FPTy = cast<PointerType>(CS.getCalledValue()->getType());
- Assert1(FPTy->getElementType()->isFunctionTy(),
- "Called function is not pointer to function type!", I);
+ Assert(FPTy->getElementType()->isFunctionTy(),
+ "Called function is not pointer to function type!", I);
FunctionType *FTy = cast<FunctionType>(FPTy->getElementType());
// Verify that the correct number of arguments are being passed
if (FTy->isVarArg())
- Assert1(CS.arg_size() >= FTy->getNumParams(),
- "Called function requires more parameters than were provided!",I);
+ Assert(CS.arg_size() >= FTy->getNumParams(),
+ "Called function requires more parameters than were provided!", I);
else
- Assert1(CS.arg_size() == FTy->getNumParams(),
- "Incorrect number of arguments passed to called function!", I);
+ Assert(CS.arg_size() == FTy->getNumParams(),
+ "Incorrect number of arguments passed to called function!", I);
// Verify that all arguments to the call match the function type.
for (unsigned i = 0, e = FTy->getNumParams(); i != e; ++i)
- Assert3(CS.getArgument(i)->getType() == FTy->getParamType(i),
- "Call parameter type does not match function signature!",
- CS.getArgument(i), FTy->getParamType(i), I);
+ Assert(CS.getArgument(i)->getType() == FTy->getParamType(i),
+ "Call parameter type does not match function signature!",
+ CS.getArgument(i), FTy->getParamType(i), I);
AttributeSet Attrs = CS.getAttributes();
- Assert1(VerifyAttributeCount(Attrs, CS.arg_size()),
- "Attribute after last parameter!", I);
+ Assert(VerifyAttributeCount(Attrs, CS.arg_size()),
+ "Attribute after last parameter!", I);
// Verify call attributes.
VerifyFunctionAttrs(FTy, Attrs, I);
@@ -1815,8 +1824,8 @@ void Verifier::VerifyCallSite(CallSite CS) {
if (CS.hasInAllocaArgument()) {
Value *InAllocaArg = CS.getArgument(FTy->getNumParams() - 1);
if (auto AI = dyn_cast<AllocaInst>(InAllocaArg->stripInBoundsOffsets()))
- Assert2(AI->isUsedWithInAlloca(),
- "inalloca argument for call has mismatched alloca", AI, I);
+ Assert(AI->isUsedWithInAlloca(),
+ "inalloca argument for call has mismatched alloca", AI, I);
}
if (FTy->isVarArg()) {
@@ -1837,25 +1846,25 @@ void Verifier::VerifyCallSite(CallSite CS) {
VerifyParameterAttrs(Attrs, Idx, Ty, false, I);
if (Attrs.hasAttribute(Idx, Attribute::Nest)) {
- Assert1(!SawNest, "More than one parameter has attribute nest!", I);
+ Assert(!SawNest, "More than one parameter has attribute nest!", I);
SawNest = true;
}
if (Attrs.hasAttribute(Idx, Attribute::Returned)) {
- Assert1(!SawReturned, "More than one parameter has attribute returned!",
- I);
- Assert1(Ty->canLosslesslyBitCastTo(FTy->getReturnType()),
- "Incompatible argument and return types for 'returned' "
- "attribute", I);
+ Assert(!SawReturned, "More than one parameter has attribute returned!",
+ I);
+ Assert(Ty->canLosslesslyBitCastTo(FTy->getReturnType()),
+ "Incompatible argument and return types for 'returned' "
+ "attribute",
+ I);
SawReturned = true;
}
- Assert1(!Attrs.hasAttribute(Idx, Attribute::StructRet),
- "Attribute 'sret' cannot be used for vararg call arguments!", I);
+ Assert(!Attrs.hasAttribute(Idx, Attribute::StructRet),
+ "Attribute 'sret' cannot be used for vararg call arguments!", I);
if (Attrs.hasAttribute(Idx, Attribute::InAlloca))
- Assert1(Idx == CS.arg_size(), "inalloca isn't on the last argument!",
- I);
+ Assert(Idx == CS.arg_size(), "inalloca isn't on the last argument!", I);
}
}
@@ -1864,8 +1873,8 @@ void Verifier::VerifyCallSite(CallSite CS) {
!CS.getCalledFunction()->getName().startswith("llvm.")) {
for (FunctionType::param_iterator PI = FTy->param_begin(),
PE = FTy->param_end(); PI != PE; ++PI)
- Assert1(!(*PI)->isMetadataTy(),
- "Function has metadata parameter but isn't an intrinsic", I);
+ Assert(!(*PI)->isMetadataTy(),
+ "Function has metadata parameter but isn't an intrinsic", I);
}
visitInstruction(*I);
@@ -1898,7 +1907,7 @@ static AttrBuilder getParameterABIAttributes(int I, AttributeSet Attrs) {
}
void Verifier::verifyMustTailCall(CallInst &CI) {
- Assert1(!CI.isInlineAsm(), "cannot use musttail call with inline asm", &CI);
+ Assert(!CI.isInlineAsm(), "cannot use musttail call with inline asm", &CI);
// - The caller and callee prototypes must match. Pointer types of
// parameters or return types may differ in pointee type, but not
@@ -1910,21 +1919,21 @@ void Verifier::verifyMustTailCall(CallInst &CI) {
};
FunctionType *CallerTy = GetFnTy(F);
FunctionType *CalleeTy = GetFnTy(CI.getCalledValue());
- Assert1(CallerTy->getNumParams() == CalleeTy->getNumParams(),
- "cannot guarantee tail call due to mismatched parameter counts", &CI);
- Assert1(CallerTy->isVarArg() == CalleeTy->isVarArg(),
- "cannot guarantee tail call due to mismatched varargs", &CI);
- Assert1(isTypeCongruent(CallerTy->getReturnType(), CalleeTy->getReturnType()),
- "cannot guarantee tail call due to mismatched return types", &CI);
+ Assert(CallerTy->getNumParams() == CalleeTy->getNumParams(),
+ "cannot guarantee tail call due to mismatched parameter counts", &CI);
+ Assert(CallerTy->isVarArg() == CalleeTy->isVarArg(),
+ "cannot guarantee tail call due to mismatched varargs", &CI);
+ Assert(isTypeCongruent(CallerTy->getReturnType(), CalleeTy->getReturnType()),
+ "cannot guarantee tail call due to mismatched return types", &CI);
for (int I = 0, E = CallerTy->getNumParams(); I != E; ++I) {
- Assert1(
+ Assert(
isTypeCongruent(CallerTy->getParamType(I), CalleeTy->getParamType(I)),
"cannot guarantee tail call due to mismatched parameter types", &CI);
}
// - The calling conventions of the caller and callee must match.
- Assert1(F->getCallingConv() == CI.getCallingConv(),
- "cannot guarantee tail call due to mismatched calling conv", &CI);
+ Assert(F->getCallingConv() == CI.getCallingConv(),
+ "cannot guarantee tail call due to mismatched calling conv", &CI);
// - All ABI-impacting function attributes, such as sret, byval, inreg,
// returned, and inalloca, must match.
@@ -1933,9 +1942,10 @@ void Verifier::verifyMustTailCall(CallInst &CI) {
for (int I = 0, E = CallerTy->getNumParams(); I != E; ++I) {
AttrBuilder CallerABIAttrs = getParameterABIAttributes(I, CallerAttrs);
AttrBuilder CalleeABIAttrs = getParameterABIAttributes(I, CalleeAttrs);
- Assert2(CallerABIAttrs == CalleeABIAttrs,
- "cannot guarantee tail call due to mismatched ABI impacting "
- "function attributes", &CI, CI.getOperand(I));
+ Assert(CallerABIAttrs == CalleeABIAttrs,
+ "cannot guarantee tail call due to mismatched ABI impacting "
+ "function attributes",
+ &CI, CI.getOperand(I));
}
// - The call must immediately precede a :ref:`ret <i_ret>` instruction,
@@ -1947,18 +1957,18 @@ void Verifier::verifyMustTailCall(CallInst &CI) {
// Handle the optional bitcast.
if (BitCastInst *BI = dyn_cast_or_null<BitCastInst>(Next)) {
- Assert1(BI->getOperand(0) == RetVal,
- "bitcast following musttail call must use the call", BI);
+ Assert(BI->getOperand(0) == RetVal,
+ "bitcast following musttail call must use the call", BI);
RetVal = BI;
Next = BI->getNextNode();
}
// Check the return.
ReturnInst *Ret = dyn_cast_or_null<ReturnInst>(Next);
- Assert1(Ret, "musttail call must be precede a ret with an optional bitcast",
- &CI);
- Assert1(!Ret->getReturnValue() || Ret->getReturnValue() == RetVal,
- "musttail call result must be returned", Ret);
+  Assert(Ret, "musttail call must precede a ret with an optional bitcast",
+ &CI);
+ Assert(!Ret->getReturnValue() || Ret->getReturnValue() == RetVal,
+ "musttail call result must be returned", Ret);
}
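
For illustration, a minimal IR sketch that satisfies the musttail rules checked above; the function and value names are hypothetical and the syntax assumes the IR of this era:

  define i32 @caller(i32 %x) {
  entry:
    ; matching prototypes, matching calling conventions, and a ret
    ; that immediately follows and returns the call result
    %r = musttail call i32 @callee(i32 %x)
    ret i32 %r
  }
  declare i32 @callee(i32)
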
void Verifier::visitCallInst(CallInst &CI) {
@@ -1977,8 +1987,8 @@ void Verifier::visitInvokeInst(InvokeInst &II) {
// Verify that there is a landingpad instruction as the first non-PHI
// instruction of the 'unwind' destination.
- Assert1(II.getUnwindDest()->isLandingPad(),
- "The unwind destination does not have a landingpad instruction!",&II);
+ Assert(II.getUnwindDest()->isLandingPad(),
+ "The unwind destination does not have a landingpad instruction!", &II);
if (Function *F = II.getCalledFunction())
// TODO: Ideally we should use visitIntrinsicFunction here. But it uses
@@ -1994,8 +2004,8 @@ void Verifier::visitInvokeInst(InvokeInst &II) {
/// of the same type!
///
void Verifier::visitBinaryOperator(BinaryOperator &B) {
- Assert1(B.getOperand(0)->getType() == B.getOperand(1)->getType(),
- "Both operands to a binary operator are not of the same type!", &B);
+ Assert(B.getOperand(0)->getType() == B.getOperand(1)->getType(),
+ "Both operands to a binary operator are not of the same type!", &B);
switch (B.getOpcode()) {
// Check that integer arithmetic operators are only used with
@@ -2007,11 +2017,12 @@ void Verifier::visitBinaryOperator(BinaryOperator &B) {
case Instruction::UDiv:
case Instruction::SRem:
case Instruction::URem:
- Assert1(B.getType()->isIntOrIntVectorTy(),
- "Integer arithmetic operators only work with integral types!", &B);
- Assert1(B.getType() == B.getOperand(0)->getType(),
- "Integer arithmetic operators must have same type "
- "for operands and result!", &B);
+ Assert(B.getType()->isIntOrIntVectorTy(),
+ "Integer arithmetic operators only work with integral types!", &B);
+ Assert(B.getType() == B.getOperand(0)->getType(),
+ "Integer arithmetic operators must have same type "
+ "for operands and result!",
+ &B);
break;
// Check that floating-point arithmetic operators are only used with
// floating-point operands.
@@ -2020,30 +2031,32 @@ void Verifier::visitBinaryOperator(BinaryOperator &B) {
case Instruction::FMul:
case Instruction::FDiv:
case Instruction::FRem:
- Assert1(B.getType()->isFPOrFPVectorTy(),
- "Floating-point arithmetic operators only work with "
- "floating-point types!", &B);
- Assert1(B.getType() == B.getOperand(0)->getType(),
- "Floating-point arithmetic operators must have same type "
- "for operands and result!", &B);
+ Assert(B.getType()->isFPOrFPVectorTy(),
+ "Floating-point arithmetic operators only work with "
+ "floating-point types!",
+ &B);
+ Assert(B.getType() == B.getOperand(0)->getType(),
+ "Floating-point arithmetic operators must have same type "
+ "for operands and result!",
+ &B);
break;
// Check that logical operators are only used with integral operands.
case Instruction::And:
case Instruction::Or:
case Instruction::Xor:
- Assert1(B.getType()->isIntOrIntVectorTy(),
- "Logical operators only work with integral types!", &B);
- Assert1(B.getType() == B.getOperand(0)->getType(),
- "Logical operators must have same type for operands and result!",
- &B);
+ Assert(B.getType()->isIntOrIntVectorTy(),
+ "Logical operators only work with integral types!", &B);
+ Assert(B.getType() == B.getOperand(0)->getType(),
+ "Logical operators must have same type for operands and result!",
+ &B);
break;
case Instruction::Shl:
case Instruction::LShr:
case Instruction::AShr:
- Assert1(B.getType()->isIntOrIntVectorTy(),
- "Shifts only work with integral types!", &B);
- Assert1(B.getType() == B.getOperand(0)->getType(),
- "Shift return type must be same as operands!", &B);
+ Assert(B.getType()->isIntOrIntVectorTy(),
+ "Shifts only work with integral types!", &B);
+ Assert(B.getType() == B.getOperand(0)->getType(),
+ "Shift return type must be same as operands!", &B);
break;
default:
llvm_unreachable("Unknown BinaryOperator opcode!");
@@ -2056,15 +2069,15 @@ void Verifier::visitICmpInst(ICmpInst &IC) {
// Check that the operands are the same type
Type *Op0Ty = IC.getOperand(0)->getType();
Type *Op1Ty = IC.getOperand(1)->getType();
- Assert1(Op0Ty == Op1Ty,
- "Both operands to ICmp instruction are not of the same type!", &IC);
+ Assert(Op0Ty == Op1Ty,
+ "Both operands to ICmp instruction are not of the same type!", &IC);
// Check that the operands are the right type
- Assert1(Op0Ty->isIntOrIntVectorTy() || Op0Ty->getScalarType()->isPointerTy(),
- "Invalid operand types for ICmp instruction", &IC);
+ Assert(Op0Ty->isIntOrIntVectorTy() || Op0Ty->getScalarType()->isPointerTy(),
+ "Invalid operand types for ICmp instruction", &IC);
// Check that the predicate is valid.
- Assert1(IC.getPredicate() >= CmpInst::FIRST_ICMP_PREDICATE &&
- IC.getPredicate() <= CmpInst::LAST_ICMP_PREDICATE,
- "Invalid predicate in ICmp instruction!", &IC);
+ Assert(IC.getPredicate() >= CmpInst::FIRST_ICMP_PREDICATE &&
+ IC.getPredicate() <= CmpInst::LAST_ICMP_PREDICATE,
+ "Invalid predicate in ICmp instruction!", &IC);
visitInstruction(IC);
}
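
A minimal instance that passes all three icmp checks (hypothetical names):

  define i1 @less(i32 %a, i32 %b) {
    ; operands share one type, the type is integral, and slt is a
    ; predicate inside the valid icmp range
    %c = icmp slt i32 %a, %b
    ret i1 %c
  }
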
@@ -2073,72 +2086,72 @@ void Verifier::visitFCmpInst(FCmpInst &FC) {
// Check that the operands are the same type
Type *Op0Ty = FC.getOperand(0)->getType();
Type *Op1Ty = FC.getOperand(1)->getType();
- Assert1(Op0Ty == Op1Ty,
- "Both operands to FCmp instruction are not of the same type!", &FC);
+ Assert(Op0Ty == Op1Ty,
+ "Both operands to FCmp instruction are not of the same type!", &FC);
// Check that the operands are the right type
- Assert1(Op0Ty->isFPOrFPVectorTy(),
- "Invalid operand types for FCmp instruction", &FC);
+ Assert(Op0Ty->isFPOrFPVectorTy(),
+ "Invalid operand types for FCmp instruction", &FC);
// Check that the predicate is valid.
- Assert1(FC.getPredicate() >= CmpInst::FIRST_FCMP_PREDICATE &&
- FC.getPredicate() <= CmpInst::LAST_FCMP_PREDICATE,
- "Invalid predicate in FCmp instruction!", &FC);
+ Assert(FC.getPredicate() >= CmpInst::FIRST_FCMP_PREDICATE &&
+ FC.getPredicate() <= CmpInst::LAST_FCMP_PREDICATE,
+ "Invalid predicate in FCmp instruction!", &FC);
visitInstruction(FC);
}
void Verifier::visitExtractElementInst(ExtractElementInst &EI) {
- Assert1(ExtractElementInst::isValidOperands(EI.getOperand(0),
- EI.getOperand(1)),
- "Invalid extractelement operands!", &EI);
+ Assert(
+ ExtractElementInst::isValidOperands(EI.getOperand(0), EI.getOperand(1)),
+ "Invalid extractelement operands!", &EI);
visitInstruction(EI);
}
void Verifier::visitInsertElementInst(InsertElementInst &IE) {
- Assert1(InsertElementInst::isValidOperands(IE.getOperand(0),
- IE.getOperand(1),
- IE.getOperand(2)),
- "Invalid insertelement operands!", &IE);
+ Assert(InsertElementInst::isValidOperands(IE.getOperand(0), IE.getOperand(1),
+ IE.getOperand(2)),
+ "Invalid insertelement operands!", &IE);
visitInstruction(IE);
}
void Verifier::visitShuffleVectorInst(ShuffleVectorInst &SV) {
- Assert1(ShuffleVectorInst::isValidOperands(SV.getOperand(0), SV.getOperand(1),
- SV.getOperand(2)),
- "Invalid shufflevector operands!", &SV);
+ Assert(ShuffleVectorInst::isValidOperands(SV.getOperand(0), SV.getOperand(1),
+ SV.getOperand(2)),
+ "Invalid shufflevector operands!", &SV);
visitInstruction(SV);
}
void Verifier::visitGetElementPtrInst(GetElementPtrInst &GEP) {
Type *TargetTy = GEP.getPointerOperandType()->getScalarType();
- Assert1(isa<PointerType>(TargetTy),
- "GEP base pointer is not a vector or a vector of pointers", &GEP);
- Assert1(cast<PointerType>(TargetTy)->getElementType()->isSized(),
- "GEP into unsized type!", &GEP);
- Assert1(GEP.getPointerOperandType()->isVectorTy() ==
- GEP.getType()->isVectorTy(), "Vector GEP must return a vector value",
- &GEP);
+ Assert(isa<PointerType>(TargetTy),
+ "GEP base pointer is not a vector or a vector of pointers", &GEP);
+ Assert(cast<PointerType>(TargetTy)->getElementType()->isSized(),
+ "GEP into unsized type!", &GEP);
+ Assert(GEP.getPointerOperandType()->isVectorTy() ==
+ GEP.getType()->isVectorTy(),
+ "Vector GEP must return a vector value", &GEP);
SmallVector<Value*, 16> Idxs(GEP.idx_begin(), GEP.idx_end());
Type *ElTy =
GetElementPtrInst::getIndexedType(GEP.getPointerOperandType(), Idxs);
- Assert1(ElTy, "Invalid indices for GEP pointer type!", &GEP);
+ Assert(ElTy, "Invalid indices for GEP pointer type!", &GEP);
- Assert2(GEP.getType()->getScalarType()->isPointerTy() &&
- cast<PointerType>(GEP.getType()->getScalarType())->getElementType()
- == ElTy, "GEP is not of right type for indices!", &GEP, ElTy);
+ Assert(GEP.getType()->getScalarType()->isPointerTy() &&
+ cast<PointerType>(GEP.getType()->getScalarType())
+ ->getElementType() == ElTy,
+ "GEP is not of right type for indices!", &GEP, ElTy);
if (GEP.getPointerOperandType()->isVectorTy()) {
// Additional checks for vector GEPs.
unsigned GepWidth = GEP.getPointerOperandType()->getVectorNumElements();
- Assert1(GepWidth == GEP.getType()->getVectorNumElements(),
- "Vector GEP result width doesn't match operand's", &GEP);
+ Assert(GepWidth == GEP.getType()->getVectorNumElements(),
+ "Vector GEP result width doesn't match operand's", &GEP);
for (unsigned i = 0, e = Idxs.size(); i != e; ++i) {
Type *IndexTy = Idxs[i]->getType();
- Assert1(IndexTy->isVectorTy(),
- "Vector GEP must have vector indices!", &GEP);
+ Assert(IndexTy->isVectorTy(), "Vector GEP must have vector indices!",
+ &GEP);
unsigned IndexWidth = IndexTy->getVectorNumElements();
- Assert1(IndexWidth == GepWidth, "Invalid GEP index vector width", &GEP);
+ Assert(IndexWidth == GepWidth, "Invalid GEP index vector width", &GEP);
}
}
visitInstruction(GEP);
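
A sketch of a vector GEP that these checks accept (hypothetical names, assuming the explicit-source-type getelementptr syntax of this era): the base is a vector of pointers, the index is a vector, and every width agrees with the result:

  define <2 x i32*> @gep_vec(<2 x i32*> %base, <2 x i64> %idx) {
    ; result width (2) matches both the pointer-operand width and
    ; the width of every vector index
    %p = getelementptr i32, <2 x i32*> %base, <2 x i64> %idx
    ret <2 x i32*> %p
  }
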
@@ -2155,34 +2168,33 @@ void Verifier::visitRangeMetadata(Instruction& I,
"precondition violation");
unsigned NumOperands = Range->getNumOperands();
- Assert1(NumOperands % 2 == 0, "Unfinished range!", Range);
+ Assert(NumOperands % 2 == 0, "Unfinished range!", Range);
unsigned NumRanges = NumOperands / 2;
- Assert1(NumRanges >= 1, "It should have at least one range!", Range);
-
+ Assert(NumRanges >= 1, "It should have at least one range!", Range);
+
ConstantRange LastRange(1); // Dummy initial value
for (unsigned i = 0; i < NumRanges; ++i) {
ConstantInt *Low =
mdconst::dyn_extract<ConstantInt>(Range->getOperand(2 * i));
- Assert1(Low, "The lower limit must be an integer!", Low);
+ Assert(Low, "The lower limit must be an integer!", Low);
ConstantInt *High =
mdconst::dyn_extract<ConstantInt>(Range->getOperand(2 * i + 1));
- Assert1(High, "The upper limit must be an integer!", High);
- Assert1(High->getType() == Low->getType() &&
- High->getType() == Ty, "Range types must match instruction type!",
- &I);
-
+ Assert(High, "The upper limit must be an integer!", High);
+ Assert(High->getType() == Low->getType() && High->getType() == Ty,
+ "Range types must match instruction type!", &I);
+
APInt HighV = High->getValue();
APInt LowV = Low->getValue();
ConstantRange CurRange(LowV, HighV);
- Assert1(!CurRange.isEmptySet() && !CurRange.isFullSet(),
- "Range must not be empty!", Range);
+ Assert(!CurRange.isEmptySet() && !CurRange.isFullSet(),
+ "Range must not be empty!", Range);
if (i != 0) {
- Assert1(CurRange.intersectWith(LastRange).isEmptySet(),
- "Intervals are overlapping", Range);
- Assert1(LowV.sgt(LastRange.getLower()), "Intervals are not in order",
- Range);
- Assert1(!isContiguous(CurRange, LastRange), "Intervals are contiguous",
- Range);
+ Assert(CurRange.intersectWith(LastRange).isEmptySet(),
+ "Intervals are overlapping", Range);
+ Assert(LowV.sgt(LastRange.getLower()), "Intervals are not in order",
+ Range);
+ Assert(!isContiguous(CurRange, LastRange), "Intervals are contiguous",
+ Range);
}
LastRange = ConstantRange(LowV, HighV);
}
@@ -2192,38 +2204,37 @@ void Verifier::visitRangeMetadata(Instruction& I,
APInt FirstHigh =
mdconst::dyn_extract<ConstantInt>(Range->getOperand(1))->getValue();
ConstantRange FirstRange(FirstLow, FirstHigh);
- Assert1(FirstRange.intersectWith(LastRange).isEmptySet(),
- "Intervals are overlapping", Range);
- Assert1(!isContiguous(FirstRange, LastRange), "Intervals are contiguous",
- Range);
+ Assert(FirstRange.intersectWith(LastRange).isEmptySet(),
+ "Intervals are overlapping", Range);
+ Assert(!isContiguous(FirstRange, LastRange), "Intervals are contiguous",
+ Range);
}
}
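
A small sketch of !range metadata that passes these checks (hypothetical names): an even operand count, intervals that are in order, disjoint, and not contiguous, with limit types matching the loaded type:

  define i8 @load_ranged(i8* %p) {
    %v = load i8, i8* %p, !range !0
    ret i8 %v
  }
  ; two half-open intervals, [0,2) and [4,6): ordered, non-overlapping,
  ; non-contiguous, and typed i8 to match the load
  !0 = !{i8 0, i8 2, i8 4, i8 6}
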
void Verifier::visitLoadInst(LoadInst &LI) {
PointerType *PTy = dyn_cast<PointerType>(LI.getOperand(0)->getType());
- Assert1(PTy, "Load operand must be a pointer.", &LI);
+ Assert(PTy, "Load operand must be a pointer.", &LI);
Type *ElTy = PTy->getElementType();
- Assert2(ElTy == LI.getType(),
- "Load result type does not match pointer operand type!", &LI, ElTy);
- Assert1(LI.getAlignment() <= Value::MaximumAlignment,
- "huge alignment values are unsupported", &LI);
+ Assert(ElTy == LI.getType(),
+ "Load result type does not match pointer operand type!", &LI, ElTy);
+ Assert(LI.getAlignment() <= Value::MaximumAlignment,
+ "huge alignment values are unsupported", &LI);
if (LI.isAtomic()) {
- Assert1(LI.getOrdering() != Release && LI.getOrdering() != AcquireRelease,
- "Load cannot have Release ordering", &LI);
- Assert1(LI.getAlignment() != 0,
- "Atomic load must specify explicit alignment", &LI);
+ Assert(LI.getOrdering() != Release && LI.getOrdering() != AcquireRelease,
+ "Load cannot have Release ordering", &LI);
+ Assert(LI.getAlignment() != 0,
+ "Atomic load must specify explicit alignment", &LI);
if (!ElTy->isPointerTy()) {
- Assert2(ElTy->isIntegerTy(),
- "atomic load operand must have integer type!",
- &LI, ElTy);
+ Assert(ElTy->isIntegerTy(), "atomic load operand must have integer type!",
+ &LI, ElTy);
unsigned Size = ElTy->getPrimitiveSizeInBits();
- Assert2(Size >= 8 && !(Size & (Size - 1)),
- "atomic load operand must be power-of-two byte-sized integer",
- &LI, ElTy);
+ Assert(Size >= 8 && !(Size & (Size - 1)),
+ "atomic load operand must be power-of-two byte-sized integer", &LI,
+ ElTy);
}
} else {
- Assert1(LI.getSynchScope() == CrossThread,
- "Non-atomic load cannot have SynchronizationScope specified", &LI);
+ Assert(LI.getSynchScope() == CrossThread,
+ "Non-atomic load cannot have SynchronizationScope specified", &LI);
}
visitInstruction(LI);
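
An atomic load that satisfies the rules above, sketched with hypothetical names: an ordering other than release or acq_rel, an explicit alignment, and a power-of-two byte-sized integer type:

  define i32 @atomic_read(i32* %p) {
    ; acquire is legal for loads; the explicit align 4 satisfies the
    ; atomic-alignment requirement
    %v = load atomic i32, i32* %p acquire, align 4
    ret i32 %v
  }
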
@@ -2231,30 +2242,28 @@ void Verifier::visitLoadInst(LoadInst &LI) {
void Verifier::visitStoreInst(StoreInst &SI) {
PointerType *PTy = dyn_cast<PointerType>(SI.getOperand(1)->getType());
- Assert1(PTy, "Store operand must be a pointer.", &SI);
+ Assert(PTy, "Store operand must be a pointer.", &SI);
Type *ElTy = PTy->getElementType();
- Assert2(ElTy == SI.getOperand(0)->getType(),
- "Stored value type does not match pointer operand type!",
- &SI, ElTy);
- Assert1(SI.getAlignment() <= Value::MaximumAlignment,
- "huge alignment values are unsupported", &SI);
+ Assert(ElTy == SI.getOperand(0)->getType(),
+ "Stored value type does not match pointer operand type!", &SI, ElTy);
+ Assert(SI.getAlignment() <= Value::MaximumAlignment,
+ "huge alignment values are unsupported", &SI);
if (SI.isAtomic()) {
- Assert1(SI.getOrdering() != Acquire && SI.getOrdering() != AcquireRelease,
- "Store cannot have Acquire ordering", &SI);
- Assert1(SI.getAlignment() != 0,
- "Atomic store must specify explicit alignment", &SI);
+ Assert(SI.getOrdering() != Acquire && SI.getOrdering() != AcquireRelease,
+ "Store cannot have Acquire ordering", &SI);
+ Assert(SI.getAlignment() != 0,
+ "Atomic store must specify explicit alignment", &SI);
if (!ElTy->isPointerTy()) {
- Assert2(ElTy->isIntegerTy(),
- "atomic store operand must have integer type!",
- &SI, ElTy);
+ Assert(ElTy->isIntegerTy(),
+ "atomic store operand must have integer type!", &SI, ElTy);
unsigned Size = ElTy->getPrimitiveSizeInBits();
- Assert2(Size >= 8 && !(Size & (Size - 1)),
- "atomic store operand must be power-of-two byte-sized integer",
- &SI, ElTy);
+ Assert(Size >= 8 && !(Size & (Size - 1)),
+ "atomic store operand must be power-of-two byte-sized integer",
+ &SI, ElTy);
}
} else {
- Assert1(SI.getSynchScope() == CrossThread,
- "Non-atomic store cannot have SynchronizationScope specified", &SI);
+ Assert(SI.getSynchScope() == CrossThread,
+ "Non-atomic store cannot have SynchronizationScope specified", &SI);
}
visitInstruction(SI);
}
@@ -2262,15 +2271,15 @@ void Verifier::visitStoreInst(StoreInst &SI) {
void Verifier::visitAllocaInst(AllocaInst &AI) {
SmallPtrSet<const Type*, 4> Visited;
PointerType *PTy = AI.getType();
- Assert1(PTy->getAddressSpace() == 0,
- "Allocation instruction pointer not in the generic address space!",
- &AI);
- Assert1(PTy->getElementType()->isSized(&Visited), "Cannot allocate unsized type",
- &AI);
- Assert1(AI.getArraySize()->getType()->isIntegerTy(),
- "Alloca array size must have integer type", &AI);
- Assert1(AI.getAlignment() <= Value::MaximumAlignment,
- "huge alignment values are unsupported", &AI);
+ Assert(PTy->getAddressSpace() == 0,
+ "Allocation instruction pointer not in the generic address space!",
+ &AI);
+ Assert(PTy->getElementType()->isSized(&Visited),
+ "Cannot allocate unsized type", &AI);
+ Assert(AI.getArraySize()->getType()->isIntegerTy(),
+ "Alloca array size must have integer type", &AI);
+ Assert(AI.getAlignment() <= Value::MaximumAlignment,
+ "huge alignment values are unsupported", &AI);
visitInstruction(AI);
}
@@ -2278,87 +2287,83 @@ void Verifier::visitAllocaInst(AllocaInst &AI) {
void Verifier::visitAtomicCmpXchgInst(AtomicCmpXchgInst &CXI) {
// FIXME: more conditions???
- Assert1(CXI.getSuccessOrdering() != NotAtomic,
- "cmpxchg instructions must be atomic.", &CXI);
- Assert1(CXI.getFailureOrdering() != NotAtomic,
- "cmpxchg instructions must be atomic.", &CXI);
- Assert1(CXI.getSuccessOrdering() != Unordered,
- "cmpxchg instructions cannot be unordered.", &CXI);
- Assert1(CXI.getFailureOrdering() != Unordered,
- "cmpxchg instructions cannot be unordered.", &CXI);
- Assert1(CXI.getSuccessOrdering() >= CXI.getFailureOrdering(),
- "cmpxchg instructions be at least as constrained on success as fail",
- &CXI);
- Assert1(CXI.getFailureOrdering() != Release &&
- CXI.getFailureOrdering() != AcquireRelease,
- "cmpxchg failure ordering cannot include release semantics", &CXI);
+ Assert(CXI.getSuccessOrdering() != NotAtomic,
+ "cmpxchg instructions must be atomic.", &CXI);
+ Assert(CXI.getFailureOrdering() != NotAtomic,
+ "cmpxchg instructions must be atomic.", &CXI);
+ Assert(CXI.getSuccessOrdering() != Unordered,
+ "cmpxchg instructions cannot be unordered.", &CXI);
+ Assert(CXI.getFailureOrdering() != Unordered,
+ "cmpxchg instructions cannot be unordered.", &CXI);
+ Assert(CXI.getSuccessOrdering() >= CXI.getFailureOrdering(),
+         "cmpxchg instructions must be at least as constrained on success as on failure",
+ &CXI);
+ Assert(CXI.getFailureOrdering() != Release &&
+ CXI.getFailureOrdering() != AcquireRelease,
+ "cmpxchg failure ordering cannot include release semantics", &CXI);
PointerType *PTy = dyn_cast<PointerType>(CXI.getOperand(0)->getType());
- Assert1(PTy, "First cmpxchg operand must be a pointer.", &CXI);
+ Assert(PTy, "First cmpxchg operand must be a pointer.", &CXI);
Type *ElTy = PTy->getElementType();
- Assert2(ElTy->isIntegerTy(),
- "cmpxchg operand must have integer type!",
- &CXI, ElTy);
+ Assert(ElTy->isIntegerTy(), "cmpxchg operand must have integer type!", &CXI,
+ ElTy);
unsigned Size = ElTy->getPrimitiveSizeInBits();
- Assert2(Size >= 8 && !(Size & (Size - 1)),
- "cmpxchg operand must be power-of-two byte-sized integer",
- &CXI, ElTy);
- Assert2(ElTy == CXI.getOperand(1)->getType(),
- "Expected value type does not match pointer operand type!",
- &CXI, ElTy);
- Assert2(ElTy == CXI.getOperand(2)->getType(),
- "Stored value type does not match pointer operand type!",
- &CXI, ElTy);
+ Assert(Size >= 8 && !(Size & (Size - 1)),
+ "cmpxchg operand must be power-of-two byte-sized integer", &CXI, ElTy);
+ Assert(ElTy == CXI.getOperand(1)->getType(),
+ "Expected value type does not match pointer operand type!", &CXI,
+ ElTy);
+ Assert(ElTy == CXI.getOperand(2)->getType(),
+ "Stored value type does not match pointer operand type!", &CXI, ElTy);
visitInstruction(CXI);
}
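
A cmpxchg sketch that meets these constraints (hypothetical names): both orderings atomic and stronger than unordered, a failure ordering no stronger than success and free of release semantics, and one integer type shared by the pointer, the expected value, and the new value:

  define i32 @cas(i32* %p, i32 %expected, i32 %new) {
    ; success seq_cst is at least as constrained as failure acquire,
    ; and acquire carries no release semantics
    %pair = cmpxchg i32* %p, i32 %expected, i32 %new seq_cst acquire
    %old = extractvalue { i32, i1 } %pair, 0
    ret i32 %old
  }
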
void Verifier::visitAtomicRMWInst(AtomicRMWInst &RMWI) {
- Assert1(RMWI.getOrdering() != NotAtomic,
- "atomicrmw instructions must be atomic.", &RMWI);
- Assert1(RMWI.getOrdering() != Unordered,
- "atomicrmw instructions cannot be unordered.", &RMWI);
+ Assert(RMWI.getOrdering() != NotAtomic,
+ "atomicrmw instructions must be atomic.", &RMWI);
+ Assert(RMWI.getOrdering() != Unordered,
+ "atomicrmw instructions cannot be unordered.", &RMWI);
PointerType *PTy = dyn_cast<PointerType>(RMWI.getOperand(0)->getType());
- Assert1(PTy, "First atomicrmw operand must be a pointer.", &RMWI);
+ Assert(PTy, "First atomicrmw operand must be a pointer.", &RMWI);
Type *ElTy = PTy->getElementType();
- Assert2(ElTy->isIntegerTy(),
- "atomicrmw operand must have integer type!",
- &RMWI, ElTy);
+ Assert(ElTy->isIntegerTy(), "atomicrmw operand must have integer type!",
+ &RMWI, ElTy);
unsigned Size = ElTy->getPrimitiveSizeInBits();
- Assert2(Size >= 8 && !(Size & (Size - 1)),
- "atomicrmw operand must be power-of-two byte-sized integer",
- &RMWI, ElTy);
- Assert2(ElTy == RMWI.getOperand(1)->getType(),
- "Argument value type does not match pointer operand type!",
- &RMWI, ElTy);
- Assert1(AtomicRMWInst::FIRST_BINOP <= RMWI.getOperation() &&
- RMWI.getOperation() <= AtomicRMWInst::LAST_BINOP,
- "Invalid binary operation!", &RMWI);
+ Assert(Size >= 8 && !(Size & (Size - 1)),
+ "atomicrmw operand must be power-of-two byte-sized integer", &RMWI,
+ ElTy);
+ Assert(ElTy == RMWI.getOperand(1)->getType(),
+ "Argument value type does not match pointer operand type!", &RMWI,
+ ElTy);
+ Assert(AtomicRMWInst::FIRST_BINOP <= RMWI.getOperation() &&
+ RMWI.getOperation() <= AtomicRMWInst::LAST_BINOP,
+ "Invalid binary operation!", &RMWI);
visitInstruction(RMWI);
}
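
Likewise for atomicrmw (hypothetical names): an atomic, non-unordered ordering, a pointer to a power-of-two byte-sized integer, and an operation within the FIRST_BINOP..LAST_BINOP range:

  define i32 @bump(i32* %counter) {
    %old = atomicrmw add i32* %counter, i32 1 seq_cst
    ret i32 %old
  }
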
void Verifier::visitFenceInst(FenceInst &FI) {
const AtomicOrdering Ordering = FI.getOrdering();
- Assert1(Ordering == Acquire || Ordering == Release ||
- Ordering == AcquireRelease || Ordering == SequentiallyConsistent,
- "fence instructions may only have "
- "acquire, release, acq_rel, or seq_cst ordering.", &FI);
+ Assert(Ordering == Acquire || Ordering == Release ||
+ Ordering == AcquireRelease || Ordering == SequentiallyConsistent,
+ "fence instructions may only have "
+ "acquire, release, acq_rel, or seq_cst ordering.",
+ &FI);
visitInstruction(FI);
}
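
And a fence with one of the four orderings the check above accepts (hypothetical name):

  define void @barrier() {
    ; acquire, release, acq_rel, and seq_cst are the only orderings
    ; the verifier allows on a fence
    fence seq_cst
    ret void
  }
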
void Verifier::visitExtractValueInst(ExtractValueInst &EVI) {
- Assert1(ExtractValueInst::getIndexedType(EVI.getAggregateOperand()->getType(),
- EVI.getIndices()) ==
- EVI.getType(),
- "Invalid ExtractValueInst operands!", &EVI);
+ Assert(ExtractValueInst::getIndexedType(EVI.getAggregateOperand()->getType(),
+ EVI.getIndices()) == EVI.getType(),
+ "Invalid ExtractValueInst operands!", &EVI);
visitInstruction(EVI);
}
void Verifier::visitInsertValueInst(InsertValueInst &IVI) {
- Assert1(ExtractValueInst::getIndexedType(IVI.getAggregateOperand()->getType(),
- IVI.getIndices()) ==
- IVI.getOperand(1)->getType(),
- "Invalid InsertValueInst operands!", &IVI);
+ Assert(ExtractValueInst::getIndexedType(IVI.getAggregateOperand()->getType(),
+ IVI.getIndices()) ==
+ IVI.getOperand(1)->getType(),
+ "Invalid InsertValueInst operands!", &IVI);
visitInstruction(IVI);
}
@@ -2368,43 +2373,44 @@ void Verifier::visitLandingPadInst(LandingPadInst &LPI) {
// The landingpad instruction is ill-formed if it doesn't have any clauses and
// isn't a cleanup.
- Assert1(LPI.getNumClauses() > 0 || LPI.isCleanup(),
- "LandingPadInst needs at least one clause or to be a cleanup.", &LPI);
+ Assert(LPI.getNumClauses() > 0 || LPI.isCleanup(),
+ "LandingPadInst needs at least one clause or to be a cleanup.", &LPI);
// The landingpad instruction defines its parent as a landing pad block. The
// landing pad block may be branched to only by the unwind edge of an invoke.
for (pred_iterator I = pred_begin(BB), E = pred_end(BB); I != E; ++I) {
const InvokeInst *II = dyn_cast<InvokeInst>((*I)->getTerminator());
- Assert1(II && II->getUnwindDest() == BB && II->getNormalDest() != BB,
- "Block containing LandingPadInst must be jumped to "
- "only by the unwind edge of an invoke.", &LPI);
+ Assert(II && II->getUnwindDest() == BB && II->getNormalDest() != BB,
+ "Block containing LandingPadInst must be jumped to "
+ "only by the unwind edge of an invoke.",
+ &LPI);
}
// The landingpad instruction must be the first non-PHI instruction in the
// block.
- Assert1(LPI.getParent()->getLandingPadInst() == &LPI,
- "LandingPadInst not the first non-PHI instruction in the block.",
- &LPI);
+ Assert(LPI.getParent()->getLandingPadInst() == &LPI,
+ "LandingPadInst not the first non-PHI instruction in the block.",
+ &LPI);
// The personality functions for all landingpad instructions within the same
// function should match.
if (PersonalityFn)
- Assert1(LPI.getPersonalityFn() == PersonalityFn,
- "Personality function doesn't match others in function", &LPI);
+ Assert(LPI.getPersonalityFn() == PersonalityFn,
+ "Personality function doesn't match others in function", &LPI);
PersonalityFn = LPI.getPersonalityFn();
// All operands must be constants.
- Assert1(isa<Constant>(PersonalityFn), "Personality function is not constant!",
- &LPI);
+ Assert(isa<Constant>(PersonalityFn), "Personality function is not constant!",
+ &LPI);
for (unsigned i = 0, e = LPI.getNumClauses(); i < e; ++i) {
Constant *Clause = LPI.getClause(i);
if (LPI.isCatch(i)) {
- Assert1(isa<PointerType>(Clause->getType()),
- "Catch operand does not have pointer type!", &LPI);
+ Assert(isa<PointerType>(Clause->getType()),
+ "Catch operand does not have pointer type!", &LPI);
} else {
- Assert1(LPI.isFilter(i), "Clause is neither catch nor filter!", &LPI);
- Assert1(isa<ConstantArray>(Clause) || isa<ConstantAggregateZero>(Clause),
- "Filter operand is not an array of constants!", &LPI);
+ Assert(LPI.isFilter(i), "Clause is neither catch nor filter!", &LPI);
+ Assert(isa<ConstantArray>(Clause) || isa<ConstantAggregateZero>(Clause),
+ "Filter operand is not an array of constants!", &LPI);
}
}
@@ -2422,46 +2428,46 @@ void Verifier::verifyDominatesUse(Instruction &I, unsigned i) {
}
const Use &U = I.getOperandUse(i);
- Assert2(InstsInThisBlock.count(Op) || DT.dominates(Op, U),
- "Instruction does not dominate all uses!", Op, &I);
+ Assert(InstsInThisBlock.count(Op) || DT.dominates(Op, U),
+ "Instruction does not dominate all uses!", Op, &I);
}
/// verifyInstruction - Verify that an instruction is well formed.
///
void Verifier::visitInstruction(Instruction &I) {
BasicBlock *BB = I.getParent();
- Assert1(BB, "Instruction not embedded in basic block!", &I);
+ Assert(BB, "Instruction not embedded in basic block!", &I);
if (!isa<PHINode>(I)) { // Check that non-phi nodes are not self referential
for (User *U : I.users()) {
- Assert1(U != (User*)&I || !DT.isReachableFromEntry(BB),
- "Only PHI nodes may reference their own value!", &I);
+ Assert(U != (User *)&I || !DT.isReachableFromEntry(BB),
+ "Only PHI nodes may reference their own value!", &I);
}
}
// Check that void typed values don't have names
- Assert1(!I.getType()->isVoidTy() || !I.hasName(),
- "Instruction has a name, but provides a void value!", &I);
+ Assert(!I.getType()->isVoidTy() || !I.hasName(),
+ "Instruction has a name, but provides a void value!", &I);
// Check that the return value of the instruction is either void or a legal
// value type.
- Assert1(I.getType()->isVoidTy() ||
- I.getType()->isFirstClassType(),
- "Instruction returns a non-scalar type!", &I);
+ Assert(I.getType()->isVoidTy() || I.getType()->isFirstClassType(),
+ "Instruction returns a non-scalar type!", &I);
// Check that the instruction doesn't produce metadata. Calls are already
// checked against the callee type.
- Assert1(!I.getType()->isMetadataTy() ||
- isa<CallInst>(I) || isa<InvokeInst>(I),
- "Invalid use of metadata!", &I);
+ Assert(!I.getType()->isMetadataTy() || isa<CallInst>(I) || isa<InvokeInst>(I),
+ "Invalid use of metadata!", &I);
// Check that all uses of the instruction, if they are instructions
// themselves, actually have parent basic blocks. If the use is not an
// instruction, it is an error!
for (Use &U : I.uses()) {
if (Instruction *Used = dyn_cast<Instruction>(U.getUser()))
- Assert2(Used->getParent() != nullptr, "Instruction referencing"
- " instruction not embedded in a basic block!", &I, Used);
+ Assert(Used->getParent() != nullptr,
+ "Instruction referencing"
+ " instruction not embedded in a basic block!",
+ &I, Used);
else {
CheckFailed("Use of instruction is not an instruction!", U);
return;
@@ -2469,44 +2475,46 @@ void Verifier::visitInstruction(Instruction &I) {
}
for (unsigned i = 0, e = I.getNumOperands(); i != e; ++i) {
- Assert1(I.getOperand(i) != nullptr, "Instruction has null operand!", &I);
+ Assert(I.getOperand(i) != nullptr, "Instruction has null operand!", &I);
// Check to make sure that only first-class-values are operands to
// instructions.
if (!I.getOperand(i)->getType()->isFirstClassType()) {
- Assert1(0, "Instruction operands must be first-class values!", &I);
+ Assert(0, "Instruction operands must be first-class values!", &I);
}
if (Function *F = dyn_cast<Function>(I.getOperand(i))) {
// Check to make sure that the "address of" an intrinsic function is never
// taken.
- Assert1(!F->isIntrinsic() || i == (isa<CallInst>(I) ? e-1 :
- isa<InvokeInst>(I) ? e-3 : 0),
- "Cannot take the address of an intrinsic!", &I);
- Assert1(!F->isIntrinsic() || isa<CallInst>(I) ||
+ Assert(
+ !F->isIntrinsic() ||
+ i == (isa<CallInst>(I) ? e - 1 : isa<InvokeInst>(I) ? e - 3 : 0),
+ "Cannot take the address of an intrinsic!", &I);
+ Assert(
+ !F->isIntrinsic() || isa<CallInst>(I) ||
F->getIntrinsicID() == Intrinsic::donothing ||
F->getIntrinsicID() == Intrinsic::experimental_patchpoint_void ||
F->getIntrinsicID() == Intrinsic::experimental_patchpoint_i64 ||
F->getIntrinsicID() == Intrinsic::experimental_gc_statepoint,
- "Cannot invoke an intrinsinc other than"
- " donothing or patchpoint", &I);
- Assert1(F->getParent() == M, "Referencing function in another module!",
- &I);
+          "Cannot invoke an intrinsic other than"
+ " donothing or patchpoint",
+ &I);
+ Assert(F->getParent() == M, "Referencing function in another module!",
+ &I);
} else if (BasicBlock *OpBB = dyn_cast<BasicBlock>(I.getOperand(i))) {
- Assert1(OpBB->getParent() == BB->getParent(),
- "Referring to a basic block in another function!", &I);
+ Assert(OpBB->getParent() == BB->getParent(),
+ "Referring to a basic block in another function!", &I);
} else if (Argument *OpArg = dyn_cast<Argument>(I.getOperand(i))) {
- Assert1(OpArg->getParent() == BB->getParent(),
- "Referring to an argument in another function!", &I);
+ Assert(OpArg->getParent() == BB->getParent(),
+ "Referring to an argument in another function!", &I);
} else if (GlobalValue *GV = dyn_cast<GlobalValue>(I.getOperand(i))) {
- Assert1(GV->getParent() == M, "Referencing global in another module!",
- &I);
+ Assert(GV->getParent() == M, "Referencing global in another module!", &I);
} else if (isa<Instruction>(I.getOperand(i))) {
verifyDominatesUse(I, i);
} else if (isa<InlineAsm>(I.getOperand(i))) {
- Assert1((i + 1 == e && isa<CallInst>(I)) ||
- (i + 3 == e && isa<InvokeInst>(I)),
- "Cannot take the address of an inline asm!", &I);
+ Assert((i + 1 == e && isa<CallInst>(I)) ||
+ (i + 3 == e && isa<InvokeInst>(I)),
+ "Cannot take the address of an inline asm!", &I);
} else if (ConstantExpr *CE = dyn_cast<ConstantExpr>(I.getOperand(i))) {
if (CE->getType()->isPtrOrPtrVectorTy()) {
// If we have a ConstantExpr pointer, we need to see if it came from an
@@ -2532,31 +2540,37 @@ void Verifier::visitInstruction(Instruction &I) {
}
if (MDNode *MD = I.getMetadata(LLVMContext::MD_fpmath)) {
- Assert1(I.getType()->isFPOrFPVectorTy(),
- "fpmath requires a floating point result!", &I);
- Assert1(MD->getNumOperands() == 1, "fpmath takes one operand!", &I);
+ Assert(I.getType()->isFPOrFPVectorTy(),
+ "fpmath requires a floating point result!", &I);
+ Assert(MD->getNumOperands() == 1, "fpmath takes one operand!", &I);
if (ConstantFP *CFP0 =
mdconst::dyn_extract_or_null<ConstantFP>(MD->getOperand(0))) {
APFloat Accuracy = CFP0->getValueAPF();
- Assert1(Accuracy.isFiniteNonZero() && !Accuracy.isNegative(),
- "fpmath accuracy not a positive number!", &I);
+ Assert(Accuracy.isFiniteNonZero() && !Accuracy.isNegative(),
+ "fpmath accuracy not a positive number!", &I);
} else {
- Assert1(false, "invalid fpmath accuracy!", &I);
+ Assert(false, "invalid fpmath accuracy!", &I);
}
}
if (MDNode *Range = I.getMetadata(LLVMContext::MD_range)) {
- Assert1(isa<LoadInst>(I) || isa<CallInst>(I) || isa<InvokeInst>(I),
- "Ranges are only for loads, calls and invokes!", &I);
+ Assert(isa<LoadInst>(I) || isa<CallInst>(I) || isa<InvokeInst>(I),
+ "Ranges are only for loads, calls and invokes!", &I);
visitRangeMetadata(I, Range, I.getType());
}
if (I.getMetadata(LLVMContext::MD_nonnull)) {
- Assert1(I.getType()->isPointerTy(),
- "nonnull applies only to pointer types", &I);
- Assert1(isa<LoadInst>(I),
- "nonnull applies only to load instructions, use attributes"
- " for calls or invokes", &I);
+ Assert(I.getType()->isPointerTy(), "nonnull applies only to pointer types",
+ &I);
+ Assert(isa<LoadInst>(I),
+ "nonnull applies only to load instructions, use attributes"
+ " for calls or invokes",
+ &I);
+ }
+
+ if (MDNode *N = I.getDebugLoc().getAsMDNode()) {
+ Assert(isa<MDLocation>(N), "invalid !dbg metadata attachment", &I, N);
+ visitMDNode(*N);
}
InstsInThisBlock.insert(&I);
@@ -2717,7 +2731,7 @@ Verifier::VerifyIntrinsicIsVarArg(bool isVarArg,
// If there are no descriptors left, then it can't be a vararg.
if (Infos.empty())
- return isVarArg ? true : false;
+ return isVarArg;
// There should be only one descriptor remaining at this point.
if (Infos.size() != 1)
@@ -2727,7 +2741,7 @@ Verifier::VerifyIntrinsicIsVarArg(bool isVarArg,
IITDescriptor D = Infos.front();
Infos = Infos.slice(1);
if (D.Kind == IITDescriptor::VarArg)
- return isVarArg ? false : true;
+ return !isVarArg;
return true;
}
@@ -2736,8 +2750,8 @@ Verifier::VerifyIntrinsicIsVarArg(bool isVarArg,
///
void Verifier::visitIntrinsicFunctionCall(Intrinsic::ID ID, CallInst &CI) {
Function *IF = CI.getCalledFunction();
- Assert1(IF->isDeclaration(), "Intrinsic functions should never be defined!",
- IF);
+ Assert(IF->isDeclaration(), "Intrinsic functions should never be defined!",
+ IF);
// Verify that the intrinsic prototype lines up with what the .td files
// describe.
@@ -2749,31 +2763,33 @@ void Verifier::visitIntrinsicFunctionCall(Intrinsic::ID ID, CallInst &CI) {
ArrayRef<Intrinsic::IITDescriptor> TableRef = Table;
SmallVector<Type *, 4> ArgTys;
- Assert1(!VerifyIntrinsicType(IFTy->getReturnType(), TableRef, ArgTys),
- "Intrinsic has incorrect return type!", IF);
+ Assert(!VerifyIntrinsicType(IFTy->getReturnType(), TableRef, ArgTys),
+ "Intrinsic has incorrect return type!", IF);
for (unsigned i = 0, e = IFTy->getNumParams(); i != e; ++i)
- Assert1(!VerifyIntrinsicType(IFTy->getParamType(i), TableRef, ArgTys),
- "Intrinsic has incorrect argument type!", IF);
+ Assert(!VerifyIntrinsicType(IFTy->getParamType(i), TableRef, ArgTys),
+ "Intrinsic has incorrect argument type!", IF);
// Verify if the intrinsic call matches the vararg property.
if (IsVarArg)
- Assert1(!VerifyIntrinsicIsVarArg(IsVarArg, TableRef),
- "Intrinsic was not defined with variable arguments!", IF);
+ Assert(!VerifyIntrinsicIsVarArg(IsVarArg, TableRef),
+ "Intrinsic was not defined with variable arguments!", IF);
else
- Assert1(!VerifyIntrinsicIsVarArg(IsVarArg, TableRef),
- "Callsite was not defined with variable arguments!", IF);
+ Assert(!VerifyIntrinsicIsVarArg(IsVarArg, TableRef),
+ "Callsite was not defined with variable arguments!", IF);
// All descriptors should be absorbed by now.
- Assert1(TableRef.empty(), "Intrinsic has too few arguments!", IF);
+ Assert(TableRef.empty(), "Intrinsic has too few arguments!", IF);
// Now that we have the intrinsic ID and the actual argument types (and we
// know they are legal for the intrinsic!) get the intrinsic name through the
// usual means. This allows us to verify the mangling of argument types into
// the name.
const std::string ExpectedName = Intrinsic::getName(ID, ArgTys);
- Assert1(ExpectedName == IF->getName(),
- "Intrinsic name not mangled correctly for type arguments! "
- "Should be: " + ExpectedName, IF);
+ Assert(ExpectedName == IF->getName(),
+ "Intrinsic name not mangled correctly for type arguments! "
+ "Should be: " +
+ ExpectedName,
+ IF);
// If the intrinsic takes MDNode arguments, verify that they are either global
// or are local to *this* function.
@@ -2786,95 +2802,123 @@ void Verifier::visitIntrinsicFunctionCall(Intrinsic::ID ID, CallInst &CI) {
break;
case Intrinsic::ctlz: // llvm.ctlz
case Intrinsic::cttz: // llvm.cttz
- Assert1(isa<ConstantInt>(CI.getArgOperand(1)),
- "is_zero_undef argument of bit counting intrinsics must be a "
- "constant int", &CI);
+ Assert(isa<ConstantInt>(CI.getArgOperand(1)),
+ "is_zero_undef argument of bit counting intrinsics must be a "
+ "constant int",
+ &CI);
+ break;
+ case Intrinsic::dbg_declare: // llvm.dbg.declare
+ Assert(isa<MetadataAsValue>(CI.getArgOperand(0)),
+ "invalid llvm.dbg.declare intrinsic call 1", &CI);
+ visitDbgIntrinsic("declare", cast<DbgDeclareInst>(CI));
+ break;
+ case Intrinsic::dbg_value: // llvm.dbg.value
+ visitDbgIntrinsic("value", cast<DbgValueInst>(CI));
break;
- case Intrinsic::dbg_declare: { // llvm.dbg.declare
- Assert1(CI.getArgOperand(0) && isa<MetadataAsValue>(CI.getArgOperand(0)),
- "invalid llvm.dbg.declare intrinsic call 1", &CI);
- } break;
case Intrinsic::memcpy:
case Intrinsic::memmove:
- case Intrinsic::memset:
- Assert1(isa<ConstantInt>(CI.getArgOperand(3)),
- "alignment argument of memory intrinsics must be a constant int",
- &CI);
- Assert1(isa<ConstantInt>(CI.getArgOperand(4)),
- "isvolatile argument of memory intrinsics must be a constant int",
- &CI);
+ case Intrinsic::memset: {
+ ConstantInt *AlignCI = dyn_cast<ConstantInt>(CI.getArgOperand(3));
+ Assert(AlignCI,
+ "alignment argument of memory intrinsics must be a constant int",
+ &CI);
+ const APInt &AlignVal = AlignCI->getValue();
+ Assert(AlignCI->isZero() || AlignVal.isPowerOf2(),
+ "alignment argument of memory intrinsics must be a power of 2", &CI);
+ Assert(isa<ConstantInt>(CI.getArgOperand(4)),
+ "isvolatile argument of memory intrinsics must be a constant int",
+ &CI);
break;
+ }
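
The new power-of-two requirement on the alignment operand, illustrated with hypothetical names; this era's memcpy signature still carries alignment as the fourth argument and isvolatile as the fifth:

  declare void @llvm.memcpy.p0i8.p0i8.i64(i8*, i8*, i64, i32, i1)

  define void @copy16(i8* %dst, i8* %src) {
    ; align 4 is a power of two and isvolatile is a constant int,
    ; so both checks are satisfied
    call void @llvm.memcpy.p0i8.p0i8.i64(i8* %dst, i8* %src, i64 16,
                                         i32 4, i1 false)
    ret void
  }
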
case Intrinsic::gcroot:
case Intrinsic::gcwrite:
case Intrinsic::gcread:
if (ID == Intrinsic::gcroot) {
AllocaInst *AI =
dyn_cast<AllocaInst>(CI.getArgOperand(0)->stripPointerCasts());
- Assert1(AI, "llvm.gcroot parameter #1 must be an alloca.", &CI);
- Assert1(isa<Constant>(CI.getArgOperand(1)),
- "llvm.gcroot parameter #2 must be a constant.", &CI);
+ Assert(AI, "llvm.gcroot parameter #1 must be an alloca.", &CI);
+ Assert(isa<Constant>(CI.getArgOperand(1)),
+ "llvm.gcroot parameter #2 must be a constant.", &CI);
if (!AI->getType()->getElementType()->isPointerTy()) {
- Assert1(!isa<ConstantPointerNull>(CI.getArgOperand(1)),
- "llvm.gcroot parameter #1 must either be a pointer alloca, "
- "or argument #2 must be a non-null constant.", &CI);
+ Assert(!isa<ConstantPointerNull>(CI.getArgOperand(1)),
+ "llvm.gcroot parameter #1 must either be a pointer alloca, "
+ "or argument #2 must be a non-null constant.",
+ &CI);
}
}
- Assert1(CI.getParent()->getParent()->hasGC(),
- "Enclosing function does not use GC.", &CI);
+ Assert(CI.getParent()->getParent()->hasGC(),
+ "Enclosing function does not use GC.", &CI);
break;
case Intrinsic::init_trampoline:
- Assert1(isa<Function>(CI.getArgOperand(1)->stripPointerCasts()),
- "llvm.init_trampoline parameter #2 must resolve to a function.",
- &CI);
+ Assert(isa<Function>(CI.getArgOperand(1)->stripPointerCasts()),
+ "llvm.init_trampoline parameter #2 must resolve to a function.",
+ &CI);
break;
case Intrinsic::prefetch:
- Assert1(isa<ConstantInt>(CI.getArgOperand(1)) &&
- isa<ConstantInt>(CI.getArgOperand(2)) &&
- cast<ConstantInt>(CI.getArgOperand(1))->getZExtValue() < 2 &&
- cast<ConstantInt>(CI.getArgOperand(2))->getZExtValue() < 4,
- "invalid arguments to llvm.prefetch",
- &CI);
+ Assert(isa<ConstantInt>(CI.getArgOperand(1)) &&
+ isa<ConstantInt>(CI.getArgOperand(2)) &&
+ cast<ConstantInt>(CI.getArgOperand(1))->getZExtValue() < 2 &&
+ cast<ConstantInt>(CI.getArgOperand(2))->getZExtValue() < 4,
+ "invalid arguments to llvm.prefetch", &CI);
break;
case Intrinsic::stackprotector:
- Assert1(isa<AllocaInst>(CI.getArgOperand(1)->stripPointerCasts()),
- "llvm.stackprotector parameter #2 must resolve to an alloca.",
- &CI);
+ Assert(isa<AllocaInst>(CI.getArgOperand(1)->stripPointerCasts()),
+ "llvm.stackprotector parameter #2 must resolve to an alloca.", &CI);
break;
case Intrinsic::lifetime_start:
case Intrinsic::lifetime_end:
case Intrinsic::invariant_start:
- Assert1(isa<ConstantInt>(CI.getArgOperand(0)),
- "size argument of memory use markers must be a constant integer",
- &CI);
+ Assert(isa<ConstantInt>(CI.getArgOperand(0)),
+ "size argument of memory use markers must be a constant integer",
+ &CI);
break;
case Intrinsic::invariant_end:
- Assert1(isa<ConstantInt>(CI.getArgOperand(1)),
- "llvm.invariant.end parameter #2 must be a constant integer", &CI);
+ Assert(isa<ConstantInt>(CI.getArgOperand(1)),
+ "llvm.invariant.end parameter #2 must be a constant integer", &CI);
break;
- case Intrinsic::frameallocate: {
+ case Intrinsic::frameescape: {
BasicBlock *BB = CI.getParent();
- Assert1(BB == &BB->getParent()->front(),
- "llvm.frameallocate used outside of entry block", &CI);
- Assert1(!SawFrameAllocate,
- "multiple calls to llvm.frameallocate in one function", &CI);
- SawFrameAllocate = true;
- Assert1(isa<ConstantInt>(CI.getArgOperand(0)),
- "llvm.frameallocate argument must be constant integer size", &CI);
+ Assert(BB == &BB->getParent()->front(),
+ "llvm.frameescape used outside of entry block", &CI);
+ Assert(!SawFrameEscape,
+ "multiple calls to llvm.frameescape in one function", &CI);
+ for (Value *Arg : CI.arg_operands()) {
+ auto *AI = dyn_cast<AllocaInst>(Arg->stripPointerCasts());
+ Assert(AI && AI->isStaticAlloca(),
+ "llvm.frameescape only accepts static allocas", &CI);
+ }
+ FrameEscapeInfo[BB->getParent()].first = CI.getNumArgOperands();
+ SawFrameEscape = true;
break;
}
case Intrinsic::framerecover: {
Value *FnArg = CI.getArgOperand(0)->stripPointerCasts();
Function *Fn = dyn_cast<Function>(FnArg);
- Assert1(Fn && !Fn->isDeclaration(), "llvm.framerecover first "
- "argument must be function defined in this module", &CI);
+ Assert(Fn && !Fn->isDeclaration(),
+ "llvm.framerecover first "
+           "argument must be a function defined in this module",
+ &CI);
+ auto *IdxArg = dyn_cast<ConstantInt>(CI.getArgOperand(2));
+ Assert(IdxArg, "idx argument of llvm.framerecover must be a constant int",
+ &CI);
+ auto &Entry = FrameEscapeInfo[Fn];
+ Entry.second = unsigned(
+ std::max(uint64_t(Entry.second), IdxArg->getLimitedValue(~0U) + 1));
+ break;
+ }
+
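
A sketch of the frameescape/framerecover contract these two cases enforce (hypothetical names, era call syntax assumed): a single frameescape of static allocas in the entry block, recovered elsewhere by a constant index:

  define void @parent() {
  entry:
    %slot = alloca i32                          ; static alloca, as required
    call void (...)* @llvm.frameescape(i32* %slot)
    ret void
  }

  define void @child(i8* %fp) {
    ; the third operand must be a constant int; 0 names the first
    ; escaped alloca of @parent, which must be defined in this module
    %p = call i8* @llvm.framerecover(i8* bitcast (void ()* @parent to i8*),
                                     i8* %fp, i32 0)
    ret void
  }

  declare void @llvm.frameescape(...)
  declare i8* @llvm.framerecover(i8*, i8*, i32)
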
+ case Intrinsic::eh_unwindhelp: {
+ auto *AI = dyn_cast<AllocaInst>(CI.getArgOperand(0)->stripPointerCasts());
+ Assert(AI && AI->isStaticAlloca(),
+ "llvm.eh.unwindhelp requires a static alloca", &CI);
break;
}
case Intrinsic::experimental_gc_statepoint:
- Assert1(!CI.isInlineAsm(),
- "gc.statepoint support for inline assembly unimplemented", &CI);
+ Assert(!CI.isInlineAsm(),
+ "gc.statepoint support for inline assembly unimplemented", &CI);
VerifyStatepoint(ImmutableCallSite(&CI));
break;
@@ -2886,56 +2930,52 @@ void Verifier::visitIntrinsicFunctionCall(Intrinsic::ID ID, CallInst &CI) {
CallSite StatepointCS(CI.getArgOperand(0));
const Function *StatepointFn =
StatepointCS.getInstruction() ? StatepointCS.getCalledFunction() : nullptr;
- Assert2(StatepointFn && StatepointFn->isDeclaration() &&
- StatepointFn->getIntrinsicID() == Intrinsic::experimental_gc_statepoint,
- "gc.result operand #1 must be from a statepoint",
- &CI, CI.getArgOperand(0));
+ Assert(StatepointFn && StatepointFn->isDeclaration() &&
+ StatepointFn->getIntrinsicID() ==
+ Intrinsic::experimental_gc_statepoint,
+ "gc.result operand #1 must be from a statepoint", &CI,
+ CI.getArgOperand(0));
// Assert that result type matches wrapped callee.
const Value *Target = StatepointCS.getArgument(0);
const PointerType *PT = cast<PointerType>(Target->getType());
const FunctionType *TargetFuncType =
cast<FunctionType>(PT->getElementType());
- Assert1(CI.getType() == TargetFuncType->getReturnType(),
- "gc.result result type does not match wrapped callee",
- &CI);
+ Assert(CI.getType() == TargetFuncType->getReturnType(),
+ "gc.result result type does not match wrapped callee", &CI);
break;
}
case Intrinsic::experimental_gc_relocate: {
- Assert1(CI.getNumArgOperands() == 3, "wrong number of arguments", &CI);
+ Assert(CI.getNumArgOperands() == 3, "wrong number of arguments", &CI);
// Check that this relocate is correctly tied to the statepoint
// This is case for relocate on the unwinding path of an invoke statepoint
if (ExtractValueInst *ExtractValue =
dyn_cast<ExtractValueInst>(CI.getArgOperand(0))) {
- Assert1(isa<LandingPadInst>(ExtractValue->getAggregateOperand()),
- "gc relocate on unwind path incorrectly linked to the statepoint",
- &CI);
+ Assert(isa<LandingPadInst>(ExtractValue->getAggregateOperand()),
+ "gc relocate on unwind path incorrectly linked to the statepoint",
+ &CI);
const BasicBlock *invokeBB =
ExtractValue->getParent()->getUniquePredecessor();
// Landingpad relocates should have only one predecessor with invoke
// statepoint terminator
- Assert1(invokeBB,
- "safepoints should have unique landingpads",
- ExtractValue->getParent());
- Assert1(invokeBB->getTerminator(),
- "safepoint block should be well formed",
- invokeBB);
- Assert1(isStatepoint(invokeBB->getTerminator()),
- "gc relocate should be linked to a statepoint",
- invokeBB);
+ Assert(invokeBB, "safepoints should have unique landingpads",
+ ExtractValue->getParent());
+ Assert(invokeBB->getTerminator(), "safepoint block should be well formed",
+ invokeBB);
+ Assert(isStatepoint(invokeBB->getTerminator()),
+ "gc relocate should be linked to a statepoint", invokeBB);
}
else {
// In all other cases relocate should be tied to the statepoint directly.
// This covers relocates on a normal return path of invoke statepoint and
// relocates of a call statepoint
auto Token = CI.getArgOperand(0);
- Assert2(isa<Instruction>(Token) && isStatepoint(cast<Instruction>(Token)),
- "gc relocate is incorrectly tied to the statepoint",
- &CI, Token);
+ Assert(isa<Instruction>(Token) && isStatepoint(cast<Instruction>(Token)),
+ "gc relocate is incorrectly tied to the statepoint", &CI, Token);
}
// Verify rest of the relocate arguments
@@ -2945,53 +2985,74 @@ void Verifier::visitIntrinsicFunctionCall(Intrinsic::ID ID, CallInst &CI) {
// Both the base and derived must be piped through the safepoint
Value* Base = CI.getArgOperand(1);
- Assert1(isa<ConstantInt>(Base),
- "gc.relocate operand #2 must be integer offset", &CI);
-
+ Assert(isa<ConstantInt>(Base),
+ "gc.relocate operand #2 must be integer offset", &CI);
+
Value* Derived = CI.getArgOperand(2);
- Assert1(isa<ConstantInt>(Derived),
- "gc.relocate operand #3 must be integer offset", &CI);
+ Assert(isa<ConstantInt>(Derived),
+ "gc.relocate operand #3 must be integer offset", &CI);
const int BaseIndex = cast<ConstantInt>(Base)->getZExtValue();
const int DerivedIndex = cast<ConstantInt>(Derived)->getZExtValue();
// Check the bounds
- Assert1(0 <= BaseIndex &&
- BaseIndex < (int)StatepointCS.arg_size(),
- "gc.relocate: statepoint base index out of bounds", &CI);
- Assert1(0 <= DerivedIndex &&
- DerivedIndex < (int)StatepointCS.arg_size(),
- "gc.relocate: statepoint derived index out of bounds", &CI);
+ Assert(0 <= BaseIndex && BaseIndex < (int)StatepointCS.arg_size(),
+ "gc.relocate: statepoint base index out of bounds", &CI);
+ Assert(0 <= DerivedIndex && DerivedIndex < (int)StatepointCS.arg_size(),
+ "gc.relocate: statepoint derived index out of bounds", &CI);
// Check that BaseIndex and DerivedIndex fall within the 'gc parameters'
// section of the statepoint's argument
- const int NumCallArgs =
+ Assert(StatepointCS.arg_size() > 0,
+ "gc.statepoint: insufficient arguments");
+ Assert(isa<ConstantInt>(StatepointCS.getArgument(1)),
+         "gc.statepoint: number of call arguments must be a constant integer");
+ const unsigned NumCallArgs =
cast<ConstantInt>(StatepointCS.getArgument(1))->getZExtValue();
+  Assert(StatepointCS.arg_size() > NumCallArgs + 3,
+         "gc.statepoint: mismatch in number of call arguments");
+  Assert(isa<ConstantInt>(StatepointCS.getArgument(NumCallArgs + 3)),
+ "gc.statepoint: number of deoptimization arguments must be "
+ "a constant integer");
const int NumDeoptArgs =
cast<ConstantInt>(StatepointCS.getArgument(NumCallArgs + 3))->getZExtValue();
const int GCParamArgsStart = NumCallArgs + NumDeoptArgs + 4;
const int GCParamArgsEnd = StatepointCS.arg_size();
- Assert1(GCParamArgsStart <= BaseIndex &&
- BaseIndex < GCParamArgsEnd,
- "gc.relocate: statepoint base index doesn't fall within the "
- "'gc parameters' section of the statepoint call", &CI);
- Assert1(GCParamArgsStart <= DerivedIndex &&
- DerivedIndex < GCParamArgsEnd,
- "gc.relocate: statepoint derived index doesn't fall within the "
- "'gc parameters' section of the statepoint call", &CI);
-
+ Assert(GCParamArgsStart <= BaseIndex && BaseIndex < GCParamArgsEnd,
+ "gc.relocate: statepoint base index doesn't fall within the "
+ "'gc parameters' section of the statepoint call",
+ &CI);
+ Assert(GCParamArgsStart <= DerivedIndex && DerivedIndex < GCParamArgsEnd,
+ "gc.relocate: statepoint derived index doesn't fall within the "
+ "'gc parameters' section of the statepoint call",
+ &CI);
// Assert that the result type matches the type of the relocated pointer
GCRelocateOperands Operands(&CI);
- Assert1(Operands.derivedPtr()->getType() == CI.getType(),
- "gc.relocate: relocating a pointer shouldn't change its type",
- &CI);
+ Assert(Operands.derivedPtr()->getType() == CI.getType(),
+ "gc.relocate: relocating a pointer shouldn't change its type", &CI);
break;
}
};
}
-void DebugInfoVerifier::verifyDebugInfo() {
- if (!VerifyDebugInfo)
+template <class DbgIntrinsicTy>
+void Verifier::visitDbgIntrinsic(StringRef Kind, DbgIntrinsicTy &DII) {
+ auto *MD = cast<MetadataAsValue>(DII.getArgOperand(0))->getMetadata();
+ Assert(isa<ValueAsMetadata>(MD) ||
+ (isa<MDNode>(MD) && !cast<MDNode>(MD)->getNumOperands()),
+ "invalid llvm.dbg." + Kind + " intrinsic address/value", &DII, MD);
+ Assert(isa<MDLocalVariable>(DII.getRawVariable()),
+ "invalid llvm.dbg." + Kind + " intrinsic variable", &DII,
+ DII.getRawVariable());
+ Assert(isa<MDExpression>(DII.getRawExpression()),
+ "invalid llvm.dbg." + Kind + " intrinsic expression", &DII,
+ DII.getRawExpression());
+}
+
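
A shape sketch of a dbg.declare call that this template accepts (hypothetical metadata numbering; the full MDLocalVariable definition, with its scope, file, and type fields, is elided): operand 0 wraps a value, operand 1 must be an MDLocalVariable, and operand 2 an MDExpression:

  define void @f(i32 %x) {
    %x.addr = alloca i32
    store i32 %x, i32* %x.addr
    call void @llvm.dbg.declare(metadata i32* %x.addr, metadata !1, metadata !2)
    ret void
  }
  declare void @llvm.dbg.declare(metadata, metadata, metadata)
  !2 = !MDExpression()
  ; !1 stands in for a valid MDLocalVariable describing "x"
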
+void Verifier::verifyDebugInfo() {
+ // Run the debug info verifier only if the regular verifier succeeds, since
+ // sometimes checks that have already failed will cause crashes here.
+ if (EverBroken || !VerifyDebugInfo)
return;
DebugInfoFinder Finder;
@@ -3002,23 +3063,23 @@ void DebugInfoVerifier::verifyDebugInfo() {
//
// NOTE: The loud braces are necessary for MSVC compatibility.
for (DICompileUnit CU : Finder.compile_units()) {
- Assert1(CU.Verify(), "DICompileUnit does not Verify!", CU);
+ Assert(CU.Verify(), "DICompileUnit does not Verify!", CU);
}
for (DISubprogram S : Finder.subprograms()) {
- Assert1(S.Verify(), "DISubprogram does not Verify!", S);
+ Assert(S.Verify(), "DISubprogram does not Verify!", S);
}
for (DIGlobalVariable GV : Finder.global_variables()) {
- Assert1(GV.Verify(), "DIGlobalVariable does not Verify!", GV);
+ Assert(GV.Verify(), "DIGlobalVariable does not Verify!", GV);
}
for (DIType T : Finder.types()) {
- Assert1(T.Verify(), "DIType does not Verify!", T);
+ Assert(T.Verify(), "DIType does not Verify!", T);
}
for (DIScope S : Finder.scopes()) {
- Assert1(S.Verify(), "DIScope does not Verify!", S);
+ Assert(S.Verify(), "DIScope does not Verify!", S);
}
}
-void DebugInfoVerifier::processInstructions(DebugInfoFinder &Finder) {
+void Verifier::processInstructions(DebugInfoFinder &Finder) {
for (const Function &F : *M)
for (auto I = inst_begin(&F), E = inst_end(&F); I != E; ++I) {
if (MDNode *MD = I->getMetadata(LLVMContext::MD_dbg))
@@ -3028,25 +3089,16 @@ void DebugInfoVerifier::processInstructions(DebugInfoFinder &Finder) {
}
}
-void DebugInfoVerifier::processCallInst(DebugInfoFinder &Finder,
- const CallInst &CI) {
+void Verifier::processCallInst(DebugInfoFinder &Finder, const CallInst &CI) {
if (Function *F = CI.getCalledFunction())
if (Intrinsic::ID ID = (Intrinsic::ID)F->getIntrinsicID())
switch (ID) {
- case Intrinsic::dbg_declare: {
- auto *DDI = cast<DbgDeclareInst>(&CI);
- Finder.processDeclare(*M, DDI);
- if (auto E = DDI->getExpression())
- Assert1(DIExpression(E).Verify(), "DIExpression does not Verify!", E);
+ case Intrinsic::dbg_declare:
+ Finder.processDeclare(*M, cast<DbgDeclareInst>(&CI));
break;
- }
- case Intrinsic::dbg_value: {
- auto *DVI = cast<DbgValueInst>(&CI);
- Finder.processValue(*M, DVI);
- if (auto E = DVI->getExpression())
- Assert1(DIExpression(E).Verify(), "DIExpression does not Verify!", E);
+ case Intrinsic::dbg_value:
+ Finder.processValue(*M, cast<DbgValueInst>(&CI));
break;
- }
default:
break;
}
@@ -3079,8 +3131,7 @@ bool llvm::verifyModule(const Module &M, raw_ostream *OS) {
// Note that this function's return value is inverted from what you would
// expect of a function called "verify".
- DebugInfoVerifier DIV(OS ? *OS : NullStr);
- return !V.verify(M) || !DIV.verify(M) || Broken;
+ return !V.verify(M) || Broken;
}
namespace {
@@ -3090,7 +3141,7 @@ struct VerifierLegacyPass : public FunctionPass {
Verifier V;
bool FatalErrors;
- VerifierLegacyPass() : FunctionPass(ID), FatalErrors(true) {
+ VerifierLegacyPass() : FunctionPass(ID), V(dbgs()), FatalErrors(true) {
initializeVerifierLegacyPassPass(*PassRegistry::getPassRegistry());
}
explicit VerifierLegacyPass(bool FatalErrors)
@@ -3116,48 +3167,15 @@ struct VerifierLegacyPass : public FunctionPass {
AU.setPreservesAll();
}
};
-struct DebugInfoVerifierLegacyPass : public ModulePass {
- static char ID;
-
- DebugInfoVerifier V;
- bool FatalErrors;
-
- DebugInfoVerifierLegacyPass() : ModulePass(ID), FatalErrors(true) {
- initializeDebugInfoVerifierLegacyPassPass(*PassRegistry::getPassRegistry());
- }
- explicit DebugInfoVerifierLegacyPass(bool FatalErrors)
- : ModulePass(ID), V(dbgs()), FatalErrors(FatalErrors) {
- initializeDebugInfoVerifierLegacyPassPass(*PassRegistry::getPassRegistry());
- }
-
- bool runOnModule(Module &M) override {
- if (!V.verify(M) && FatalErrors)
- report_fatal_error("Broken debug info found, compilation aborted!");
-
- return false;
- }
-
- void getAnalysisUsage(AnalysisUsage &AU) const override {
- AU.setPreservesAll();
- }
-};
}
char VerifierLegacyPass::ID = 0;
INITIALIZE_PASS(VerifierLegacyPass, "verify", "Module Verifier", false, false)
-char DebugInfoVerifierLegacyPass::ID = 0;
-INITIALIZE_PASS(DebugInfoVerifierLegacyPass, "verify-di", "Debug Info Verifier",
- false, false)
-
FunctionPass *llvm::createVerifierPass(bool FatalErrors) {
return new VerifierLegacyPass(FatalErrors);
}
-ModulePass *llvm::createDebugInfoVerifierPass(bool FatalErrors) {
- return new DebugInfoVerifierLegacyPass(FatalErrors);
-}
-
PreservedAnalyses VerifierPass::run(Module &M) {
if (verifyModule(M, &dbgs()) && FatalErrors)
report_fatal_error("Broken module found, compilation aborted!");
diff --git a/lib/LLVMBuild.txt b/lib/LLVMBuild.txt
index ad5b22b..bc2448d 100644
--- a/lib/LLVMBuild.txt
+++ b/lib/LLVMBuild.txt
@@ -16,7 +16,7 @@
;===------------------------------------------------------------------------===;
[common]
-subdirectories = Analysis AsmParser Bitcode CodeGen DebugInfo ExecutionEngine LineEditor Linker IR IRReader LTO MC Object Option ProfileData Support TableGen Target Transforms
+subdirectories = Analysis AsmParser Bitcode CodeGen DebugInfo ExecutionEngine LineEditor Linker IR IRReader LTO MC Object Option Passes ProfileData Support TableGen Target Transforms
[component_0]
type = Group
diff --git a/lib/LTO/LTOCodeGenerator.cpp b/lib/LTO/LTOCodeGenerator.cpp
index 61c2749..a6f980b 100644
--- a/lib/LTO/LTOCodeGenerator.cpp
+++ b/lib/LTO/LTOCodeGenerator.cpp
@@ -71,7 +71,7 @@ LTOCodeGenerator::LTOCodeGenerator()
LTOCodeGenerator::LTOCodeGenerator(std::unique_ptr<LLVMContext> Context)
: OwnedContext(std::move(Context)), Context(*OwnedContext),
- IRLinker(new Module("ld-temp.o", *OwnedContext)) {
+ IRLinker(new Module("ld-temp.o", *OwnedContext)), OptLevel(2) {
initialize();
}
@@ -291,12 +291,11 @@ const void *LTOCodeGenerator::compileOptimized(size_t *length,
bool LTOCodeGenerator::compile_to_file(const char **name,
- bool disableOpt,
bool disableInline,
bool disableGVNLoadPRE,
bool disableVectorization,
std::string &errMsg) {
- if (!optimize(disableOpt, disableInline, disableGVNLoadPRE,
+ if (!optimize(disableInline, disableGVNLoadPRE,
disableVectorization, errMsg))
return false;
@@ -304,12 +303,11 @@ bool LTOCodeGenerator::compile_to_file(const char **name,
}
const void* LTOCodeGenerator::compile(size_t *length,
- bool disableOpt,
bool disableInline,
bool disableGVNLoadPRE,
bool disableVectorization,
std::string &errMsg) {
- if (!optimize(disableOpt, disableInline, disableGVNLoadPRE,
+ if (!optimize(disableInline, disableGVNLoadPRE,
disableVectorization, errMsg))
return nullptr;
@@ -363,9 +361,25 @@ bool LTOCodeGenerator::determineTarget(std::string &errMsg) {
MCpu = "cyclone";
}
+ CodeGenOpt::Level CGOptLevel;
+ switch (OptLevel) {
+ case 0:
+ CGOptLevel = CodeGenOpt::None;
+ break;
+ case 1:
+ CGOptLevel = CodeGenOpt::Less;
+ break;
+ case 2:
+ CGOptLevel = CodeGenOpt::Default;
+ break;
+ case 3:
+ CGOptLevel = CodeGenOpt::Aggressive;
+ break;
+ }
+
TargetMach = march->createTargetMachine(TripleStr, MCpu, FeatureStr, Options,
RelocModel, CodeModel::Default,
- CodeGenOpt::Aggressive);
+ CGOptLevel);
return true;
}
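A standalone sketch of the OptLevel-to-CodeGenOpt::Level mapping introduced above. toCodeGenOptLevel is a hypothetical free function; the trailing assert is an addition here to document the [0, 3] invariant the member switch relies on (the patch itself leaves out-of-range values undefined).

    #include "llvm/Support/CodeGen.h"
    #include <cassert>

    static llvm::CodeGenOpt::Level toCodeGenOptLevel(unsigned OptLevel) {
      switch (OptLevel) {
      case 0: return llvm::CodeGenOpt::None;
      case 1: return llvm::CodeGenOpt::Less;
      case 2: return llvm::CodeGenOpt::Default;  // the constructor's default
      case 3: return llvm::CodeGenOpt::Aggressive;
      }
      assert(false && "OptLevel must stay in [0, 3]");
      return llvm::CodeGenOpt::Default;
    }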
@@ -457,7 +471,6 @@ void LTOCodeGenerator::applyScopeRestrictions() {
// Start off with a verification pass.
legacy::PassManager passes;
passes.add(createVerifierPass());
- passes.add(createDebugInfoVerifierPass());
// mark which symbols can not be internalized
Mangler Mangler(TargetMach->getDataLayout());
@@ -512,8 +525,7 @@ void LTOCodeGenerator::applyScopeRestrictions() {
}
/// Optimize merged modules using various IPO passes
-bool LTOCodeGenerator::optimize(bool DisableOpt,
- bool DisableInline,
+bool LTOCodeGenerator::optimize(bool DisableInline,
bool DisableGVNLoadPRE,
bool DisableVectorization,
std::string &errMsg) {
@@ -529,9 +541,8 @@ bool LTOCodeGenerator::optimize(bool DisableOpt,
legacy::PassManager passes;
// Add an appropriate DataLayout instance for this module...
- mergedModule->setDataLayout(TargetMach->getDataLayout());
+ mergedModule->setDataLayout(*TargetMach->getDataLayout());
- passes.add(new DataLayoutPass());
passes.add(
createTargetTransformInfoWrapperPass(TargetMach->getTargetIRAnalysis()));
@@ -543,8 +554,7 @@ bool LTOCodeGenerator::optimize(bool DisableOpt,
if (!DisableInline)
PMB.Inliner = createFunctionInliningPass();
PMB.LibraryInfo = new TargetLibraryInfoImpl(TargetTriple);
- if (DisableOpt)
- PMB.OptLevel = 0;
+ PMB.OptLevel = OptLevel;
PMB.VerifyInput = true;
PMB.VerifyOutput = true;
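A hedged sketch of the pipeline setup once DisableOpt is gone: the numeric OptLevel feeds PassManagerBuilder directly instead of being clamped to 0. populateLTOPipeline, OptLevel, and DisableInline here are stand-ins for the LTOCodeGenerator members, not the actual method.

    #include "llvm/IR/LegacyPassManager.h"
    #include "llvm/Transforms/IPO.h"
    #include "llvm/Transforms/IPO/PassManagerBuilder.h"

    void populateLTOPipeline(llvm::legacy::PassManager &PM, unsigned OptLevel,
                             bool DisableInline) {
      llvm::PassManagerBuilder PMB;
      if (!DisableInline)
        PMB.Inliner = llvm::createFunctionInliningPass();
      PMB.OptLevel = OptLevel;  // replaces the old all-or-nothing DisableOpt
      PMB.VerifyInput = true;
      PMB.VerifyOutput = true;
      PMB.populateLTOPassManager(PM);
    }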
@@ -567,8 +577,6 @@ bool LTOCodeGenerator::compileOptimized(raw_ostream &out, std::string &errMsg) {
legacy::PassManager codeGenPasses;
- codeGenPasses.add(new DataLayoutPass());
-
formatted_raw_ostream Out(out);
// If the bitcode files contain ARC code and were compiled with optimization,
diff --git a/lib/LTO/LTOModule.cpp b/lib/LTO/LTOModule.cpp
index 0d07791..49aa97d 100644
--- a/lib/LTO/LTOModule.cpp
+++ b/lib/LTO/LTOModule.cpp
@@ -179,7 +179,8 @@ static Module *parseBitcodeFileImpl(MemoryBufferRef Buffer,
std::unique_ptr<MemoryBuffer> LightweightBuf =
MemoryBuffer::getMemBuffer(*MBOrErr, false);
ErrorOr<Module *> M = getLazyBitcodeModule(std::move(LightweightBuf), Context,
- DiagnosticHandler);
+ DiagnosticHandler,
+ true/*ShouldLazyLoadMetadata*/);
if (!M)
return nullptr;
return *M;
@@ -229,7 +230,7 @@ LTOModule *LTOModule::makeLTOModule(MemoryBufferRef Buffer,
TargetMachine *target = march->createTargetMachine(TripleStr, CPU, FeatureStr,
options);
- M->setDataLayout(target->getDataLayout());
+ M->setDataLayout(*target->getDataLayout());
std::unique_ptr<object::IRObjectFile> IRObj(
new object::IRObjectFile(Buffer, std::move(M)));
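A hedged usage sketch for the ShouldLazyLoadMetadata flag used above, assuming the 3.7-era getLazyBitcodeModule signature: a lazily materialized module now defers module-level metadata as well as function bodies. lazyLoad is a hypothetical helper.

    #include "llvm/Bitcode/ReaderWriter.h"
    #include "llvm/IR/LLVMContext.h"
    #include "llvm/IR/Module.h"
    #include "llvm/Support/MemoryBuffer.h"
    #include <memory>

    llvm::Module *lazyLoad(std::unique_ptr<llvm::MemoryBuffer> Buf,
                           llvm::LLVMContext &Ctx) {
      llvm::ErrorOr<llvm::Module *> M = llvm::getLazyBitcodeModule(
          std::move(Buf), Ctx, nullptr, /*ShouldLazyLoadMetadata=*/true);
      return M ? *M : nullptr;  // on error, the caller sees nullptr
    }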
diff --git a/lib/Linker/LinkModules.cpp b/lib/Linker/LinkModules.cpp
index e6d9acc..21edc50 100644
--- a/lib/Linker/LinkModules.cpp
+++ b/lib/Linker/LinkModules.cpp
@@ -226,6 +226,7 @@ void TypeMapTy::linkDefinedTypeBodies() {
Elements[I] = get(SrcSTy->getElementType(I));
DstSTy->setBody(Elements, SrcSTy->isPacked());
+ DstStructTypesSet.switchToNonOpaque(DstSTy);
}
SrcDefinitionsToResolve.clear();
DstResolvedOpaqueTypes.clear();
@@ -672,17 +673,12 @@ bool ModuleLinker::computeResultingSelectionKind(StringRef ComdatName,
getComdatLeader(SrcM, ComdatName, SrcGV))
return true;
- const DataLayout *DstDL = DstM->getDataLayout();
- const DataLayout *SrcDL = SrcM->getDataLayout();
- if (!DstDL || !SrcDL) {
- return emitError(
- "Linking COMDATs named '" + ComdatName +
- "': can't do size dependent selection without DataLayout!");
- }
+ const DataLayout &DstDL = DstM->getDataLayout();
+ const DataLayout &SrcDL = SrcM->getDataLayout();
uint64_t DstSize =
- DstDL->getTypeAllocSize(DstGV->getType()->getPointerElementType());
+ DstDL.getTypeAllocSize(DstGV->getType()->getPointerElementType());
uint64_t SrcSize =
- SrcDL->getTypeAllocSize(SrcGV->getType()->getPointerElementType());
+ SrcDL.getTypeAllocSize(SrcGV->getType()->getPointerElementType());
if (Result == Comdat::SelectionKind::ExactMatch) {
if (SrcGV->getInitializer() != DstGV->getInitializer())
return emitError("Linking COMDATs named '" + ComdatName +
@@ -768,9 +764,7 @@ bool ModuleLinker::shouldLinkFromSource(bool &LinkFromSrc,
return false;
}
- // FIXME: Make datalayout mandatory and just use getDataLayout().
- DataLayout DL(Dest.getParent());
-
+ const DataLayout &DL = Dest.getParent()->getDataLayout();
uint64_t DestSize = DL.getTypeAllocSize(Dest.getType()->getElementType());
uint64_t SrcSize = DL.getTypeAllocSize(Src.getType()->getElementType());
LinkFromSrc = SrcSize > DestSize;
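Both hunks above converge on the same pattern, sketched here under the assumption that Module::getDataLayout() now returns a const reference: callers stop null-checking and just use the layout. allocSizeOf is a hypothetical helper.

    #include "llvm/IR/DataLayout.h"
    #include "llvm/IR/GlobalVariable.h"
    #include "llvm/IR/Module.h"

    uint64_t allocSizeOf(const llvm::GlobalVariable &GV) {
      // No null check: every Module now owns a (possibly default) DataLayout.
      const llvm::DataLayout &DL = GV.getParent()->getDataLayout();
      return DL.getTypeAllocSize(GV.getType()->getPointerElementType());
    }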
@@ -1256,9 +1250,10 @@ void ModuleLinker::linkNamedMDNodes() {
/// Drop DISubprograms that have been superseded.
///
-/// FIXME: this creates an asymmetric result: we strip losing subprograms from
-/// DstM, but leave losing subprograms in SrcM. Instead we should also strip
-/// losers from SrcM, but this requires extra plumbing in MapMetadata.
+/// FIXME: this creates an asymmetric result: we strip functions from losing
+/// subprograms in DstM, but leave losing subprograms in SrcM.
+/// TODO: Remove this logic once the backend can correctly determine canonical
+/// subprograms.
void ModuleLinker::stripReplacedSubprograms() {
// Avoid quadratic runtime by returning early when there's nothing to do.
if (OverridingFunctions.empty())
@@ -1268,8 +1263,8 @@ void ModuleLinker::stripReplacedSubprograms() {
auto Functions = std::move(OverridingFunctions);
OverridingFunctions.clear();
- // Drop subprograms whose functions have been overridden by the new compile
- // unit.
+ // Drop functions from subprograms if they've been overridden by the new
+ // compile unit.
NamedMDNode *CompileUnits = DstM->getNamedMetadata("llvm.dbg.cu");
if (!CompileUnits)
return;
@@ -1280,19 +1275,15 @@ void ModuleLinker::stripReplacedSubprograms() {
DITypedArray<DISubprogram> SPs(CU.getSubprograms());
assert(SPs && "Expected valid subprogram array");
- SmallVector<Metadata *, 16> NewSPs;
- NewSPs.reserve(SPs.getNumElements());
for (unsigned S = 0, SE = SPs.getNumElements(); S != SE; ++S) {
DISubprogram SP = SPs.getElement(S);
- if (SP && SP.getFunction() && Functions.count(SP.getFunction()))
+ if (!SP || !SP.getFunction() || !Functions.count(SP.getFunction()))
continue;
- NewSPs.push_back(SP);
+ // Prevent DebugInfoFinder from tagging this as the canonical subprogram,
+ // since the canonical one is in the incoming module.
+ SP->replaceFunction(nullptr);
}
-
- // Redirect operand to the overriding subprogram.
- if (NewSPs.size() != SPs.getNumElements())
- CU.replaceSubprograms(DIArray(MDNode::get(DstM->getContext(), NewSPs)));
}
}
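A self-contained analogue of the new strategy above (plain C++, not the DebugInfo API): superseded subprograms stay in their compile unit but drop the function link, so a finder walking them no longer treats them as canonical.

    #include <set>
    #include <vector>

    struct Subprogram { const void *Function = nullptr; };

    void stripReplaced(std::vector<Subprogram *> &SPs,
                       const std::set<const void *> &Overridden) {
      for (Subprogram *SP : SPs)
        if (SP && SP->Function && Overridden.count(SP->Function))
          SP->Function = nullptr;  // keep the node, sever the function link
    }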
@@ -1482,11 +1473,10 @@ bool ModuleLinker::run() {
// Inherit the target data from the source module if the destination module
// doesn't have one already.
- if (!DstM->getDataLayout() && SrcM->getDataLayout())
+ if (DstM->getDataLayout().isDefault())
DstM->setDataLayout(SrcM->getDataLayout());
- if (SrcM->getDataLayout() && DstM->getDataLayout() &&
- *SrcM->getDataLayout() != *DstM->getDataLayout()) {
+ if (SrcM->getDataLayout() != DstM->getDataLayout()) {
emitWarning("Linking two modules of different data layouts: '" +
SrcM->getModuleIdentifier() + "' is '" +
SrcM->getDataLayoutStr() + "' whereas '" +
@@ -1570,6 +1560,13 @@ bool ModuleLinker::run() {
MapValue(GV, ValueMap, RF_None, &TypeMap, &ValMaterializer);
}
+ // Strip replaced subprograms before mapping any metadata -- so that we're
+ // not changing metadata from the source module (note that
+ // linkGlobalValueBody() eventually calls RemapInstruction() and therefore
+ // MapMetadata()) -- but after linking global value prototypes -- so that
+ // OverridingFunctions has been built.
+ stripReplacedSubprograms();
+
// Link in the function bodies that are defined in the source module into
// DstM.
for (Function &SF : *SrcM) {
@@ -1592,9 +1589,6 @@ bool ModuleLinker::run() {
linkGlobalValueBody(Src);
}
- // Strip replaced subprograms before linking together compile units.
- stripReplacedSubprograms();
-
// Remap all of the named MDNodes in Src into the DstM module. We do this
// after linking GlobalValues so that MDNodes that reference GlobalValues
// are properly remapped.
@@ -1684,6 +1678,14 @@ void Linker::IdentifiedStructTypeSet::addNonOpaque(StructType *Ty) {
NonOpaqueStructTypes.insert(Ty);
}
+void Linker::IdentifiedStructTypeSet::switchToNonOpaque(StructType *Ty) {
+ assert(!Ty->isOpaque());
+ NonOpaqueStructTypes.insert(Ty);
+ bool Removed = OpaqueStructTypes.erase(Ty);
+ (void)Removed;
+ assert(Removed);
+}
+
void Linker::IdentifiedStructTypeSet::addOpaque(StructType *Ty) {
assert(Ty->isOpaque());
OpaqueStructTypes.insert(Ty);
@@ -1777,7 +1779,7 @@ bool Linker::LinkModules(Module *Dest, Module *Src) {
//===----------------------------------------------------------------------===//
LLVMBool LLVMLinkModules(LLVMModuleRef Dest, LLVMModuleRef Src,
- unsigned Unused, char **OutMessages) {
+ LLVMLinkerMode Unused, char **OutMessages) {
Module *D = unwrap(Dest);
std::string Message;
raw_string_ostream Stream(Message);
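A minimal analogue of IdentifiedStructTypeSet::switchToNonOpaque() added above (std::set stands in for the LLVM set types): when a destination struct receives a body in linkDefinedTypeBodies(), it must migrate between the two sets or later lookups will misclassify it.

    #include <cassert>
    #include <set>

    struct TypeSets {
      std::set<int> Opaque, NonOpaque;  // stand-ins for StructType* sets

      void switchToNonOpaque(int Ty) {
        NonOpaque.insert(Ty);
        bool Removed = Opaque.erase(Ty) != 0;
        (void)Removed;
        assert(Removed && "type was never tracked as opaque");
      }
    };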
diff --git a/lib/MC/ELFObjectWriter.cpp b/lib/MC/ELFObjectWriter.cpp
index 4819905..c99a3ee 100644
--- a/lib/MC/ELFObjectWriter.cpp
+++ b/lib/MC/ELFObjectWriter.cpp
@@ -185,11 +185,25 @@ class ELFObjectWriter : public MCObjectWriter {
}
public:
- ELFObjectWriter(MCELFObjectTargetWriter *MOTW, raw_ostream &_OS,
+ ELFObjectWriter(MCELFObjectTargetWriter *MOTW, raw_ostream &OS,
bool IsLittleEndian)
- : MCObjectWriter(_OS, IsLittleEndian), FWriter(IsLittleEndian),
+ : MCObjectWriter(OS, IsLittleEndian), FWriter(IsLittleEndian),
TargetObjectWriter(MOTW), NeedsGOT(false) {}
+ void reset() override {
+ UsedInReloc.clear();
+ WeakrefUsedInReloc.clear();
+ Renames.clear();
+ Relocations.clear();
+ ShStrTabBuilder.clear();
+ StrTabBuilder.clear();
+ FileSymbolData.clear();
+ LocalSymbolData.clear();
+ ExternalSymbolData.clear();
+ UndefinedSymbolData.clear();
+ MCObjectWriter::reset();
+ }
+
virtual ~ELFObjectWriter();
void WriteWord(uint64_t W) {
@@ -298,6 +312,8 @@ class ELFObjectWriter : public MCObjectWriter {
bool InSet,
bool IsPCRel) const override;
+ bool isWeak(const MCSymbolData &SD) const override;
+
void WriteObject(MCAssembler &Asm, const MCAsmLayout &Layout) override;
void writeSection(MCAssembler &Asm,
const SectionIndexMapTy &SectionIndexMap,
@@ -789,6 +805,10 @@ static const MCSymbol *getWeakRef(const MCSymbolRefExpr &Ref) {
return nullptr;
}
+static bool isWeak(const MCSymbolData &D) {
+ return D.getFlags() & ELF_STB_Weak || MCELF::GetType(D) == ELF::STT_GNU_IFUNC;
+}
+
void ELFObjectWriter::RecordRelocation(MCAssembler &Asm,
const MCAsmLayout &Layout,
const MCFragment *Fragment,
@@ -829,6 +849,10 @@ void ELFObjectWriter::RecordRelocation(MCAssembler &Asm,
Fixup.getLoc(), "Cannot represent a difference across sections");
const MCSymbolData &SymBD = Asm.getSymbolData(SymB);
+ if (::isWeak(SymBD))
+ Asm.getContext().FatalError(
+ Fixup.getLoc(), "Cannot represent a subtraction with a weak symbol");
+
uint64_t SymBOffset = Layout.getSymbolOffset(&SymBD);
uint64_t K = SymBOffset - FixupOffset;
IsPCRel = true;
@@ -1186,7 +1210,7 @@ getUncompressedData(MCAsmLayout &Layout,
static bool
prependCompressionHeader(uint64_t Size,
SmallVectorImpl<char> &CompressedContents) {
- static const StringRef Magic = "ZLIB";
+ const StringRef Magic = "ZLIB";
if (Size <= Magic.size() + sizeof(Size) + CompressedContents.size())
return false;
if (sys::IsLittleEndianHost)
@@ -1348,7 +1372,8 @@ static int cmpRel(const ELFRelocationEntry *AP, const ELFRelocationEntry *BP) {
return B.Offset - A.Offset;
if (B.Type != A.Type)
return A.Type - B.Type;
- llvm_unreachable("ELFRelocs might be unstable!");
+ //llvm_unreachable("ELFRelocs might be unstable!");
+ return 0;
}
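A sketch of why returning 0 is the right fix above: once equal keys are allowed, the comparison is a valid weak ordering, so sorting yields a deterministic result instead of tripping the unreachable. This standalone version uses std::stable_sort over a stand-in Reloc struct rather than the qsort-style cmpRel.

    #include <algorithm>
    #include <cstdint>
    #include <vector>

    struct Reloc { uint64_t Offset; unsigned Type; };

    // Descending offset, then descending type; equal entries compare false
    // both ways, which std::stable_sort handles without any tie-breaker.
    static bool relocLess(const Reloc &A, const Reloc &B) {
      if (A.Offset != B.Offset)
        return A.Offset > B.Offset;
      return A.Type > B.Type;
    }

    void sortRelocs(std::vector<Reloc> &Rs) {
      std::stable_sort(Rs.begin(), Rs.end(), relocLess);
    }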
static void sortRelocs(const MCAssembler &Asm,
@@ -1794,12 +1819,16 @@ ELFObjectWriter::IsSymbolRefDifferenceFullyResolvedImpl(const MCAssembler &Asm,
const MCFragment &FB,
bool InSet,
bool IsPCRel) const {
- if (DataA.getFlags() & ELF_STB_Weak || MCELF::GetType(DataA) == ELF::STT_GNU_IFUNC)
+ if (::isWeak(DataA))
return false;
return MCObjectWriter::IsSymbolRefDifferenceFullyResolvedImpl(
Asm, DataA, FB,InSet, IsPCRel);
}
+bool ELFObjectWriter::isWeak(const MCSymbolData &SD) const {
+ return ::isWeak(SD);
+}
+
MCObjectWriter *llvm::createELFObjectWriter(MCELFObjectTargetWriter *MOTW,
raw_ostream &OS,
bool IsLittleEndian) {
diff --git a/lib/MC/MCAsmInfo.cpp b/lib/MC/MCAsmInfo.cpp
index 04b8042..bad257a 100644
--- a/lib/MC/MCAsmInfo.cpp
+++ b/lib/MC/MCAsmInfo.cpp
@@ -39,6 +39,7 @@ MCAsmInfo::MCAsmInfo() {
CommentString = "#";
LabelSuffix = ":";
UseAssignmentForEHBegin = false;
+ NeedsLocalForSize = false;
PrivateGlobalPrefix = "L";
PrivateLabelPrefix = PrivateGlobalPrefix;
LinkerPrivateGlobalPrefix = "";
@@ -68,6 +69,7 @@ MCAsmInfo::MCAsmInfo() {
HasAggressiveSymbolFolding = true;
COMMDirectiveAlignmentIsInBytes = true;
LCOMMDirectiveAlignmentType = LCOMM::NoAlignment;
+ HasFunctionAlignment = true;
HasDotTypeDotSizeDirective = true;
HasSingleParameterDotFile = true;
HasIdentDirective = false;
diff --git a/lib/MC/MCAsmInfoDarwin.cpp b/lib/MC/MCAsmInfoDarwin.cpp
index a2a2504..ae9486d 100644
--- a/lib/MC/MCAsmInfoDarwin.cpp
+++ b/lib/MC/MCAsmInfoDarwin.cpp
@@ -16,7 +16,6 @@
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCSectionMachO.h"
-#include "llvm/MC/MCStreamer.h"
using namespace llvm;
bool MCAsmInfoDarwin::isSectionAtomizableBySymbols(
diff --git a/lib/MC/MCAsmStreamer.cpp b/lib/MC/MCAsmStreamer.cpp
index 2312cd5..62f5279 100644
--- a/lib/MC/MCAsmStreamer.cpp
+++ b/lib/MC/MCAsmStreamer.cpp
@@ -267,7 +267,7 @@ void MCAsmStreamer::EmitCommentsAndEOL() {
}
CommentStream.flush();
- StringRef Comments = CommentToEmit.str();
+ StringRef Comments = CommentToEmit;
assert(Comments.back() == '\n' &&
"Comment array not newline terminated");
diff --git a/lib/MC/MCAssembler.cpp b/lib/MC/MCAssembler.cpp
index 50ce845..857eafc 100644
--- a/lib/MC/MCAssembler.cpp
+++ b/lib/MC/MCAssembler.cpp
@@ -142,7 +142,7 @@ static bool getSymbolOffsetImpl(const MCAsmLayout &Layout,
// If SD is a variable, evaluate it.
MCValue Target;
- if (!S.getVariableValue()->EvaluateAsValue(Target, &Layout, nullptr))
+ if (!S.getVariableValue()->EvaluateAsRelocatable(Target, &Layout, nullptr))
report_fatal_error("unable to evaluate offset for variable '" +
S.getName() + "'");
@@ -188,7 +188,7 @@ const MCSymbol *MCAsmLayout::getBaseSymbol(const MCSymbol &Symbol) const {
const MCExpr *Expr = Symbol.getVariableValue();
MCValue Value;
- if (!Expr->EvaluateAsValue(Value, this, nullptr))
+ if (!Expr->evaluateAsValue(Value, *this))
llvm_unreachable("Invalid Expression");
const MCSymbolRefExpr *RefB = Value.getSymB();
@@ -277,9 +277,8 @@ MCFragment::MCFragment() : Kind(FragmentType(~0)) {
MCFragment::~MCFragment() {
}
-MCFragment::MCFragment(FragmentType _Kind, MCSectionData *_Parent)
- : Kind(_Kind), Parent(_Parent), Atom(nullptr), Offset(~UINT64_C(0))
-{
+MCFragment::MCFragment(FragmentType Kind, MCSectionData *Parent)
+ : Kind(Kind), Parent(Parent), Atom(nullptr), Offset(~UINT64_C(0)) {
if (Parent)
Parent->getFragmentList().push_back(this);
}
@@ -298,15 +297,10 @@ MCEncodedFragmentWithFixups::~MCEncodedFragmentWithFixups() {
MCSectionData::MCSectionData() : Section(nullptr) {}
-MCSectionData::MCSectionData(const MCSection &_Section, MCAssembler *A)
- : Section(&_Section),
- Ordinal(~UINT32_C(0)),
- Alignment(1),
- BundleLockState(NotBundleLocked),
- BundleLockNestingDepth(0),
- BundleGroupBeforeFirstInst(false),
- HasInstructions(false)
-{
+MCSectionData::MCSectionData(const MCSection &Section, MCAssembler *A)
+ : Section(&Section), Ordinal(~UINT32_C(0)), Alignment(1),
+ BundleLockState(NotBundleLocked), BundleLockNestingDepth(0),
+ BundleGroupBeforeFirstInst(false), HasInstructions(false) {
if (A)
A->getSectionList().push_back(this);
}
@@ -364,10 +358,10 @@ void MCSectionData::setBundleLockState(BundleLockStateType NewState) {
MCSymbolData::MCSymbolData() : Symbol(nullptr) {}
-MCSymbolData::MCSymbolData(const MCSymbol &_Symbol, MCFragment *_Fragment,
- uint64_t _Offset, MCAssembler *A)
- : Symbol(&_Symbol), Fragment(_Fragment), Offset(_Offset),
- SymbolSize(nullptr), CommonAlign(-1U), Flags(0), Index(0) {
+MCSymbolData::MCSymbolData(const MCSymbol &Symbol, MCFragment *Fragment,
+ uint64_t Offset, MCAssembler *A)
+ : Symbol(&Symbol), Fragment(Fragment), Offset(Offset), SymbolSize(nullptr),
+ CommonAlign(-1U), Flags(0), Index(0) {
if (A)
A->getSymbolList().push_back(this);
}
@@ -479,18 +473,6 @@ const MCSymbolData *MCAssembler::getAtom(const MCSymbolData *SD) const {
return SD->getFragment()->getAtom();
}
-// Try to fully compute Expr to an absolute value and if that fails produce
-// a relocatable expr.
-// FIXME: Should this be the behavior of EvaluateAsRelocatable itself?
-static bool evaluate(const MCExpr &Expr, const MCAsmLayout &Layout,
- const MCFixup &Fixup, MCValue &Target) {
- if (Expr.EvaluateAsValue(Target, &Layout, &Fixup)) {
- if (Target.isAbsolute())
- return true;
- }
- return Expr.EvaluateAsRelocatable(Target, &Layout, &Fixup);
-}
-
bool MCAssembler::evaluateFixup(const MCAsmLayout &Layout,
const MCFixup &Fixup, const MCFragment *DF,
MCValue &Target, uint64_t &Value) const {
@@ -500,7 +482,7 @@ bool MCAssembler::evaluateFixup(const MCAsmLayout &Layout,
// probably merge the two into a single callback that tries to evaluate a
// fixup and records a relocation if one is needed.
const MCExpr *Expr = Fixup.getValue();
- if (!evaluate(*Expr, Layout, Fixup, Target))
+ if (!Expr->EvaluateAsRelocatable(Target, &Layout, &Fixup))
getContext().FatalError(Fixup.getLoc(), "expected relocatable expression");
bool IsPCRel = Backend.getFixupKindInfo(
@@ -795,7 +777,7 @@ static void writeFragment(const MCAssembler &Asm, const MCAsmLayout &Layout,
case MCFragment::FT_LEB: {
const MCLEBFragment &LF = cast<MCLEBFragment>(F);
- OW->WriteBytes(LF.getContents().str());
+ OW->WriteBytes(LF.getContents());
break;
}
@@ -811,12 +793,12 @@ static void writeFragment(const MCAssembler &Asm, const MCAsmLayout &Layout,
case MCFragment::FT_Dwarf: {
const MCDwarfLineAddrFragment &OF = cast<MCDwarfLineAddrFragment>(F);
- OW->WriteBytes(OF.getContents().str());
+ OW->WriteBytes(OF.getContents());
break;
}
case MCFragment::FT_DwarfFrame: {
const MCDwarfCallFrameFragment &CF = cast<MCDwarfCallFrameFragment>(F);
- OW->WriteBytes(CF.getContents().str());
+ OW->WriteBytes(CF.getContents());
break;
}
}
@@ -1040,7 +1022,10 @@ bool MCAssembler::relaxInstruction(MCAsmLayout &Layout,
bool MCAssembler::relaxLEB(MCAsmLayout &Layout, MCLEBFragment &LF) {
uint64_t OldSize = LF.getContents().size();
- int64_t Value = LF.getValue().evaluateKnownAbsolute(Layout);
+ int64_t Value;
+ bool Abs = LF.getValue().evaluateKnownAbsolute(Value, Layout);
+ if (!Abs)
+ report_fatal_error("sleb128 and uleb128 expressions must be absolute");
SmallString<8> &Data = LF.getContents();
Data.clear();
raw_svector_ostream OSE(Data);
@@ -1056,7 +1041,10 @@ bool MCAssembler::relaxDwarfLineAddr(MCAsmLayout &Layout,
MCDwarfLineAddrFragment &DF) {
MCContext &Context = Layout.getAssembler().getContext();
uint64_t OldSize = DF.getContents().size();
- int64_t AddrDelta = DF.getAddrDelta().evaluateKnownAbsolute(Layout);
+ int64_t AddrDelta;
+ bool Abs = DF.getAddrDelta().evaluateKnownAbsolute(AddrDelta, Layout);
+ assert(Abs && "We created a line delta with an invalid expression");
+ (void) Abs;
int64_t LineDelta;
LineDelta = DF.getLineDelta();
SmallString<8> &Data = DF.getContents();
@@ -1071,7 +1059,10 @@ bool MCAssembler::relaxDwarfCallFrameFragment(MCAsmLayout &Layout,
MCDwarfCallFrameFragment &DF) {
MCContext &Context = Layout.getAssembler().getContext();
uint64_t OldSize = DF.getContents().size();
- int64_t AddrDelta = DF.getAddrDelta().evaluateKnownAbsolute(Layout);
+ int64_t AddrDelta;
+ bool Abs = DF.getAddrDelta().evaluateKnownAbsolute(AddrDelta, Layout);
+ assert(Abs && "We created call frame with an invalid expression");
+ (void) Abs;
SmallString<8> &Data = DF.getContents();
Data.clear();
raw_svector_ostream OSE(Data);
diff --git a/lib/MC/MCContext.cpp b/lib/MC/MCContext.cpp
index 721edd4..3cb3ea1 100644
--- a/lib/MC/MCContext.cpp
+++ b/lib/MC/MCContext.cpp
@@ -18,6 +18,7 @@
#include "llvm/MC/MCSectionCOFF.h"
#include "llvm/MC/MCSectionELF.h"
#include "llvm/MC/MCSectionMachO.h"
+#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSymbol.h"
#include "llvm/Support/ELF.h"
#include "llvm/Support/ErrorHandling.h"
@@ -33,7 +34,7 @@ MCContext::MCContext(const MCAsmInfo *mai, const MCRegisterInfo *mri,
const MCObjectFileInfo *mofi, const SourceMgr *mgr,
bool DoAutoReset)
: SrcMgr(mgr), MAI(mai), MRI(mri), MOFI(mofi), Allocator(),
- Symbols(Allocator), UsedNames(Allocator), NextUniqueID(0),
+ Symbols(Allocator), UsedNames(Allocator),
CurrentDwarfLoc(0, 0, 0, DWARF2_FLAG_IS_STMT, 0, 0), DwarfLocSeen(false),
GenDwarfForAssembly(false), GenDwarfFileNumber(0), DwarfVersion(4),
AllowTemporaryLabels(true), DwarfCompileUnitID(0),
@@ -86,7 +87,7 @@ void MCContext::reset() {
ELFUniquingMap.clear();
COFFUniquingMap.clear();
- NextUniqueID = 0;
+ NextID.clear();
AllowTemporaryLabels = true;
DwarfLocSeen = false;
GenDwarfForAssembly = false;
@@ -97,13 +98,15 @@ void MCContext::reset() {
// Symbol Manipulation
//===----------------------------------------------------------------------===//
-MCSymbol *MCContext::GetOrCreateSymbol(StringRef Name) {
- assert(!Name.empty() && "Normal symbols cannot be unnamed!");
+MCSymbol *MCContext::GetOrCreateSymbol(const Twine &Name) {
+ SmallString<128> NameSV;
+ StringRef NameRef = Name.toStringRef(NameSV);
- MCSymbol *&Sym = Symbols[Name];
+ assert(!NameRef.empty() && "Normal symbols cannot be unnamed!");
+ MCSymbol *&Sym = Symbols[NameRef];
if (!Sym)
- Sym = CreateSymbol(Name);
+ Sym = CreateSymbol(NameRef, false);
return Sym;
}
@@ -130,53 +133,54 @@ MCSymbol *MCContext::getOrCreateSectionSymbol(const MCSectionELF &Section) {
return Sym;
}
-MCSymbol *MCContext::getOrCreateFrameAllocSymbol(StringRef FuncName) {
- return GetOrCreateSymbol(Twine(MAI->getPrivateGlobalPrefix()) +
- "frameallocation_" + FuncName);
+MCSymbol *MCContext::getOrCreateFrameAllocSymbol(StringRef FuncName,
+ unsigned Idx) {
+ return GetOrCreateSymbol(Twine(MAI->getPrivateGlobalPrefix()) + FuncName +
+ "$frame_escape_" + Twine(Idx));
}
-MCSymbol *MCContext::CreateSymbol(StringRef Name) {
+MCSymbol *MCContext::CreateSymbol(StringRef Name, bool AlwaysAddSuffix) {
// Determine whether this is an assembler temporary or normal label, if used.
- bool isTemporary = false;
+ bool IsTemporary = false;
if (AllowTemporaryLabels)
- isTemporary = Name.startswith(MAI->getPrivateGlobalPrefix());
+ IsTemporary = Name.startswith(MAI->getPrivateGlobalPrefix());
- auto NameEntry = UsedNames.insert(std::make_pair(Name, true));
- if (!NameEntry.second) {
- assert(isTemporary && "Cannot rename non-temporary symbols");
- SmallString<128> NewName = Name;
- do {
+ SmallString<128> NewName = Name;
+ bool AddSuffix = AlwaysAddSuffix;
+ unsigned &NextUniqueID = NextID[Name];
+ for (;;) {
+ if (AddSuffix) {
NewName.resize(Name.size());
raw_svector_ostream(NewName) << NextUniqueID++;
- NameEntry = UsedNames.insert(std::make_pair(NewName, true));
- } while (!NameEntry.second);
+ }
+ auto NameEntry = UsedNames.insert(std::make_pair(NewName, true));
+ if (NameEntry.second) {
+ // Ok, we found a name. Have the MCSymbol object itself refer to the copy
+ // of the string that is embedded in the UsedNames entry.
+ MCSymbol *Result =
+ new (*this) MCSymbol(NameEntry.first->getKey(), IsTemporary);
+ return Result;
+ }
+ assert(IsTemporary && "Cannot rename non-temporary symbols");
+ AddSuffix = true;
}
-
- // Ok, the entry doesn't already exist. Have the MCSymbol object itself refer
- // to the copy of the string that is embedded in the UsedNames entry.
- MCSymbol *Result =
- new (*this) MCSymbol(NameEntry.first->getKey(), isTemporary);
-
- return Result;
+ llvm_unreachable("Infinite loop");
}
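A standalone analogue of the rewritten CreateSymbol() loop: the counter is now per base name (the NextID map) rather than one global NextUniqueID, so "a" and "b" each get their own 0, 1, 2... suffixes. std::string and std::set stand in for the UsedNames table.

    #include <map>
    #include <set>
    #include <string>

    std::string createUniqueName(const std::string &Name, bool AlwaysAddSuffix,
                                 std::map<std::string, unsigned> &NextID,
                                 std::set<std::string> &Used) {
      unsigned &Counter = NextID[Name];
      bool AddSuffix = AlwaysAddSuffix;
      for (;;) {
        std::string Candidate =
            AddSuffix ? Name + std::to_string(Counter++) : Name;
        if (Used.insert(Candidate).second)
          return Candidate;       // found an unused name
        AddSuffix = true;         // collision: retry with the next suffix
      }
    }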
-MCSymbol *MCContext::GetOrCreateSymbol(const Twine &Name) {
+MCSymbol *MCContext::createTempSymbol(const Twine &Name, bool AlwaysAddSuffix) {
SmallString<128> NameSV;
- return GetOrCreateSymbol(Name.toStringRef(NameSV));
+ raw_svector_ostream(NameSV) << MAI->getPrivateGlobalPrefix() << Name;
+ return CreateSymbol(NameSV, AlwaysAddSuffix);
}
MCSymbol *MCContext::CreateLinkerPrivateTempSymbol() {
SmallString<128> NameSV;
- raw_svector_ostream(NameSV)
- << MAI->getLinkerPrivateGlobalPrefix() << "tmp" << NextUniqueID++;
- return CreateSymbol(NameSV);
+ raw_svector_ostream(NameSV) << MAI->getLinkerPrivateGlobalPrefix() << "tmp";
+ return CreateSymbol(NameSV, true);
}
MCSymbol *MCContext::CreateTempSymbol() {
- SmallString<128> NameSV;
- raw_svector_ostream(NameSV)
- << MAI->getPrivateGlobalPrefix() << "tmp" << NextUniqueID++;
- return CreateSymbol(NameSV);
+ return createTempSymbol("tmp", true);
}
unsigned MCContext::NextInstance(unsigned LocalLabelVal) {
@@ -214,24 +218,20 @@ MCSymbol *MCContext::GetDirectionalLocalSymbol(unsigned LocalLabelVal,
return getOrCreateDirectionalLocalSymbol(LocalLabelVal, Instance);
}
-MCSymbol *MCContext::LookupSymbol(StringRef Name) const {
- return Symbols.lookup(Name);
-}
-
MCSymbol *MCContext::LookupSymbol(const Twine &Name) const {
SmallString<128> NameSV;
- Name.toVector(NameSV);
- return LookupSymbol(NameSV.str());
+ StringRef NameRef = Name.toStringRef(NameSV);
+ return Symbols.lookup(NameRef);
}
//===----------------------------------------------------------------------===//
// Section Management
//===----------------------------------------------------------------------===//
-const MCSectionMachO *MCContext::
-getMachOSection(StringRef Segment, StringRef Section,
- unsigned TypeAndAttributes,
- unsigned Reserved2, SectionKind Kind) {
+const MCSectionMachO *
+MCContext::getMachOSection(StringRef Segment, StringRef Section,
+ unsigned TypeAndAttributes, unsigned Reserved2,
+ SectionKind Kind, const char *BeginSymName) {
// We unique sections by their segment/section pair. The returned section
// may not have the same flags as the requested section, if so this should be
@@ -244,17 +244,23 @@ getMachOSection(StringRef Segment, StringRef Section,
Name += Section;
// Do the lookup, if we have a hit, return it.
- const MCSectionMachO *&Entry = MachOUniquingMap[Name.str()];
- if (Entry) return Entry;
+ const MCSectionMachO *&Entry = MachOUniquingMap[Name];
+ if (Entry)
+ return Entry;
+
+ MCSymbol *Begin = nullptr;
+ if (BeginSymName)
+ Begin = createTempSymbol(BeginSymName, false);
// Otherwise, return a new section.
return Entry = new (*this) MCSectionMachO(Segment, Section, TypeAndAttributes,
- Reserved2, Kind);
+ Reserved2, Kind, Begin);
}
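A hedged usage sketch for the new BeginSymName parameter: passing a non-null name makes the context create the section's begin label eagerly (via createTempSymbol), so consumers such as DWARF emission can reference the section start without first switching to it. The 4-argument overload and "section_info" mirror the calls in MCObjectFileInfo.cpp further down; header paths are 3.7-era assumptions.

    #include "llvm/MC/MCContext.h"
    #include "llvm/MC/MCSectionMachO.h"
    #include "llvm/MC/SectionKind.h"
    #include "llvm/Support/MachO.h"

    const llvm::MCSectionMachO *debugInfoSection(llvm::MCContext &Ctx) {
      // "section_info" seeds the begin symbol; nullptr would skip creating it.
      return Ctx.getMachOSection("__DWARF", "__debug_info",
                                 llvm::MachO::S_ATTR_DEBUG,
                                 llvm::SectionKind::getMetadata(),
                                 "section_info");
    }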
const MCSectionELF *MCContext::getELFSection(StringRef Section, unsigned Type,
- unsigned Flags) {
- return getELFSection(Section, Type, Flags, 0, "");
+ unsigned Flags,
+ const char *BeginSymName) {
+ return getELFSection(Section, Type, Flags, 0, "", BeginSymName);
}
void MCContext::renameELFSection(const MCSectionELF *Section, StringRef Name) {
@@ -272,7 +278,8 @@ void MCContext::renameELFSection(const MCSectionELF *Section, StringRef Name) {
const MCSectionELF *MCContext::getELFSection(StringRef Section, unsigned Type,
unsigned Flags, unsigned EntrySize,
- StringRef Group, bool Unique) {
+ StringRef Group, bool Unique,
+ const char *BeginSymName) {
// Do the lookup, if we have a hit, return it.
auto IterBool = ELFUniquingMap.insert(
std::make_pair(SectionGroupPair(Section, Group), nullptr));
@@ -292,8 +299,12 @@ const MCSectionELF *MCContext::getELFSection(StringRef Section, unsigned Type,
else
Kind = SectionKind::getReadOnly();
- MCSectionELF *Result = new (*this)
- MCSectionELF(CachedName, Type, Flags, Kind, EntrySize, GroupSym, Unique);
+ MCSymbol *Begin = nullptr;
+ if (BeginSymName)
+ Begin = createTempSymbol(BeginSymName, false);
+
+ MCSectionELF *Result = new (*this) MCSectionELF(
+ CachedName, Type, Flags, Kind, EntrySize, GroupSym, Unique, Begin);
if (!Unique)
Entry.second = Result;
return Result;
@@ -301,22 +312,23 @@ const MCSectionELF *MCContext::getELFSection(StringRef Section, unsigned Type,
const MCSectionELF *MCContext::getELFSection(StringRef Section, unsigned Type,
unsigned Flags, unsigned EntrySize,
- StringRef Group) {
- return getELFSection(Section, Type, Flags, EntrySize, Group, false);
+ StringRef Group,
+ const char *BeginSymName) {
+ return getELFSection(Section, Type, Flags, EntrySize, Group, false,
+ BeginSymName);
}
const MCSectionELF *MCContext::CreateELFGroupSection() {
- MCSectionELF *Result =
- new (*this) MCSectionELF(".group", ELF::SHT_GROUP, 0,
- SectionKind::getReadOnly(), 4, nullptr, false);
+ MCSectionELF *Result = new (*this)
+ MCSectionELF(".group", ELF::SHT_GROUP, 0, SectionKind::getReadOnly(), 4,
+ nullptr, false, nullptr);
return Result;
}
-const MCSectionCOFF *MCContext::getCOFFSection(StringRef Section,
- unsigned Characteristics,
- SectionKind Kind,
- StringRef COMDATSymName,
- int Selection) {
+const MCSectionCOFF *
+MCContext::getCOFFSection(StringRef Section, unsigned Characteristics,
+ SectionKind Kind, StringRef COMDATSymName,
+ int Selection, const char *BeginSymName) {
// Do the lookup, if we have a hit, return it.
SectionGroupTriple T(Section, COMDATSymName, Selection);
@@ -329,18 +341,23 @@ const MCSectionCOFF *MCContext::getCOFFSection(StringRef Section,
if (!COMDATSymName.empty())
COMDATSymbol = GetOrCreateSymbol(COMDATSymName);
+ MCSymbol *Begin = nullptr;
+ if (BeginSymName)
+ Begin = createTempSymbol(BeginSymName, false);
+
StringRef CachedName = std::get<0>(Iter->first);
- MCSectionCOFF *Result = new (*this)
- MCSectionCOFF(CachedName, Characteristics, COMDATSymbol, Selection, Kind);
+ MCSectionCOFF *Result = new (*this) MCSectionCOFF(
+ CachedName, Characteristics, COMDATSymbol, Selection, Kind, Begin);
Iter->second = Result;
return Result;
}
-const MCSectionCOFF *
-MCContext::getCOFFSection(StringRef Section, unsigned Characteristics,
- SectionKind Kind) {
- return getCOFFSection(Section, Characteristics, Kind, "", 0);
+const MCSectionCOFF *MCContext::getCOFFSection(StringRef Section,
+ unsigned Characteristics,
+ SectionKind Kind,
+ const char *BeginSymName) {
+ return getCOFFSection(Section, Characteristics, Kind, "", 0, BeginSymName);
}
const MCSectionCOFF *MCContext::getCOFFSection(StringRef Section) {
diff --git a/lib/MC/MCDwarf.cpp b/lib/MC/MCDwarf.cpp
index 5d96914..87e7ed1 100644
--- a/lib/MC/MCDwarf.cpp
+++ b/lib/MC/MCDwarf.cpp
@@ -179,28 +179,19 @@ EmitDwarfLineTable(MCObjectStreamer *MCOS, const MCSection *Section,
}
// Emit a DW_LNE_end_sequence for the end of the section.
- // Using the pointer Section create a temporary label at the end of the
- // section and use that and the LastLabel to compute the address delta
- // and use INT64_MAX as the line delta which is the signal that this is
- // actually a DW_LNE_end_sequence.
+ // Use the section end label to compute the address delta and use INT64_MAX
+ // as the line delta which is the signal that this is actually a
+ // DW_LNE_end_sequence.
+ MCSymbol *SectionEnd = MCOS->endSection(Section);
- // Switch to the section to be able to create a symbol at its end.
- // TODO: keep track of the last subsection so that this symbol appears in the
- // correct place.
- MCOS->SwitchSection(Section);
+ // Switch back the dwarf line section, in case endSection had to switch the
+ // section.
+ MCContext &Ctx = MCOS->getContext();
+ MCOS->SwitchSection(Ctx.getObjectFileInfo()->getDwarfLineSection());
- MCContext &context = MCOS->getContext();
- // Create a symbol at the end of the section.
- MCSymbol *SectionEnd = context.CreateTempSymbol();
- // Set the value of the symbol, as we are at the end of the section.
- MCOS->EmitLabel(SectionEnd);
-
- // Switch back the dwarf line section.
- MCOS->SwitchSection(context.getObjectFileInfo()->getDwarfLineSection());
-
- const MCAsmInfo *asmInfo = MCOS->getContext().getAsmInfo();
+ const MCAsmInfo *AsmInfo = Ctx.getAsmInfo();
MCOS->EmitDwarfAdvanceLineAddr(INT64_MAX, LastLabel, SectionEnd,
- asmInfo->getPointerSize());
+ AsmInfo->getPointerSize());
}
//
@@ -243,7 +234,8 @@ std::pair<MCSymbol *, MCSymbol *> MCDwarfLineTableHeader::Emit(MCStreamer *MCOS)
0, // length of DW_LNS_set_epilogue_begin
1 // DW_LNS_set_isa
};
- assert(array_lengthof(StandardOpcodeLengths) == (DWARF2_LINE_OPCODE_BASE - 1));
+ assert(array_lengthof(StandardOpcodeLengths) ==
+ (DWARF2_LINE_OPCODE_BASE - 1));
return Emit(MCOS, StandardOpcodeLengths);
}
@@ -446,7 +438,7 @@ void MCDwarfLineAddr::Encode(MCContext &Context, int64_t LineDelta,
if (LineDelta == INT64_MAX) {
if (AddrDelta == MAX_SPECIAL_ADDR_DELTA)
OS << char(dwarf::DW_LNS_const_add_pc);
- else {
+ else if (AddrDelta) {
OS << char(dwarf::DW_LNS_advance_pc);
encodeULEB128(AddrDelta, OS);
}
@@ -1007,11 +999,13 @@ static void EmitPersonality(MCStreamer &streamer, const MCSymbol &symbol,
namespace {
class FrameEmitterImpl {
int CFAOffset;
+ int InitialCFAOffset;
bool IsEH;
const MCSymbol *SectionStart;
public:
FrameEmitterImpl(bool isEH)
- : CFAOffset(0), IsEH(isEH), SectionStart(nullptr) {}
+ : CFAOffset(0), InitialCFAOffset(0), IsEH(isEH), SectionStart(nullptr) {
+ }
void setSectionStart(const MCSymbol *Label) { SectionStart = Label; }
@@ -1292,7 +1286,7 @@ const MCSymbol &FrameEmitterImpl::EmitCIE(MCObjectStreamer &streamer,
Augmentation += "R";
if (IsSignalFrame)
Augmentation += "S";
- streamer.EmitBytes(Augmentation.str());
+ streamer.EmitBytes(Augmentation);
}
streamer.EmitIntValue(0, 1);
@@ -1353,6 +1347,8 @@ const MCSymbol &FrameEmitterImpl::EmitCIE(MCObjectStreamer &streamer,
EmitCFIInstructions(streamer, Instructions, nullptr);
}
+ InitialCFAOffset = CFAOffset;
+
// Padding
streamer.EmitValueToAlignment(IsEH ? 4 : MAI->getPointerSize());
@@ -1368,6 +1364,8 @@ MCSymbol *FrameEmitterImpl::EmitFDE(MCObjectStreamer &streamer,
MCSymbol *fdeEnd = context.CreateTempSymbol();
const MCObjectFileInfo *MOFI = context.getObjectFileInfo();
+ CFAOffset = InitialCFAOffset;
+
// Length
const MCExpr *Length = MakeStartMinusEndExpr(streamer, *fdeStart, *fdeEnd, 0);
emitAbsValue(streamer, Length, 4);
diff --git a/lib/MC/MCELFStreamer.cpp b/lib/MC/MCELFStreamer.cpp
index 199825e..cdf5033 100644
--- a/lib/MC/MCELFStreamer.cpp
+++ b/lib/MC/MCELFStreamer.cpp
@@ -32,6 +32,7 @@
#include "llvm/Support/Debug.h"
#include "llvm/Support/ELF.h"
#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/TargetRegistry.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
@@ -122,12 +123,11 @@ void MCELFStreamer::EmitWeakReference(MCSymbol *Alias, const MCSymbol *Symbol) {
// If neither T1 < T2 nor T2 < T1 according to this ordering, use T2 (the user
// provided type).
static unsigned CombineSymbolTypes(unsigned T1, unsigned T2) {
- unsigned TypeOrdering[] = {ELF::STT_NOTYPE, ELF::STT_OBJECT, ELF::STT_FUNC,
- ELF::STT_GNU_IFUNC, ELF::STT_TLS};
- for (unsigned i = 0; i != array_lengthof(TypeOrdering); ++i) {
- if (T1 == TypeOrdering[i])
+ for (unsigned Type : {ELF::STT_NOTYPE, ELF::STT_OBJECT, ELF::STT_FUNC,
+ ELF::STT_GNU_IFUNC, ELF::STT_TLS}) {
+ if (T1 == Type)
return T2;
- if (T2 == TypeOrdering[i])
+ if (T2 == Type)
return T1;
}
diff --git a/lib/MC/MCExpr.cpp b/lib/MC/MCExpr.cpp
index 709dc6b..8a64403 100644
--- a/lib/MC/MCExpr.cpp
+++ b/lib/MC/MCExpr.cpp
@@ -192,6 +192,7 @@ StringRef MCSymbolRefExpr::getVariantKindName(VariantKind Kind) {
case VK_GOTPAGE: return "GOTPAGE";
case VK_GOTPAGEOFF: return "GOTPAGEOFF";
case VK_SECREL: return "SECREL32";
+ case VK_SIZE: return "SIZE";
case VK_WEAKREF: return "WEAKREF";
case VK_ARM_NONE: return "none";
case VK_ARM_TARGET1: return "target1";
@@ -311,6 +312,7 @@ MCSymbolRefExpr::getVariantKindForName(StringRef Name) {
.Case("gotpageoff", VK_GOTPAGEOFF)
.Case("imgrel", VK_COFF_IMGREL32)
.Case("secrel32", VK_SECREL)
+ .Case("size", VK_SIZE)
.Case("l", VK_PPC_LO)
.Case("h", VK_PPC_HI)
.Case("ha", VK_PPC_HA)
@@ -404,13 +406,10 @@ bool MCExpr::EvaluateAsAbsolute(int64_t &Res, const MCAssembler &Asm) const {
return EvaluateAsAbsolute(Res, &Asm, nullptr, nullptr);
}
-int64_t MCExpr::evaluateKnownAbsolute(const MCAsmLayout &Layout) const {
- int64_t Res;
- bool Abs =
- evaluateAsAbsolute(Res, &Layout.getAssembler(), &Layout, nullptr, true);
- (void)Abs;
- assert(Abs && "Not actually absolute");
- return Res;
+bool MCExpr::evaluateKnownAbsolute(int64_t &Res,
+ const MCAsmLayout &Layout) const {
+ return evaluateAsAbsolute(Res, &Layout.getAssembler(), &Layout, nullptr,
+ true);
}
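A hedged usage sketch for the new bool-returning evaluateKnownAbsolute(): failure handling moves to the caller (compare the relaxLEB/relaxDwarf* hunks earlier, which now report or assert themselves). knownAbsoluteOrDie is a hypothetical helper.

    #include "llvm/MC/MCAsmLayout.h"
    #include "llvm/MC/MCExpr.h"
    #include "llvm/Support/ErrorHandling.h"

    int64_t knownAbsoluteOrDie(const llvm::MCExpr &E,
                               const llvm::MCAsmLayout &Layout) {
      int64_t Value;
      if (!E.evaluateKnownAbsolute(Value, Layout))
        llvm::report_fatal_error("expression must be absolute at layout time");
      return Value;
    }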
bool MCExpr::EvaluateAsAbsolute(int64_t &Res, const MCAssembler *Asm,
@@ -433,8 +432,8 @@ bool MCExpr::evaluateAsAbsolute(int64_t &Res, const MCAssembler *Asm,
return true;
}
- bool IsRelocatable = EvaluateAsRelocatableImpl(
- Value, Asm, Layout, nullptr, Addrs, InSet, /*ForceVarExpansion*/ false);
+ bool IsRelocatable =
+ EvaluateAsRelocatableImpl(Value, Asm, Layout, nullptr, Addrs, InSet);
// Record the current value.
Res = Value.getConstant();
@@ -443,13 +442,10 @@ bool MCExpr::evaluateAsAbsolute(int64_t &Res, const MCAssembler *Asm,
}
/// \brief Helper method for \see EvaluateSymbolAdd().
-static void AttemptToFoldSymbolOffsetDifference(const MCAssembler *Asm,
- const MCAsmLayout *Layout,
- const SectionAddrMap *Addrs,
- bool InSet,
- const MCSymbolRefExpr *&A,
- const MCSymbolRefExpr *&B,
- int64_t &Addend) {
+static void AttemptToFoldSymbolOffsetDifference(
+ const MCAssembler *Asm, const MCAsmLayout *Layout,
+ const SectionAddrMap *Addrs, bool InSet, const MCSymbolRefExpr *&A,
+ const MCSymbolRefExpr *&B, int64_t &Addend) {
if (!A || !B)
return;
@@ -523,13 +519,11 @@ static void AttemptToFoldSymbolOffsetDifference(const MCAssembler *Asm,
/// They might look redundant, but this function can be used before layout
/// is done (see the object streamer for example) and having the Asm argument
/// lets us avoid relaxations early.
-static bool EvaluateSymbolicAdd(const MCAssembler *Asm,
- const MCAsmLayout *Layout,
- const SectionAddrMap *Addrs,
- bool InSet,
- const MCValue &LHS,const MCSymbolRefExpr *RHS_A,
- const MCSymbolRefExpr *RHS_B, int64_t RHS_Cst,
- MCValue &Res) {
+static bool
+EvaluateSymbolicAdd(const MCAssembler *Asm, const MCAsmLayout *Layout,
+ const SectionAddrMap *Addrs, bool InSet, const MCValue &LHS,
+ const MCSymbolRefExpr *RHS_A, const MCSymbolRefExpr *RHS_B,
+ int64_t RHS_Cst, MCValue &Res) {
// FIXME: This routine (and other evaluation parts) are *incredibly* sloppy
// about dealing with modifiers. This will ultimately bite us, one day.
const MCSymbolRefExpr *LHS_A = LHS.getSymA();
@@ -587,21 +581,29 @@ bool MCExpr::EvaluateAsRelocatable(MCValue &Res,
const MCFixup *Fixup) const {
MCAssembler *Assembler = Layout ? &Layout->getAssembler() : nullptr;
return EvaluateAsRelocatableImpl(Res, Assembler, Layout, Fixup, nullptr,
- false, /*ForceVarExpansion*/ false);
+ false);
}
-bool MCExpr::EvaluateAsValue(MCValue &Res, const MCAsmLayout *Layout,
- const MCFixup *Fixup) const {
- MCAssembler *Assembler = Layout ? &Layout->getAssembler() : nullptr;
- return EvaluateAsRelocatableImpl(Res, Assembler, Layout, Fixup, nullptr,
- false, /*ForceVarExpansion*/ true);
+bool MCExpr::evaluateAsValue(MCValue &Res, const MCAsmLayout &Layout) const {
+ MCAssembler *Assembler = &Layout.getAssembler();
+ return EvaluateAsRelocatableImpl(Res, Assembler, &Layout, nullptr, nullptr,
+ true);
+}
+
+static bool canExpand(const MCSymbol &Sym, const MCAssembler *Asm, bool InSet) {
+ if (InSet)
+ return true;
+ if (!Asm)
+ return false;
+ const MCSymbolData &SD = Asm->getSymbolData(Sym);
+ return !Asm->getWriter().isWeak(SD);
}
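A plain-C++ analogue of canExpand() above: a variable symbol may be folded into its definition only inside a symbol-difference set, or when an assembler is available and its writer can prove the symbol is not weak (a weak definition can be replaced at link time, so its value must stay symbolic).

    struct Sym { bool Weak = false; };          // stand-in for MCSymbol
    struct Writer { bool isWeak(const Sym &S) const { return S.Weak; } };

    bool canExpand(const Sym &S, const Writer *W, bool InSet) {
      if (InSet)
        return true;   // differences within a set always resolve locally
      if (!W)
        return false;  // too early: nothing to ask about weakness
      return !W->isWeak(S);
    }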
bool MCExpr::EvaluateAsRelocatableImpl(MCValue &Res, const MCAssembler *Asm,
const MCAsmLayout *Layout,
const MCFixup *Fixup,
- const SectionAddrMap *Addrs, bool InSet,
- bool ForceVarExpansion) const {
+ const SectionAddrMap *Addrs,
+ bool InSet) const {
++stats::MCExprEvaluate;
switch (getKind()) {
@@ -618,28 +620,24 @@ bool MCExpr::EvaluateAsRelocatableImpl(MCValue &Res, const MCAssembler *Asm,
const MCSymbol &Sym = SRE->getSymbol();
// Evaluate recursively if this is a variable.
- if (Sym.isVariable() && SRE->getKind() == MCSymbolRefExpr::VK_None) {
+ if (Sym.isVariable() && SRE->getKind() == MCSymbolRefExpr::VK_None &&
+ canExpand(Sym, Asm, InSet)) {
+ bool IsMachO = SRE->hasSubsectionsViaSymbols();
if (Sym.getVariableValue()->EvaluateAsRelocatableImpl(
- Res, Asm, Layout, Fixup, Addrs, true, ForceVarExpansion)) {
+ Res, Asm, Layout, Fixup, Addrs, InSet || IsMachO)) {
+ if (!IsMachO)
+ return true;
+
const MCSymbolRefExpr *A = Res.getSymA();
const MCSymbolRefExpr *B = Res.getSymB();
-
- if (SRE->hasSubsectionsViaSymbols()) {
- // FIXME: This is small hack. Given
- // a = b + 4
- // .long a
- // the OS X assembler will completely drop the 4. We should probably
- // include it in the relocation or produce an error if that is not
- // possible.
- if (!A && !B)
- return true;
- } else {
- if (ForceVarExpansion)
- return true;
- bool IsSymbol = A && A->getSymbol().isDefined();
- if (!IsSymbol)
- return true;
- }
+ // FIXME: This is small hack. Given
+ // a = b + 4
+ // .long a
+ // the OS X assembler will completely drop the 4. We should probably
+ // include it in the relocation or produce an error if that is not
+ // possible.
+ if (!A && !B)
+ return true;
}
}
@@ -651,9 +649,8 @@ bool MCExpr::EvaluateAsRelocatableImpl(MCValue &Res, const MCAssembler *Asm,
const MCUnaryExpr *AUE = cast<MCUnaryExpr>(this);
MCValue Value;
- if (!AUE->getSubExpr()->EvaluateAsRelocatableImpl(Value, Asm, Layout,
- Fixup, Addrs, InSet,
- ForceVarExpansion))
+ if (!AUE->getSubExpr()->EvaluateAsRelocatableImpl(Value, Asm, Layout, Fixup,
+ Addrs, InSet))
return false;
switch (AUE->getOpcode()) {
@@ -686,12 +683,10 @@ bool MCExpr::EvaluateAsRelocatableImpl(MCValue &Res, const MCAssembler *Asm,
const MCBinaryExpr *ABE = cast<MCBinaryExpr>(this);
MCValue LHSValue, RHSValue;
- if (!ABE->getLHS()->EvaluateAsRelocatableImpl(LHSValue, Asm, Layout,
- Fixup, Addrs, InSet,
- ForceVarExpansion) ||
- !ABE->getRHS()->EvaluateAsRelocatableImpl(RHSValue, Asm, Layout,
- Fixup, Addrs, InSet,
- ForceVarExpansion))
+ if (!ABE->getLHS()->EvaluateAsRelocatableImpl(LHSValue, Asm, Layout, Fixup,
+ Addrs, InSet) ||
+ !ABE->getRHS()->EvaluateAsRelocatableImpl(RHSValue, Asm, Layout, Fixup,
+ Addrs, InSet))
return false;
// We only support a few operations on non-constant expressions, handle
@@ -704,14 +699,12 @@ bool MCExpr::EvaluateAsRelocatableImpl(MCValue &Res, const MCAssembler *Asm,
// Negate RHS and add.
return EvaluateSymbolicAdd(Asm, Layout, Addrs, InSet, LHSValue,
RHSValue.getSymB(), RHSValue.getSymA(),
- -RHSValue.getConstant(),
- Res);
+ -RHSValue.getConstant(), Res);
case MCBinaryExpr::Add:
return EvaluateSymbolicAdd(Asm, Layout, Addrs, InSet, LHSValue,
RHSValue.getSymA(), RHSValue.getSymB(),
- RHSValue.getConstant(),
- Res);
+ RHSValue.getConstant(), Res);
}
}
diff --git a/lib/MC/MCLinkerOptimizationHint.cpp b/lib/MC/MCLinkerOptimizationHint.cpp
index 7739878..8db22dc 100644
--- a/lib/MC/MCLinkerOptimizationHint.cpp
+++ b/lib/MC/MCLinkerOptimizationHint.cpp
@@ -9,7 +9,7 @@
#include "llvm/MC/MCLinkerOptimizationHint.h"
#include "llvm/MC/MCAsmLayout.h"
-#include "llvm/MC/MCStreamer.h"
+#include "llvm/MC/MCAssembler.h"
#include "llvm/Support/LEB128.h"
using namespace llvm;
diff --git a/lib/MC/MCMachOStreamer.cpp b/lib/MC/MCMachOStreamer.cpp
index 79eab49..d5c7101 100644
--- a/lib/MC/MCMachOStreamer.cpp
+++ b/lib/MC/MCMachOStreamer.cpp
@@ -26,6 +26,7 @@
#include "llvm/MC/MCSymbol.h"
#include "llvm/Support/Dwarf.h"
#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/TargetRegistry.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
@@ -39,6 +40,9 @@ private:
/// need for local relocations. False by default.
bool LabelSections;
+ bool DWARFMustBeAtTheEnd;
+ bool CreatedADWARFSection;
+
/// HasSectionLabel - map of which sections have already had a non-local
/// label emitted to them. Used so we don't emit extraneous linker local
/// labels in the middle of the section.
@@ -51,9 +55,9 @@ private:
public:
MCMachOStreamer(MCContext &Context, MCAsmBackend &MAB, raw_ostream &OS,
- MCCodeEmitter *Emitter, bool label)
- : MCObjectStreamer(Context, MAB, OS, Emitter),
- LabelSections(label) {}
+ MCCodeEmitter *Emitter, bool DWARFMustBeAtTheEnd, bool label)
+ : MCObjectStreamer(Context, MAB, OS, Emitter), LabelSections(label),
+ DWARFMustBeAtTheEnd(DWARFMustBeAtTheEnd), CreatedADWARFSection(false) {}
/// state management
void reset() override {
@@ -119,10 +123,43 @@ public:
} // end anonymous namespace.
+static bool canGoAfterDWARF(const MCSectionMachO &MSec) {
+ // These sections are created by the assembler itself after the end of
+ // the .s file.
+ StringRef SegName = MSec.getSegmentName();
+ StringRef SecName = MSec.getSectionName();
+
+ if (SegName == "__LD" && SecName == "__compact_unwind")
+ return true;
+
+ if (SegName == "__IMPORT") {
+ if (SecName == "__jump_table")
+ return true;
+
+ if (SecName == "__pointers")
+ return true;
+ }
+
+ if (SegName == "__TEXT" && SecName == "__eh_frame")
+ return true;
+
+ if (SegName == "__DATA" && SecName == "__nl_symbol_ptr")
+ return true;
+
+ return false;
+}
+
void MCMachOStreamer::ChangeSection(const MCSection *Section,
const MCExpr *Subsection) {
// Change the section normally.
- MCObjectStreamer::ChangeSection(Section, Subsection);
+ bool Created = MCObjectStreamer::changeSectionImpl(Section, Subsection);
+ const MCSectionMachO &MSec = *cast<MCSectionMachO>(Section);
+ StringRef SegName = MSec.getSegmentName();
+ if (SegName == "__DWARF")
+ CreatedADWARFSection = true;
+ else if (Created && DWARFMustBeAtTheEnd && !canGoAfterDWARF(MSec))
+ assert(!CreatedADWARFSection && "Creating regular section after DWARF");
+
// Output a linker-local symbol so we don't need section-relative local
// relocations. The linker hates us when we do that.
if (LabelSections && !HasSectionLabel[Section]) {
@@ -455,9 +492,10 @@ void MCMachOStreamer::FinishImpl() {
MCStreamer *llvm::createMachOStreamer(MCContext &Context, MCAsmBackend &MAB,
raw_ostream &OS, MCCodeEmitter *CE,
- bool RelaxAll,
+ bool RelaxAll, bool DWARFMustBeAtTheEnd,
bool LabelSections) {
- MCMachOStreamer *S = new MCMachOStreamer(Context, MAB, OS, CE, LabelSections);
+ MCMachOStreamer *S = new MCMachOStreamer(Context, MAB, OS, CE,
+ DWARFMustBeAtTheEnd, LabelSections);
if (RelaxAll)
S->getAssembler().setRelaxAll(true);
return S;
diff --git a/lib/MC/MCObjectFileInfo.cpp b/lib/MC/MCObjectFileInfo.cpp
index 11c9cc2..cd70362 100644
--- a/lib/MC/MCObjectFileInfo.cpp
+++ b/lib/MC/MCObjectFileInfo.cpp
@@ -183,82 +183,60 @@ void MCObjectFileInfo::InitMachOMCObjectFileInfo(Triple T) {
// Debug Information.
DwarfAccelNamesSection =
- Ctx->getMachOSection("__DWARF", "__apple_names",
- MachO::S_ATTR_DEBUG,
- SectionKind::getMetadata());
+ Ctx->getMachOSection("__DWARF", "__apple_names", MachO::S_ATTR_DEBUG,
+ SectionKind::getMetadata(), "names_begin");
DwarfAccelObjCSection =
- Ctx->getMachOSection("__DWARF", "__apple_objc",
- MachO::S_ATTR_DEBUG,
- SectionKind::getMetadata());
+ Ctx->getMachOSection("__DWARF", "__apple_objc", MachO::S_ATTR_DEBUG,
+ SectionKind::getMetadata(), "objc_begin");
// 16 character section limit...
DwarfAccelNamespaceSection =
- Ctx->getMachOSection("__DWARF", "__apple_namespac",
- MachO::S_ATTR_DEBUG,
- SectionKind::getMetadata());
+ Ctx->getMachOSection("__DWARF", "__apple_namespac", MachO::S_ATTR_DEBUG,
+ SectionKind::getMetadata(), "namespac_begin");
DwarfAccelTypesSection =
- Ctx->getMachOSection("__DWARF", "__apple_types",
- MachO::S_ATTR_DEBUG,
- SectionKind::getMetadata());
+ Ctx->getMachOSection("__DWARF", "__apple_types", MachO::S_ATTR_DEBUG,
+ SectionKind::getMetadata(), "types_begin");
DwarfAbbrevSection =
- Ctx->getMachOSection("__DWARF", "__debug_abbrev",
- MachO::S_ATTR_DEBUG,
- SectionKind::getMetadata());
+ Ctx->getMachOSection("__DWARF", "__debug_abbrev", MachO::S_ATTR_DEBUG,
+ SectionKind::getMetadata(), "section_abbrev");
DwarfInfoSection =
- Ctx->getMachOSection("__DWARF", "__debug_info",
- MachO::S_ATTR_DEBUG,
- SectionKind::getMetadata());
+ Ctx->getMachOSection("__DWARF", "__debug_info", MachO::S_ATTR_DEBUG,
+ SectionKind::getMetadata(), "section_info");
DwarfLineSection =
- Ctx->getMachOSection("__DWARF", "__debug_line",
- MachO::S_ATTR_DEBUG,
- SectionKind::getMetadata());
+ Ctx->getMachOSection("__DWARF", "__debug_line", MachO::S_ATTR_DEBUG,
+ SectionKind::getMetadata(), "section_line");
DwarfFrameSection =
- Ctx->getMachOSection("__DWARF", "__debug_frame",
- MachO::S_ATTR_DEBUG,
- SectionKind::getMetadata());
+ Ctx->getMachOSection("__DWARF", "__debug_frame", MachO::S_ATTR_DEBUG,
+ SectionKind::getMetadata());
DwarfPubNamesSection =
- Ctx->getMachOSection("__DWARF", "__debug_pubnames",
- MachO::S_ATTR_DEBUG,
- SectionKind::getMetadata());
+ Ctx->getMachOSection("__DWARF", "__debug_pubnames", MachO::S_ATTR_DEBUG,
+ SectionKind::getMetadata());
DwarfPubTypesSection =
- Ctx->getMachOSection("__DWARF", "__debug_pubtypes",
- MachO::S_ATTR_DEBUG,
- SectionKind::getMetadata());
+ Ctx->getMachOSection("__DWARF", "__debug_pubtypes", MachO::S_ATTR_DEBUG,
+ SectionKind::getMetadata());
DwarfGnuPubNamesSection =
- Ctx->getMachOSection("__DWARF", "__debug_gnu_pubn",
- MachO::S_ATTR_DEBUG,
- SectionKind::getMetadata());
+ Ctx->getMachOSection("__DWARF", "__debug_gnu_pubn", MachO::S_ATTR_DEBUG,
+ SectionKind::getMetadata());
DwarfGnuPubTypesSection =
- Ctx->getMachOSection("__DWARF", "__debug_gnu_pubt",
- MachO::S_ATTR_DEBUG,
- SectionKind::getMetadata());
+ Ctx->getMachOSection("__DWARF", "__debug_gnu_pubt", MachO::S_ATTR_DEBUG,
+ SectionKind::getMetadata());
DwarfStrSection =
- Ctx->getMachOSection("__DWARF", "__debug_str",
- MachO::S_ATTR_DEBUG,
- SectionKind::getMetadata());
+ Ctx->getMachOSection("__DWARF", "__debug_str", MachO::S_ATTR_DEBUG,
+ SectionKind::getMetadata(), "info_string");
DwarfLocSection =
- Ctx->getMachOSection("__DWARF", "__debug_loc",
- MachO::S_ATTR_DEBUG,
- SectionKind::getMetadata());
+ Ctx->getMachOSection("__DWARF", "__debug_loc", MachO::S_ATTR_DEBUG,
+ SectionKind::getMetadata(), "section_debug_loc");
DwarfARangesSection =
- Ctx->getMachOSection("__DWARF", "__debug_aranges",
- MachO::S_ATTR_DEBUG,
- SectionKind::getMetadata());
+ Ctx->getMachOSection("__DWARF", "__debug_aranges", MachO::S_ATTR_DEBUG,
+ SectionKind::getMetadata());
DwarfRangesSection =
- Ctx->getMachOSection("__DWARF", "__debug_ranges",
- MachO::S_ATTR_DEBUG,
- SectionKind::getMetadata());
- DwarfMacroInfoSection =
- Ctx->getMachOSection("__DWARF", "__debug_macinfo",
- MachO::S_ATTR_DEBUG,
- SectionKind::getMetadata());
+ Ctx->getMachOSection("__DWARF", "__debug_ranges", MachO::S_ATTR_DEBUG,
+ SectionKind::getMetadata(), "debug_range");
DwarfDebugInlineSection =
- Ctx->getMachOSection("__DWARF", "__debug_inlined",
- MachO::S_ATTR_DEBUG,
- SectionKind::getMetadata());
- StackMapSection =
- Ctx->getMachOSection("__LLVM_STACKMAPS", "__llvm_stackmaps", 0,
- SectionKind::getMetadata());
+ Ctx->getMachOSection("__DWARF", "__debug_inlined", MachO::S_ATTR_DEBUG,
+ SectionKind::getMetadata());
+ StackMapSection = Ctx->getMachOSection("__LLVM_STACKMAPS", "__llvm_stackmaps",
+ 0, SectionKind::getMetadata());
TLSExtraDataSection = TLSTLVSection;
}
@@ -477,9 +455,10 @@ void MCObjectFileInfo::InitELFMCObjectFileInfo(Triple T) {
COFFDebugSymbolsSection = nullptr;
// Debug Info Sections.
- DwarfAbbrevSection =
- Ctx->getELFSection(".debug_abbrev", ELF::SHT_PROGBITS, 0);
- DwarfInfoSection = Ctx->getELFSection(".debug_info", ELF::SHT_PROGBITS, 0);
+ DwarfAbbrevSection = Ctx->getELFSection(".debug_abbrev", ELF::SHT_PROGBITS, 0,
+ "section_abbrev");
+ DwarfInfoSection =
+ Ctx->getELFSection(".debug_info", ELF::SHT_PROGBITS, 0, "section_info");
DwarfLineSection = Ctx->getELFSection(".debug_line", ELF::SHT_PROGBITS, 0);
DwarfFrameSection = Ctx->getELFSection(".debug_frame", ELF::SHT_PROGBITS, 0);
DwarfPubNamesSection =
@@ -497,21 +476,19 @@ void MCObjectFileInfo::InitELFMCObjectFileInfo(Triple T) {
DwarfARangesSection =
Ctx->getELFSection(".debug_aranges", ELF::SHT_PROGBITS, 0);
DwarfRangesSection =
- Ctx->getELFSection(".debug_ranges", ELF::SHT_PROGBITS, 0);
- DwarfMacroInfoSection =
- Ctx->getELFSection(".debug_macinfo", ELF::SHT_PROGBITS, 0);
+ Ctx->getELFSection(".debug_ranges", ELF::SHT_PROGBITS, 0, "debug_range");
// DWARF5 Experimental Debug Info
// Accelerator Tables
DwarfAccelNamesSection =
- Ctx->getELFSection(".apple_names", ELF::SHT_PROGBITS, 0);
+ Ctx->getELFSection(".apple_names", ELF::SHT_PROGBITS, 0, "names_begin");
DwarfAccelObjCSection =
- Ctx->getELFSection(".apple_objc", ELF::SHT_PROGBITS, 0);
- DwarfAccelNamespaceSection =
- Ctx->getELFSection(".apple_namespaces", ELF::SHT_PROGBITS, 0);
+ Ctx->getELFSection(".apple_objc", ELF::SHT_PROGBITS, 0, "objc_begin");
+ DwarfAccelNamespaceSection = Ctx->getELFSection(
+ ".apple_namespaces", ELF::SHT_PROGBITS, 0, "namespac_begin");
DwarfAccelTypesSection =
- Ctx->getELFSection(".apple_types", ELF::SHT_PROGBITS, 0);
+ Ctx->getELFSection(".apple_types", ELF::SHT_PROGBITS, 0, "types_begin");
// Fission Sections
DwarfInfoDWOSection =
@@ -526,72 +503,58 @@ void MCObjectFileInfo::InitELFMCObjectFileInfo(Triple T) {
DwarfLineDWOSection =
Ctx->getELFSection(".debug_line.dwo", ELF::SHT_PROGBITS, 0);
DwarfLocDWOSection =
- Ctx->getELFSection(".debug_loc.dwo", ELF::SHT_PROGBITS, 0);
+ Ctx->getELFSection(".debug_loc.dwo", ELF::SHT_PROGBITS, 0, "skel_loc");
DwarfStrOffDWOSection =
Ctx->getELFSection(".debug_str_offsets.dwo", ELF::SHT_PROGBITS, 0);
- DwarfAddrSection = Ctx->getELFSection(".debug_addr", ELF::SHT_PROGBITS, 0);
+ DwarfAddrSection =
+ Ctx->getELFSection(".debug_addr", ELF::SHT_PROGBITS, 0, "addr_sec");
StackMapSection =
Ctx->getELFSection(".llvm_stackmaps", ELF::SHT_PROGBITS, ELF::SHF_ALLOC);
}
-
void MCObjectFileInfo::InitCOFFMCObjectFileInfo(Triple T) {
bool IsWoA = T.getArch() == Triple::arm || T.getArch() == Triple::thumb;
CommDirectiveSupportsAlignment = true;
// COFF
- BSSSection =
- Ctx->getCOFFSection(".bss",
- COFF::IMAGE_SCN_CNT_UNINITIALIZED_DATA |
- COFF::IMAGE_SCN_MEM_READ |
- COFF::IMAGE_SCN_MEM_WRITE,
- SectionKind::getBSS());
- TextSection =
- Ctx->getCOFFSection(".text",
- (IsWoA ? COFF::IMAGE_SCN_MEM_16BIT
- : (COFF::SectionCharacteristics)0) |
- COFF::IMAGE_SCN_CNT_CODE |
- COFF::IMAGE_SCN_MEM_EXECUTE |
- COFF::IMAGE_SCN_MEM_READ,
- SectionKind::getText());
- DataSection =
- Ctx->getCOFFSection(".data",
- COFF::IMAGE_SCN_CNT_INITIALIZED_DATA |
- COFF::IMAGE_SCN_MEM_READ |
- COFF::IMAGE_SCN_MEM_WRITE,
- SectionKind::getDataRel());
- ReadOnlySection =
- Ctx->getCOFFSection(".rdata",
- COFF::IMAGE_SCN_CNT_INITIALIZED_DATA |
- COFF::IMAGE_SCN_MEM_READ,
- SectionKind::getReadOnly());
+ BSSSection = Ctx->getCOFFSection(
+ ".bss", COFF::IMAGE_SCN_CNT_UNINITIALIZED_DATA |
+ COFF::IMAGE_SCN_MEM_READ | COFF::IMAGE_SCN_MEM_WRITE,
+ SectionKind::getBSS());
+ TextSection = Ctx->getCOFFSection(
+ ".text",
+ (IsWoA ? COFF::IMAGE_SCN_MEM_16BIT : (COFF::SectionCharacteristics)0) |
+ COFF::IMAGE_SCN_CNT_CODE | COFF::IMAGE_SCN_MEM_EXECUTE |
+ COFF::IMAGE_SCN_MEM_READ,
+ SectionKind::getText());
+ DataSection = Ctx->getCOFFSection(
+ ".data", COFF::IMAGE_SCN_CNT_INITIALIZED_DATA | COFF::IMAGE_SCN_MEM_READ |
+ COFF::IMAGE_SCN_MEM_WRITE,
+ SectionKind::getDataRel());
+ ReadOnlySection = Ctx->getCOFFSection(
+ ".rdata", COFF::IMAGE_SCN_CNT_INITIALIZED_DATA | COFF::IMAGE_SCN_MEM_READ,
+ SectionKind::getReadOnly());
if (T.isKnownWindowsMSVCEnvironment() || T.isWindowsItaniumEnvironment()) {
StaticCtorSection =
- Ctx->getCOFFSection(".CRT$XCU",
- COFF::IMAGE_SCN_CNT_INITIALIZED_DATA |
- COFF::IMAGE_SCN_MEM_READ,
- SectionKind::getReadOnly());
+ Ctx->getCOFFSection(".CRT$XCU", COFF::IMAGE_SCN_CNT_INITIALIZED_DATA |
+ COFF::IMAGE_SCN_MEM_READ,
+ SectionKind::getReadOnly());
StaticDtorSection =
- Ctx->getCOFFSection(".CRT$XTX",
- COFF::IMAGE_SCN_CNT_INITIALIZED_DATA |
- COFF::IMAGE_SCN_MEM_READ,
- SectionKind::getReadOnly());
+ Ctx->getCOFFSection(".CRT$XTX", COFF::IMAGE_SCN_CNT_INITIALIZED_DATA |
+ COFF::IMAGE_SCN_MEM_READ,
+ SectionKind::getReadOnly());
} else {
- StaticCtorSection =
- Ctx->getCOFFSection(".ctors",
- COFF::IMAGE_SCN_CNT_INITIALIZED_DATA |
- COFF::IMAGE_SCN_MEM_READ |
- COFF::IMAGE_SCN_MEM_WRITE,
- SectionKind::getDataRel());
- StaticDtorSection =
- Ctx->getCOFFSection(".dtors",
- COFF::IMAGE_SCN_CNT_INITIALIZED_DATA |
- COFF::IMAGE_SCN_MEM_READ |
- COFF::IMAGE_SCN_MEM_WRITE,
- SectionKind::getDataRel());
+ StaticCtorSection = Ctx->getCOFFSection(
+ ".ctors", COFF::IMAGE_SCN_CNT_INITIALIZED_DATA |
+ COFF::IMAGE_SCN_MEM_READ | COFF::IMAGE_SCN_MEM_WRITE,
+ SectionKind::getDataRel());
+ StaticDtorSection = Ctx->getCOFFSection(
+ ".dtors", COFF::IMAGE_SCN_CNT_INITIALIZED_DATA |
+ COFF::IMAGE_SCN_MEM_READ | COFF::IMAGE_SCN_MEM_WRITE,
+ SectionKind::getDataRel());
}
// FIXME: We're emitting LSDA info into a readonly section on COFF, even
@@ -611,187 +574,149 @@ void MCObjectFileInfo::InitCOFFMCObjectFileInfo(Triple T) {
// Debug info.
COFFDebugSymbolsSection =
- Ctx->getCOFFSection(".debug$S",
- COFF::IMAGE_SCN_MEM_DISCARDABLE |
- COFF::IMAGE_SCN_CNT_INITIALIZED_DATA |
- COFF::IMAGE_SCN_MEM_READ,
- SectionKind::getMetadata());
-
- DwarfAbbrevSection =
- Ctx->getCOFFSection(".debug_abbrev",
- COFF::IMAGE_SCN_MEM_DISCARDABLE |
- COFF::IMAGE_SCN_CNT_INITIALIZED_DATA |
- COFF::IMAGE_SCN_MEM_READ,
- SectionKind::getMetadata());
- DwarfInfoSection =
- Ctx->getCOFFSection(".debug_info",
- COFF::IMAGE_SCN_MEM_DISCARDABLE |
- COFF::IMAGE_SCN_CNT_INITIALIZED_DATA |
- COFF::IMAGE_SCN_MEM_READ,
- SectionKind::getMetadata());
- DwarfLineSection =
- Ctx->getCOFFSection(".debug_line",
- COFF::IMAGE_SCN_MEM_DISCARDABLE |
- COFF::IMAGE_SCN_CNT_INITIALIZED_DATA |
- COFF::IMAGE_SCN_MEM_READ,
- SectionKind::getMetadata());
- DwarfFrameSection =
- Ctx->getCOFFSection(".debug_frame",
- COFF::IMAGE_SCN_MEM_DISCARDABLE |
- COFF::IMAGE_SCN_CNT_INITIALIZED_DATA |
- COFF::IMAGE_SCN_MEM_READ,
- SectionKind::getMetadata());
- DwarfPubNamesSection =
- Ctx->getCOFFSection(".debug_pubnames",
- COFF::IMAGE_SCN_MEM_DISCARDABLE |
- COFF::IMAGE_SCN_CNT_INITIALIZED_DATA |
- COFF::IMAGE_SCN_MEM_READ,
- SectionKind::getMetadata());
- DwarfPubTypesSection =
- Ctx->getCOFFSection(".debug_pubtypes",
- COFF::IMAGE_SCN_MEM_DISCARDABLE |
- COFF::IMAGE_SCN_CNT_INITIALIZED_DATA |
- COFF::IMAGE_SCN_MEM_READ,
- SectionKind::getMetadata());
- DwarfGnuPubNamesSection =
- Ctx->getCOFFSection(".debug_gnu_pubnames",
- COFF::IMAGE_SCN_MEM_DISCARDABLE |
- COFF::IMAGE_SCN_CNT_INITIALIZED_DATA |
- COFF::IMAGE_SCN_MEM_READ,
- SectionKind::getMetadata());
- DwarfGnuPubTypesSection =
- Ctx->getCOFFSection(".debug_gnu_pubtypes",
- COFF::IMAGE_SCN_MEM_DISCARDABLE |
- COFF::IMAGE_SCN_CNT_INITIALIZED_DATA |
- COFF::IMAGE_SCN_MEM_READ,
- SectionKind::getMetadata());
- DwarfStrSection =
- Ctx->getCOFFSection(".debug_str",
- COFF::IMAGE_SCN_MEM_DISCARDABLE |
- COFF::IMAGE_SCN_CNT_INITIALIZED_DATA |
- COFF::IMAGE_SCN_MEM_READ,
- SectionKind::getMetadata());
- DwarfLocSection =
- Ctx->getCOFFSection(".debug_loc",
- COFF::IMAGE_SCN_MEM_DISCARDABLE |
- COFF::IMAGE_SCN_CNT_INITIALIZED_DATA |
- COFF::IMAGE_SCN_MEM_READ,
- SectionKind::getMetadata());
- DwarfARangesSection =
- Ctx->getCOFFSection(".debug_aranges",
- COFF::IMAGE_SCN_MEM_DISCARDABLE |
- COFF::IMAGE_SCN_CNT_INITIALIZED_DATA |
- COFF::IMAGE_SCN_MEM_READ,
- SectionKind::getMetadata());
- DwarfRangesSection =
- Ctx->getCOFFSection(".debug_ranges",
- COFF::IMAGE_SCN_MEM_DISCARDABLE |
- COFF::IMAGE_SCN_CNT_INITIALIZED_DATA |
- COFF::IMAGE_SCN_MEM_READ,
- SectionKind::getMetadata());
- DwarfMacroInfoSection =
- Ctx->getCOFFSection(".debug_macinfo",
- COFF::IMAGE_SCN_MEM_DISCARDABLE |
- COFF::IMAGE_SCN_CNT_INITIALIZED_DATA |
- COFF::IMAGE_SCN_MEM_READ,
- SectionKind::getMetadata());
- DwarfInfoDWOSection =
- Ctx->getCOFFSection(".debug_info.dwo",
- COFF::IMAGE_SCN_MEM_DISCARDABLE |
- COFF::IMAGE_SCN_CNT_INITIALIZED_DATA |
- COFF::IMAGE_SCN_MEM_READ,
- SectionKind::getMetadata());
- DwarfTypesDWOSection =
- Ctx->getCOFFSection(".debug_types.dwo",
- COFF::IMAGE_SCN_MEM_DISCARDABLE |
- COFF::IMAGE_SCN_CNT_INITIALIZED_DATA |
- COFF::IMAGE_SCN_MEM_READ,
- SectionKind::getMetadata());
- DwarfAbbrevDWOSection =
- Ctx->getCOFFSection(".debug_abbrev.dwo",
- COFF::IMAGE_SCN_MEM_DISCARDABLE |
- COFF::IMAGE_SCN_CNT_INITIALIZED_DATA |
- COFF::IMAGE_SCN_MEM_READ,
- SectionKind::getMetadata());
- DwarfStrDWOSection =
- Ctx->getCOFFSection(".debug_str.dwo",
- COFF::IMAGE_SCN_MEM_DISCARDABLE |
- COFF::IMAGE_SCN_CNT_INITIALIZED_DATA |
- COFF::IMAGE_SCN_MEM_READ,
- SectionKind::getMetadata());
- DwarfLineDWOSection =
- Ctx->getCOFFSection(".debug_line.dwo",
- COFF::IMAGE_SCN_MEM_DISCARDABLE |
- COFF::IMAGE_SCN_CNT_INITIALIZED_DATA |
- COFF::IMAGE_SCN_MEM_READ,
- SectionKind::getMetadata());
- DwarfLocDWOSection =
- Ctx->getCOFFSection(".debug_loc.dwo",
- COFF::IMAGE_SCN_MEM_DISCARDABLE |
- COFF::IMAGE_SCN_CNT_INITIALIZED_DATA |
- COFF::IMAGE_SCN_MEM_READ,
- SectionKind::getMetadata());
- DwarfStrOffDWOSection =
- Ctx->getCOFFSection(".debug_str_offsets.dwo",
- COFF::IMAGE_SCN_MEM_DISCARDABLE |
- COFF::IMAGE_SCN_CNT_INITIALIZED_DATA |
- COFF::IMAGE_SCN_MEM_READ,
- SectionKind::getMetadata());
- DwarfAddrSection =
- Ctx->getCOFFSection(".debug_addr",
- COFF::IMAGE_SCN_MEM_DISCARDABLE |
- COFF::IMAGE_SCN_CNT_INITIALIZED_DATA |
- COFF::IMAGE_SCN_MEM_READ,
- SectionKind::getMetadata());
- DwarfAccelNamesSection =
- Ctx->getCOFFSection(".apple_names",
- COFF::IMAGE_SCN_MEM_DISCARDABLE |
- COFF::IMAGE_SCN_CNT_INITIALIZED_DATA |
- COFF::IMAGE_SCN_MEM_READ,
- SectionKind::getMetadata());
- DwarfAccelNamespaceSection =
- Ctx->getCOFFSection(".apple_namespaces",
- COFF::IMAGE_SCN_MEM_DISCARDABLE |
- COFF::IMAGE_SCN_CNT_INITIALIZED_DATA |
- COFF::IMAGE_SCN_MEM_READ,
- SectionKind::getMetadata());
- DwarfAccelTypesSection =
- Ctx->getCOFFSection(".apple_types",
- COFF::IMAGE_SCN_MEM_DISCARDABLE |
- COFF::IMAGE_SCN_CNT_INITIALIZED_DATA |
- COFF::IMAGE_SCN_MEM_READ,
- SectionKind::getMetadata());
- DwarfAccelObjCSection =
- Ctx->getCOFFSection(".apple_objc",
- COFF::IMAGE_SCN_MEM_DISCARDABLE |
- COFF::IMAGE_SCN_CNT_INITIALIZED_DATA |
- COFF::IMAGE_SCN_MEM_READ,
- SectionKind::getMetadata());
-
- DrectveSection =
- Ctx->getCOFFSection(".drectve",
- COFF::IMAGE_SCN_LNK_INFO |
- COFF::IMAGE_SCN_LNK_REMOVE,
- SectionKind::getMetadata());
-
- PDataSection =
- Ctx->getCOFFSection(".pdata",
- COFF::IMAGE_SCN_CNT_INITIALIZED_DATA |
- COFF::IMAGE_SCN_MEM_READ,
- SectionKind::getDataRel());
-
- XDataSection =
- Ctx->getCOFFSection(".xdata",
- COFF::IMAGE_SCN_CNT_INITIALIZED_DATA |
- COFF::IMAGE_SCN_MEM_READ,
- SectionKind::getDataRel());
-
- TLSDataSection =
- Ctx->getCOFFSection(".tls$",
- COFF::IMAGE_SCN_CNT_INITIALIZED_DATA |
- COFF::IMAGE_SCN_MEM_READ |
- COFF::IMAGE_SCN_MEM_WRITE,
- SectionKind::getDataRel());
+ Ctx->getCOFFSection(".debug$S", COFF::IMAGE_SCN_MEM_DISCARDABLE |
+ COFF::IMAGE_SCN_CNT_INITIALIZED_DATA |
+ COFF::IMAGE_SCN_MEM_READ,
+ SectionKind::getMetadata());
+
+ DwarfAbbrevSection = Ctx->getCOFFSection(
+ ".debug_abbrev",
+ COFF::IMAGE_SCN_MEM_DISCARDABLE | COFF::IMAGE_SCN_CNT_INITIALIZED_DATA |
+ COFF::IMAGE_SCN_MEM_READ,
+ SectionKind::getMetadata(), "section_abbrev");
+ DwarfInfoSection = Ctx->getCOFFSection(
+ ".debug_info",
+ COFF::IMAGE_SCN_MEM_DISCARDABLE | COFF::IMAGE_SCN_CNT_INITIALIZED_DATA |
+ COFF::IMAGE_SCN_MEM_READ,
+ SectionKind::getMetadata(), "section_info");
+ DwarfLineSection = Ctx->getCOFFSection(
+ ".debug_line",
+ COFF::IMAGE_SCN_MEM_DISCARDABLE | COFF::IMAGE_SCN_CNT_INITIALIZED_DATA |
+ COFF::IMAGE_SCN_MEM_READ,
+ SectionKind::getMetadata(), "section_line");
+
+ DwarfFrameSection = Ctx->getCOFFSection(
+ ".debug_frame",
+ COFF::IMAGE_SCN_MEM_DISCARDABLE | COFF::IMAGE_SCN_CNT_INITIALIZED_DATA |
+ COFF::IMAGE_SCN_MEM_READ,
+ SectionKind::getMetadata());
+ DwarfPubNamesSection = Ctx->getCOFFSection(
+ ".debug_pubnames",
+ COFF::IMAGE_SCN_MEM_DISCARDABLE | COFF::IMAGE_SCN_CNT_INITIALIZED_DATA |
+ COFF::IMAGE_SCN_MEM_READ,
+ SectionKind::getMetadata());
+ DwarfPubTypesSection = Ctx->getCOFFSection(
+ ".debug_pubtypes",
+ COFF::IMAGE_SCN_MEM_DISCARDABLE | COFF::IMAGE_SCN_CNT_INITIALIZED_DATA |
+ COFF::IMAGE_SCN_MEM_READ,
+ SectionKind::getMetadata());
+ DwarfGnuPubNamesSection = Ctx->getCOFFSection(
+ ".debug_gnu_pubnames",
+ COFF::IMAGE_SCN_MEM_DISCARDABLE | COFF::IMAGE_SCN_CNT_INITIALIZED_DATA |
+ COFF::IMAGE_SCN_MEM_READ,
+ SectionKind::getMetadata());
+ DwarfGnuPubTypesSection = Ctx->getCOFFSection(
+ ".debug_gnu_pubtypes",
+ COFF::IMAGE_SCN_MEM_DISCARDABLE | COFF::IMAGE_SCN_CNT_INITIALIZED_DATA |
+ COFF::IMAGE_SCN_MEM_READ,
+ SectionKind::getMetadata());
+ DwarfStrSection = Ctx->getCOFFSection(
+ ".debug_str",
+ COFF::IMAGE_SCN_MEM_DISCARDABLE | COFF::IMAGE_SCN_CNT_INITIALIZED_DATA |
+ COFF::IMAGE_SCN_MEM_READ,
+ SectionKind::getMetadata(), "info_string");
+ DwarfLocSection = Ctx->getCOFFSection(
+ ".debug_loc",
+ COFF::IMAGE_SCN_MEM_DISCARDABLE | COFF::IMAGE_SCN_CNT_INITIALIZED_DATA |
+ COFF::IMAGE_SCN_MEM_READ,
+ SectionKind::getMetadata(), "section_debug_loc");
+ DwarfARangesSection = Ctx->getCOFFSection(
+ ".debug_aranges",
+ COFF::IMAGE_SCN_MEM_DISCARDABLE | COFF::IMAGE_SCN_CNT_INITIALIZED_DATA |
+ COFF::IMAGE_SCN_MEM_READ,
+ SectionKind::getMetadata());
+ DwarfRangesSection = Ctx->getCOFFSection(
+ ".debug_ranges",
+ COFF::IMAGE_SCN_MEM_DISCARDABLE | COFF::IMAGE_SCN_CNT_INITIALIZED_DATA |
+ COFF::IMAGE_SCN_MEM_READ,
+ SectionKind::getMetadata(), "debug_range");
+ DwarfInfoDWOSection = Ctx->getCOFFSection(
+ ".debug_info.dwo",
+ COFF::IMAGE_SCN_MEM_DISCARDABLE | COFF::IMAGE_SCN_CNT_INITIALIZED_DATA |
+ COFF::IMAGE_SCN_MEM_READ,
+ SectionKind::getMetadata(), "section_info_dwo");
+ DwarfTypesDWOSection = Ctx->getCOFFSection(
+ ".debug_types.dwo",
+ COFF::IMAGE_SCN_MEM_DISCARDABLE | COFF::IMAGE_SCN_CNT_INITIALIZED_DATA |
+ COFF::IMAGE_SCN_MEM_READ,
+ SectionKind::getMetadata(), "section_types_dwo");
+ DwarfAbbrevDWOSection = Ctx->getCOFFSection(
+ ".debug_abbrev.dwo",
+ COFF::IMAGE_SCN_MEM_DISCARDABLE | COFF::IMAGE_SCN_CNT_INITIALIZED_DATA |
+ COFF::IMAGE_SCN_MEM_READ,
+ SectionKind::getMetadata(), "section_abbrev_dwo");
+ DwarfStrDWOSection = Ctx->getCOFFSection(
+ ".debug_str.dwo",
+ COFF::IMAGE_SCN_MEM_DISCARDABLE | COFF::IMAGE_SCN_CNT_INITIALIZED_DATA |
+ COFF::IMAGE_SCN_MEM_READ,
+ SectionKind::getMetadata(), "skel_string");
+ DwarfLineDWOSection = Ctx->getCOFFSection(
+ ".debug_line.dwo",
+ COFF::IMAGE_SCN_MEM_DISCARDABLE | COFF::IMAGE_SCN_CNT_INITIALIZED_DATA |
+ COFF::IMAGE_SCN_MEM_READ,
+ SectionKind::getMetadata());
+ DwarfLocDWOSection = Ctx->getCOFFSection(
+ ".debug_loc.dwo",
+ COFF::IMAGE_SCN_MEM_DISCARDABLE | COFF::IMAGE_SCN_CNT_INITIALIZED_DATA |
+ COFF::IMAGE_SCN_MEM_READ,
+ SectionKind::getMetadata(), "skel_loc");
+ DwarfStrOffDWOSection = Ctx->getCOFFSection(
+ ".debug_str_offsets.dwo",
+ COFF::IMAGE_SCN_MEM_DISCARDABLE | COFF::IMAGE_SCN_CNT_INITIALIZED_DATA |
+ COFF::IMAGE_SCN_MEM_READ,
+ SectionKind::getMetadata());
+ DwarfAddrSection = Ctx->getCOFFSection(
+ ".debug_addr",
+ COFF::IMAGE_SCN_MEM_DISCARDABLE | COFF::IMAGE_SCN_CNT_INITIALIZED_DATA |
+ COFF::IMAGE_SCN_MEM_READ,
+ SectionKind::getMetadata(), "addr_sec");
+ DwarfAccelNamesSection = Ctx->getCOFFSection(
+ ".apple_names",
+ COFF::IMAGE_SCN_MEM_DISCARDABLE | COFF::IMAGE_SCN_CNT_INITIALIZED_DATA |
+ COFF::IMAGE_SCN_MEM_READ,
+ SectionKind::getMetadata(), "names_begin");
+ DwarfAccelNamespaceSection = Ctx->getCOFFSection(
+ ".apple_namespaces",
+ COFF::IMAGE_SCN_MEM_DISCARDABLE | COFF::IMAGE_SCN_CNT_INITIALIZED_DATA |
+ COFF::IMAGE_SCN_MEM_READ,
+ SectionKind::getMetadata(), "namespac_begin");
+ DwarfAccelTypesSection = Ctx->getCOFFSection(
+ ".apple_types",
+ COFF::IMAGE_SCN_MEM_DISCARDABLE | COFF::IMAGE_SCN_CNT_INITIALIZED_DATA |
+ COFF::IMAGE_SCN_MEM_READ,
+ SectionKind::getMetadata(), "types_begin");
+ DwarfAccelObjCSection = Ctx->getCOFFSection(
+ ".apple_objc",
+ COFF::IMAGE_SCN_MEM_DISCARDABLE | COFF::IMAGE_SCN_CNT_INITIALIZED_DATA |
+ COFF::IMAGE_SCN_MEM_READ,
+ SectionKind::getMetadata(), "objc_begin");
+
+ DrectveSection = Ctx->getCOFFSection(
+ ".drectve", COFF::IMAGE_SCN_LNK_INFO | COFF::IMAGE_SCN_LNK_REMOVE,
+ SectionKind::getMetadata());
+
+ PDataSection = Ctx->getCOFFSection(
+ ".pdata", COFF::IMAGE_SCN_CNT_INITIALIZED_DATA | COFF::IMAGE_SCN_MEM_READ,
+ SectionKind::getDataRel());
+
+ XDataSection = Ctx->getCOFFSection(
+ ".xdata", COFF::IMAGE_SCN_CNT_INITIALIZED_DATA | COFF::IMAGE_SCN_MEM_READ,
+ SectionKind::getDataRel());
+
+ TLSDataSection = Ctx->getCOFFSection(
+ ".tls$", COFF::IMAGE_SCN_CNT_INITIALIZED_DATA | COFF::IMAGE_SCN_MEM_READ |
+ COFF::IMAGE_SCN_MEM_WRITE,
+ SectionKind::getDataRel());
}
void MCObjectFileInfo::InitMCObjectFileInfo(StringRef T, Reloc::Model relocm,
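
Note on the MCObjectFileInfo changes above: the extra trailing string arguments ("section_abbrev", "info_string", "section_debug_loc", "debug_range", ...) name a symbol to be placed at the start of the corresponding debug section, so DWARF emitters can refer to a section's beginning through a label rather than a hand-managed temporary. A minimal standalone sketch of that lazy begin-label pattern (hypothetical types and names, not the MC API):

#include <memory>
#include <string>

struct Label {
  std::string Name;
  bool Placed; // set once the label has been emitted into the stream
};

struct Section {
  std::string Name;
  std::unique_ptr<Label> Begin;
  // Create the begin label on first request; later requests reuse it.
  Label *getBeginLabel(const std::string &LabelName) {
    if (!Begin)
      Begin.reset(new Label{LabelName, false});
    return Begin.get();
  }
};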
diff --git a/lib/MC/MCObjectStreamer.cpp b/lib/MC/MCObjectStreamer.cpp
index 08fe501..6aa2de3 100644
--- a/lib/MC/MCObjectStreamer.cpp
+++ b/lib/MC/MCObjectStreamer.cpp
@@ -20,6 +20,7 @@
#include "llvm/MC/MCSection.h"
#include "llvm/MC/MCSymbol.h"
#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/TargetRegistry.h"
using namespace llvm;
MCObjectStreamer::MCObjectStreamer(MCContext &Context, MCAsmBackend &TAB,
@@ -31,8 +32,8 @@ MCObjectStreamer::MCObjectStreamer(MCContext &Context, MCAsmBackend &TAB,
MCObjectStreamer::MCObjectStreamer(MCContext &Context, MCAsmBackend &TAB,
raw_ostream &OS, MCCodeEmitter *Emitter_,
- MCAssembler *_Assembler)
- : MCStreamer(Context), Assembler(_Assembler), CurSectionData(nullptr),
+ MCAssembler *Assembler)
+ : MCStreamer(Context), Assembler(Assembler), CurSectionData(nullptr),
EmitEHFrame(true), EmitDebugFrame(false) {}
MCObjectStreamer::~MCObjectStreamer() {
@@ -181,10 +182,16 @@ void MCObjectStreamer::EmitWeakReference(MCSymbol *Alias,
void MCObjectStreamer::ChangeSection(const MCSection *Section,
const MCExpr *Subsection) {
+ changeSectionImpl(Section, Subsection);
+}
+
+bool MCObjectStreamer::changeSectionImpl(const MCSection *Section,
+ const MCExpr *Subsection) {
assert(Section && "Cannot switch to a null section!");
flushPendingLabels(nullptr);
- CurSectionData = &getAssembler().getOrCreateSectionData(*Section);
+ bool Created;
+ CurSectionData = &getAssembler().getOrCreateSectionData(*Section, &Created);
int64_t IntSubsection = 0;
if (Subsection &&
@@ -194,6 +201,7 @@ void MCObjectStreamer::ChangeSection(const MCSection *Section,
report_fatal_error("Subsection number out of range");
CurInsertionPoint =
CurSectionData->getSubsectionInsertionPoint(unsigned(IntSubsection));
+ return Created;
}
void MCObjectStreamer::EmitAssignment(MCSymbol *Symbol, const MCExpr *Value) {
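
Note: changeSectionImpl now reports, via the new out-parameter of getOrCreateSectionData, whether the section data was created by this call, which is what lets the streamer emit a section's begin label exactly once. A standalone sketch of that get-or-create idiom (hypothetical container, not the MCAssembler API):

#include <map>
#include <string>

struct SectionData { /* payload elided */ };

// Returns the entry for Key, creating it if absent; *Created tells the
// caller whether this lookup performed the creation.
SectionData &getOrCreate(std::map<std::string, SectionData> &Table,
                         const std::string &Key, bool *Created) {
  auto It = Table.find(Key);
  if (Created)
    *Created = (It == Table.end());
  if (It == Table.end())
    It = Table.emplace(Key, SectionData{}).first;
  return It->second;
}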
diff --git a/lib/MC/MCObjectWriter.cpp b/lib/MC/MCObjectWriter.cpp
index 94d7cd6..3c536ec 100644
--- a/lib/MC/MCObjectWriter.cpp
+++ b/lib/MC/MCObjectWriter.cpp
@@ -17,11 +17,9 @@ using namespace llvm;
MCObjectWriter::~MCObjectWriter() {
}
-bool
-MCObjectWriter::IsSymbolRefDifferenceFullyResolved(const MCAssembler &Asm,
- const MCSymbolRefExpr *A,
- const MCSymbolRefExpr *B,
- bool InSet) const {
+bool MCObjectWriter::IsSymbolRefDifferenceFullyResolved(
+ const MCAssembler &Asm, const MCSymbolRefExpr *A, const MCSymbolRefExpr *B,
+ bool InSet) const {
// Modified symbol references cannot be resolved.
if (A->getKind() != MCSymbolRefExpr::VK_None ||
B->getKind() != MCSymbolRefExpr::VK_None)
@@ -54,3 +52,5 @@ MCObjectWriter::IsSymbolRefDifferenceFullyResolvedImpl(const MCAssembler &Asm,
// On ELF and COFF A - B is absolute if A and B are in the same section.
return &SecA == &SecB;
}
+
+bool MCObjectWriter::isWeak(const MCSymbolData &SD) const { return false; }
diff --git a/lib/MC/MCParser/AsmLexer.cpp b/lib/MC/MCParser/AsmLexer.cpp
index 5c8ec66..b983d99 100644
--- a/lib/MC/MCParser/AsmLexer.cpp
+++ b/lib/MC/MCParser/AsmLexer.cpp
@@ -21,7 +21,7 @@
#include <cstdlib>
using namespace llvm;
-AsmLexer::AsmLexer(const MCAsmInfo &_MAI) : MAI(_MAI) {
+AsmLexer::AsmLexer(const MCAsmInfo &MAI) : MAI(MAI) {
CurPtr = nullptr;
isAtStartOfLine = true;
AllowAtInIdentifier = !StringRef(MAI.getCommentString()).startswith("@");
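
Note: the renames from _MAI to MAI here (and the similar ones below) rely on a C++ rule that makes the underscore prefixes unnecessary: in a constructor's member-initializer list, the identifier before the parentheses always resolves to the member and the identifier inside resolves to the parameter, so MAI(MAI) is unambiguous. A tiny standalone demonstration:

#include <cassert>

struct Info { int Value; };

class Lexer {
  const Info &MAI;
public:
  // Member-initializer list: outer MAI is the member, inner is the parameter.
  explicit Lexer(const Info &MAI) : MAI(MAI) {}
  int value() const { return MAI.Value; }
};

int main() {
  Info I{42};
  assert(Lexer(I).value() == 42);
}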
diff --git a/lib/MC/MCParser/AsmParser.cpp b/lib/MC/MCParser/AsmParser.cpp
index ef6a540..2bf980b 100644
--- a/lib/MC/MCParser/AsmParser.cpp
+++ b/lib/MC/MCParser/AsmParser.cpp
@@ -339,8 +339,8 @@ private:
DK_WEAK_DEF_CAN_BE_HIDDEN, DK_COMM, DK_COMMON, DK_LCOMM, DK_ABORT,
DK_INCLUDE, DK_INCBIN, DK_CODE16, DK_CODE16GCC, DK_REPT, DK_IRP, DK_IRPC,
DK_IF, DK_IFEQ, DK_IFGE, DK_IFGT, DK_IFLE, DK_IFLT, DK_IFNE, DK_IFB,
- DK_IFNB, DK_IFC, DK_IFEQS, DK_IFNC, DK_IFDEF, DK_IFNDEF, DK_IFNOTDEF,
- DK_ELSEIF, DK_ELSE, DK_ENDIF,
+ DK_IFNB, DK_IFC, DK_IFEQS, DK_IFNC, DK_IFNES, DK_IFDEF, DK_IFNDEF,
+ DK_IFNOTDEF, DK_ELSEIF, DK_ELSE, DK_ENDIF,
DK_SPACE, DK_SKIP, DK_FILE, DK_LINE, DK_LOC, DK_STABS,
DK_CFI_SECTIONS, DK_CFI_STARTPROC, DK_CFI_ENDPROC, DK_CFI_DEF_CFA,
DK_CFI_DEF_CFA_OFFSET, DK_CFI_ADJUST_CFA_OFFSET, DK_CFI_DEF_CFA_REGISTER,
@@ -435,8 +435,8 @@ private:
bool parseDirectiveIfb(SMLoc DirectiveLoc, bool ExpectBlank);
// ".ifc" or ".ifnc", depending on ExpectEqual.
bool parseDirectiveIfc(SMLoc DirectiveLoc, bool ExpectEqual);
- // ".ifeqs"
- bool parseDirectiveIfeqs(SMLoc DirectiveLoc);
+ // ".ifeqs" or ".ifnes", depending on ExpectEqual.
+ bool parseDirectiveIfeqs(SMLoc DirectiveLoc, bool ExpectEqual);
// ".ifdef" or ".ifndef", depending on expect_defined
bool parseDirectiveIfdef(SMLoc DirectiveLoc, bool expect_defined);
bool parseDirectiveElseIf(SMLoc DirectiveLoc); // ".elseif"
@@ -486,10 +486,10 @@ extern MCAsmParserExtension *createCOFFAsmParser();
enum { DEFAULT_ADDRSPACE = 0 };
-AsmParser::AsmParser(SourceMgr &_SM, MCContext &_Ctx, MCStreamer &_Out,
- const MCAsmInfo &_MAI)
- : Lexer(_MAI), Ctx(_Ctx), Out(_Out), MAI(_MAI), SrcMgr(_SM),
- PlatformParser(nullptr), CurBuffer(_SM.getMainFileID()),
+AsmParser::AsmParser(SourceMgr &SM, MCContext &Ctx, MCStreamer &Out,
+ const MCAsmInfo &MAI)
+ : Lexer(MAI), Ctx(Ctx), Out(Out), MAI(MAI), SrcMgr(SM),
+ PlatformParser(nullptr), CurBuffer(SM.getMainFileID()),
MacrosEnabledFlag(true), HadError(false), CppHashLineNumber(0),
AssemblerDialect(~0U), IsDarwin(false), ParsingInlineAsm(false) {
// Save the old handler.
@@ -500,7 +500,7 @@ AsmParser::AsmParser(SourceMgr &_SM, MCContext &_Ctx, MCStreamer &_Out,
Lexer.setBuffer(SrcMgr.getMemoryBuffer(CurBuffer)->getBuffer());
// Initialize the platform / file format parser.
- switch (_Ctx.getObjectFileInfo()->getObjectFileType()) {
+ switch (Ctx.getObjectFileInfo()->getObjectFileType()) {
case MCObjectFileInfo::IsCOFF:
PlatformParser.reset(createCOFFAsmParser());
break;
@@ -1244,9 +1244,11 @@ bool AsmParser::parseStatement(ParseStatementInfo &Info,
case DK_IFC:
return parseDirectiveIfc(IDLoc, true);
case DK_IFEQS:
- return parseDirectiveIfeqs(IDLoc);
+ return parseDirectiveIfeqs(IDLoc, true);
case DK_IFNC:
return parseDirectiveIfc(IDLoc, false);
+ case DK_IFNES:
+ return parseDirectiveIfeqs(IDLoc, false);
case DK_IFDEF:
return parseDirectiveIfdef(IDLoc, true);
case DK_IFNDEF:
@@ -2791,7 +2793,7 @@ bool AsmParser::parseDirectiveFile(SMLoc DirectiveLoc) {
if (FileNumber == -1)
getStreamer().EmitFileDirective(Filename);
else {
- if (getContext().getGenDwarfForAssembly() == true)
+ if (getContext().getGenDwarfForAssembly())
Error(DirectiveLoc,
"input can't have .file dwarf directives when -g is "
"used to generate dwarf debug info for assembly code");
@@ -3943,9 +3945,12 @@ bool AsmParser::parseDirectiveIfc(SMLoc DirectiveLoc, bool ExpectEqual) {
/// parseDirectiveIfeqs
/// ::= .ifeqs string1, string2
-bool AsmParser::parseDirectiveIfeqs(SMLoc DirectiveLoc) {
+bool AsmParser::parseDirectiveIfeqs(SMLoc DirectiveLoc, bool ExpectEqual) {
if (Lexer.isNot(AsmToken::String)) {
- TokError("expected string parameter for '.ifeqs' directive");
+ if (ExpectEqual)
+ TokError("expected string parameter for '.ifeqs' directive");
+ else
+ TokError("expected string parameter for '.ifnes' directive");
eatToEndOfStatement();
return true;
}
@@ -3954,7 +3959,10 @@ bool AsmParser::parseDirectiveIfeqs(SMLoc DirectiveLoc) {
Lex();
if (Lexer.isNot(AsmToken::Comma)) {
- TokError("expected comma after first string for '.ifeqs' directive");
+ if (ExpectEqual)
+ TokError("expected comma after first string for '.ifeqs' directive");
+ else
+ TokError("expected comma after first string for '.ifnes' directive");
eatToEndOfStatement();
return true;
}
@@ -3962,7 +3970,10 @@ bool AsmParser::parseDirectiveIfeqs(SMLoc DirectiveLoc) {
Lex();
if (Lexer.isNot(AsmToken::String)) {
- TokError("expected string parameter for '.ifeqs' directive");
+ if (ExpectEqual)
+ TokError("expected string parameter for '.ifeqs' directive");
+ else
+ TokError("expected string parameter for '.ifnes' directive");
eatToEndOfStatement();
return true;
}
@@ -3972,7 +3983,7 @@ bool AsmParser::parseDirectiveIfeqs(SMLoc DirectiveLoc) {
TheCondStack.push_back(TheCondState);
TheCondState.TheCond = AsmCond::IfCond;
- TheCondState.CondMet = String1 == String2;
+ TheCondState.CondMet = ExpectEqual == (String1 == String2);
TheCondState.Ignore = !TheCondState.CondMet;
return false;
@@ -4219,6 +4230,7 @@ void AsmParser::initializeDirectiveKindMap() {
DirectiveKindMap[".ifc"] = DK_IFC;
DirectiveKindMap[".ifeqs"] = DK_IFEQS;
DirectiveKindMap[".ifnc"] = DK_IFNC;
+ DirectiveKindMap[".ifnes"] = DK_IFNES;
DirectiveKindMap[".ifdef"] = DK_IFDEF;
DirectiveKindMap[".ifndef"] = DK_IFNDEF;
DirectiveKindMap[".ifnotdef"] = DK_IFNOTDEF;
diff --git a/lib/MC/MCParser/DarwinAsmParser.cpp b/lib/MC/MCParser/DarwinAsmParser.cpp
index 3ea745e..9102dc3 100644
--- a/lib/MC/MCParser/DarwinAsmParser.cpp
+++ b/lib/MC/MCParser/DarwinAsmParser.cpp
@@ -626,7 +626,7 @@ bool DarwinAsmParser::parseDirectiveSecureLogUnique(StringRef, SMLoc IDLoc) {
if (getLexer().isNot(AsmToken::EndOfStatement))
return TokError("unexpected token in '.secure_log_unique' directive");
- if (getContext().getSecureLogUsed() != false)
+ if (getContext().getSecureLogUsed())
return Error(IDLoc, ".secure_log_unique specified multiple times");
// Get the secure log path.
diff --git a/lib/MC/MCSection.cpp b/lib/MC/MCSection.cpp
index ccf4a7d..7889f83 100644
--- a/lib/MC/MCSection.cpp
+++ b/lib/MC/MCSection.cpp
@@ -10,6 +10,7 @@
#include "llvm/MC/MCSection.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCSymbol.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
@@ -17,6 +18,14 @@ using namespace llvm;
// MCSection
//===----------------------------------------------------------------------===//
+MCSymbol *MCSection::getEndSymbol(MCContext &Ctx) const {
+ if (!End)
+ End = Ctx.createTempSymbol("sec_end", true);
+ return End;
+}
+
+bool MCSection::hasEnded() const { return End && End->isInSection(); }
+
MCSection::~MCSection() {
}
diff --git a/lib/MC/MCSectionMachO.cpp b/lib/MC/MCSectionMachO.cpp
index 46beda4..c9f1591 100644
--- a/lib/MC/MCSectionMachO.cpp
+++ b/lib/MC/MCSectionMachO.cpp
@@ -70,8 +70,10 @@ ENTRY(nullptr /*FIXME*/, S_ATTR_LOC_RELOC)
};
MCSectionMachO::MCSectionMachO(StringRef Segment, StringRef Section,
- unsigned TAA, unsigned reserved2, SectionKind K)
- : MCSection(SV_MachO, K), TypeAndAttributes(TAA), Reserved2(reserved2) {
+ unsigned TAA, unsigned reserved2, SectionKind K,
+ MCSymbol *Begin)
+ : MCSection(SV_MachO, K, Begin), TypeAndAttributes(TAA),
+ Reserved2(reserved2) {
assert(Segment.size() <= 16 && Section.size() <= 16 &&
"Segment or section string too long");
for (unsigned i = 0; i != 16; ++i) {
diff --git a/lib/MC/MCStreamer.cpp b/lib/MC/MCStreamer.cpp
index f11ee66..27d0355 100644
--- a/lib/MC/MCStreamer.cpp
+++ b/lib/MC/MCStreamer.cpp
@@ -16,6 +16,7 @@
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCObjectFileInfo.h"
#include "llvm/MC/MCObjectWriter.h"
+#include "llvm/MC/MCSection.h"
#include "llvm/MC/MCSymbol.h"
#include "llvm/MC/MCWin64EH.h"
#include "llvm/Support/ErrorHandling.h"
@@ -661,3 +662,30 @@ void MCStreamer::EmitBundleAlignMode(unsigned AlignPow2) {}
void MCStreamer::EmitBundleLock(bool AlignToEnd) {}
void MCStreamer::FinishImpl() {}
void MCStreamer::EmitBundleUnlock() {}
+
+void MCStreamer::SwitchSection(const MCSection *Section,
+ const MCExpr *Subsection) {
+ assert(Section && "Cannot switch to a null section!");
+ MCSectionSubPair curSection = SectionStack.back().first;
+ SectionStack.back().second = curSection;
+ if (MCSectionSubPair(Section, Subsection) != curSection) {
+ SectionStack.back().first = MCSectionSubPair(Section, Subsection);
+ assert(!Section->hasEnded() && "Section already ended");
+ ChangeSection(Section, Subsection);
+ MCSymbol *Sym = Section->getBeginSymbol();
+ if (Sym && !Sym->isInSection())
+ EmitLabel(Sym);
+ }
+}
+
+MCSymbol *MCStreamer::endSection(const MCSection *Section) {
+ // TODO: keep track of the last subsection so that this symbol appears in the
+ // correct place.
+ MCSymbol *Sym = Section->getEndSymbol(Context);
+ if (Sym->isInSection())
+ return Sym;
+
+ SwitchSection(Section);
+ EmitLabel(Sym);
+ return Sym;
+}
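
Note: endSection() above creates the end label lazily and emits it at most once; if the symbol already sits in a section it is returned as-is. A minimal standalone sketch of that emit-once discipline (hypothetical types, not the MCStreamer API):

#include <memory>
#include <string>

struct Symbol {
  std::string Name;
  bool InSection;
};

struct Section {
  std::unique_ptr<Symbol> End;
  Symbol *getEndSymbol() {
    if (!End)
      End.reset(new Symbol{"sec_end", false});
    return End.get();
  }
};

Symbol *endSection(Section &Sec) {
  Symbol *Sym = Sec.getEndSymbol();
  if (Sym->InSection) // already placed: reuse rather than emit again
    return Sym;
  Sym->InSection = true; // stands in for SwitchSection + EmitLabel
  return Sym;
}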
diff --git a/lib/MC/MCWinEH.cpp b/lib/MC/MCWinEH.cpp
index 47eaf0f..b1c95f8 100644
--- a/lib/MC/MCWinEH.cpp
+++ b/lib/MC/MCWinEH.cpp
@@ -11,6 +11,7 @@
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCObjectFileInfo.h"
#include "llvm/MC/MCSectionCOFF.h"
+#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSymbol.h"
#include "llvm/MC/MCWinEH.h"
#include "llvm/Support/COFF.h"
diff --git a/lib/MC/MachObjectWriter.cpp b/lib/MC/MachObjectWriter.cpp
index 588d424..5e9e86f 100644
--- a/lib/MC/MachObjectWriter.cpp
+++ b/lib/MC/MachObjectWriter.cpp
@@ -23,6 +23,7 @@
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MachO.h"
+#include "llvm/Support/raw_ostream.h"
#include <vector>
using namespace llvm;
diff --git a/lib/MC/SubtargetFeature.cpp b/lib/MC/SubtargetFeature.cpp
index 587be54..ec6c9cb 100644
--- a/lib/MC/SubtargetFeature.cpp
+++ b/lib/MC/SubtargetFeature.cpp
@@ -201,9 +201,13 @@ SubtargetFeatures::ToggleFeature(uint64_t Bits, StringRef Feature,
SetImpliedBits(Bits, FeatureEntry, FeatureTable);
}
} else {
- errs() << "'" << Feature
- << "' is not a recognized feature for this target"
- << " (ignoring feature)\n";
+ // Bug: 20140355
+ // Silence this warning for now
+ if (false) {
+ errs() << "'" << Feature
+ << "' is not a recognized feature for this target"
+ << " (ignoring feature)\n";
+ }
}
return Bits;
@@ -281,9 +285,13 @@ SubtargetFeatures::getFeatureBits(StringRef CPU,
ClearImpliedBits(Bits, FeatureEntry, FeatureTable);
}
} else {
- errs() << "'" << Feature
- << "' is not a recognized feature for this target"
- << " (ignoring feature)\n";
+ // Bug: 20140355
+ // Silence this warning for now
+ if (false) {
+ errs() << "'" << Feature
+ << "' is not a recognized feature for this target"
+ << " (ignoring feature)\n";
+ }
}
}
diff --git a/lib/MC/WinCOFFObjectWriter.cpp b/lib/MC/WinCOFFObjectWriter.cpp
index c519a9d..c6bc81d 100644
--- a/lib/MC/WinCOFFObjectWriter.cpp
+++ b/lib/MC/WinCOFFObjectWriter.cpp
@@ -175,6 +175,8 @@ public:
const MCFragment &FB, bool InSet,
bool IsPCRel) const override;
+ bool isWeak(const MCSymbolData &SD) const override;
+
void RecordRelocation(MCAssembler &Asm, const MCAsmLayout &Layout,
const MCFragment *Fragment, const MCFixup &Fixup,
MCValue Target, bool &IsPCRel,
@@ -661,6 +663,12 @@ bool WinCOFFObjectWriter::IsSymbolRefDifferenceFullyResolvedImpl(
InSet, IsPCRel);
}
+bool WinCOFFObjectWriter::isWeak(const MCSymbolData &SD) const {
+ // FIXME: this is for PR23025. Write a good description on
+ // why this is needed.
+ return SD.isExternal();
+}
+
void WinCOFFObjectWriter::RecordRelocation(
MCAssembler &Asm, const MCAsmLayout &Layout, const MCFragment *Fragment,
const MCFixup &Fixup, MCValue Target, bool &IsPCRel, uint64_t &FixedValue) {
diff --git a/lib/MC/WinCOFFStreamer.cpp b/lib/MC/WinCOFFStreamer.cpp
index 41a3da7..f902d2b 100644
--- a/lib/MC/WinCOFFStreamer.cpp
+++ b/lib/MC/WinCOFFStreamer.cpp
@@ -230,11 +230,11 @@ void MCWinCOFFStreamer::EmitLocalCommonSymbol(MCSymbol *Symbol, uint64_t Size,
AssignSection(Symbol, Section);
if (ByteAlignment != 1)
- new MCAlignFragment(ByteAlignment, /*_Value=*/0, /*_ValueSize=*/0,
+ new MCAlignFragment(ByteAlignment, /*Value=*/0, /*ValueSize=*/0,
ByteAlignment, &SectionData);
MCFillFragment *Fragment =
- new MCFillFragment(/*_Value=*/0, /*_ValueSize=*/0, Size, &SectionData);
+ new MCFillFragment(/*Value=*/0, /*ValueSize=*/0, Size, &SectionData);
SD.setFragment(Fragment);
}
diff --git a/lib/Makefile b/lib/Makefile
index 52fdaaf..f75ca58 100644
--- a/lib/Makefile
+++ b/lib/Makefile
@@ -12,6 +12,6 @@ include $(LEVEL)/Makefile.config
PARALLEL_DIRS := IR AsmParser Bitcode Analysis Transforms CodeGen Target \
ExecutionEngine Linker LTO MC Object Option DebugInfo \
- IRReader LineEditor ProfileData
+ IRReader LineEditor ProfileData Passes
include $(LEVEL)/Makefile.common
diff --git a/lib/Object/Archive.cpp b/lib/Object/Archive.cpp
index 43b0771..7d43daa 100644
--- a/lib/Object/Archive.cpp
+++ b/lib/Object/Archive.cpp
@@ -20,6 +20,7 @@
using namespace llvm;
using namespace object;
+using namespace llvm::support::endian;
static const char *const Magic = "!<arch>\n";
static const char *const ThinMagic = "!<thin>\n";
@@ -363,11 +364,9 @@ ErrorOr<Archive::child_iterator> Archive::Symbol::getMember() const {
Offsets += sizeof(uint32_t);
uint32_t Offset = 0;
if (Parent->kind() == K_GNU) {
- Offset =
- *(reinterpret_cast<const support::ubig32_t *>(Offsets) + SymbolIndex);
+ Offset = read32be(Offsets + SymbolIndex * 4);
} else if (Parent->kind() == K_MIPS64) {
- Offset =
- *(reinterpret_cast<const support::ubig64_t *>(Offsets) + SymbolIndex);
+ Offset = read64be(Offsets + SymbolIndex * 8);
} else if (Parent->kind() == K_BSD) {
// The SymbolIndex is an index into the ranlib structs that start at
// Offsets (the first uint32_t is the number of bytes of the ranlib
@@ -375,36 +374,29 @@ ErrorOr<Archive::child_iterator> Archive::Symbol::getMember() const {
// being a string table offset and the second being the offset into
// the archive of the member that defines the symbol. Which is what
// is needed here.
- Offset = *(reinterpret_cast<const support::ulittle32_t *>(Offsets) +
- (SymbolIndex * 2) + 1);
+ Offset = read32le(Offsets + SymbolIndex * 8 + 4);
} else {
- uint32_t MemberCount = *reinterpret_cast<const support::ulittle32_t*>(Buf);
-
// Skip offsets.
- Buf += sizeof(support::ulittle32_t) +
- (MemberCount * sizeof(support::ulittle32_t));
-
- uint32_t SymbolCount = *reinterpret_cast<const support::ulittle32_t*>(Buf);
+ uint32_t MemberCount = read32le(Buf);
+ Buf += MemberCount * 4 + 4;
+ uint32_t SymbolCount = read32le(Buf);
if (SymbolIndex >= SymbolCount)
return object_error::parse_failed;
// Skip SymbolCount to get to the indices table.
- const char *Indices = Buf + sizeof(support::ulittle32_t);
+ const char *Indices = Buf + 4;
// Get the index of the offset in the file member offset table for this
// symbol.
- uint16_t OffsetIndex =
- *(reinterpret_cast<const support::ulittle16_t*>(Indices)
- + SymbolIndex);
+ uint16_t OffsetIndex = read16le(Indices + SymbolIndex * 2);
// Subtract 1 since OffsetIndex is 1 based.
--OffsetIndex;
if (OffsetIndex >= MemberCount)
return object_error::parse_failed;
- Offset = *(reinterpret_cast<const support::ulittle32_t*>(Offsets)
- + OffsetIndex);
+ Offset = read32le(Offsets + OffsetIndex * 4);
}
const char *Loc = Parent->getData().begin() + Offset;
@@ -430,8 +422,7 @@ Archive::Symbol Archive::Symbol::getNext() const {
// the string table followed by the string table.
const char *Buf = Parent->SymbolTable->getBuffer().begin();
uint32_t RanlibCount = 0;
- RanlibCount = (*reinterpret_cast<const support::ulittle32_t *>(Buf)) /
- (sizeof(uint32_t) * 2);
+ RanlibCount = read32le(Buf) / 8;
// If t.SymbolIndex + 1 will be past the count of symbols (the RanlibCount)
// don't change the t.StringIndex as we don't want to reference a ranlib
// past RanlibCount.
@@ -439,10 +430,8 @@ Archive::Symbol Archive::Symbol::getNext() const {
const char *Ranlibs = Buf + 4;
uint32_t CurRanStrx = 0;
uint32_t NextRanStrx = 0;
- CurRanStrx = *(reinterpret_cast<const support::ulittle32_t *>(Ranlibs) +
- (t.SymbolIndex * 2));
- NextRanStrx = *(reinterpret_cast<const support::ulittle32_t *>(Ranlibs) +
- ((t.SymbolIndex + 1) * 2));
+ CurRanStrx = read32le(Ranlibs + t.SymbolIndex * 8);
+ NextRanStrx = read32le(Ranlibs + (t.SymbolIndex + 1) * 8);
t.StringIndex -= CurRanStrx;
t.StringIndex += NextRanStrx;
}
@@ -462,10 +451,10 @@ Archive::symbol_iterator Archive::symbol_begin() const {
const char *buf = SymbolTable->getBuffer().begin();
if (kind() == K_GNU) {
uint32_t symbol_count = 0;
- symbol_count = *reinterpret_cast<const support::ubig32_t*>(buf);
+ symbol_count = read32be(buf);
buf += sizeof(uint32_t) + (symbol_count * (sizeof(uint32_t)));
} else if (kind() == K_MIPS64) {
- uint64_t symbol_count = *reinterpret_cast<const support::ubig64_t *>(buf);
+ uint64_t symbol_count = read64be(buf);
buf += sizeof(uint64_t) + (symbol_count * (sizeof(uint64_t)));
} else if (kind() == K_BSD) {
// The __.SYMDEF or "__.SYMDEF SORTED" member starts with a uint32_t
@@ -475,11 +464,10 @@ Archive::symbol_iterator Archive::symbol_begin() const {
// define the symbol. After that the next uint32_t is the byte count of
// the string table followed by the string table.
uint32_t ranlib_count = 0;
- ranlib_count = (*reinterpret_cast<const support::ulittle32_t *>(buf)) /
- (sizeof(uint32_t) * 2);
+ ranlib_count = read32le(buf) / 8;
const char *ranlibs = buf + 4;
uint32_t ran_strx = 0;
- ran_strx = *(reinterpret_cast<const support::ulittle32_t *>(ranlibs));
+ ran_strx = read32le(ranlibs);
buf += sizeof(uint32_t) + (ranlib_count * (2 * (sizeof(uint32_t))));
// Skip the byte count of the string table.
buf += sizeof(uint32_t);
@@ -487,9 +475,9 @@ Archive::symbol_iterator Archive::symbol_begin() const {
} else {
uint32_t member_count = 0;
uint32_t symbol_count = 0;
- member_count = *reinterpret_cast<const support::ulittle32_t*>(buf);
+ member_count = read32le(buf);
buf += 4 + (member_count * 4); // Skip offsets.
- symbol_count = *reinterpret_cast<const support::ulittle32_t*>(buf);
+ symbol_count = read32le(buf);
buf += 4 + (symbol_count * 2); // Skip indices.
}
uint32_t string_start_offset = buf - SymbolTable->getBuffer().begin();
@@ -503,17 +491,16 @@ Archive::symbol_iterator Archive::symbol_end() const {
const char *buf = SymbolTable->getBuffer().begin();
uint32_t symbol_count = 0;
if (kind() == K_GNU) {
- symbol_count = *reinterpret_cast<const support::ubig32_t*>(buf);
+ symbol_count = read32be(buf);
} else if (kind() == K_MIPS64) {
- symbol_count = *reinterpret_cast<const support::ubig64_t*>(buf);
+ symbol_count = read64be(buf);
} else if (kind() == K_BSD) {
- symbol_count = (*reinterpret_cast<const support::ulittle32_t *>(buf)) /
- (sizeof(uint32_t) * 2);
+ symbol_count = read32le(buf) / 8;
} else {
uint32_t member_count = 0;
- member_count = *reinterpret_cast<const support::ulittle32_t*>(buf);
+ member_count = read32le(buf);
buf += 4 + (member_count * 4); // Skip offsets.
- symbol_count = *reinterpret_cast<const support::ulittle32_t*>(buf);
+ symbol_count = read32le(buf);
}
return symbol_iterator(Symbol(this, symbol_count, 0));
}
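
Note: the reinterpret_cast pointer arithmetic throughout Archive.cpp is replaced by the explicit endian helpers read16le/read32le/read32be/read64be, which also makes the 4- and 8-byte table-entry sizes visible in the offset math. A portable sketch of what such a helper does, assuming nothing about host byte order or alignment:

#include <cstdint>
#include <cstring>

// memcpy avoids unaligned access and strict-aliasing problems; the byte
// shuffle yields the right value regardless of host endianness.
static uint32_t read32le(const char *P) {
  unsigned char B[4];
  std::memcpy(B, P, 4);
  return uint32_t(B[0]) | uint32_t(B[1]) << 8 | uint32_t(B[2]) << 16 |
         uint32_t(B[3]) << 24;
}

static uint32_t read32be(const char *P) {
  unsigned char B[4];
  std::memcpy(B, P, 4);
  return uint32_t(B[0]) << 24 | uint32_t(B[1]) << 16 | uint32_t(B[2]) << 8 |
         uint32_t(B[3]);
}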
diff --git a/lib/Object/COFFObjectFile.cpp b/lib/Object/COFFObjectFile.cpp
index cde6fdc..ad278a4 100644
--- a/lib/Object/COFFObjectFile.cpp
+++ b/lib/Object/COFFObjectFile.cpp
@@ -190,7 +190,9 @@ std::error_code COFFObjectFile::getSymbolType(DataRefImpl Ref,
Result = SymbolRef::ST_Data;
} else if (Symb.isFileRecord()) {
Result = SymbolRef::ST_File;
- } else if (SectionNumber == COFF::IMAGE_SYM_DEBUG) {
+ } else if (SectionNumber == COFF::IMAGE_SYM_DEBUG ||
+ Symb.isSectionDefinition()) {
+ // TODO: perhaps we need a new symbol type ST_Section.
Result = SymbolRef::ST_Debug;
} else if (!COFF::isReservedSectionNumber(SectionNumber)) {
const coff_section *Section = nullptr;
@@ -359,12 +361,17 @@ bool COFFObjectFile::isSectionData(DataRefImpl Ref) const {
bool COFFObjectFile::isSectionBSS(DataRefImpl Ref) const {
const coff_section *Sec = toSec(Ref);
- return Sec->Characteristics & COFF::IMAGE_SCN_CNT_UNINITIALIZED_DATA;
+ const uint32_t BssFlags = COFF::IMAGE_SCN_CNT_UNINITIALIZED_DATA |
+ COFF::IMAGE_SCN_MEM_READ |
+ COFF::IMAGE_SCN_MEM_WRITE;
+ return (Sec->Characteristics & BssFlags) == BssFlags;
}
bool COFFObjectFile::isSectionVirtual(DataRefImpl Ref) const {
const coff_section *Sec = toSec(Ref);
- return Sec->Characteristics & COFF::IMAGE_SCN_CNT_UNINITIALIZED_DATA;
+ // In COFF, a virtual section won't have any in-file
+ // content, so the file pointer to the content will be zero.
+ return Sec->PointerToRawData == 0;
}
bool COFFObjectFile::sectionContainsSymbol(DataRefImpl SecRef,
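
Note: the isSectionBSS fix above addresses a classic flag-testing pitfall: with a multi-bit mask, (Flags & Mask) is nonzero when any one bit matches, while (Flags & Mask) == Mask requires all of them. A standalone illustration using the COFF characteristic values involved:

#include <cassert>
#include <cstdint>

int main() {
  const uint32_t CNT_UNINITIALIZED_DATA = 0x00000080;
  const uint32_t MEM_READ = 0x40000000;
  const uint32_t MEM_WRITE = 0x80000000;
  const uint32_t BssFlags = CNT_UNINITIALIZED_DATA | MEM_READ | MEM_WRITE;

  uint32_t ReadOnlyUninit = CNT_UNINITIALIZED_DATA | MEM_READ; // no write bit
  assert((ReadOnlyUninit & BssFlags) != 0);        // old test would say "BSS"
  assert((ReadOnlyUninit & BssFlags) != BssFlags); // new test correctly says no
}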
diff --git a/lib/Object/ELFYAML.cpp b/lib/Object/ELFYAML.cpp
index cce05cf..19527e2 100644
--- a/lib/Object/ELFYAML.cpp
+++ b/lib/Object/ELFYAML.cpp
@@ -235,6 +235,7 @@ void ScalarEnumerationTraits<ELFYAML::ELF_ELFOSABI>::enumeration(
ECase(ELFOSABI_NSK)
ECase(ELFOSABI_AROS)
ECase(ELFOSABI_FENIXOS)
+ ECase(ELFOSABI_CLOUDABI)
ECase(ELFOSABI_C6000_ELFABI)
ECase(ELFOSABI_C6000_LINUX)
ECase(ELFOSABI_ARM)
diff --git a/lib/Object/IRObjectFile.cpp b/lib/Object/IRObjectFile.cpp
index a2cbdcd..58c4ca3 100644
--- a/lib/Object/IRObjectFile.cpp
+++ b/lib/Object/IRObjectFile.cpp
@@ -13,6 +13,7 @@
#include "llvm/Object/IRObjectFile.h"
#include "RecordStreamer.h"
+#include "llvm/ADT/STLExtras.h"
#include "llvm/Bitcode/ReaderWriter.h"
#include "llvm/IR/GVMaterializer.h"
#include "llvm/IR/LLVMContext.h"
@@ -35,12 +36,9 @@ using namespace object;
IRObjectFile::IRObjectFile(MemoryBufferRef Object, std::unique_ptr<Module> Mod)
: SymbolicFile(Binary::ID_IR, Object), M(std::move(Mod)) {
- // If we have a DataLayout, setup a mangler.
- const DataLayout *DL = M->getDataLayout();
- if (!DL)
- return;
-
- Mang.reset(new Mangler(DL));
+ // Set up a mangler with the DataLayout.
+ const DataLayout &DL = M->getDataLayout();
+ Mang.reset(new Mangler(&DL));
const std::string &InlineAsm = M->getModuleInlineAsm();
if (InlineAsm.empty())
@@ -302,7 +300,9 @@ llvm::object::IRObjectFile::create(MemoryBufferRef Object,
std::unique_ptr<MemoryBuffer> Buff(
MemoryBuffer::getMemBuffer(BCOrErr.get(), false));
- ErrorOr<Module *> MOrErr = getLazyBitcodeModule(std::move(Buff), Context);
+ ErrorOr<Module *> MOrErr =
+ getLazyBitcodeModule(std::move(Buff), Context, nullptr,
+ /*ShouldLazyLoadMetadata*/ true);
if (std::error_code EC = MOrErr.getError())
return EC;
diff --git a/lib/Option/Arg.cpp b/lib/Option/Arg.cpp
index af632d6..ac00073 100644
--- a/lib/Option/Arg.cpp
+++ b/lib/Option/Arg.cpp
@@ -17,22 +17,21 @@
using namespace llvm;
using namespace llvm::opt;
-Arg::Arg(const Option _Opt, StringRef S, unsigned _Index, const Arg *_BaseArg)
- : Opt(_Opt), BaseArg(_BaseArg), Spelling(S), Index(_Index),
- Claimed(false), OwnsValues(false) {
-}
-
-Arg::Arg(const Option _Opt, StringRef S, unsigned _Index,
- const char *Value0, const Arg *_BaseArg)
- : Opt(_Opt), BaseArg(_BaseArg), Spelling(S), Index(_Index),
- Claimed(false), OwnsValues(false) {
+Arg::Arg(const Option Opt, StringRef S, unsigned Index, const Arg *BaseArg)
+ : Opt(Opt), BaseArg(BaseArg), Spelling(S), Index(Index), Claimed(false),
+ OwnsValues(false) {}
+
+Arg::Arg(const Option Opt, StringRef S, unsigned Index, const char *Value0,
+ const Arg *BaseArg)
+ : Opt(Opt), BaseArg(BaseArg), Spelling(S), Index(Index), Claimed(false),
+ OwnsValues(false) {
Values.push_back(Value0);
}
-Arg::Arg(const Option _Opt, StringRef S, unsigned _Index,
- const char *Value0, const char *Value1, const Arg *_BaseArg)
- : Opt(_Opt), BaseArg(_BaseArg), Spelling(S), Index(_Index),
- Claimed(false), OwnsValues(false) {
+Arg::Arg(const Option Opt, StringRef S, unsigned Index, const char *Value0,
+ const char *Value1, const Arg *BaseArg)
+ : Opt(Opt), BaseArg(BaseArg), Spelling(S), Index(Index), Claimed(false),
+ OwnsValues(false) {
Values.push_back(Value0);
Values.push_back(Value1);
}
diff --git a/lib/Option/ArgList.cpp b/lib/Option/ArgList.cpp
index 85e956f..4bc8f92 100644
--- a/lib/Option/ArgList.cpp
+++ b/lib/Option/ArgList.cpp
@@ -63,6 +63,26 @@ Arg *ArgList::getLastArgNoClaim(OptSpecifier Id0, OptSpecifier Id1) const {
return nullptr;
}
+Arg *ArgList::getLastArgNoClaim(OptSpecifier Id0, OptSpecifier Id1,
+ OptSpecifier Id2) const {
+ // FIXME: Make search efficient?
+ for (const_reverse_iterator it = rbegin(), ie = rend(); it != ie; ++it)
+ if ((*it)->getOption().matches(Id0) || (*it)->getOption().matches(Id1) ||
+ (*it)->getOption().matches(Id2))
+ return *it;
+ return nullptr;
+}
+
+Arg *ArgList::getLastArgNoClaim(OptSpecifier Id0, OptSpecifier Id1,
+ OptSpecifier Id2, OptSpecifier Id3) const {
+ // FIXME: Make search efficient?
+ for (const_reverse_iterator it = rbegin(), ie = rend(); it != ie; ++it)
+ if ((*it)->getOption().matches(Id0) || (*it)->getOption().matches(Id1) ||
+ (*it)->getOption().matches(Id2) || (*it)->getOption().matches(Id3))
+ return *it;
+ return nullptr;
+}
+
Arg *ArgList::getLastArg(OptSpecifier Id) const {
Arg *Res = nullptr;
for (const_iterator it = begin(), ie = end(); it != ie; ++it) {
@@ -285,11 +305,6 @@ void ArgList::ClaimAllArgs() const {
(*it)->claim();
}
-const char *ArgList::MakeArgString(const Twine &T) const {
- SmallString<256> Str;
- return MakeArgString(T.toStringRef(Str));
-}
-
const char *ArgList::GetOrMakeJoinedArgString(unsigned Index,
StringRef LHS,
StringRef RHS) const {
@@ -334,19 +349,18 @@ unsigned InputArgList::MakeIndex(StringRef String0,
return Index0;
}
-const char *InputArgList::MakeArgString(StringRef Str) const {
+const char *InputArgList::MakeArgStringRef(StringRef Str) const {
return getArgString(MakeIndex(Str));
}
//
-DerivedArgList::DerivedArgList(const InputArgList &_BaseArgs)
- : BaseArgs(_BaseArgs) {
-}
+DerivedArgList::DerivedArgList(const InputArgList &BaseArgs)
+ : BaseArgs(BaseArgs) {}
DerivedArgList::~DerivedArgList() {}
-const char *DerivedArgList::MakeArgString(StringRef Str) const {
+const char *DerivedArgList::MakeArgStringRef(StringRef Str) const {
return BaseArgs.MakeArgString(Str);
}
diff --git a/lib/Option/OptTable.cpp b/lib/Option/OptTable.cpp
index dca02c1..96ba183 100644
--- a/lib/Option/OptTable.cpp
+++ b/lib/Option/OptTable.cpp
@@ -84,15 +84,11 @@ static inline bool operator<(const OptTable::Info &I, const char *Name) {
OptSpecifier::OptSpecifier(const Option *Opt) : ID(Opt->getID()) {}
-OptTable::OptTable(const Info *_OptionInfos, unsigned _NumOptionInfos,
- bool _IgnoreCase)
- : OptionInfos(_OptionInfos),
- NumOptionInfos(_NumOptionInfos),
- IgnoreCase(_IgnoreCase),
- TheInputOptionID(0),
- TheUnknownOptionID(0),
- FirstSearchableIndex(0)
-{
+OptTable::OptTable(const Info *OptionInfos, unsigned NumOptionInfos,
+ bool IgnoreCase)
+ : OptionInfos(OptionInfos), NumOptionInfos(NumOptionInfos),
+ IgnoreCase(IgnoreCase), TheInputOptionID(0), TheUnknownOptionID(0),
+ FirstSearchableIndex(0) {
// Explicitly zero initialize the error to work around a bug in array
// value-initialization on MinGW with gcc 4.3.5.
diff --git a/lib/Option/Option.cpp b/lib/Option/Option.cpp
index cdc63c3..e29d649 100644
--- a/lib/Option/Option.cpp
+++ b/lib/Option/Option.cpp
@@ -35,9 +35,6 @@ Option::Option(const OptTable::Info *info, const OptTable *owner)
}
}
-Option::~Option() {
-}
-
void Option::dump() const {
llvm::errs() << "<";
switch (getKind()) {
diff --git a/lib/Passes/Android.mk b/lib/Passes/Android.mk
new file mode 100644
index 0000000..6e441d7
--- /dev/null
+++ b/lib/Passes/Android.mk
@@ -0,0 +1,30 @@
+LOCAL_PATH:= $(call my-dir)
+
+passes_SRC_FILES := \
+ PassBuilder.cpp
+
+# For the host
+# =====================================================
+include $(CLEAR_VARS)
+
+LOCAL_MODULE:= libLLVMPasses
+LOCAL_MODULE_TAGS := optional
+LOCAL_SRC_FILES := $(passes_SRC_FILES)
+
+include $(LLVM_HOST_BUILD_MK)
+include $(LLVM_GEN_INTRINSICS_MK)
+include $(BUILD_HOST_STATIC_LIBRARY)
+
+# For the device
+# =====================================================
+ifneq (true,$(DISABLE_LLVM_DEVICE_BUILDS))
+include $(CLEAR_VARS)
+
+LOCAL_MODULE:= libLLVMPasses
+LOCAL_MODULE_TAGS := optional
+LOCAL_SRC_FILES := $(passes_SRC_FILES)
+
+include $(LLVM_DEVICE_BUILD_MK)
+include $(LLVM_GEN_INTRINSICS_MK)
+include $(BUILD_STATIC_LIBRARY)
+endif
diff --git a/lib/Passes/CMakeLists.txt b/lib/Passes/CMakeLists.txt
new file mode 100644
index 0000000..6ceac7b
--- /dev/null
+++ b/lib/Passes/CMakeLists.txt
@@ -0,0 +1,8 @@
+add_llvm_library(LLVMPasses
+ PassBuilder.cpp
+
+ ADDITIONAL_HEADER_DIRS
+ ${LLVM_MAIN_INCLUDE_DIR}/llvm/Passes
+ )
+
+add_dependencies(LLVMPasses intrinsics_gen)
diff --git a/lib/Passes/LLVMBuild.txt b/lib/Passes/LLVMBuild.txt
new file mode 100644
index 0000000..3063fe3
--- /dev/null
+++ b/lib/Passes/LLVMBuild.txt
@@ -0,0 +1,22 @@
+;===- ./lib/Passes/LLVMBuild.txt -------------------------------*- Conf -*--===;
+;
+; The LLVM Compiler Infrastructure
+;
+; This file is distributed under the University of Illinois Open Source
+; License. See LICENSE.TXT for details.
+;
+;===------------------------------------------------------------------------===;
+;
+; This is an LLVMBuild description file for the components in this subdirectory.
+;
+; For more information on the LLVMBuild system, please see:
+;
+; http://llvm.org/docs/LLVMBuild.html
+;
+;===------------------------------------------------------------------------===;
+
+[component_0]
+type = Library
+name = Passes
+parent = Libraries
+required_libraries = Analysis Core IPA IPO InstCombine Scalar Support TransformUtils Vectorize
diff --git a/lib/Passes/Makefile b/lib/Passes/Makefile
new file mode 100644
index 0000000..413dc5c
--- /dev/null
+++ b/lib/Passes/Makefile
@@ -0,0 +1,14 @@
+##===- lib/Passes/Makefile ---------------------------------*- Makefile -*-===##
+#
+# The LLVM Compiler Infrastructure
+#
+# This file is distributed under the University of Illinois Open Source
+# License. See LICENSE.TXT for details.
+#
+##===----------------------------------------------------------------------===##
+
+LEVEL = ../..
+LIBRARYNAME = LLVMPasses
+BUILD_ARCHIVE := 1
+
+include $(LEVEL)/Makefile.common
diff --git a/lib/Passes/PassBuilder.cpp b/lib/Passes/PassBuilder.cpp
new file mode 100644
index 0000000..ba71320
--- /dev/null
+++ b/lib/Passes/PassBuilder.cpp
@@ -0,0 +1,412 @@
+//===- Parsing, selection, and construction of pass pipelines -------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+/// \file
+///
+/// This file provides the implementation of the PassBuilder based on our
+/// static pass registry as well as related functionality. It also provides
+/// helpers to aid in analyzing, debugging, and testing passes and pass
+/// pipelines.
+///
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Passes/PassBuilder.h"
+#include "llvm/Analysis/AssumptionCache.h"
+#include "llvm/Analysis/CGSCCPassManager.h"
+#include "llvm/Analysis/LazyCallGraph.h"
+#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/Analysis/TargetLibraryInfo.h"
+#include "llvm/Analysis/TargetTransformInfo.h"
+#include "llvm/IR/Dominators.h"
+#include "llvm/IR/IRPrintingPasses.h"
+#include "llvm/IR/PassManager.h"
+#include "llvm/IR/Verifier.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Transforms/InstCombine/InstCombine.h"
+#include "llvm/Transforms/Scalar/EarlyCSE.h"
+#include "llvm/Transforms/Scalar/LowerExpectIntrinsic.h"
+#include "llvm/Transforms/Scalar/SimplifyCFG.h"
+
+using namespace llvm;
+
+namespace {
+
+/// \brief No-op module pass which does nothing.
+struct NoOpModulePass {
+ PreservedAnalyses run(Module &M) { return PreservedAnalyses::all(); }
+ static StringRef name() { return "NoOpModulePass"; }
+};
+
+/// \brief No-op module analysis.
+struct NoOpModuleAnalysis {
+ struct Result {};
+ Result run(Module &) { return Result(); }
+ static StringRef name() { return "NoOpModuleAnalysis"; }
+ static void *ID() { return (void *)&PassID; }
+private:
+ static char PassID;
+};
+
+char NoOpModuleAnalysis::PassID;
+
+/// \brief No-op CGSCC pass which does nothing.
+struct NoOpCGSCCPass {
+ PreservedAnalyses run(LazyCallGraph::SCC &C) {
+ return PreservedAnalyses::all();
+ }
+ static StringRef name() { return "NoOpCGSCCPass"; }
+};
+
+/// \brief No-op CGSCC analysis.
+struct NoOpCGSCCAnalysis {
+ struct Result {};
+ Result run(LazyCallGraph::SCC &) { return Result(); }
+ static StringRef name() { return "NoOpCGSCCAnalysis"; }
+ static void *ID() { return (void *)&PassID; }
+private:
+ static char PassID;
+};
+
+char NoOpCGSCCAnalysis::PassID;
+
+/// \brief No-op function pass which does nothing.
+struct NoOpFunctionPass {
+ PreservedAnalyses run(Function &F) { return PreservedAnalyses::all(); }
+ static StringRef name() { return "NoOpFunctionPass"; }
+};
+
+/// \brief No-op function analysis.
+struct NoOpFunctionAnalysis {
+ struct Result {};
+ Result run(Function &) { return Result(); }
+ static StringRef name() { return "NoOpFunctionAnalysis"; }
+ static void *ID() { return (void *)&PassID; }
+private:
+ static char PassID;
+};
+
+char NoOpFunctionAnalysis::PassID;
+
+} // End anonymous namespace.
+
+void PassBuilder::registerModuleAnalyses(ModuleAnalysisManager &MAM) {
+#define MODULE_ANALYSIS(NAME, CREATE_PASS) \
+ MAM.registerPass(CREATE_PASS);
+#include "PassRegistry.def"
+}
+
+void PassBuilder::registerCGSCCAnalyses(CGSCCAnalysisManager &CGAM) {
+#define CGSCC_ANALYSIS(NAME, CREATE_PASS) \
+ CGAM.registerPass(CREATE_PASS);
+#include "PassRegistry.def"
+}
+
+void PassBuilder::registerFunctionAnalyses(FunctionAnalysisManager &FAM) {
+#define FUNCTION_ANALYSIS(NAME, CREATE_PASS) \
+ FAM.registerPass(CREATE_PASS);
+#include "PassRegistry.def"
+}
+
+#ifndef NDEBUG
+static bool isModulePassName(StringRef Name) {
+#define MODULE_PASS(NAME, CREATE_PASS) if (Name == NAME) return true;
+#define MODULE_ANALYSIS(NAME, CREATE_PASS) \
+ if (Name == "require<" NAME ">" || Name == "invalidate<" NAME ">") \
+ return true;
+#include "PassRegistry.def"
+
+ return false;
+}
+#endif
+
+static bool isCGSCCPassName(StringRef Name) {
+#define CGSCC_PASS(NAME, CREATE_PASS) if (Name == NAME) return true;
+#define CGSCC_ANALYSIS(NAME, CREATE_PASS) \
+ if (Name == "require<" NAME ">" || Name == "invalidate<" NAME ">") \
+ return true;
+#include "PassRegistry.def"
+
+ return false;
+}
+
+static bool isFunctionPassName(StringRef Name) {
+#define FUNCTION_PASS(NAME, CREATE_PASS) if (Name == NAME) return true;
+#define FUNCTION_ANALYSIS(NAME, CREATE_PASS) \
+ if (Name == "require<" NAME ">" || Name == "invalidate<" NAME ">") \
+ return true;
+#include "PassRegistry.def"
+
+ return false;
+}
+
+bool PassBuilder::parseModulePassName(ModulePassManager &MPM, StringRef Name) {
+#define MODULE_PASS(NAME, CREATE_PASS) \
+ if (Name == NAME) { \
+ MPM.addPass(CREATE_PASS); \
+ return true; \
+ }
+#define MODULE_ANALYSIS(NAME, CREATE_PASS) \
+ if (Name == "require<" NAME ">") { \
+ MPM.addPass(RequireAnalysisPass<decltype(CREATE_PASS)>()); \
+ return true; \
+ } \
+ if (Name == "invalidate<" NAME ">") { \
+ MPM.addPass(InvalidateAnalysisPass<decltype(CREATE_PASS)>()); \
+ return true; \
+ }
+#include "PassRegistry.def"
+
+ return false;
+}
+
+bool PassBuilder::parseCGSCCPassName(CGSCCPassManager &CGPM, StringRef Name) {
+#define CGSCC_PASS(NAME, CREATE_PASS) \
+ if (Name == NAME) { \
+ CGPM.addPass(CREATE_PASS); \
+ return true; \
+ }
+#define CGSCC_ANALYSIS(NAME, CREATE_PASS) \
+ if (Name == "require<" NAME ">") { \
+ CGPM.addPass(RequireAnalysisPass<decltype(CREATE_PASS)>()); \
+ return true; \
+ } \
+ if (Name == "invalidate<" NAME ">") { \
+ CGPM.addPass(InvalidateAnalysisPass<decltype(CREATE_PASS)>()); \
+ return true; \
+ }
+#include "PassRegistry.def"
+
+ return false;
+}
+
+bool PassBuilder::parseFunctionPassName(FunctionPassManager &FPM,
+ StringRef Name) {
+#define FUNCTION_PASS(NAME, CREATE_PASS) \
+ if (Name == NAME) { \
+ FPM.addPass(CREATE_PASS); \
+ return true; \
+ }
+#define FUNCTION_ANALYSIS(NAME, CREATE_PASS) \
+ if (Name == "require<" NAME ">") { \
+ FPM.addPass(RequireAnalysisPass<decltype(CREATE_PASS)>()); \
+ return true; \
+ } \
+ if (Name == "invalidate<" NAME ">") { \
+ FPM.addPass(InvalidateAnalysisPass<decltype(CREATE_PASS)>()); \
+ return true; \
+ }
+#include "PassRegistry.def"
+
+ return false;
+}
+
+bool PassBuilder::parseFunctionPassPipeline(FunctionPassManager &FPM,
+ StringRef &PipelineText,
+ bool VerifyEachPass,
+ bool DebugLogging) {
+ for (;;) {
+ // Parse nested pass managers by recursing.
+ if (PipelineText.startswith("function(")) {
+ FunctionPassManager NestedFPM(DebugLogging);
+
+ // Parse the inner pipeline into the nested manager.
+ PipelineText = PipelineText.substr(strlen("function("));
+ if (!parseFunctionPassPipeline(NestedFPM, PipelineText, VerifyEachPass,
+ DebugLogging) ||
+ PipelineText.empty())
+ return false;
+ assert(PipelineText[0] == ')');
+ PipelineText = PipelineText.substr(1);
+
+ // Add the nested pass manager with the appropriate adaptor.
+ FPM.addPass(std::move(NestedFPM));
+ } else {
+ // Otherwise try to parse a pass name.
+ size_t End = PipelineText.find_first_of(",)");
+ if (!parseFunctionPassName(FPM, PipelineText.substr(0, End)))
+ return false;
+ if (VerifyEachPass)
+ FPM.addPass(VerifierPass());
+
+ PipelineText = PipelineText.substr(End);
+ }
+
+ if (PipelineText.empty() || PipelineText[0] == ')')
+ return true;
+
+ assert(PipelineText[0] == ',');
+ PipelineText = PipelineText.substr(1);
+ }
+}
+
+bool PassBuilder::parseCGSCCPassPipeline(CGSCCPassManager &CGPM,
+ StringRef &PipelineText,
+ bool VerifyEachPass,
+ bool DebugLogging) {
+ for (;;) {
+ // Parse nested pass managers by recursing.
+ if (PipelineText.startswith("cgscc(")) {
+ CGSCCPassManager NestedCGPM(DebugLogging);
+
+ // Parse the inner pipeline into the nested manager.
+ PipelineText = PipelineText.substr(strlen("cgscc("));
+ if (!parseCGSCCPassPipeline(NestedCGPM, PipelineText, VerifyEachPass,
+ DebugLogging) ||
+ PipelineText.empty())
+ return false;
+ assert(PipelineText[0] == ')');
+ PipelineText = PipelineText.substr(1);
+
+ // Add the nested pass manager with the appropriate adaptor.
+ CGPM.addPass(std::move(NestedCGPM));
+ } else if (PipelineText.startswith("function(")) {
+ FunctionPassManager NestedFPM(DebugLogging);
+
+ // Parse the inner pipeline into the nested manager.
+ PipelineText = PipelineText.substr(strlen("function("));
+ if (!parseFunctionPassPipeline(NestedFPM, PipelineText, VerifyEachPass,
+ DebugLogging) ||
+ PipelineText.empty())
+ return false;
+ assert(PipelineText[0] == ')');
+ PipelineText = PipelineText.substr(1);
+
+ // Add the nested pass manager with the appropriate adaptor.
+ CGPM.addPass(createCGSCCToFunctionPassAdaptor(std::move(NestedFPM)));
+ } else {
+ // Otherwise try to parse a pass name.
+ size_t End = PipelineText.find_first_of(",)");
+ if (!parseCGSCCPassName(CGPM, PipelineText.substr(0, End)))
+ return false;
+ // FIXME: No verifier support for CGSCC passes!
+
+ PipelineText = PipelineText.substr(End);
+ }
+
+ if (PipelineText.empty() || PipelineText[0] == ')')
+ return true;
+
+ assert(PipelineText[0] == ',');
+ PipelineText = PipelineText.substr(1);
+ }
+}
+
+bool PassBuilder::parseModulePassPipeline(ModulePassManager &MPM,
+ StringRef &PipelineText,
+ bool VerifyEachPass,
+ bool DebugLogging) {
+ for (;;) {
+ // Parse nested pass managers by recursing.
+ if (PipelineText.startswith("module(")) {
+ ModulePassManager NestedMPM(DebugLogging);
+
+ // Parse the inner pipeline into the nested manager.
+ PipelineText = PipelineText.substr(strlen("module("));
+ if (!parseModulePassPipeline(NestedMPM, PipelineText, VerifyEachPass,
+ DebugLogging) ||
+ PipelineText.empty())
+ return false;
+ assert(PipelineText[0] == ')');
+ PipelineText = PipelineText.substr(1);
+
+ // Now add the nested manager as a module pass.
+ MPM.addPass(std::move(NestedMPM));
+ } else if (PipelineText.startswith("cgscc(")) {
+ CGSCCPassManager NestedCGPM(DebugLogging);
+
+ // Parse the inner pipeline into the nested manager.
+ PipelineText = PipelineText.substr(strlen("cgscc("));
+ if (!parseCGSCCPassPipeline(NestedCGPM, PipelineText, VerifyEachPass,
+ DebugLogging) ||
+ PipelineText.empty())
+ return false;
+ assert(PipelineText[0] == ')');
+ PipelineText = PipelineText.substr(1);
+
+ // Add the nested pass manager with the appropriate adaptor.
+ MPM.addPass(
+ createModuleToPostOrderCGSCCPassAdaptor(std::move(NestedCGPM)));
+ } else if (PipelineText.startswith("function(")) {
+ FunctionPassManager NestedFPM(DebugLogging);
+
+ // Parse the inner pipeline into the nested manager.
+ PipelineText = PipelineText.substr(strlen("function("));
+ if (!parseFunctionPassPipeline(NestedFPM, PipelineText, VerifyEachPass,
+ DebugLogging) ||
+ PipelineText.empty())
+ return false;
+ assert(PipelineText[0] == ')');
+ PipelineText = PipelineText.substr(1);
+
+ // Add the nested pass manager with the appropriate adaptor.
+ MPM.addPass(createModuleToFunctionPassAdaptor(std::move(NestedFPM)));
+ } else {
+ // Otherwise try to parse a pass name.
+ size_t End = PipelineText.find_first_of(",)");
+ if (!parseModulePassName(MPM, PipelineText.substr(0, End)))
+ return false;
+ if (VerifyEachPass)
+ MPM.addPass(VerifierPass());
+
+ PipelineText = PipelineText.substr(End);
+ }
+
+ if (PipelineText.empty() || PipelineText[0] == ')')
+ return true;
+
+ assert(PipelineText[0] == ',');
+ PipelineText = PipelineText.substr(1);
+ }
+}
+
+// Primary pass pipeline description parsing routine.
+// FIXME: Should this routine accept a TargetMachine or require the caller to
+// pre-populate the analysis managers with target-specific stuff?
+bool PassBuilder::parsePassPipeline(ModulePassManager &MPM,
+ StringRef PipelineText, bool VerifyEachPass,
+ bool DebugLogging) {
+ // By default, try to parse the pipeline as if it were within an implicit
+ // 'module(...)' pass pipeline. If this will parse at all, it needs to
+ // consume the entire string.
+ if (parseModulePassPipeline(MPM, PipelineText, VerifyEachPass, DebugLogging))
+ return PipelineText.empty();
+
+ // This isn't parsable as a module pipeline, so look for the end of a pass
+ // name and directly drop down to that layer.
+ StringRef FirstName =
+ PipelineText.substr(0, PipelineText.find_first_of(",)"));
+ assert(!isModulePassName(FirstName) &&
+ "Already handled all module pipeline options.");
+
+ // If this looks like a CGSCC pass, parse the whole thing as a CGSCC
+ // pipeline.
+ if (isCGSCCPassName(FirstName)) {
+ CGSCCPassManager CGPM(DebugLogging);
+ if (!parseCGSCCPassPipeline(CGPM, PipelineText, VerifyEachPass,
+ DebugLogging) ||
+ !PipelineText.empty())
+ return false;
+ MPM.addPass(createModuleToPostOrderCGSCCPassAdaptor(std::move(CGPM)));
+ return true;
+ }
+
+ // Similarly, if this looks like a Function pass, parse the whole thing as
+ // a Function pipeline.
+ if (isFunctionPassName(FirstName)) {
+ FunctionPassManager FPM(DebugLogging);
+ if (!parseFunctionPassPipeline(FPM, PipelineText, VerifyEachPass,
+ DebugLogging) ||
+ !PipelineText.empty())
+ return false;
+ MPM.addPass(createModuleToFunctionPassAdaptor(std::move(FPM)));
+ return true;
+ }
+
+ return false;
+}
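Taken together, these routines let a tool turn a textual description into a configured pass manager. A minimal usage sketch, assuming the PassBuilder interface added above (the header path, default construction, and the pipeline string are assumptions for illustration):

    #include "llvm/IR/PassManager.h"
    #include "llvm/Passes/PassBuilder.h"

    bool buildExamplePipeline(llvm::ModulePassManager &MPM) {
      llvm::PassBuilder PB;
      // Nesting in the string mirrors the nested managers built by the
      // parsers above: a module pipeline wrapping a function pipeline.
      return PB.parsePassPipeline(MPM,
                                  "module(function(instcombine,simplify-cfg))",
                                  /*VerifyEachPass=*/true,
                                  /*DebugLogging=*/false);
    }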
diff --git a/lib/Passes/PassRegistry.def b/lib/Passes/PassRegistry.def
new file mode 100644
index 0000000..d768a3a
--- /dev/null
+++ b/lib/Passes/PassRegistry.def
@@ -0,0 +1,77 @@
+//===- PassRegistry.def - Registry of passes --------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is used as the registry of passes that are part of the core LLVM
+// libraries. This file describes both transformation passes and analyses
+// Analyses are registered while transformation passes have names registered
+// that can be used when providing a textual pass pipeline.
+//
+//===----------------------------------------------------------------------===//
+
+// NOTE: NO INCLUDE GUARD DESIRED!
+
+#ifndef MODULE_ANALYSIS
+#define MODULE_ANALYSIS(NAME, CREATE_PASS)
+#endif
+MODULE_ANALYSIS("lcg", LazyCallGraphAnalysis())
+MODULE_ANALYSIS("no-op-module", NoOpModuleAnalysis())
+MODULE_ANALYSIS("targetlibinfo", TargetLibraryAnalysis())
+#undef MODULE_ANALYSIS
+
+#ifndef MODULE_PASS
+#define MODULE_PASS(NAME, CREATE_PASS)
+#endif
+MODULE_PASS("invalidate<all>", InvalidateAllAnalysesPass())
+MODULE_PASS("no-op-module", NoOpModulePass())
+MODULE_PASS("print", PrintModulePass(dbgs()))
+MODULE_PASS("print-cg", LazyCallGraphPrinterPass(dbgs()))
+MODULE_PASS("verify", VerifierPass())
+#undef MODULE_PASS
+
+#ifndef CGSCC_ANALYSIS
+#define CGSCC_ANALYSIS(NAME, CREATE_PASS)
+#endif
+CGSCC_ANALYSIS("no-op-cgscc", NoOpCGSCCAnalysis())
+#undef CGSCC_ANALYSIS
+
+#ifndef CGSCC_PASS
+#define CGSCC_PASS(NAME, CREATE_PASS)
+#endif
+CGSCC_PASS("invalidate<all>", InvalidateAllAnalysesPass())
+CGSCC_PASS("no-op-cgscc", NoOpCGSCCPass())
+#undef CGSCC_PASS
+
+#ifndef FUNCTION_ANALYSIS
+#define FUNCTION_ANALYSIS(NAME, CREATE_PASS)
+#endif
+FUNCTION_ANALYSIS("assumptions", AssumptionAnalysis())
+FUNCTION_ANALYSIS("domtree", DominatorTreeAnalysis())
+FUNCTION_ANALYSIS("loops", LoopAnalysis())
+FUNCTION_ANALYSIS("no-op-function", NoOpFunctionAnalysis())
+FUNCTION_ANALYSIS("targetlibinfo", TargetLibraryAnalysis())
+FUNCTION_ANALYSIS("targetir",
+ TM ? TM->getTargetIRAnalysis() : TargetIRAnalysis())
+#undef FUNCTION_ANALYSIS
+
+#ifndef FUNCTION_PASS
+#define FUNCTION_PASS(NAME, CREATE_PASS)
+#endif
+FUNCTION_PASS("early-cse", EarlyCSEPass())
+FUNCTION_PASS("instcombine", InstCombinePass())
+FUNCTION_PASS("invalidate<all>", InvalidateAllAnalysesPass())
+FUNCTION_PASS("no-op-function", NoOpFunctionPass())
+FUNCTION_PASS("lower-expect", LowerExpectIntrinsicPass())
+FUNCTION_PASS("print", PrintFunctionPass(dbgs()))
+FUNCTION_PASS("print<assumptions>", AssumptionPrinterPass(dbgs()))
+FUNCTION_PASS("print<domtree>", DominatorTreePrinterPass(dbgs()))
+FUNCTION_PASS("print<loops>", LoopPrinterPass(dbgs()))
+FUNCTION_PASS("simplify-cfg", SimplifyCFGPass())
+FUNCTION_PASS("verify", VerifierPass())
+FUNCTION_PASS("verify<domtree>", DominatorTreeVerifierPass())
+#undef FUNCTION_PASS
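The registry is consumed via the usual X-macro pattern: a client defines only the macros it cares about, includes the file, and gets one expansion per entry. A reduced sketch of what parseFunctionPassName above expands to (entries abbreviated for illustration):

    #define FUNCTION_PASS(NAME, CREATE_PASS)                                   \
      if (Name == NAME) {                                                      \
        FPM.addPass(CREATE_PASS);                                              \
        return true;                                                           \
      }
    #include "PassRegistry.def"
    // After preprocessing this is a flat chain of string compares, e.g.:
    //   if (Name == "early-cse")   { FPM.addPass(EarlyCSEPass());    return true; }
    //   if (Name == "instcombine") { FPM.addPass(InstCombinePass()); return true; }
    // ...one per FUNCTION_PASS line; the .def file #undefs each macro at its end.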
diff --git a/lib/ProfileData/CoverageMapping.cpp b/lib/ProfileData/CoverageMapping.cpp
index 31213d7..46d494b 100644
--- a/lib/ProfileData/CoverageMapping.cpp
+++ b/lib/ProfileData/CoverageMapping.cpp
@@ -20,6 +20,7 @@
#include "llvm/ProfileData/InstrProfReader.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
using namespace llvm;
using namespace coverage;
@@ -217,12 +218,13 @@ CoverageMapping::load(CoverageMappingReader &CoverageReader,
}
ErrorOr<std::unique_ptr<CoverageMapping>>
-CoverageMapping::load(StringRef ObjectFilename, StringRef ProfileFilename) {
+CoverageMapping::load(StringRef ObjectFilename, StringRef ProfileFilename,
+ Triple::ArchType Arch) {
auto CounterMappingBuff = MemoryBuffer::getFileOrSTDIN(ObjectFilename);
if (std::error_code EC = CounterMappingBuff.getError())
return EC;
auto CoverageReaderOrErr =
- BinaryCoverageReader::create(CounterMappingBuff.get());
+ BinaryCoverageReader::create(CounterMappingBuff.get(), Arch);
if (std::error_code EC = CoverageReaderOrErr.getError())
return EC;
auto CoverageReader = std::move(CoverageReaderOrErr.get());
diff --git a/lib/ProfileData/CoverageMappingReader.cpp b/lib/ProfileData/CoverageMappingReader.cpp
index d32f1da..12e9e88 100644
--- a/lib/ProfileData/CoverageMappingReader.cpp
+++ b/lib/ProfileData/CoverageMappingReader.cpp
@@ -14,9 +14,12 @@
#include "llvm/ProfileData/CoverageMappingReader.h"
#include "llvm/ADT/DenseSet.h"
+#include "llvm/Object/MachOUniversal.h"
#include "llvm/Object/ObjectFile.h"
#include "llvm/Support/Debug.h"
+#include "llvm/Support/Endian.h"
#include "llvm/Support/LEB128.h"
+#include "llvm/Support/raw_ostream.h"
using namespace llvm;
using namespace coverage;
@@ -287,24 +290,6 @@ std::error_code RawCoverageMappingReader::read() {
}
namespace {
-/// \brief The coverage mapping data for a single function.
-/// It points to the function's name.
-template <typename IntPtrT> struct CoverageMappingFunctionRecord {
- IntPtrT FunctionNamePtr;
- uint32_t FunctionNameSize;
- uint32_t CoverageMappingSize;
- uint64_t FunctionHash;
-};
-
-/// \brief The coverage mapping data for a single translation unit.
-/// It points to the array of function coverage mapping records and the encoded
-/// filenames array.
-template <typename IntPtrT> struct CoverageMappingTURecord {
- uint32_t FunctionRecordsSize;
- uint32_t FilenamesSize;
- uint32_t CoverageMappingsSize;
- uint32_t Version;
-};
/// \brief A helper structure to access the data from a section
/// in an object file.
@@ -331,77 +316,76 @@ struct SectionData {
};
}
-template <typename T>
+template <typename T, support::endianness Endian>
std::error_code readCoverageMappingData(
SectionData &ProfileNames, StringRef Data,
std::vector<BinaryCoverageReader::ProfileMappingRecord> &Records,
std::vector<StringRef> &Filenames) {
+ using namespace support;
llvm::DenseSet<T> UniqueFunctionMappingData;
// Read the records in the coverage data section.
- while (!Data.empty()) {
- if (Data.size() < sizeof(CoverageMappingTURecord<T>))
+ for (const char *Buf = Data.data(), *End = Buf + Data.size(); Buf < End;) {
+ if (Buf + 4 * sizeof(uint32_t) > End)
return instrprof_error::malformed;
- auto TU = reinterpret_cast<const CoverageMappingTURecord<T> *>(Data.data());
- Data = Data.substr(sizeof(CoverageMappingTURecord<T>));
- switch (TU->Version) {
+ uint32_t NRecords = endian::readNext<uint32_t, Endian, unaligned>(Buf);
+ uint32_t FilenamesSize = endian::readNext<uint32_t, Endian, unaligned>(Buf);
+ uint32_t CoverageSize = endian::readNext<uint32_t, Endian, unaligned>(Buf);
+ uint32_t Version = endian::readNext<uint32_t, Endian, unaligned>(Buf);
+
+ switch (Version) {
case CoverageMappingVersion1:
break;
default:
return instrprof_error::unsupported_version;
}
- auto Version = CoverageMappingVersion(TU->Version);
- // Get the function records.
- auto FunctionRecords =
- reinterpret_cast<const CoverageMappingFunctionRecord<T> *>(Data.data());
- if (Data.size() <
- sizeof(CoverageMappingFunctionRecord<T>) * TU->FunctionRecordsSize)
- return instrprof_error::malformed;
- Data = Data.substr(sizeof(CoverageMappingFunctionRecord<T>) *
- TU->FunctionRecordsSize);
+ // Skip past the function records, saving the start and end for later.
+ const char *FunBuf = Buf;
+ Buf += NRecords * (sizeof(T) + 2 * sizeof(uint32_t) + sizeof(uint64_t));
+ const char *FunEnd = Buf;
// Get the filenames.
- if (Data.size() < TU->FilenamesSize)
+ if (Buf + FilenamesSize > End)
return instrprof_error::malformed;
- auto RawFilenames = Data.substr(0, TU->FilenamesSize);
- Data = Data.substr(TU->FilenamesSize);
size_t FilenamesBegin = Filenames.size();
- RawCoverageFilenamesReader Reader(RawFilenames, Filenames);
+ RawCoverageFilenamesReader Reader(StringRef(Buf, FilenamesSize), Filenames);
if (auto Err = Reader.read())
return Err;
+ Buf += FilenamesSize;
- // Get the coverage mappings.
- if (Data.size() < TU->CoverageMappingsSize)
+ // We'll read the coverage mapping records in the loop below.
+ const char *CovBuf = Buf;
+ Buf += CoverageSize;
+ const char *CovEnd = Buf;
+ if (Buf > End)
return instrprof_error::malformed;
- auto CoverageMappings = Data.substr(0, TU->CoverageMappingsSize);
- Data = Data.substr(TU->CoverageMappingsSize);
- for (unsigned I = 0; I < TU->FunctionRecordsSize; ++I) {
- auto &MappingRecord = FunctionRecords[I];
+ while (FunBuf < FunEnd) {
+ // Read the function information
+ T NamePtr = endian::readNext<T, Endian, unaligned>(FunBuf);
+ uint32_t NameSize = endian::readNext<uint32_t, Endian, unaligned>(FunBuf);
+ uint32_t DataSize = endian::readNext<uint32_t, Endian, unaligned>(FunBuf);
+ uint64_t FuncHash = endian::readNext<uint64_t, Endian, unaligned>(FunBuf);
- // Get the coverage mapping.
- if (CoverageMappings.size() < MappingRecord.CoverageMappingSize)
+ // Now use that to read the coverage data.
+ if (CovBuf + DataSize > CovEnd)
return instrprof_error::malformed;
- auto Mapping =
- CoverageMappings.substr(0, MappingRecord.CoverageMappingSize);
- CoverageMappings =
- CoverageMappings.substr(MappingRecord.CoverageMappingSize);
+ auto Mapping = StringRef(CovBuf, DataSize);
+ CovBuf += DataSize;
// Ignore this record if we already have a record that points to the same
- // function name.
- // This is useful to ignore the redundant records for the functions
- // with ODR linkage.
- if (!UniqueFunctionMappingData.insert(MappingRecord.FunctionNamePtr)
- .second)
+ // function name. This is useful to ignore the redundant records for the
+ // functions with ODR linkage.
+ if (!UniqueFunctionMappingData.insert(NamePtr).second)
continue;
- StringRef FunctionName;
- if (auto Err =
- ProfileNames.get(MappingRecord.FunctionNamePtr,
- MappingRecord.FunctionNameSize, FunctionName))
- return Err;
+
+ // Finally, grab the name and create a record.
+ StringRef FuncName;
+ if (std::error_code EC = ProfileNames.get(NamePtr, NameSize, FuncName))
+ return EC;
Records.push_back(BinaryCoverageReader::ProfileMappingRecord(
- Version, FunctionName, MappingRecord.FunctionHash, Mapping,
+ CoverageMappingVersion(Version), FuncName, FuncHash, Mapping,
FilenamesBegin, Filenames.size() - FilenamesBegin));
}
}
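Replacing the reinterpret_cast'ed record structs with explicit endian::readNext calls makes both the byte order and the record stride explicit: each function record is a pointer-sized name reference, two 32-bit sizes, and a 64-bit hash. A self-contained sketch of one record read, assuming T is the file's pointer-sized integer type (names here are illustrative):

    #include "llvm/Support/Endian.h"
    #include <cstdint>

    template <typename T, llvm::support::endianness E>
    void readOneFunctionRecord(const char *&Buf) {
      using namespace llvm::support;
      T NamePtr         = endian::readNext<T, E, unaligned>(Buf);
      uint32_t NameSize = endian::readNext<uint32_t, E, unaligned>(Buf);
      uint32_t DataSize = endian::readNext<uint32_t, E, unaligned>(Buf);
      uint64_t FuncHash = endian::readNext<uint64_t, E, unaligned>(Buf);
      // Buf advances by sizeof(T) + 2 * sizeof(uint32_t) + sizeof(uint64_t),
      // matching the stride used to skip NRecords records above.
      (void)NamePtr; (void)NameSize; (void)DataSize; (void)FuncHash;
    }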
@@ -414,8 +398,10 @@ static const char *TestingFormatMagic = "llvmcovmtestdata";
static std::error_code loadTestingFormat(StringRef Data,
SectionData &ProfileNames,
StringRef &CoverageMapping,
- uint8_t &BytesInAddress) {
+ uint8_t &BytesInAddress,
+ support::endianness &Endian) {
BytesInAddress = 8;
+ Endian = support::endianness::little;
Data = Data.substr(StringRef(TestingFormatMagic).size());
if (Data.size() < 1)
@@ -444,12 +430,35 @@ static std::error_code loadTestingFormat(StringRef Data,
static std::error_code loadBinaryFormat(MemoryBufferRef ObjectBuffer,
SectionData &ProfileNames,
StringRef &CoverageMapping,
- uint8_t &BytesInAddress) {
- auto ObjectFileOrErr = object::ObjectFile::createObjectFile(ObjectBuffer);
- if (std::error_code EC = ObjectFileOrErr.getError())
+ uint8_t &BytesInAddress,
+ support::endianness &Endian,
+ Triple::ArchType Arch) {
+ auto BinOrErr = object::createBinary(ObjectBuffer);
+ if (std::error_code EC = BinOrErr.getError())
return EC;
- auto OF = std::move(ObjectFileOrErr.get());
+ auto Bin = std::move(BinOrErr.get());
+ std::unique_ptr<ObjectFile> OF;
+ if (auto *Universal = dyn_cast<object::MachOUniversalBinary>(Bin.get())) {
+ // If we have a universal binary, try to look up the object for the
+ // appropriate architecture.
+ auto ObjectFileOrErr = Universal->getObjectForArch(Arch);
+ if (std::error_code EC = ObjectFileOrErr.getError())
+ return EC;
+ OF = std::move(ObjectFileOrErr.get());
+ } else if (isa<object::ObjectFile>(Bin.get())) {
+ // For any other object file, upcast and take ownership.
+ OF.reset(cast<object::ObjectFile>(Bin.release()));
+ // If we've asked for a particular arch, make sure they match.
+ if (Arch != Triple::ArchType::UnknownArch && OF->getArch() != Arch)
+ return object_error::arch_not_found;
+ } else
+ // We can only handle object files.
+ return instrprof_error::malformed;
+
+ // The coverage mapping uses the native pointer size of the object it's written in.
BytesInAddress = OF->getBytesInAddress();
+ Endian = OF->isLittleEndian() ? support::endianness::little
+ : support::endianness::big;
// Look for the sections that we are interested in.
int FoundSectionCount = 0;
@@ -479,28 +488,36 @@ static std::error_code loadBinaryFormat(MemoryBufferRef ObjectBuffer,
}
ErrorOr<std::unique_ptr<BinaryCoverageReader>>
-BinaryCoverageReader::create(std::unique_ptr<MemoryBuffer> &ObjectBuffer) {
+BinaryCoverageReader::create(std::unique_ptr<MemoryBuffer> &ObjectBuffer,
+ Triple::ArchType Arch) {
std::unique_ptr<BinaryCoverageReader> Reader(new BinaryCoverageReader());
SectionData Profile;
StringRef Coverage;
uint8_t BytesInAddress;
+ support::endianness Endian;
std::error_code EC;
if (ObjectBuffer->getBuffer().startswith(TestingFormatMagic))
// This is a special format used for testing.
EC = loadTestingFormat(ObjectBuffer->getBuffer(), Profile, Coverage,
- BytesInAddress);
+ BytesInAddress, Endian);
else
EC = loadBinaryFormat(ObjectBuffer->getMemBufferRef(), Profile, Coverage,
- BytesInAddress);
+ BytesInAddress, Endian, Arch);
if (EC)
return EC;
- if (BytesInAddress == 4)
- EC = readCoverageMappingData<uint32_t>(
+ if (BytesInAddress == 4 && Endian == support::endianness::little)
+ EC = readCoverageMappingData<uint32_t, support::endianness::little>(
+ Profile, Coverage, Reader->MappingRecords, Reader->Filenames);
+ else if (BytesInAddress == 4 && Endian == support::endianness::big)
+ EC = readCoverageMappingData<uint32_t, support::endianness::big>(
+ Profile, Coverage, Reader->MappingRecords, Reader->Filenames);
+ else if (BytesInAddress == 8 && Endian == support::endianness::little)
+ EC = readCoverageMappingData<uint64_t, support::endianness::little>(
Profile, Coverage, Reader->MappingRecords, Reader->Filenames);
- else if (BytesInAddress == 8)
- EC = readCoverageMappingData<uint64_t>(
+ else if (BytesInAddress == 8 && Endian == support::endianness::big)
+ EC = readCoverageMappingData<uint64_t, support::endianness::big>(
Profile, Coverage, Reader->MappingRecords, Reader->Filenames);
else
return instrprof_error::malformed;
diff --git a/lib/ProfileData/InstrProfReader.cpp b/lib/ProfileData/InstrProfReader.cpp
index 01e199d..3a5b266 100644
--- a/lib/ProfileData/InstrProfReader.cpp
+++ b/lib/ProfileData/InstrProfReader.cpp
@@ -14,6 +14,7 @@
#include "llvm/ProfileData/InstrProfReader.h"
#include "InstrProfIndexed.h"
+#include "llvm/ADT/STLExtras.h"
#include "llvm/ProfileData/InstrProf.h"
#include <cassert>
@@ -112,7 +113,7 @@ std::error_code TextInstrProfReader::readNextRecord(InstrProfRecord &Record) {
// Read the function hash.
if (Line.is_at_end())
return error(instrprof_error::truncated);
- if ((Line++)->getAsInteger(10, Record.Hash))
+ if ((Line++)->getAsInteger(0, Record.Hash))
return error(instrprof_error::malformed);
// Read the number of counters.
diff --git a/lib/Support/APFloat.cpp b/lib/Support/APFloat.cpp
index 393ecf4..5a402bb 100644
--- a/lib/Support/APFloat.cpp
+++ b/lib/Support/APFloat.cpp
@@ -1248,10 +1248,10 @@ APFloat::roundAwayFromZero(roundingMode rounding_mode,
return false;
case rmTowardPositive:
- return sign == false;
+ return !sign;
case rmTowardNegative:
- return sign == true;
+ return sign;
}
llvm_unreachable("Invalid rounding mode found");
}
@@ -1430,7 +1430,7 @@ APFloat::addOrSubtractSignificand(const APFloat &rhs, bool subtract)
/* Determine if the operation on the absolute values is effectively
an addition or subtraction. */
- subtract ^= (sign ^ rhs.sign) ? true : false;
+ subtract ^= static_cast<bool>(sign ^ rhs.sign);
/* Are we bigger exponent-wise than the RHS? */
bits = exponent - rhs.exponent;
diff --git a/lib/Support/APInt.cpp b/lib/Support/APInt.cpp
index 50a639c..2533fa0 100644
--- a/lib/Support/APInt.cpp
+++ b/lib/Support/APInt.cpp
@@ -672,6 +672,14 @@ hash_code llvm::hash_value(const APInt &Arg) {
return hash_combine_range(Arg.pVal, Arg.pVal + Arg.getNumWords());
}
+bool APInt::isSplat(unsigned SplatSizeInBits) const {
+ assert(getBitWidth() % SplatSizeInBits == 0 &&
+ "SplatSizeInBits must divide width!");
+ // We can check that all parts of an integer are equal by making use of a
+ // little trick: rotate and check if it's still the same value.
+ return *this == rotl(SplatSizeInBits);
+}
+
/// HiBits - This function returns the high "numBits" bits of this APInt.
APInt APInt::getHiBits(unsigned numBits) const {
return APIntOps::lshr(*this, BitWidth - numBits);
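The rotate check works because a value consists of identical SplatSizeInBits-wide chunks exactly when rotating by one chunk width maps it onto itself. A small worked example (values illustrative):

    #include "llvm/ADT/APInt.h"
    #include <cassert>

    void splatExample() {
      llvm::APInt V(16, 0xABAB);
      assert(V.isSplat(8));   // rotl(8) == 0xABAB: two identical 8-bit chunks
      assert(!V.isSplat(4));  // rotl(4) == 0xBABA: the 4-bit chunks differ
    }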
@@ -1310,13 +1318,8 @@ APInt APInt::sqrt() const {
// libc sqrt function which will probably use a hardware sqrt computation.
// This should be faster than the algorithm below.
if (magnitude < 52) {
-#if HAVE_ROUND
return APInt(BitWidth,
uint64_t(::round(::sqrt(double(isSingleWord()?VAL:pVal[0])))));
-#else
- return APInt(BitWidth,
- uint64_t(::sqrt(double(isSingleWord()?VAL:pVal[0])) + 0.5));
-#endif
}
// Okay, all the short cuts are exhausted. We must compute it. The following
@@ -1508,21 +1511,18 @@ static void KnuthDiv(unsigned *u, unsigned *v, unsigned *q, unsigned* r,
assert(u && "Must provide dividend");
assert(v && "Must provide divisor");
assert(q && "Must provide quotient");
- assert(u != v && u != q && v != q && "Must us different memory");
+ assert(u != v && u != q && v != q && "Must use different memory");
assert(n>1 && "n must be > 1");
- // Knuth uses the value b as the base of the number system. In our case b
- // is 2^31 so we just set it to -1u.
- uint64_t b = uint64_t(1) << 32;
+ // b denotes the base of the number system. In our case b is 2^32.
+ LLVM_CONSTEXPR uint64_t b = uint64_t(1) << 32;
-#if 0
DEBUG(dbgs() << "KnuthDiv: m=" << m << " n=" << n << '\n');
DEBUG(dbgs() << "KnuthDiv: original:");
DEBUG(for (int i = m+n; i >=0; i--) dbgs() << " " << u[i]);
DEBUG(dbgs() << " by");
DEBUG(for (int i = n; i >0; i--) dbgs() << " " << v[i-1]);
DEBUG(dbgs() << '\n');
-#endif
// D1. [Normalize.] Set d = b / (v[n-1] + 1) and multiply all the digits of
// u and v by d. Note that we have taken Knuth's advice here to use a power
// of 2 value for d such that d * v[n-1] >= b/2 (b is the base). A power of
@@ -1547,13 +1547,12 @@ static void KnuthDiv(unsigned *u, unsigned *v, unsigned *q, unsigned* r,
}
}
u[m+n] = u_carry;
-#if 0
+
DEBUG(dbgs() << "KnuthDiv: normal:");
DEBUG(for (int i = m+n; i >=0; i--) dbgs() << " " << u[i]);
DEBUG(dbgs() << " by");
DEBUG(for (int i = n; i >0; i--) dbgs() << " " << v[i-1]);
DEBUG(dbgs() << '\n');
-#endif
// D2. [Initialize j.] Set j to m. This is the loop counter over the places.
int j = m;
@@ -1583,46 +1582,35 @@ static void KnuthDiv(unsigned *u, unsigned *v, unsigned *q, unsigned* r,
// (u[j+n]u[j+n-1]..u[j]) - qp * (v[n-1]...v[1]v[0]). This computation
// consists of a simple multiplication by a one-place number, combined with
// a subtraction.
+ // The digits (u[j+n]...u[j]) should be kept positive; if the result of
+ // this step is actually negative, (u[j+n]...u[j]) should be left as the
+ // true value plus b**(n+1), namely as the b's complement of
+ // the true value, and a "borrow" to the left should be remembered.
bool isNeg = false;
for (unsigned i = 0; i < n; ++i) {
- uint64_t u_tmp = uint64_t(u[j+i]) | (uint64_t(u[j+i+1]) << 32);
+ uint64_t u_tmp = (uint64_t(u[j+i+1]) << 32) | uint64_t(u[j+i]);
uint64_t subtrahend = uint64_t(qp) * uint64_t(v[i]);
bool borrow = subtrahend > u_tmp;
- DEBUG(dbgs() << "KnuthDiv: u_tmp == " << u_tmp
- << ", subtrahend == " << subtrahend
+ DEBUG(dbgs() << "KnuthDiv: u_tmp = " << u_tmp
+ << ", subtrahend = " << subtrahend
<< ", borrow = " << borrow << '\n');
uint64_t result = u_tmp - subtrahend;
unsigned k = j + i;
- u[k++] = (unsigned)(result & (b-1)); // subtract low word
- u[k++] = (unsigned)(result >> 32); // subtract high word
- while (borrow && k <= m+n) { // deal with borrow to the left
+ u[k++] = (unsigned)result; // subtraction low word
+ u[k++] = (unsigned)(result >> 32); // subtraction high word
+ while (borrow && k <= m+n) { // deal with borrow to the left
borrow = u[k] == 0;
u[k]--;
k++;
}
isNeg |= borrow;
- DEBUG(dbgs() << "KnuthDiv: u[j+i] == " << u[j+i] << ", u[j+i+1] == " <<
- u[j+i+1] << '\n');
+ DEBUG(dbgs() << "KnuthDiv: u[j+i] = " << u[j+i]
+ << ", u[j+i+1] = " << u[j+i+1] << '\n');
}
DEBUG(dbgs() << "KnuthDiv: after subtraction:");
DEBUG(for (int i = m+n; i >=0; i--) dbgs() << " " << u[i]);
DEBUG(dbgs() << '\n');
- // The digits (u[j+n]...u[j]) should be kept positive; if the result of
- // this step is actually negative, (u[j+n]...u[j]) should be left as the
- // true value plus b**(n+1), namely as the b's complement of
- // the true value, and a "borrow" to the left should be remembered.
- //
- if (isNeg) {
- bool carry = true; // true because b's complement is "complement + 1"
- for (unsigned i = 0; i <= m+n; ++i) {
- u[i] = ~u[i] + carry; // b's complement
- carry = carry && u[i] == 0;
- }
- }
- DEBUG(dbgs() << "KnuthDiv: after complement:");
- DEBUG(for (int i = m+n; i >=0; i--) dbgs() << " " << u[i]);
- DEBUG(dbgs() << '\n');
// D5. [Test remainder.] Set q[j] = qp. If the result of step D4 was
// negative, go to step D6; otherwise go on to step D7.
@@ -1644,7 +1632,7 @@ static void KnuthDiv(unsigned *u, unsigned *v, unsigned *q, unsigned* r,
u[j+n] += carry;
}
DEBUG(dbgs() << "KnuthDiv: after correction:");
- DEBUG(for (int i = m+n; i >=0; i--) dbgs() <<" " << u[i]);
+ DEBUG(for (int i = m+n; i >=0; i--) dbgs() << " " << u[i]);
DEBUG(dbgs() << "\nKnuthDiv: digit result = " << q[j] << '\n');
// D7. [Loop on j.] Decrease j by one. Now if j >= 0, go back to D3.
@@ -1677,9 +1665,7 @@ static void KnuthDiv(unsigned *u, unsigned *v, unsigned *q, unsigned* r,
}
DEBUG(dbgs() << '\n');
}
-#if 0
DEBUG(dbgs() << '\n');
-#endif
}
void APInt::divide(const APInt LHS, unsigned lhsWords,
@@ -1803,6 +1789,8 @@ void APInt::divide(const APInt LHS, unsigned lhsWords,
// The quotient is in Q. Reconstitute the quotient into Quotient's low
// order words.
+ // This case is currently dead as all users of divide() handle trivial cases
+ // earlier.
if (lhsWords == 1) {
uint64_t tmp =
uint64_t(Q[0]) | (uint64_t(Q[1]) << (APINT_BITS_PER_WORD / 2));
@@ -2296,13 +2284,13 @@ void APInt::dump() const {
this->toStringUnsigned(U);
this->toStringSigned(S);
dbgs() << "APInt(" << BitWidth << "b, "
- << U.str() << "u " << S.str() << "s)";
+ << U << "u " << S << "s)";
}
void APInt::print(raw_ostream &OS, bool isSigned) const {
SmallString<40> S;
this->toString(S, 10, isSigned, /* formatAsCLiteral = */false);
- OS << S.str();
+ OS << S;
}
// This implements a variety of operations on a representation of
diff --git a/lib/Support/Allocator.cpp b/lib/Support/Allocator.cpp
index 7c306b2..f48edac 100644
--- a/lib/Support/Allocator.cpp
+++ b/lib/Support/Allocator.cpp
@@ -12,12 +12,7 @@
//===----------------------------------------------------------------------===//
#include "llvm/Support/Allocator.h"
-#include "llvm/Support/Compiler.h"
-#include "llvm/Support/DataTypes.h"
-#include "llvm/Support/Memory.h"
-#include "llvm/Support/Recycler.h"
#include "llvm/Support/raw_ostream.h"
-#include <cstring>
namespace llvm {
diff --git a/lib/Support/Android.mk b/lib/Support/Android.mk
index 34448a7..4d1f526 100644
--- a/lib/Support/Android.mk
+++ b/lib/Support/Android.mk
@@ -35,8 +35,6 @@ support_SRC_FILES := \
IntervalMap.cpp \
IntEqClasses.cpp \
IntrusiveRefCntPtr.cpp \
- IsInf.cpp \
- IsNAN.cpp \
LEB128.cpp \
LineIterator.cpp \
Locale.cpp \
diff --git a/lib/Support/CMakeLists.txt b/lib/Support/CMakeLists.txt
index a44c1a3..684afa9 100644
--- a/lib/Support/CMakeLists.txt
+++ b/lib/Support/CMakeLists.txt
@@ -58,8 +58,6 @@ add_llvm_library(LLVMSupport
IntEqClasses.cpp
IntervalMap.cpp
IntrusiveRefCntPtr.cpp
- IsInf.cpp
- IsNAN.cpp
LEB128.cpp
LineIterator.cpp
Locale.cpp
diff --git a/lib/Support/CommandLine.cpp b/lib/Support/CommandLine.cpp
index b49ec36..af6c605 100644
--- a/lib/Support/CommandLine.cpp
+++ b/lib/Support/CommandLine.cpp
@@ -19,6 +19,7 @@
#include "llvm/Support/CommandLine.h"
#include "llvm-c/Support.h"
#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/StringMap.h"
@@ -32,10 +33,8 @@
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/Path.h"
#include "llvm/Support/raw_ostream.h"
-#include <cerrno>
#include <cstdlib>
#include <map>
-#include <system_error>
using namespace llvm;
using namespace cl;
@@ -1463,10 +1462,9 @@ void basic_parser_impl::printOptionNoValue(const Option &O,
// -help and -help-hidden option implementation
//
-static int OptNameCompare(const void *LHS, const void *RHS) {
- typedef std::pair<const char *, Option *> pair_ty;
-
- return strcmp(((const pair_ty *)LHS)->first, ((const pair_ty *)RHS)->first);
+static int OptNameCompare(const std::pair<const char *, Option *> *LHS,
+ const std::pair<const char *, Option *> *RHS) {
+ return strcmp(LHS->first, RHS->first);
}
// Copy Options into a vector so we can sort them as we like.
@@ -1494,7 +1492,7 @@ static void sortOpts(StringMap<Option *> &OptMap,
}
// Sort the options list alphabetically.
- qsort(Opts.data(), Opts.size(), sizeof(Opts[0]), OptNameCompare);
+ array_pod_sort(Opts.begin(), Opts.end(), OptNameCompare);
}
namespace {
@@ -1516,7 +1514,7 @@ public:
// Invoke the printer.
void operator=(bool Value) {
- if (Value == false)
+ if (!Value)
return;
StrOptionPairVector Opts;
@@ -1562,10 +1560,11 @@ public:
explicit CategorizedHelpPrinter(bool showHidden) : HelpPrinter(showHidden) {}
// Helper function for printOptions().
- // It shall return true if A's name should be lexographically
- // ordered before B's name. It returns false otherwise.
- static bool OptionCategoryCompare(OptionCategory *A, OptionCategory *B) {
- return strcmp(A->getName(), B->getName()) < 0;
+ // It shall return a negative value if A's name should be lexicographically
+ // ordered before B's name, and a value greater than or equal to zero otherwise.
+ static int OptionCategoryCompare(OptionCategory *const *A,
+ OptionCategory *const *B) {
+ return strcmp((*A)->getName(), (*B)->getName());
}
// Make sure we inherit our base class's operator=()
@@ -1586,8 +1585,8 @@ protected:
// Sort the different option categories alphabetically.
assert(SortedCategories.size() > 0 && "No option categories registered!");
- std::sort(SortedCategories.begin(), SortedCategories.end(),
- OptionCategoryCompare);
+ array_pod_sort(SortedCategories.begin(), SortedCategories.end(),
+ OptionCategoryCompare);
// Create map to empty vectors.
for (std::vector<OptionCategory *>::const_iterator
@@ -1716,7 +1715,7 @@ static cl::opt<bool> PrintAllOptions(
cl::init(false), cl::cat(GenericCategory));
void HelpPrinterWrapper::operator=(bool Value) {
- if (Value == false)
+ if (!Value)
return;
// Decide which printer to invoke. If more than one option category is
diff --git a/lib/Support/Compression.cpp b/lib/Support/Compression.cpp
index 17ae295..b54613e 100644
--- a/lib/Support/Compression.cpp
+++ b/lib/Support/Compression.cpp
@@ -12,6 +12,7 @@
//===----------------------------------------------------------------------===//
#include "llvm/Support/Compression.h"
+#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/Config/config.h"
#include "llvm/Support/Compiler.h"
diff --git a/lib/Support/CrashRecoveryContext.cpp b/lib/Support/CrashRecoveryContext.cpp
index 9b0e443..aba0f1d 100644
--- a/lib/Support/CrashRecoveryContext.cpp
+++ b/lib/Support/CrashRecoveryContext.cpp
@@ -8,13 +8,11 @@
//===----------------------------------------------------------------------===//
#include "llvm/Support/CrashRecoveryContext.h"
-#include "llvm/ADT/SmallString.h"
#include "llvm/Config/config.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/ManagedStatic.h"
#include "llvm/Support/Mutex.h"
#include "llvm/Support/ThreadLocal.h"
-#include <cstdio>
#include <setjmp.h>
using namespace llvm;
diff --git a/lib/Support/DAGDeltaAlgorithm.cpp b/lib/Support/DAGDeltaAlgorithm.cpp
index 0d504ee..f1a334b 100644
--- a/lib/Support/DAGDeltaAlgorithm.cpp
+++ b/lib/Support/DAGDeltaAlgorithm.cpp
@@ -63,9 +63,6 @@ private:
DAGDeltaAlgorithm &DDA;
- const changeset_ty &Changes;
- const std::vector<edge_ty> &Dependencies;
-
std::vector<change_ty> Roots;
/// Cache of failed test results. Successful test results are never cached
@@ -139,9 +136,8 @@ private:
}
public:
- DAGDeltaAlgorithmImpl(DAGDeltaAlgorithm &_DDA,
- const changeset_ty &_Changes,
- const std::vector<edge_ty> &_Dependencies);
+ DAGDeltaAlgorithmImpl(DAGDeltaAlgorithm &DDA, const changeset_ty &Changes,
+ const std::vector<edge_ty> &Dependencies);
changeset_ty Run();
@@ -174,21 +170,17 @@ protected:
}
public:
- DeltaActiveSetHelper(DAGDeltaAlgorithmImpl &_DDAI,
- const changeset_ty &_Required)
- : DDAI(_DDAI), Required(_Required) {}
+ DeltaActiveSetHelper(DAGDeltaAlgorithmImpl &DDAI,
+ const changeset_ty &Required)
+ : DDAI(DDAI), Required(Required) {}
};
}
-DAGDeltaAlgorithmImpl::DAGDeltaAlgorithmImpl(DAGDeltaAlgorithm &_DDA,
- const changeset_ty &_Changes,
- const std::vector<edge_ty>
- &_Dependencies)
- : DDA(_DDA),
- Changes(_Changes),
- Dependencies(_Dependencies)
-{
+DAGDeltaAlgorithmImpl::DAGDeltaAlgorithmImpl(
+ DAGDeltaAlgorithm &DDA, const changeset_ty &Changes,
+ const std::vector<edge_ty> &Dependencies)
+ : DDA(DDA) {
for (changeset_ty::const_iterator it = Changes.begin(),
ie = Changes.end(); it != ie; ++it) {
Predecessors.insert(std::make_pair(*it, std::vector<change_ty>()));
diff --git a/lib/Support/DataStream.cpp b/lib/Support/DataStream.cpp
index dbf6465..a44b958 100644
--- a/lib/Support/DataStream.cpp
+++ b/lib/Support/DataStream.cpp
@@ -18,8 +18,6 @@
#include "llvm/ADT/Statistic.h"
#include "llvm/Support/FileSystem.h"
#include "llvm/Support/Program.h"
-#include <cerrno>
-#include <cstdio>
#include <string>
#include <system_error>
#if !defined(_MSC_VER) && !defined(__MINGW32__)
diff --git a/lib/Support/Debug.cpp b/lib/Support/Debug.cpp
index 9c58ae8..a88b18e 100644
--- a/lib/Support/Debug.cpp
+++ b/lib/Support/Debug.cpp
@@ -28,6 +28,7 @@
#include "llvm/Support/ManagedStatic.h"
#include "llvm/Support/Signals.h"
#include "llvm/Support/circular_raw_ostream.h"
+#include "llvm/Support/raw_ostream.h"
#undef isCurrentDebugType
#undef setCurrentDebugType
diff --git a/lib/Support/FileOutputBuffer.cpp b/lib/Support/FileOutputBuffer.cpp
index b176a8b..307ff09 100644
--- a/lib/Support/FileOutputBuffer.cpp
+++ b/lib/Support/FileOutputBuffer.cpp
@@ -11,11 +11,10 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/Support/Errc.h"
-#include "llvm/ADT/STLExtras.h"
-#include "llvm/ADT/SmallVector.h"
#include "llvm/Support/FileOutputBuffer.h"
-#include "llvm/Support/raw_ostream.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/Support/Errc.h"
#include <system_error>
#if !defined(_MSC_VER) && !defined(__MINGW32__)
@@ -77,9 +76,16 @@ FileOutputBuffer::create(StringRef FilePath, size_t Size,
if (EC)
return EC;
+#ifndef LLVM_ON_WIN32
+ // On Windows, CreateFileMapping (the mmap function on Windows)
+ // automatically extends the underlying file. We don't need to
+ // extend the file beforehand. _chsize (ftruncate on Windows) is
+ // pretty slow, as if it actually wrote the specified number of bytes,
+ // so we should avoid calling it.
EC = sys::fs::resize_file(FD, Size);
if (EC)
return EC;
+#endif
auto MappedFile = llvm::make_unique<mapped_file_region>(
FD, mapped_file_region::readwrite, Size, 0, EC);
diff --git a/lib/Support/FoldingSet.cpp b/lib/Support/FoldingSet.cpp
index 4635114..80d2aef 100644
--- a/lib/Support/FoldingSet.cpp
+++ b/lib/Support/FoldingSet.cpp
@@ -101,6 +101,8 @@ void FoldingSetNodeID::AddString(StringRef String) {
// Otherwise do it the hard way.
// To be compatible with above bulk transfer, we need to take endianness
// into account.
+ static_assert(sys::IsBigEndianHost || sys::IsLittleEndianHost,
+ "Unexpected host endianness");
if (sys::IsBigEndianHost) {
for (Pos += 4; Pos <= Size; Pos += 4) {
unsigned V = ((unsigned char)String[Pos - 4] << 24) |
@@ -109,8 +111,7 @@ void FoldingSetNodeID::AddString(StringRef String) {
(unsigned char)String[Pos - 1];
Bits.push_back(V);
}
- } else {
- assert(sys::IsLittleEndianHost && "Unexpected host endianness");
+ } else { // Little-endian host
for (Pos += 4; Pos <= Size; Pos += 4) {
unsigned V = ((unsigned char)String[Pos - 1] << 24) |
((unsigned char)String[Pos - 2] << 16) |
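Both branches pack four characters per word in the host's own byte order, so this scalar tail stays bit-compatible with the word-at-a-time bulk transfer earlier in the function. Worked through on the bytes "abcd" (illustrative):

    // Packing "abcd" into one 32-bit word:
    //   big-endian host:    ('a'<<24)|('b'<<16)|('c'<<8)|'d' == 0x61626364
    //   little-endian host: ('d'<<24)|('c'<<16)|('b'<<8)|'a' == 0x64636261
    // In both cases the word equals what the bulk transfer reads from memory
    // on that host, so short and long strings hash consistently per machine.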
@@ -222,6 +223,8 @@ static void **AllocateBuckets(unsigned NumBuckets) {
//===----------------------------------------------------------------------===//
// FoldingSetImpl Implementation
+void FoldingSetImpl::anchor() {}
+
FoldingSetImpl::FoldingSetImpl(unsigned Log2InitSize) {
assert(5 < Log2InitSize && Log2InitSize < 32 &&
"Initial hash table size out of range");
diff --git a/lib/Support/FormattedStream.cpp b/lib/Support/FormattedStream.cpp
index 618ec26..2ed71c7 100644
--- a/lib/Support/FormattedStream.cpp
+++ b/lib/Support/FormattedStream.cpp
@@ -13,6 +13,7 @@
#include "llvm/Support/Debug.h"
#include "llvm/Support/FormattedStream.h"
+#include "llvm/Support/raw_ostream.h"
#include <algorithm>
using namespace llvm;
diff --git a/lib/Support/GraphWriter.cpp b/lib/Support/GraphWriter.cpp
index 054df52..fd4ce54 100644
--- a/lib/Support/GraphWriter.cpp
+++ b/lib/Support/GraphWriter.cpp
@@ -15,7 +15,6 @@
#include "llvm/Config/config.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/FileSystem.h"
-#include "llvm/Support/Path.h"
#include "llvm/Support/Program.h"
using namespace llvm;
@@ -98,6 +97,7 @@ static bool ExecGraphViewer(StringRef ExecPath, std::vector<const char *> &args,
return false;
}
+namespace {
struct GraphSession {
std::string LogBuffer;
bool TryFindProgram(StringRef Names, std::string &ProgramPath) {
@@ -114,6 +114,7 @@ struct GraphSession {
return false;
}
};
+} // namespace
static const char *getProgramName(GraphProgram::Name program) {
switch (program) {
diff --git a/lib/Support/Host.cpp b/lib/Support/Host.cpp
index 42bc342..0e9a62e 100644
--- a/lib/Support/Host.cpp
+++ b/lib/Support/Host.cpp
@@ -357,10 +357,16 @@ StringRef sys::getHostCPUName() {
case 63:
case 69:
case 70:
- // Not all Haswell processors support AVX too (such as the Pentium
+ // Not all Haswell processors support AVX2 (such as the Pentium
// versions instead of the i7 versions).
return HasAVX2 ? "core-avx2" : "corei7";
+ // Broadwell:
+ case 61:
+ // Not all Broadwell processors support AVX2 (such as the Pentium
+ // versions instead of the i7 versions).
+ return HasAVX2 ? "broadwell" : "corei7";
+
case 28: // Most 45 nm Intel Atom processors
case 38: // 45 nm Atom Lincroft
case 39: // 32 nm Atom Medfield
diff --git a/lib/Support/IsInf.cpp b/lib/Support/IsInf.cpp
deleted file mode 100644
index d6da0c9..0000000
--- a/lib/Support/IsInf.cpp
+++ /dev/null
@@ -1,49 +0,0 @@
-//===-- IsInf.cpp - Platform-independent wrapper around C99 isinf() -------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// Platform-independent wrapper around C99 isinf()
-//
-//===----------------------------------------------------------------------===//
-
-#include "llvm/Config/config.h"
-
-#if HAVE_ISINF_IN_MATH_H
-# include <math.h>
-#elif HAVE_ISINF_IN_CMATH
-# include <cmath>
-#elif HAVE_STD_ISINF_IN_CMATH
-# include <cmath>
-using std::isinf;
-#elif HAVE_FINITE_IN_IEEEFP_H
-// A handy workaround I found at http://www.unixguide.net/sun/faq ...
-// apparently this has been a problem with Solaris for years.
-# include <ieeefp.h>
-static int isinf(double x) { return !finite(x) && x==x; }
-#elif defined(_MSC_VER)
-#include <float.h>
-#define isinf(X) (!_finite(X))
-#elif defined(_AIX) && defined(__GNUC__)
-// GCC's fixincludes seems to be removing the isinf() declaration from the
-// system header /usr/include/math.h
-# include <math.h>
-static int isinf(double x) { return !finite(x) && x==x; }
-#elif defined(__hpux)
-// HP-UX is "special"
-#include <math.h>
-static int isinf(double x) { return ((x) == INFINITY) || ((x) == -INFINITY); }
-#else
-# error "Don't know how to get isinf()"
-#endif
-
-namespace llvm {
-
-int IsInf(float f) { return isinf(f); }
-int IsInf(double d) { return isinf(d); }
-
-} // end namespace llvm;
diff --git a/lib/Support/IsNAN.cpp b/lib/Support/IsNAN.cpp
deleted file mode 100644
index bdfdfbf..0000000
--- a/lib/Support/IsNAN.cpp
+++ /dev/null
@@ -1,33 +0,0 @@
-//===-- IsNAN.cpp ---------------------------------------------------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// Platform-independent wrapper around C99 isnan().
-//
-//===----------------------------------------------------------------------===//
-
-#include "llvm/Config/config.h"
-
-#if HAVE_ISNAN_IN_MATH_H
-# include <math.h>
-#elif HAVE_ISNAN_IN_CMATH
-# include <cmath>
-#elif HAVE_STD_ISNAN_IN_CMATH
-# include <cmath>
-using std::isnan;
-#elif defined(_MSC_VER)
-#include <float.h>
-#define isnan _isnan
-#else
-# error "Don't know how to get isnan()"
-#endif
-
-namespace llvm {
- int IsNAN(float f) { return isnan(f); }
- int IsNAN(double d) { return isnan(d); }
-} // end namespace llvm;
diff --git a/lib/Support/LockFileManager.cpp b/lib/Support/LockFileManager.cpp
index ec3158c..d07c5f0 100644
--- a/lib/Support/LockFileManager.cpp
+++ b/lib/Support/LockFileManager.cpp
@@ -7,12 +7,10 @@
//
//===----------------------------------------------------------------------===//
#include "llvm/Support/LockFileManager.h"
-#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/Support/Errc.h"
#include "llvm/Support/FileSystem.h"
#include "llvm/Support/MemoryBuffer.h"
-#include "llvm/Support/Path.h"
#include "llvm/Support/raw_ostream.h"
#include <sys/stat.h>
#include <sys/types.h>
@@ -91,7 +89,7 @@ LockFileManager::LockFileManager(StringRef FileName)
UniqueLockFileName += "-%%%%%%%%";
int UniqueLockFileID;
if (std::error_code EC = sys::fs::createUniqueFile(
- UniqueLockFileName.str(), UniqueLockFileID, UniqueLockFileName)) {
+ UniqueLockFileName, UniqueLockFileID, UniqueLockFileName)) {
Error = EC;
return;
}
@@ -116,7 +114,7 @@ LockFileManager::LockFileManager(StringRef FileName)
// We failed to write out PID, so make up an excuse, remove the
// unique lock file, and fail.
Error = make_error_code(errc::no_space_on_device);
- sys::fs::remove(UniqueLockFileName.c_str());
+ sys::fs::remove(UniqueLockFileName);
return;
}
}
@@ -124,7 +122,7 @@ LockFileManager::LockFileManager(StringRef FileName)
while (1) {
// Create a link from the lock file name. If this succeeds, we're done.
std::error_code EC =
- sys::fs::create_link(UniqueLockFileName.str(), LockFileName.str());
+ sys::fs::create_link(UniqueLockFileName, LockFileName);
if (!EC)
return;
@@ -137,11 +135,11 @@ LockFileManager::LockFileManager(StringRef FileName)
// from the lock file.
if ((Owner = readLockFile(LockFileName))) {
// Wipe out our unique lock file (it's useless now)
- sys::fs::remove(UniqueLockFileName.str());
+ sys::fs::remove(UniqueLockFileName);
return;
}
- if (!sys::fs::exists(LockFileName.str())) {
+ if (!sys::fs::exists(LockFileName)) {
// The previous owner released the lock file before we could read it.
// Try to get ownership again.
continue;
@@ -149,7 +147,7 @@ LockFileManager::LockFileManager(StringRef FileName)
// There is a lock file that nobody owns; try to clean it up and get
// ownership.
- if ((EC = sys::fs::remove(LockFileName.str()))) {
+ if ((EC = sys::fs::remove(LockFileName))) {
Error = EC;
return;
}
@@ -171,8 +169,8 @@ LockFileManager::~LockFileManager() {
return;
// Since we own the lock, remove the lock file and our own unique lock file.
- sys::fs::remove(LockFileName.str());
- sys::fs::remove(UniqueLockFileName.str());
+ sys::fs::remove(LockFileName);
+ sys::fs::remove(UniqueLockFileName);
}
LockFileManager::WaitForUnlockResult LockFileManager::waitForUnlock() {
@@ -186,8 +184,9 @@ LockFileManager::WaitForUnlockResult LockFileManager::waitForUnlock() {
Interval.tv_sec = 0;
Interval.tv_nsec = 1000000;
#endif
- // Don't wait more than one minute for the file to appear.
- const unsigned MaxSeconds = 60;
+ // Don't wait more than five minutes per iteration. Total timeout for the file
+ // to appear is ~8.5 mins.
+ const unsigned MaxSeconds = 5*60;
do {
// Sleep for the designated interval, to allow the owning process time to
// finish up and remove the lock file.
@@ -202,7 +201,7 @@ LockFileManager::WaitForUnlockResult LockFileManager::waitForUnlock() {
if (sys::fs::access(LockFileName.c_str(), sys::fs::AccessMode::Exist) ==
errc::no_such_file_or_directory) {
// If the original file wasn't created, someone thought the lock was dead.
- if (!sys::fs::exists(FileName.str()))
+ if (!sys::fs::exists(FileName))
return Res_OwnerDied;
return Res_Success;
}
@@ -235,5 +234,5 @@ LockFileManager::WaitForUnlockResult LockFileManager::waitForUnlock() {
}
std::error_code LockFileManager::unsafeRemoveLockFile() {
- return sys::fs::remove(LockFileName.str());
+ return sys::fs::remove(LockFileName);
}
diff --git a/lib/Support/MemoryBuffer.cpp b/lib/Support/MemoryBuffer.cpp
index 379db88..98862e9 100644
--- a/lib/Support/MemoryBuffer.cpp
+++ b/lib/Support/MemoryBuffer.cpp
@@ -23,7 +23,6 @@
#include "llvm/Support/Program.h"
#include <cassert>
#include <cerrno>
-#include <cstdio>
#include <cstring>
#include <new>
#include <sys/types.h>
diff --git a/lib/Support/Path.cpp b/lib/Support/Path.cpp
index a11bb7f..cf467381 100644
--- a/lib/Support/Path.cpp
+++ b/lib/Support/Path.cpp
@@ -19,9 +19,7 @@
#include "llvm/Support/Path.h"
#include "llvm/Support/Process.h"
#include <cctype>
-#include <cstdio>
#include <cstring>
-#include <fcntl.h>
#if !defined(_MSC_VER) && !defined(__MINGW32__)
#include <unistd.h>
@@ -30,6 +28,7 @@
#endif
using namespace llvm;
+using namespace llvm::support::endian;
namespace {
using llvm::StringRef;
@@ -48,7 +47,6 @@ namespace {
// * empty (in this case we return an empty string)
// * either C: or {//,\\}net.
// * {/,\}
- // * {.,..}
// * {file,directory}name
if (path.empty())
@@ -75,12 +73,6 @@ namespace {
if (is_separator(path[0]))
return path.substr(0, 1);
- if (path.startswith(".."))
- return path.substr(0, 2);
-
- if (path[0] == '.')
- return path.substr(0, 1);
-
// * {file,directory}name
size_t end = path.find_first_of(separators);
return path.substr(0, end);
@@ -917,7 +909,7 @@ file_magic identify_magic(StringRef Magic) {
if (Magic.size() < MinSize)
return file_magic::coff_import_library;
- int BigObjVersion = *reinterpret_cast<const support::ulittle16_t*>(
+ int BigObjVersion = read16le(
Magic.data() + offsetof(COFF::BigObjHeader, Version));
if (BigObjVersion < COFF::BigObjHeader::MinBigObjectVersion)
return file_magic::coff_import_library;
@@ -1034,8 +1026,7 @@ file_magic identify_magic(StringRef Magic) {
case 'M': // Possible MS-DOS stub on Windows PE file
if (Magic[1] == 'Z') {
- uint32_t off =
- *reinterpret_cast<const support::ulittle32_t*>(Magic.data() + 0x3c);
+ uint32_t off = read32le(Magic.data() + 0x3c);
// PE/COFF file, either EXE or DLL.
if (off < Magic.size() &&
memcmp(Magic.data()+off, COFF::PEMagic, sizeof(COFF::PEMagic)) == 0)
diff --git a/lib/Support/Process.cpp b/lib/Support/Process.cpp
index ad67e1b..d0c1748 100644
--- a/lib/Support/Process.cpp
+++ b/lib/Support/Process.cpp
@@ -13,8 +13,8 @@
#include "llvm/ADT/StringExtras.h"
#include "llvm/Config/config.h"
-#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/FileSystem.h"
+#include "llvm/Support/Path.h"
#include "llvm/Support/Process.h"
#include "llvm/Support/Program.h"
diff --git a/lib/Support/Program.cpp b/lib/Support/Program.cpp
index b84b82b..34e336b 100644
--- a/lib/Support/Program.cpp
+++ b/lib/Support/Program.cpp
@@ -12,6 +12,7 @@
//===----------------------------------------------------------------------===//
#include "llvm/Support/Program.h"
+#include "llvm/ADT/StringRef.h"
#include "llvm/Config/config.h"
#include <system_error>
using namespace llvm;
diff --git a/lib/Support/RandomNumberGenerator.cpp b/lib/Support/RandomNumberGenerator.cpp
index 2943137..81d0411 100644
--- a/lib/Support/RandomNumberGenerator.cpp
+++ b/lib/Support/RandomNumberGenerator.cpp
@@ -13,13 +13,15 @@
//
//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "rng"
+#include "llvm/Support/RandomNumberGenerator.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
-#include "llvm/Support/RandomNumberGenerator.h"
+#include "llvm/Support/raw_ostream.h"
using namespace llvm;
+#define DEBUG_TYPE "rng"
+
// Tracking BUG: 19665
// http://llvm.org/bugs/show_bug.cgi?id=19665
//
diff --git a/lib/Support/Regex.cpp b/lib/Support/Regex.cpp
index f7fe1e4..d3e29ac 100644
--- a/lib/Support/Regex.cpp
+++ b/lib/Support/Regex.cpp
@@ -14,8 +14,7 @@
#include "llvm/Support/Regex.h"
#include "regex_impl.h"
#include "llvm/ADT/SmallVector.h"
-#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/raw_ostream.h"
+#include "llvm/ADT/StringRef.h"
#include <string>
using namespace llvm;
diff --git a/lib/Support/ScaledNumber.cpp b/lib/Support/ScaledNumber.cpp
index 6f6699c..987c2d8 100644
--- a/lib/Support/ScaledNumber.cpp
+++ b/lib/Support/ScaledNumber.cpp
@@ -14,6 +14,7 @@
#include "llvm/Support/ScaledNumber.h"
#include "llvm/ADT/APFloat.h"
#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
using namespace llvm;
using namespace llvm::ScaledNumbers;
diff --git a/lib/Support/SourceMgr.cpp b/lib/Support/SourceMgr.cpp
index b50a66b..d5e3157 100644
--- a/lib/Support/SourceMgr.cpp
+++ b/lib/Support/SourceMgr.cpp
@@ -14,13 +14,11 @@
//===----------------------------------------------------------------------===//
#include "llvm/Support/SourceMgr.h"
-#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/Twine.h"
#include "llvm/Support/Locale.h"
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/Path.h"
#include "llvm/Support/raw_ostream.h"
-#include <system_error>
using namespace llvm;
static const size_t TabStop = 8;
diff --git a/lib/Support/SpecialCaseList.cpp b/lib/Support/SpecialCaseList.cpp
index c312cc1..ea417c4 100644
--- a/lib/Support/SpecialCaseList.cpp
+++ b/lib/Support/SpecialCaseList.cpp
@@ -15,13 +15,11 @@
//===----------------------------------------------------------------------===//
#include "llvm/Support/SpecialCaseList.h"
-#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/StringSet.h"
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/Regex.h"
-#include "llvm/Support/raw_ostream.h"
#include <string>
#include <system_error>
#include <utility>
diff --git a/lib/Support/StreamingMemoryObject.cpp b/lib/Support/StreamingMemoryObject.cpp
index f39bc56..90f3ed8 100644
--- a/lib/Support/StreamingMemoryObject.cpp
+++ b/lib/Support/StreamingMemoryObject.cpp
@@ -8,12 +8,9 @@
//===----------------------------------------------------------------------===//
#include "llvm/Support/StreamingMemoryObject.h"
-#include "llvm/Support/Compiler.h"
#include <cassert>
#include <cstddef>
#include <cstring>
-
-
using namespace llvm;
namespace {
diff --git a/lib/Support/StringExtras.cpp b/lib/Support/StringExtras.cpp
index d77ad7f..3e2420f 100644
--- a/lib/Support/StringExtras.cpp
+++ b/lib/Support/StringExtras.cpp
@@ -12,7 +12,6 @@
//===----------------------------------------------------------------------===//
#include "llvm/ADT/SmallVector.h"
-#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/StringExtras.h"
using namespace llvm;
diff --git a/lib/Support/SystemUtils.cpp b/lib/Support/SystemUtils.cpp
index 2036364..7fa6ae3 100644
--- a/lib/Support/SystemUtils.cpp
+++ b/lib/Support/SystemUtils.cpp
@@ -13,8 +13,6 @@
//===----------------------------------------------------------------------===//
#include "llvm/Support/SystemUtils.h"
-#include "llvm/Support/Process.h"
-#include "llvm/Support/Program.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
diff --git a/lib/Support/TargetRegistry.cpp b/lib/Support/TargetRegistry.cpp
index f691883..3ca8572 100644
--- a/lib/Support/TargetRegistry.cpp
+++ b/lib/Support/TargetRegistry.cpp
@@ -10,7 +10,6 @@
#include "llvm/Support/TargetRegistry.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/StringRef.h"
-#include "llvm/Support/Host.h"
#include "llvm/Support/raw_ostream.h"
#include <cassert>
#include <vector>
diff --git a/lib/Support/Timer.cpp b/lib/Support/Timer.cpp
index e1a531a..d7b6515 100644
--- a/lib/Support/Timer.cpp
+++ b/lib/Support/Timer.cpp
@@ -14,12 +14,10 @@
#include "llvm/Support/Timer.h"
#include "llvm/ADT/StringMap.h"
#include "llvm/Support/CommandLine.h"
-#include "llvm/Support/Debug.h"
#include "llvm/Support/FileSystem.h"
#include "llvm/Support/Format.h"
#include "llvm/Support/ManagedStatic.h"
#include "llvm/Support/Mutex.h"
-#include "llvm/Support/MutexGuard.h"
#include "llvm/Support/Process.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
diff --git a/lib/Support/Triple.cpp b/lib/Support/Triple.cpp
index e74b23c..d4b150a 100644
--- a/lib/Support/Triple.cpp
+++ b/lib/Support/Triple.cpp
@@ -141,6 +141,7 @@ const char *Triple::getOSTypeName(OSType Kind) {
switch (Kind) {
case UnknownOS: return "unknown";
+ case CloudABI: return "cloudabi";
case Darwin: return "darwin";
case DragonFly: return "dragonfly";
case FreeBSD: return "freebsd";
@@ -280,6 +281,7 @@ static Triple::ArchType parseARMArch(StringRef ArchName) {
.Cases("v7", "v7a", "v7em", "v7l", arch)
.Cases("v7m", "v7r", "v7s", arch)
.Cases("v8", "v8a", arch)
+ .Cases("v8.1", "v8.1a", arch)
.Default(Triple::UnknownArch);
}
@@ -345,6 +347,7 @@ static Triple::VendorType parseVendor(StringRef VendorName) {
static Triple::OSType parseOS(StringRef OSName) {
return StringSwitch<Triple::OSType>(OSName)
+ .StartsWith("cloudabi", Triple::CloudABI)
.StartsWith("darwin", Triple::Darwin)
.StartsWith("dragonfly", Triple::DragonFly)
.StartsWith("freebsd", Triple::FreeBSD)
@@ -401,6 +404,7 @@ static Triple::SubArchType parseSubArch(StringRef SubArchName) {
SubArchName = SubArchName.substr(0, SubArchName.size() - 2);
return StringSwitch<Triple::SubArchType>(SubArchName)
+ .EndsWith("v8.1a", Triple::ARMSubArch_v8_1a)
.EndsWith("v8", Triple::ARMSubArch_v8)
.EndsWith("v8a", Triple::ARMSubArch_v8)
.EndsWith("v7", Triple::ARMSubArch_v7)
@@ -413,6 +417,7 @@ static Triple::SubArchType parseSubArch(StringRef SubArchName) {
.EndsWith("v6", Triple::ARMSubArch_v6)
.EndsWith("v6m", Triple::ARMSubArch_v6m)
.EndsWith("v6sm", Triple::ARMSubArch_v6m)
+ .EndsWith("v6k", Triple::ARMSubArch_v6k)
.EndsWith("v6t2", Triple::ARMSubArch_v6t2)
.EndsWith("v5", Triple::ARMSubArch_v5)
.EndsWith("v5e", Triple::ARMSubArch_v5)
@@ -436,6 +441,30 @@ static const char *getObjectFormatTypeName(Triple::ObjectFormatType Kind) {
}
static Triple::ObjectFormatType getDefaultFormat(const Triple &T) {
+ switch (T.getArch()) {
+ default:
+ break;
+ case Triple::hexagon:
+ case Triple::mips:
+ case Triple::mipsel:
+ case Triple::mips64:
+ case Triple::mips64el:
+ case Triple::r600:
+ case Triple::amdgcn:
+ case Triple::sparc:
+ case Triple::sparcv9:
+ case Triple::systemz:
+ case Triple::xcore:
+ case Triple::ppc64le:
+ return Triple::ELF;
+
+ case Triple::ppc:
+ case Triple::ppc64:
+ if (T.isOSDarwin())
+ return Triple::MachO;
+ return Triple::ELF;
+ }
+
if (T.isOSDarwin())
return Triple::MachO;
else if (T.isOSWindows())
@@ -714,6 +743,14 @@ void Triple::getOSVersion(unsigned &Major, unsigned &Minor,
unsigned &Micro) const {
StringRef OSName = getOSName();
+ // For Android, we care about the Android version rather than the Linux
+ // version.
+ if (getEnvironment() == Android) {
+ OSName = getEnvironmentName().substr(strlen("android"));
+ if (OSName.startswith("eabi"))
+ OSName = OSName.substr(strlen("eabi"));
+ }
+
// Assume that the OS portion of the triple starts with the canonical name.
StringRef OSTypeName = getOSTypeName(getOS());
if (OSName.startswith(OSTypeName))
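To illustrate the effect of this hunk, a minimal sketch (hedged: the triple string below is a hypothetical example, not taken from this patch):

#include "llvm/ADT/Triple.h"

// Sketch: after the change above, getOSVersion() on an Android triple
// reports the Android API level parsed from the environment suffix
// ("android21" -> 21, "androideabi21" -> 21) rather than a Linux version.
static unsigned androidApiLevel() {
  llvm::Triple T("aarch64-linux-android21"); // hypothetical triple
  unsigned Major, Minor, Micro;
  T.getOSVersion(Major, Minor, Micro);
  return Major; // expected: 21
}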
@@ -839,7 +876,7 @@ void Triple::setArchName(StringRef Str) {
Triple += getVendorName();
Triple += "-";
Triple += getOSAndEnvironmentName();
- setTriple(Triple.str());
+ setTriple(Triple);
}
void Triple::setVendorName(StringRef Str) {
@@ -1063,9 +1100,9 @@ const char *Triple::getARMCPUForArch(StringRef MArch) const {
.Cases("v5", "v5t", "arm10tdmi")
.Cases("v5e", "v5te", "arm1022e")
.Case("v5tej", "arm926ej-s")
- .Cases("v6", "v6k", "arm1136jf-s")
+ .Case("v6", "arm1136jf-s")
.Case("v6j", "arm1136j-s")
- .Cases("v6z", "v6zk", "arm1176jzf-s")
+ .Cases("v6k", "v6z", "v6zk", "arm1176jzf-s")
.Case("v6t2", "arm1156t2-s")
.Cases("v6m", "v6-m", "v6sm", "v6s-m", "cortex-m0")
.Cases("v7", "v7a", "v7-a", "v7l", "v7-l", "cortex-a8")
@@ -1074,6 +1111,7 @@ const char *Triple::getARMCPUForArch(StringRef MArch) const {
.Cases("v7m", "v7-m", "cortex-m3")
.Cases("v7em", "v7e-m", "cortex-m4")
.Cases("v8", "v8a", "v8-a", "cortex-a53")
+ .Cases("v8.1a", "v8.1-a", "generic-armv8.1-a")
.Default(nullptr);
else
result = llvm::StringSwitch<const char *>(MArch)
@@ -1099,6 +1137,8 @@ const char *Triple::getARMCPUForArch(StringRef MArch) const {
default:
return "strongarm";
}
+ case llvm::Triple::NaCl:
+ return "cortex-a8";
default:
switch (getEnvironment()) {
case llvm::Triple::EABIHF:
diff --git a/lib/Support/Twine.cpp b/lib/Support/Twine.cpp
index 56ed964..d2cc75b 100644
--- a/lib/Support/Twine.cpp
+++ b/lib/Support/Twine.cpp
@@ -28,13 +28,6 @@ void Twine::toVector(SmallVectorImpl<char> &Out) const {
print(OS);
}
-StringRef Twine::toStringRef(SmallVectorImpl<char> &Out) const {
- if (isSingleStringRef())
- return getSingleStringRef();
- toVector(Out);
- return StringRef(Out.data(), Out.size());
-}
-
StringRef Twine::toNullTerminatedStringRef(SmallVectorImpl<char> &Out) const {
if (isUnary()) {
switch (getLHSKind()) {
@@ -72,6 +65,9 @@ void Twine::printOneChild(raw_ostream &OS, Child Ptr,
case Twine::StringRefKind:
OS << *Ptr.stringRef;
break;
+ case Twine::SmallStringKind:
+ OS << *Ptr.smallString;
+ break;
case Twine::CharKind:
OS << Ptr.character;
break;
@@ -122,6 +118,10 @@ void Twine::printOneChildRepr(raw_ostream &OS, Child Ptr,
OS << "stringref:\""
<< Ptr.stringRef << "\"";
break;
+ case Twine::SmallStringKind:
+ OS << "smallstring:\""
+ << *Ptr.smallString << "\"";
+ break;
case Twine::CharKind:
OS << "char:\"" << Ptr.character << "\"";
break;
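A minimal sketch of what the new SmallStringKind enables, assuming the matching Twine(const SmallVectorImpl<char> &) constructor added alongside this printing support:

#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/Twine.h"
#include "llvm/Support/raw_ostream.h"

// Sketch: a Twine can now hold a SmallString child directly and print
// it without first flattening it to a StringRef.
static void printSmallStringTwine() {
  llvm::SmallString<32> S("hello");
  llvm::Twine T(S);      // stored as SmallStringKind
  T.print(llvm::errs()); // prints "hello"
}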
diff --git a/lib/Support/Unix/Program.inc b/lib/Support/Unix/Program.inc
index baf2767..5816fb8 100644
--- a/lib/Support/Unix/Program.inc
+++ b/lib/Support/Unix/Program.inc
@@ -18,10 +18,11 @@
#include "Unix.h"
#include "llvm/ADT/StringExtras.h"
+#include "llvm/Config/config.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/FileSystem.h"
+#include "llvm/Support/Path.h"
#include "llvm/Support/raw_ostream.h"
-#include <llvm/Config/config.h>
#if HAVE_SYS_STAT_H
#include <sys/stat.h>
#endif
diff --git a/lib/Support/Unix/Signals.inc b/lib/Support/Unix/Signals.inc
index 665c7de..a9b48e0 100644
--- a/lib/Support/Unix/Signals.inc
+++ b/lib/Support/Unix/Signals.inc
@@ -14,6 +14,7 @@
#include "Unix.h"
#include "llvm/ADT/STLExtras.h"
+#include "llvm/Support/Format.h"
#include "llvm/Support/FileSystem.h"
#include "llvm/Support/FileUtilities.h"
#include "llvm/Support/ManagedStatic.h"
@@ -324,7 +325,8 @@ static bool findModulesAndOffsets(void **StackTrace, int Depth,
}
#endif
-static bool printSymbolizedStackTrace(void **StackTrace, int Depth, FILE *FD) {
+static bool printSymbolizedStackTrace(void **StackTrace, int Depth,
+ llvm::raw_ostream &OS) {
// FIXME: Subtract the necessary number from StackTrace entries to turn
// return addresses into actual instruction addresses.
// Use llvm-symbolizer tool to symbolize the stack traces.
@@ -382,7 +384,7 @@ static bool printSymbolizedStackTrace(void **StackTrace, int Depth, FILE *FD) {
int frame_no = 0;
for (int i = 0; i < Depth; i++) {
if (!Modules[i]) {
- fprintf(FD, "#%d %p\n", frame_no++, StackTrace[i]);
+ OS << format("#%d %p\n", frame_no++, StackTrace[i]);
continue;
}
// Read pairs of lines (function name and file/line info) until we
@@ -393,17 +395,17 @@ static bool printSymbolizedStackTrace(void **StackTrace, int Depth, FILE *FD) {
StringRef FunctionName = *CurLine++;
if (FunctionName.empty())
break;
- fprintf(FD, "#%d %p ", frame_no++, StackTrace[i]);
+ OS << format("#%d %p ", frame_no++, StackTrace[i]);
if (!FunctionName.startswith("??"))
- fprintf(FD, "%s ", FunctionName.str().c_str());
+ OS << format("%s ", FunctionName.str().c_str());
if (CurLine == Lines.end())
return false;
StringRef FileLineInfo = *CurLine++;
if (!FileLineInfo.startswith("??"))
- fprintf(FD, "%s", FileLineInfo.str().c_str());
+ OS << format("%s", FileLineInfo.str().c_str());
else
- fprintf(FD, "(%s+%p)", Modules[i], (void *)Offsets[i]);
- fprintf(FD, "\n");
+ OS << format("(%s+%p)", Modules[i], (void *)Offsets[i]);
+ OS << "\n";
}
}
return true;
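The hunks above replace fprintf on a FILE* with llvm::format streamed into a raw_ostream; a minimal sketch of that idiom (the helper name is illustrative):

#include "llvm/Support/Format.h"
#include "llvm/Support/raw_ostream.h"

// Sketch: printf-style formatting routed through any raw_ostream
// (e.g. llvm::errs()) instead of fprintf(stderr, ...).
static void printFrame(llvm::raw_ostream &OS, int No, void *Addr) {
  OS << llvm::format("#%d %p\n", No, Addr);
}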
@@ -415,13 +417,13 @@ static bool printSymbolizedStackTrace(void **StackTrace, int Depth, FILE *FD) {
//
// On glibc systems we have the 'backtrace' function, which works nicely, but
// doesn't demangle symbols.
-void llvm::sys::PrintStackTrace(FILE *FD) {
+void llvm::sys::PrintStackTrace(raw_ostream &OS) {
#if defined(HAVE_BACKTRACE) && defined(ENABLE_BACKTRACES)
static void* StackTrace[256];
// Use backtrace() to output a backtrace on Linux systems with glibc.
int depth = backtrace(StackTrace,
static_cast<int>(array_lengthof(StackTrace)));
- if (printSymbolizedStackTrace(StackTrace, depth, FD))
+ if (printSymbolizedStackTrace(StackTrace, depth, OS))
return;
#if HAVE_DLFCN_H && __GNUG__
int width = 0;
@@ -441,34 +443,34 @@ void llvm::sys::PrintStackTrace(FILE *FD) {
Dl_info dlinfo;
dladdr(StackTrace[i], &dlinfo);
- fprintf(FD, "%-2d", i);
+ OS << format("%-2d", i);
const char* name = strrchr(dlinfo.dli_fname, '/');
- if (!name) fprintf(FD, " %-*s", width, dlinfo.dli_fname);
- else fprintf(FD, " %-*s", width, name+1);
+ if (!name) OS << format(" %-*s", width, dlinfo.dli_fname);
+ else OS << format(" %-*s", width, name+1);
- fprintf(FD, " %#0*lx",
- (int)(sizeof(void*) * 2) + 2, (unsigned long)StackTrace[i]);
+ OS << format(" %#0*lx", (int)(sizeof(void*) * 2) + 2,
+ (unsigned long)StackTrace[i]);
if (dlinfo.dli_sname != nullptr) {
- fputc(' ', FD);
+ OS << ' ';
# if HAVE_CXXABI_H
int res;
char* d = abi::__cxa_demangle(dlinfo.dli_sname, nullptr, nullptr, &res);
# else
char* d = NULL;
# endif
- if (!d) fputs(dlinfo.dli_sname, FD);
- else fputs(d, FD);
+ if (!d) OS << dlinfo.dli_sname;
+ else OS << d;
free(d);
// FIXME: When we move to C++11, use %t length modifier. It's not in
// C++03 and causes gcc to issue warnings. Losing the upper 32 bits of
// the stack offset for a stack dump isn't likely to cause any problems.
- fprintf(FD, " + %u",(unsigned)((char*)StackTrace[i]-
- (char*)dlinfo.dli_saddr));
+ OS << format(" + %u",(unsigned)((char*)StackTrace[i]-
+ (char*)dlinfo.dli_saddr));
}
- fputc('\n', FD);
+ OS << '\n';
}
#else
backtrace_symbols_fd(StackTrace, depth, STDERR_FILENO);
@@ -477,7 +479,7 @@ void llvm::sys::PrintStackTrace(FILE *FD) {
}
static void PrintStackTraceSignalHandler(void *) {
- PrintStackTrace(stderr);
+ PrintStackTrace(llvm::errs());
}
void llvm::sys::DisableSystemDialogsOnCrash() {}
diff --git a/lib/Support/Windows/Path.inc b/lib/Support/Windows/Path.inc
index d8b5702..d558ff5 100644
--- a/lib/Support/Windows/Path.inc
+++ b/lib/Support/Windows/Path.inc
@@ -599,8 +599,8 @@ std::error_code detail::directory_iterator_construct(detail::DirIterState &it,
it.IterationHandle = intptr_t(FindHandle.take());
SmallString<128> directory_entry_path(path);
- path::append(directory_entry_path, directory_entry_name_utf8.str());
- it.CurrentEntry = directory_entry(directory_entry_path.str());
+ path::append(directory_entry_path, directory_entry_name_utf8);
+ it.CurrentEntry = directory_entry(directory_entry_path);
return std::error_code();
}
diff --git a/lib/Support/Windows/Process.inc b/lib/Support/Windows/Process.inc
index 854eac7..5f9ce7f 100644
--- a/lib/Support/Windows/Process.inc
+++ b/lib/Support/Windows/Process.inc
@@ -329,6 +329,16 @@ class DefaultColors
};
DefaultColors defaultColors;
+
+WORD fg_color(WORD color) {
+ return color & (FOREGROUND_BLUE | FOREGROUND_GREEN |
+ FOREGROUND_INTENSITY | FOREGROUND_RED);
+}
+
+WORD bg_color(WORD color) {
+ return color & (BACKGROUND_BLUE | BACKGROUND_GREEN |
+ BACKGROUND_INTENSITY | BACKGROUND_RED);
+}
}
bool Process::ColorNeedsFlush() {
@@ -350,6 +360,7 @@ const char *Process::OutputBold(bool bg) {
const char *Process::OutputColor(char code, bool bold, bool bg) {
if (UseANSI) return colorcodes[bg?1:0][bold?1:0][code&7];
+ WORD current = DefaultColors::GetCurrentColor();
WORD colors;
if (bg) {
colors = ((code&1) ? BACKGROUND_RED : 0) |
@@ -357,12 +368,14 @@ const char *Process::OutputColor(char code, bool bold, bool bg) {
((code&4) ? BACKGROUND_BLUE : 0);
if (bold)
colors |= BACKGROUND_INTENSITY;
+ colors |= fg_color(current);
} else {
colors = ((code&1) ? FOREGROUND_RED : 0) |
((code&2) ? FOREGROUND_GREEN : 0 ) |
((code&4) ? FOREGROUND_BLUE : 0);
if (bold)
colors |= FOREGROUND_INTENSITY;
+ colors |= bg_color(current);
}
SetConsoleTextAttribute(GetStdHandle(STD_OUTPUT_HANDLE), colors);
return 0;
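The new masking merges the requested half of the attribute with the half already on the console; a sketch of the underlying idea, assuming the usual console attribute layout (the helper below is illustrative, not part of the patch):

#include <windows.h>

// Sketch: a console attribute WORD packs foreground and background bits,
// so changing the background must preserve the foreground bits (and vice
// versa), which is what fg_color()/bg_color() make possible above.
static WORD withNewBackground(WORD Current, WORD NewBg) {
  WORD Fg = Current & (FOREGROUND_BLUE | FOREGROUND_GREEN |
                       FOREGROUND_INTENSITY | FOREGROUND_RED);
  WORD Bg = NewBg & (BACKGROUND_BLUE | BACKGROUND_GREEN |
                     BACKGROUND_INTENSITY | BACKGROUND_RED);
  return Fg | Bg;
}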
diff --git a/lib/Support/Windows/Signals.inc b/lib/Support/Windows/Signals.inc
index aa1aa72..de6bf1c 100644
--- a/lib/Support/Windows/Signals.inc
+++ b/lib/Support/Windows/Signals.inc
@@ -10,13 +10,15 @@
// This file provides the Win32 specific implementation of the Signals class.
//
//===----------------------------------------------------------------------===//
-
#include "llvm/Support/FileSystem.h"
#include <algorithm>
#include <signal.h>
#include <stdio.h>
#include <vector>
+#include "llvm/Support/Format.h"
+#include "llvm/Support/raw_ostream.h"
+
// The Windows.h header must be after LLVM and standard headers.
#include "WindowsSupport.h"
@@ -172,6 +174,92 @@ static PTOP_LEVEL_EXCEPTION_FILTER OldFilter = NULL;
// (such as CTRL/C) occurs. This causes concurrency issues with the above
// globals, which this critical section addresses.
static CRITICAL_SECTION CriticalSection;
+static bool CriticalSectionInitialized = false;
+
+static void PrintStackTraceForThread(llvm::raw_ostream &OS, HANDLE hProcess,
+ HANDLE hThread, STACKFRAME64 &StackFrame,
+ CONTEXT *Context) {
+ DWORD machineType;
+#if defined(_M_X64)
+ machineType = IMAGE_FILE_MACHINE_AMD64;
+#else
+ machineType = IMAGE_FILE_MACHINE_I386;
+#endif
+
+ // Initialize the symbol handler.
+ SymSetOptions(SYMOPT_DEFERRED_LOADS | SYMOPT_LOAD_LINES);
+ SymInitialize(hProcess, NULL, TRUE);
+
+ while (true) {
+ if (!StackWalk64(machineType, hProcess, hThread, &StackFrame, Context, NULL,
+ SymFunctionTableAccess64, SymGetModuleBase64, NULL)) {
+ break;
+ }
+
+ if (StackFrame.AddrFrame.Offset == 0)
+ break;
+
+ using namespace llvm;
+ // Print the PC in hexadecimal.
+ DWORD64 PC = StackFrame.AddrPC.Offset;
+#if defined(_M_X64)
+ OS << format("0x%016llX", PC);
+#elif defined(_M_IX86)
+ OS << format("0x%08lX", static_cast<DWORD>(PC));
+#endif
+
+// Print the parameters. Assume there are four.
+#if defined(_M_X64)
+ OS << format(" (0x%016llX 0x%016llX 0x%016llX 0x%016llX)",
+ StackFrame.Params[0], StackFrame.Params[1], StackFrame.Params[2],
+ StackFrame.Params[3]);
+#elif defined(_M_IX86)
+ OS << format(" (0x%08lX 0x%08lX 0x%08lX 0x%08lX)",
+ static_cast<DWORD>(StackFrame.Params[0]),
+ static_cast<DWORD>(StackFrame.Params[1]),
+ static_cast<DWORD>(StackFrame.Params[2]),
+ static_cast<DWORD>(StackFrame.Params[3]));
+#endif
+ // Verify the PC belongs to a module in this process.
+ if (!SymGetModuleBase64(hProcess, PC)) {
+ OS << " <unknown module>\n";
+ continue;
+ }
+
+ // Print the symbol name.
+ char buffer[512];
+ IMAGEHLP_SYMBOL64 *symbol = reinterpret_cast<IMAGEHLP_SYMBOL64 *>(buffer);
+ memset(symbol, 0, sizeof(IMAGEHLP_SYMBOL64));
+ symbol->SizeOfStruct = sizeof(IMAGEHLP_SYMBOL64);
+ symbol->MaxNameLength = 512 - sizeof(IMAGEHLP_SYMBOL64);
+
+ DWORD64 dwDisp;
+ if (!SymGetSymFromAddr64(hProcess, PC, &dwDisp, symbol)) {
+ OS << '\n';
+ continue;
+ }
+
+ buffer[511] = 0;
+ if (dwDisp > 0)
+ OS << format(", %s() + 0x%llX bytes(s)", (const char*)symbol->Name,
+ dwDisp);
+ else
+ OS << format(", %s", (const char*)symbol->Name);
+
+ // Print the source file and line number information.
+ IMAGEHLP_LINE64 line;
+ DWORD dwLineDisp;
+ memset(&line, 0, sizeof(line));
+ line.SizeOfStruct = sizeof(line);
+ if (SymGetLineFromAddr64(hProcess, PC, &dwLineDisp, &line)) {
+ OS << format(", %s, line %lu", line.FileName, line.LineNumber);
+ if (dwLineDisp > 0)
+ OS << format(" + 0x%lX byte(s)", dwLineDisp);
+ }
+
+ OS << '\n';
+ }
+}
namespace llvm {
@@ -203,6 +291,16 @@ extern "C" void HandleAbort(int Sig) {
}
}
+static void InitializeThreading() {
+ if (CriticalSectionInitialized)
+ return;
+
+ // Now's the time to create the critical section. This is the first time
+ // through here, and there's only one thread.
+ InitializeCriticalSection(&CriticalSection);
+ CriticalSectionInitialized = true;
+}
+
static void RegisterHandler() {
#if __MINGW32__ && !defined(__MINGW64_VERSION_MAJOR)
// On MinGW.org, we need to load up the symbols explicitly, because the
@@ -221,9 +319,7 @@ static void RegisterHandler() {
return;
}
- // Now's the time to create the critical section. This is the first time
- // through here, and there's only one thread.
- InitializeCriticalSection(&CriticalSection);
+ InitializeThreading();
// Enter it immediately. Now if someone hits CTRL/C, the console handler
// can't proceed until the globals are updated.
@@ -298,13 +394,37 @@ void sys::PrintStackTraceOnErrorSignal() {
RegisterHandler();
LeaveCriticalSection(&CriticalSection);
}
+}
+
+#if defined(__MINGW32__) && !defined(__MINGW64_VERSION_MAJOR)
+// Provide a prototype for RtlCaptureContext; mingw32 from mingw.org is
+// missing it, but mingw-w64 has it.
+extern "C" VOID WINAPI RtlCaptureContext(PCONTEXT ContextRecord);
+#endif
-void llvm::sys::PrintStackTrace(FILE *) {
- // FIXME: Implement.
+void llvm::sys::PrintStackTrace(raw_ostream &OS) {
+
+ STACKFRAME64 StackFrame = {};
+ CONTEXT Context = {0};
+ ::RtlCaptureContext(&Context);
+#if defined(_M_X64)
+ StackFrame.AddrPC.Offset = Context.Rip;
+ StackFrame.AddrStack.Offset = Context.Rsp;
+ StackFrame.AddrFrame.Offset = Context.Rbp;
+#else
+ StackFrame.AddrPC.Offset = Context.Eip;
+ StackFrame.AddrStack.Offset = Context.Esp;
+ StackFrame.AddrFrame.Offset = Context.Ebp;
+#endif
+ StackFrame.AddrPC.Mode = AddrModeFlat;
+ StackFrame.AddrStack.Mode = AddrModeFlat;
+ StackFrame.AddrFrame.Mode = AddrModeFlat;
+ PrintStackTraceForThread(OS, GetCurrentProcess(), GetCurrentThread(),
+ StackFrame, &Context);
}
-void sys::SetInterruptFunction(void (*IF)()) {
+void llvm::sys::SetInterruptFunction(void (*IF)()) {
RegisterHandler();
InterruptFunction = IF;
LeaveCriticalSection(&CriticalSection);
@@ -314,14 +434,13 @@ void sys::SetInterruptFunction(void (*IF)()) {
/// AddSignalHandler - Add a function to be called when a signal is delivered
/// to the process. The handler can have a cookie passed to it to identify
/// what instance of the handler it is.
-void sys::AddSignalHandler(void (*FnPtr)(void *), void *Cookie) {
+void llvm::sys::AddSignalHandler(void (*FnPtr)(void *), void *Cookie) {
if (CallBacksToRun == 0)
CallBacksToRun = new std::vector<std::pair<void(*)(void*), void*> >();
CallBacksToRun->push_back(std::make_pair(FnPtr, Cookie));
RegisterHandler();
LeaveCriticalSection(&CriticalSection);
}
-}
static void Cleanup() {
EnterCriticalSection(&CriticalSection);
@@ -346,6 +465,11 @@ static void Cleanup() {
}
void llvm::sys::RunInterruptHandlers() {
+ // The interrupt handler may be called from an interrupt, but it may also be
+ // called manually (such as report_fatal_error with no registered error
+ // handler). We must ensure that the critical section is properly
+ // initialized.
+ InitializeThreading();
Cleanup();
}
@@ -356,9 +480,7 @@ static LONG WINAPI LLVMUnhandledExceptionFilter(LPEXCEPTION_POINTERS ep) {
STACKFRAME64 StackFrame;
memset(&StackFrame, 0, sizeof(StackFrame));
- DWORD machineType;
#if defined(_M_X64)
- machineType = IMAGE_FILE_MACHINE_AMD64;
StackFrame.AddrPC.Offset = ep->ContextRecord->Rip;
StackFrame.AddrPC.Mode = AddrModeFlat;
StackFrame.AddrStack.Offset = ep->ContextRecord->Rsp;
@@ -366,7 +488,6 @@ static LONG WINAPI LLVMUnhandledExceptionFilter(LPEXCEPTION_POINTERS ep) {
StackFrame.AddrFrame.Offset = ep->ContextRecord->Rbp;
StackFrame.AddrFrame.Mode = AddrModeFlat;
#elif defined(_M_IX86)
- machineType = IMAGE_FILE_MACHINE_I386;
StackFrame.AddrPC.Offset = ep->ContextRecord->Eip;
StackFrame.AddrPC.Mode = AddrModeFlat;
StackFrame.AddrStack.Offset = ep->ContextRecord->Esp;
@@ -377,81 +498,8 @@ static LONG WINAPI LLVMUnhandledExceptionFilter(LPEXCEPTION_POINTERS ep) {
HANDLE hProcess = GetCurrentProcess();
HANDLE hThread = GetCurrentThread();
-
- // Initialize the symbol handler.
- SymSetOptions(SYMOPT_DEFERRED_LOADS|SYMOPT_LOAD_LINES);
- SymInitialize(hProcess, NULL, TRUE);
-
- while (true) {
- if (!StackWalk64(machineType, hProcess, hThread, &StackFrame,
- ep->ContextRecord, NULL, SymFunctionTableAccess64,
- SymGetModuleBase64, NULL)) {
- break;
- }
-
- if (StackFrame.AddrFrame.Offset == 0)
- break;
-
- // Print the PC in hexadecimal.
- DWORD64 PC = StackFrame.AddrPC.Offset;
-#if defined(_M_X64)
- fprintf(stderr, "0x%016llX", PC);
-#elif defined(_M_IX86)
- fprintf(stderr, "0x%08lX", static_cast<DWORD>(PC));
-#endif
-
- // Print the parameters. Assume there are four.
-#if defined(_M_X64)
- fprintf(stderr, " (0x%016llX 0x%016llX 0x%016llX 0x%016llX)",
- StackFrame.Params[0],
- StackFrame.Params[1],
- StackFrame.Params[2],
- StackFrame.Params[3]);
-#elif defined(_M_IX86)
- fprintf(stderr, " (0x%08lX 0x%08lX 0x%08lX 0x%08lX)",
- static_cast<DWORD>(StackFrame.Params[0]),
- static_cast<DWORD>(StackFrame.Params[1]),
- static_cast<DWORD>(StackFrame.Params[2]),
- static_cast<DWORD>(StackFrame.Params[3]));
-#endif
- // Verify the PC belongs to a module in this process.
- if (!SymGetModuleBase64(hProcess, PC)) {
- fputs(" <unknown module>\n", stderr);
- continue;
- }
-
- // Print the symbol name.
- char buffer[512];
- IMAGEHLP_SYMBOL64 *symbol = reinterpret_cast<IMAGEHLP_SYMBOL64 *>(buffer);
- memset(symbol, 0, sizeof(IMAGEHLP_SYMBOL64));
- symbol->SizeOfStruct = sizeof(IMAGEHLP_SYMBOL64);
- symbol->MaxNameLength = 512 - sizeof(IMAGEHLP_SYMBOL64);
-
- DWORD64 dwDisp;
- if (!SymGetSymFromAddr64(hProcess, PC, &dwDisp, symbol)) {
- fputc('\n', stderr);
- continue;
- }
-
- buffer[511] = 0;
- if (dwDisp > 0)
- fprintf(stderr, ", %s() + 0x%llX bytes(s)", symbol->Name, dwDisp);
- else
- fprintf(stderr, ", %s", symbol->Name);
-
- // Print the source file and line number information.
- IMAGEHLP_LINE64 line;
- DWORD dwLineDisp;
- memset(&line, 0, sizeof(line));
- line.SizeOfStruct = sizeof(line);
- if (SymGetLineFromAddr64(hProcess, PC, &dwLineDisp, &line)) {
- fprintf(stderr, ", %s, line %lu", line.FileName, line.LineNumber);
- if (dwLineDisp > 0)
- fprintf(stderr, " + 0x%lX byte(s)", dwLineDisp);
- }
-
- fputc('\n', stderr);
- }
+ PrintStackTraceForThread(llvm::errs(), hProcess, hThread, StackFrame,
+ ep->ContextRecord);
_exit(ep->ExceptionRecord->ExceptionCode);
}
diff --git a/lib/Support/YAMLParser.cpp b/lib/Support/YAMLParser.cpp
index 6ae7945..93aec7c 100644
--- a/lib/Support/YAMLParser.cpp
+++ b/lib/Support/YAMLParser.cpp
@@ -12,6 +12,7 @@
//===----------------------------------------------------------------------===//
#include "llvm/Support/YAMLParser.h"
+#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/Twine.h"
diff --git a/lib/Support/YAMLTraits.cpp b/lib/Support/YAMLTraits.cpp
index 43a0e10..74e5414 100644
--- a/lib/Support/YAMLTraits.cpp
+++ b/lib/Support/YAMLTraits.cpp
@@ -7,13 +7,14 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/Support/Errc.h"
+#include "llvm/Support/YAMLTraits.h"
+#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/Twine.h"
#include "llvm/Support/Casting.h"
+#include "llvm/Support/Errc.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/Format.h"
#include "llvm/Support/YAMLParser.h"
-#include "llvm/Support/YAMLTraits.h"
#include "llvm/Support/raw_ostream.h"
#include <cctype>
#include <cstring>
@@ -168,9 +169,17 @@ void Input::endMapping() {
}
unsigned Input::beginSequence() {
- if (SequenceHNode *SQ = dyn_cast<SequenceHNode>(CurrentNode)) {
+ if (SequenceHNode *SQ = dyn_cast<SequenceHNode>(CurrentNode))
return SQ->Entries.size();
+ if (isa<EmptyHNode>(CurrentNode))
+ return 0;
+ // Treat the case where there's a scalar "null" value as an empty sequence.
+ if (ScalarHNode *SN = dyn_cast<ScalarHNode>(CurrentNode)) {
+ if (isNull(SN->value()))
+ return 0;
}
+ // Any other type of HNode is an error.
+ setError(CurrentNode, "not a sequence");
return 0;
}
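A hedged sketch of the behavior this gives YAML deserialization (the struct and traits below are a hypothetical minimal example):

#include "llvm/ADT/StringRef.h"
#include "llvm/Support/YAMLTraits.h"
#include <vector>

struct Doc {
  std::vector<int> Seq;
};

LLVM_YAML_IS_SEQUENCE_VECTOR(int)

namespace llvm {
namespace yaml {
template <> struct MappingTraits<Doc> {
  static void mapping(IO &io, Doc &D) { io.mapOptional("seq", D.Seq); }
};
} // namespace yaml
} // namespace llvm

// Sketch: with the beginSequence() change above, "seq: null" (and a bare
// "seq:") now deserializes as an empty vector instead of raising an error.
static Doc parseDoc(llvm::StringRef Text) {
  Doc D;
  llvm::yaml::Input In(Text);
  In >> D;
  return D; // parseDoc("seq: null").Seq.empty() == true
}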
@@ -192,12 +201,7 @@ void Input::postflightElement(void *SaveInfo) {
CurrentNode = reinterpret_cast<HNode *>(SaveInfo);
}
-unsigned Input::beginFlowSequence() {
- if (SequenceHNode *SQ = dyn_cast<SequenceHNode>(CurrentNode)) {
- return SQ->Entries.size();
- }
- return 0;
-}
+unsigned Input::beginFlowSequence() { return beginSequence(); }
bool Input::preflightFlowElement(unsigned index, void *&SaveInfo) {
if (EC)
diff --git a/lib/TableGen/TGParser.cpp b/lib/TableGen/TGParser.cpp
index 44f6a6e..f66dfd3 100644
--- a/lib/TableGen/TGParser.cpp
+++ b/lib/TableGen/TGParser.cpp
@@ -12,6 +12,7 @@
//===----------------------------------------------------------------------===//
#include "TGParser.h"
+#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/Support/CommandLine.h"
diff --git a/lib/Target/AArch64/AArch64.td b/lib/Target/AArch64/AArch64.td
index dff48f9..bb3db4b 100644
--- a/lib/Target/AArch64/AArch64.td
+++ b/lib/Target/AArch64/AArch64.td
@@ -32,6 +32,9 @@ def FeatureCrypto : SubtargetFeature<"crypto", "HasCrypto", "true",
def FeatureCRC : SubtargetFeature<"crc", "HasCRC", "true",
"Enable ARMv8 CRC-32 checksum instructions">;
+def FeatureV8_1a : SubtargetFeature<"v8.1a", "HasV8_1a", "true",
+ "Enable ARMv8.1a extensions", [FeatureCRC]>;
+
/// Cyclone has register move instructions which are "free".
def FeatureZCRegMove : SubtargetFeature<"zcm", "HasZeroCycleRegMove", "true",
"Has zero-cycle register moves">;
@@ -89,6 +92,10 @@ def : ProcessorModel<"generic", NoSchedModel, [FeatureFPARMv8,
FeatureNEON,
FeatureCRC]>;
+def : ProcessorModel<"generic-armv8.1-a", NoSchedModel, [FeatureV8_1a,
+ FeatureNEON,
+ FeatureCrypto]>;
+
def : ProcessorModel<"cortex-a53", CortexA53Model, [ProcA53]>;
def : ProcessorModel<"cortex-a57", CortexA57Model, [ProcA57]>;
// FIXME: Cortex-A72 is currently modelled as a Cortex-A57.
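Usage-wise, the new processor model should be selectable like any other; a hypothetical invocation (assuming an AArch64-enabled build of llc):

  llc -mtriple=aarch64-linux-gnu -mcpu=generic-armv8.1-a input.ll

which matches the CPU name that the Triple.cpp hunk earlier maps "v8.1a" arch strings to in getARMCPUForArch.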
diff --git a/lib/Target/AArch64/AArch64A53Fix835769.cpp b/lib/Target/AArch64/AArch64A53Fix835769.cpp
index dd401c6..3bc5a54 100644
--- a/lib/Target/AArch64/AArch64A53Fix835769.cpp
+++ b/lib/Target/AArch64/AArch64A53Fix835769.cpp
@@ -24,6 +24,7 @@
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetInstrInfo.h"
using namespace llvm;
diff --git a/lib/Target/AArch64/AArch64A57FPLoadBalancing.cpp b/lib/Target/AArch64/AArch64A57FPLoadBalancing.cpp
index 2cf3c22..bffd9e6 100644
--- a/lib/Target/AArch64/AArch64A57FPLoadBalancing.cpp
+++ b/lib/Target/AArch64/AArch64A57FPLoadBalancing.cpp
@@ -142,7 +142,7 @@ private:
int scavengeRegister(Chain *G, Color C, MachineBasicBlock &MBB);
void scanInstruction(MachineInstr *MI, unsigned Idx,
std::map<unsigned, Chain*> &Active,
- std::set<std::unique_ptr<Chain>> &AllChains);
+ std::vector<std::unique_ptr<Chain>> &AllChains);
void maybeKillChain(MachineOperand &MO, unsigned Idx,
std::map<unsigned, Chain*> &RegChains);
Color getColor(unsigned Register);
@@ -287,12 +287,12 @@ public:
raw_string_ostream OS(S);
OS << "{";
- StartInst->print(OS, NULL, true);
+ StartInst->print(OS, /* SkipOpers= */true);
OS << " -> ";
- LastInst->print(OS, NULL, true);
+ LastInst->print(OS, /* SkipOpers= */true);
if (KillInst) {
OS << " (kill @ ";
- KillInst->print(OS, NULL, true);
+ KillInst->print(OS, /* SkipOpers= */true);
OS << ")";
}
OS << "}";
@@ -307,6 +307,11 @@ public:
//===----------------------------------------------------------------------===//
bool AArch64A57FPLoadBalancing::runOnMachineFunction(MachineFunction &F) {
+ // Don't do anything if this isn't an A53 or A57.
+ if (!(F.getSubtarget<AArch64Subtarget>().isCortexA53() ||
+ F.getSubtarget<AArch64Subtarget>().isCortexA57()))
+ return false;
+
bool Changed = false;
DEBUG(dbgs() << "***** AArch64A57FPLoadBalancing *****\n");
@@ -331,7 +336,7 @@ bool AArch64A57FPLoadBalancing::runOnBasicBlock(MachineBasicBlock &MBB) {
// been killed yet. This is keyed by register - all chains can only have one
// "link" register between each inst in the chain.
std::map<unsigned, Chain*> ActiveChains;
- std::set<std::unique_ptr<Chain>> AllChains;
+ std::vector<std::unique_ptr<Chain>> AllChains;
unsigned Idx = 0;
for (auto &MI : MBB)
scanInstruction(&MI, Idx++, ActiveChains, AllChains);
@@ -598,10 +603,9 @@ bool AArch64A57FPLoadBalancing::colorChain(Chain *G, Color C,
return Changed;
}
-void AArch64A57FPLoadBalancing::
-scanInstruction(MachineInstr *MI, unsigned Idx,
- std::map<unsigned, Chain*> &ActiveChains,
- std::set<std::unique_ptr<Chain>> &AllChains) {
+void AArch64A57FPLoadBalancing::scanInstruction(
+ MachineInstr *MI, unsigned Idx, std::map<unsigned, Chain *> &ActiveChains,
+ std::vector<std::unique_ptr<Chain>> &AllChains) {
// Inspect "MI", updating ActiveChains and AllChains.
if (isMul(MI)) {
@@ -620,7 +624,7 @@ scanInstruction(MachineInstr *MI, unsigned Idx,
auto G = llvm::make_unique<Chain>(MI, Idx, getColor(DestReg));
ActiveChains[DestReg] = G.get();
- AllChains.insert(std::move(G));
+ AllChains.push_back(std::move(G));
} else if (isMla(MI)) {
@@ -664,7 +668,7 @@ scanInstruction(MachineInstr *MI, unsigned Idx,
<< TRI->getName(DestReg) << "\n");
auto G = llvm::make_unique<Chain>(MI, Idx, getColor(DestReg));
ActiveChains[DestReg] = G.get();
- AllChains.insert(std::move(G));
+ AllChains.push_back(std::move(G));
} else {
diff --git a/lib/Target/AArch64/AArch64AddressTypePromotion.cpp b/lib/Target/AArch64/AArch64AddressTypePromotion.cpp
index 287989f..716e1a3 100644
--- a/lib/Target/AArch64/AArch64AddressTypePromotion.cpp
+++ b/lib/Target/AArch64/AArch64AddressTypePromotion.cpp
@@ -41,6 +41,7 @@
#include "llvm/Pass.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
using namespace llvm;
diff --git a/lib/Target/AArch64/AArch64AsmPrinter.cpp b/lib/Target/AArch64/AArch64AsmPrinter.cpp
index d64d851..1b4483a 100644
--- a/lib/Target/AArch64/AArch64AsmPrinter.cpp
+++ b/lib/Target/AArch64/AArch64AsmPrinter.cpp
@@ -12,12 +12,14 @@
//
//===----------------------------------------------------------------------===//
+#include "MCTargetDesc/AArch64AddressingModes.h"
#include "AArch64.h"
#include "AArch64MCInstLower.h"
#include "AArch64MachineFunctionInfo.h"
#include "AArch64RegisterInfo.h"
#include "AArch64Subtarget.h"
#include "InstPrinter/AArch64InstPrinter.h"
+#include "MCTargetDesc/AArch64MCExpr.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/StringSwitch.h"
#include "llvm/ADT/Twine.h"
@@ -34,8 +36,10 @@
#include "llvm/MC/MCInstBuilder.h"
#include "llvm/MC/MCLinkerOptimizationHint.h"
#include "llvm/MC/MCStreamer.h"
+#include "llvm/MC/MCSymbol.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/TargetRegistry.h"
+#include "llvm/Support/raw_ostream.h"
using namespace llvm;
#define DEBUG_TYPE "asm-printer"
@@ -49,7 +53,7 @@ class AArch64AsmPrinter : public AsmPrinter {
public:
AArch64AsmPrinter(TargetMachine &TM, std::unique_ptr<MCStreamer> Streamer)
: AsmPrinter(TM, std::move(Streamer)), MCInstLowering(OutContext, *this),
- SM(*this), AArch64FI(nullptr), LOHLabelCounter(0) {}
+ SM(*this), AArch64FI(nullptr) {}
const char *getPassName() const override {
return "AArch64 Assembly Printer";
@@ -110,7 +114,6 @@ private:
typedef std::map<const MachineInstr *, MCSymbol *> MInstToMCSymbol;
MInstToMCSymbol LOHInstToLabel;
- unsigned LOHLabelCounter;
};
} // end of anonymous namespace
@@ -219,6 +222,17 @@ void AArch64AsmPrinter::printOperand(const MachineInstr *MI, unsigned OpNum,
O << '#' << Imm;
break;
}
+ case MachineOperand::MO_GlobalAddress: {
+ const GlobalValue *GV = MO.getGlobal();
+ MCSymbol *Sym = getSymbol(GV);
+
+ // FIXME: Can we get anything other than a plain symbol here?
+ assert(!MO.getTargetFlags() && "Unknown operand target flag!");
+
+ O << *Sym;
+ printOffset(MO.getOffset(), O);
+ break;
+ }
}
}
@@ -450,7 +464,7 @@ void AArch64AsmPrinter::EmitInstruction(const MachineInstr *MI) {
if (AArch64FI->getLOHRelated().count(MI)) {
// Generate a label for LOH related instruction
- MCSymbol *LOHLabel = GetTempSymbol("loh", LOHLabelCounter++);
+ MCSymbol *LOHLabel = createTempSymbol("loh");
// Associate the instruction with the label
LOHInstToLabel[MI] = LOHLabel;
OutStreamer.EmitLabel(LOHLabel);
@@ -489,24 +503,57 @@ void AArch64AsmPrinter::EmitInstruction(const MachineInstr *MI) {
EmitToStreamer(OutStreamer, TmpInst);
return;
}
- case AArch64::TLSDESC_BLR: {
- MCOperand Callee, Sym;
- MCInstLowering.lowerOperand(MI->getOperand(0), Callee);
- MCInstLowering.lowerOperand(MI->getOperand(1), Sym);
-
- // First emit a relocation-annotation. This expands to no code, but requests
+ case AArch64::TLSDESC_CALLSEQ: {
+ /// Lower this to:
+ /// adrp x0, :tlsdesc:var
+ /// ldr x1, [x0, #:tlsdesc_lo12:var]
+ /// add x0, x0, #:tlsdesc_lo12:var
+ /// .tlsdesccall var
+ /// blr x1
+ /// (TPIDR_EL0 offset now in x0)
+ const MachineOperand &MO_Sym = MI->getOperand(0);
+ MachineOperand MO_TLSDESC_LO12(MO_Sym), MO_TLSDESC(MO_Sym);
+ MCOperand Sym, SymTLSDescLo12, SymTLSDesc;
+ MO_TLSDESC_LO12.setTargetFlags(AArch64II::MO_TLS | AArch64II::MO_PAGEOFF |
+ AArch64II::MO_NC);
+ MO_TLSDESC.setTargetFlags(AArch64II::MO_TLS | AArch64II::MO_PAGE);
+ MCInstLowering.lowerOperand(MO_Sym, Sym);
+ MCInstLowering.lowerOperand(MO_TLSDESC_LO12, SymTLSDescLo12);
+ MCInstLowering.lowerOperand(MO_TLSDESC, SymTLSDesc);
+
+ MCInst Adrp;
+ Adrp.setOpcode(AArch64::ADRP);
+ Adrp.addOperand(MCOperand::CreateReg(AArch64::X0));
+ Adrp.addOperand(SymTLSDesc);
+ EmitToStreamer(OutStreamer, Adrp);
+
+ MCInst Ldr;
+ Ldr.setOpcode(AArch64::LDRXui);
+ Ldr.addOperand(MCOperand::CreateReg(AArch64::X1));
+ Ldr.addOperand(MCOperand::CreateReg(AArch64::X0));
+ Ldr.addOperand(SymTLSDescLo12);
+ Ldr.addOperand(MCOperand::CreateImm(0));
+ EmitToStreamer(OutStreamer, Ldr);
+
+ MCInst Add;
+ Add.setOpcode(AArch64::ADDXri);
+ Add.addOperand(MCOperand::CreateReg(AArch64::X0));
+ Add.addOperand(MCOperand::CreateReg(AArch64::X0));
+ Add.addOperand(SymTLSDescLo12);
+ Add.addOperand(MCOperand::CreateImm(AArch64_AM::getShiftValue(0)));
+ EmitToStreamer(OutStreamer, Add);
+
+ // Emit a relocation-annotation. This expands to no code, but requests
// that the following instruction gets an R_AARCH64_TLSDESC_CALL.
MCInst TLSDescCall;
TLSDescCall.setOpcode(AArch64::TLSDESCCALL);
TLSDescCall.addOperand(Sym);
EmitToStreamer(OutStreamer, TLSDescCall);
- // Other than that it's just a normal indirect call to the function loaded
- // from the descriptor.
- MCInst BLR;
- BLR.setOpcode(AArch64::BLR);
- BLR.addOperand(Callee);
- EmitToStreamer(OutStreamer, BLR);
+ MCInst Blr;
+ Blr.setOpcode(AArch64::BLR);
+ Blr.addOperand(MCOperand::CreateReg(AArch64::X1));
+ EmitToStreamer(OutStreamer, Blr);
return;
}
diff --git a/lib/Target/AArch64/AArch64CleanupLocalDynamicTLSPass.cpp b/lib/Target/AArch64/AArch64CleanupLocalDynamicTLSPass.cpp
index 3b74481..06ff9af 100644
--- a/lib/Target/AArch64/AArch64CleanupLocalDynamicTLSPass.cpp
+++ b/lib/Target/AArch64/AArch64CleanupLocalDynamicTLSPass.cpp
@@ -62,10 +62,10 @@ struct LDTLSCleanup : public MachineFunctionPass {
for (MachineBasicBlock::iterator I = BB->begin(), E = BB->end(); I != E;
++I) {
switch (I->getOpcode()) {
- case AArch64::TLSDESC_BLR:
+ case AArch64::TLSDESC_CALLSEQ:
// Make sure it's a local dynamic access.
- if (!I->getOperand(1).isSymbol() ||
- strcmp(I->getOperand(1).getSymbolName(), "_TLS_MODULE_BASE_"))
+ if (!I->getOperand(0).isSymbol() ||
+ strcmp(I->getOperand(0).getSymbolName(), "_TLS_MODULE_BASE_"))
break;
if (TLSBaseAddrReg)
diff --git a/lib/Target/AArch64/AArch64CollectLOH.cpp b/lib/Target/AArch64/AArch64CollectLOH.cpp
index 938dcb3..568f258 100644
--- a/lib/Target/AArch64/AArch64CollectLOH.cpp
+++ b/lib/Target/AArch64/AArch64CollectLOH.cpp
@@ -279,7 +279,7 @@ static const SetOfMachineInstr *getUses(const InstrToInstrs *sets, unsigned reg,
/// definition. It also considers definitions of ADRP instructions as uses and
/// ignores other uses. The ADRPMode is used to collect the information for LOHs
/// that involve ADRP operations only.
-static void initReachingDef(MachineFunction &MF,
+static void initReachingDef(const MachineFunction &MF,
InstrToInstrs *ColorOpToReachedUses,
BlockToInstrPerColor &Gen, BlockToRegSet &Kill,
BlockToSetOfInstrsPerColor &ReachableUses,
@@ -288,7 +288,7 @@ static void initReachingDef(MachineFunction &MF,
const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
unsigned NbReg = RegToId.size();
- for (MachineBasicBlock &MBB : MF) {
+ for (const MachineBasicBlock &MBB : MF) {
auto &BBGen = Gen[&MBB];
BBGen = make_unique<const MachineInstr *[]>(NbReg);
std::fill(BBGen.get(), BBGen.get() + NbReg, nullptr);
@@ -382,7 +382,7 @@ static void initReachingDef(MachineFunction &MF,
/// op.reachedUses
///
/// Out[bb] = Gen[bb] U (In[bb] - Kill[bb])
-static void reachingDefAlgorithm(MachineFunction &MF,
+static void reachingDefAlgorithm(const MachineFunction &MF,
InstrToInstrs *ColorOpToReachedUses,
BlockToSetOfInstrsPerColor &In,
BlockToSetOfInstrsPerColor &Out,
@@ -392,7 +392,7 @@ static void reachingDefAlgorithm(MachineFunction &MF,
bool HasChanged;
do {
HasChanged = false;
- for (MachineBasicBlock &MBB : MF) {
+ for (const MachineBasicBlock &MBB : MF) {
unsigned CurReg;
for (CurReg = 0; CurReg < NbReg; ++CurReg) {
SetOfMachineInstr &BBInSet = getSet(In, MBB, CurReg, NbReg);
@@ -401,7 +401,7 @@ static void reachingDefAlgorithm(MachineFunction &MF,
SetOfMachineInstr &BBOutSet = getSet(Out, MBB, CurReg, NbReg);
unsigned Size = BBOutSet.size();
// In[bb][color] = U Out[bb.predecessors][color]
- for (MachineBasicBlock *PredMBB : MBB.predecessors()) {
+ for (const MachineBasicBlock *PredMBB : MBB.predecessors()) {
SetOfMachineInstr &PredOutSet = getSet(Out, *PredMBB, CurReg, NbReg);
BBInSet.insert(PredOutSet.begin(), PredOutSet.end());
}
@@ -433,7 +433,7 @@ static void reachingDefAlgorithm(MachineFunction &MF,
/// @p DummyOp.
/// \pre ColorOpToReachedUses is an array of at least the number of registers
/// of InstrToInstrs.
-static void reachingDef(MachineFunction &MF,
+static void reachingDef(const MachineFunction &MF,
InstrToInstrs *ColorOpToReachedUses,
const MapRegToId &RegToId, bool ADRPMode = false,
const MachineInstr *DummyOp = nullptr) {
@@ -983,7 +983,7 @@ static void computeOthers(const InstrToInstrs &UseToDefs,
/// Look for every register defined by potential LOH candidates.
/// Map these registers with a dense id in @p RegToId and vice-versa in
/// @p IdToReg. @p IdToReg is populated only in DEBUG mode.
-static void collectInvolvedReg(MachineFunction &MF, MapRegToId &RegToId,
+static void collectInvolvedReg(const MachineFunction &MF, MapRegToId &RegToId,
MapIdToReg &IdToReg,
const TargetRegisterInfo *TRI) {
unsigned CurRegId = 0;
diff --git a/lib/Target/AArch64/AArch64FastISel.cpp b/lib/Target/AArch64/AArch64FastISel.cpp
index 61017c1..99cb641 100644
--- a/lib/Target/AArch64/AArch64FastISel.cpp
+++ b/lib/Target/AArch64/AArch64FastISel.cpp
@@ -3158,7 +3158,7 @@ bool AArch64FastISel::fastLowerCall(CallLoweringInfo &CLI) {
// Add a register mask with the call-preserved registers.
// Proper defs for return values will be added by setPhysRegsDeadExcept().
- MIB.addRegMask(TRI.getCallPreservedMask(CC));
+ MIB.addRegMask(TRI.getCallPreservedMask(*FuncInfo.MF, CC));
CLI.Call = MIB;
@@ -4563,7 +4563,7 @@ bool AArch64FastISel::selectShift(const Instruction *I) {
unsigned ResultReg = 0;
uint64_t ShiftVal = C->getZExtValue();
MVT SrcVT = RetVT;
- bool IsZExt = (I->getOpcode() == Instruction::AShr) ? false : true;
+ bool IsZExt = I->getOpcode() != Instruction::AShr;
const Value *Op0 = I->getOperand(0);
if (const auto *ZExt = dyn_cast<ZExtInst>(Op0)) {
if (!isIntExtFree(ZExt)) {
diff --git a/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp b/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
index ac11c4d..0a47dcb 100644
--- a/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
+++ b/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
@@ -65,7 +65,7 @@ public:
/// SelectInlineAsmMemoryOperand - Implement addressing mode selection for
/// inline asm expressions.
bool SelectInlineAsmMemoryOperand(const SDValue &Op,
- char ConstraintCode,
+ unsigned ConstraintID,
std::vector<SDValue> &OutOps) override;
SDNode *SelectMLAV64LaneV128(SDNode *N);
@@ -211,13 +211,20 @@ static bool isOpcWithIntImmediate(const SDNode *N, unsigned Opc,
}
bool AArch64DAGToDAGISel::SelectInlineAsmMemoryOperand(
- const SDValue &Op, char ConstraintCode, std::vector<SDValue> &OutOps) {
- assert(ConstraintCode == 'm' && "unexpected asm memory constraint");
- // Require the address to be in a register. That is safe for all AArch64
- // variants and it is hard to do anything much smarter without knowing
- // how the operand is used.
- OutOps.push_back(Op);
- return false;
+ const SDValue &Op, unsigned ConstraintID, std::vector<SDValue> &OutOps) {
+ switch(ConstraintID) {
+ default:
+ llvm_unreachable("Unexpected asm memory constraint");
+ case InlineAsm::Constraint_i:
+ case InlineAsm::Constraint_m:
+ case InlineAsm::Constraint_Q:
+ // Require the address to be in a register. That is safe for all AArch64
+ // variants and it is hard to do anything much smarter without knowing
+ // how the operand is used.
+ OutOps.push_back(Op);
+ return false;
+ }
+ return true;
}
/// SelectArithImmed - Select an immediate value that can be represented as
@@ -299,7 +306,7 @@ static AArch64_AM::ShiftExtendType getShiftTypeForNode(SDValue N) {
}
}
-/// \brief Determine wether it is worth to fold V into an extended register.
+/// \brief Determine whether it is worth folding V into an extended register.
bool AArch64DAGToDAGISel::isWorthFolding(SDValue V) const {
// It hurts if the value is used at least twice, unless we are optimizing
// for code size.
@@ -1055,7 +1062,7 @@ SDNode *AArch64DAGToDAGISel::SelectLoad(SDNode *N, unsigned NumVecs,
SDValue Ops[] = {N->getOperand(2), // Mem operand;
Chain};
- EVT ResTys[] = {MVT::Untyped, MVT::Other};
+ const EVT ResTys[] = {MVT::Untyped, MVT::Other};
SDNode *Ld = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
SDValue SuperReg = SDValue(Ld, 0);
@@ -1077,8 +1084,8 @@ SDNode *AArch64DAGToDAGISel::SelectPostLoad(SDNode *N, unsigned NumVecs,
N->getOperand(2), // Incremental
Chain};
- EVT ResTys[] = {MVT::i64, // Type of the write back register
- MVT::Untyped, MVT::Other};
+ const EVT ResTys[] = {MVT::i64, // Type of the write back register
+ MVT::Untyped, MVT::Other};
SDNode *Ld = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
@@ -1119,8 +1126,8 @@ SDNode *AArch64DAGToDAGISel::SelectPostStore(SDNode *N, unsigned NumVecs,
unsigned Opc) {
SDLoc dl(N);
EVT VT = N->getOperand(2)->getValueType(0);
- EVT ResTys[] = {MVT::i64, // Type of the write back register
- MVT::Other}; // Type for the Chain
+ const EVT ResTys[] = {MVT::i64, // Type of the write back register
+ MVT::Other}; // Type for the Chain
// Form a REG_SEQUENCE to force register allocation.
bool Is128Bit = VT.getSizeInBits() == 128;
@@ -1136,6 +1143,7 @@ SDNode *AArch64DAGToDAGISel::SelectPostStore(SDNode *N, unsigned NumVecs,
return St;
}
+namespace {
/// WidenVector - Given a value in the V64 register class, produce the
/// equivalent value in the V128 register class.
class WidenVector {
@@ -1156,6 +1164,7 @@ public:
return DAG.getTargetInsertSubreg(AArch64::dsub, DL, WideTy, Undef, V64Reg);
}
};
+} // namespace
/// NarrowVector - Given a value in the V128 register class, produce the
/// equivalent value in the V64 register class.
@@ -1184,7 +1193,7 @@ SDNode *AArch64DAGToDAGISel::SelectLoadLane(SDNode *N, unsigned NumVecs,
SDValue RegSeq = createQTuple(Regs);
- EVT ResTys[] = {MVT::Untyped, MVT::Other};
+ const EVT ResTys[] = {MVT::Untyped, MVT::Other};
unsigned LaneNo =
cast<ConstantSDNode>(N->getOperand(NumVecs + 2))->getZExtValue();
@@ -1224,8 +1233,8 @@ SDNode *AArch64DAGToDAGISel::SelectPostLoadLane(SDNode *N, unsigned NumVecs,
SDValue RegSeq = createQTuple(Regs);
- EVT ResTys[] = {MVT::i64, // Type of the write back register
- MVT::Untyped, MVT::Other};
+ const EVT ResTys[] = {MVT::i64, // Type of the write back register
+ MVT::Untyped, MVT::Other};
unsigned LaneNo =
cast<ConstantSDNode>(N->getOperand(NumVecs + 1))->getZExtValue();
@@ -1309,8 +1318,8 @@ SDNode *AArch64DAGToDAGISel::SelectPostStoreLane(SDNode *N, unsigned NumVecs,
SDValue RegSeq = createQTuple(Regs);
- EVT ResTys[] = {MVT::i64, // Type of the write back register
- MVT::Other};
+ const EVT ResTys[] = {MVT::i64, // Type of the write back register
+ MVT::Other};
unsigned LaneNo =
cast<ConstantSDNode>(N->getOperand(NumVecs + 1))->getZExtValue();
diff --git a/lib/Target/AArch64/AArch64ISelLowering.cpp b/lib/Target/AArch64/AArch64ISelLowering.cpp
index a1b324e..0c0e856 100644
--- a/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -64,8 +64,16 @@ EnableAArch64ExtrGeneration("aarch64-extr-generation", cl::Hidden,
static cl::opt<bool>
EnableAArch64SlrGeneration("aarch64-shift-insert-generation", cl::Hidden,
- cl::desc("Allow AArch64 SLI/SRI formation"),
- cl::init(false));
+ cl::desc("Allow AArch64 SLI/SRI formation"),
+ cl::init(false));
+
+// FIXME: The necessary dtprel relocations don't seem to be supported
+// well in the GNU bfd and gold linkers at the moment. Therefore, for now,
+// fall back to GeneralDynamic code generation by default.
+cl::opt<bool> EnableAArch64ELFLocalDynamicTLSGeneration(
+ "aarch64-elf-ldtls-generation", cl::Hidden,
+ cl::desc("Allow AArch64 Local Dynamic TLS code generation"),
+ cl::init(false));
AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
const AArch64Subtarget &STI)
@@ -362,9 +370,7 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
setOperationAction(ISD::FLOG10, MVT::v8f16, Expand);
// AArch64 has implementations of a lot of rounding-like FP operations.
- static MVT RoundingTypes[] = { MVT::f32, MVT::f64};
- for (unsigned I = 0; I < array_lengthof(RoundingTypes); ++I) {
- MVT Ty = RoundingTypes[I];
+ for (MVT Ty : {MVT::f32, MVT::f64}) {
setOperationAction(ISD::FFLOOR, Ty, Legal);
setOperationAction(ISD::FNEARBYINT, Ty, Legal);
setOperationAction(ISD::FCEIL, Ty, Legal);
@@ -561,9 +567,7 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
}
// AArch64 has implementations of a lot of rounding-like FP operations.
- static MVT RoundingVecTypes[] = {MVT::v2f32, MVT::v4f32, MVT::v2f64 };
- for (unsigned I = 0; I < array_lengthof(RoundingVecTypes); ++I) {
- MVT Ty = RoundingVecTypes[I];
+ for (MVT Ty : {MVT::v2f32, MVT::v4f32, MVT::v2f64}) {
setOperationAction(ISD::FFLOOR, Ty, Legal);
setOperationAction(ISD::FNEARBYINT, Ty, Legal);
setOperationAction(ISD::FCEIL, Ty, Legal);
@@ -752,7 +756,7 @@ const char *AArch64TargetLowering::getTargetNodeName(unsigned Opcode) const {
case AArch64ISD::CSNEG: return "AArch64ISD::CSNEG";
case AArch64ISD::CSINC: return "AArch64ISD::CSINC";
case AArch64ISD::THREAD_POINTER: return "AArch64ISD::THREAD_POINTER";
- case AArch64ISD::TLSDESC_CALL: return "AArch64ISD::TLSDESC_CALL";
+ case AArch64ISD::TLSDESC_CALLSEQ: return "AArch64ISD::TLSDESC_CALLSEQ";
case AArch64ISD::ADC: return "AArch64ISD::ADC";
case AArch64ISD::SBC: return "AArch64ISD::SBC";
case AArch64ISD::ADDS: return "AArch64ISD::ADDS";
@@ -811,6 +815,12 @@ const char *AArch64TargetLowering::getTargetNodeName(unsigned Opcode) const {
case AArch64ISD::FCMGTz: return "AArch64ISD::FCMGTz";
case AArch64ISD::FCMLEz: return "AArch64ISD::FCMLEz";
case AArch64ISD::FCMLTz: return "AArch64ISD::FCMLTz";
+ case AArch64ISD::SADDV: return "AArch64ISD::SADDV";
+ case AArch64ISD::UADDV: return "AArch64ISD::UADDV";
+ case AArch64ISD::SMINV: return "AArch64ISD::SMINV";
+ case AArch64ISD::UMINV: return "AArch64ISD::UMINV";
+ case AArch64ISD::SMAXV: return "AArch64ISD::SMAXV";
+ case AArch64ISD::UMAXV: return "AArch64ISD::UMAXV";
case AArch64ISD::NOT: return "AArch64ISD::NOT";
case AArch64ISD::BIT: return "AArch64ISD::BIT";
case AArch64ISD::CBZ: return "AArch64ISD::CBZ";
@@ -1247,7 +1257,7 @@ getAArch64XALUOOp(AArch64CC::CondCode &CC, SDValue Op, SelectionDAG &DAG) {
case ISD::SMULO:
case ISD::UMULO: {
CC = AArch64CC::NE;
- bool IsSigned = (Op.getOpcode() == ISD::SMULO) ? true : false;
+ bool IsSigned = Op.getOpcode() == ISD::SMULO;
if (Op.getValueType() == MVT::i32) {
unsigned ExtendOpc = IsSigned ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
// For a 32 bit multiply with overflow check we want the instruction
@@ -2784,13 +2794,13 @@ AArch64TargetLowering::LowerCall(CallLoweringInfo &CLI,
const AArch64RegisterInfo *TRI = Subtarget->getRegisterInfo();
if (IsThisReturn) {
// For 'this' returns, use the X0-preserving mask if applicable
- Mask = TRI->getThisReturnPreservedMask(CallConv);
+ Mask = TRI->getThisReturnPreservedMask(MF, CallConv);
if (!Mask) {
IsThisReturn = false;
- Mask = TRI->getCallPreservedMask(CallConv);
+ Mask = TRI->getCallPreservedMask(MF, CallConv);
}
} else
- Mask = TRI->getCallPreservedMask(CallConv);
+ Mask = TRI->getCallPreservedMask(MF, CallConv);
assert(Mask && "Missing call preserved mask for calling convention");
Ops.push_back(DAG.getRegisterMask(Mask));
@@ -3027,58 +3037,34 @@ AArch64TargetLowering::LowerDarwinGlobalTLSAddress(SDValue Op,
/// When accessing thread-local variables under either the general-dynamic or
/// local-dynamic system, we make a "TLS-descriptor" call. The variable will
/// have a descriptor, accessible via a PC-relative ADRP, and whose first entry
-/// is a function pointer to carry out the resolution. This function takes the
-/// address of the descriptor in X0 and returns the TPIDR_EL0 offset in X0. All
-/// other registers (except LR, NZCV) are preserved.
-///
-/// Thus, the ideal call sequence on AArch64 is:
-///
-/// adrp x0, :tlsdesc:thread_var
-/// ldr x8, [x0, :tlsdesc_lo12:thread_var]
-/// add x0, x0, :tlsdesc_lo12:thread_var
-/// .tlsdesccall thread_var
-/// blr x8
-/// (TPIDR_EL0 offset now in x0).
+/// is a function pointer to carry out the resolution.
///
-/// The ".tlsdesccall" directive instructs the assembler to insert a particular
-/// relocation to help the linker relax this sequence if it turns out to be too
-/// conservative.
+/// The sequence is:
+/// adrp x0, :tlsdesc:var
+/// ldr x1, [x0, #:tlsdesc_lo12:var]
+/// add x0, x0, #:tlsdesc_lo12:var
+/// .tlsdesccall var
+/// blr x1
+/// (TPIDR_EL0 offset now in x0)
///
-/// FIXME: we currently produce an extra, duplicated, ADRP instruction, but this
-/// is harmless.
-SDValue AArch64TargetLowering::LowerELFTLSDescCall(SDValue SymAddr,
- SDValue DescAddr, SDLoc DL,
- SelectionDAG &DAG) const {
+/// The above sequence must be produced unscheduled so that the linker can
+/// optimize/relax it.
+/// Therefore, a pseudo-instruction (TLSDESC_CALLSEQ) represents the whole
+/// sequence and is expanded very late in the compilation flow, to ensure the
+/// instructions are emitted exactly as above.
+SDValue AArch64TargetLowering::LowerELFTLSDescCallSeq(SDValue SymAddr, SDLoc DL,
+ SelectionDAG &DAG) const {
EVT PtrVT = getPointerTy();
- // The function we need to call is simply the first entry in the GOT for this
- // descriptor, load it in preparation.
- SDValue Func = DAG.getNode(AArch64ISD::LOADgot, DL, PtrVT, SymAddr);
-
- // TLS calls preserve all registers except those that absolutely must be
- // trashed: X0 (it takes an argument), LR (it's a call) and NZCV (let's not be
- // silly).
- const uint32_t *Mask =
- Subtarget->getRegisterInfo()->getTLSCallPreservedMask();
-
- // The function takes only one argument: the address of the descriptor itself
- // in X0.
- SDValue Glue, Chain;
- Chain = DAG.getCopyToReg(DAG.getEntryNode(), DL, AArch64::X0, DescAddr, Glue);
- Glue = Chain.getValue(1);
+ SDValue Chain = DAG.getEntryNode();
+ SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
- // We're now ready to populate the argument list, as with a normal call:
- SmallVector<SDValue, 6> Ops;
+ SmallVector<SDValue, 2> Ops;
Ops.push_back(Chain);
- Ops.push_back(Func);
Ops.push_back(SymAddr);
- Ops.push_back(DAG.getRegister(AArch64::X0, PtrVT));
- Ops.push_back(DAG.getRegisterMask(Mask));
- Ops.push_back(Glue);
- SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
- Chain = DAG.getNode(AArch64ISD::TLSDESC_CALL, DL, NodeTys, Ops);
- Glue = Chain.getValue(1);
+ Chain = DAG.getNode(AArch64ISD::TLSDESC_CALLSEQ, DL, NodeTys, Ops);
+ SDValue Glue = Chain.getValue(1);
return DAG.getCopyFromReg(Chain, DL, AArch64::X0, PtrVT, Glue);
}
@@ -3089,9 +3075,18 @@ AArch64TargetLowering::LowerELFGlobalTLSAddress(SDValue Op,
assert(Subtarget->isTargetELF() && "This function expects an ELF target");
assert(getTargetMachine().getCodeModel() == CodeModel::Small &&
"ELF TLS only supported in small memory model");
+ // Different choices can be made for the maximum size of the TLS area for a
+ // module. For the small address model, the default TLS size is 16MiB and the
+ // maximum TLS size is 4GiB.
+ // FIXME: add -mtls-size command line option and make it control the 16MiB
+ // vs. 4GiB code sequence generation.
const GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Op);
TLSModel::Model Model = getTargetMachine().getTLSModel(GA->getGlobal());
+ if (!EnableAArch64ELFLocalDynamicTLSGeneration) {
+ if (Model == TLSModel::LocalDynamic)
+ Model = TLSModel::GeneralDynamic;
+ }
SDValue TPOff;
EVT PtrVT = getPointerTy();
@@ -3102,17 +3097,20 @@ AArch64TargetLowering::LowerELFGlobalTLSAddress(SDValue Op,
if (Model == TLSModel::LocalExec) {
SDValue HiVar = DAG.getTargetGlobalAddress(
- GV, DL, PtrVT, 0, AArch64II::MO_TLS | AArch64II::MO_G1);
+ GV, DL, PtrVT, 0, AArch64II::MO_TLS | AArch64II::MO_HI12);
SDValue LoVar = DAG.getTargetGlobalAddress(
GV, DL, PtrVT, 0,
- AArch64II::MO_TLS | AArch64II::MO_G0 | AArch64II::MO_NC);
+ AArch64II::MO_TLS | AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
- TPOff = SDValue(DAG.getMachineNode(AArch64::MOVZXi, DL, PtrVT, HiVar,
- DAG.getTargetConstant(16, MVT::i32)),
- 0);
- TPOff = SDValue(DAG.getMachineNode(AArch64::MOVKXi, DL, PtrVT, TPOff, LoVar,
- DAG.getTargetConstant(0, MVT::i32)),
- 0);
+ SDValue TPWithOff_lo =
+ SDValue(DAG.getMachineNode(AArch64::ADDXri, DL, PtrVT, ThreadBase,
+ HiVar, DAG.getTargetConstant(0, MVT::i32)),
+ 0);
+ SDValue TPWithOff =
+ SDValue(DAG.getMachineNode(AArch64::ADDXri, DL, PtrVT, TPWithOff_lo,
+ LoVar, DAG.getTargetConstant(0, MVT::i32)),
+ 0);
+ return TPWithOff;
} else if (Model == TLSModel::InitialExec) {
TPOff = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, AArch64II::MO_TLS);
TPOff = DAG.getNode(AArch64ISD::LOADgot, DL, PtrVT, TPOff);
@@ -3127,19 +3125,6 @@ AArch64TargetLowering::LowerELFGlobalTLSAddress(SDValue Op,
DAG.getMachineFunction().getInfo<AArch64FunctionInfo>();
MFI->incNumLocalDynamicTLSAccesses();
- // Accesses used in this sequence go via the TLS descriptor which lives in
- // the GOT. Prepare an address we can use to handle this.
- SDValue HiDesc = DAG.getTargetExternalSymbol(
- "_TLS_MODULE_BASE_", PtrVT, AArch64II::MO_TLS | AArch64II::MO_PAGE);
- SDValue LoDesc = DAG.getTargetExternalSymbol(
- "_TLS_MODULE_BASE_", PtrVT,
- AArch64II::MO_TLS | AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
-
- // First argument to the descriptor call is the address of the descriptor
- // itself.
- SDValue DescAddr = DAG.getNode(AArch64ISD::ADRP, DL, PtrVT, HiDesc);
- DescAddr = DAG.getNode(AArch64ISD::ADDlow, DL, PtrVT, DescAddr, LoDesc);
-
// The call needs a relocation too for linker relaxation. It doesn't make
// sense to call it MO_PAGE or MO_PAGEOFF, though, so we need another copy of
// the address.
@@ -3148,40 +3133,23 @@ AArch64TargetLowering::LowerELFGlobalTLSAddress(SDValue Op,
// Now we can calculate the offset from TPIDR_EL0 to this module's
// thread-local area.
- TPOff = LowerELFTLSDescCall(SymAddr, DescAddr, DL, DAG);
+ TPOff = LowerELFTLSDescCallSeq(SymAddr, DL, DAG);
// Now use :dtprel_whatever: operations to calculate this variable's offset
// in its thread-storage area.
SDValue HiVar = DAG.getTargetGlobalAddress(
- GV, DL, MVT::i64, 0, AArch64II::MO_TLS | AArch64II::MO_G1);
+ GV, DL, MVT::i64, 0, AArch64II::MO_TLS | AArch64II::MO_HI12);
SDValue LoVar = DAG.getTargetGlobalAddress(
GV, DL, MVT::i64, 0,
- AArch64II::MO_TLS | AArch64II::MO_G0 | AArch64II::MO_NC);
-
- SDValue DTPOff =
- SDValue(DAG.getMachineNode(AArch64::MOVZXi, DL, PtrVT, HiVar,
- DAG.getTargetConstant(16, MVT::i32)),
- 0);
- DTPOff =
- SDValue(DAG.getMachineNode(AArch64::MOVKXi, DL, PtrVT, DTPOff, LoVar,
- DAG.getTargetConstant(0, MVT::i32)),
- 0);
-
- TPOff = DAG.getNode(ISD::ADD, DL, PtrVT, TPOff, DTPOff);
- } else if (Model == TLSModel::GeneralDynamic) {
- // Accesses used in this sequence go via the TLS descriptor which lives in
- // the GOT. Prepare an address we can use to handle this.
- SDValue HiDesc = DAG.getTargetGlobalAddress(
- GV, DL, PtrVT, 0, AArch64II::MO_TLS | AArch64II::MO_PAGE);
- SDValue LoDesc = DAG.getTargetGlobalAddress(
- GV, DL, PtrVT, 0,
AArch64II::MO_TLS | AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
- // First argument to the descriptor call is the address of the descriptor
- // itself.
- SDValue DescAddr = DAG.getNode(AArch64ISD::ADRP, DL, PtrVT, HiDesc);
- DescAddr = DAG.getNode(AArch64ISD::ADDlow, DL, PtrVT, DescAddr, LoDesc);
-
+ TPOff = SDValue(DAG.getMachineNode(AArch64::ADDXri, DL, PtrVT, TPOff, HiVar,
+ DAG.getTargetConstant(0, MVT::i32)),
+ 0);
+ TPOff = SDValue(DAG.getMachineNode(AArch64::ADDXri, DL, PtrVT, TPOff, LoVar,
+ DAG.getTargetConstant(0, MVT::i32)),
+ 0);
+ } else if (Model == TLSModel::GeneralDynamic) {
// The call needs a relocation too for linker relaxation. It doesn't make
// sense to call it MO_PAGE or MO_PAGEOFF, though, so we need another copy of
// the address.
@@ -3189,7 +3157,7 @@ AArch64TargetLowering::LowerELFGlobalTLSAddress(SDValue Op,
DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, AArch64II::MO_TLS);
// Finally we can make a call to calculate the offset from tpidr_el0.
- TPOff = LowerELFTLSDescCall(SymAddr, DescAddr, DL, DAG);
+ TPOff = LowerELFTLSDescCallSeq(SymAddr, DL, DAG);
} else
llvm_unreachable("Unsupported ELF TLS access model");
@@ -3356,11 +3324,12 @@ SDValue AArch64TargetLowering::LowerFCOPYSIGN(SDValue Op,
EVT VecVT;
EVT EltVT;
- SDValue EltMask, VecVal1, VecVal2;
+ uint64_t EltMask;
+ SDValue VecVal1, VecVal2;
if (VT == MVT::f32 || VT == MVT::v2f32 || VT == MVT::v4f32) {
EltVT = MVT::i32;
VecVT = MVT::v4i32;
- EltMask = DAG.getConstant(0x80000000ULL, EltVT);
+ EltMask = 0x80000000ULL;
if (!VT.isVector()) {
VecVal1 = DAG.getTargetInsertSubreg(AArch64::ssub, DL, VecVT,
@@ -3378,7 +3347,7 @@ SDValue AArch64TargetLowering::LowerFCOPYSIGN(SDValue Op,
// We want to materialize a mask with the high bit set, but the AdvSIMD
// immediate moves cannot materialize that in a single instruction for
// 64-bit elements. Instead, materialize zero and then negate it.
- EltMask = DAG.getConstant(0, EltVT);
+ EltMask = 0;
if (!VT.isVector()) {
VecVal1 = DAG.getTargetInsertSubreg(AArch64::dsub, DL, VecVT,
@@ -3393,11 +3362,7 @@ SDValue AArch64TargetLowering::LowerFCOPYSIGN(SDValue Op,
llvm_unreachable("Invalid type for copysign!");
}
- std::vector<SDValue> BuildVectorOps;
- for (unsigned i = 0; i < VecVT.getVectorNumElements(); ++i)
- BuildVectorOps.push_back(EltMask);
-
- SDValue BuildVec = DAG.getNode(ISD::BUILD_VECTOR, DL, VecVT, BuildVectorOps);
+ SDValue BuildVec = DAG.getConstant(EltMask, VecVT);
// If we couldn't materialize the mask above, then the mask vector will be
// the zero vector, and we need to negate it here.
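For the copysign lowering above, EltMask selects only the sign bit of each
lane, so the result takes its magnitude from the first operand and its sign
from the second. A scalar model of the f32 case (illustrative only; not part
of the patch):

    #include <cassert>
    #include <cstdint>
    #include <cstring>

    int main() {
      const uint32_t EltMask = 0x80000000u;  // sign bit of an f32 lane
      float X = 1.5f, Y = -2.0f;
      uint32_t XB, YB;
      std::memcpy(&XB, &X, 4);
      std::memcpy(&YB, &Y, 4);
      // copysign(X, Y): magnitude bits from X, sign bit from Y.
      uint32_t RB = (XB & ~EltMask) | (YB & EltMask);
      float R;
      std::memcpy(&R, &RB, 4);
      assert(R == -1.5f);
      return 0;
    }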
@@ -5927,8 +5892,10 @@ FailedModImm:
if (VT.getVectorElementType().isFloatingPoint()) {
SmallVector<SDValue, 8> Ops;
- MVT NewType =
- (VT.getVectorElementType() == MVT::f32) ? MVT::i32 : MVT::i64;
+ EVT EltTy = VT.getVectorElementType();
+ assert((EltTy == MVT::f16 || EltTy == MVT::f32 || EltTy == MVT::f64) &&
+ "Unsupported floating-point vector type");
+ MVT NewType = MVT::getIntegerVT(EltTy.getSizeInBits());
for (unsigned i = 0; i < NumElts; ++i)
Ops.push_back(DAG.getNode(ISD::BITCAST, dl, NewType, Op.getOperand(i)));
EVT VecVT = EVT::getVectorVT(*DAG.getContext(), NewType, NumElts);
@@ -6781,7 +6748,7 @@ bool AArch64TargetLowering::shouldConvertConstantLoadToIntImm(const APInt &Imm,
unsigned LZ = countLeadingZeros((uint64_t)Val);
unsigned Shift = (63 - LZ) / 16;
// MOVZ is free so return true for one or fewer MOVK.
- return (Shift < 3) ? true : false;
+ return Shift < 3;
}
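Shift here is the index of the highest nonzero 16-bit chunk of the constant,
i.e. the maximum number of MOVKs that would follow the initial MOVZ. A worked
example with a hypothetical value (not part of the patch; clz64 stands in for
LLVM's countLeadingZeros via a GCC/Clang builtin):

    #include <cassert>
    #include <cstdint>

    static unsigned clz64(uint64_t V) { return V ? __builtin_clzll(V) : 64; }

    int main() {
      // 0x123456789 has 31 leading zeros: its top set bit is bit 32, so
      // Shift = (63 - 31) / 16 = 2, i.e. one MOVZ plus up to two MOVKs.
      uint64_t Val = 0x123456789ULL;
      unsigned Shift = (63 - clz64(Val)) / 16;
      assert(Shift == 2);
      assert(Shift < 3);  // so the hook returns true: still cheap enough
      return 0;
    }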
// Generate SUBS and CSEL for integer abs.
@@ -6898,6 +6865,15 @@ static SDValue performMulCombine(SDNode *N, SelectionDAG &DAG,
N->getOperand(0));
}
} else {
+ // (mul x, -(2^N - 1)) => (sub x, (shl x, N))
+ APInt VNP1 = -Value + 1;
+ if (VNP1.isPowerOf2()) {
+ SDValue ShiftedVal =
+ DAG.getNode(ISD::SHL, SDLoc(N), VT, N->getOperand(0),
+ DAG.getConstant(VNP1.logBase2(), MVT::i64));
+ return DAG.getNode(ISD::SUB, SDLoc(N), VT, N->getOperand(0),
+ ShiftedVal);
+ }
// (mul x, -(2^N + 1)) => - (add (shl x, N), x)
APInt VNM1 = -Value - 1;
if (VNM1.isPowerOf2()) {
@@ -6908,15 +6884,6 @@ static SDValue performMulCombine(SDNode *N, SelectionDAG &DAG,
DAG.getNode(ISD::ADD, SDLoc(N), VT, ShiftedVal, N->getOperand(0));
return DAG.getNode(ISD::SUB, SDLoc(N), VT, DAG.getConstant(0, VT), Add);
}
- // (mul x, -(2^N - 1)) => (sub x, (shl x, N))
- APInt VNP1 = -Value + 1;
- if (VNP1.isPowerOf2()) {
- SDValue ShiftedVal =
- DAG.getNode(ISD::SHL, SDLoc(N), VT, N->getOperand(0),
- DAG.getConstant(VNP1.logBase2(), MVT::i64));
- return DAG.getNode(ISD::SUB, SDLoc(N), VT, N->getOperand(0),
- ShiftedVal);
- }
}
}
return SDValue();
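Both folds above are plain strength reductions: multiplying by -(2^N - 1) is a
shift and a subtract, and multiplying by -(2^N + 1) is a shift, an add, and a
negate. A standalone check of the algebra with hypothetical values (not part
of the patch):

    #include <cassert>
    #include <cstdint>

    int main() {
      int64_t x = 12345;
      unsigned N = 3;
      // C = -(2^N - 1) = -7:  x * -7 == x - (x << 3)
      assert(x * -((INT64_C(1) << N) - 1) == x - (x << N));
      // C = -(2^N + 1) = -9:  x * -9 == -((x << 3) + x)
      assert(x * -((INT64_C(1) << N) + 1) == -((x << N) + x));
      return 0;
    }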
@@ -7211,21 +7178,54 @@ static SDValue performBitcastCombine(SDNode *N,
static SDValue performConcatVectorsCombine(SDNode *N,
TargetLowering::DAGCombinerInfo &DCI,
SelectionDAG &DAG) {
+ SDLoc dl(N);
+ EVT VT = N->getValueType(0);
+ SDValue N0 = N->getOperand(0), N1 = N->getOperand(1);
+
+ // Optimize concat_vectors of truncated vectors, where the intermediate
+ // type is illegal, to avoid said illegality, e.g.,
+ // (v4i16 (concat_vectors (v2i16 (truncate (v2i64))),
+ // (v2i16 (truncate (v2i64)))))
+ // ->
+ // (v4i16 (truncate (vector_shuffle (v4i32 (bitcast (v2i64))),
+ // (v4i32 (bitcast (v2i64))),
+ // <0, 2, 4, 6>)))
+ // This isn't really target-specific, but ISD::TRUNCATE legality isn't keyed
+ // on both input and result type, so we might generate worse code.
+ // On AArch64 we know it's fine for v2i64->v4i16 and v4i32->v8i8.
+ if (N->getNumOperands() == 2 &&
+ N0->getOpcode() == ISD::TRUNCATE &&
+ N1->getOpcode() == ISD::TRUNCATE) {
+ SDValue N00 = N0->getOperand(0);
+ SDValue N10 = N1->getOperand(0);
+ EVT N00VT = N00.getValueType();
+
+ if (N00VT == N10.getValueType() &&
+ (N00VT == MVT::v2i64 || N00VT == MVT::v4i32) &&
+ N00VT.getScalarSizeInBits() == 4 * VT.getScalarSizeInBits()) {
+ MVT MidVT = (N00VT == MVT::v2i64 ? MVT::v4i32 : MVT::v8i16);
+ SmallVector<int, 8> Mask(MidVT.getVectorNumElements());
+ for (size_t i = 0; i < Mask.size(); ++i)
+ Mask[i] = i * 2;
+ return DAG.getNode(ISD::TRUNCATE, dl, VT,
+ DAG.getVectorShuffle(
+ MidVT, dl,
+ DAG.getNode(ISD::BITCAST, dl, MidVT, N00),
+ DAG.getNode(ISD::BITCAST, dl, MidVT, N10), Mask));
+ }
+ }
+
// Wait 'til after everything is legalized to try this. That way we have
// legal vector types and such.
if (DCI.isBeforeLegalizeOps())
return SDValue();
- SDLoc dl(N);
- EVT VT = N->getValueType(0);
-
// If we see a (concat_vectors (v1x64 A), (v1x64 A)) it's really a vector
// splat. The indexed instructions are going to be expecting a DUPLANE64, so
// canonicalise to that.
- if (N->getOperand(0) == N->getOperand(1) && VT.getVectorNumElements() == 2) {
+ if (N0 == N1 && VT.getVectorNumElements() == 2) {
assert(VT.getVectorElementType().getSizeInBits() == 64);
- return DAG.getNode(AArch64ISD::DUPLANE64, dl, VT,
- WidenVector(N->getOperand(0), DAG),
+ return DAG.getNode(AArch64ISD::DUPLANE64, dl, VT, WidenVector(N0, DAG),
DAG.getConstant(0, MVT::i64));
}
@@ -7238,10 +7238,9 @@ static SDValue performConcatVectorsCombine(SDNode *N,
// becomes
// (bitconvert (concat_vectors (v4i16 (bitconvert LHS)), RHS))
- SDValue Op1 = N->getOperand(1);
- if (Op1->getOpcode() != ISD::BITCAST)
+ if (N1->getOpcode() != ISD::BITCAST)
return SDValue();
- SDValue RHS = Op1->getOperand(0);
+ SDValue RHS = N1->getOperand(0);
MVT RHSTy = RHS.getValueType().getSimpleVT();
// If the RHS is not a vector, this is not the pattern we're looking for.
if (!RHSTy.isVector())
@@ -7251,10 +7250,10 @@ static SDValue performConcatVectorsCombine(SDNode *N,
MVT ConcatTy = MVT::getVectorVT(RHSTy.getVectorElementType(),
RHSTy.getVectorNumElements() * 2);
- return DAG.getNode(
- ISD::BITCAST, dl, VT,
- DAG.getNode(ISD::CONCAT_VECTORS, dl, ConcatTy,
- DAG.getNode(ISD::BITCAST, dl, RHSTy, N->getOperand(0)), RHS));
+ return DAG.getNode(ISD::BITCAST, dl, VT,
+ DAG.getNode(ISD::CONCAT_VECTORS, dl, ConcatTy,
+ DAG.getNode(ISD::BITCAST, dl, RHSTy, N0),
+ RHS));
}
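On a little-endian target, the concat-of-truncates rewrite above works because
bitcasting v2i64 to v4i32 puts each element's low half in the even lanes, so
the <0, 2, 4, 6> shuffle collects exactly the truncated values. A scalar model
(illustrative only; assumes little-endian, as the comment notes for AArch64):

    #include <cassert>
    #include <cstdint>
    #include <cstring>

    int main() {
      uint64_t A[2] = {0x1111222233334444ULL, 0x5555666677778888ULL};
      uint64_t B[2] = {0x9999AAAABBBBCCCCULL, 0xDDDDEEEEFFFF0000ULL};

      // Direct form: truncate each v2i64 to v2i16, then concatenate.
      uint16_t Direct[4] = {(uint16_t)A[0], (uint16_t)A[1],
                            (uint16_t)B[0], (uint16_t)B[1]};

      // Rewritten form: bitcast to v4i32, shuffle lanes <0,2,4,6>, truncate.
      uint32_t A32[4], B32[4];
      std::memcpy(A32, A, sizeof A);
      std::memcpy(B32, B, sizeof B);
      uint32_t Mid[4] = {A32[0], A32[2], B32[0], B32[2]};
      for (int i = 0; i < 4; ++i)
        assert((uint16_t)Mid[i] == Direct[i]);
      return 0;
    }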
static SDValue tryCombineFixedPointConvert(SDNode *N,
@@ -7651,6 +7650,15 @@ static SDValue tryCombineCRC32(unsigned Mask, SDNode *N, SelectionDAG &DAG) {
N->getOperand(0), N->getOperand(1), AndN.getOperand(0));
}
+static SDValue combineAcrossLanesIntrinsic(unsigned Opc, SDNode *N,
+ SelectionDAG &DAG) {
+ return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(N), N->getValueType(0),
+ DAG.getNode(Opc, SDLoc(N),
+ N->getOperand(1).getSimpleValueType(),
+ N->getOperand(1)),
+ DAG.getConstant(0, MVT::i64));
+}
+
static SDValue performIntrinsicCombine(SDNode *N,
TargetLowering::DAGCombinerInfo &DCI,
const AArch64Subtarget *Subtarget) {
@@ -7663,6 +7671,18 @@ static SDValue performIntrinsicCombine(SDNode *N,
case Intrinsic::aarch64_neon_vcvtfxu2fp:
return tryCombineFixedPointConvert(N, DCI, DAG);
break;
+ case Intrinsic::aarch64_neon_saddv:
+ return combineAcrossLanesIntrinsic(AArch64ISD::SADDV, N, DAG);
+ case Intrinsic::aarch64_neon_uaddv:
+ return combineAcrossLanesIntrinsic(AArch64ISD::UADDV, N, DAG);
+ case Intrinsic::aarch64_neon_sminv:
+ return combineAcrossLanesIntrinsic(AArch64ISD::SMINV, N, DAG);
+ case Intrinsic::aarch64_neon_uminv:
+ return combineAcrossLanesIntrinsic(AArch64ISD::UMINV, N, DAG);
+ case Intrinsic::aarch64_neon_smaxv:
+ return combineAcrossLanesIntrinsic(AArch64ISD::SMAXV, N, DAG);
+ case Intrinsic::aarch64_neon_umaxv:
+ return combineAcrossLanesIntrinsic(AArch64ISD::UMAXV, N, DAG);
case Intrinsic::aarch64_neon_fmax:
return DAG.getNode(AArch64ISD::FMAX, SDLoc(N), N->getValueType(0),
N->getOperand(1), N->getOperand(2));
@@ -8792,9 +8812,11 @@ bool AArch64TargetLowering::shouldExpandAtomicLoadInIR(LoadInst *LI) const {
}
// For the real atomic operations, we have ldxr/stxr up to 128 bits,
-bool AArch64TargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const {
+TargetLoweringBase::AtomicRMWExpansionKind
+AArch64TargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const {
unsigned Size = AI->getType()->getPrimitiveSizeInBits();
- return Size <= 128;
+ return Size <= 128 ? AtomicRMWExpansionKind::LLSC
+ : AtomicRMWExpansionKind::None;
}
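Returning AtomicRMWExpansionKind::LLSC asks the atomic expansion pass to
open-code the RMW as a load-linked/store-conditional retry loop rather than a
library call. The shape of that loop, modeled with a portable compare-exchange
stand-in (ldxr/stxr have no direct C++ equivalent; illustrative only):

    #include <atomic>
    #include <cassert>

    int fetch_add_llsc_shape(std::atomic<int> &A, int V) {
      int Old = A.load();                          // plays the "ldxr" role
      while (!A.compare_exchange_weak(Old, Old + V)) {
        // retry, like a failed "stxr" branching back to the "ldxr"
      }
      return Old;
    }

    int main() {
      std::atomic<int> A{40};
      assert(fetch_add_llsc_shape(A, 2) == 40 && A.load() == 42);
      return 0;
    }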
bool AArch64TargetLowering::hasLoadLinkedStoreConditional() const {
diff --git a/lib/Target/AArch64/AArch64ISelLowering.h b/lib/Target/AArch64/AArch64ISelLowering.h
index e973364..5ff11e8 100644
--- a/lib/Target/AArch64/AArch64ISelLowering.h
+++ b/lib/Target/AArch64/AArch64ISelLowering.h
@@ -30,9 +30,9 @@ enum {
WrapperLarge, // 4-instruction MOVZ/MOVK sequence for 64-bit addresses.
CALL, // Function call.
- // Almost the same as a normal call node, except that a TLSDesc relocation is
- // needed so the linker can relax it correctly if possible.
- TLSDESC_CALL,
+ // Produces the full sequence of instructions for getting the thread pointer
+ // offset of a variable into X0, using the TLSDesc model.
+ TLSDESC_CALLSEQ,
ADRP, // Page address of a TargetGlobalAddress operand.
ADDlow, // Add the low 12 bits of a TargetGlobalAddress operand.
LOADgot, // Load from automatically generated descriptor (e.g. Global
@@ -141,6 +141,18 @@ enum {
FCMLEz,
FCMLTz,
+ // Vector across-lanes addition
+ // Only the lower result lane is defined.
+ SADDV,
+ UADDV,
+
+ // Vector across-lanes min/max
+ // Only the lower result lane is defined.
+ SMINV,
+ UMINV,
+ SMAXV,
+ UMAXV,
+
// Vector bitwise negation
NOT,
@@ -335,7 +347,8 @@ public:
bool shouldExpandAtomicLoadInIR(LoadInst *LI) const override;
bool shouldExpandAtomicStoreInIR(StoreInst *SI) const override;
- bool shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const override;
+ TargetLoweringBase::AtomicRMWExpansionKind
+ shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const override;
bool useLoadStackGuardNode() const override;
TargetLoweringBase::LegalizeTypeAction
@@ -399,8 +412,8 @@ private:
SDValue LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerDarwinGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerELFGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const;
- SDValue LowerELFTLSDescCall(SDValue SymAddr, SDValue DescAddr, SDLoc DL,
- SelectionDAG &DAG) const;
+ SDValue LowerELFTLSDescCallSeq(SDValue SymAddr, SDLoc DL,
+ SelectionDAG &DAG) const;
SDValue LowerSETCC(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerBR_CC(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerSELECT(SDValue Op, SelectionDAG &DAG) const;
@@ -460,6 +473,16 @@ private:
std::vector<SDValue> &Ops,
SelectionDAG &DAG) const override;
+ unsigned getInlineAsmMemConstraint(
+ const std::string &ConstraintCode) const override {
+ if (ConstraintCode == "Q")
+ return InlineAsm::Constraint_Q;
+ // FIXME: clang has code for 'Ump', 'Utf', 'Usa', and 'Ush' but these are
+ // followed by llvm_unreachable so we'll leave them unimplemented in
+ // the backend for now.
+ return TargetLowering::getInlineAsmMemConstraint(ConstraintCode);
+ }
+
bool isUsedByReturnOnly(SDNode *N, SDValue &Chain) const override;
bool mayBeEmittedAsTailCall(CallInst *CI) const override;
bool getIndexedAddressParts(SDNode *Op, SDValue &Base, SDValue &Offset,
diff --git a/lib/Target/AArch64/AArch64InstrInfo.cpp b/lib/Target/AArch64/AArch64InstrInfo.cpp
index 64cec55..8e0af2d 100644
--- a/lib/Target/AArch64/AArch64InstrInfo.cpp
+++ b/lib/Target/AArch64/AArch64InstrInfo.cpp
@@ -31,7 +31,7 @@ using namespace llvm;
AArch64InstrInfo::AArch64InstrInfo(const AArch64Subtarget &STI)
: AArch64GenInstrInfo(AArch64::ADJCALLSTACKDOWN, AArch64::ADJCALLSTACKUP),
- RI(this, &STI), Subtarget(STI) {}
+ RI(STI.getTargetTriple()), Subtarget(STI) {}
/// GetInstSize - Return the number of bytes of code the specified
/// instruction may be. This returns the maximum number of bytes.
@@ -2068,10 +2068,10 @@ void llvm::emitFrameOffset(MachineBasicBlock &MBB,
.setMIFlag(Flag);
}
-MachineInstr *
-AArch64InstrInfo::foldMemoryOperandImpl(MachineFunction &MF, MachineInstr *MI,
- const SmallVectorImpl<unsigned> &Ops,
- int FrameIndex) const {
+MachineInstr *AArch64InstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
+ MachineInstr *MI,
+ ArrayRef<unsigned> Ops,
+ int FrameIndex) const {
// This is a bit of a hack. Consider this instruction:
//
// %vreg0<def> = COPY %SP; GPR64all:%vreg0
diff --git a/lib/Target/AArch64/AArch64InstrInfo.h b/lib/Target/AArch64/AArch64InstrInfo.h
index d8f1274..fa4b8b7 100644
--- a/lib/Target/AArch64/AArch64InstrInfo.h
+++ b/lib/Target/AArch64/AArch64InstrInfo.h
@@ -129,10 +129,9 @@ public:
const TargetRegisterInfo *TRI) const override;
using TargetInstrInfo::foldMemoryOperandImpl;
- MachineInstr *
- foldMemoryOperandImpl(MachineFunction &MF, MachineInstr *MI,
- const SmallVectorImpl<unsigned> &Ops,
- int FrameIndex) const override;
+ MachineInstr *foldMemoryOperandImpl(MachineFunction &MF, MachineInstr *MI,
+ ArrayRef<unsigned> Ops,
+ int FrameIndex) const override;
bool AnalyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB,
MachineBasicBlock *&FBB,
diff --git a/lib/Target/AArch64/AArch64InstrInfo.td b/lib/Target/AArch64/AArch64InstrInfo.td
index 6e4c0b0..ec6fa5c 100644
--- a/lib/Target/AArch64/AArch64InstrInfo.td
+++ b/lib/Target/AArch64/AArch64InstrInfo.td
@@ -22,6 +22,8 @@ def HasCrypto : Predicate<"Subtarget->hasCrypto()">,
AssemblerPredicate<"FeatureCrypto", "crypto">;
def HasCRC : Predicate<"Subtarget->hasCRC()">,
AssemblerPredicate<"FeatureCRC", "crc">;
+def HasV8_1a : Predicate<"Subtarget->hasV8_1a()">,
+ AssemblerPredicate<"FeatureV8_1a", "v8.1a">;
def IsLE : Predicate<"Subtarget->isLittleEndian()">;
def IsBE : Predicate<"!Subtarget->isLittleEndian()">;
def IsCyclone : Predicate<"Subtarget->isCyclone()">;
@@ -96,6 +98,19 @@ def SDT_AArch64ITOF : SDTypeProfile<1, 1, [SDTCisFP<0>, SDTCisSameAs<0,1>]>;
def SDT_AArch64TLSDescCall : SDTypeProfile<0, -2, [SDTCisPtrTy<0>,
SDTCisPtrTy<1>]>;
+
+// Generates the general dynamic sequences, i.e.
+// adrp x0, :tlsdesc:var
+// ldr x1, [x0, #:tlsdesc_lo12:var]
+// add x0, x0, #:tlsdesc_lo12:var
+// .tlsdesccall var
+// blr x1
+
+// The sequence defines no SDNode results (the TPIDR_EL0 offset is put
+// directly in X0) and takes one operand: the TLS variable.
+def SDT_AArch64TLSDescCallSeq : SDTypeProfile<0,1,
+ [SDTCisPtrTy<0>]>;
+
def SDT_AArch64WrapperLarge : SDTypeProfile<1, 4,
[SDTCisVT<0, i64>, SDTCisVT<1, i32>,
SDTCisSameAs<1, 2>, SDTCisSameAs<1, 3>,
@@ -229,10 +244,11 @@ def AArch64Prefetch : SDNode<"AArch64ISD::PREFETCH", SDT_AArch64PREFETCH,
def AArch64sitof: SDNode<"AArch64ISD::SITOF", SDT_AArch64ITOF>;
def AArch64uitof: SDNode<"AArch64ISD::UITOF", SDT_AArch64ITOF>;
-def AArch64tlsdesc_call : SDNode<"AArch64ISD::TLSDESC_CALL",
- SDT_AArch64TLSDescCall,
- [SDNPInGlue, SDNPOutGlue, SDNPHasChain,
- SDNPVariadic]>;
+def AArch64tlsdesc_callseq : SDNode<"AArch64ISD::TLSDESC_CALLSEQ",
+ SDT_AArch64TLSDescCallSeq,
+ [SDNPInGlue, SDNPOutGlue, SDNPHasChain,
+ SDNPVariadic]>;
+
def AArch64WrapperLarge : SDNode<"AArch64ISD::WrapperLarge",
SDT_AArch64WrapperLarge>;
@@ -244,6 +260,13 @@ def SDT_AArch64mull : SDTypeProfile<1, 2, [SDTCisInt<0>, SDTCisInt<1>,
def AArch64smull : SDNode<"AArch64ISD::SMULL", SDT_AArch64mull>;
def AArch64umull : SDNode<"AArch64ISD::UMULL", SDT_AArch64mull>;
+def AArch64saddv : SDNode<"AArch64ISD::SADDV", SDT_AArch64UnaryVec>;
+def AArch64uaddv : SDNode<"AArch64ISD::UADDV", SDT_AArch64UnaryVec>;
+def AArch64sminv : SDNode<"AArch64ISD::SMINV", SDT_AArch64UnaryVec>;
+def AArch64uminv : SDNode<"AArch64ISD::UMINV", SDT_AArch64UnaryVec>;
+def AArch64smaxv : SDNode<"AArch64ISD::SMAXV", SDT_AArch64UnaryVec>;
+def AArch64umaxv : SDNode<"AArch64ISD::UMAXV", SDT_AArch64UnaryVec>;
+
//===----------------------------------------------------------------------===//
//===----------------------------------------------------------------------===//
@@ -1049,15 +1072,16 @@ def TLSDESCCALL : Pseudo<(outs), (ins i64imm:$sym), []> {
let AsmString = ".tlsdesccall $sym";
}
-// Pseudo-instruction representing a BLR with attached TLSDESC relocation. It
-// gets expanded to two MCInsts during lowering.
-let isCall = 1, Defs = [LR] in
-def TLSDESC_BLR
- : Pseudo<(outs), (ins GPR64:$dest, i64imm:$sym),
- [(AArch64tlsdesc_call GPR64:$dest, tglobaltlsaddr:$sym)]>;
+// FIXME: maybe the scratch register used shouldn't be fixed to X1?
+// FIXME: can "hasSideEffects" be dropped?
+let isCall = 1, Defs = [LR, X0, X1], hasSideEffects = 1,
+ isCodeGenOnly = 1 in
+def TLSDESC_CALLSEQ
+ : Pseudo<(outs), (ins i64imm:$sym),
+ [(AArch64tlsdesc_callseq tglobaltlsaddr:$sym)]>;
+def : Pat<(AArch64tlsdesc_callseq texternalsym:$sym),
+ (TLSDESC_CALLSEQ texternalsym:$sym)>;
-def : Pat<(AArch64tlsdesc_call GPR64:$dest, texternalsym:$sym),
- (TLSDESC_BLR GPR64:$dest, texternalsym:$sym)>;
//===----------------------------------------------------------------------===//
// Conditional branch (immediate) instruction.
//===----------------------------------------------------------------------===//
@@ -2326,8 +2350,15 @@ defm UCVTF : IntegerToFP<1, "ucvtf", uint_to_fp>;
defm FMOV : UnscaledConversion<"fmov">;
-def : Pat<(f32 (fpimm0)), (FMOVWSr WZR)>, Requires<[NoZCZ]>;
-def : Pat<(f64 (fpimm0)), (FMOVXDr XZR)>, Requires<[NoZCZ]>;
+// Add pseudo ops for FMOV 0 so we can mark them as isReMaterializable
+let isReMaterializable = 1, isCodeGenOnly = 1 in {
+def FMOVS0 : Pseudo<(outs FPR32:$Rd), (ins), [(set f32:$Rd, (fpimm0))]>,
+ PseudoInstExpansion<(FMOVWSr FPR32:$Rd, WZR)>,
+ Requires<[NoZCZ]>;
+def FMOVD0 : Pseudo<(outs FPR64:$Rd), (ins), [(set f64:$Rd, (fpimm0))]>,
+ PseudoInstExpansion<(FMOVXDr FPR64:$Rd, XZR)>,
+ Requires<[NoZCZ]>;
+}
//===----------------------------------------------------------------------===//
// Floating point conversion instruction.
@@ -3416,10 +3447,10 @@ defm FMAXNMP : SIMDPairwiseScalarSD<1, 0, 0b01100, "fmaxnmp">;
defm FMAXP : SIMDPairwiseScalarSD<1, 0, 0b01111, "fmaxp">;
defm FMINNMP : SIMDPairwiseScalarSD<1, 1, 0b01100, "fminnmp">;
defm FMINP : SIMDPairwiseScalarSD<1, 1, 0b01111, "fminp">;
-def : Pat<(i64 (int_aarch64_neon_saddv (v2i64 V128:$Rn))),
- (ADDPv2i64p V128:$Rn)>;
-def : Pat<(i64 (int_aarch64_neon_uaddv (v2i64 V128:$Rn))),
- (ADDPv2i64p V128:$Rn)>;
+def : Pat<(v2i64 (AArch64saddv V128:$Rn)),
+ (INSERT_SUBREG (v2i64 (IMPLICIT_DEF)), (ADDPv2i64p V128:$Rn), dsub)>;
+def : Pat<(v2i64 (AArch64uaddv V128:$Rn)),
+ (INSERT_SUBREG (v2i64 (IMPLICIT_DEF)), (ADDPv2i64p V128:$Rn), dsub)>;
def : Pat<(f32 (int_aarch64_neon_faddv (v2f32 V64:$Rn))),
(FADDPv2i32p V64:$Rn)>;
def : Pat<(f32 (int_aarch64_neon_faddv (v4f32 V128:$Rn))),
@@ -3709,10 +3740,6 @@ multiclass Neon_INS_elt_pattern<ValueType VT128, ValueType VT64,
defm : Neon_INS_elt_pattern<v8f16, v4f16, f16, INSvi16lane>;
defm : Neon_INS_elt_pattern<v4f32, v2f32, f32, INSvi32lane>;
defm : Neon_INS_elt_pattern<v2f64, v1f64, f64, INSvi64lane>;
-defm : Neon_INS_elt_pattern<v16i8, v8i8, i32, INSvi8lane>;
-defm : Neon_INS_elt_pattern<v8i16, v4i16, i32, INSvi16lane>;
-defm : Neon_INS_elt_pattern<v4i32, v2i32, i32, INSvi32lane>;
-defm : Neon_INS_elt_pattern<v2i64, v1i64, i64, INSvi32lane>;
// Floating point vector extractions are codegen'd as either a sequence of
@@ -3776,121 +3803,143 @@ defm FMAXV : SIMDAcrossLanesS<0b01111, 0, "fmaxv", int_aarch64_neon_fmaxv>;
defm FMINNMV : SIMDAcrossLanesS<0b01100, 1, "fminnmv", int_aarch64_neon_fminnmv>;
defm FMINV : SIMDAcrossLanesS<0b01111, 1, "fminv", int_aarch64_neon_fminv>;
-multiclass SIMDAcrossLanesSignedIntrinsic<string baseOpc, Intrinsic intOp> {
-// If there is a sign extension after this intrinsic, consume it as smov already
-// performed it
- def : Pat<(i32 (sext_inreg (i32 (intOp (v8i8 V64:$Rn))), i8)),
- (i32 (SMOVvi8to32
- (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
- (!cast<Instruction>(!strconcat(baseOpc, "v8i8v")) V64:$Rn), bsub),
- (i64 0)))>;
- def : Pat<(i32 (intOp (v8i8 V64:$Rn))),
- (i32 (SMOVvi8to32
+// Patterns for across-vector intrinsics that have a node equivalent which
+// returns a vector (with only the low lane defined) instead of a scalar.
+// In effect, opNode is the same as (scalar_to_vector (IntNode)).
+multiclass SIMDAcrossLanesIntrinsic<string baseOpc,
+ SDPatternOperator opNode> {
+// If a lane instruction caught the vector_extract around opNode, we can
+// directly match the latter to the instruction.
+def : Pat<(v8i8 (opNode V64:$Rn)),
+ (INSERT_SUBREG (v8i8 (IMPLICIT_DEF)),
+ (!cast<Instruction>(!strconcat(baseOpc, "v8i8v")) V64:$Rn), bsub)>;
+def : Pat<(v16i8 (opNode V128:$Rn)),
(INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
- (!cast<Instruction>(!strconcat(baseOpc, "v8i8v")) V64:$Rn), bsub),
- (i64 0)))>;
-// If there is a sign extension after this intrinsic, consume it as smov already
-// performed it
-def : Pat<(i32 (sext_inreg (i32 (intOp (v16i8 V128:$Rn))), i8)),
- (i32 (SMOVvi8to32
- (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
- (!cast<Instruction>(!strconcat(baseOpc, "v16i8v")) V128:$Rn), bsub),
- (i64 0)))>;
-def : Pat<(i32 (intOp (v16i8 V128:$Rn))),
- (i32 (SMOVvi8to32
- (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
- (!cast<Instruction>(!strconcat(baseOpc, "v16i8v")) V128:$Rn), bsub),
- (i64 0)))>;
+ (!cast<Instruction>(!strconcat(baseOpc, "v16i8v")) V128:$Rn), bsub)>;
+def : Pat<(v4i16 (opNode V64:$Rn)),
+ (INSERT_SUBREG (v4i16 (IMPLICIT_DEF)),
+ (!cast<Instruction>(!strconcat(baseOpc, "v4i16v")) V64:$Rn), hsub)>;
+def : Pat<(v8i16 (opNode V128:$Rn)),
+ (INSERT_SUBREG (v8i16 (IMPLICIT_DEF)),
+ (!cast<Instruction>(!strconcat(baseOpc, "v8i16v")) V128:$Rn), hsub)>;
+def : Pat<(v4i32 (opNode V128:$Rn)),
+ (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)),
+ (!cast<Instruction>(!strconcat(baseOpc, "v4i32v")) V128:$Rn), ssub)>;
+
+
+// If none did, fall back to the explicit patterns, consuming the vector_extract.
+def : Pat<(i32 (vector_extract (insert_subvector undef, (v8i8 (opNode V64:$Rn)),
+ (i32 0)), (i64 0))),
+ (EXTRACT_SUBREG (INSERT_SUBREG (v8i8 (IMPLICIT_DEF)),
+ (!cast<Instruction>(!strconcat(baseOpc, "v8i8v")) V64:$Rn),
+ bsub), ssub)>;
+def : Pat<(i32 (vector_extract (v16i8 (opNode V128:$Rn)), (i64 0))),
+ (EXTRACT_SUBREG (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
+ (!cast<Instruction>(!strconcat(baseOpc, "v16i8v")) V128:$Rn),
+ bsub), ssub)>;
+def : Pat<(i32 (vector_extract (insert_subvector undef,
+ (v4i16 (opNode V64:$Rn)), (i32 0)), (i64 0))),
+ (EXTRACT_SUBREG (INSERT_SUBREG (v4i16 (IMPLICIT_DEF)),
+ (!cast<Instruction>(!strconcat(baseOpc, "v4i16v")) V64:$Rn),
+ hsub), ssub)>;
+def : Pat<(i32 (vector_extract (v8i16 (opNode V128:$Rn)), (i64 0))),
+ (EXTRACT_SUBREG (INSERT_SUBREG (v8i16 (IMPLICIT_DEF)),
+ (!cast<Instruction>(!strconcat(baseOpc, "v8i16v")) V128:$Rn),
+ hsub), ssub)>;
+def : Pat<(i32 (vector_extract (v4i32 (opNode V128:$Rn)), (i64 0))),
+ (EXTRACT_SUBREG (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)),
+ (!cast<Instruction>(!strconcat(baseOpc, "v4i32v")) V128:$Rn),
+ ssub), ssub)>;
+
+}
+
+multiclass SIMDAcrossLanesSignedIntrinsic<string baseOpc,
+ SDPatternOperator opNode>
+ : SIMDAcrossLanesIntrinsic<baseOpc, opNode> {
// If there is a sign extension after this intrinsic, consume it, as smov
// already performed it.
-def : Pat<(i32 (sext_inreg (i32 (intOp (v4i16 V64:$Rn))), i16)),
+def : Pat<(i32 (sext_inreg (i32 (vector_extract (insert_subvector undef,
+ (opNode (v8i8 V64:$Rn)), (i32 0)), (i64 0))), i8)),
+ (i32 (SMOVvi8to32
+ (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
+ (!cast<Instruction>(!strconcat(baseOpc, "v8i8v")) V64:$Rn), bsub),
+ (i64 0)))>;
+def : Pat<(i32 (sext_inreg (i32 (vector_extract
+ (opNode (v16i8 V128:$Rn)), (i64 0))), i8)),
+ (i32 (SMOVvi8to32
+ (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
+ (!cast<Instruction>(!strconcat(baseOpc, "v16i8v")) V128:$Rn), bsub),
+ (i64 0)))>;
+def : Pat<(i32 (sext_inreg (i32 (vector_extract (insert_subvector undef,
+ (opNode (v4i16 V64:$Rn)), (i32 0)), (i64 0))), i16)),
(i32 (SMOVvi16to32
(INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
(!cast<Instruction>(!strconcat(baseOpc, "v4i16v")) V64:$Rn), hsub),
(i64 0)))>;
-def : Pat<(i32 (intOp (v4i16 V64:$Rn))),
+def : Pat<(i32 (sext_inreg (i32 (vector_extract
+ (opNode (v8i16 V128:$Rn)), (i64 0))), i16)),
(i32 (SMOVvi16to32
- (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
- (!cast<Instruction>(!strconcat(baseOpc, "v4i16v")) V64:$Rn), hsub),
- (i64 0)))>;
-// If there is a sign extension after this intrinsic, consume it as smov already
-// performed it
-def : Pat<(i32 (sext_inreg (i32 (intOp (v8i16 V128:$Rn))), i16)),
- (i32 (SMOVvi16to32
- (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
- (!cast<Instruction>(!strconcat(baseOpc, "v8i16v")) V128:$Rn), hsub),
- (i64 0)))>;
-def : Pat<(i32 (intOp (v8i16 V128:$Rn))),
- (i32 (SMOVvi16to32
- (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
- (!cast<Instruction>(!strconcat(baseOpc, "v8i16v")) V128:$Rn), hsub),
- (i64 0)))>;
-
-def : Pat<(i32 (intOp (v4i32 V128:$Rn))),
- (i32 (EXTRACT_SUBREG
- (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
- (!cast<Instruction>(!strconcat(baseOpc, "v4i32v")) V128:$Rn), ssub),
- ssub))>;
+ (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
+ (!cast<Instruction>(!strconcat(baseOpc, "v8i16v")) V128:$Rn), hsub),
+ (i64 0)))>;
}
-multiclass SIMDAcrossLanesUnsignedIntrinsic<string baseOpc, Intrinsic intOp> {
-// If there is a masking operation keeping only what has been actually
-// generated, consume it.
- def : Pat<(i32 (and (i32 (intOp (v8i8 V64:$Rn))), maski8_or_more)),
- (i32 (EXTRACT_SUBREG
- (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
- (!cast<Instruction>(!strconcat(baseOpc, "v8i8v")) V64:$Rn), bsub),
- ssub))>;
- def : Pat<(i32 (intOp (v8i8 V64:$Rn))),
- (i32 (EXTRACT_SUBREG
- (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
- (!cast<Instruction>(!strconcat(baseOpc, "v8i8v")) V64:$Rn), bsub),
- ssub))>;
+multiclass SIMDAcrossLanesUnsignedIntrinsic<string baseOpc,
+ SDPatternOperator opNode>
+ : SIMDAcrossLanesIntrinsic<baseOpc, opNode> {
// If there is a masking operation keeping only what has been actually
// generated, consume it.
-def : Pat<(i32 (and (i32 (intOp (v16i8 V128:$Rn))), maski8_or_more)),
- (i32 (EXTRACT_SUBREG
- (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
- (!cast<Instruction>(!strconcat(baseOpc, "v16i8v")) V128:$Rn), bsub),
- ssub))>;
-def : Pat<(i32 (intOp (v16i8 V128:$Rn))),
+def : Pat<(i32 (and (i32 (vector_extract (insert_subvector undef,
+ (opNode (v8i8 V64:$Rn)), (i32 0)), (i64 0))), maski8_or_more)),
+ (i32 (EXTRACT_SUBREG
+ (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
+ (!cast<Instruction>(!strconcat(baseOpc, "v8i8v")) V64:$Rn), bsub),
+ ssub))>;
+def : Pat<(i32 (and (i32 (vector_extract (opNode (v16i8 V128:$Rn)), (i64 0))),
+ maski8_or_more)),
(i32 (EXTRACT_SUBREG
(INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
(!cast<Instruction>(!strconcat(baseOpc, "v16i8v")) V128:$Rn), bsub),
ssub))>;
-
-// If there is a masking operation keeping only what has been actually
-// generated, consume it.
-def : Pat<(i32 (and (i32 (intOp (v4i16 V64:$Rn))), maski16_or_more)),
- (i32 (EXTRACT_SUBREG
- (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
- (!cast<Instruction>(!strconcat(baseOpc, "v4i16v")) V64:$Rn), hsub),
- ssub))>;
-def : Pat<(i32 (intOp (v4i16 V64:$Rn))),
+def : Pat<(i32 (and (i32 (vector_extract (insert_subvector undef,
+ (opNode (v4i16 V64:$Rn)), (i32 0)), (i64 0))), maski16_or_more)),
(i32 (EXTRACT_SUBREG
(INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
(!cast<Instruction>(!strconcat(baseOpc, "v4i16v")) V64:$Rn), hsub),
ssub))>;
-// If there is a masking operation keeping only what has been actually
-// generated, consume it.
-def : Pat<(i32 (and (i32 (intOp (v8i16 V128:$Rn))), maski16_or_more)),
- (i32 (EXTRACT_SUBREG
- (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
- (!cast<Instruction>(!strconcat(baseOpc, "v8i16v")) V128:$Rn), hsub),
- ssub))>;
-def : Pat<(i32 (intOp (v8i16 V128:$Rn))),
+def : Pat<(i32 (and (i32 (vector_extract (opNode (v8i16 V128:$Rn)), (i64 0))),
+ maski16_or_more)),
(i32 (EXTRACT_SUBREG
(INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
(!cast<Instruction>(!strconcat(baseOpc, "v8i16v")) V128:$Rn), hsub),
ssub))>;
+}
-def : Pat<(i32 (intOp (v4i32 V128:$Rn))),
- (i32 (EXTRACT_SUBREG
- (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
- (!cast<Instruction>(!strconcat(baseOpc, "v4i32v")) V128:$Rn), ssub),
- ssub))>;
+defm : SIMDAcrossLanesSignedIntrinsic<"ADDV", AArch64saddv>;
+// vaddv_[su]32 is special; lower to ADDP Vd.2S, Vn.2S, Vm.2S with Vn == Vm,
+// and return Vd.s[0].
+def : Pat<(v2i32 (AArch64saddv (v2i32 V64:$Rn))),
+ (ADDPv2i32 V64:$Rn, V64:$Rn)>;
-}
+defm : SIMDAcrossLanesUnsignedIntrinsic<"ADDV", AArch64uaddv>;
+// vaddv_[su]32 is special; lower to ADDP Vd.2S, Vn.2S, Vm.2S with Vn == Vm,
+// and return Vd.s[0].
+def : Pat<(v2i32 (AArch64uaddv (v2i32 V64:$Rn))),
+ (ADDPv2i32 V64:$Rn, V64:$Rn)>;
+
+defm : SIMDAcrossLanesSignedIntrinsic<"SMAXV", AArch64smaxv>;
+def : Pat<(v2i32 (AArch64smaxv (v2i32 V64:$Rn))),
+ (SMAXPv2i32 V64:$Rn, V64:$Rn)>;
+
+defm : SIMDAcrossLanesSignedIntrinsic<"SMINV", AArch64sminv>;
+def : Pat<(v2i32 (AArch64sminv (v2i32 V64:$Rn))),
+ (SMINPv2i32 V64:$Rn, V64:$Rn)>;
+
+defm : SIMDAcrossLanesUnsignedIntrinsic<"UMAXV", AArch64umaxv>;
+def : Pat<(v2i32 (AArch64umaxv (v2i32 V64:$Rn))),
+ (UMAXPv2i32 V64:$Rn, V64:$Rn)>;
+
+defm : SIMDAcrossLanesUnsignedIntrinsic<"UMINV", AArch64uminv>;
+def : Pat<(v2i32 (AArch64uminv (v2i32 V64:$Rn))),
+ (UMINPv2i32 V64:$Rn, V64:$Rn)>;
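The v2i32 case needs no across-lanes instruction: pairwise-combining the
vector with itself already leaves the two-lane result in both lanes. A scalar
model of why ADDP Vd.2S, Vn.2S, Vn.2S works (illustrative only; the min/max
pairwise variants are analogous):

    #include <cassert>
    #include <cstdint>

    int main() {
      uint32_t Vn[2] = {10, 32};
      // ADDP concatenates the operands {Vn, Vn} and sums adjacent pairs,
      // so both result lanes hold Vn[0] + Vn[1].
      uint32_t Vd[2] = {Vn[0] + Vn[1], Vn[0] + Vn[1]};
      // Lane 0 is exactly the across-lanes sum the intrinsic wants.
      assert(Vd[0] == 42);
      return 0;
    }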
multiclass SIMDAcrossLanesSignedLongIntrinsic<string baseOpc, Intrinsic intOp> {
def : Pat<(i32 (intOp (v8i8 V64:$Rn))),
@@ -3953,32 +4002,6 @@ def : Pat<(i64 (intOp (v4i32 V128:$Rn))),
dsub))>;
}
-defm : SIMDAcrossLanesSignedIntrinsic<"ADDV", int_aarch64_neon_saddv>;
-// vaddv_[su]32 is special; -> ADDP Vd.2S,Vn.2S,Vm.2S; return Vd.s[0];Vn==Vm
-def : Pat<(i32 (int_aarch64_neon_saddv (v2i32 V64:$Rn))),
- (EXTRACT_SUBREG (ADDPv2i32 V64:$Rn, V64:$Rn), ssub)>;
-
-defm : SIMDAcrossLanesUnsignedIntrinsic<"ADDV", int_aarch64_neon_uaddv>;
-// vaddv_[su]32 is special; -> ADDP Vd.2S,Vn.2S,Vm.2S; return Vd.s[0];Vn==Vm
-def : Pat<(i32 (int_aarch64_neon_uaddv (v2i32 V64:$Rn))),
- (EXTRACT_SUBREG (ADDPv2i32 V64:$Rn, V64:$Rn), ssub)>;
-
-defm : SIMDAcrossLanesSignedIntrinsic<"SMAXV", int_aarch64_neon_smaxv>;
-def : Pat<(i32 (int_aarch64_neon_smaxv (v2i32 V64:$Rn))),
- (EXTRACT_SUBREG (SMAXPv2i32 V64:$Rn, V64:$Rn), ssub)>;
-
-defm : SIMDAcrossLanesSignedIntrinsic<"SMINV", int_aarch64_neon_sminv>;
-def : Pat<(i32 (int_aarch64_neon_sminv (v2i32 V64:$Rn))),
- (EXTRACT_SUBREG (SMINPv2i32 V64:$Rn, V64:$Rn), ssub)>;
-
-defm : SIMDAcrossLanesUnsignedIntrinsic<"UMAXV", int_aarch64_neon_umaxv>;
-def : Pat<(i32 (int_aarch64_neon_umaxv (v2i32 V64:$Rn))),
- (EXTRACT_SUBREG (UMAXPv2i32 V64:$Rn, V64:$Rn), ssub)>;
-
-defm : SIMDAcrossLanesUnsignedIntrinsic<"UMINV", int_aarch64_neon_uminv>;
-def : Pat<(i32 (int_aarch64_neon_uminv (v2i32 V64:$Rn))),
- (EXTRACT_SUBREG (UMINPv2i32 V64:$Rn, V64:$Rn), ssub)>;
-
defm : SIMDAcrossLanesSignedLongIntrinsic<"SADDLV", int_aarch64_neon_saddlv>;
defm : SIMDAcrossLanesUnsignedLongIntrinsic<"UADDLV", int_aarch64_neon_uaddlv>;
diff --git a/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp b/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp
index 8463ce6..b1499e2 100644
--- a/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp
+++ b/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp
@@ -63,16 +63,24 @@ struct AArch64LoadStoreOpt : public MachineFunctionPass {
// If a matching instruction is found, MergeForward is set to true if the
// merge is to remove the first instruction and replace the second with
// a pair-wise insn, and false if the reverse is true.
+ // \p SExtIdx[out] gives the index of the result of the load pair that
+ // must be extended. The value of SExtIdx assumes that the paired load
+ // produces the value in this order: (I, returned iterator), i.e.,
+ // -1 means no value has to be extended, 0 means I, and 1 means the
+ // returned iterator.
MachineBasicBlock::iterator findMatchingInsn(MachineBasicBlock::iterator I,
- bool &MergeForward,
+ bool &MergeForward, int &SExtIdx,
unsigned Limit);
// Merge the two instructions indicated into a single pair-wise instruction.
// If MergeForward is true, erase the first instruction and fold its
// operation into the second. If false, the reverse. Return the instruction
// following the first instruction (which may change during processing).
+ // \p SExtIdx index of the result that must be extended for a paired load.
+ // -1 means none, 0 means I, and 1 means Paired.
MachineBasicBlock::iterator
mergePairedInsns(MachineBasicBlock::iterator I,
- MachineBasicBlock::iterator Paired, bool MergeForward);
+ MachineBasicBlock::iterator Paired, bool MergeForward,
+ int SExtIdx);
// Scan the instruction list to find a base register update that can
// be combined with the current instruction (a load or store) using
@@ -181,6 +189,43 @@ int AArch64LoadStoreOpt::getMemSize(MachineInstr *MemMI) {
}
}
+static unsigned getMatchingNonSExtOpcode(unsigned Opc,
+ bool *IsValidLdStrOpc = nullptr) {
+ if (IsValidLdStrOpc)
+ *IsValidLdStrOpc = true;
+ switch (Opc) {
+ default:
+ if (IsValidLdStrOpc)
+ *IsValidLdStrOpc = false;
+ return UINT_MAX;
+ case AArch64::STRDui:
+ case AArch64::STURDi:
+ case AArch64::STRQui:
+ case AArch64::STURQi:
+ case AArch64::STRWui:
+ case AArch64::STURWi:
+ case AArch64::STRXui:
+ case AArch64::STURXi:
+ case AArch64::LDRDui:
+ case AArch64::LDURDi:
+ case AArch64::LDRQui:
+ case AArch64::LDURQi:
+ case AArch64::LDRWui:
+ case AArch64::LDURWi:
+ case AArch64::LDRXui:
+ case AArch64::LDURXi:
+ case AArch64::STRSui:
+ case AArch64::STURSi:
+ case AArch64::LDRSui:
+ case AArch64::LDURSi:
+ return Opc;
+ case AArch64::LDRSWui:
+ return AArch64::LDRWui;
+ case AArch64::LDURSWi:
+ return AArch64::LDURWi;
+ }
+}
+
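The mapping above canonicalizes sign-extending loads to their plain W-register
form so that, e.g., an ldrsw can pair with an ordinary 32-bit ldr; SExtIdx then
records which half of the resulting ldp still needs the SBFM sign extension. A
sketch of the idea with hypothetical stand-in opcodes (not the real AArch64::
enum values; not part of the patch):

    #include <cassert>

    enum Opc { LDRWui, LDRSWui, LDRXui, INVALID };

    static Opc getNonSExt(Opc O, bool *IsValid = nullptr) {
      if (IsValid) *IsValid = true;
      switch (O) {
      case LDRWui: case LDRXui: return O;  // already non-extending
      case LDRSWui:             return LDRWui;
      default:
        if (IsValid) *IsValid = false;
        return INVALID;
      }
    }

    int main() {
      // ldrsw + ldr canonicalize to the same opcode, so the pair is legal;
      // the ldrsw's result is the one SExtIdx will mark for extension.
      assert(getNonSExt(LDRSWui) == getNonSExt(LDRWui));
      return 0;
    }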
static unsigned getMatchingPairOpcode(unsigned Opc) {
switch (Opc) {
default:
@@ -282,7 +327,7 @@ static unsigned getPostIndexedOpcode(unsigned Opc) {
MachineBasicBlock::iterator
AArch64LoadStoreOpt::mergePairedInsns(MachineBasicBlock::iterator I,
MachineBasicBlock::iterator Paired,
- bool MergeForward) {
+ bool MergeForward, int SExtIdx) {
MachineBasicBlock::iterator NextI = I;
++NextI;
// If NextI is the second of the two instructions to be merged, we need
@@ -292,11 +337,13 @@ AArch64LoadStoreOpt::mergePairedInsns(MachineBasicBlock::iterator I,
if (NextI == Paired)
++NextI;
- bool IsUnscaled = isUnscaledLdst(I->getOpcode());
+ unsigned Opc =
+ SExtIdx == -1 ? I->getOpcode() : getMatchingNonSExtOpcode(I->getOpcode());
+ bool IsUnscaled = isUnscaledLdst(Opc);
int OffsetStride =
IsUnscaled && EnableAArch64UnscaledMemOp ? getMemSize(I) : 1;
- unsigned NewOpc = getMatchingPairOpcode(I->getOpcode());
+ unsigned NewOpc = getMatchingPairOpcode(Opc);
// Insert our new paired instruction after whichever of the paired
// instructions MergeForward indicates.
MachineBasicBlock::iterator InsertionPoint = MergeForward ? Paired : I;
@@ -311,6 +358,11 @@ AArch64LoadStoreOpt::mergePairedInsns(MachineBasicBlock::iterator I,
Paired->getOperand(2).getImm() + OffsetStride) {
RtMI = Paired;
Rt2MI = I;
+ // Here we swapped the assumption made for SExtIdx.
+ // I.e., we turn ldp I, Paired into ldp Paired, I.
+ // Update the index accordingly.
+ if (SExtIdx != -1)
+ SExtIdx = (SExtIdx + 1) % 2;
} else {
RtMI = I;
Rt2MI = Paired;
@@ -337,8 +389,47 @@ AArch64LoadStoreOpt::mergePairedInsns(MachineBasicBlock::iterator I,
DEBUG(dbgs() << " ");
DEBUG(Paired->print(dbgs()));
DEBUG(dbgs() << " with instruction:\n ");
- DEBUG(((MachineInstr *)MIB)->print(dbgs()));
- DEBUG(dbgs() << "\n");
+
+ if (SExtIdx != -1) {
+ // Generate the sign extension for the proper result of the ldp.
+ // I.e., with X1, that would be:
+ // %W1<def> = KILL %W1, %X1<imp-def>
+ // %X1<def> = SBFMXri %X1<kill>, 0, 31
+ MachineOperand &DstMO = MIB->getOperand(SExtIdx);
+ // Right now, DstMO has the extended register, since it comes from an
+ // extended opcode.
+ unsigned DstRegX = DstMO.getReg();
+ // Get the W variant of that register.
+ unsigned DstRegW = TRI->getSubReg(DstRegX, AArch64::sub_32);
+ // Update the result of LDP to use the W instead of the X variant.
+ DstMO.setReg(DstRegW);
+ DEBUG(((MachineInstr *)MIB)->print(dbgs()));
+ DEBUG(dbgs() << "\n");
+ // Make the machine verifier happy by providing a definition for
+ // the X register.
+ // Insert this definition right after the generated LDP, i.e., before
+ // InsertionPoint.
+ MachineInstrBuilder MIBKill =
+ BuildMI(*I->getParent(), InsertionPoint, I->getDebugLoc(),
+ TII->get(TargetOpcode::KILL), DstRegW)
+ .addReg(DstRegW)
+ .addReg(DstRegX, RegState::Define);
+ MIBKill->getOperand(2).setImplicit();
+ // Create the sign extension.
+ MachineInstrBuilder MIBSXTW =
+ BuildMI(*I->getParent(), InsertionPoint, I->getDebugLoc(),
+ TII->get(AArch64::SBFMXri), DstRegX)
+ .addReg(DstRegX)
+ .addImm(0)
+ .addImm(31);
+ (void)MIBSXTW;
+ DEBUG(dbgs() << " Extend operand:\n ");
+ DEBUG(((MachineInstr *)MIBSXTW)->print(dbgs()));
+ DEBUG(dbgs() << "\n");
+ } else {
+ DEBUG(((MachineInstr *)MIB)->print(dbgs()));
+ DEBUG(dbgs() << "\n");
+ }
// Erase the old instructions.
I->eraseFromParent();
@@ -396,7 +487,8 @@ static int alignTo(int Num, int PowOf2) {
/// be combined with the current instruction into a load/store pair.
MachineBasicBlock::iterator
AArch64LoadStoreOpt::findMatchingInsn(MachineBasicBlock::iterator I,
- bool &MergeForward, unsigned Limit) {
+ bool &MergeForward, int &SExtIdx,
+ unsigned Limit) {
MachineBasicBlock::iterator E = I->getParent()->end();
MachineBasicBlock::iterator MBBI = I;
MachineInstr *FirstMI = I;
@@ -436,7 +528,19 @@ AArch64LoadStoreOpt::findMatchingInsn(MachineBasicBlock::iterator I,
// Now that we know this is a real instruction, count it.
++Count;
- if (Opc == MI->getOpcode() && MI->getOperand(2).isImm()) {
+ bool CanMergeOpc = Opc == MI->getOpcode();
+ SExtIdx = -1;
+ if (!CanMergeOpc) {
+ bool IsValidLdStrOpc;
+ unsigned NonSExtOpc = getMatchingNonSExtOpcode(Opc, &IsValidLdStrOpc);
+ if (!IsValidLdStrOpc)
+ continue;
+ // Opc will be the first instruction in the pair.
+ SExtIdx = NonSExtOpc == (unsigned)Opc ? 1 : 0;
+ CanMergeOpc = NonSExtOpc == getMatchingNonSExtOpcode(MI->getOpcode());
+ }
+
+ if (CanMergeOpc && MI->getOperand(2).isImm()) {
// If we've found another instruction with the same opcode, check to see
// if the base and offset are compatible with our starting instruction.
// These instructions all have scaled immediate operands, so we just
@@ -823,13 +927,14 @@ bool AArch64LoadStoreOpt::optimizeBlock(MachineBasicBlock &MBB) {
}
// Look ahead up to ScanLimit instructions for a pairable instruction.
bool MergeForward = false;
+ int SExtIdx = -1;
MachineBasicBlock::iterator Paired =
- findMatchingInsn(MBBI, MergeForward, ScanLimit);
+ findMatchingInsn(MBBI, MergeForward, SExtIdx, ScanLimit);
if (Paired != E) {
// Merge the loads into a pair. Keeping the iterator straight is a
// pain, so we let the merge routine tell us what the next instruction
// is after it's done mucking about.
- MBBI = mergePairedInsns(MBBI, Paired, MergeForward);
+ MBBI = mergePairedInsns(MBBI, Paired, MergeForward, SExtIdx);
Modified = true;
++NumPairCreated;
diff --git a/lib/Target/AArch64/AArch64MCInstLower.cpp b/lib/Target/AArch64/AArch64MCInstLower.cpp
index e57b0f4..b829341 100644
--- a/lib/Target/AArch64/AArch64MCInstLower.cpp
+++ b/lib/Target/AArch64/AArch64MCInstLower.cpp
@@ -22,9 +22,12 @@
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCInst.h"
#include "llvm/Support/CodeGen.h"
+#include "llvm/Support/CommandLine.h"
#include "llvm/Target/TargetMachine.h"
using namespace llvm;
+extern cl::opt<bool> EnableAArch64ELFLocalDynamicTLSGeneration;
+
AArch64MCInstLower::AArch64MCInstLower(MCContext &ctx, AsmPrinter &printer)
: Ctx(ctx), Printer(printer), TargetTriple(printer.getTargetTriple()) {}
@@ -84,10 +87,16 @@ MCOperand AArch64MCInstLower::lowerSymbolOperandELF(const MachineOperand &MO,
if (MO.isGlobal()) {
const GlobalValue *GV = MO.getGlobal();
Model = Printer.TM.getTLSModel(GV);
+ if (!EnableAArch64ELFLocalDynamicTLSGeneration &&
+ Model == TLSModel::LocalDynamic)
+ Model = TLSModel::GeneralDynamic;
+
} else {
assert(MO.isSymbol() &&
StringRef(MO.getSymbolName()) == "_TLS_MODULE_BASE_" &&
"unexpected external TLS symbol");
+ // The general dynamic access sequence is used to get the
+ // address of _TLS_MODULE_BASE_.
Model = TLSModel::GeneralDynamic;
}
switch (Model) {
@@ -123,6 +132,8 @@ MCOperand AArch64MCInstLower::lowerSymbolOperandELF(const MachineOperand &MO,
RefFlags |= AArch64MCExpr::VK_G1;
else if ((MO.getTargetFlags() & AArch64II::MO_FRAGMENT) == AArch64II::MO_G0)
RefFlags |= AArch64MCExpr::VK_G0;
+ else if ((MO.getTargetFlags() & AArch64II::MO_FRAGMENT) == AArch64II::MO_HI12)
+ RefFlags |= AArch64MCExpr::VK_HI12;
if (MO.getTargetFlags() & AArch64II::MO_NC)
RefFlags |= AArch64MCExpr::VK_NC;
diff --git a/lib/Target/AArch64/AArch64PBQPRegAlloc.cpp b/lib/Target/AArch64/AArch64PBQPRegAlloc.cpp
index 4690177..5394875 100644
--- a/lib/Target/AArch64/AArch64PBQPRegAlloc.cpp
+++ b/lib/Target/AArch64/AArch64PBQPRegAlloc.cpp
@@ -319,7 +319,7 @@ void A57ChainingConstraint::addInterChainConstraint(PBQPRAGraph &G, unsigned Rd,
static bool regJustKilledBefore(const LiveIntervals &LIs, unsigned reg,
const MachineInstr &MI) {
- LiveInterval LI = LIs.getInterval(reg);
+ const LiveInterval &LI = LIs.getInterval(reg);
SlotIndex SI = LIs.getInstructionIndex(&MI);
return LI.expiredAt(SI);
}
diff --git a/lib/Target/AArch64/AArch64PromoteConstant.cpp b/lib/Target/AArch64/AArch64PromoteConstant.cpp
index c037c86..e1b93bf 100644
--- a/lib/Target/AArch64/AArch64PromoteConstant.cpp
+++ b/lib/Target/AArch64/AArch64PromoteConstant.cpp
@@ -38,6 +38,7 @@
#include "llvm/Pass.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
using namespace llvm;
@@ -189,9 +190,11 @@ private:
IPI->second.push_back(&Use);
// Transfer the dominated uses of IPI to NewPt
// Inserting into the DenseMap may invalidate existing iterator.
- // Keep a copy of the key to find the iterator to erase.
+ // Keep a copy of the key to find the iterator to erase. Keep a copy of the
+ // value so that we don't have to dereference IPI->second.
Instruction *OldInstr = IPI->first;
- InsertPts[NewPt] = std::move(IPI->second);
+ Uses OldUses = std::move(IPI->second);
+ InsertPts[NewPt] = std::move(OldUses);
// Erase IPI.
InsertPts.erase(OldInstr);
}
diff --git a/lib/Target/AArch64/AArch64RegisterInfo.cpp b/lib/Target/AArch64/AArch64RegisterInfo.cpp
index 206cdbb..33c11fe 100644
--- a/lib/Target/AArch64/AArch64RegisterInfo.cpp
+++ b/lib/Target/AArch64/AArch64RegisterInfo.cpp
@@ -18,6 +18,7 @@
#include "AArch64Subtarget.h"
#include "MCTargetDesc/AArch64AddressingModes.h"
#include "llvm/ADT/BitVector.h"
+#include "llvm/ADT/Triple.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
@@ -37,9 +38,8 @@ static cl::opt<bool>
ReserveX18("aarch64-reserve-x18", cl::Hidden,
cl::desc("Reserve X18, making it unavailable as GPR"));
-AArch64RegisterInfo::AArch64RegisterInfo(const AArch64InstrInfo *tii,
- const AArch64Subtarget *sti)
- : AArch64GenRegisterInfo(AArch64::LR), TII(tii), STI(sti) {}
+AArch64RegisterInfo::AArch64RegisterInfo(const Triple &TT)
+ : AArch64GenRegisterInfo(AArch64::LR), TT(TT) {}
const MCPhysReg *
AArch64RegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
@@ -55,7 +55,8 @@ AArch64RegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
}
const uint32_t *
-AArch64RegisterInfo::getCallPreservedMask(CallingConv::ID CC) const {
+AArch64RegisterInfo::getCallPreservedMask(const MachineFunction &MF,
+ CallingConv::ID CC) const {
if (CC == CallingConv::GHC)
// This is academic because all GHC calls are (supposed to be) tail calls
return CSR_AArch64_NoRegs_RegMask;
@@ -66,15 +67,16 @@ AArch64RegisterInfo::getCallPreservedMask(CallingConv::ID CC) const {
}
const uint32_t *AArch64RegisterInfo::getTLSCallPreservedMask() const {
- if (STI->isTargetDarwin())
+ if (TT.isOSDarwin())
return CSR_AArch64_TLS_Darwin_RegMask;
- assert(STI->isTargetELF() && "only expect Darwin or ELF TLS");
+ assert(TT.isOSBinFormatELF() && "only expect Darwin or ELF TLS");
return CSR_AArch64_TLS_ELF_RegMask;
}
const uint32_t *
-AArch64RegisterInfo::getThisReturnPreservedMask(CallingConv::ID CC) const {
+AArch64RegisterInfo::getThisReturnPreservedMask(const MachineFunction &MF,
+ CallingConv::ID CC) const {
// This should return a register mask that is the same as that returned by
// getCallPreservedMask but that additionally preserves the register used for
// the first i64 argument (which must also be the register used to return a
@@ -97,12 +99,12 @@ AArch64RegisterInfo::getReservedRegs(const MachineFunction &MF) const {
Reserved.set(AArch64::WSP);
Reserved.set(AArch64::WZR);
- if (TFI->hasFP(MF) || STI->isTargetDarwin()) {
+ if (TFI->hasFP(MF) || TT.isOSDarwin()) {
Reserved.set(AArch64::FP);
Reserved.set(AArch64::W29);
}
- if (STI->isTargetDarwin() || ReserveX18) {
+ if (TT.isOSDarwin() || ReserveX18) {
Reserved.set(AArch64::X18); // Platform register
Reserved.set(AArch64::W18);
}
@@ -129,10 +131,10 @@ bool AArch64RegisterInfo::isReservedReg(const MachineFunction &MF,
return true;
case AArch64::X18:
case AArch64::W18:
- return STI->isTargetDarwin() || ReserveX18;
+ return TT.isOSDarwin() || ReserveX18;
case AArch64::FP:
case AArch64::W29:
- return TFI->hasFP(MF) || STI->isTargetDarwin();
+ return TFI->hasFP(MF) || TT.isOSDarwin();
case AArch64::W19:
case AArch64::X19:
return hasBasePointer(MF);
@@ -269,7 +271,7 @@ bool AArch64RegisterInfo::needsFrameBaseReg(MachineInstr *MI,
// The FP is only available if there is no dynamic realignment. We
// don't know for sure yet whether we'll need that, so we guess based
// on whether there are any local variables that would trigger it.
- if (TFI->hasFP(MF) && isFrameOffsetLegal(MI, FPOffset))
+ if (TFI->hasFP(MF) && isFrameOffsetLegal(MI, AArch64::FP, FPOffset))
return false;
// If we can reference via the stack pointer or base pointer, try that.
@@ -277,7 +279,7 @@ bool AArch64RegisterInfo::needsFrameBaseReg(MachineInstr *MI,
// to only disallow SP relative references in the live range of
// the VLA(s). In practice, it's unclear how much difference that
// would make, but it may be worth doing.
- if (isFrameOffsetLegal(MI, Offset))
+ if (isFrameOffsetLegal(MI, AArch64::SP, Offset))
return false;
// The offset likely isn't legal; we want to allocate a virtual base register.
@@ -285,6 +287,7 @@ bool AArch64RegisterInfo::needsFrameBaseReg(MachineInstr *MI,
}
bool AArch64RegisterInfo::isFrameOffsetLegal(const MachineInstr *MI,
+ unsigned BaseReg,
int64_t Offset) const {
assert(Offset <= INT_MAX && "Offset too big to fit in int.");
assert(MI && "Unable to get the legal offset for nil instruction.");
@@ -302,10 +305,11 @@ void AArch64RegisterInfo::materializeFrameBaseRegister(MachineBasicBlock *MBB,
DebugLoc DL; // Defaults to "unknown"
if (Ins != MBB->end())
DL = Ins->getDebugLoc();
-
+ const MachineFunction &MF = *MBB->getParent();
+ const AArch64InstrInfo *TII =
+ MF.getSubtarget<AArch64Subtarget>().getInstrInfo();
const MCInstrDesc &MCID = TII->get(AArch64::ADDXri);
MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo();
- const MachineFunction &MF = *MBB->getParent();
MRI.constrainRegClass(BaseReg, TII->getRegClass(MCID, 0, this, MF));
unsigned Shifter = AArch64_AM::getShifterImm(AArch64_AM::LSL, 0);
@@ -324,6 +328,9 @@ void AArch64RegisterInfo::resolveFrameIndex(MachineInstr &MI, unsigned BaseReg,
++i;
assert(i < MI.getNumOperands() && "Instr doesn't have FrameIndex operand!");
}
+ const MachineFunction *MF = MI.getParent()->getParent();
+ const AArch64InstrInfo *TII =
+ MF->getSubtarget<AArch64Subtarget>().getInstrInfo();
bool Done = rewriteAArch64FrameIndex(MI, i, BaseReg, Off, TII);
assert(Done && "Unable to resolve frame index!");
(void)Done;
@@ -337,6 +344,8 @@ void AArch64RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
MachineInstr &MI = *II;
MachineBasicBlock &MBB = *MI.getParent();
MachineFunction &MF = *MBB.getParent();
+ const AArch64InstrInfo *TII =
+ MF.getSubtarget<AArch64Subtarget>().getInstrInfo();
const AArch64FrameLowering *TFI = static_cast<const AArch64FrameLowering *>(
MF.getSubtarget().getFrameLowering());
@@ -389,10 +398,10 @@ unsigned AArch64RegisterInfo::getRegPressureLimit(const TargetRegisterClass *RC,
case AArch64::GPR64RegClassID:
case AArch64::GPR32commonRegClassID:
case AArch64::GPR64commonRegClassID:
- return 32 - 1 // XZR/SP
- - (TFI->hasFP(MF) || STI->isTargetDarwin()) // FP
- - (STI->isTargetDarwin() || ReserveX18) // X18 reserved as platform register
- - hasBasePointer(MF); // X19
+ return 32 - 1 // XZR/SP
+ - (TFI->hasFP(MF) || TT.isOSDarwin()) // FP
+ - (TT.isOSDarwin() || ReserveX18) // X18 reserved as platform register
+ - hasBasePointer(MF); // X19
case AArch64::FPR8RegClassID:
case AArch64::FPR16RegClassID:
case AArch64::FPR32RegClassID:
diff --git a/lib/Target/AArch64/AArch64RegisterInfo.h b/lib/Target/AArch64/AArch64RegisterInfo.h
index 51a5034..c01bfa5 100644
--- a/lib/Target/AArch64/AArch64RegisterInfo.h
+++ b/lib/Target/AArch64/AArch64RegisterInfo.h
@@ -19,26 +19,24 @@
namespace llvm {
-class AArch64InstrInfo;
-class AArch64Subtarget;
class MachineFunction;
class RegScavenger;
class TargetRegisterClass;
+class Triple;
struct AArch64RegisterInfo : public AArch64GenRegisterInfo {
private:
- const AArch64InstrInfo *TII;
- const AArch64Subtarget *STI;
+ const Triple &TT;
public:
- AArch64RegisterInfo(const AArch64InstrInfo *tii, const AArch64Subtarget *sti);
+ AArch64RegisterInfo(const Triple &TT);
bool isReservedReg(const MachineFunction &MF, unsigned Reg) const;
/// Code Generation virtual methods...
- const MCPhysReg *
- getCalleeSavedRegs(const MachineFunction *MF = nullptr) const override;
- const uint32_t *getCallPreservedMask(CallingConv::ID) const override;
+ const MCPhysReg *getCalleeSavedRegs(const MachineFunction *MF) const override;
+ const uint32_t *getCallPreservedMask(const MachineFunction &MF,
+ CallingConv::ID) const override;
unsigned getCSRFirstUseCost() const override {
// The cost will be compared against BlockFrequency where entry has the
@@ -59,7 +57,8 @@ public:
///
/// Should return NULL in the case that the calling convention does not have
/// this property
- const uint32_t *getThisReturnPreservedMask(CallingConv::ID) const;
+ const uint32_t *getThisReturnPreservedMask(const MachineFunction &MF,
+ CallingConv::ID) const;
BitVector getReservedRegs(const MachineFunction &MF) const override;
const TargetRegisterClass *
@@ -73,7 +72,7 @@ public:
bool requiresFrameIndexScavenging(const MachineFunction &MF) const override;
bool needsFrameBaseReg(MachineInstr *MI, int64_t Offset) const override;
- bool isFrameOffsetLegal(const MachineInstr *MI,
+ bool isFrameOffsetLegal(const MachineInstr *MI, unsigned BaseReg,
int64_t Offset) const override;
void materializeFrameBaseRegister(MachineBasicBlock *MBB, unsigned BaseReg,
int FrameIdx,
diff --git a/lib/Target/AArch64/AArch64Subtarget.cpp b/lib/Target/AArch64/AArch64Subtarget.cpp
index c613025..221d70d 100644
--- a/lib/Target/AArch64/AArch64Subtarget.cpp
+++ b/lib/Target/AArch64/AArch64Subtarget.cpp
@@ -48,7 +48,7 @@ AArch64Subtarget::AArch64Subtarget(const std::string &TT,
const TargetMachine &TM, bool LittleEndian)
: AArch64GenSubtargetInfo(TT, CPU, FS), ARMProcFamily(Others),
HasFPARMv8(false), HasNEON(false), HasCrypto(false), HasCRC(false),
- HasZeroCycleRegMove(false), HasZeroCycleZeroing(false),
+ HasV8_1a(false), HasZeroCycleRegMove(false), HasZeroCycleZeroing(false),
IsLittle(LittleEndian), CPUString(CPU), TargetTriple(TT), FrameLowering(),
InstrInfo(initializeSubtargetDependencies(FS)),
TSInfo(TM.getDataLayout()), TLInfo(TM, *this) {}
diff --git a/lib/Target/AArch64/AArch64Subtarget.h b/lib/Target/AArch64/AArch64Subtarget.h
index d418cc5..bcab97d 100644
--- a/lib/Target/AArch64/AArch64Subtarget.h
+++ b/lib/Target/AArch64/AArch64Subtarget.h
@@ -41,6 +41,7 @@ protected:
bool HasNEON;
bool HasCrypto;
bool HasCRC;
+ bool HasV8_1a;
// HasZeroCycleRegMove - Has zero-cycle register mov instructions.
bool HasZeroCycleRegMove;
@@ -86,6 +87,7 @@ public:
const AArch64RegisterInfo *getRegisterInfo() const override {
return &getInstrInfo()->getRegisterInfo();
}
+ const Triple &getTargetTriple() const { return TargetTriple; }
bool enableMachineScheduler() const override { return true; }
bool enablePostMachineScheduler() const override {
return isCortexA53() || isCortexA57();
@@ -99,6 +101,7 @@ public:
bool hasNEON() const { return HasNEON; }
bool hasCrypto() const { return HasCrypto; }
bool hasCRC() const { return HasCRC; }
+ bool hasV8_1a() const { return HasV8_1a; }
bool isLittleEndian() const { return IsLittle; }
diff --git a/lib/Target/AArch64/AArch64TargetMachine.cpp b/lib/Target/AArch64/AArch64TargetMachine.cpp
index d73d0b3..f902f64 100644
--- a/lib/Target/AArch64/AArch64TargetMachine.cpp
+++ b/lib/Target/AArch64/AArch64TargetMachine.cpp
@@ -104,6 +104,16 @@ static std::unique_ptr<TargetLoweringObjectFile> createTLOF(const Triple &TT) {
return make_unique<AArch64_ELFTargetObjectFile>();
}
+// Helper function to build a DataLayout string
+static std::string computeDataLayout(StringRef TT, bool LittleEndian) {
+ Triple Triple(TT);
+ if (Triple.isOSBinFormatMachO())
+ return "e-m:o-i64:64-i128:128-n32:64-S128";
+ if (LittleEndian)
+ return "e-m:e-i64:64-i128:128-n32:64-S128";
+ return "E-m:e-i64:64-i128:128-n32:64-S128";
+}
+
/// TargetMachine ctor - Create an AArch64 architecture model.
///
AArch64TargetMachine::AArch64TargetMachine(const Target &T, StringRef TT,
@@ -112,16 +122,12 @@ AArch64TargetMachine::AArch64TargetMachine(const Target &T, StringRef TT,
Reloc::Model RM, CodeModel::Model CM,
CodeGenOpt::Level OL,
bool LittleEndian)
- : LLVMTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL),
- // This nested ternary is horrible, but DL needs to be properly
- // initialized
- // before TLInfo is constructed.
- DL(Triple(TT).isOSBinFormatMachO()
- ? "e-m:o-i64:64-i128:128-n32:64-S128"
- : (LittleEndian ? "e-m:e-i64:64-i128:128-n32:64-S128"
- : "E-m:e-i64:64-i128:128-n32:64-S128")),
+  // DL (now built by computeDataLayout) must be initialized before TLInfo
+  // is constructed.
+ : LLVMTargetMachine(T, computeDataLayout(TT, LittleEndian), TT, CPU, FS,
+ Options, RM, CM, OL),
TLOF(createTLOF(Triple(getTargetTriple()))),
- Subtarget(TT, CPU, FS, *this, LittleEndian), isLittle(LittleEndian) {
+ isLittle(LittleEndian) {
initAsmInfo();
}
@@ -239,7 +245,7 @@ bool AArch64PassConfig::addPreISel() {
// FIXME: On AArch64, this depends on the type.
// Basically, the addressable offsets are up to 4095 * Ty.getSizeInBytes(),
// and the offset has to be a multiple of the related size in bytes.
- if (TM->getOptLevel() != CodeGenOpt::None)
+ if (TM->getOptLevel() == CodeGenOpt::Aggressive)
addPass(createGlobalMergePass(TM, 4095));
if (TM->getOptLevel() != CodeGenOpt::None)
addPass(createAArch64AddressTypePromotionPass());
@@ -287,10 +293,7 @@ void AArch64PassConfig::addPostRegAlloc() {
// Change dead register definitions to refer to the zero register.
if (TM->getOptLevel() != CodeGenOpt::None && EnableDeadRegisterElimination)
addPass(createAArch64DeadRegisterDefinitions());
- if (TM->getOptLevel() != CodeGenOpt::None &&
- (TM->getSubtarget<AArch64Subtarget>().isCortexA53() ||
- TM->getSubtarget<AArch64Subtarget>().isCortexA57()) &&
- usingDefaultRegAlloc())
+ if (TM->getOptLevel() != CodeGenOpt::None && usingDefaultRegAlloc())
// Improve performance for some FP/SIMD code for A57.
addPass(createAArch64A57FPLoadBalancing());
}
diff --git a/lib/Target/AArch64/AArch64TargetMachine.h b/lib/Target/AArch64/AArch64TargetMachine.h
index 7143adf..ec34fad 100644
--- a/lib/Target/AArch64/AArch64TargetMachine.h
+++ b/lib/Target/AArch64/AArch64TargetMachine.h
@@ -23,9 +23,7 @@ namespace llvm {
class AArch64TargetMachine : public LLVMTargetMachine {
protected:
- const DataLayout DL;
std::unique_ptr<TargetLoweringObjectFile> TLOF;
- AArch64Subtarget Subtarget;
mutable StringMap<std::unique_ptr<AArch64Subtarget>> SubtargetMap;
public:
@@ -35,11 +33,6 @@ public:
CodeGenOpt::Level OL, bool IsLittleEndian);
~AArch64TargetMachine() override;
-
- const DataLayout *getDataLayout() const override { return &DL; }
- const AArch64Subtarget *getSubtargetImpl() const override {
- return &Subtarget;
- }
const AArch64Subtarget *getSubtargetImpl(const Function &F) const override;
// Pass Pipeline Configuration
diff --git a/lib/Target/AArch64/AArch64TargetObjectFile.cpp b/lib/Target/AArch64/AArch64TargetObjectFile.cpp
index 4069038..8ff58e9 100644
--- a/lib/Target/AArch64/AArch64TargetObjectFile.cpp
+++ b/lib/Target/AArch64/AArch64TargetObjectFile.cpp
@@ -13,6 +13,7 @@
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCStreamer.h"
+#include "llvm/MC/MCValue.h"
#include "llvm/Support/Dwarf.h"
using namespace llvm;
using namespace dwarf;
@@ -23,6 +24,11 @@ void AArch64_ELFTargetObjectFile::Initialize(MCContext &Ctx,
InitializeELF(TM.Options.UseInitArray);
}
+AArch64_MachoTargetObjectFile::AArch64_MachoTargetObjectFile()
+ : TargetLoweringObjectFileMachO() {
+ SupportGOTPCRelWithOffset = false;
+}
+
const MCExpr *AArch64_MachoTargetObjectFile::getTTypeGlobalReference(
const GlobalValue *GV, unsigned Encoding, Mangler &Mang,
const TargetMachine &TM, MachineModuleInfo *MMI,
@@ -50,3 +56,18 @@ MCSymbol *AArch64_MachoTargetObjectFile::getCFIPersonalitySymbol(
MachineModuleInfo *MMI) const {
return TM.getSymbol(GV, Mang);
}
+
+const MCExpr *AArch64_MachoTargetObjectFile::getIndirectSymViaGOTPCRel(
+ const MCSymbol *Sym, const MCValue &MV, int64_t Offset,
+ MachineModuleInfo *MMI, MCStreamer &Streamer) const {
+  assert((Offset + MV.getConstant() == 0) &&
+         "AArch64 does not support GOT PC rel with extra offset");
+ // On ARM64 Darwin, we can reference symbols with foo@GOT-., which
+ // is an indirect pc-relative reference.
+ const MCExpr *Res =
+ MCSymbolRefExpr::Create(Sym, MCSymbolRefExpr::VK_GOT, getContext());
+ MCSymbol *PCSym = getContext().CreateTempSymbol();
+ Streamer.EmitLabel(PCSym);
+ const MCExpr *PC = MCSymbolRefExpr::Create(PCSym, getContext());
+ return MCBinaryExpr::CreateSub(Res, PC, getContext());
+}
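
The new getIndirectSymViaGOTPCRel builds the MachO expression "Sym@GOT - Lpc" by pairing a VK_GOT symbol reference with a freshly emitted temporary label. A rough standalone sketch of that expression shape (the types below are illustrative, not the MC expression API):

#include <iostream>
#include <string>

struct GOTPCRelExpr {
  std::string GotRef; // the MCSymbolRefExpr with VK_GOT, e.g. "foo@GOT"
  std::string PCSym;  // temporary label emitted right at the fixup point
};

static GOTPCRelExpr makeGOTPCRel(const std::string &Sym, unsigned TempNo) {
  // Mirrors: CreateSub(SymbolRef(Sym, VK_GOT), SymbolRef(PCSym))
  return {Sym + "@GOT", "Lpc" + std::to_string(TempNo)};
}

int main() {
  GOTPCRelExpr E = makeGOTPCRel("foo", 0);
  std::cout << E.GotRef << " - " << E.PCSym << "\n"; // foo@GOT - Lpc0
}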
diff --git a/lib/Target/AArch64/AArch64TargetObjectFile.h b/lib/Target/AArch64/AArch64TargetObjectFile.h
index 2e595f9..d41f445 100644
--- a/lib/Target/AArch64/AArch64TargetObjectFile.h
+++ b/lib/Target/AArch64/AArch64TargetObjectFile.h
@@ -24,6 +24,8 @@ class AArch64_ELFTargetObjectFile : public TargetLoweringObjectFileELF {
/// AArch64_MachoTargetObjectFile - This TLOF implementation is used for Darwin.
class AArch64_MachoTargetObjectFile : public TargetLoweringObjectFileMachO {
public:
+ AArch64_MachoTargetObjectFile();
+
const MCExpr *getTTypeGlobalReference(const GlobalValue *GV,
unsigned Encoding, Mangler &Mang,
const TargetMachine &TM,
@@ -33,6 +35,11 @@ public:
MCSymbol *getCFIPersonalitySymbol(const GlobalValue *GV, Mangler &Mang,
const TargetMachine &TM,
MachineModuleInfo *MMI) const override;
+
+ const MCExpr *getIndirectSymViaGOTPCRel(const MCSymbol *Sym,
+ const MCValue &MV, int64_t Offset,
+ MachineModuleInfo *MMI,
+ MCStreamer &Streamer) const override;
};
} // end namespace llvm
diff --git a/lib/Target/AArch64/AArch64TargetTransformInfo.cpp b/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
index 0646d85..0533355 100644
--- a/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
+++ b/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
@@ -10,6 +10,7 @@
#include "AArch64TargetTransformInfo.h"
#include "MCTargetDesc/AArch64AddressingModes.h"
#include "llvm/Analysis/TargetTransformInfo.h"
+#include "llvm/Analysis/LoopInfo.h"
#include "llvm/CodeGen/BasicTTIImpl.h"
#include "llvm/Support/Debug.h"
#include "llvm/Target/CostTable.h"
@@ -352,7 +353,7 @@ unsigned AArch64TTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
// We don't lower vector selects well that are wider than the register width.
if (ValTy->isVectorTy() && ISD == ISD::SELECT) {
// We would need this many instructions to hide the scalarization happening.
- unsigned AmortizationCost = 20;
+ const unsigned AmortizationCost = 20;
static const TypeConversionCostTblEntry<MVT::SimpleValueType>
VectorSelectTbl[] = {
{ ISD::SELECT, MVT::v16i1, MVT::v16i16, 16 * AmortizationCost },
@@ -426,6 +427,15 @@ unsigned AArch64TTIImpl::getMaxInterleaveFactor() {
void AArch64TTIImpl::getUnrollingPreferences(Loop *L,
TTI::UnrollingPreferences &UP) {
+ // Enable partial unrolling and runtime unrolling.
+ BaseT::getUnrollingPreferences(L, UP);
+
+ // Inner loops are more likely to be hot, and runtime checks can often be
+ // hoisted out of them by LICM, so the overhead is lower; try a larger
+ // threshold to unroll more loops.
+ if (L->getLoopDepth() > 1)
+ UP.PartialThreshold *= 2;
+
// Disable partial & runtime unrolling on -Os.
UP.PartialOptSizeThreshold = 0;
}
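
The unrolling change above reduces to a function of loop depth. A simplified sketch, assuming a made-up base threshold and collapsing the separate PartialOptSizeThreshold field into an opt-for-size flag for illustration:

#include <cassert>

static unsigned partialThreshold(unsigned BaseThreshold, unsigned LoopDepth,
                                 bool OptForSize) {
  if (OptForSize)
    return 0;                 // stands in for UP.PartialOptSizeThreshold = 0
  if (LoopDepth > 1)
    return BaseThreshold * 2; // inner loops get twice the unrolling budget
  return BaseThreshold;
}

int main() {
  assert(partialThreshold(150, 1, false) == 150); // top-level loop
  assert(partialThreshold(150, 2, false) == 300); // nested loop, doubled
  assert(partialThreshold(150, 2, true) == 0);    // optimizing for size
}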
diff --git a/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp b/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp
index 1960c99..1219ffc 100644
--- a/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp
+++ b/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp
@@ -113,11 +113,10 @@ public:
#define GET_OPERAND_DIAGNOSTIC_TYPES
#include "AArch64GenAsmMatcher.inc"
};
- AArch64AsmParser(MCSubtargetInfo &_STI, MCAsmParser &_Parser,
- const MCInstrInfo &MII,
- const MCTargetOptions &Options)
- : MCTargetAsmParser(), STI(_STI) {
- MCAsmParserExtension::Initialize(_Parser);
+ AArch64AsmParser(MCSubtargetInfo &STI, MCAsmParser &Parser,
+ const MCInstrInfo &MII, const MCTargetOptions &Options)
+ : MCTargetAsmParser(), STI(STI) {
+ MCAsmParserExtension::Initialize(Parser);
MCStreamer &S = getParser().getStreamer();
if (S.getTargetStreamer() == nullptr)
new AArch64TargetStreamer(S);
@@ -205,6 +204,8 @@ private:
struct BarrierOp {
unsigned Val; // Not the enum since not all values have names.
+ const char *Data;
+ unsigned Length;
};
struct SysRegOp {
@@ -221,6 +222,8 @@ private:
struct PrefetchOp {
unsigned Val;
+ const char *Data;
+ unsigned Length;
};
struct ShiftExtendOp {
@@ -254,8 +257,7 @@ private:
MCContext &Ctx;
public:
- AArch64Operand(KindTy K, MCContext &_Ctx)
- : MCParsedAsmOperand(), Kind(K), Ctx(_Ctx) {}
+ AArch64Operand(KindTy K, MCContext &Ctx) : Kind(K), Ctx(Ctx) {}
AArch64Operand(const AArch64Operand &o) : MCParsedAsmOperand(), Ctx(o.Ctx) {
Kind = o.Kind;
@@ -349,6 +351,11 @@ public:
return Barrier.Val;
}
+ StringRef getBarrierName() const {
+ assert(Kind == k_Barrier && "Invalid access!");
+ return StringRef(Barrier.Data, Barrier.Length);
+ }
+
unsigned getReg() const override {
assert(Kind == k_Register && "Invalid access!");
return Reg.RegNum;
@@ -384,6 +391,11 @@ public:
return Prefetch.Val;
}
+ StringRef getPrefetchName() const {
+ assert(Kind == k_Prefetch && "Invalid access!");
+ return StringRef(Prefetch.Data, Prefetch.Length);
+ }
+
AArch64_AM::ShiftExtendType getShiftExtendType() const {
assert(Kind == k_ShiftExtend && "Invalid access!");
return ShiftExtend.Type;
@@ -752,58 +764,47 @@ public:
}
bool isMovZSymbolG3() const {
- static AArch64MCExpr::VariantKind Variants[] = { AArch64MCExpr::VK_ABS_G3 };
- return isMovWSymbol(Variants);
+ return isMovWSymbol(AArch64MCExpr::VK_ABS_G3);
}
bool isMovZSymbolG2() const {
- static AArch64MCExpr::VariantKind Variants[] = {
- AArch64MCExpr::VK_ABS_G2, AArch64MCExpr::VK_ABS_G2_S,
- AArch64MCExpr::VK_TPREL_G2, AArch64MCExpr::VK_DTPREL_G2};
- return isMovWSymbol(Variants);
+ return isMovWSymbol({AArch64MCExpr::VK_ABS_G2, AArch64MCExpr::VK_ABS_G2_S,
+ AArch64MCExpr::VK_TPREL_G2,
+ AArch64MCExpr::VK_DTPREL_G2});
}
bool isMovZSymbolG1() const {
- static AArch64MCExpr::VariantKind Variants[] = {
- AArch64MCExpr::VK_ABS_G1, AArch64MCExpr::VK_ABS_G1_S,
+ return isMovWSymbol({
+ AArch64MCExpr::VK_ABS_G1, AArch64MCExpr::VK_ABS_G1_S,
AArch64MCExpr::VK_GOTTPREL_G1, AArch64MCExpr::VK_TPREL_G1,
AArch64MCExpr::VK_DTPREL_G1,
- };
- return isMovWSymbol(Variants);
+ });
}
bool isMovZSymbolG0() const {
- static AArch64MCExpr::VariantKind Variants[] = {
- AArch64MCExpr::VK_ABS_G0, AArch64MCExpr::VK_ABS_G0_S,
- AArch64MCExpr::VK_TPREL_G0, AArch64MCExpr::VK_DTPREL_G0};
- return isMovWSymbol(Variants);
+ return isMovWSymbol({AArch64MCExpr::VK_ABS_G0, AArch64MCExpr::VK_ABS_G0_S,
+ AArch64MCExpr::VK_TPREL_G0,
+ AArch64MCExpr::VK_DTPREL_G0});
}
bool isMovKSymbolG3() const {
- static AArch64MCExpr::VariantKind Variants[] = { AArch64MCExpr::VK_ABS_G3 };
- return isMovWSymbol(Variants);
+ return isMovWSymbol(AArch64MCExpr::VK_ABS_G3);
}
bool isMovKSymbolG2() const {
- static AArch64MCExpr::VariantKind Variants[] = {
- AArch64MCExpr::VK_ABS_G2_NC};
- return isMovWSymbol(Variants);
+ return isMovWSymbol(AArch64MCExpr::VK_ABS_G2_NC);
}
bool isMovKSymbolG1() const {
- static AArch64MCExpr::VariantKind Variants[] = {
- AArch64MCExpr::VK_ABS_G1_NC, AArch64MCExpr::VK_TPREL_G1_NC,
- AArch64MCExpr::VK_DTPREL_G1_NC
- };
- return isMovWSymbol(Variants);
+ return isMovWSymbol({AArch64MCExpr::VK_ABS_G1_NC,
+ AArch64MCExpr::VK_TPREL_G1_NC,
+ AArch64MCExpr::VK_DTPREL_G1_NC});
}
bool isMovKSymbolG0() const {
- static AArch64MCExpr::VariantKind Variants[] = {
- AArch64MCExpr::VK_ABS_G0_NC, AArch64MCExpr::VK_GOTTPREL_G0_NC,
- AArch64MCExpr::VK_TPREL_G0_NC, AArch64MCExpr::VK_DTPREL_G0_NC
- };
- return isMovWSymbol(Variants);
+ return isMovWSymbol(
+ {AArch64MCExpr::VK_ABS_G0_NC, AArch64MCExpr::VK_GOTTPREL_G0_NC,
+ AArch64MCExpr::VK_TPREL_G0_NC, AArch64MCExpr::VK_DTPREL_G0_NC});
}
template<int RegWidth, int Shift>
@@ -1608,10 +1609,14 @@ public:
return Op;
}
- static std::unique_ptr<AArch64Operand> CreateBarrier(unsigned Val, SMLoc S,
+ static std::unique_ptr<AArch64Operand> CreateBarrier(unsigned Val,
+ StringRef Str,
+ SMLoc S,
MCContext &Ctx) {
auto Op = make_unique<AArch64Operand>(k_Barrier, Ctx);
Op->Barrier.Val = Val;
+ Op->Barrier.Data = Str.data();
+ Op->Barrier.Length = Str.size();
Op->StartLoc = S;
Op->EndLoc = S;
return Op;
@@ -1642,10 +1647,14 @@ public:
return Op;
}
- static std::unique_ptr<AArch64Operand> CreatePrefetch(unsigned Val, SMLoc S,
+ static std::unique_ptr<AArch64Operand> CreatePrefetch(unsigned Val,
+ StringRef Str,
+ SMLoc S,
MCContext &Ctx) {
auto Op = make_unique<AArch64Operand>(k_Prefetch, Ctx);
Op->Prefetch.Val = Val;
+ Op->Prefetch.Data = Str.data();
+ Op->Prefetch.Length = Str.size();
Op->StartLoc = S;
Op->EndLoc = S;
return Op;
@@ -1673,9 +1682,8 @@ void AArch64Operand::print(raw_ostream &OS) const {
<< AArch64_AM::getFPImmFloat(getFPImm()) << ") >";
break;
case k_Barrier: {
- bool Valid;
- StringRef Name = AArch64DB::DBarrierMapper().toString(getBarrier(), Valid);
- if (Valid)
+ StringRef Name = getBarrierName();
+ if (!Name.empty())
OS << "<barrier " << Name << ">";
else
OS << "<barrier invalid #" << getBarrier() << ">";
@@ -1718,9 +1726,8 @@ void AArch64Operand::print(raw_ostream &OS) const {
OS << "c" << getSysCR();
break;
case k_Prefetch: {
- bool Valid;
- StringRef Name = AArch64PRFM::PRFMMapper().toString(getPrefetch(), Valid);
- if (Valid)
+ StringRef Name = getPrefetchName();
+ if (!Name.empty())
OS << "<prfop " << Name << ">";
else
OS << "<prfop invalid #" << getPrefetch() << ">";
@@ -1963,7 +1970,11 @@ AArch64AsmParser::tryParsePrefetch(OperandVector &Operands) {
return MatchOperand_ParseFail;
}
- Operands.push_back(AArch64Operand::CreatePrefetch(prfop, S, getContext()));
+ bool Valid;
+ auto Mapper = AArch64PRFM::PRFMMapper();
+ StringRef Name = Mapper.toString(MCE->getValue(), Valid);
+ Operands.push_back(AArch64Operand::CreatePrefetch(prfop, Name,
+ S, getContext()));
return MatchOperand_Success;
}
@@ -1973,14 +1984,16 @@ AArch64AsmParser::tryParsePrefetch(OperandVector &Operands) {
}
bool Valid;
- unsigned prfop = AArch64PRFM::PRFMMapper().fromString(Tok.getString(), Valid);
+ auto Mapper = AArch64PRFM::PRFMMapper();
+ unsigned prfop = Mapper.fromString(Tok.getString(), Valid);
if (!Valid) {
TokError("pre-fetch hint expected");
return MatchOperand_ParseFail;
}
Parser.Lex(); // Eat identifier token.
- Operands.push_back(AArch64Operand::CreatePrefetch(prfop, S, getContext()));
+ Operands.push_back(AArch64Operand::CreatePrefetch(prfop, Tok.getString(),
+ S, getContext()));
return MatchOperand_Success;
}
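
The Barrier and Prefetch operands now remember the exact spelling the user wrote as a (Data, Length) pair, so print() can echo it back without a reverse toString() lookup, while operands created from a bare immediate keep an empty name and fall back to printing the number. A standalone sketch of that pattern (PrefetchOp here is a stand-in, not the parser's union member):

#include <iostream>
#include <string>

struct PrefetchOp {
  unsigned Val;
  const char *Data; // points into the parsed token, as in the patch
  unsigned Length;
};

static PrefetchOp makePrefetch(unsigned Val, const std::string &Name) {
  return {Val, Name.data(), static_cast<unsigned>(Name.size())};
}

static void print(const PrefetchOp &Op) {
  std::string Name(Op.Data, Op.Length);
  if (!Name.empty())
    std::cout << "<prfop " << Name << ">\n";
  else
    std::cout << "<prfop invalid #" << Op.Val << ">\n";
}

int main() {
  std::string Tok = "pldl1keep", None;
  print(makePrefetch(0, Tok));   // <prfop pldl1keep>
  print(makePrefetch(42, None)); // <prfop invalid #42>
}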
@@ -2582,8 +2595,11 @@ AArch64AsmParser::tryParseBarrierOperand(OperandVector &Operands) {
Error(ExprLoc, "barrier operand out of range");
return MatchOperand_ParseFail;
}
- Operands.push_back(
- AArch64Operand::CreateBarrier(MCE->getValue(), ExprLoc, getContext()));
+ bool Valid;
+ auto Mapper = AArch64DB::DBarrierMapper();
+ StringRef Name = Mapper.toString(MCE->getValue(), Valid);
+ Operands.push_back(AArch64Operand::CreateBarrier(MCE->getValue(), Name,
+ ExprLoc, getContext()));
return MatchOperand_Success;
}
@@ -2593,7 +2609,8 @@ AArch64AsmParser::tryParseBarrierOperand(OperandVector &Operands) {
}
bool Valid;
- unsigned Opt = AArch64DB::DBarrierMapper().fromString(Tok.getString(), Valid);
+ auto Mapper = AArch64DB::DBarrierMapper();
+ unsigned Opt = Mapper.fromString(Tok.getString(), Valid);
if (!Valid) {
TokError("invalid barrier option name");
return MatchOperand_ParseFail;
@@ -2605,8 +2622,8 @@ AArch64AsmParser::tryParseBarrierOperand(OperandVector &Operands) {
return MatchOperand_ParseFail;
}
- Operands.push_back(
- AArch64Operand::CreateBarrier(Opt, getLoc(), getContext()));
+ Operands.push_back(AArch64Operand::CreateBarrier(Opt, Tok.getString(),
+ getLoc(), getContext()));
Parser.Lex(); // Consume the option
return MatchOperand_Success;
@@ -2631,8 +2648,8 @@ AArch64AsmParser::tryParseSysReg(OperandVector &Operands) {
assert(IsKnown == (MSRReg != -1U) &&
"register should be -1 if and only if it's unknown");
- uint32_t PStateField =
- AArch64PState::PStateMapper().fromString(Tok.getString(), IsKnown);
+ auto PStateMapper = AArch64PState::PStateMapper();
+ uint32_t PStateField = PStateMapper.fromString(Tok.getString(), IsKnown);
assert(IsKnown == (PStateField != -1U) &&
"register should be -1 if and only if it's unknown");
diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64AsmBackend.cpp b/lib/Target/AArch64/MCTargetDesc/AArch64AsmBackend.cpp
index 423da65..84b63a0 100644
--- a/lib/Target/AArch64/MCTargetDesc/AArch64AsmBackend.cpp
+++ b/lib/Target/AArch64/MCTargetDesc/AArch64AsmBackend.cpp
@@ -18,6 +18,7 @@
#include "llvm/MC/MCObjectWriter.h"
#include "llvm/MC/MCSectionELF.h"
#include "llvm/MC/MCSectionMachO.h"
+#include "llvm/MC/MCValue.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MachO.h"
using namespace llvm;
@@ -493,14 +494,28 @@ void ELFAArch64AsmBackend::processFixupValue(
IsResolved = false;
}
+// Returns whether this fixup is based on an address in the .eh_frame section,
+// and therefore should be byte swapped.
+// FIXME: Should be replaced with something more principled.
+static bool isByteSwappedFixup(const MCExpr *E) {
+ MCValue Val;
+ if (!E->EvaluateAsRelocatable(Val, nullptr, nullptr))
+ return false;
+
+ if (!Val.getSymA() || Val.getSymA()->getSymbol().isUndefined())
+ return false;
+
+ const MCSectionELF *SecELF =
+ dyn_cast<MCSectionELF>(&Val.getSymA()->getSymbol().getSection());
+ return SecELF->getSectionName() == ".eh_frame";
+}
+
void ELFAArch64AsmBackend::applyFixup(const MCFixup &Fixup, char *Data,
unsigned DataSize, uint64_t Value,
bool IsPCRel) const {
// store fixups in .eh_frame section in big endian order
if (!IsLittleEndian && Fixup.getKind() == FK_Data_4) {
- const MCSection *Sec = Fixup.getValue()->FindAssociatedSection();
- const MCSectionELF *SecELF = dyn_cast_or_null<const MCSectionELF>(Sec);
- if (SecELF && SecELF->getSectionName() == ".eh_frame")
+ if (isByteSwappedFixup(Fixup.getValue()))
Value = ByteSwap_32(unsigned(Value));
}
AArch64AsmBackend::applyFixup (Fixup, Data, DataSize, Value, IsPCRel);
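
The refactoring above swaps FindAssociatedSection for an EvaluateAsRelocatable-based check, but the decision it feeds is unchanged: 4-byte fixup values whose target lives in .eh_frame are stored byte-swapped on big-endian AArch64. A standalone sketch of that decision, with the section lookup reduced to a plain string for illustration:

#include <cassert>
#include <cstdint>
#include <string>

static uint32_t byteSwap32(uint32_t V) {
  return (V << 24) | ((V & 0xff00u) << 8) | ((V >> 8) & 0xff00u) | (V >> 24);
}

static uint32_t applyDataFixup(uint32_t Value, bool IsLittleEndian,
                               const std::string &SectionName) {
  if (!IsLittleEndian && SectionName == ".eh_frame")
    return byteSwap32(Value); // stored big-endian, per the comment above
  return Value;
}

int main() {
  assert(applyDataFixup(0x11223344, false, ".eh_frame") == 0x44332211);
  assert(applyDataFixup(0x11223344, false, ".text") == 0x11223344);
  assert(applyDataFixup(0x11223344, true, ".eh_frame") == 0x11223344);
}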
diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.cpp b/lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.cpp
index 8dc6c30..8f780d2 100644
--- a/lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.cpp
+++ b/lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.cpp
@@ -203,24 +203,27 @@ void AArch64TargetELFStreamer::emitInst(uint32_t Inst) {
}
namespace llvm {
-MCStreamer *
-createAArch64MCAsmStreamer(MCContext &Ctx, formatted_raw_ostream &OS,
- bool isVerboseAsm, bool useDwarfDirectory,
- MCInstPrinter *InstPrint, MCCodeEmitter *CE,
- MCAsmBackend *TAB, bool ShowInst) {
- MCStreamer *S = llvm::createAsmStreamer(
- Ctx, OS, isVerboseAsm, useDwarfDirectory, InstPrint, CE, TAB, ShowInst);
- new AArch64TargetAsmStreamer(*S, OS);
- return S;
+MCTargetStreamer *createAArch64AsmTargetStreamer(MCStreamer &S,
+ formatted_raw_ostream &OS,
+ MCInstPrinter *InstPrint,
+ bool isVerboseAsm) {
+ return new AArch64TargetAsmStreamer(S, OS);
}
MCELFStreamer *createAArch64ELFStreamer(MCContext &Context, MCAsmBackend &TAB,
raw_ostream &OS, MCCodeEmitter *Emitter,
bool RelaxAll) {
AArch64ELFStreamer *S = new AArch64ELFStreamer(Context, TAB, OS, Emitter);
- new AArch64TargetELFStreamer(*S);
if (RelaxAll)
S->getAssembler().setRelaxAll(true);
return S;
}
+
+MCTargetStreamer *
+createAArch64ObjectTargetStreamer(MCStreamer &S, const MCSubtargetInfo &STI) {
+ Triple TT(STI.getTargetTriple());
+ if (TT.getObjectFormat() == Triple::ELF)
+ return new AArch64TargetELFStreamer(S);
+ return nullptr;
+}
}
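
The streamer creation above is split along the new TargetRegistry seams: an asm target streamer that always wraps the MCStreamer, and an object target streamer hook that inspects the object format and returns nullptr when no target streamer is needed (the MachO case). A rough standalone sketch of that dispatch, with stand-in types:

#include <cassert>
#include <memory>

enum class ObjectFormat { ELF, MachO };

struct TargetStreamer { virtual ~TargetStreamer() {} };
struct ELFTargetStreamer : TargetStreamer {};

// Mirrors createAArch64ObjectTargetStreamer: only ELF gets a target streamer.
static std::unique_ptr<TargetStreamer>
createObjectTargetStreamer(ObjectFormat OF) {
  if (OF == ObjectFormat::ELF)
    return std::unique_ptr<TargetStreamer>(new ELFTargetStreamer());
  return nullptr;
}

int main() {
  assert(createObjectTargetStreamer(ObjectFormat::ELF) != nullptr);
  assert(createObjectTargetStreamer(ObjectFormat::MachO) == nullptr);
}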
diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64MCCodeEmitter.cpp b/lib/Target/AArch64/MCTargetDesc/AArch64MCCodeEmitter.cpp
index 4756a19..9ea49f0 100644
--- a/lib/Target/AArch64/MCTargetDesc/AArch64MCCodeEmitter.cpp
+++ b/lib/Target/AArch64/MCTargetDesc/AArch64MCCodeEmitter.cpp
@@ -38,9 +38,7 @@ class AArch64MCCodeEmitter : public MCCodeEmitter {
AArch64MCCodeEmitter(const AArch64MCCodeEmitter &); // DO NOT IMPLEMENT
void operator=(const AArch64MCCodeEmitter &); // DO NOT IMPLEMENT
public:
- AArch64MCCodeEmitter(const MCInstrInfo &mcii, const MCSubtargetInfo &sti,
- MCContext &ctx)
- : Ctx(ctx) {}
+ AArch64MCCodeEmitter(const MCInstrInfo &mcii, MCContext &ctx) : Ctx(ctx) {}
~AArch64MCCodeEmitter() {}
@@ -205,9 +203,8 @@ public:
MCCodeEmitter *llvm::createAArch64MCCodeEmitter(const MCInstrInfo &MCII,
const MCRegisterInfo &MRI,
- const MCSubtargetInfo &STI,
MCContext &Ctx) {
- return new AArch64MCCodeEmitter(MCII, STI, Ctx);
+ return new AArch64MCCodeEmitter(MCII, Ctx);
}
/// getMachineOpValue - Return binary encoding of operand. If the machine
diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64MCExpr.cpp b/lib/Target/AArch64/MCTargetDesc/AArch64MCExpr.cpp
index e396df8..9e31508 100644
--- a/lib/Target/AArch64/MCTargetDesc/AArch64MCExpr.cpp
+++ b/lib/Target/AArch64/MCTargetDesc/AArch64MCExpr.cpp
@@ -16,6 +16,7 @@
#include "llvm/MC/MCAssembler.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCELF.h"
+#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSymbol.h"
#include "llvm/MC/MCValue.h"
#include "llvm/Object/ELF.h"
diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.cpp b/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.cpp
index 0f7a6b8..38b399d 100644
--- a/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.cpp
+++ b/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.cpp
@@ -123,94 +123,61 @@ static MCInstPrinter *createAArch64MCInstPrinter(const Target &T,
return nullptr;
}
-static MCStreamer *createMCStreamer(const Target &T, StringRef TT,
- MCContext &Ctx, MCAsmBackend &TAB,
- raw_ostream &OS, MCCodeEmitter *Emitter,
- const MCSubtargetInfo &STI, bool RelaxAll) {
- Triple TheTriple(TT);
-
- if (TheTriple.isOSDarwin())
- return createMachOStreamer(Ctx, TAB, OS, Emitter, RelaxAll,
- /*LabelSections*/ true);
-
+static MCStreamer *createELFStreamer(const Triple &T, MCContext &Ctx,
+ MCAsmBackend &TAB, raw_ostream &OS,
+ MCCodeEmitter *Emitter, bool RelaxAll) {
return createAArch64ELFStreamer(Ctx, TAB, OS, Emitter, RelaxAll);
}
+static MCStreamer *createMachOStreamer(MCContext &Ctx, MCAsmBackend &TAB,
+ raw_ostream &OS, MCCodeEmitter *Emitter,
+ bool RelaxAll,
+ bool DWARFMustBeAtTheEnd) {
+ return createMachOStreamer(Ctx, TAB, OS, Emitter, RelaxAll,
+ DWARFMustBeAtTheEnd,
+ /*LabelSections*/ true);
+}
+
// Force static initialization.
extern "C" void LLVMInitializeAArch64TargetMC() {
- // Register the MC asm info.
- RegisterMCAsmInfoFn X(TheAArch64leTarget, createAArch64MCAsmInfo);
- RegisterMCAsmInfoFn Y(TheAArch64beTarget, createAArch64MCAsmInfo);
- RegisterMCAsmInfoFn Z(TheARM64Target, createAArch64MCAsmInfo);
-
- // Register the MC codegen info.
- TargetRegistry::RegisterMCCodeGenInfo(TheAArch64leTarget,
- createAArch64MCCodeGenInfo);
- TargetRegistry::RegisterMCCodeGenInfo(TheAArch64beTarget,
- createAArch64MCCodeGenInfo);
- TargetRegistry::RegisterMCCodeGenInfo(TheARM64Target,
- createAArch64MCCodeGenInfo);
-
- // Register the MC instruction info.
- TargetRegistry::RegisterMCInstrInfo(TheAArch64leTarget,
- createAArch64MCInstrInfo);
- TargetRegistry::RegisterMCInstrInfo(TheAArch64beTarget,
- createAArch64MCInstrInfo);
- TargetRegistry::RegisterMCInstrInfo(TheARM64Target,
- createAArch64MCInstrInfo);
-
- // Register the MC register info.
- TargetRegistry::RegisterMCRegInfo(TheAArch64leTarget,
- createAArch64MCRegisterInfo);
- TargetRegistry::RegisterMCRegInfo(TheAArch64beTarget,
- createAArch64MCRegisterInfo);
- TargetRegistry::RegisterMCRegInfo(TheARM64Target,
- createAArch64MCRegisterInfo);
-
- // Register the MC subtarget info.
- TargetRegistry::RegisterMCSubtargetInfo(TheAArch64leTarget,
- createAArch64MCSubtargetInfo);
- TargetRegistry::RegisterMCSubtargetInfo(TheAArch64beTarget,
- createAArch64MCSubtargetInfo);
- TargetRegistry::RegisterMCSubtargetInfo(TheARM64Target,
- createAArch64MCSubtargetInfo);
+ for (Target *T :
+ {&TheAArch64leTarget, &TheAArch64beTarget, &TheARM64Target}) {
+ // Register the MC asm info.
+ RegisterMCAsmInfoFn X(*T, createAArch64MCAsmInfo);
+
+ // Register the MC codegen info.
+ TargetRegistry::RegisterMCCodeGenInfo(*T, createAArch64MCCodeGenInfo);
+
+ // Register the MC instruction info.
+ TargetRegistry::RegisterMCInstrInfo(*T, createAArch64MCInstrInfo);
+
+ // Register the MC register info.
+ TargetRegistry::RegisterMCRegInfo(*T, createAArch64MCRegisterInfo);
+
+ // Register the MC subtarget info.
+ TargetRegistry::RegisterMCSubtargetInfo(*T, createAArch64MCSubtargetInfo);
+
+ // Register the MC Code Emitter
+ TargetRegistry::RegisterMCCodeEmitter(*T, createAArch64MCCodeEmitter);
+
+ // Register the obj streamers.
+ TargetRegistry::RegisterELFStreamer(*T, createELFStreamer);
+ TargetRegistry::RegisterMachOStreamer(*T, createMachOStreamer);
+
+ // Register the obj target streamer.
+ TargetRegistry::RegisterObjectTargetStreamer(
+ *T, createAArch64ObjectTargetStreamer);
+
+ // Register the asm streamer.
+ TargetRegistry::RegisterAsmTargetStreamer(*T,
+ createAArch64AsmTargetStreamer);
+ // Register the MCInstPrinter.
+ TargetRegistry::RegisterMCInstPrinter(*T, createAArch64MCInstPrinter);
+ }
// Register the asm backend.
- TargetRegistry::RegisterMCAsmBackend(TheAArch64leTarget,
- createAArch64leAsmBackend);
+ for (Target *T : {&TheAArch64leTarget, &TheARM64Target})
+ TargetRegistry::RegisterMCAsmBackend(*T, createAArch64leAsmBackend);
TargetRegistry::RegisterMCAsmBackend(TheAArch64beTarget,
createAArch64beAsmBackend);
- TargetRegistry::RegisterMCAsmBackend(TheARM64Target,
- createAArch64leAsmBackend);
-
- // Register the MC Code Emitter
- TargetRegistry::RegisterMCCodeEmitter(TheAArch64leTarget,
- createAArch64MCCodeEmitter);
- TargetRegistry::RegisterMCCodeEmitter(TheAArch64beTarget,
- createAArch64MCCodeEmitter);
- TargetRegistry::RegisterMCCodeEmitter(TheARM64Target,
- createAArch64MCCodeEmitter);
-
- // Register the object streamer.
- TargetRegistry::RegisterMCObjectStreamer(TheAArch64leTarget,
- createMCStreamer);
- TargetRegistry::RegisterMCObjectStreamer(TheAArch64beTarget,
- createMCStreamer);
- TargetRegistry::RegisterMCObjectStreamer(TheARM64Target, createMCStreamer);
-
- // Register the asm streamer.
- TargetRegistry::RegisterAsmStreamer(TheAArch64leTarget,
- createAArch64MCAsmStreamer);
- TargetRegistry::RegisterAsmStreamer(TheAArch64beTarget,
- createAArch64MCAsmStreamer);
- TargetRegistry::RegisterAsmStreamer(TheARM64Target,
- createAArch64MCAsmStreamer);
-
- // Register the MCInstPrinter.
- TargetRegistry::RegisterMCInstPrinter(TheAArch64leTarget,
- createAArch64MCInstPrinter);
- TargetRegistry::RegisterMCInstPrinter(TheAArch64beTarget,
- createAArch64MCInstPrinter);
- TargetRegistry::RegisterMCInstPrinter(TheARM64Target,
- createAArch64MCInstPrinter);
}
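
The registration rewrite above folds three near-identical call sites per MC component into one loop over the target descriptors. The same pattern, standalone (Target and the callback are stand-ins for the TargetRegistry types):

#include <functional>
#include <initializer_list>
#include <iostream>
#include <string>

struct Target { std::string Name; };

static void registerAll(std::initializer_list<Target *> Targets,
                        const std::function<void(Target &)> &RegisterFn) {
  for (Target *T : Targets)
    RegisterFn(*T);
}

int main() {
  Target LE{"aarch64"}, BE{"aarch64_be"}, ARM64{"arm64"};
  registerAll({&LE, &BE, &ARM64}, [](Target &T) {
    std::cout << "registered MC components for " << T.Name << "\n";
  });
}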
diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.h b/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.h
index 1553115..7ce303b 100644
--- a/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.h
+++ b/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.h
@@ -28,8 +28,10 @@ class MCRegisterInfo;
class MCObjectWriter;
class MCStreamer;
class MCSubtargetInfo;
+class MCTargetStreamer;
class StringRef;
class Target;
+class Triple;
class raw_ostream;
extern Target TheAArch64leTarget;
@@ -37,9 +39,8 @@ extern Target TheAArch64beTarget;
extern Target TheARM64Target;
MCCodeEmitter *createAArch64MCCodeEmitter(const MCInstrInfo &MCII,
- const MCRegisterInfo &MRI,
- const MCSubtargetInfo &STI,
- MCContext &Ctx);
+ const MCRegisterInfo &MRI,
+ MCContext &Ctx);
MCAsmBackend *createAArch64leAsmBackend(const Target &T,
const MCRegisterInfo &MRI, StringRef TT,
StringRef CPU);
@@ -53,11 +54,14 @@ MCObjectWriter *createAArch64ELFObjectWriter(raw_ostream &OS, uint8_t OSABI,
MCObjectWriter *createAArch64MachObjectWriter(raw_ostream &OS, uint32_t CPUType,
uint32_t CPUSubtype);
-MCStreamer *
-createAArch64MCAsmStreamer(MCContext &Ctx, formatted_raw_ostream &OS,
- bool isVerboseAsm, bool useDwarfDirectory,
- MCInstPrinter *InstPrint, MCCodeEmitter *CE,
- MCAsmBackend *TAB, bool ShowInst);
+MCTargetStreamer *createAArch64AsmTargetStreamer(MCStreamer &S,
+ formatted_raw_ostream &OS,
+ MCInstPrinter *InstPrint,
+ bool isVerboseAsm);
+
+MCTargetStreamer *createAArch64ObjectTargetStreamer(MCStreamer &S,
+ const MCSubtargetInfo &STI);
+
} // End llvm namespace
// Defines symbolic names for AArch64 registers. This defines a mapping from
diff --git a/lib/Target/AArch64/Utils/AArch64BaseInfo.cpp b/lib/Target/AArch64/Utils/AArch64BaseInfo.cpp
index bc6c7a9..160c1c5 100644
--- a/lib/Target/AArch64/Utils/AArch64BaseInfo.cpp
+++ b/lib/Target/AArch64/Utils/AArch64BaseInfo.cpp
@@ -19,10 +19,10 @@
using namespace llvm;
StringRef AArch64NamedImmMapper::toString(uint32_t Value, bool &Valid) const {
- for (unsigned i = 0; i < NumPairs; ++i) {
- if (Pairs[i].Value == Value) {
+ for (unsigned i = 0; i < NumMappings; ++i) {
+ if (Mappings[i].Value == Value) {
Valid = true;
- return Pairs[i].Name;
+ return Mappings[i].Name;
}
}
@@ -32,10 +32,10 @@ StringRef AArch64NamedImmMapper::toString(uint32_t Value, bool &Valid) const {
uint32_t AArch64NamedImmMapper::fromString(StringRef Name, bool &Valid) const {
std::string LowerCaseName = Name.lower();
- for (unsigned i = 0; i < NumPairs; ++i) {
- if (Pairs[i].Name == LowerCaseName) {
+ for (unsigned i = 0; i < NumMappings; ++i) {
+ if (Mappings[i].Name == LowerCaseName) {
Valid = true;
- return Pairs[i].Value;
+ return Mappings[i].Value;
}
}
@@ -47,7 +47,7 @@ bool AArch64NamedImmMapper::validImm(uint32_t Value) const {
return Value < TooBigImm;
}
-const AArch64NamedImmMapper::Mapping AArch64AT::ATMapper::ATPairs[] = {
+const AArch64NamedImmMapper::Mapping AArch64AT::ATMapper::ATMappings[] = {
{"s1e1r", S1E1R},
{"s1e2r", S1E2R},
{"s1e3r", S1E3R},
@@ -63,9 +63,9 @@ const AArch64NamedImmMapper::Mapping AArch64AT::ATMapper::ATPairs[] = {
};
AArch64AT::ATMapper::ATMapper()
- : AArch64NamedImmMapper(ATPairs, 0) {}
+ : AArch64NamedImmMapper(ATMappings, 0) {}
-const AArch64NamedImmMapper::Mapping AArch64DB::DBarrierMapper::DBarrierPairs[] = {
+const AArch64NamedImmMapper::Mapping AArch64DB::DBarrierMapper::DBarrierMappings[] = {
{"oshld", OSHLD},
{"oshst", OSHST},
{"osh", OSH},
@@ -81,9 +81,9 @@ const AArch64NamedImmMapper::Mapping AArch64DB::DBarrierMapper::DBarrierPairs[]
};
AArch64DB::DBarrierMapper::DBarrierMapper()
- : AArch64NamedImmMapper(DBarrierPairs, 16u) {}
+ : AArch64NamedImmMapper(DBarrierMappings, 16u) {}
-const AArch64NamedImmMapper::Mapping AArch64DC::DCMapper::DCPairs[] = {
+const AArch64NamedImmMapper::Mapping AArch64DC::DCMapper::DCMappings[] = {
{"zva", ZVA},
{"ivac", IVAC},
{"isw", ISW},
@@ -95,25 +95,25 @@ const AArch64NamedImmMapper::Mapping AArch64DC::DCMapper::DCPairs[] = {
};
AArch64DC::DCMapper::DCMapper()
- : AArch64NamedImmMapper(DCPairs, 0) {}
+ : AArch64NamedImmMapper(DCMappings, 0) {}
-const AArch64NamedImmMapper::Mapping AArch64IC::ICMapper::ICPairs[] = {
+const AArch64NamedImmMapper::Mapping AArch64IC::ICMapper::ICMappings[] = {
{"ialluis", IALLUIS},
{"iallu", IALLU},
{"ivau", IVAU}
};
AArch64IC::ICMapper::ICMapper()
- : AArch64NamedImmMapper(ICPairs, 0) {}
+ : AArch64NamedImmMapper(ICMappings, 0) {}
-const AArch64NamedImmMapper::Mapping AArch64ISB::ISBMapper::ISBPairs[] = {
+const AArch64NamedImmMapper::Mapping AArch64ISB::ISBMapper::ISBMappings[] = {
{"sy", SY},
};
AArch64ISB::ISBMapper::ISBMapper()
- : AArch64NamedImmMapper(ISBPairs, 16) {}
+ : AArch64NamedImmMapper(ISBMappings, 16) {}
-const AArch64NamedImmMapper::Mapping AArch64PRFM::PRFMMapper::PRFMPairs[] = {
+const AArch64NamedImmMapper::Mapping AArch64PRFM::PRFMMapper::PRFMMappings[] = {
{"pldl1keep", PLDL1KEEP},
{"pldl1strm", PLDL1STRM},
{"pldl2keep", PLDL2KEEP},
@@ -135,18 +135,18 @@ const AArch64NamedImmMapper::Mapping AArch64PRFM::PRFMMapper::PRFMPairs[] = {
};
AArch64PRFM::PRFMMapper::PRFMMapper()
- : AArch64NamedImmMapper(PRFMPairs, 32) {}
+ : AArch64NamedImmMapper(PRFMMappings, 32) {}
-const AArch64NamedImmMapper::Mapping AArch64PState::PStateMapper::PStatePairs[] = {
+const AArch64NamedImmMapper::Mapping AArch64PState::PStateMapper::PStateMappings[] = {
{"spsel", SPSel},
{"daifset", DAIFSet},
{"daifclr", DAIFClr}
};
AArch64PState::PStateMapper::PStateMapper()
- : AArch64NamedImmMapper(PStatePairs, 0) {}
+ : AArch64NamedImmMapper(PStateMappings, 0) {}
-const AArch64NamedImmMapper::Mapping AArch64SysReg::MRSMapper::MRSPairs[] = {
+const AArch64NamedImmMapper::Mapping AArch64SysReg::MRSMapper::MRSMappings[] = {
{"mdccsr_el0", MDCCSR_EL0},
{"dbgdtrrx_el0", DBGDTRRX_EL0},
{"mdrar_el1", MDRAR_EL1},
@@ -247,11 +247,11 @@ const AArch64NamedImmMapper::Mapping AArch64SysReg::MRSMapper::MRSPairs[] = {
AArch64SysReg::MRSMapper::MRSMapper(uint64_t FeatureBits)
: SysRegMapper(FeatureBits) {
- InstPairs = &MRSPairs[0];
- NumInstPairs = llvm::array_lengthof(MRSPairs);
+ InstMappings = &MRSMappings[0];
+ NumInstMappings = llvm::array_lengthof(MRSMappings);
}
-const AArch64NamedImmMapper::Mapping AArch64SysReg::MSRMapper::MSRPairs[] = {
+const AArch64NamedImmMapper::Mapping AArch64SysReg::MSRMapper::MSRMappings[] = {
{"dbgdtrtx_el0", DBGDTRTX_EL0},
{"oslar_el1", OSLAR_EL1},
{"pmswinc_el0", PMSWINC_EL0},
@@ -271,12 +271,12 @@ const AArch64NamedImmMapper::Mapping AArch64SysReg::MSRMapper::MSRPairs[] = {
AArch64SysReg::MSRMapper::MSRMapper(uint64_t FeatureBits)
: SysRegMapper(FeatureBits) {
- InstPairs = &MSRPairs[0];
- NumInstPairs = llvm::array_lengthof(MSRPairs);
+ InstMappings = &MSRMappings[0];
+ NumInstMappings = llvm::array_lengthof(MSRMappings);
}
-const AArch64NamedImmMapper::Mapping AArch64SysReg::SysRegMapper::SysRegPairs[] = {
+const AArch64NamedImmMapper::Mapping AArch64SysReg::SysRegMapper::SysRegMappings[] = {
{"osdtrrx_el1", OSDTRRX_EL1},
{"osdtrtx_el1", OSDTRTX_EL1},
{"teecr32_el1", TEECR32_EL1},
@@ -756,7 +756,7 @@ const AArch64NamedImmMapper::Mapping AArch64SysReg::SysRegMapper::SysRegPairs[]
};
const AArch64NamedImmMapper::Mapping
-AArch64SysReg::SysRegMapper::CycloneSysRegPairs[] = {
+AArch64SysReg::SysRegMapper::CycloneSysRegMappings[] = {
{"cpm_ioacc_ctl_el3", CPM_IOACC_CTL_EL3}
};
@@ -765,29 +765,29 @@ AArch64SysReg::SysRegMapper::fromString(StringRef Name, bool &Valid) const {
std::string NameLower = Name.lower();
// First search the registers shared by all
- for (unsigned i = 0; i < array_lengthof(SysRegPairs); ++i) {
- if (SysRegPairs[i].Name == NameLower) {
+ for (unsigned i = 0; i < array_lengthof(SysRegMappings); ++i) {
+ if (SysRegMappings[i].Name == NameLower) {
Valid = true;
- return SysRegPairs[i].Value;
+ return SysRegMappings[i].Value;
}
}
// Next search for target specific registers
if (FeatureBits & AArch64::ProcCyclone) {
- for (unsigned i = 0; i < array_lengthof(CycloneSysRegPairs); ++i) {
- if (CycloneSysRegPairs[i].Name == NameLower) {
+ for (unsigned i = 0; i < array_lengthof(CycloneSysRegMappings); ++i) {
+ if (CycloneSysRegMappings[i].Name == NameLower) {
Valid = true;
- return CycloneSysRegPairs[i].Value;
+ return CycloneSysRegMappings[i].Value;
}
}
}
// Now try the instruction-specific registers (either read-only or
// write-only).
- for (unsigned i = 0; i < NumInstPairs; ++i) {
- if (InstPairs[i].Name == NameLower) {
+ for (unsigned i = 0; i < NumInstMappings; ++i) {
+ if (InstMappings[i].Name == NameLower) {
Valid = true;
- return InstPairs[i].Value;
+ return InstMappings[i].Value;
}
}
@@ -816,26 +816,26 @@ AArch64SysReg::SysRegMapper::fromString(StringRef Name, bool &Valid) const {
std::string
AArch64SysReg::SysRegMapper::toString(uint32_t Bits) const {
// First search the registers shared by all
- for (unsigned i = 0; i < array_lengthof(SysRegPairs); ++i) {
- if (SysRegPairs[i].Value == Bits) {
- return SysRegPairs[i].Name;
+ for (unsigned i = 0; i < array_lengthof(SysRegMappings); ++i) {
+ if (SysRegMappings[i].Value == Bits) {
+ return SysRegMappings[i].Name;
}
}
// Next search for target specific registers
if (FeatureBits & AArch64::ProcCyclone) {
- for (unsigned i = 0; i < array_lengthof(CycloneSysRegPairs); ++i) {
- if (CycloneSysRegPairs[i].Value == Bits) {
- return CycloneSysRegPairs[i].Name;
+ for (unsigned i = 0; i < array_lengthof(CycloneSysRegMappings); ++i) {
+ if (CycloneSysRegMappings[i].Value == Bits) {
+ return CycloneSysRegMappings[i].Name;
}
}
}
// Now try the instruction-specific registers (either read-only or
// write-only).
- for (unsigned i = 0; i < NumInstPairs; ++i) {
- if (InstPairs[i].Value == Bits) {
- return InstPairs[i].Name;
+ for (unsigned i = 0; i < NumInstMappings; ++i) {
+ if (InstMappings[i].Value == Bits) {
+ return InstMappings[i].Name;
}
}
@@ -850,7 +850,7 @@ AArch64SysReg::SysRegMapper::toString(uint32_t Bits) const {
+ "_c" + utostr(CRm) + "_" + utostr(Op2);
}
-const AArch64NamedImmMapper::Mapping AArch64TLBI::TLBIMapper::TLBIPairs[] = {
+const AArch64NamedImmMapper::Mapping AArch64TLBI::TLBIMapper::TLBIMappings[] = {
{"ipas2e1is", IPAS2E1IS},
{"ipas2le1is", IPAS2LE1IS},
{"vmalle1is", VMALLE1IS},
@@ -886,4 +886,4 @@ const AArch64NamedImmMapper::Mapping AArch64TLBI::TLBIMapper::TLBIPairs[] = {
};
AArch64TLBI::TLBIMapper::TLBIMapper()
- : AArch64NamedImmMapper(TLBIPairs, 0) {}
+ : AArch64NamedImmMapper(TLBIMappings, 0) {}
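
The Pairs-to-Mappings rename above is mechanical, but the mapper itself is worth seeing in isolation: two linear scans over a static name/value table, with success reported through an out-parameter. A self-contained sketch of the toString/fromString behavior on a made-up three-entry table:

#include <algorithm>
#include <cassert>
#include <cctype>
#include <cstdint>
#include <string>

struct Mapping { const char *Name; uint32_t Value; };

static const Mapping Mappings[] = {{"oshld", 1}, {"oshst", 2}, {"osh", 3}};

static std::string toString(uint32_t Value, bool &Valid) {
  for (const Mapping &M : Mappings)
    if (M.Value == Value) { Valid = true; return M.Name; }
  Valid = false;
  return std::string();
}

static uint32_t fromString(std::string Name, bool &Valid) {
  // fromString lower-cases the query first, as the real mapper does.
  std::transform(Name.begin(), Name.end(), Name.begin(), ::tolower);
  for (const Mapping &M : Mappings)
    if (Name == M.Name) { Valid = true; return M.Value; }
  Valid = false;
  return UINT32_MAX;
}

int main() {
  bool Valid;
  assert(fromString("OSH", Valid) == 3 && Valid);
  assert(toString(2, Valid) == "oshst" && Valid);
  toString(99, Valid);
  assert(!Valid);
}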
diff --git a/lib/Target/AArch64/Utils/AArch64BaseInfo.h b/lib/Target/AArch64/Utils/AArch64BaseInfo.h
index c60b09a..2ae6f52 100644
--- a/lib/Target/AArch64/Utils/AArch64BaseInfo.h
+++ b/lib/Target/AArch64/Utils/AArch64BaseInfo.h
@@ -283,8 +283,8 @@ struct AArch64NamedImmMapper {
};
template<int N>
- AArch64NamedImmMapper(const Mapping (&Pairs)[N], uint32_t TooBigImm)
- : Pairs(&Pairs[0]), NumPairs(N), TooBigImm(TooBigImm) {}
+ AArch64NamedImmMapper(const Mapping (&Mappings)[N], uint32_t TooBigImm)
+ : Mappings(&Mappings[0]), NumMappings(N), TooBigImm(TooBigImm) {}
StringRef toString(uint32_t Value, bool &Valid) const;
uint32_t fromString(StringRef Name, bool &Valid) const;
@@ -294,8 +294,8 @@ struct AArch64NamedImmMapper {
/// N being 0 indicates no immediate syntax-form is allowed.
bool validImm(uint32_t Value) const;
protected:
- const Mapping *Pairs;
- size_t NumPairs;
+ const Mapping *Mappings;
+ size_t NumMappings;
uint32_t TooBigImm;
};
@@ -317,7 +317,7 @@ namespace AArch64AT {
};
struct ATMapper : AArch64NamedImmMapper {
- const static Mapping ATPairs[];
+ const static Mapping ATMappings[];
ATMapper();
};
@@ -341,7 +341,7 @@ namespace AArch64DB {
};
struct DBarrierMapper : AArch64NamedImmMapper {
- const static Mapping DBarrierPairs[];
+ const static Mapping DBarrierMappings[];
DBarrierMapper();
};
@@ -361,7 +361,7 @@ namespace AArch64DC {
};
struct DCMapper : AArch64NamedImmMapper {
- const static Mapping DCPairs[];
+ const static Mapping DCMappings[];
DCMapper();
};
@@ -378,7 +378,7 @@ namespace AArch64IC {
struct ICMapper : AArch64NamedImmMapper {
- const static Mapping ICPairs[];
+ const static Mapping ICMappings[];
ICMapper();
};
@@ -394,7 +394,7 @@ namespace AArch64ISB {
SY = 0xf
};
struct ISBMapper : AArch64NamedImmMapper {
- const static Mapping ISBPairs[];
+ const static Mapping ISBMappings[];
ISBMapper();
};
@@ -424,7 +424,7 @@ namespace AArch64PRFM {
};
struct PRFMMapper : AArch64NamedImmMapper {
- const static Mapping PRFMPairs[];
+ const static Mapping PRFMMappings[];
PRFMMapper();
};
@@ -439,7 +439,7 @@ namespace AArch64PState {
};
struct PStateMapper : AArch64NamedImmMapper {
- const static Mapping PStatePairs[];
+ const static Mapping PStateMappings[];
PStateMapper();
};
@@ -1134,11 +1134,11 @@ namespace AArch64SysReg {
// burdening the common AArch64NamedImmMapper with abstractions only needed in
// this one case.
struct SysRegMapper {
- static const AArch64NamedImmMapper::Mapping SysRegPairs[];
- static const AArch64NamedImmMapper::Mapping CycloneSysRegPairs[];
+ static const AArch64NamedImmMapper::Mapping SysRegMappings[];
+ static const AArch64NamedImmMapper::Mapping CycloneSysRegMappings[];
- const AArch64NamedImmMapper::Mapping *InstPairs;
- size_t NumInstPairs;
+ const AArch64NamedImmMapper::Mapping *InstMappings;
+ size_t NumInstMappings;
uint64_t FeatureBits;
SysRegMapper(uint64_t FeatureBits) : FeatureBits(FeatureBits) { }
@@ -1147,12 +1147,12 @@ namespace AArch64SysReg {
};
struct MSRMapper : SysRegMapper {
- static const AArch64NamedImmMapper::Mapping MSRPairs[];
+ static const AArch64NamedImmMapper::Mapping MSRMappings[];
MSRMapper(uint64_t FeatureBits);
};
struct MRSMapper : SysRegMapper {
- static const AArch64NamedImmMapper::Mapping MRSPairs[];
+ static const AArch64NamedImmMapper::Mapping MRSMappings[];
MRSMapper(uint64_t FeatureBits);
};
@@ -1197,7 +1197,7 @@ namespace AArch64TLBI {
};
struct TLBIMapper : AArch64NamedImmMapper {
- const static Mapping TLBIPairs[];
+ const static Mapping TLBIMappings[];
TLBIMapper();
};
@@ -1229,7 +1229,7 @@ namespace AArch64II {
MO_NO_FLAG,
- MO_FRAGMENT = 0x7,
+ MO_FRAGMENT = 0xf,
/// MO_PAGE - A symbol operand with this flag represents the pc-relative
/// offset of the 4K page containing the symbol. This is used with the
@@ -1257,26 +1257,31 @@ namespace AArch64II {
/// 0-15 of a 64-bit address, used in a MOVZ or MOVK instruction
MO_G0 = 6,
+ /// MO_HI12 - This flag indicates that a symbol operand represents the bits
+ /// 13-24 of a 64-bit address, used in an arithmetic immediate-shifted-left-
+ /// by-12-bits instruction.
+ MO_HI12 = 7,
+
/// MO_GOT - This flag indicates that a symbol operand represents the
/// address of the GOT entry for the symbol, rather than the address of
/// the symbol itself.
- MO_GOT = 8,
+ MO_GOT = 0x10,
/// MO_NC - Indicates whether the linker is expected to check the symbol
/// reference for overflow. For example in an ADRP/ADD pair of relocations
/// the ADRP usually does check, but not the ADD.
- MO_NC = 0x10,
+ MO_NC = 0x20,
/// MO_TLS - Indicates that the operand being accessed is some kind of
/// thread-local symbol. On Darwin, only one type of thread-local access
/// exists (pre linker-relaxation), but on ELF the TLSModel used for the
/// referee will affect interpretation.
- MO_TLS = 0x20,
+ MO_TLS = 0x40,
/// MO_CONSTPOOL - This flag indicates that a symbol operand represents
/// the address of a constant pool entry for the symbol, rather than the
/// address of the symbol itself.
- MO_CONSTPOOL = 0x40
+ MO_CONSTPOOL = 0x80
};
} // end namespace AArch64II
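
The renumbering at the end of the hunk above is easy to misread as churn; the point is that MO_FRAGMENT grows from a 3-bit to a 4-bit field so the new MO_HI12 kind fits, which forces every flag above it up by one bit. A sketch of the resulting layout and the mask-based extraction it supports:

#include <cassert>

enum : unsigned {
  MO_FRAGMENT = 0xf,   // low four bits select the address fragment
  MO_G0 = 6,
  MO_HI12 = 7,         // new: bits of an add-immediate, LSL #12
  MO_GOT = 0x10,       // modifier flags live above the fragment field
  MO_NC = 0x20,
  MO_TLS = 0x40,
  MO_CONSTPOOL = 0x80,
};

int main() {
  unsigned Flags = MO_HI12 | MO_NC;
  assert((Flags & MO_FRAGMENT) == MO_HI12); // fragment extracted by mask
  assert(Flags & MO_NC);                    // modifier bits unaffected
  assert((MO_GOT & MO_FRAGMENT) == 0);      // flags don't clash with fragments
}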
diff --git a/lib/Target/ARM/A15SDOptimizer.cpp b/lib/Target/ARM/A15SDOptimizer.cpp
index 387f1f6..7a1865c 100644
--- a/lib/Target/ARM/A15SDOptimizer.cpp
+++ b/lib/Target/ARM/A15SDOptimizer.cpp
@@ -27,12 +27,15 @@
#include "ARM.h"
#include "ARMBaseInstrInfo.h"
#include "ARMBaseRegisterInfo.h"
+#include "ARMSubtarget.h"
#include "llvm/ADT/Statistic.h"
+#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetRegisterInfo.h"
#include "llvm/Target/TargetSubtargetInfo.h"
#include <map>
@@ -678,8 +681,13 @@ bool A15SDOptimizer::runOnInstruction(MachineInstr *MI) {
}
bool A15SDOptimizer::runOnMachineFunction(MachineFunction &Fn) {
- TII = static_cast<const ARMBaseInstrInfo *>(Fn.getSubtarget().getInstrInfo());
- TRI = Fn.getSubtarget().getRegisterInfo();
+ const ARMSubtarget &STI = Fn.getSubtarget<ARMSubtarget>();
+ // Since the A15SDOptimizer pass can insert VDUP instructions, it can only be
+ // enabled when NEON is available.
+ if (!(STI.isCortexA15() && STI.hasNEON()))
+ return false;
+ TII = STI.getInstrInfo();
+ TRI = STI.getRegisterInfo();
MRI = &Fn.getRegInfo();
bool Modified = false;
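
The guard added to runOnMachineFunction above turns a per-instruction assumption into a pass-level precondition: since the optimizer may insert VDUPs, it now refuses to run at all unless the subtarget is a Cortex-A15 with NEON. A minimal sketch of that early-exit shape, with stand-in types:

#include <cassert>

struct SubtargetSketch { bool CortexA15; bool NEON; };

static bool runOnFunctionSketch(const SubtargetSketch &STI) {
  if (!(STI.CortexA15 && STI.NEON))
    return false;        // pass disabled: no NEON means no VDUP rewriting
  return true;           // stand-in for the real optimization work
}

int main() {
  assert(!runOnFunctionSketch({true, false})); // A15 without NEON: skip
  assert(runOnFunctionSketch({true, true}));
}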
diff --git a/lib/Target/ARM/ARM.td b/lib/Target/ARM/ARM.td
index f080c60..ce0aed9 100644
--- a/lib/Target/ARM/ARM.td
+++ b/lib/Target/ARM/ARM.td
@@ -167,9 +167,12 @@ def HasV6Ops : SubtargetFeature<"v6", "HasV6Ops", "true",
def HasV6MOps : SubtargetFeature<"v6m", "HasV6MOps", "true",
"Support ARM v6M instructions",
[HasV6Ops]>;
+def HasV6KOps : SubtargetFeature<"v6k", "HasV6KOps", "true",
+ "Support ARM v6k instructions",
+ [HasV6Ops]>;
def HasV6T2Ops : SubtargetFeature<"v6t2", "HasV6T2Ops", "true",
"Support ARM v6t2 instructions",
- [HasV6MOps, FeatureThumb2]>;
+ [HasV6MOps, HasV6KOps, FeatureThumb2]>;
def HasV7Ops : SubtargetFeature<"v7", "HasV7Ops", "true",
"Support ARM v7 instructions",
[HasV6T2Ops, FeaturePerfMon]>;
@@ -177,6 +180,9 @@ def HasV8Ops : SubtargetFeature<"v8", "HasV8Ops", "true",
"Support ARM v8 instructions",
[HasV7Ops, FeatureVirtualization,
FeatureMP]>;
+def FeatureV8_1a : SubtargetFeature<"v8.1a", "HasV8_1a", "true",
+ "Support ARM v8.1a instructions",
+ [HasV8Ops, FeatureAClass, FeatureCRC]>;
//===----------------------------------------------------------------------===//
// ARM Processors supported.
@@ -320,12 +326,6 @@ def : ProcNoItin<"iwmmxt", [HasV5TEOps]>;
def : Processor<"arm1136j-s", ARMV6Itineraries, [HasV6Ops]>;
def : Processor<"arm1136jf-s", ARMV6Itineraries, [HasV6Ops, FeatureVFP2,
FeatureHasSlowFPVMLx]>;
-def : Processor<"arm1176jz-s", ARMV6Itineraries, [HasV6Ops]>;
-def : Processor<"arm1176jzf-s", ARMV6Itineraries, [HasV6Ops, FeatureVFP2,
- FeatureHasSlowFPVMLx]>;
-def : Processor<"mpcorenovfp", ARMV6Itineraries, [HasV6Ops]>;
-def : Processor<"mpcore", ARMV6Itineraries, [HasV6Ops, FeatureVFP2,
- FeatureHasSlowFPVMLx]>;
// V6M Processors.
def : Processor<"cortex-m0", ARMV6Itineraries, [HasV6MOps, FeatureNoARM,
@@ -337,6 +337,14 @@ def : Processor<"cortex-m1", ARMV6Itineraries, [HasV6MOps, FeatureNoARM,
def : Processor<"sc000", ARMV6Itineraries, [HasV6MOps, FeatureNoARM,
FeatureDB, FeatureMClass]>;
+// V6K Processors.
+def : Processor<"arm1176jz-s", ARMV6Itineraries, [HasV6KOps]>;
+def : Processor<"arm1176jzf-s", ARMV6Itineraries, [HasV6KOps, FeatureVFP2,
+ FeatureHasSlowFPVMLx]>;
+def : Processor<"mpcorenovfp", ARMV6Itineraries, [HasV6KOps]>;
+def : Processor<"mpcore", ARMV6Itineraries, [HasV6KOps, FeatureVFP2,
+ FeatureHasSlowFPVMLx]>;
+
// V6T2 Processors.
def : Processor<"arm1156t2-s", ARMV6Itineraries, [HasV6T2Ops,
FeatureDSPThumb2]>;
@@ -449,6 +457,14 @@ def : ProcessorModel<"cyclone", SwiftModel,
FeatureDB,FeatureDSPThumb2,
FeatureHasRAS, FeatureZCZeroing]>;
+// V8.1 Processors
+def : ProcNoItin<"generic-armv8.1-a", [HasV8Ops, FeatureV8_1a,
+ FeatureDB, FeatureFPARMv8,
+ FeatureNEON, FeatureDSPThumb2,
+ FeatureHWDiv, FeatureHWDivARM,
+ FeatureTrustZone, FeatureT2XtPk,
+ FeatureCrypto]>;
+
//===----------------------------------------------------------------------===//
// Register File Description
//===----------------------------------------------------------------------===//
diff --git a/lib/Target/ARM/ARMAsmPrinter.cpp b/lib/Target/ARM/ARMAsmPrinter.cpp
index 2544a01..102def1 100644
--- a/lib/Target/ARM/ARMAsmPrinter.cpp
+++ b/lib/Target/ARM/ARMAsmPrinter.cpp
@@ -120,9 +120,6 @@ bool ARMAsmPrinter::runOnMachineFunction(MachineFunction &MF) {
OutStreamer.EndCOFFSymbolDef();
}
- // Have common code print out the function header with linkage info etc.
- EmitFunctionHeader();
-
// Emit the rest of the function body.
EmitFunctionBody();
@@ -438,65 +435,6 @@ void ARMAsmPrinter::emitInlineAsmEnd(const MCSubtargetInfo &StartInfo,
void ARMAsmPrinter::EmitStartOfAsmFile(Module &M) {
Triple TT(TM.getTargetTriple());
- if (TT.isOSBinFormatMachO()) {
- Reloc::Model RelocM = TM.getRelocationModel();
- if (RelocM == Reloc::PIC_ || RelocM == Reloc::DynamicNoPIC) {
- // Declare all the text sections up front (before the DWARF sections
- // emitted by AsmPrinter::doInitialization) so the assembler will keep
- // them together at the beginning of the object file. This helps
- // avoid out-of-range branches that are due a fundamental limitation of
- // the way symbol offsets are encoded with the current Darwin ARM
- // relocations.
- const TargetLoweringObjectFileMachO &TLOFMacho =
- static_cast<const TargetLoweringObjectFileMachO &>(
- getObjFileLowering());
-
- // Collect the set of sections our functions will go into.
- SetVector<const MCSection *, SmallVector<const MCSection *, 8>,
- SmallPtrSet<const MCSection *, 8> > TextSections;
- // Default text section comes first.
- TextSections.insert(TLOFMacho.getTextSection());
- // Now any user defined text sections from function attributes.
- for (Module::iterator F = M.begin(), e = M.end(); F != e; ++F)
- if (!F->isDeclaration() && !F->hasAvailableExternallyLinkage())
- TextSections.insert(TLOFMacho.SectionForGlobal(F, *Mang, TM));
- // Now the coalescable sections.
- TextSections.insert(TLOFMacho.getTextCoalSection());
- TextSections.insert(TLOFMacho.getConstTextCoalSection());
-
- // Emit the sections in the .s file header to fix the order.
- for (unsigned i = 0, e = TextSections.size(); i != e; ++i)
- OutStreamer.SwitchSection(TextSections[i]);
-
- if (RelocM == Reloc::DynamicNoPIC) {
- const MCSection *sect =
- OutContext.getMachOSection("__TEXT", "__symbol_stub4",
- MachO::S_SYMBOL_STUBS,
- 12, SectionKind::getText());
- OutStreamer.SwitchSection(sect);
- } else {
- const MCSection *sect =
- OutContext.getMachOSection("__TEXT", "__picsymbolstub4",
- MachO::S_SYMBOL_STUBS,
- 16, SectionKind::getText());
- OutStreamer.SwitchSection(sect);
- }
- const MCSection *StaticInitSect =
- OutContext.getMachOSection("__TEXT", "__StaticInit",
- MachO::S_REGULAR |
- MachO::S_ATTR_PURE_INSTRUCTIONS,
- SectionKind::getText());
- OutStreamer.SwitchSection(StaticInitSect);
- }
-
- // Compiling with debug info should not affect the code
- // generation. Ensure the cstring section comes before the
- // optional __DWARF secion. Otherwise, PC-relative loads would
- // have to use different instruction sequences at "-g" in order to
- // reach global data in the same object file.
- OutStreamer.SwitchSection(getObjFileLowering().getCStringSection());
- }
-
// Use unified assembler syntax.
OutStreamer.EmitAssemblerFlag(MCAF_SyntaxUnified);
@@ -669,7 +607,7 @@ void ARMAsmPrinter::emitAttributes() {
std::string CPUString = STI.getCPUString();
- if (CPUString != "generic") {
+ if (CPUString.find("generic") != 0) { // CPUString doesn't start with "generic"
// FIXME: remove krait check when GNU tools support krait cpu
if (STI.isKrait()) {
ATS.emitTextAttribute(ARMBuildAttrs::CPU_name, "cortex-a9");
@@ -723,7 +661,8 @@ void ARMAsmPrinter::emitAttributes() {
// Emit Tag_Advanced_SIMD_arch for ARMv8 architecture
if (STI.hasV8Ops())
ATS.emitAttribute(ARMBuildAttrs::Advanced_SIMD_arch,
- ARMBuildAttrs::AllowNeonARMv8);
+ STI.hasV8_1a() ? ARMBuildAttrs::AllowNeonARMv8_1a :
+ ARMBuildAttrs::AllowNeonARMv8);
} else {
if (STI.hasFPARMv8())
// FPv5 and FP-ARMv8 have the same instructions, so are modeled as one
@@ -960,10 +899,7 @@ EmitMachineConstantPoolValue(MachineConstantPoolValue *MCPV) {
MCSymbol *MCSym;
if (ACPV->isLSDA()) {
- SmallString<128> Str;
- raw_svector_ostream OS(Str);
- OS << DL->getPrivateGlobalPrefix() << "_LSDA_" << getFunctionNumber();
- MCSym = OutContext.GetOrCreateSymbol(OS.str());
+ MCSym = getCurExceptionSym();
} else if (ACPV->isBlockAddress()) {
const BlockAddress *BA =
cast<ARMConstantPoolConstant>(ACPV)->getBlockAddress();
diff --git a/lib/Target/ARM/ARMAsmPrinter.h b/lib/Target/ARM/ARMAsmPrinter.h
index 50cb954..e475ae4 100644
--- a/lib/Target/ARM/ARMAsmPrinter.h
+++ b/lib/Target/ARM/ARMAsmPrinter.h
@@ -103,13 +103,16 @@ private:
const MachineInstr *MI);
public:
- unsigned getISAEncoding(const Function *F) override {
+ unsigned getISAEncoding() override {
// ARM/Darwin adds ISA to the DWARF info for each function.
Triple TT(TM.getTargetTriple());
if (!TT.isOSBinFormatMachO())
return 0;
- const ARMSubtarget &STI = TM.getSubtarget<ARMSubtarget>(*F);
- return STI.isThumb() ? ARM::DW_ISA_ARM_thumb : ARM::DW_ISA_ARM_arm;
+ bool isThumb = TT.getArch() == Triple::thumb ||
+ TT.getArch() == Triple::thumbeb ||
+ TT.getSubArch() == Triple::ARMSubArch_v7m ||
+ TT.getSubArch() == Triple::ARMSubArch_v6m;
+ return isThumb ? ARM::DW_ISA_ARM_thumb : ARM::DW_ISA_ARM_arm;
}
private:
diff --git a/lib/Target/ARM/ARMBaseInstrInfo.cpp b/lib/Target/ARM/ARMBaseInstrInfo.cpp
index 29ee22e..7ee3cb0 100644
--- a/lib/Target/ARM/ARMBaseInstrInfo.cpp
+++ b/lib/Target/ARM/ARMBaseInstrInfo.cpp
@@ -37,6 +37,7 @@
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
using namespace llvm;
@@ -4115,19 +4116,21 @@ enum ARMExeDomain {
//
std::pair<uint16_t, uint16_t>
ARMBaseInstrInfo::getExecutionDomain(const MachineInstr *MI) const {
- // VMOVD, VMOVRS and VMOVSR are VFP instructions, but can be changed to NEON
- // if they are not predicated.
- if (MI->getOpcode() == ARM::VMOVD && !isPredicated(MI))
- return std::make_pair(ExeVFP, (1<<ExeVFP) | (1<<ExeNEON));
-
- // CortexA9 is particularly picky about mixing the two and wants these
- // converted.
- if (Subtarget.isCortexA9() && !isPredicated(MI) &&
- (MI->getOpcode() == ARM::VMOVRS ||
- MI->getOpcode() == ARM::VMOVSR ||
- MI->getOpcode() == ARM::VMOVS))
- return std::make_pair(ExeVFP, (1<<ExeVFP) | (1<<ExeNEON));
-
+ // If we don't have access to NEON instructions then we won't be able
+ // to swizzle anything to the NEON domain. Check to make sure.
+ if (Subtarget.hasNEON()) {
+ // VMOVD, VMOVRS and VMOVSR are VFP instructions, but can be changed to NEON
+ // if they are not predicated.
+ if (MI->getOpcode() == ARM::VMOVD && !isPredicated(MI))
+ return std::make_pair(ExeVFP, (1 << ExeVFP) | (1 << ExeNEON));
+
+ // CortexA9 is particularly picky about mixing the two and wants these
+ // converted.
+ if (Subtarget.isCortexA9() && !isPredicated(MI) &&
+ (MI->getOpcode() == ARM::VMOVRS || MI->getOpcode() == ARM::VMOVSR ||
+ MI->getOpcode() == ARM::VMOVS))
+ return std::make_pair(ExeVFP, (1 << ExeVFP) | (1 << ExeNEON));
+ }
// No other instructions can be swizzled, so just determine their domain.
unsigned Domain = MI->getDesc().TSFlags & ARMII::DomainMask;
@@ -4220,6 +4223,9 @@ ARMBaseInstrInfo::setExecutionDomain(MachineInstr *MI, unsigned Domain) const {
// Zap the predicate operands.
assert(!isPredicated(MI) && "Cannot predicate a VORRd");
+ // Make sure we've got NEON instructions.
+ assert(Subtarget.hasNEON() && "VORRd requires NEON");
+
// Source instruction is %DDst = VMOVD %DSrc, 14, %noreg (; implicits)
DstReg = MI->getOperand(0).getReg();
SrcReg = MI->getOperand(1).getReg();
@@ -4507,7 +4513,7 @@ breakPartialRegDependency(MachineBasicBlock::iterator MI,
}
bool ARMBaseInstrInfo::hasNOP() const {
- return (Subtarget.getFeatureBits() & ARM::HasV6T2Ops) != 0;
+ return (Subtarget.getFeatureBits() & ARM::HasV6KOps) != 0;
}
bool ARMBaseInstrInfo::isSwiftFastImmShift(const MachineInstr *MI) const {
diff --git a/lib/Target/ARM/ARMBaseRegisterInfo.cpp b/lib/Target/ARM/ARMBaseRegisterInfo.cpp
index 7574727..a8c7657 100644
--- a/lib/Target/ARM/ARMBaseRegisterInfo.cpp
+++ b/lib/Target/ARM/ARMBaseRegisterInfo.cpp
@@ -45,26 +45,27 @@
using namespace llvm;
-ARMBaseRegisterInfo::ARMBaseRegisterInfo(const ARMSubtarget &sti)
- : ARMGenRegisterInfo(ARM::LR, 0, 0, ARM::PC), STI(sti), BasePtr(ARM::R6) {
+ARMBaseRegisterInfo::ARMBaseRegisterInfo()
+ : ARMGenRegisterInfo(ARM::LR, 0, 0, ARM::PC), BasePtr(ARM::R6) {}
+
+static unsigned getFramePointerReg(const ARMSubtarget &STI) {
if (STI.isTargetMachO()) {
if (STI.isTargetDarwin() || STI.isThumb1Only())
- FramePtr = ARM::R7;
+ return ARM::R7;
else
- FramePtr = ARM::R11;
+ return ARM::R11;
} else if (STI.isTargetWindows())
- FramePtr = ARM::R11;
+ return ARM::R11;
else // ARM EABI
- FramePtr = STI.isThumb() ? ARM::R7 : ARM::R11;
+ return STI.isThumb() ? ARM::R7 : ARM::R11;
}
const MCPhysReg*
ARMBaseRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
+ const ARMSubtarget &STI = MF->getSubtarget<ARMSubtarget>();
const MCPhysReg *RegList =
STI.isTargetDarwin() ? CSR_iOS_SaveList : CSR_AAPCS_SaveList;
- if (!MF) return RegList;
-
const Function *F = MF->getFunction();
if (F->getCallingConv() == CallingConv::GHC) {
// GHC set of callee saved regs is empty as all those regs are
@@ -89,8 +90,10 @@ ARMBaseRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
return RegList;
}
-const uint32_t*
-ARMBaseRegisterInfo::getCallPreservedMask(CallingConv::ID CC) const {
+const uint32_t *
+ARMBaseRegisterInfo::getCallPreservedMask(const MachineFunction &MF,
+ CallingConv::ID CC) const {
+ const ARMSubtarget &STI = MF.getSubtarget<ARMSubtarget>();
if (CC == CallingConv::GHC)
// This is academic because all GHC calls are (supposed to be) tail calls
return CSR_NoRegs_RegMask;
@@ -102,8 +105,10 @@ ARMBaseRegisterInfo::getNoPreservedMask() const {
return CSR_NoRegs_RegMask;
}
-const uint32_t*
-ARMBaseRegisterInfo::getThisReturnPreservedMask(CallingConv::ID CC) const {
+const uint32_t *
+ARMBaseRegisterInfo::getThisReturnPreservedMask(const MachineFunction &MF,
+ CallingConv::ID CC) const {
+ const ARMSubtarget &STI = MF.getSubtarget<ARMSubtarget>();
// This should return a register mask that is the same as that returned by
// getCallPreservedMask but that additionally preserves the register used for
// the first i32 argument (which must also be the register used to return a
@@ -121,7 +126,8 @@ ARMBaseRegisterInfo::getThisReturnPreservedMask(CallingConv::ID CC) const {
BitVector ARMBaseRegisterInfo::
getReservedRegs(const MachineFunction &MF) const {
- const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering();
+ const ARMSubtarget &STI = MF.getSubtarget<ARMSubtarget>();
+ const TargetFrameLowering *TFI = STI.getFrameLowering();
// FIXME: avoid re-calculating this every time.
BitVector Reserved(getNumRegs());
@@ -130,7 +136,7 @@ getReservedRegs(const MachineFunction &MF) const {
Reserved.set(ARM::FPSCR);
Reserved.set(ARM::APSR_NZCV);
if (TFI->hasFP(MF))
- Reserved.set(FramePtr);
+ Reserved.set(getFramePointerReg(STI));
if (hasBasePointer(MF))
Reserved.set(BasePtr);
// Some targets reserve R9.
@@ -150,9 +156,9 @@ getReservedRegs(const MachineFunction &MF) const {
return Reserved;
}
-const TargetRegisterClass*
-ARMBaseRegisterInfo::getLargestLegalSuperClass(const TargetRegisterClass *RC)
- const {
+const TargetRegisterClass *
+ARMBaseRegisterInfo::getLargestLegalSuperClass(const TargetRegisterClass *RC,
+ const MachineFunction &) const {
const TargetRegisterClass *Super = RC;
TargetRegisterClass::sc_iterator I = RC->getSuperClasses();
do {
@@ -187,7 +193,8 @@ ARMBaseRegisterInfo::getCrossCopyRegClass(const TargetRegisterClass *RC) const {
unsigned
ARMBaseRegisterInfo::getRegPressureLimit(const TargetRegisterClass *RC,
MachineFunction &MF) const {
- const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering();
+ const ARMSubtarget &STI = MF.getSubtarget<ARMSubtarget>();
+ const TargetFrameLowering *TFI = STI.getFrameLowering();
switch (RC->getID()) {
default:
@@ -283,29 +290,6 @@ ARMBaseRegisterInfo::updateRegAllocHint(unsigned Reg, unsigned NewReg,
}
}
-bool
-ARMBaseRegisterInfo::avoidWriteAfterWrite(const TargetRegisterClass *RC) const {
- // CortexA9 has a Write-after-write hazard for NEON registers.
- if (!STI.isLikeA9())
- return false;
-
- switch (RC->getID()) {
- case ARM::DPRRegClassID:
- case ARM::DPR_8RegClassID:
- case ARM::DPR_VFP2RegClassID:
- case ARM::QPRRegClassID:
- case ARM::QPR_8RegClassID:
- case ARM::QPR_VFP2RegClassID:
- case ARM::SPRRegClassID:
- case ARM::SPR_8RegClassID:
- // Avoid reusing S, D, and Q registers.
- // Don't increase register pressure for QQ and QQQQ.
- return true;
- default:
- return false;
- }
-}
-
bool ARMBaseRegisterInfo::hasBasePointer(const MachineFunction &MF) const {
const MachineFrameInfo *MFI = MF.getFrameInfo();
const ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
@@ -350,7 +334,7 @@ bool ARMBaseRegisterInfo::canRealignStack(const MachineFunction &MF) const {
return false;
// Stack realignment requires a frame pointer. If we already started
// register allocation with frame pointer elimination, it is too late now.
- if (!MRI->canReserveReg(FramePtr))
+ if (!MRI->canReserveReg(getFramePointerReg(MF.getSubtarget<ARMSubtarget>())))
return false;
// We may also need a base pointer if there are dynamic allocas or stack
// pointer adjustments around calls.
@@ -384,10 +368,11 @@ cannotEliminateFrame(const MachineFunction &MF) const {
unsigned
ARMBaseRegisterInfo::getFrameRegister(const MachineFunction &MF) const {
- const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering();
+ const ARMSubtarget &STI = MF.getSubtarget<ARMSubtarget>();
+ const TargetFrameLowering *TFI = STI.getFrameLowering();
if (TFI->hasFP(MF))
- return FramePtr;
+ return getFramePointerReg(STI);
return ARM::SP;
}
@@ -539,7 +524,6 @@ needsFrameBaseReg(MachineInstr *MI, int64_t Offset) const {
// The incoming offset is relative to the SP at the start of the function,
// but when we access the local it'll be relative to the SP after local
// allocation, so adjust our SP-relative offset by that allocation size.
- Offset = -Offset;
Offset += MFI->getLocalFrameSize();
// Assume that we'll have at least some spill slots allocated.
// FIXME: This is a total SWAG number. We should run some statistics
@@ -552,9 +536,8 @@ needsFrameBaseReg(MachineInstr *MI, int64_t Offset) const {
// on whether there are any local variables that would trigger it.
unsigned StackAlign = TFI->getStackAlignment();
if (TFI->hasFP(MF) &&
- (MI->getDesc().TSFlags & ARMII::AddrModeMask) != ARMII::AddrModeT1_s &&
!((MFI->getLocalFrameMaxAlign() > StackAlign) && canRealignStack(MF))) {
- if (isFrameOffsetLegal(MI, FPOffset))
+ if (isFrameOffsetLegal(MI, getFrameRegister(MF), FPOffset))
return false;
}
// If we can reference via the stack pointer, try that.
@@ -562,7 +545,7 @@ needsFrameBaseReg(MachineInstr *MI, int64_t Offset) const {
// to only disallow SP relative references in the live range of
// the VLA(s). In practice, it's unclear how much difference that
// would make, but it may be worth doing.
- if (!MFI->hasVarSizedObjects() && isFrameOffsetLegal(MI, Offset))
+ if (!MFI->hasVarSizedObjects() && isFrameOffsetLegal(MI, ARM::SP, Offset))
return false;
// The offset likely isn't legal, we want to allocate a virtual base register.
@@ -625,7 +608,7 @@ void ARMBaseRegisterInfo::resolveFrameIndex(MachineInstr &MI, unsigned BaseReg,
(void)Done;
}
-bool ARMBaseRegisterInfo::isFrameOffsetLegal(const MachineInstr *MI,
+bool ARMBaseRegisterInfo::isFrameOffsetLegal(const MachineInstr *MI, unsigned BaseReg,
int64_t Offset) const {
const MCInstrDesc &Desc = MI->getDesc();
unsigned AddrMode = (Desc.TSFlags & ARMII::AddrModeMask);
@@ -669,7 +652,7 @@ bool ARMBaseRegisterInfo::isFrameOffsetLegal(const MachineInstr *MI,
NumBits = 8;
break;
case ARMII::AddrModeT1_s:
- NumBits = 8;
+ NumBits = (BaseReg == ARM::SP ? 8 : 5);
Scale = 4;
isSigned = false;
break;
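The AddrModeT1_s change above encodes a Thumb1 fact: SP-relative loads and stores carry an 8-bit word-scaled immediate, while the register-relative forms only carry 5 bits. A minimal standalone sketch of the resulting range check, assuming only those two encoding widths and the word scale; the real isFrameOffsetLegal also folds in the instruction's own offset field:

#include <cstdint>

// Sketch only: BaseIsSP selects the 8-bit SP-relative immediate field,
// otherwise the 5-bit register-relative field; both are scaled by 4.
static bool isT1sOffsetLegalSketch(bool BaseIsSP, int64_t Offset) {
  const unsigned NumBits = BaseIsSP ? 8 : 5;
  const unsigned Scale = 4;
  if (Offset < 0 || (Offset % Scale) != 0)
    return false;                       // unsigned, word-aligned offsets only
  return (uint64_t)Offset / Scale < (1ULL << NumBits); // 0..1020 vs 0..124
}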
diff --git a/lib/Target/ARM/ARMBaseRegisterInfo.h b/lib/Target/ARM/ARMBaseRegisterInfo.h
index 17027c2..fdc1ef9 100644
--- a/lib/Target/ARM/ARMBaseRegisterInfo.h
+++ b/lib/Target/ARM/ARMBaseRegisterInfo.h
@@ -21,10 +21,6 @@
#include "ARMGenRegisterInfo.inc"
namespace llvm {
- class ARMSubtarget;
- class ARMBaseInstrInfo;
- class Type;
-
/// Register allocation hints.
namespace ARMRI {
enum {
@@ -82,27 +78,22 @@ static inline bool isCalleeSavedRegister(unsigned Reg,
class ARMBaseRegisterInfo : public ARMGenRegisterInfo {
protected:
- const ARMSubtarget &STI;
-
- /// FramePtr - ARM physical register used as frame ptr.
- unsigned FramePtr;
-
/// BasePtr - ARM physical register used as a base ptr in complex stack
/// frames. I.e., when we need a 3rd base, not just SP and FP, due to
/// variable size stack objects.
unsigned BasePtr;
// Can be only subclassed.
- explicit ARMBaseRegisterInfo(const ARMSubtarget &STI);
+ explicit ARMBaseRegisterInfo();
// Return the opcode that implements 'Op', or 0 if no such opcode exists.
unsigned getOpcode(int Op) const;
public:
/// Code Generation virtual methods...
- const MCPhysReg *
- getCalleeSavedRegs(const MachineFunction *MF = nullptr) const override;
- const uint32_t *getCallPreservedMask(CallingConv::ID) const override;
+ const MCPhysReg *getCalleeSavedRegs(const MachineFunction *MF) const override;
+ const uint32_t *getCallPreservedMask(const MachineFunction &MF,
+ CallingConv::ID) const override;
const uint32_t *getNoPreservedMask() const;
/// getThisReturnPreservedMask - Returns a call preserved mask specific to the
@@ -113,7 +104,8 @@ public:
///
/// Should return NULL in the case that the calling convention does not have
/// this property
- const uint32_t *getThisReturnPreservedMask(CallingConv::ID) const;
+ const uint32_t *getThisReturnPreservedMask(const MachineFunction &MF,
+ CallingConv::ID) const;
BitVector getReservedRegs(const MachineFunction &MF) const override;
@@ -124,7 +116,8 @@ public:
getCrossCopyRegClass(const TargetRegisterClass *RC) const override;
const TargetRegisterClass *
- getLargestLegalSuperClass(const TargetRegisterClass *RC) const override;
+ getLargestLegalSuperClass(const TargetRegisterClass *RC,
+ const MachineFunction &MF) const override;
unsigned getRegPressureLimit(const TargetRegisterClass *RC,
MachineFunction &MF) const override;
@@ -138,8 +131,6 @@ public:
void updateRegAllocHint(unsigned Reg, unsigned NewReg,
MachineFunction &MF) const override;
- bool avoidWriteAfterWrite(const TargetRegisterClass *RC) const override;
-
bool hasBasePointer(const MachineFunction &MF) const;
bool canRealignStack(const MachineFunction &MF) const;
@@ -152,7 +143,7 @@ public:
int64_t Offset) const override;
void resolveFrameIndex(MachineInstr &MI, unsigned BaseReg,
int64_t Offset) const override;
- bool isFrameOffsetLegal(const MachineInstr *MI,
+ bool isFrameOffsetLegal(const MachineInstr *MI, unsigned BaseReg,
int64_t Offset) const override;
bool cannotEliminateFrame(const MachineFunction &MF) const;
diff --git a/lib/Target/ARM/ARMFastISel.cpp b/lib/Target/ARM/ARMFastISel.cpp
index 375d394..9c8d228 100644
--- a/lib/Target/ARM/ARMFastISel.cpp
+++ b/lib/Target/ARM/ARMFastISel.cpp
@@ -2265,7 +2265,7 @@ bool ARMFastISel::ARMEmitLibcall(const Instruction *I, RTLIB::Libcall Call) {
// Add a register mask with the call-preserved registers.
// Proper defs for return values will be added by setPhysRegsDeadExcept().
- MIB.addRegMask(TRI.getCallPreservedMask(CC));
+ MIB.addRegMask(TRI.getCallPreservedMask(*FuncInfo.MF, CC));
// Finish off the call including any return values.
SmallVector<unsigned, 4> UsedRegs;
@@ -2416,7 +2416,7 @@ bool ARMFastISel::SelectCall(const Instruction *I,
// Add a register mask with the call-preserved registers.
// Proper defs for return values will be added by setPhysRegsDeadExcept().
- MIB.addRegMask(TRI.getCallPreservedMask(CC));
+ MIB.addRegMask(TRI.getCallPreservedMask(*FuncInfo.MF, CC));
// Finish off the call including any return values.
SmallVector<unsigned, 4> UsedRegs;
diff --git a/lib/Target/ARM/ARMFrameLowering.cpp b/lib/Target/ARM/ARMFrameLowering.cpp
index 5a5bd57..830953b 100644
--- a/lib/Target/ARM/ARMFrameLowering.cpp
+++ b/lib/Target/ARM/ARMFrameLowering.cpp
@@ -293,7 +293,7 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF) const {
"This emitPrologue does not support Thumb1!");
bool isARM = !AFI->isThumbFunction();
unsigned Align = STI.getFrameLowering()->getStackAlignment();
- unsigned ArgRegsSaveSize = AFI->getArgRegsSaveSize(Align);
+ unsigned ArgRegsSaveSize = AFI->getArgRegsSaveSize();
unsigned NumBytes = MFI->getStackSize();
const std::vector<CalleeSavedInfo> &CSI = MFI->getCalleeSavedInfo();
DebugLoc dl = MBBI != MBB.end() ? MBBI->getDebugLoc() : DebugLoc();
@@ -742,8 +742,7 @@ void ARMFrameLowering::emitEpilogue(MachineFunction &MF,
"This emitEpilogue does not support Thumb1!");
bool isARM = !AFI->isThumbFunction();
- unsigned Align = STI.getFrameLowering()->getStackAlignment();
- unsigned ArgRegsSaveSize = AFI->getArgRegsSaveSize(Align);
+ unsigned ArgRegsSaveSize = AFI->getArgRegsSaveSize();
int NumBytes = (int)MFI->getStackSize();
unsigned FramePtr = RegInfo->getFrameRegister(MF);
diff --git a/lib/Target/ARM/ARMISelDAGToDAG.cpp b/lib/Target/ARM/ARMISelDAGToDAG.cpp
index 6ebf640..44cd1ef 100644
--- a/lib/Target/ARM/ARMISelDAGToDAG.cpp
+++ b/lib/Target/ARM/ARMISelDAGToDAG.cpp
@@ -257,7 +257,7 @@ private:
/// SelectInlineAsmMemoryOperand - Implement addressing mode selection for
/// inline asm expressions.
- bool SelectInlineAsmMemoryOperand(const SDValue &Op, char ConstraintCode,
+ bool SelectInlineAsmMemoryOperand(const SDValue &Op, unsigned ConstraintID,
std::vector<SDValue> &OutOps) override;
// Form pairs of consecutive R, S, D, or Q registers.
@@ -3086,7 +3086,7 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) {
// Store exclusive double returns an i32 value which is the return status
// of the issued store.
- EVT ResTys[] = { MVT::i32, MVT::Other };
+ const EVT ResTys[] = {MVT::i32, MVT::Other};
bool isThumb = Subtarget->isThumb() && Subtarget->hasThumb2();
// Place arguments in the right order.
@@ -3472,9 +3472,10 @@ SDNode *ARMDAGToDAGISel::SelectInlineAsm(SDNode *N){
bool ARMDAGToDAGISel::
-SelectInlineAsmMemoryOperand(const SDValue &Op, char ConstraintCode,
+SelectInlineAsmMemoryOperand(const SDValue &Op, unsigned ConstraintID,
std::vector<SDValue> &OutOps) {
- assert(ConstraintCode == 'm' && "unexpected asm memory constraint");
+ assert(ConstraintID == InlineAsm::Constraint_m &&
+ "unexpected asm memory constraint");
// Require the address to be in a register. That is safe for all ARM
// variants and it is hard to do anything much smarter without knowing
// how the operand is used.
diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp
index 56290aa..3b1b8dd 100644
--- a/lib/Target/ARM/ARMISelLowering.cpp
+++ b/lib/Target/ARM/ARMISelLowering.cpp
@@ -23,6 +23,7 @@
#include "MCTargetDesc/ARMAddressingModes.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/StringExtras.h"
+#include "llvm/ADT/StringSwitch.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/IntrinsicLowering.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
@@ -40,6 +41,7 @@
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
+#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/Type.h"
#include "llvm/MC/MCSectionMachO.h"
@@ -47,6 +49,7 @@
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
+#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetOptions.h"
#include <utility>
using namespace llvm;
@@ -568,14 +571,12 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM,
setTargetDAGCombine(ISD::LOAD);
// It is legal to extload from v4i8 to v4i16 or v4i32.
- MVT Tys[6] = {MVT::v8i8, MVT::v4i8, MVT::v2i8,
- MVT::v4i16, MVT::v2i16,
- MVT::v2i32};
- for (unsigned i = 0; i < 6; ++i) {
+ for (MVT Ty : {MVT::v8i8, MVT::v4i8, MVT::v2i8, MVT::v4i16, MVT::v2i16,
+ MVT::v2i32}) {
for (MVT VT : MVT::integer_vector_valuetypes()) {
- setLoadExtAction(ISD::EXTLOAD, VT, Tys[i], Legal);
- setLoadExtAction(ISD::ZEXTLOAD, VT, Tys[i], Legal);
- setLoadExtAction(ISD::SEXTLOAD, VT, Tys[i], Legal);
+ setLoadExtAction(ISD::EXTLOAD, VT, Ty, Legal);
+ setLoadExtAction(ISD::ZEXTLOAD, VT, Ty, Legal);
+ setLoadExtAction(ISD::SEXTLOAD, VT, Ty, Legal);
}
}
}
@@ -614,6 +615,12 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::FRINT, MVT::f64, Expand);
setOperationAction(ISD::FNEARBYINT, MVT::f64, Expand);
setOperationAction(ISD::FFLOOR, MVT::f64, Expand);
+ setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom);
+ setOperationAction(ISD::UINT_TO_FP, MVT::i32, Custom);
+ setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom);
+ setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom);
+ setOperationAction(ISD::FP_TO_SINT, MVT::f64, Custom);
+ setOperationAction(ISD::FP_TO_UINT, MVT::f64, Custom);
setOperationAction(ISD::FP_ROUND, MVT::f32, Custom);
setOperationAction(ISD::FP_EXTEND, MVT::f64, Custom);
}
@@ -869,14 +876,6 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM,
// Various VFP goodness
if (!TM.Options.UseSoftFloat && !Subtarget->isThumb1Only()) {
- // int <-> fp are custom expanded into bit_convert + ARMISD ops.
- if (Subtarget->hasVFP2()) {
- setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom);
- setOperationAction(ISD::UINT_TO_FP, MVT::i32, Custom);
- setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom);
- setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom);
- }
-
// FP-ARMv8 adds f64 <-> f16 conversion. Before that it should be expanded.
if (!Subtarget->hasFPARMv8() || Subtarget->isFPOnlySP()) {
setOperationAction(ISD::FP16_TO_FP, MVT::f64, Expand);
@@ -1033,11 +1032,6 @@ const char *ARMTargetLowering::getTargetNodeName(unsigned Opcode) const {
case ARMISD::RBIT: return "ARMISD::RBIT";
- case ARMISD::FTOSI: return "ARMISD::FTOSI";
- case ARMISD::FTOUI: return "ARMISD::FTOUI";
- case ARMISD::SITOF: return "ARMISD::SITOF";
- case ARMISD::UITOF: return "ARMISD::UITOF";
-
case ARMISD::SRL_FLAG: return "ARMISD::SRL_FLAG";
case ARMISD::SRA_FLAG: return "ARMISD::SRA_FLAG";
case ARMISD::RRX: return "ARMISD::RRX";
@@ -1164,6 +1158,20 @@ const TargetRegisterClass *ARMTargetLowering::getRegClassFor(MVT VT) const {
return TargetLowering::getRegClassFor(VT);
}
+// memcpy, and other memory intrinsics, typically try to use LDM/STM if the
+// source/dest is aligned and the copy size is large enough. We therefore want
+// to align such objects passed to memory intrinsics.
+bool ARMTargetLowering::shouldAlignPointerArgs(CallInst *CI, unsigned &MinSize,
+ unsigned &PrefAlign) const {
+ if (!isa<MemIntrinsic>(CI))
+ return false;
+ MinSize = 8;
+ // On ARM11 onwards (excluding M class) 8-byte aligned LDM is typically 1
+ // cycle faster than 4-byte aligned LDM.
+ PrefAlign = (Subtarget->hasV6Ops() && !Subtarget->isMClass() ? 8 : 4);
+ return true;
+}
+
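shouldAlignPointerArgs only reports the policy (MinSize, PrefAlign); acting on it is left to the caller. A hypothetical caller-side sketch under 2015-era IR APIs, widening the alignment of allocas fed to a qualifying memory intrinsic; this is illustrative, not the in-tree consumer's exact code:

#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Instructions.h"
#include "llvm/Target/TargetLowering.h"
using namespace llvm;

static void maybeAlignMemIntrinsicArgs(const TargetLowering &TLI,
                                       const DataLayout &DL, CallInst *CI) {
  unsigned MinSize, PrefAlign;
  if (!TLI.shouldAlignPointerArgs(CI, MinSize, PrefAlign))
    return;
  for (Value *Arg : CI->arg_operands())
    if (auto *AI = dyn_cast<AllocaInst>(Arg->stripPointerCasts()))
      // Widen only: objects of at least MinSize bytes benefit, and an
      // alignment the frontend already requested is never reduced.
      if (AI->getAllocatedType()->isSized() &&
          DL.getTypeAllocSize(AI->getAllocatedType()) >= MinSize &&
          AI->getAlignment() < PrefAlign)
        AI->setAlignment(PrefAlign);
}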
// Create a fast isel object.
FastISel *
ARMTargetLowering::createFastISel(FunctionLoweringInfo &funcInfo,
@@ -1815,16 +1823,16 @@ ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
const ARMBaseRegisterInfo *ARI = Subtarget->getRegisterInfo();
if (isThisReturn) {
// For 'this' returns, use the R0-preserving mask if applicable
- Mask = ARI->getThisReturnPreservedMask(CallConv);
+ Mask = ARI->getThisReturnPreservedMask(MF, CallConv);
if (!Mask) {
// Set isThisReturn to false if the calling convention is not one that
// allows 'returned' to be modeled in this way, so LowerCallResult does
// not try to pass 'this' straight through
isThisReturn = false;
- Mask = ARI->getCallPreservedMask(CallConv);
+ Mask = ARI->getCallPreservedMask(MF, CallConv);
}
} else
- Mask = ARI->getCallPreservedMask(CallConv);
+ Mask = ARI->getCallPreservedMask(MF, CallConv);
assert(Mask && "Missing call preserved mask for calling convention");
Ops.push_back(DAG.getRegisterMask(Mask));
@@ -1857,60 +1865,61 @@ ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
/// on the stack. Remember the next parameter register to allocate,
/// and then confiscate the rest of the parameter registers to ensure
/// this.
-void
-ARMTargetLowering::HandleByVal(
- CCState *State, unsigned &size, unsigned Align) const {
- unsigned reg = State->AllocateReg(GPRArgRegs);
+void ARMTargetLowering::HandleByVal(CCState *State, unsigned &Size,
+ unsigned Align) const {
assert((State->getCallOrPrologue() == Prologue ||
State->getCallOrPrologue() == Call) &&
"unhandled ParmContext");
- if ((ARM::R0 <= reg) && (reg <= ARM::R3)) {
- if (Subtarget->isAAPCS_ABI() && Align > 4) {
- unsigned AlignInRegs = Align / 4;
- unsigned Waste = (ARM::R4 - reg) % AlignInRegs;
- for (unsigned i = 0; i < Waste; ++i)
- reg = State->AllocateReg(GPRArgRegs);
- }
- if (reg != 0) {
- unsigned excess = 4 * (ARM::R4 - reg);
-
- // Special case when NSAA != SP and parameter size greater than size of
- // all remained GPR regs. In that case we can't split parameter, we must
- // send it to stack. We also must set NCRN to R4, so waste all
- // remained registers.
- const unsigned NSAAOffset = State->getNextStackOffset();
- if (Subtarget->isAAPCS_ABI() && NSAAOffset != 0 && size > excess) {
- while (State->AllocateReg(GPRArgRegs))
- ;
- return;
- }
+ // Byval (as with any stack) slots are always at least 4-byte aligned.
+ Align = std::max(Align, 4U);
- // First register for byval parameter is the first register that wasn't
- // allocated before this method call, so it would be "reg".
- // If parameter is small enough to be saved in range [reg, r4), then
- // the end (first after last) register would be reg + param-size-in-regs,
- // else parameter would be splitted between registers and stack,
- // end register would be r4 in this case.
- unsigned ByValRegBegin = reg;
- unsigned ByValRegEnd = (size < excess) ? reg + size/4 : (unsigned)ARM::R4;
- State->addInRegsParamInfo(ByValRegBegin, ByValRegEnd);
- // Note, first register is allocated in the beginning of function already,
- // allocate remained amount of registers we need.
- for (unsigned i = reg+1; i != ByValRegEnd; ++i)
- State->AllocateReg(GPRArgRegs);
- // A byval parameter that is split between registers and memory needs its
- // size truncated here.
- // In the case where the entire structure fits in registers, we set the
- // size in memory to zero.
- if (size < excess)
- size = 0;
- else
- size -= excess;
- }
+ unsigned Reg = State->AllocateReg(GPRArgRegs);
+ if (!Reg)
+ return;
+
+ unsigned AlignInRegs = Align / 4;
+ unsigned Waste = (ARM::R4 - Reg) % AlignInRegs;
+ for (unsigned i = 0; i < Waste; ++i)
+ Reg = State->AllocateReg(GPRArgRegs);
+
+ if (!Reg)
+ return;
+
+ unsigned Excess = 4 * (ARM::R4 - Reg);
+
+ // Special case when NSAA != SP and the parameter size is greater than the
+ // size of all remaining GPR regs. In that case we can't split the
+ // parameter; we must send it all to the stack, and waste all remaining
+ // registers so that NCRN ends up at R4.
+ const unsigned NSAAOffset = State->getNextStackOffset();
+ if (NSAAOffset != 0 && Size > Excess) {
+ while (State->AllocateReg(GPRArgRegs))
+ ;
+ return;
}
+
+ // The first register for the byval parameter is the first register that
+ // wasn't allocated before this call, i.e. "Reg". If the parameter is small
+ // enough to be saved entirely in the range [Reg, r4), then the end (first
+ // after last) register is Reg + param-size-in-regs; otherwise the
+ // parameter is split between registers and the stack, and the end
+ // register is r4.
+ unsigned ByValRegBegin = Reg;
+ unsigned ByValRegEnd = std::min<unsigned>(Reg + Size / 4, ARM::R4);
+ State->addInRegsParamInfo(ByValRegBegin, ByValRegEnd);
+ // Note: the first register was already allocated at the start of this
+ // function, so allocate the remaining registers we need here.
+ for (unsigned i = Reg + 1; i != ByValRegEnd; ++i)
+ State->AllocateReg(GPRArgRegs);
+ // A byval parameter that is split between registers and memory needs its
+ // size truncated here.
+ // In the case where the entire structure fits in registers, we set the
+ // size in memory to zero.
+ Size = std::max<int>(Size - Excess, 0);
}
+
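Setting aside the NSAA special case, the register bookkeeping in the rewritten HandleByVal reduces to a little modular arithmetic over r0-r3. A standalone sketch with plain integers 0..4 standing in for the consecutive ARM::R0..ARM::R4 enumerators:

#include <algorithm>
#include <cstdio>

// Registers r0..r3 are numbered 0..3; 4 stands in for ARM::R4 ("one past
// the last argument register"). NSAA handling omitted for brevity.
static void handleByValSketch(unsigned FirstFreeReg, unsigned Size,
                              unsigned Align) {
  Align = std::max(Align, 4u);            // stack slots are 4-byte aligned
  unsigned Reg = FirstFreeReg;
  if (Reg >= 4)
    return;                               // nothing left in r0-r3
  Reg += (4 - Reg) % (Align / 4);         // burn registers to reach alignment
  if (Reg >= 4)
    return;
  unsigned Excess = 4 * (4 - Reg);        // bytes still available in registers
  unsigned RegEnd = std::min(Reg + Size / 4, 4u);
  unsigned OnStack = Size > Excess ? Size - Excess : 0;
  std::printf("regs [r%u, r%u), %u bytes on the stack\n", Reg, RegEnd, OnStack);
}

// e.g. an 8-byte-aligned 12-byte struct arriving when r1 is next free:
// handleByValSketch(1, 12, 8) -> regs [r2, r4), 4 bytes on the stack.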
/// MatchingStackOffset - Return true if the given stack call argument is
/// already available in the same (relative) position of the caller's
/// incoming argument stack.
@@ -1991,7 +2000,7 @@ ARMTargetLowering::IsEligibleForTailCallOptimization(SDValue Callee,
if (isCalleeStructRet || isCallerStructRet)
return false;
- // FIXME: Completely disable sibcall for Thumb1 since Thumb1RegisterInfo::
+ // FIXME: Completely disable sibcall for Thumb1 since ThumbRegisterInfo::
// emitEpilogue is not ready for them. Thumb tail calls also use t2B, as
// the Thumb1 16-bit unconditional branch doesn't have sufficient relocation
// support in the assembler and linker to be used. This would need to be
@@ -2819,50 +2828,6 @@ ARMTargetLowering::GetF64FormalArgument(CCValAssign &VA, CCValAssign &NextVA,
return DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, ArgValue, ArgValue2);
}
-void
-ARMTargetLowering::computeRegArea(CCState &CCInfo, MachineFunction &MF,
- unsigned InRegsParamRecordIdx,
- unsigned ArgSize,
- unsigned &ArgRegsSize,
- unsigned &ArgRegsSaveSize)
- const {
- unsigned NumGPRs;
- if (InRegsParamRecordIdx < CCInfo.getInRegsParamsCount()) {
- unsigned RBegin, REnd;
- CCInfo.getInRegsParamInfo(InRegsParamRecordIdx, RBegin, REnd);
- NumGPRs = REnd - RBegin;
- } else {
- unsigned int firstUnalloced;
- firstUnalloced = CCInfo.getFirstUnallocated(GPRArgRegs);
- NumGPRs = (firstUnalloced <= 3) ? (4 - firstUnalloced) : 0;
- }
-
- unsigned Align = Subtarget->getFrameLowering()->getStackAlignment();
- ArgRegsSize = NumGPRs * 4;
-
- // If parameter is split between stack and GPRs...
- if (NumGPRs && Align > 4 &&
- (ArgRegsSize < ArgSize ||
- InRegsParamRecordIdx >= CCInfo.getInRegsParamsCount())) {
- // Add padding for part of param recovered from GPRs. For example,
- // if Align == 8, its last byte must be at address K*8 - 1.
- // We need to do it, since remained (stack) part of parameter has
- // stack alignment, and we need to "attach" "GPRs head" without gaps
- // to it:
- // Stack:
- // |---- 8 bytes block ----| |---- 8 bytes block ----| |---- 8 bytes...
- // [ [padding] [GPRs head] ] [ Tail passed via stack ....
- //
- ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
- unsigned Padding =
- OffsetToAlignment(ArgRegsSize + AFI->getArgRegsSaveSize(), Align);
- ArgRegsSaveSize = ArgRegsSize + Padding;
- } else
- // We don't need to extend regs save size for byval parameters if they
- // are passed via GPRs only.
- ArgRegsSaveSize = ArgRegsSize;
-}
-
// The remaining GPRs hold either the beginning of variable-argument
// data, or the beginning of an aggregate passed by value (usually
// byval). Either way, we allocate stack slots adjacent to the data
@@ -2876,13 +2841,8 @@ ARMTargetLowering::StoreByValRegs(CCState &CCInfo, SelectionDAG &DAG,
SDLoc dl, SDValue &Chain,
const Value *OrigArg,
unsigned InRegsParamRecordIdx,
- unsigned OffsetFromOrigArg,
- unsigned ArgOffset,
- unsigned ArgSize,
- bool ForceMutable,
- unsigned ByValStoreOffset,
- unsigned TotalArgRegsSaveSize) const {
-
+ int ArgOffset,
+ unsigned ArgSize) const {
// Currently, two use-cases are possible:
// Case #1. Non-var-args function, and we meet first byval parameter.
// Setup first unallocated register as first byval register;
@@ -2897,82 +2857,39 @@ ARMTargetLowering::StoreByValRegs(CCState &CCInfo, SelectionDAG &DAG,
MachineFunction &MF = DAG.getMachineFunction();
MachineFrameInfo *MFI = MF.getFrameInfo();
ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
- unsigned firstRegToSaveIndex, lastRegToSaveIndex;
unsigned RBegin, REnd;
if (InRegsParamRecordIdx < CCInfo.getInRegsParamsCount()) {
CCInfo.getInRegsParamInfo(InRegsParamRecordIdx, RBegin, REnd);
- firstRegToSaveIndex = RBegin - ARM::R0;
- lastRegToSaveIndex = REnd - ARM::R0;
} else {
- firstRegToSaveIndex = CCInfo.getFirstUnallocated(GPRArgRegs);
- lastRegToSaveIndex = 4;
- }
-
- unsigned ArgRegsSize, ArgRegsSaveSize;
- computeRegArea(CCInfo, MF, InRegsParamRecordIdx, ArgSize,
- ArgRegsSize, ArgRegsSaveSize);
-
- // Store any by-val regs to their spots on the stack so that they may be
- // loaded by deferencing the result of formal parameter pointer or va_next.
- // Note: once stack area for byval/varargs registers
- // was initialized, it can't be initialized again.
- if (ArgRegsSaveSize) {
- unsigned Padding = ArgRegsSaveSize - ArgRegsSize;
-
- if (Padding) {
- assert(AFI->getStoredByValParamsPadding() == 0 &&
- "The only parameter may be padded.");
- AFI->setStoredByValParamsPadding(Padding);
- }
-
- int FrameIndex = MFI->CreateFixedObject(ArgRegsSaveSize,
- Padding +
- ByValStoreOffset -
- (int64_t)TotalArgRegsSaveSize,
- false);
- SDValue FIN = DAG.getFrameIndex(FrameIndex, getPointerTy());
- if (Padding) {
- MFI->CreateFixedObject(Padding,
- ArgOffset + ByValStoreOffset -
- (int64_t)ArgRegsSaveSize,
- false);
- }
-
- SmallVector<SDValue, 4> MemOps;
- for (unsigned i = 0; firstRegToSaveIndex < lastRegToSaveIndex;
- ++firstRegToSaveIndex, ++i) {
- const TargetRegisterClass *RC;
- if (AFI->isThumb1OnlyFunction())
- RC = &ARM::tGPRRegClass;
- else
- RC = &ARM::GPRRegClass;
+ unsigned RBeginIdx = CCInfo.getFirstUnallocated(GPRArgRegs);
+ RBegin = RBeginIdx == 4 ? (unsigned)ARM::R4 : GPRArgRegs[RBeginIdx];
+ REnd = ARM::R4;
+ }
- unsigned VReg = MF.addLiveIn(GPRArgRegs[firstRegToSaveIndex], RC);
- SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i32);
- SDValue Store =
- DAG.getStore(Val.getValue(1), dl, Val, FIN,
- MachinePointerInfo(OrigArg, OffsetFromOrigArg + 4*i),
- false, false, 0);
- MemOps.push_back(Store);
- FIN = DAG.getNode(ISD::ADD, dl, getPointerTy(), FIN,
- DAG.getConstant(4, getPointerTy()));
- }
+ if (REnd != RBegin)
+ ArgOffset = -4 * (ARM::R4 - RBegin);
- AFI->setArgRegsSaveSize(ArgRegsSaveSize + AFI->getArgRegsSaveSize());
+ int FrameIndex = MFI->CreateFixedObject(ArgSize, ArgOffset, false);
+ SDValue FIN = DAG.getFrameIndex(FrameIndex, getPointerTy());
- if (!MemOps.empty())
- Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOps);
- return FrameIndex;
- } else {
- if (ArgSize == 0) {
- // We cannot allocate a zero-byte object for the first variadic argument,
- // so just make up a size.
- ArgSize = 4;
- }
- // This will point to the next argument passed via stack.
- return MFI->CreateFixedObject(
- ArgSize, ArgOffset, !ForceMutable);
+ SmallVector<SDValue, 4> MemOps;
+ const TargetRegisterClass *RC =
+ AFI->isThumb1OnlyFunction() ? &ARM::tGPRRegClass : &ARM::GPRRegClass;
+
+ for (unsigned Reg = RBegin, i = 0; Reg < REnd; ++Reg, ++i) {
+ unsigned VReg = MF.addLiveIn(Reg, RC);
+ SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i32);
+ SDValue Store =
+ DAG.getStore(Val.getValue(1), dl, Val, FIN,
+ MachinePointerInfo(OrigArg, 4 * i), false, false, 0);
+ MemOps.push_back(Store);
+ FIN = DAG.getNode(ISD::ADD, dl, getPointerTy(), FIN,
+ DAG.getConstant(4, getPointerTy()));
}
+
+ if (!MemOps.empty())
+ Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOps);
+ return FrameIndex;
}
// Set up the stack frame that the va_list pointer will start from.
@@ -2990,11 +2907,9 @@ ARMTargetLowering::VarArgStyleRegisters(CCState &CCInfo, SelectionDAG &DAG,
// the result of va_next.
// If there are no regs to be stored, just point the address after the last
// argument passed via the stack.
- int FrameIndex =
- StoreByValRegs(CCInfo, DAG, dl, Chain, nullptr,
- CCInfo.getInRegsParamsCount(), 0, ArgOffset, 0, ForceMutable,
- 0, TotalArgRegsSaveSize);
-
+ int FrameIndex = StoreByValRegs(CCInfo, DAG, dl, Chain, nullptr,
+ CCInfo.getInRegsParamsCount(),
+ CCInfo.getNextStackOffset(), 4);
AFI->setVarArgsFrameIndex(FrameIndex);
}
@@ -3020,7 +2935,6 @@ ARMTargetLowering::LowerFormalArguments(SDValue Chain,
isVarArg));
SmallVector<SDValue, 16> ArgValues;
- int lastInsIndex = -1;
SDValue ArgValue;
Function::const_arg_iterator CurOrigArg = MF.getFunction()->arg_begin();
unsigned CurArgIdx = 0;
@@ -3030,50 +2944,40 @@ ARMTargetLowering::LowerFormalArguments(SDValue Chain,
// We also increase this value in case of varargs function.
AFI->setArgRegsSaveSize(0);
- unsigned ByValStoreOffset = 0;
- unsigned TotalArgRegsSaveSize = 0;
- unsigned ArgRegsSaveSizeMaxAlign = 4;
-
// Calculate the amount of stack space that we need to allocate to store
// byval and variadic arguments that are passed in registers.
// We need to know this before we allocate the first byval or variadic
// argument, as they will be allocated a stack slot below the CFA (Canonical
// Frame Address, the stack pointer at entry to the function).
+ unsigned ArgRegBegin = ARM::R4;
for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
+ if (CCInfo.getInRegsParamsProcessed() >= CCInfo.getInRegsParamsCount())
+ break;
+
CCValAssign &VA = ArgLocs[i];
- if (VA.isMemLoc()) {
- int index = VA.getValNo();
- if (index != lastInsIndex) {
- ISD::ArgFlagsTy Flags = Ins[index].Flags;
- if (Flags.isByVal()) {
- unsigned ExtraArgRegsSize;
- unsigned ExtraArgRegsSaveSize;
- computeRegArea(CCInfo, MF, CCInfo.getInRegsParamsProcessed(),
- Flags.getByValSize(),
- ExtraArgRegsSize, ExtraArgRegsSaveSize);
-
- TotalArgRegsSaveSize += ExtraArgRegsSaveSize;
- if (Flags.getByValAlign() > ArgRegsSaveSizeMaxAlign)
- ArgRegsSaveSizeMaxAlign = Flags.getByValAlign();
- CCInfo.nextInRegsParam();
- }
- lastInsIndex = index;
- }
- }
+ unsigned Index = VA.getValNo();
+ ISD::ArgFlagsTy Flags = Ins[Index].Flags;
+ if (!Flags.isByVal())
+ continue;
+
+ assert(VA.isMemLoc() && "unexpected byval pointer in reg");
+ unsigned RBegin, REnd;
+ CCInfo.getInRegsParamInfo(CCInfo.getInRegsParamsProcessed(), RBegin, REnd);
+ ArgRegBegin = std::min(ArgRegBegin, RBegin);
+
+ CCInfo.nextInRegsParam();
}
CCInfo.rewindByValRegsInfo();
- lastInsIndex = -1;
+
+ int lastInsIndex = -1;
if (isVarArg && MFI->hasVAStart()) {
- unsigned ExtraArgRegsSize;
- unsigned ExtraArgRegsSaveSize;
- computeRegArea(CCInfo, MF, CCInfo.getInRegsParamsCount(), 0,
- ExtraArgRegsSize, ExtraArgRegsSaveSize);
- TotalArgRegsSaveSize += ExtraArgRegsSaveSize;
+ unsigned RegIdx = CCInfo.getFirstUnallocated(GPRArgRegs);
+ if (RegIdx != array_lengthof(GPRArgRegs))
+ ArgRegBegin = std::min(ArgRegBegin, (unsigned)GPRArgRegs[RegIdx]);
}
- // If the arg regs save area contains N-byte aligned values, the
- // bottom of it must be at least N-byte aligned.
- TotalArgRegsSaveSize = RoundUpToAlignment(TotalArgRegsSaveSize, ArgRegsSaveSizeMaxAlign);
- TotalArgRegsSaveSize = std::min(TotalArgRegsSaveSize, 16U);
+
+ unsigned TotalArgRegsSaveSize = 4 * (ARM::R4 - ArgRegBegin);
+ AFI->setArgRegsSaveSize(TotalArgRegsSaveSize);
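Because ARM::R0 through ARM::R4 are consecutive enumerators, the whole save area now falls out of the lowest argument register that has to be spilled. A worked instance, with 0..4 standing in for those enumerators:

// Illustrative values only; 4 stands in for ARM::R4.
unsigned ArgRegBegin = 1;                              // lowest spilled reg: r1
unsigned TotalArgRegsSaveSize = 4 * (4 - ArgRegBegin); // r1,r2,r3 -> 12 bytes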
for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
CCValAssign &VA = ArgLocs[i];
@@ -3178,18 +3082,9 @@ ARMTargetLowering::LowerFormalArguments(SDValue Chain,
"Byval arguments cannot be implicit");
unsigned CurByValIndex = CCInfo.getInRegsParamsProcessed();
- ByValStoreOffset = RoundUpToAlignment(ByValStoreOffset, Flags.getByValAlign());
- int FrameIndex = StoreByValRegs(
- CCInfo, DAG, dl, Chain, CurOrigArg,
- CurByValIndex,
- Ins[VA.getValNo()].PartOffset,
- VA.getLocMemOffset(),
- Flags.getByValSize(),
- true /*force mutable frames*/,
- ByValStoreOffset,
- TotalArgRegsSaveSize);
- ByValStoreOffset += Flags.getByValSize();
- ByValStoreOffset = std::min(ByValStoreOffset, 16U);
+ int FrameIndex = StoreByValRegs(CCInfo, DAG, dl, Chain, CurOrigArg,
+ CurByValIndex, VA.getLocMemOffset(),
+ Flags.getByValSize());
InVals.push_back(DAG.getFrameIndex(FrameIndex, getPointerTy()));
CCInfo.nextInRegsParam();
} else {
@@ -3894,7 +3789,6 @@ SDValue ARMTargetLowering::LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG) const {
EVT VT = Op.getValueType();
if (VT.isVector())
return LowerVectorFP_TO_INT(Op, DAG);
-
if (Subtarget->isFPOnlySP() && Op.getOperand(0).getValueType() == MVT::f64) {
RTLIB::Libcall LC;
if (Op.getOpcode() == ISD::FP_TO_SINT)
@@ -3907,20 +3801,7 @@ SDValue ARMTargetLowering::LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG) const {
/*isSigned*/ false, SDLoc(Op)).first;
}
- SDLoc dl(Op);
- unsigned Opc;
-
- switch (Op.getOpcode()) {
- default: llvm_unreachable("Invalid opcode!");
- case ISD::FP_TO_SINT:
- Opc = ARMISD::FTOSI;
- break;
- case ISD::FP_TO_UINT:
- Opc = ARMISD::FTOUI;
- break;
- }
- Op = DAG.getNode(Opc, dl, MVT::f32, Op.getOperand(0));
- return DAG.getNode(ISD::BITCAST, dl, MVT::i32, Op);
+ return Op;
}
static SDValue LowerVectorINT_TO_FP(SDValue Op, SelectionDAG &DAG) {
@@ -3960,7 +3841,6 @@ SDValue ARMTargetLowering::LowerINT_TO_FP(SDValue Op, SelectionDAG &DAG) const {
EVT VT = Op.getValueType();
if (VT.isVector())
return LowerVectorINT_TO_FP(Op, DAG);
-
if (Subtarget->isFPOnlySP() && Op.getValueType() == MVT::f64) {
RTLIB::Libcall LC;
if (Op.getOpcode() == ISD::SINT_TO_FP)
@@ -3973,21 +3853,7 @@ SDValue ARMTargetLowering::LowerINT_TO_FP(SDValue Op, SelectionDAG &DAG) const {
/*isSigned*/ false, SDLoc(Op)).first;
}
- SDLoc dl(Op);
- unsigned Opc;
-
- switch (Op.getOpcode()) {
- default: llvm_unreachable("Invalid opcode!");
- case ISD::SINT_TO_FP:
- Opc = ARMISD::SITOF;
- break;
- case ISD::UINT_TO_FP:
- Opc = ARMISD::UITOF;
- break;
- }
-
- Op = DAG.getNode(ISD::BITCAST, dl, MVT::f32, Op.getOperand(0));
- return DAG.getNode(Opc, dl, VT, Op);
+ return Op;
}
SDValue ARMTargetLowering::LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) const {
@@ -7239,16 +7105,20 @@ ARMTargetLowering::EmitStructByval(MachineInstr *MI,
// Load an immediate to varEnd.
unsigned varEnd = MRI.createVirtualRegister(TRC);
- if (IsThumb2) {
+ if (Subtarget->useMovt(*MF)) {
unsigned Vtmp = varEnd;
if ((LoopSize & 0xFFFF0000) != 0)
Vtmp = MRI.createVirtualRegister(TRC);
- AddDefaultPred(BuildMI(BB, dl, TII->get(ARM::t2MOVi16), Vtmp)
- .addImm(LoopSize & 0xFFFF));
+ AddDefaultPred(BuildMI(BB, dl,
+ TII->get(IsThumb2 ? ARM::t2MOVi16 : ARM::MOVi16),
+ Vtmp).addImm(LoopSize & 0xFFFF));
if ((LoopSize & 0xFFFF0000) != 0)
- AddDefaultPred(BuildMI(BB, dl, TII->get(ARM::t2MOVTi16), varEnd)
- .addReg(Vtmp).addImm(LoopSize >> 16));
+ AddDefaultPred(BuildMI(BB, dl,
+ TII->get(IsThumb2 ? ARM::t2MOVTi16 : ARM::MOVTi16),
+ varEnd)
+ .addReg(Vtmp)
+ .addImm(LoopSize >> 16));
} else {
MachineConstantPool *ConstantPool = MF->getConstantPool();
Type *Int32Ty = Type::getInt32Ty(MF->getFunction()->getContext());
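The Subtarget->useMovt path above materializes the 32-bit LoopSize as a movw of the low half, followed by a movt of the high half only when that half is non-zero, so small sizes stay a single instruction. A sketch of the split (rN is schematic):

#include <cstdint>
#include <cstdio>

static void emitMov32Sketch(uint32_t LoopSize) {
  std::printf("movw rN, #0x%04x\n", (unsigned)(LoopSize & 0xFFFF));
  if (LoopSize & 0xFFFF0000)
    std::printf("movt rN, #0x%04x\n", (unsigned)(LoopSize >> 16));
}

// emitMov32Sketch(0x12345678) prints "movw rN, #0x5678" / "movt rN, #0x1234".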
@@ -10076,6 +9946,28 @@ bool ARMTargetLowering::isZExtFree(SDValue Val, EVT VT2) const {
return false;
}
+bool ARMTargetLowering::isVectorLoadExtDesirable(SDValue ExtVal) const {
+ EVT VT = ExtVal.getValueType();
+
+ if (!isTypeLegal(VT))
+ return false;
+
+ // Don't create a loadext if we can fold the extension into a wide/long
+ // instruction.
+ // If there's more than one user instruction, the loadext is desirable no
+ // matter what; note that a single instruction can account for two of the
+ // uses, which is why isOnlyUserOf is checked rather than the raw use count.
+ if (ExtVal->use_empty() ||
+ !ExtVal->use_begin()->isOnlyUserOf(ExtVal.getNode()))
+ return true;
+
+ SDNode *U = *ExtVal->use_begin();
+ if ((U->getOpcode() == ISD::ADD || U->getOpcode() == ISD::SUB ||
+ U->getOpcode() == ISD::SHL || U->getOpcode() == ARMISD::VSHL))
+ return false;
+
+ return true;
+}
+
bool ARMTargetLowering::allowTruncateForTailCall(Type *Ty1, Type *Ty2) const {
if (!Ty1->isIntegerTy() || !Ty2->isIntegerTy())
return false;
@@ -10289,9 +10181,9 @@ bool ARMTargetLowering::isLegalAddressingMode(const AddrMode &AM,
bool ARMTargetLowering::isLegalICmpImmediate(int64_t Imm) const {
// Thumb2 and ARM modes can use cmn for negative immediates.
if (!Subtarget->isThumb())
- return ARM_AM::getSOImmVal(llvm::abs64(Imm)) != -1;
+ return ARM_AM::getSOImmVal(std::abs(Imm)) != -1;
if (Subtarget->isThumb2())
- return ARM_AM::getT2SOImmVal(llvm::abs64(Imm)) != -1;
+ return ARM_AM::getT2SOImmVal(std::abs(Imm)) != -1;
// Thumb1 doesn't have cmn, and only 8-bit immediates.
return Imm >= 0 && Imm <= 255;
}
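The switch from llvm::abs64 to std::abs doesn't change the legality rule: a negative compare immediate is legal exactly when its magnitude has a valid (T2) shifter-operand encoding, since cmp can be flipped to cmn. A hedged sketch of that rule, with hasEncoding standing in for ARM_AM::getSOImmVal / getT2SOImmVal:

#include <cstdint>
#include <cstdlib>

static bool isLegalICmpImmSketch(int64_t Imm, bool Thumb1,
                                 bool (*hasEncoding)(uint32_t)) {
  if (Thumb1)
    return Imm >= 0 && Imm <= 255;  // no cmn; 8-bit unsigned immediates only
  // cmp rX, #-C can be rewritten as cmn rX, #C, so only |Imm| matters.
  return hasEncoding((uint32_t)std::abs(Imm));
}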
@@ -10302,7 +10194,7 @@ bool ARMTargetLowering::isLegalICmpImmediate(int64_t Imm) const {
/// immediate into a register.
bool ARMTargetLowering::isLegalAddImmediate(int64_t Imm) const {
// Same encoding for add/sub, just flip the sign.
- int64_t AbsImm = llvm::abs64(Imm);
+ int64_t AbsImm = std::abs(Imm);
if (!Subtarget->isThumb())
return ARM_AM::getSOImmVal(AbsImm) != -1;
if (Subtarget->isThumb2())
@@ -11198,9 +11090,12 @@ bool ARMTargetLowering::shouldExpandAtomicLoadInIR(LoadInst *LI) const {
// For the real atomic operations, we have ldrex/strex up to 32 bits,
// and up to 64 bits on the non-M profiles
-bool ARMTargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const {
+TargetLoweringBase::AtomicRMWExpansionKind
+ARMTargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const {
unsigned Size = AI->getType()->getPrimitiveSizeInBits();
- return Size <= (Subtarget->isMClass() ? 32U : 64U);
+ return (Size <= (Subtarget->isMClass() ? 32U : 64U))
+ ? AtomicRMWExpansionKind::LLSC
+ : AtomicRMWExpansionKind::None;
}
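Restated outside the class, the new return value is a pure function of the operation width and the profile; the names below are illustrative, not the LLVM enum:

enum class ExpansionKind { None, LLSC };

// ldrex/strex loops are emitted up to the architectural exclusive-access
// width: 32 bits on M-profile, 64 bits elsewhere.
static ExpansionKind atomicRMWExpansionSketch(unsigned SizeInBits,
                                              bool IsMClass) {
  return SizeInBits <= (IsMClass ? 32u : 64u) ? ExpansionKind::LLSC
                                              : ExpansionKind::None;
}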
// This has so far only been implemented for MachO.
diff --git a/lib/Target/ARM/ARMISelLowering.h b/lib/Target/ARM/ARMISelLowering.h
index ec1407d..dd4c954 100644
--- a/lib/Target/ARM/ARMISelLowering.h
+++ b/lib/Target/ARM/ARMISelLowering.h
@@ -65,11 +65,6 @@ namespace llvm {
RBIT, // ARM bitreverse instruction
- FTOSI, // FP to sint within a FP register.
- FTOUI, // FP to uint within a FP register.
- SITOF, // sint to FP within a FP register.
- UITOF, // uint to FP within a FP register.
-
SRL_FLAG, // V,Flag = srl_flag X -> srl X, 1 + save carry out.
SRA_FLAG, // V,Flag = sra_flag X -> sra X, 1 + save carry out.
RRX, // V = RRX X, Flag -> srl X, 1 + shift in carry flag.
@@ -283,6 +278,8 @@ namespace llvm {
using TargetLowering::isZExtFree;
bool isZExtFree(SDValue Val, EVT VT2) const override;
+ bool isVectorLoadExtDesirable(SDValue ExtVal) const override;
+
bool allowTruncateForTailCall(Type *Ty1, Type *Ty2) const override;
@@ -346,6 +343,12 @@ namespace llvm {
std::vector<SDValue> &Ops,
SelectionDAG &DAG) const override;
+ unsigned getInlineAsmMemConstraint(
+ const std::string &ConstraintCode) const override {
+ // FIXME: Map different constraints differently.
+ return InlineAsm::Constraint_m;
+ }
+
const ARMSubtarget* getSubtarget() const {
return Subtarget;
}
@@ -360,6 +363,9 @@ namespace llvm {
return true;
}
+ bool shouldAlignPointerArgs(CallInst *CI, unsigned &MinSize,
+ unsigned &PrefAlign) const override;
+
/// createFastISel - This method returns a target specific FastISel object,
/// or null if the target does not support "fast" ISel.
FastISel *createFastISel(FunctionLoweringInfo &funcInfo,
@@ -404,7 +410,8 @@ namespace llvm {
bool shouldExpandAtomicLoadInIR(LoadInst *LI) const override;
bool shouldExpandAtomicStoreInIR(StoreInst *SI) const override;
- bool shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const override;
+ TargetLoweringBase::AtomicRMWExpansionKind
+ shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const override;
bool useLoadStackGuardNode() const override;
@@ -525,12 +532,8 @@ namespace llvm {
SDLoc dl, SDValue &Chain,
const Value *OrigArg,
unsigned InRegsParamRecordIdx,
- unsigned OffsetFromOrigArg,
- unsigned ArgOffset,
- unsigned ArgSize,
- bool ForceMutable,
- unsigned ByValStoreOffset,
- unsigned TotalArgRegsSaveSize) const;
+ int ArgOffset,
+ unsigned ArgSize) const;
void VarArgStyleRegisters(CCState &CCInfo, SelectionDAG &DAG,
SDLoc dl, SDValue &Chain,
@@ -538,12 +541,6 @@ namespace llvm {
unsigned TotalArgRegsSaveSize,
bool ForceMutable = false) const;
- void computeRegArea(CCState &CCInfo, MachineFunction &MF,
- unsigned InRegsParamRecordIdx,
- unsigned ArgSize,
- unsigned &ArgRegsSize,
- unsigned &ArgRegsSaveSize) const;
-
SDValue
LowerCall(TargetLowering::CallLoweringInfo &CLI,
SmallVectorImpl<SDValue> &InVals) const override;
diff --git a/lib/Target/ARM/ARMInstrFormats.td b/lib/Target/ARM/ARMInstrFormats.td
index 7d27cf3..e79608d 100644
--- a/lib/Target/ARM/ARMInstrFormats.td
+++ b/lib/Target/ARM/ARMInstrFormats.td
@@ -983,7 +983,12 @@ class ARMV5MOPat<dag pattern, dag result> : Pat<pattern, result> {
class ARMV6Pat<dag pattern, dag result> : Pat<pattern, result> {
list<Predicate> Predicates = [IsARM, HasV6];
}
-
+class VFPPat<dag pattern, dag result> : Pat<pattern, result> {
+ list<Predicate> Predicates = [HasVFP2];
+}
+class VFPNoNEONPat<dag pattern, dag result> : Pat<pattern, result> {
+ list<Predicate> Predicates = [HasVFP2, DontUseNEONForFP];
+}
//===----------------------------------------------------------------------===//
// Thumb Instruction Format Definitions.
//
diff --git a/lib/Target/ARM/ARMInstrInfo.cpp b/lib/Target/ARM/ARMInstrInfo.cpp
index bc617f0..7c004c9 100644
--- a/lib/Target/ARM/ARMInstrInfo.cpp
+++ b/lib/Target/ARM/ARMInstrInfo.cpp
@@ -30,8 +30,7 @@
using namespace llvm;
ARMInstrInfo::ARMInstrInfo(const ARMSubtarget &STI)
- : ARMBaseInstrInfo(STI), RI(STI) {
-}
+ : ARMBaseInstrInfo(STI), RI() {}
/// getNoopForMachoTarget - Return the noop instruction to use for a noop.
void ARMInstrInfo::getNoopForMachoTarget(MCInst &NopInst) const {
@@ -146,6 +145,10 @@ namespace {
return false;
const ARMSubtarget &STI =
static_cast<const ARMSubtarget &>(MF.getSubtarget());
+ // Don't do this for Thumb1.
+ if (STI.isThumb1Only())
+ return false;
+
const TargetMachine &TM = MF.getTarget();
if (TM.getRelocationModel() != Reloc::PIC_)
return false;
diff --git a/lib/Target/ARM/ARMInstrInfo.td b/lib/Target/ARM/ARMInstrInfo.td
index 126c552..c3984ca 100644
--- a/lib/Target/ARM/ARMInstrInfo.td
+++ b/lib/Target/ARM/ARMInstrInfo.td
@@ -199,6 +199,9 @@ def HasV6M : Predicate<"Subtarget->hasV6MOps()">,
def HasV6T2 : Predicate<"Subtarget->hasV6T2Ops()">,
AssemblerPredicate<"HasV6T2Ops", "armv6t2">;
def NoV6T2 : Predicate<"!Subtarget->hasV6T2Ops()">;
+def HasV6K : Predicate<"Subtarget->hasV6KOps()">,
+ AssemblerPredicate<"HasV6KOps", "armv6k">;
+def NoV6K : Predicate<"!Subtarget->hasV6KOps()">;
def HasV7 : Predicate<"Subtarget->hasV7Ops()">,
AssemblerPredicate<"HasV7Ops", "armv7">;
def HasV8 : Predicate<"Subtarget->hasV8Ops()">,
@@ -223,6 +226,8 @@ def HasCrypto : Predicate<"Subtarget->hasCrypto()">,
AssemblerPredicate<"FeatureCrypto", "crypto">;
def HasCRC : Predicate<"Subtarget->hasCRC()">,
AssemblerPredicate<"FeatureCRC", "crc">;
+def HasV8_1a : Predicate<"Subtarget->hasV8_1a()">,
+ AssemblerPredicate<"FeatureV8_1a", "v8.1a">;
def HasFP16 : Predicate<"Subtarget->hasFP16()">,
AssemblerPredicate<"FeatureFP16","half-float">;
def HasDivide : Predicate<"Subtarget->hasDivide()">,
@@ -1835,11 +1840,11 @@ def HINT : AI<(outs), (ins imm0_239:$imm), MiscFrm, NoItinerary,
let Inst{7-0} = imm;
}
-def : InstAlias<"nop$p", (HINT 0, pred:$p)>, Requires<[IsARM, HasV6T2]>;
-def : InstAlias<"yield$p", (HINT 1, pred:$p)>, Requires<[IsARM, HasV6T2]>;
-def : InstAlias<"wfe$p", (HINT 2, pred:$p)>, Requires<[IsARM, HasV6T2]>;
-def : InstAlias<"wfi$p", (HINT 3, pred:$p)>, Requires<[IsARM, HasV6T2]>;
-def : InstAlias<"sev$p", (HINT 4, pred:$p)>, Requires<[IsARM, HasV6T2]>;
+def : InstAlias<"nop$p", (HINT 0, pred:$p)>, Requires<[IsARM, HasV6K]>;
+def : InstAlias<"yield$p", (HINT 1, pred:$p)>, Requires<[IsARM, HasV6K]>;
+def : InstAlias<"wfe$p", (HINT 2, pred:$p)>, Requires<[IsARM, HasV6K]>;
+def : InstAlias<"wfi$p", (HINT 3, pred:$p)>, Requires<[IsARM, HasV6K]>;
+def : InstAlias<"sev$p", (HINT 4, pred:$p)>, Requires<[IsARM, HasV6K]>;
def : InstAlias<"sevl$p", (HINT 5, pred:$p)>, Requires<[IsARM, HasV8]>;
def SEL : AI<(outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm), DPFrm, NoItinerary, "sel",
diff --git a/lib/Target/ARM/ARMInstrNEON.td b/lib/Target/ARM/ARMInstrNEON.td
index 2a7b4b5..a6a07a8 100644
--- a/lib/Target/ARM/ARMInstrNEON.td
+++ b/lib/Target/ARM/ARMInstrNEON.td
@@ -2790,7 +2790,7 @@ class N3VDMulOpSL<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
imm:$lane)))))))]>;
class N3VDMulOpSL16<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
string OpcodeStr, string Dt,
- ValueType Ty, SDNode MulOp, SDNode ShOp>
+ ValueType Ty, SDPatternOperator MulOp, SDPatternOperator ShOp>
: N3VLane16<0, 1, op21_20, op11_8, 1, 0,
(outs DPR:$Vd),
(ins DPR:$src1, DPR:$Vn, DPR_8:$Vm, VectorIndex16:$lane),
@@ -2826,7 +2826,7 @@ class N3VQMulOpSL<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
class N3VQMulOpSL16<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
string OpcodeStr, string Dt,
ValueType ResTy, ValueType OpTy,
- SDNode MulOp, SDNode ShOp>
+ SDPatternOperator MulOp, SDPatternOperator ShOp>
: N3VLane16<1, 1, op21_20, op11_8, 1, 0,
(outs QPR:$Vd),
(ins QPR:$src1, QPR:$Vn, DPR_8:$Vm, VectorIndex16:$lane),
@@ -3674,7 +3674,7 @@ multiclass N3VMulOp_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
multiclass N3VMulOpSL_HS<bits<4> op11_8,
InstrItinClass itinD16, InstrItinClass itinD32,
InstrItinClass itinQ16, InstrItinClass itinQ32,
- string OpcodeStr, string Dt, SDNode ShOp> {
+ string OpcodeStr, string Dt, SDPatternOperator ShOp> {
def v4i16 : N3VDMulOpSL16<0b01, op11_8, itinD16,
OpcodeStr, !strconcat(Dt, "16"), v4i16, mul, ShOp>;
def v2i32 : N3VDMulOpSL<0b10, op11_8, itinD32,
@@ -3711,27 +3711,38 @@ multiclass N3VIntOp_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
}
// Neon 3-argument intrinsics,
-// element sizes of 8, 16 and 32 bits:
-multiclass N3VInt3_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
- InstrItinClass itinD, InstrItinClass itinQ,
+// element sizes of 16 and 32 bits:
+multiclass N3VInt3_HS<bit op24, bit op23, bits<4> op11_8, bit op4,
+ InstrItinClass itinD16, InstrItinClass itinD32,
+ InstrItinClass itinQ16, InstrItinClass itinQ32,
string OpcodeStr, string Dt, SDPatternOperator IntOp> {
// 64-bit vector types.
- def v8i8 : N3VDInt3<op24, op23, 0b00, op11_8, op4, itinD,
- OpcodeStr, !strconcat(Dt, "8"), v8i8, v8i8, IntOp>;
- def v4i16 : N3VDInt3<op24, op23, 0b01, op11_8, op4, itinD,
+ def v4i16 : N3VDInt3<op24, op23, 0b01, op11_8, op4, itinD16,
OpcodeStr, !strconcat(Dt, "16"), v4i16, v4i16, IntOp>;
- def v2i32 : N3VDInt3<op24, op23, 0b10, op11_8, op4, itinD,
+ def v2i32 : N3VDInt3<op24, op23, 0b10, op11_8, op4, itinD32,
OpcodeStr, !strconcat(Dt, "32"), v2i32, v2i32, IntOp>;
// 128-bit vector types.
- def v16i8 : N3VQInt3<op24, op23, 0b00, op11_8, op4, itinQ,
- OpcodeStr, !strconcat(Dt, "8"), v16i8, v16i8, IntOp>;
- def v8i16 : N3VQInt3<op24, op23, 0b01, op11_8, op4, itinQ,
+ def v8i16 : N3VQInt3<op24, op23, 0b01, op11_8, op4, itinQ16,
OpcodeStr, !strconcat(Dt, "16"), v8i16, v8i16, IntOp>;
- def v4i32 : N3VQInt3<op24, op23, 0b10, op11_8, op4, itinQ,
+ def v4i32 : N3VQInt3<op24, op23, 0b10, op11_8, op4, itinQ32,
OpcodeStr, !strconcat(Dt, "32"), v4i32, v4i32, IntOp>;
}
+// element sizes of 8, 16 and 32 bits:
+multiclass N3VInt3_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
+ InstrItinClass itinD16, InstrItinClass itinD32,
+ InstrItinClass itinQ16, InstrItinClass itinQ32,
+ string OpcodeStr, string Dt, SDPatternOperator IntOp>
+ : N3VInt3_HS<op24, op23, op11_8, op4, itinD16, itinD32,
+ itinQ16, itinQ32, OpcodeStr, Dt, IntOp> {
+ // 64-bit vector types.
+ def v8i8 : N3VDInt3<op24, op23, 0b00, op11_8, op4, itinD16,
+ OpcodeStr, !strconcat(Dt, "8"), v8i8, v8i8, IntOp>;
+ // 128-bit vector types.
+ def v16i8 : N3VQInt3<op24, op23, 0b00, op11_8, op4, itinQ16,
+ OpcodeStr, !strconcat(Dt, "8"), v16i8, v16i8, IntOp>;
+}
// Neon Long Multiply-Op vector operations,
// element sizes of 8, 16 and 32 bits:
@@ -4305,6 +4316,147 @@ defm VMLALu : N3VLMulOp_QHS<1,1,0b1000,0, IIC_VMACi16D, IIC_VMACi32D,
defm VMLALsls : N3VLMulOpSL_HS<0, 0b0010, "vmlal", "s", NEONvmulls, add>;
defm VMLALslu : N3VLMulOpSL_HS<1, 0b0010, "vmlal", "u", NEONvmullu, add>;
+let Predicates = [HasNEON, HasV8_1a] in {
+ // v8.1a Neon Rounding Double Multiply-Op vector operations,
+ // VQRDMLAH : Vector Saturating Rounding Doubling Multiply Accumulate,
+ // returning the high half (unlike VQDMLAL, this is not a widening op)
+ defm VQRDMLAH : N3VInt3_HS<1, 0, 0b1011, 1, IIC_VMACi16D, IIC_VMACi32D,
+ IIC_VMACi16Q, IIC_VMACi32Q, "vqrdmlah", "s",
+ null_frag>;
+ def : Pat<(v4i16 (int_arm_neon_vqadds
+ (v4i16 DPR:$src1),
+ (v4i16 (int_arm_neon_vqrdmulh (v4i16 DPR:$Vn),
+ (v4i16 DPR:$Vm))))),
+ (v4i16 (VQRDMLAHv4i16 DPR:$src1, DPR:$Vn, DPR:$Vm))>;
+ def : Pat<(v2i32 (int_arm_neon_vqadds
+ (v2i32 DPR:$src1),
+ (v2i32 (int_arm_neon_vqrdmulh (v2i32 DPR:$Vn),
+ (v2i32 DPR:$Vm))))),
+ (v2i32 (VQRDMLAHv2i32 DPR:$src1, DPR:$Vn, DPR:$Vm))>;
+ def : Pat<(v8i16 (int_arm_neon_vqadds
+ (v8i16 QPR:$src1),
+ (v8i16 (int_arm_neon_vqrdmulh (v8i16 QPR:$Vn),
+ (v8i16 QPR:$Vm))))),
+ (v8i16 (VQRDMLAHv8i16 QPR:$src1, QPR:$Vn, QPR:$Vm))>;
+ def : Pat<(v4i32 (int_arm_neon_vqadds
+ (v4i32 QPR:$src1),
+ (v4i32 (int_arm_neon_vqrdmulh (v4i32 QPR:$Vn),
+ (v4i32 QPR:$Vm))))),
+ (v4i32 (VQRDMLAHv4i32 QPR:$src1, QPR:$Vn, QPR:$Vm))>;
+
+ defm VQRDMLAHsl : N3VMulOpSL_HS<0b1110, IIC_VMACi16D, IIC_VMACi32D,
+ IIC_VMACi16Q, IIC_VMACi32Q, "vqrdmlah", "s",
+ null_frag>;
+ def : Pat<(v4i16 (int_arm_neon_vqadds
+ (v4i16 DPR:$src1),
+ (v4i16 (int_arm_neon_vqrdmulh
+ (v4i16 DPR:$Vn),
+ (v4i16 (NEONvduplane (v4i16 DPR_8:$Vm),
+ imm:$lane)))))),
+ (v4i16 (VQRDMLAHslv4i16 DPR:$src1, DPR:$Vn, DPR_8:$Vm,
+ imm:$lane))>;
+ def : Pat<(v2i32 (int_arm_neon_vqadds
+ (v2i32 DPR:$src1),
+ (v2i32 (int_arm_neon_vqrdmulh
+ (v2i32 DPR:$Vn),
+ (v2i32 (NEONvduplane (v2i32 DPR_VFP2:$Vm),
+ imm:$lane)))))),
+ (v2i32 (VQRDMLAHslv2i32 DPR:$src1, DPR:$Vn, DPR_VFP2:$Vm,
+ imm:$lane))>;
+ def : Pat<(v8i16 (int_arm_neon_vqadds
+ (v8i16 QPR:$src1),
+ (v8i16 (int_arm_neon_vqrdmulh
+ (v8i16 QPR:$src2),
+ (v8i16 (NEONvduplane (v8i16 QPR:$src3),
+ imm:$lane)))))),
+ (v8i16 (VQRDMLAHslv8i16 (v8i16 QPR:$src1),
+ (v8i16 QPR:$src2),
+ (v4i16 (EXTRACT_SUBREG
+ QPR:$src3,
+ (DSubReg_i16_reg imm:$lane))),
+ (SubReg_i16_lane imm:$lane)))>;
+ def : Pat<(v4i32 (int_arm_neon_vqadds
+ (v4i32 QPR:$src1),
+ (v4i32 (int_arm_neon_vqrdmulh
+ (v4i32 QPR:$src2),
+ (v4i32 (NEONvduplane (v4i32 QPR:$src3),
+ imm:$lane)))))),
+ (v4i32 (VQRDMLAHslv4i32 (v4i32 QPR:$src1),
+ (v4i32 QPR:$src2),
+ (v2i32 (EXTRACT_SUBREG
+ QPR:$src3,
+ (DSubReg_i32_reg imm:$lane))),
+ (SubReg_i32_lane imm:$lane)))>;
+
+ // VQRDMLSH : Vector Saturating Rounding Doubling Multiply Subtract,
+ // returning the high half (not a widening op)
+ defm VQRDMLSH : N3VInt3_HS<1, 0, 0b1100, 1, IIC_VMACi16D, IIC_VMACi32D,
+ IIC_VMACi16Q, IIC_VMACi32Q, "vqrdmlsh", "s",
+ null_frag>;
+ def : Pat<(v4i16 (int_arm_neon_vqsubs
+ (v4i16 DPR:$src1),
+ (v4i16 (int_arm_neon_vqrdmulh (v4i16 DPR:$Vn),
+ (v4i16 DPR:$Vm))))),
+ (v4i16 (VQRDMLSHv4i16 DPR:$src1, DPR:$Vn, DPR:$Vm))>;
+ def : Pat<(v2i32 (int_arm_neon_vqsubs
+ (v2i32 DPR:$src1),
+ (v2i32 (int_arm_neon_vqrdmulh (v2i32 DPR:$Vn),
+ (v2i32 DPR:$Vm))))),
+ (v2i32 (VQRDMLSHv2i32 DPR:$src1, DPR:$Vn, DPR:$Vm))>;
+ def : Pat<(v8i16 (int_arm_neon_vqsubs
+ (v8i16 QPR:$src1),
+ (v8i16 (int_arm_neon_vqrdmulh (v8i16 QPR:$Vn),
+ (v8i16 QPR:$Vm))))),
+ (v8i16 (VQRDMLSHv8i16 QPR:$src1, QPR:$Vn, QPR:$Vm))>;
+ def : Pat<(v4i32 (int_arm_neon_vqsubs
+ (v4i32 QPR:$src1),
+ (v4i32 (int_arm_neon_vqrdmulh (v4i32 QPR:$Vn),
+ (v4i32 QPR:$Vm))))),
+ (v4i32 (VQRDMLSHv4i32 QPR:$src1, QPR:$Vn, QPR:$Vm))>;
+
+ defm VQRDMLSHsl : N3VMulOpSL_HS<0b1111, IIC_VMACi16D, IIC_VMACi32D,
+ IIC_VMACi16Q, IIC_VMACi32Q, "vqrdmlsh", "s",
+ null_frag>;
+ def : Pat<(v4i16 (int_arm_neon_vqsubs
+ (v4i16 DPR:$src1),
+ (v4i16 (int_arm_neon_vqrdmulh
+ (v4i16 DPR:$Vn),
+ (v4i16 (NEONvduplane (v4i16 DPR_8:$Vm),
+ imm:$lane)))))),
+ (v4i16 (VQRDMLSHslv4i16 DPR:$src1, DPR:$Vn, DPR_8:$Vm, imm:$lane))>;
+ def : Pat<(v2i32 (int_arm_neon_vqsubs
+ (v2i32 DPR:$src1),
+ (v2i32 (int_arm_neon_vqrdmulh
+ (v2i32 DPR:$Vn),
+ (v2i32 (NEONvduplane (v2i32 DPR_VFP2:$Vm),
+ imm:$lane)))))),
+ (v2i32 (VQRDMLSHslv2i32 DPR:$src1, DPR:$Vn, DPR_VFP2:$Vm,
+ imm:$lane))>;
+ def : Pat<(v8i16 (int_arm_neon_vqsubs
+ (v8i16 QPR:$src1),
+ (v8i16 (int_arm_neon_vqrdmulh
+ (v8i16 QPR:$src2),
+ (v8i16 (NEONvduplane (v8i16 QPR:$src3),
+ imm:$lane)))))),
+ (v8i16 (VQRDMLSHslv8i16 (v8i16 QPR:$src1),
+ (v8i16 QPR:$src2),
+ (v4i16 (EXTRACT_SUBREG
+ QPR:$src3,
+ (DSubReg_i16_reg imm:$lane))),
+ (SubReg_i16_lane imm:$lane)))>;
+ def : Pat<(v4i32 (int_arm_neon_vqsubs
+ (v4i32 QPR:$src1),
+ (v4i32 (int_arm_neon_vqrdmulh
+ (v4i32 QPR:$src2),
+ (v4i32 (NEONvduplane (v4i32 QPR:$src3),
+ imm:$lane)))))),
+ (v4i32 (VQRDMLSHslv4i32 (v4i32 QPR:$src1),
+ (v4i32 QPR:$src2),
+ (v2i32 (EXTRACT_SUBREG
+ QPR:$src3,
+ (DSubReg_i32_reg imm:$lane))),
+ (SubReg_i32_lane imm:$lane)))>;
+}
// VQDMLAL : Vector Saturating Doubling Multiply Accumulate Long (Q += D * D)
defm VQDMLAL : N3VLInt3_HS<0, 1, 0b1001, 0, IIC_VMACi16D, IIC_VMACi32D,
"vqdmlal", "s", null_frag>;
@@ -6158,6 +6310,21 @@ class N3VSMulOpPat<SDNode MulNode, SDNode OpNode, NeonI Inst>
(v2f32 (COPY_TO_REGCLASS (v2f32 (IMPLICIT_DEF)), DPR_VFP2)),
SPR:$b, ssub_0)), DPR_VFP2)), ssub_0)>;
+class NVCVTIFPat<SDNode OpNode, NeonI Inst>
+ : NEONFPPat<(f32 (OpNode GPR:$a)),
+ (f32 (EXTRACT_SUBREG
+ (v2f32 (Inst
+ (INSERT_SUBREG
+ (v2f32 (IMPLICIT_DEF)),
+ (i32 (COPY_TO_REGCLASS GPR:$a, SPR)), ssub_0))),
+ ssub_0))>;
+class NVCVTFIPat<SDNode OpNode, NeonI Inst>
+ : NEONFPPat<(i32 (OpNode SPR:$a)),
+ (i32 (EXTRACT_SUBREG
+ (v2f32 (Inst (INSERT_SUBREG (v2f32 (IMPLICIT_DEF)),
+ SPR:$a, ssub_0))),
+ ssub_0))>;
+
def : N3VSPat<fadd, VADDfd>;
def : N3VSPat<fsub, VSUBfd>;
def : N3VSPat<fmul, VMULfd>;
@@ -6173,10 +6340,22 @@ def : N2VSPat<fabs, VABSfd>;
def : N2VSPat<fneg, VNEGfd>;
def : N3VSPat<NEONfmax, VMAXfd>;
def : N3VSPat<NEONfmin, VMINfd>;
-def : N2VSPat<arm_ftosi, VCVTf2sd>;
-def : N2VSPat<arm_ftoui, VCVTf2ud>;
-def : N2VSPat<arm_sitof, VCVTs2fd>;
-def : N2VSPat<arm_uitof, VCVTu2fd>;
+def : NVCVTFIPat<fp_to_sint, VCVTf2sd>;
+def : NVCVTFIPat<fp_to_uint, VCVTf2ud>;
+def : NVCVTIFPat<sint_to_fp, VCVTs2fd>;
+def : NVCVTIFPat<uint_to_fp, VCVTu2fd>;
+
+// NEON doesn't have any f64 conversions, so provide patterns to make
+// sure the VFP conversions match when extracting from a vector.
+def : VFPPat<(f64 (sint_to_fp (extractelt (v2i32 DPR:$src), imm:$lane))),
+ (VSITOD (EXTRACT_SUBREG DPR:$src, (SSubReg_f32_reg imm:$lane)))>;
+def : VFPPat<(f64 (sint_to_fp (extractelt (v4i32 QPR:$src), imm:$lane))),
+ (VSITOD (EXTRACT_SUBREG QPR:$src, (SSubReg_f32_reg imm:$lane)))>;
+def : VFPPat<(f64 (uint_to_fp (extractelt (v2i32 DPR:$src), imm:$lane))),
+ (VUITOD (EXTRACT_SUBREG DPR:$src, (SSubReg_f32_reg imm:$lane)))>;
+def : VFPPat<(f64 (uint_to_fp (extractelt (v4i32 QPR:$src), imm:$lane))),
+ (VUITOD (EXTRACT_SUBREG QPR:$src, (SSubReg_f32_reg imm:$lane)))>;
+
// Prefer VMOVDRR for i32 -> f32 bitcasts, it can write all DPR registers.
def : Pat<(f32 (bitconvert GPR:$a)),
diff --git a/lib/Target/ARM/ARMInstrVFP.td b/lib/Target/ARM/ARMInstrVFP.td
index e0a9314..afff016 100644
--- a/lib/Target/ARM/ARMInstrVFP.td
+++ b/lib/Target/ARM/ARMInstrVFP.td
@@ -11,16 +11,10 @@
//
//===----------------------------------------------------------------------===//
-def SDT_FTOI : SDTypeProfile<1, 1, [SDTCisVT<0, f32>, SDTCisFP<1>]>;
-def SDT_ITOF : SDTypeProfile<1, 1, [SDTCisFP<0>, SDTCisVT<1, f32>]>;
def SDT_CMPFP0 : SDTypeProfile<0, 1, [SDTCisFP<0>]>;
def SDT_VMOVDRR : SDTypeProfile<1, 2, [SDTCisVT<0, f64>, SDTCisVT<1, i32>,
SDTCisSameAs<1, 2>]>;
-def arm_ftoui : SDNode<"ARMISD::FTOUI", SDT_FTOI>;
-def arm_ftosi : SDNode<"ARMISD::FTOSI", SDT_FTOI>;
-def arm_sitof : SDNode<"ARMISD::SITOF", SDT_ITOF>;
-def arm_uitof : SDNode<"ARMISD::UITOF", SDT_ITOF>;
def arm_fmstat : SDNode<"ARMISD::FMSTAT", SDTNone, [SDNPInGlue, SDNPOutGlue]>;
def arm_cmpfp : SDNode<"ARMISD::CMPFP", SDT_ARMCmp, [SDNPOutGlue]>;
def arm_cmpfp0 : SDNode<"ARMISD::CMPFPw0", SDT_CMPFP0, [SDNPOutGlue]>;
@@ -633,7 +627,7 @@ multiclass vcvt_inst<string opc, bits<2> rm,
def SS : ASuInp<0b11101, 0b11, 0b1100, 0b11, 0,
(outs SPR:$Sd), (ins SPR:$Sm),
NoItinerary, !strconcat("vcvt", opc, ".s32.f32\t$Sd, $Sm"),
- [(set SPR:$Sd, (arm_ftosi (node SPR:$Sm)))]>,
+ []>,
Requires<[HasFPARMv8]> {
let Inst{17-16} = rm;
}
@@ -641,7 +635,7 @@ multiclass vcvt_inst<string opc, bits<2> rm,
def US : ASuInp<0b11101, 0b11, 0b1100, 0b01, 0,
(outs SPR:$Sd), (ins SPR:$Sm),
NoItinerary, !strconcat("vcvt", opc, ".u32.f32\t$Sd, $Sm"),
- [(set SPR:$Sd, (arm_ftoui (node SPR:$Sm)))]>,
+ []>,
Requires<[HasFPARMv8]> {
let Inst{17-16} = rm;
}
@@ -649,7 +643,7 @@ multiclass vcvt_inst<string opc, bits<2> rm,
def SD : ASuInp<0b11101, 0b11, 0b1100, 0b11, 0,
(outs SPR:$Sd), (ins DPR:$Dm),
NoItinerary, !strconcat("vcvt", opc, ".s32.f64\t$Sd, $Dm"),
- [(set SPR:$Sd, (arm_ftosi (f64 (node (f64 DPR:$Dm)))))]>,
+ []>,
Requires<[HasFPARMv8, HasDPVFP]> {
bits<5> Dm;
@@ -664,7 +658,7 @@ multiclass vcvt_inst<string opc, bits<2> rm,
def UD : ASuInp<0b11101, 0b11, 0b1100, 0b01, 0,
(outs SPR:$Sd), (ins DPR:$Dm),
NoItinerary, !strconcat("vcvt", opc, ".u32.f64\t$Sd, $Dm"),
- [(set SPR:$Sd, (arm_ftoui (f64 (node (f64 DPR:$Dm)))))]>,
+ []>,
Requires<[HasFPARMv8, HasDPVFP]> {
bits<5> Dm;
@@ -676,6 +670,27 @@ multiclass vcvt_inst<string opc, bits<2> rm,
let Inst{8} = 1;
}
}
+
+ let Predicates = [HasFPARMv8] in {
+ def : Pat<(i32 (fp_to_sint (node SPR:$a))),
+ (COPY_TO_REGCLASS
+ (!cast<Instruction>(NAME#"SS") SPR:$a),
+ GPR)>;
+ def : Pat<(i32 (fp_to_uint (node SPR:$a))),
+ (COPY_TO_REGCLASS
+ (!cast<Instruction>(NAME#"US") SPR:$a),
+ GPR)>;
+ }
+ let Predicates = [HasFPARMv8, HasDPVFP] in {
+ def : Pat<(i32 (fp_to_sint (node (f64 DPR:$a)))),
+ (COPY_TO_REGCLASS
+ (!cast<Instruction>(NAME#"SD") DPR:$a),
+ GPR)>;
+ def : Pat<(i32 (fp_to_uint (node (f64 DPR:$a)))),
+ (COPY_TO_REGCLASS
+ (!cast<Instruction>(NAME#"UD") DPR:$a),
+ GPR)>;
+ }
}
defm VCVTA : vcvt_inst<"a", 0b00, frnd>;
@@ -980,14 +995,22 @@ class AVConv1InSs_Encode<bits<5> opcod1, bits<2> opcod2, bits<4> opcod3,
def VSITOD : AVConv1IDs_Encode<0b11101, 0b11, 0b1000, 0b1011,
(outs DPR:$Dd), (ins SPR:$Sm),
IIC_fpCVTID, "vcvt", ".f64.s32\t$Dd, $Sm",
- [(set DPR:$Dd, (f64 (arm_sitof SPR:$Sm)))]> {
+ []> {
let Inst{7} = 1; // s32
}
+let Predicates = [HasVFP2, HasDPVFP] in {
+ def : VFPPat<(f64 (sint_to_fp GPR:$a)),
+ (VSITOD (COPY_TO_REGCLASS GPR:$a, SPR))>;
+
+ def : VFPPat<(f64 (sint_to_fp (i32 (load addrmode5:$a)))),
+ (VSITOD (VLDRS addrmode5:$a))>;
+}
+
def VSITOS : AVConv1InSs_Encode<0b11101, 0b11, 0b1000, 0b1010,
(outs SPR:$Sd),(ins SPR:$Sm),
IIC_fpCVTIS, "vcvt", ".f32.s32\t$Sd, $Sm",
- [(set SPR:$Sd, (arm_sitof SPR:$Sm))]> {
+ []> {
let Inst{7} = 1; // s32
// Some single precision VFP instructions may be executed on both NEON and
@@ -995,17 +1018,31 @@ def VSITOS : AVConv1InSs_Encode<0b11101, 0b11, 0b1000, 0b1010,
let D = VFPNeonA8Domain;
}
+def : VFPNoNEONPat<(f32 (sint_to_fp GPR:$a)),
+ (VSITOS (COPY_TO_REGCLASS GPR:$a, SPR))>;
+
+def : VFPNoNEONPat<(f32 (sint_to_fp (i32 (load addrmode5:$a)))),
+ (VSITOS (VLDRS addrmode5:$a))>;
+
def VUITOD : AVConv1IDs_Encode<0b11101, 0b11, 0b1000, 0b1011,
(outs DPR:$Dd), (ins SPR:$Sm),
IIC_fpCVTID, "vcvt", ".f64.u32\t$Dd, $Sm",
- [(set DPR:$Dd, (f64 (arm_uitof SPR:$Sm)))]> {
+ []> {
let Inst{7} = 0; // u32
}
+let Predicates = [HasVFP2, HasDPVFP] in {
+ def : VFPPat<(f64 (uint_to_fp GPR:$a)),
+ (VUITOD (COPY_TO_REGCLASS GPR:$a, SPR))>;
+
+ def : VFPPat<(f64 (uint_to_fp (i32 (load addrmode5:$a)))),
+ (VUITOD (VLDRS addrmode5:$a))>;
+}
+
def VUITOS : AVConv1InSs_Encode<0b11101, 0b11, 0b1000, 0b1010,
(outs SPR:$Sd), (ins SPR:$Sm),
IIC_fpCVTIS, "vcvt", ".f32.u32\t$Sd, $Sm",
- [(set SPR:$Sd, (arm_uitof SPR:$Sm))]> {
+ []> {
let Inst{7} = 0; // u32
// Some single precision VFP instructions may be executed on both NEON and
@@ -1013,6 +1050,12 @@ def VUITOS : AVConv1InSs_Encode<0b11101, 0b11, 0b1000, 0b1010,
let D = VFPNeonA8Domain;
}
+def : VFPNoNEONPat<(f32 (uint_to_fp GPR:$a)),
+ (VUITOS (COPY_TO_REGCLASS GPR:$a, SPR))>;
+
+def : VFPNoNEONPat<(f32 (uint_to_fp (i32 (load addrmode5:$a)))),
+ (VUITOS (VLDRS addrmode5:$a))>;
+
// FP -> Int:
class AVConv1IsD_Encode<bits<5> opcod1, bits<2> opcod2, bits<4> opcod3,
@@ -1055,14 +1098,22 @@ class AVConv1InsS_Encode<bits<5> opcod1, bits<2> opcod2, bits<4> opcod3,
def VTOSIZD : AVConv1IsD_Encode<0b11101, 0b11, 0b1101, 0b1011,
(outs SPR:$Sd), (ins DPR:$Dm),
IIC_fpCVTDI, "vcvt", ".s32.f64\t$Sd, $Dm",
- [(set SPR:$Sd, (arm_ftosi (f64 DPR:$Dm)))]> {
+ []> {
let Inst{7} = 1; // Z bit
}
+let Predicates=[HasVFP2, HasDPVFP] in {
+ def : VFPPat<(i32 (fp_to_sint (f64 DPR:$a))),
+ (COPY_TO_REGCLASS (VTOSIZD DPR:$a), GPR)>;
+
+ def : VFPPat<(store (i32 (fp_to_sint (f64 DPR:$a))), addrmode5:$ptr),
+ (VSTRS (VTOSIZD DPR:$a), addrmode5:$ptr)>;
+}
+
def VTOSIZS : AVConv1InsS_Encode<0b11101, 0b11, 0b1101, 0b1010,
(outs SPR:$Sd), (ins SPR:$Sm),
IIC_fpCVTSI, "vcvt", ".s32.f32\t$Sd, $Sm",
- [(set SPR:$Sd, (arm_ftosi SPR:$Sm))]> {
+ []> {
let Inst{7} = 1; // Z bit
// Some single precision VFP instructions may be executed on both NEON and
@@ -1070,17 +1121,31 @@ def VTOSIZS : AVConv1InsS_Encode<0b11101, 0b11, 0b1101, 0b1010,
let D = VFPNeonA8Domain;
}
+def : VFPNoNEONPat<(i32 (fp_to_sint SPR:$a)),
+ (COPY_TO_REGCLASS (VTOSIZS SPR:$a), GPR)>;
+
+def : VFPNoNEONPat<(store (i32 (fp_to_sint (f32 SPR:$a))), addrmode5:$ptr),
+ (VSTRS (VTOSIZS SPR:$a), addrmode5:$ptr)>;
+
def VTOUIZD : AVConv1IsD_Encode<0b11101, 0b11, 0b1100, 0b1011,
(outs SPR:$Sd), (ins DPR:$Dm),
IIC_fpCVTDI, "vcvt", ".u32.f64\t$Sd, $Dm",
- [(set SPR:$Sd, (arm_ftoui (f64 DPR:$Dm)))]> {
+ []> {
let Inst{7} = 1; // Z bit
}
+let Predicates=[HasVFP2, HasDPVFP] in {
+ def : VFPPat<(i32 (fp_to_uint (f64 DPR:$a))),
+ (COPY_TO_REGCLASS (VTOUIZD DPR:$a), GPR)>;
+
+ def : VFPPat<(store (i32 (fp_to_uint (f64 DPR:$a))), addrmode5:$ptr),
+ (VSTRS (VTOUIZD DPR:$a), addrmode5:$ptr)>;
+}
+
def VTOUIZS : AVConv1InsS_Encode<0b11101, 0b11, 0b1100, 0b1010,
(outs SPR:$Sd), (ins SPR:$Sm),
IIC_fpCVTSI, "vcvt", ".u32.f32\t$Sd, $Sm",
- [(set SPR:$Sd, (arm_ftoui SPR:$Sm))]> {
+ []> {
let Inst{7} = 1; // Z bit
// Some single precision VFP instructions may be executed on both NEON and
@@ -1088,6 +1153,12 @@ def VTOUIZS : AVConv1InsS_Encode<0b11101, 0b11, 0b1100, 0b1010,
let D = VFPNeonA8Domain;
}
+def : VFPNoNEONPat<(i32 (fp_to_uint SPR:$a)),
+ (COPY_TO_REGCLASS (VTOUIZS SPR:$a), GPR)>;
+
+def : VFPNoNEONPat<(store (i32 (fp_to_uint (f32 SPR:$a))), addrmode5:$ptr),
+ (VSTRS (VTOUIZS SPR:$a), addrmode5:$ptr)>;
+
// And the Z bit '0' variants, i.e. use the rounding mode specified by FPSCR.
let Uses = [FPSCR] in {
// FIXME: Verify encoding after integrated assembler is working.
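Note on the hunks above: the ISel patterns are stripped out of the VCVT instruction definitions and re-added as standalone Pat<> records guarded by Predicates, so the encodings stay available while selection is controlled per subtarget. The Z-bit variants truncate toward zero, which is exactly the semantics fp_to_sint/fp_to_uint (and a C cast) require; the Z=0 variants instead honor the FPSCR rounding mode. A minimal standalone check of that truncation behavior (plain C++, illustrative only, not part of the patch):

    #include <cstdio>
    int main() {
      // fp_to_sint truncates toward zero, matching VCVT with the Z bit set.
      printf("%d\n", (int)2.7f);   // 2
      printf("%d\n", (int)-2.7f);  // -2
    }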
diff --git a/lib/Target/ARM/ARMLoadStoreOptimizer.cpp b/lib/Target/ARM/ARMLoadStoreOptimizer.cpp
index a8d0981..eca8e28 100644
--- a/lib/Target/ARM/ARMLoadStoreOptimizer.cpp
+++ b/lib/Target/ARM/ARMLoadStoreOptimizer.cpp
@@ -19,7 +19,7 @@
#include "ARMMachineFunctionInfo.h"
#include "ARMSubtarget.h"
#include "MCTargetDesc/ARMAddressingModes.h"
-#include "Thumb1RegisterInfo.h"
+#include "ThumbRegisterInfo.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallPtrSet.h"
@@ -38,6 +38,7 @@
#include "llvm/IR/Function.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetRegisterInfo.h"
diff --git a/lib/Target/ARM/ARMMachineFunctionInfo.h b/lib/Target/ARM/ARMMachineFunctionInfo.h
index ddfdb52..a68ab1b 100644
--- a/lib/Target/ARM/ARMMachineFunctionInfo.h
+++ b/lib/Target/ARM/ARMMachineFunctionInfo.h
@@ -149,11 +149,7 @@ public:
unsigned getStoredByValParamsPadding() const { return StByValParamsPadding; }
void setStoredByValParamsPadding(unsigned p) { StByValParamsPadding = p; }
- unsigned getArgRegsSaveSize(unsigned Align = 0) const {
- if (!Align)
- return ArgRegsSaveSize;
- return (ArgRegsSaveSize + Align - 1) & ~(Align - 1);
- }
+ unsigned getArgRegsSaveSize() const { return ArgRegsSaveSize; }
void setArgRegsSaveSize(unsigned s) { ArgRegsSaveSize = s; }
unsigned getReturnRegsCount() const { return ReturnRegsCount; }
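The deleted accessor rounded ArgRegsSaveSize up to the requested alignment on every query; after this change callers get the raw value and any rounding happens where the size is computed. The removed expression is the usual round-up-to-power-of-two idiom; a standalone sketch (assuming Align is a power of two, which the bit trick requires):

    #include <cassert>
    #include <cstdio>
    unsigned alignUp(unsigned Size, unsigned Align) {
      assert(Align && (Align & (Align - 1)) == 0 && "power of two required");
      return (Size + Align - 1) & ~(Align - 1); // same trick as the removed code
    }
    int main() { printf("%u\n", alignUp(10, 8)); } // prints 16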
diff --git a/lib/Target/ARM/ARMRegisterInfo.cpp b/lib/Target/ARM/ARMRegisterInfo.cpp
index 80b4b48..e6e8cdf 100644
--- a/lib/Target/ARM/ARMRegisterInfo.cpp
+++ b/lib/Target/ARM/ARMRegisterInfo.cpp
@@ -16,6 +16,4 @@ using namespace llvm;
void ARMRegisterInfo::anchor() { }
-ARMRegisterInfo::ARMRegisterInfo(const ARMSubtarget &sti)
- : ARMBaseRegisterInfo(sti) {
-}
+ARMRegisterInfo::ARMRegisterInfo() : ARMBaseRegisterInfo() {}
diff --git a/lib/Target/ARM/ARMRegisterInfo.h b/lib/Target/ARM/ARMRegisterInfo.h
index b623173..e2e650e 100644
--- a/lib/Target/ARM/ARMRegisterInfo.h
+++ b/lib/Target/ARM/ARMRegisterInfo.h
@@ -23,7 +23,7 @@ class ARMSubtarget;
struct ARMRegisterInfo : public ARMBaseRegisterInfo {
virtual void anchor();
public:
- ARMRegisterInfo(const ARMSubtarget &STI);
+ ARMRegisterInfo();
};
} // end namespace llvm
diff --git a/lib/Target/ARM/ARMSubtarget.cpp b/lib/Target/ARM/ARMSubtarget.cpp
index 89624dd..fbec9e6 100644
--- a/lib/Target/ARM/ARMSubtarget.cpp
+++ b/lib/Target/ARM/ARMSubtarget.cpp
@@ -129,6 +129,7 @@ void ARMSubtarget::initializeEnvironment() {
HasV5TEOps = false;
HasV6Ops = false;
HasV6MOps = false;
+ HasV6KOps = false;
HasV6T2Ops = false;
HasV7Ops = false;
HasV8Ops = false;
@@ -165,6 +166,7 @@ void ARMSubtarget::initializeEnvironment() {
HasTrustZone = false;
HasCrypto = false;
HasCRC = false;
+ HasV8_1a = false;
HasZeroCycleZeroing = false;
AllowsUnalignedMem = false;
Thumb2DSP = false;
diff --git a/lib/Target/ARM/ARMSubtarget.h b/lib/Target/ARM/ARMSubtarget.h
index f4deddf..f36cd5c 100644
--- a/lib/Target/ARM/ARMSubtarget.h
+++ b/lib/Target/ARM/ARMSubtarget.h
@@ -56,13 +56,14 @@ protected:
ARMProcClassEnum ARMProcClass;
/// HasV4TOps, HasV5TOps, HasV5TEOps,
- /// HasV6Ops, HasV6MOps, HasV6T2Ops, HasV7Ops, HasV8Ops -
+ /// HasV6Ops, HasV6MOps, HasV6KOps, HasV6T2Ops, HasV7Ops, HasV8Ops -
/// Specify whether target support specific ARM ISA variants.
bool HasV4TOps;
bool HasV5TOps;
bool HasV5TEOps;
bool HasV6Ops;
bool HasV6MOps;
+ bool HasV6KOps;
bool HasV6T2Ops;
bool HasV7Ops;
bool HasV8Ops;
@@ -181,6 +182,9 @@ protected:
/// HasCRC - if true, processor supports CRC instructions
bool HasCRC;
+ /// HasV8_1a - if true, the processor has V8.1a: PAN and RDMA extensions
+ bool HasV8_1a;
+
/// If true, the instructions "vmov.i32 d0, #0" and "vmov.i32 q0, #0" are
/// particularly effective at zeroing a VFP register.
bool HasZeroCycleZeroing;
@@ -287,6 +291,7 @@ public:
bool hasV5TEOps() const { return HasV5TEOps; }
bool hasV6Ops() const { return HasV6Ops; }
bool hasV6MOps() const { return HasV6MOps; }
+ bool hasV6KOps() const { return HasV6KOps; }
bool hasV6T2Ops() const { return HasV6T2Ops; }
bool hasV7Ops() const { return HasV7Ops; }
bool hasV8Ops() const { return HasV8Ops; }
@@ -311,6 +316,7 @@ public:
bool hasNEON() const { return HasNEON; }
bool hasCrypto() const { return HasCrypto; }
bool hasCRC() const { return HasCRC; }
+ bool hasV8_1a() const { return HasV8_1a; }
bool hasVirtualization() const { return HasVirtualization; }
bool useNEONForSinglePrecisionFP() const {
return hasNEON() && UseNEONForSinglePrecisionFP;
diff --git a/lib/Target/ARM/ARMTargetMachine.cpp b/lib/Target/ARM/ARMTargetMachine.cpp
index a97a058..1bee1b0 100644
--- a/lib/Target/ARM/ARMTargetMachine.cpp
+++ b/lib/Target/ARM/ARMTargetMachine.cpp
@@ -37,6 +37,11 @@ EnableAtomicTidy("arm-atomic-cfg-tidy", cl::Hidden,
" to make use of cmpxchg flow-based information"),
cl::init(true));
+static cl::opt<bool>
+EnableARMLoadStoreOpt("arm-load-store-opt", cl::Hidden,
+ cl::desc("Enable ARM load/store optimization pass"),
+ cl::init(true));
+
extern "C" void LLVMInitializeARMTarget() {
// Register the target.
RegisterTargetMachine<ARMLETargetMachine> X(TheARMLETarget);
@@ -105,9 +110,11 @@ computeTargetABI(const Triple &TT, StringRef CPU,
return TargetABI;
}
-static std::string computeDataLayout(const Triple &TT,
- ARMBaseTargetMachine::ARMABI ABI,
+static std::string computeDataLayout(StringRef TT, StringRef CPU,
+ const TargetOptions &Options,
bool isLittle) {
+ const Triple Triple(TT);
+ auto ABI = computeTargetABI(Triple, CPU, Options);
std::string Ret = "";
if (isLittle)
@@ -117,7 +124,7 @@ static std::string computeDataLayout(const Triple &TT,
// Big endian.
Ret += "E";
- Ret += DataLayout::getManglingComponent(TT);
+ Ret += DataLayout::getManglingComponent(Triple);
// Pointers are 32 bits and aligned to 32 bits.
Ret += "-p:32:32";
@@ -147,7 +154,7 @@ static std::string computeDataLayout(const Triple &TT,
// The stack is 128 bit aligned on NaCl, 64 bit aligned on AAPCS and 32 bit
// aligned everywhere else.
- if (TT.isOSNaCl())
+ if (Triple.isOSNaCl())
Ret += "-S128";
else if (ABI == ARMBaseTargetMachine::ARM_ABI_AAPCS)
Ret += "-S64";
@@ -164,9 +171,9 @@ ARMBaseTargetMachine::ARMBaseTargetMachine(const Target &T, StringRef TT,
const TargetOptions &Options,
Reloc::Model RM, CodeModel::Model CM,
CodeGenOpt::Level OL, bool isLittle)
- : LLVMTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL),
+ : LLVMTargetMachine(T, computeDataLayout(TT, CPU, Options, isLittle), TT,
+ CPU, FS, Options, RM, CM, OL),
TargetABI(computeTargetABI(Triple(TT), CPU, Options)),
- DL(computeDataLayout(Triple(TT), TargetABI, isLittle)),
TLOF(createTLOF(Triple(getTargetTriple()))),
Subtarget(TT, CPU, FS, *this, isLittle), isLittle(isLittle) {
@@ -325,7 +332,7 @@ void ARMPassConfig::addIRPasses() {
}
bool ARMPassConfig::addPreISel() {
- if (TM->getOptLevel() != CodeGenOpt::None)
+ if (TM->getOptLevel() == CodeGenOpt::Aggressive)
// FIXME: This is using the thumb1 only constant value for
// maximal global offset for merging globals. We may want
// to look into using the old value for non-thumb1 code of
@@ -339,32 +346,30 @@ bool ARMPassConfig::addPreISel() {
bool ARMPassConfig::addInstSelector() {
addPass(createARMISelDag(getARMTargetMachine(), getOptLevel()));
- const ARMSubtarget *Subtarget = &getARMSubtarget();
- if (Subtarget->isTargetELF() && !Subtarget->isThumb1Only() &&
+ if (Triple(TM->getTargetTriple()).isOSBinFormatELF() &&
TM->Options.EnableFastISel)
addPass(createARMGlobalBaseRegPass());
return false;
}
void ARMPassConfig::addPreRegAlloc() {
- if (getOptLevel() != CodeGenOpt::None)
- addPass(createARMLoadStoreOptimizationPass(true));
- if (getOptLevel() != CodeGenOpt::None && getARMSubtarget().isCortexA9())
+ if (getOptLevel() != CodeGenOpt::None) {
addPass(createMLxExpansionPass());
- // Since the A15SDOptimizer pass can insert VDUP instructions, it can only be
- // enabled when NEON is available.
- if (getOptLevel() != CodeGenOpt::None && getARMSubtarget().isCortexA15() &&
- getARMSubtarget().hasNEON() && !DisableA15SDOptimization) {
- addPass(createA15SDOptimizerPass());
+
+ if (EnableARMLoadStoreOpt)
+ addPass(createARMLoadStoreOptimizationPass(/* pre-register alloc */ true));
+
+ if (!DisableA15SDOptimization)
+ addPass(createA15SDOptimizerPass());
}
}
void ARMPassConfig::addPreSched2() {
if (getOptLevel() != CodeGenOpt::None) {
- addPass(createARMLoadStoreOptimizationPass());
+ if (EnableARMLoadStoreOpt)
+ addPass(createARMLoadStoreOptimizationPass());
- if (getARMSubtarget().hasNEON())
- addPass(createExecutionDependencyFixPass(&ARM::DPRRegClass));
+ addPass(createExecutionDependencyFixPass(&ARM::DPRRegClass));
}
// Expand some pseudo instructions into multiple instructions to allow
@@ -372,26 +377,21 @@ void ARMPassConfig::addPreSched2() {
addPass(createARMExpandPseudoPass());
if (getOptLevel() != CodeGenOpt::None) {
- if (!getARMSubtarget().isThumb1Only()) {
- // in v8, IfConversion depends on Thumb instruction widths
- if (getARMSubtarget().restrictIT() &&
- !getARMSubtarget().prefers32BitThumb())
- addPass(createThumb2SizeReductionPass());
+ // in v8, IfConversion depends on Thumb instruction widths
+ if (getARMSubtarget().restrictIT())
+ addPass(createThumb2SizeReductionPass());
+ if (!getARMSubtarget().isThumb1Only())
addPass(&IfConverterID);
- }
}
- if (getARMSubtarget().isThumb2())
- addPass(createThumb2ITBlockPass());
+ addPass(createThumb2ITBlockPass());
}
void ARMPassConfig::addPreEmitPass() {
- if (getARMSubtarget().isThumb2()) {
- if (!getARMSubtarget().prefers32BitThumb())
- addPass(createThumb2SizeReductionPass());
+ addPass(createThumb2SizeReductionPass());
- // Constant island pass work on unbundled instructions.
+ // Constant island pass works on unbundled instructions.

+ if (getARMSubtarget().isThumb2())
addPass(&UnpackMachineBundlesID);
- }
addPass(createARMOptimizeBarriersPass());
addPass(createARMConstantIslandPass());
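Two themes run through this file: passes that used to be gated on a subtarget query in the pass config are now either controlled by a cl::opt (the load/store optimizer) or added unconditionally and left to bail out per function, since with function-level subtargets the pass config can no longer assume one subtarget for the whole module. A minimal sketch of the cl::opt gating shape (LLVM headers assumed; the pass name is hypothetical, not part of the patch):

    #include "llvm/Support/CommandLine.h"
    using namespace llvm;

    static cl::opt<bool>
    EnableExamplePass("enable-example-pass", cl::Hidden,
                      cl::desc("Gate an optional pass, like arm-load-store-opt"),
                      cl::init(true));

    int main(int argc, char **argv) {
      cl::ParseCommandLineOptions(argc, argv);
      if (EnableExamplePass) {
        // addPass(createExamplePass());  // hypothetical pass, added only when on
      }
    }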
diff --git a/lib/Target/ARM/ARMTargetMachine.h b/lib/Target/ARM/ARMTargetMachine.h
index 7f6a1ee..20ca97b 100644
--- a/lib/Target/ARM/ARMTargetMachine.h
+++ b/lib/Target/ARM/ARMTargetMachine.h
@@ -30,7 +30,6 @@ public:
} TargetABI;
protected:
- const DataLayout DL;
std::unique_ptr<TargetLoweringObjectFile> TLOF;
ARMSubtarget Subtarget;
bool isLittle;
@@ -45,9 +44,8 @@ public:
bool isLittle);
~ARMBaseTargetMachine() override;
- const ARMSubtarget *getSubtargetImpl() const override { return &Subtarget; }
+ const ARMSubtarget *getSubtargetImpl() const { return &Subtarget; }
const ARMSubtarget *getSubtargetImpl(const Function &F) const override;
- const DataLayout *getDataLayout() const override { return &DL; }
bool isLittleEndian() const { return isLittle; }
/// \brief Get the TargetIRAnalysis for this target.
diff --git a/lib/Target/ARM/Android.mk b/lib/Target/ARM/Android.mk
index 55a5775..6694b53 100644
--- a/lib/Target/ARM/Android.mk
+++ b/lib/Target/ARM/Android.mk
@@ -4,6 +4,7 @@ arm_codegen_TBLGEN_TABLES := \
ARMGenRegisterInfo.inc \
ARMGenInstrInfo.inc \
ARMGenCodeEmitter.inc \
+ ARMGenCodeEmitter.inc \
ARMGenMCCodeEmitter.inc \
ARMGenMCPseudoLowering.inc \
ARMGenAsmWriter.inc \
@@ -41,10 +42,9 @@ arm_codegen_SRC_FILES := \
MLxExpansionPass.cpp \
Thumb1FrameLowering.cpp \
Thumb1InstrInfo.cpp \
- Thumb1RegisterInfo.cpp \
+ ThumbRegisterInfo.cpp \
Thumb2ITBlockPass.cpp \
Thumb2InstrInfo.cpp \
- Thumb2RegisterInfo.cpp \
Thumb2SizeReduction.cpp
# For the host
diff --git a/lib/Target/ARM/AsmParser/ARMAsmParser.cpp b/lib/Target/ARM/AsmParser/ARMAsmParser.cpp
index 59461e8..2215efb 100644
--- a/lib/Target/ARM/AsmParser/ARMAsmParser.cpp
+++ b/lib/Target/ARM/AsmParser/ARMAsmParser.cpp
@@ -276,6 +276,9 @@ class ARMAsmParser : public MCTargetAsmParser {
bool hasD16() const {
return STI.getFeatureBits() & ARM::FeatureD16;
}
+ bool hasV8_1a() const {
+ return STI.getFeatureBits() & ARM::FeatureV8_1a;
+ }
void SwitchMode() {
uint64_t FB = ComputeAvailableFeatures(STI.ToggleFeature(ARM::ModeThumb));
@@ -342,10 +345,10 @@ public:
};
- ARMAsmParser(MCSubtargetInfo & _STI, MCAsmParser & _Parser,
+ ARMAsmParser(MCSubtargetInfo &STI, MCAsmParser &Parser,
const MCInstrInfo &MII, const MCTargetOptions &Options)
- : MCTargetAsmParser(), STI(_STI), MII(MII), UC(_Parser) {
- MCAsmParserExtension::Initialize(_Parser);
+ : STI(STI), MII(MII), UC(Parser) {
+ MCAsmParserExtension::Initialize(Parser);
// Cache the MCRegisterInfo.
MRI = getContext().getRegisterInfo();
diff --git a/lib/Target/ARM/CMakeLists.txt b/lib/Target/ARM/CMakeLists.txt
index 2530640..0b698197 100644
--- a/lib/Target/ARM/CMakeLists.txt
+++ b/lib/Target/ARM/CMakeLists.txt
@@ -40,10 +40,9 @@ add_llvm_target(ARMCodeGen
MLxExpansionPass.cpp
Thumb1FrameLowering.cpp
Thumb1InstrInfo.cpp
- Thumb1RegisterInfo.cpp
+ ThumbRegisterInfo.cpp
Thumb2ITBlockPass.cpp
Thumb2InstrInfo.cpp
- Thumb2RegisterInfo.cpp
Thumb2SizeReduction.cpp
)
diff --git a/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp b/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp
index 16eea33..e15323d 100644
--- a/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp
+++ b/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp
@@ -637,12 +637,12 @@ void ARMInstPrinter::printAddrMode5Operand(const MCInst *MI, unsigned OpNum,
printRegName(O, MO1.getReg());
unsigned ImmOffs = ARM_AM::getAM5Offset(MO2.getImm());
- unsigned Op = ARM_AM::getAM5Op(MO2.getImm());
+ ARM_AM::AddrOpc Op = ARM_AM::getAM5Op(MO2.getImm());
if (AlwaysPrintImm0 || ImmOffs || Op == ARM_AM::sub) {
O << ", "
<< markup("<imm:")
<< "#"
- << ARM_AM::getAddrOpcStr(ARM_AM::getAM5Op(MO2.getImm()))
+ << ARM_AM::getAddrOpcStr(Op)
<< ImmOffs * 4
<< markup(">");
}
diff --git a/lib/Target/ARM/MCTargetDesc/ARMArchName.def b/lib/Target/ARM/MCTargetDesc/ARMArchName.def
index 9f007a0..96a0c1a 100644
--- a/lib/Target/ARM/MCTargetDesc/ARMArchName.def
+++ b/lib/Target/ARM/MCTargetDesc/ARMArchName.def
@@ -30,6 +30,7 @@ ARM_ARCH_NAME("armv5t", ARMV5T, "5T", v5T)
ARM_ARCH_NAME("armv5te", ARMV5TE, "5TE", v5TE)
ARM_ARCH_NAME("armv6", ARMV6, "6", v6)
ARM_ARCH_NAME("armv6j", ARMV6J, "6J", v6)
+ARM_ARCH_NAME("armv6k", ARMV6K, "6K", v6K)
ARM_ARCH_NAME("armv6t2", ARMV6T2, "6T2", v6T2)
ARM_ARCH_NAME("armv6z", ARMV6Z, "6Z", v6KZ)
ARM_ARCH_NAME("armv6zk", ARMV6ZK, "6ZK", v6KZ)
@@ -43,6 +44,8 @@ ARM_ARCH_NAME("armv7-m", ARMV7M, "7-M", v7)
ARM_ARCH_ALIAS("armv7m", ARMV7M)
ARM_ARCH_NAME("armv8-a", ARMV8A, "8-A", v8)
ARM_ARCH_ALIAS("armv8a", ARMV8A)
+ARM_ARCH_NAME("armv8.1-a", ARMV8_1A, "8.1-A", v8)
+ARM_ARCH_ALIAS("armv8.1a", ARMV8_1A)
ARM_ARCH_NAME("iwmmxt", IWMMXT, "iwmmxt", v5TE)
ARM_ARCH_NAME("iwmmxt2", IWMMXT2, "iwmmxt2", v5TE)
diff --git a/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp b/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp
index 2b65520..9648ffa 100644
--- a/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp
+++ b/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp
@@ -783,6 +783,7 @@ void ARMTargetELFStreamer::emitArchDefaultAttributes() {
setAttributeItem(THUMB_ISA_use, AllowThumb32, false);
break;
+ case ARM::ARMV6K:
case ARM::ARMV6Z:
case ARM::ARMV6ZK:
setAttributeItem(ARM_ISA_use, Allowed, false);
@@ -816,6 +817,7 @@ void ARMTargetELFStreamer::emitArchDefaultAttributes() {
break;
case ARM::ARMV8A:
+ case ARM::ARMV8_1A:
setAttributeItem(CPU_arch_profile, ApplicationProfile, false);
setAttributeItem(ARM_ISA_use, Allowed, false);
setAttributeItem(THUMB_ISA_use, AllowThumb32, false);
@@ -913,9 +915,8 @@ void ARMTargetELFStreamer::emitFPUDefaultAttributes() {
setAttributeItem(ARMBuildAttrs::FP_arch,
ARMBuildAttrs::AllowFPARMv8A,
/* OverwriteExisting= */ false);
- setAttributeItem(ARMBuildAttrs::Advanced_SIMD_arch,
- ARMBuildAttrs::AllowNeonARMv8,
- /* OverwriteExisting= */ false);
+ // 'Advanced_SIMD_arch' must not be emitted here; it is emitted within
+ // ARMAsmPrinter::emitAttributes(), depending on hasV8Ops() and hasV8_1a().
break;
case ARM::SOFTVFP:
@@ -1362,25 +1363,29 @@ void ARMELFStreamer::emitUnwindRaw(int64_t Offset,
namespace llvm {
-MCStreamer *createMCAsmStreamer(MCContext &Ctx, formatted_raw_ostream &OS,
- bool isVerboseAsm, bool useDwarfDirectory,
- MCInstPrinter *InstPrint, MCCodeEmitter *CE,
- MCAsmBackend *TAB, bool ShowInst) {
- MCStreamer *S = llvm::createAsmStreamer(
- Ctx, OS, isVerboseAsm, useDwarfDirectory, InstPrint, CE, TAB, ShowInst);
- new ARMTargetAsmStreamer(*S, OS, *InstPrint, isVerboseAsm);
- return S;
+MCTargetStreamer *createARMTargetAsmStreamer(MCStreamer &S,
+ formatted_raw_ostream &OS,
+ MCInstPrinter *InstPrint,
+ bool isVerboseAsm) {
+ return new ARMTargetAsmStreamer(S, OS, *InstPrint, isVerboseAsm);
}
MCTargetStreamer *createARMNullTargetStreamer(MCStreamer &S) {
return new ARMTargetStreamer(S);
}
+MCTargetStreamer *createARMObjectTargetStreamer(MCStreamer &S,
+ const MCSubtargetInfo &STI) {
+ Triple TT(STI.getTargetTriple());
+ if (TT.getObjectFormat() == Triple::ELF)
+ return new ARMTargetELFStreamer(S);
+ return new ARMTargetStreamer(S);
+}
+
MCELFStreamer *createARMELFStreamer(MCContext &Context, MCAsmBackend &TAB,
raw_ostream &OS, MCCodeEmitter *Emitter,
bool RelaxAll, bool IsThumb) {
ARMELFStreamer *S = new ARMELFStreamer(Context, TAB, OS, Emitter, IsThumb);
- new ARMTargetELFStreamer(*S);
// FIXME: This should eventually end up somewhere else where more
// intelligent flag decisions can be made. For now we are just maintaining
// the status quo for ARM and setting EF_ARM_EABI_VER5 as the default.
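The streamer plumbing changes shape here: instead of one createMCAsmStreamer that builds a generic streamer and attaches the ARM target streamer itself, the target now registers small factories and the registry picks the right one by object format, as createARMObjectTargetStreamer does with Triple::getObjectFormat(). A standalone check of that dispatch (links against the LLVM support libraries of this era; a sketch, not part of the patch):

    #include "llvm/ADT/Triple.h"
    #include <cstdio>
    int main() {
      llvm::Triple T("armv7--linux-gnueabi");
      printf("%s\n",
             T.getObjectFormat() == llvm::Triple::ELF ? "ELF" : "other");
    }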
diff --git a/lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.cpp b/lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.cpp
index 66a1618..caa8736 100644
--- a/lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.cpp
+++ b/lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.cpp
@@ -59,6 +59,7 @@ ARMELFMCAsmInfo::ARMELFMCAsmInfo(StringRef TT) {
// Exceptions handling
switch (TheTriple.getOS()) {
+ case Triple::Bitrig:
case Triple::NetBSD:
ExceptionsType = ExceptionHandling::DwarfCFI;
break;
diff --git a/lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp b/lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp
index efbebd3..e48cabb 100644
--- a/lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp
+++ b/lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp
@@ -441,14 +441,12 @@ public:
MCCodeEmitter *llvm::createARMLEMCCodeEmitter(const MCInstrInfo &MCII,
const MCRegisterInfo &MRI,
- const MCSubtargetInfo &STI,
MCContext &Ctx) {
return new ARMMCCodeEmitter(MCII, Ctx, true);
}
MCCodeEmitter *llvm::createARMBEMCCodeEmitter(const MCInstrInfo &MCII,
const MCRegisterInfo &MRI,
- const MCSubtargetInfo &STI,
MCContext &Ctx) {
return new ARMMCCodeEmitter(MCII, Ctx, false);
}
diff --git a/lib/Target/ARM/MCTargetDesc/ARMMCExpr.cpp b/lib/Target/ARM/MCTargetDesc/ARMMCExpr.cpp
index 68d32b2..5b90de3 100644
--- a/lib/Target/ARM/MCTargetDesc/ARMMCExpr.cpp
+++ b/lib/Target/ARM/MCTargetDesc/ARMMCExpr.cpp
@@ -10,6 +10,7 @@
#include "ARMMCExpr.h"
#include "llvm/MC/MCAssembler.h"
#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCStreamer.h"
using namespace llvm;
#define DEBUG_TYPE "armmcexpr"
diff --git a/lib/Target/ARM/MCTargetDesc/ARMMCExpr.h b/lib/Target/ARM/MCTargetDesc/ARMMCExpr.h
index 06bf6c9..2be98d2 100644
--- a/lib/Target/ARM/MCTargetDesc/ARMMCExpr.h
+++ b/lib/Target/ARM/MCTargetDesc/ARMMCExpr.h
@@ -26,8 +26,8 @@ private:
const VariantKind Kind;
const MCExpr *Expr;
- explicit ARMMCExpr(VariantKind _Kind, const MCExpr *_Expr)
- : Kind(_Kind), Expr(_Expr) {}
+ explicit ARMMCExpr(VariantKind Kind, const MCExpr *Expr)
+ : Kind(Kind), Expr(Expr) {}
public:
/// @name Construction
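The _Kind/_Expr to Kind/Expr renames here and in ARMAsmParser rely on a C++ rule: in a constructor's member-initializer list, the name before the parentheses is looked up in class scope (the member) and the name inside resolves to the parameter, so the shadowing is harmless. A standalone illustration:

    #include <cstdio>
    struct E {
      const int Kind;
      explicit E(int Kind) : Kind(Kind) {} // member Kind <- parameter Kind
    };
    int main() { printf("%d\n", E(7).Kind); } // prints 7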
diff --git a/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp b/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp
index 8c19785..7ff7f9a 100644
--- a/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp
+++ b/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp
@@ -153,6 +153,17 @@ std::string ARM_MC::ParseARMTriple(StringRef TT, StringRef CPU) {
// Use CPU to figure out the exact features
ARMArchFeature = "+v8";
break;
+ case Triple::ARMSubArch_v8_1a:
+ if (NoCPU)
+ // v8.1a: FeatureDB, FeatureFPARMv8, FeatureNEON, FeatureDSPThumb2,
+ // FeatureMP, FeatureHWDiv, FeatureHWDivARM, FeatureTrustZone,
+ // FeatureT2XtPk, FeatureCrypto, FeatureCRC, FeatureV8_1a
+ ARMArchFeature = "+v8.1a,+db,+fp-armv8,+neon,+t2dsp,+mp,+hwdiv,+hwdiv-arm,"
+ "+trustzone,+t2xtpk,+crypto,+crc";
+ else
+ // Use CPU to figure out the exact features
+ ARMArchFeature = "+v8.1a";
+ break;
case Triple::ARMSubArch_v7m:
isThumb = true;
if (NoCPU)
@@ -195,6 +206,9 @@ std::string ARM_MC::ParseARMTriple(StringRef TT, StringRef CPU) {
case Triple::ARMSubArch_v6t2:
ARMArchFeature = "+v6t2";
break;
+ case Triple::ARMSubArch_v6k:
+ ARMArchFeature = "+v6k";
+ break;
case Triple::ARMSubArch_v6m:
isThumb = true;
if (NoCPU)
@@ -295,27 +309,18 @@ static MCCodeGenInfo *createARMMCCodeGenInfo(StringRef TT, Reloc::Model RM,
return X;
}
-// This is duplicated code. Refactor this.
-static MCStreamer *createMCStreamer(const Target &T, StringRef TT,
- MCContext &Ctx, MCAsmBackend &MAB,
- raw_ostream &OS, MCCodeEmitter *Emitter,
- const MCSubtargetInfo &STI, bool RelaxAll) {
- Triple TheTriple(TT);
+static MCStreamer *createELFStreamer(const Triple &T, MCContext &Ctx,
+ MCAsmBackend &MAB, raw_ostream &OS,
+ MCCodeEmitter *Emitter, bool RelaxAll) {
+ return createARMELFStreamer(Ctx, MAB, OS, Emitter, false,
+ T.getArch() == Triple::thumb);
+}
- switch (TheTriple.getObjectFormat()) {
- default: llvm_unreachable("unsupported object format");
- case Triple::MachO: {
- MCStreamer *S = createMachOStreamer(Ctx, MAB, OS, Emitter, false);
- new ARMTargetStreamer(*S);
- return S;
- }
- case Triple::COFF:
- assert(TheTriple.isOSWindows() && "non-Windows ARM COFF is not supported");
- return createARMWinCOFFStreamer(Ctx, MAB, *Emitter, OS);
- case Triple::ELF:
- return createARMELFStreamer(Ctx, MAB, OS, Emitter, false,
- TheTriple.getArch() == Triple::thumb);
- }
+static MCStreamer *createARMMachOStreamer(MCContext &Ctx, MCAsmBackend &MAB,
+ raw_ostream &OS,
+ MCCodeEmitter *Emitter, bool RelaxAll,
+ bool DWARFMustBeAtTheEnd) {
+ return createMachOStreamer(Ctx, MAB, OS, Emitter, false, DWARFMustBeAtTheEnd);
}
static MCInstPrinter *createARMMCInstPrinter(const Target &T,
@@ -379,61 +384,53 @@ static MCInstrAnalysis *createARMMCInstrAnalysis(const MCInstrInfo *Info) {
// Force static initialization.
extern "C" void LLVMInitializeARMTargetMC() {
- // Register the MC asm info.
- RegisterMCAsmInfoFn X(TheARMLETarget, createARMMCAsmInfo);
- RegisterMCAsmInfoFn Y(TheARMBETarget, createARMMCAsmInfo);
- RegisterMCAsmInfoFn A(TheThumbLETarget, createARMMCAsmInfo);
- RegisterMCAsmInfoFn B(TheThumbBETarget, createARMMCAsmInfo);
-
- // Register the MC codegen info.
- TargetRegistry::RegisterMCCodeGenInfo(TheARMLETarget, createARMMCCodeGenInfo);
- TargetRegistry::RegisterMCCodeGenInfo(TheARMBETarget, createARMMCCodeGenInfo);
- TargetRegistry::RegisterMCCodeGenInfo(TheThumbLETarget,
- createARMMCCodeGenInfo);
- TargetRegistry::RegisterMCCodeGenInfo(TheThumbBETarget,
- createARMMCCodeGenInfo);
-
- // Register the MC instruction info.
- TargetRegistry::RegisterMCInstrInfo(TheARMLETarget, createARMMCInstrInfo);
- TargetRegistry::RegisterMCInstrInfo(TheARMBETarget, createARMMCInstrInfo);
- TargetRegistry::RegisterMCInstrInfo(TheThumbLETarget, createARMMCInstrInfo);
- TargetRegistry::RegisterMCInstrInfo(TheThumbBETarget, createARMMCInstrInfo);
-
- // Register the MC register info.
- TargetRegistry::RegisterMCRegInfo(TheARMLETarget, createARMMCRegisterInfo);
- TargetRegistry::RegisterMCRegInfo(TheARMBETarget, createARMMCRegisterInfo);
- TargetRegistry::RegisterMCRegInfo(TheThumbLETarget, createARMMCRegisterInfo);
- TargetRegistry::RegisterMCRegInfo(TheThumbBETarget, createARMMCRegisterInfo);
-
- // Register the MC subtarget info.
- TargetRegistry::RegisterMCSubtargetInfo(TheARMLETarget,
- ARM_MC::createARMMCSubtargetInfo);
- TargetRegistry::RegisterMCSubtargetInfo(TheARMBETarget,
- ARM_MC::createARMMCSubtargetInfo);
- TargetRegistry::RegisterMCSubtargetInfo(TheThumbLETarget,
- ARM_MC::createARMMCSubtargetInfo);
- TargetRegistry::RegisterMCSubtargetInfo(TheThumbBETarget,
- ARM_MC::createARMMCSubtargetInfo);
-
- // Register the MC instruction analyzer.
- TargetRegistry::RegisterMCInstrAnalysis(TheARMLETarget,
- createARMMCInstrAnalysis);
- TargetRegistry::RegisterMCInstrAnalysis(TheARMBETarget,
- createARMMCInstrAnalysis);
- TargetRegistry::RegisterMCInstrAnalysis(TheThumbLETarget,
- createARMMCInstrAnalysis);
- TargetRegistry::RegisterMCInstrAnalysis(TheThumbBETarget,
- createARMMCInstrAnalysis);
+ for (Target *T : {&TheARMLETarget, &TheARMBETarget, &TheThumbLETarget,
+ &TheThumbBETarget}) {
+ // Register the MC asm info.
+ RegisterMCAsmInfoFn X(*T, createARMMCAsmInfo);
+
+ // Register the MC codegen info.
+ TargetRegistry::RegisterMCCodeGenInfo(*T, createARMMCCodeGenInfo);
+
+ // Register the MC instruction info.
+ TargetRegistry::RegisterMCInstrInfo(*T, createARMMCInstrInfo);
+
+ // Register the MC register info.
+ TargetRegistry::RegisterMCRegInfo(*T, createARMMCRegisterInfo);
+
+ // Register the MC subtarget info.
+ TargetRegistry::RegisterMCSubtargetInfo(*T,
+ ARM_MC::createARMMCSubtargetInfo);
+
+ // Register the MC instruction analyzer.
+ TargetRegistry::RegisterMCInstrAnalysis(*T, createARMMCInstrAnalysis);
+
+ TargetRegistry::RegisterELFStreamer(*T, createELFStreamer);
+ TargetRegistry::RegisterCOFFStreamer(*T, createARMWinCOFFStreamer);
+ TargetRegistry::RegisterMachOStreamer(*T, createARMMachOStreamer);
+
+ // Register the obj target streamer.
+ TargetRegistry::RegisterObjectTargetStreamer(*T,
+ createARMObjectTargetStreamer);
+
+ // Register the asm streamer.
+ TargetRegistry::RegisterAsmTargetStreamer(*T, createARMTargetAsmStreamer);
+
+ // Register the null TargetStreamer.
+ TargetRegistry::RegisterNullTargetStreamer(*T, createARMNullTargetStreamer);
+
+ // Register the MCInstPrinter.
+ TargetRegistry::RegisterMCInstPrinter(*T, createARMMCInstPrinter);
+
+ // Register the MC relocation info.
+ TargetRegistry::RegisterMCRelocationInfo(*T, createARMMCRelocationInfo);
+ }
// Register the MC Code Emitter
- TargetRegistry::RegisterMCCodeEmitter(TheARMLETarget,
- createARMLEMCCodeEmitter);
- TargetRegistry::RegisterMCCodeEmitter(TheARMBETarget,
- createARMBEMCCodeEmitter);
- TargetRegistry::RegisterMCCodeEmitter(TheThumbLETarget,
- createARMLEMCCodeEmitter);
- TargetRegistry::RegisterMCCodeEmitter(TheThumbBETarget,
- createARMBEMCCodeEmitter);
+ for (Target *T : {&TheARMLETarget, &TheThumbLETarget})
+ TargetRegistry::RegisterMCCodeEmitter(*T, createARMLEMCCodeEmitter);
+ for (Target *T : {&TheARMBETarget, &TheThumbBETarget})
+ TargetRegistry::RegisterMCCodeEmitter(*T, createARMBEMCCodeEmitter);
// Register the asm backend.
TargetRegistry::RegisterMCAsmBackend(TheARMLETarget, createARMLEAsmBackend);
@@ -442,44 +439,4 @@ extern "C" void LLVMInitializeARMTargetMC() {
createThumbLEAsmBackend);
TargetRegistry::RegisterMCAsmBackend(TheThumbBETarget,
createThumbBEAsmBackend);
-
- // Register the object streamer.
- TargetRegistry::RegisterMCObjectStreamer(TheARMLETarget, createMCStreamer);
- TargetRegistry::RegisterMCObjectStreamer(TheARMBETarget, createMCStreamer);
- TargetRegistry::RegisterMCObjectStreamer(TheThumbLETarget, createMCStreamer);
- TargetRegistry::RegisterMCObjectStreamer(TheThumbBETarget, createMCStreamer);
-
- // Register the asm streamer.
- TargetRegistry::RegisterAsmStreamer(TheARMLETarget, createMCAsmStreamer);
- TargetRegistry::RegisterAsmStreamer(TheARMBETarget, createMCAsmStreamer);
- TargetRegistry::RegisterAsmStreamer(TheThumbLETarget, createMCAsmStreamer);
- TargetRegistry::RegisterAsmStreamer(TheThumbBETarget, createMCAsmStreamer);
-
- // Register the null TargetStreamer.
- TargetRegistry::RegisterNullTargetStreamer(TheARMLETarget,
- createARMNullTargetStreamer);
- TargetRegistry::RegisterNullTargetStreamer(TheARMBETarget,
- createARMNullTargetStreamer);
- TargetRegistry::RegisterNullTargetStreamer(TheThumbLETarget,
- createARMNullTargetStreamer);
- TargetRegistry::RegisterNullTargetStreamer(TheThumbBETarget,
- createARMNullTargetStreamer);
-
- // Register the MCInstPrinter.
- TargetRegistry::RegisterMCInstPrinter(TheARMLETarget, createARMMCInstPrinter);
- TargetRegistry::RegisterMCInstPrinter(TheARMBETarget, createARMMCInstPrinter);
- TargetRegistry::RegisterMCInstPrinter(TheThumbLETarget,
- createARMMCInstPrinter);
- TargetRegistry::RegisterMCInstPrinter(TheThumbBETarget,
- createARMMCInstPrinter);
-
- // Register the MC relocation info.
- TargetRegistry::RegisterMCRelocationInfo(TheARMLETarget,
- createARMMCRelocationInfo);
- TargetRegistry::RegisterMCRelocationInfo(TheARMBETarget,
- createARMMCRelocationInfo);
- TargetRegistry::RegisterMCRelocationInfo(TheThumbLETarget,
- createARMMCRelocationInfo);
- TargetRegistry::RegisterMCRelocationInfo(TheThumbBETarget,
- createARMMCRelocationInfo);
}
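The registration boilerplate above collapses into a range-based for over a braced list of Target pointers; each iteration registers the whole MC stack for one of the four ARM/Thumb endianness variants. The idiom in isolation (plain C++11, illustrative names):

    #include <cstdio>
    #include <initializer_list>
    struct Target { const char *Name; };
    int main() {
      Target ARMLE{"arm"}, ARMBE{"armeb"}, ThumbLE{"thumb"}, ThumbBE{"thumbeb"};
      for (Target *T : {&ARMLE, &ARMBE, &ThumbLE, &ThumbBE}) // initializer_list<Target*>
        printf("registering %s\n", T->Name);
    }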
diff --git a/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.h b/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.h
index c17e959..7e9ba66 100644
--- a/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.h
+++ b/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.h
@@ -32,6 +32,7 @@ class MCRelocationInfo;
class MCTargetStreamer;
class StringRef;
class Target;
+class Triple;
class raw_ostream;
extern Target TheARMLETarget, TheThumbLETarget;
@@ -47,21 +48,20 @@ namespace ARM_MC {
StringRef FS);
}
-MCStreamer *createMCAsmStreamer(MCContext &Ctx, formatted_raw_ostream &OS,
- bool isVerboseAsm, bool useDwarfDirectory,
- MCInstPrinter *InstPrint, MCCodeEmitter *CE,
- MCAsmBackend *TAB, bool ShowInst);
-
MCTargetStreamer *createARMNullTargetStreamer(MCStreamer &S);
+MCTargetStreamer *createARMTargetAsmStreamer(MCStreamer &S,
+ formatted_raw_ostream &OS,
+ MCInstPrinter *InstPrint,
+ bool isVerboseAsm);
+MCTargetStreamer *createARMObjectTargetStreamer(MCStreamer &S,
+ const MCSubtargetInfo &STI);
MCCodeEmitter *createARMLEMCCodeEmitter(const MCInstrInfo &MCII,
const MCRegisterInfo &MRI,
- const MCSubtargetInfo &STI,
MCContext &Ctx);
MCCodeEmitter *createARMBEMCCodeEmitter(const MCInstrInfo &MCII,
const MCRegisterInfo &MRI,
- const MCSubtargetInfo &STI,
MCContext &Ctx);
MCAsmBackend *createARMAsmBackend(const Target &T, const MCRegisterInfo &MRI,
@@ -80,10 +80,11 @@ MCAsmBackend *createThumbLEAsmBackend(const Target &T, const MCRegisterInfo &MRI
MCAsmBackend *createThumbBEAsmBackend(const Target &T, const MCRegisterInfo &MRI,
StringRef TT, StringRef CPU);
-/// createARMWinCOFFStreamer - Construct a PE/COFF machine code streamer which
-/// will generate a PE/COFF object file.
+// Construct a PE/COFF machine code streamer which will generate a PE/COFF
+// object file.
MCStreamer *createARMWinCOFFStreamer(MCContext &Context, MCAsmBackend &MAB,
- MCCodeEmitter &Emitter, raw_ostream &OS);
+ raw_ostream &OS, MCCodeEmitter *Emitter,
+ bool RelaxAll);
/// createARMELFObjectWriter - Construct an ELF Mach-O object writer.
MCObjectWriter *createARMELFObjectWriter(raw_ostream &OS,
diff --git a/lib/Target/ARM/MCTargetDesc/ARMUnwindOpAsm.cpp b/lib/Target/ARM/MCTargetDesc/ARMUnwindOpAsm.cpp
index 593fe34..173cc93 100644
--- a/lib/Target/ARM/MCTargetDesc/ARMUnwindOpAsm.cpp
+++ b/lib/Target/ARM/MCTargetDesc/ARMUnwindOpAsm.cpp
@@ -72,14 +72,10 @@ void UnwindOpcodeAssembler::EmitRegSave(uint32_t RegSave) {
// opcode when r4 is not in .save directive.
// Compute the consecutive registers from r4 to r11.
- uint32_t Range = 0;
- uint32_t Mask = (1u << 4);
- for (uint32_t Bit = (1u << 5); Bit < (1u << 12); Bit <<= 1) {
- if ((RegSave & Bit) == 0u)
- break;
- ++Range;
- Mask |= Bit;
- }
+ uint32_t Mask = RegSave & 0xff0u;
+ uint32_t Range = countTrailingOnes(Mask >> 5); // Exclude r4.
+ // Mask off non-consecutive registers. Keep r4.
+ Mask &= ~(0xffffffe0u << Range);
// Emit this opcode when the mask covers every registers.
uint32_t UnmaskedReg = RegSave & 0xfff0u & (~Mask);
@@ -105,50 +101,24 @@ void UnwindOpcodeAssembler::EmitRegSave(uint32_t RegSave) {
/// Emit unwind opcodes for .vsave directives
void UnwindOpcodeAssembler::EmitVFPRegSave(uint32_t VFPRegSave) {
- size_t i = 32;
-
- while (i > 16) {
- uint32_t Bit = 1u << (i - 1);
- if ((VFPRegSave & Bit) == 0u) {
- --i;
- continue;
- }
-
- uint32_t Range = 0;
-
- --i;
- Bit >>= 1;
-
- while (i > 16 && (VFPRegSave & Bit)) {
- --i;
- ++Range;
- Bit >>= 1;
+ // We only have 4 bits to save the offset in the opcode so look at the lower
+ // and upper 16 bits separately.
+ for (uint32_t Regs : {VFPRegSave & 0xffff0000u, VFPRegSave & 0x0000ffffu}) {
+ while (Regs) {
+ // Now look for a run of set bits. Remember the MSB and LSB of the run.
+ auto RangeMSB = 32 - countLeadingZeros(Regs);
+ auto RangeLen = countLeadingOnes(Regs << (32 - RangeMSB));
+ auto RangeLSB = RangeMSB - RangeLen;
+
+ int Opcode = RangeLSB >= 16
+ ? ARM::EHABI::UNWIND_OPCODE_POP_VFP_REG_RANGE_FSTMFDD_D16
+ : ARM::EHABI::UNWIND_OPCODE_POP_VFP_REG_RANGE_FSTMFDD;
+
+ EmitInt16(Opcode | ((RangeLSB % 16) << 4) | (RangeLen - 1));
+
+ // Zero out bits we're done with.
+ Regs &= ~(-1u << RangeLSB);
}
-
- EmitInt16(ARM::EHABI::UNWIND_OPCODE_POP_VFP_REG_RANGE_FSTMFDD_D16 |
- ((i - 16) << 4) | Range);
- }
-
- while (i > 0) {
- uint32_t Bit = 1u << (i - 1);
- if ((VFPRegSave & Bit) == 0u) {
- --i;
- continue;
- }
-
- uint32_t Range = 0;
-
- --i;
- Bit >>= 1;
-
- while (i > 0 && (VFPRegSave & Bit)) {
- --i;
- ++Range;
- Bit >>= 1;
- }
-
- EmitInt16(ARM::EHABI::UNWIND_OPCODE_POP_VFP_REG_RANGE_FSTMFDD | (i << 4) |
- Range);
}
}
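The rewritten EmitVFPRegSave replaces two nearly identical scanning loops with one loop that peels maximal runs of set bits off the top of the mask, handling d0-d15 and d16-d31 separately because the opcode's range field is only 4 bits wide. A standalone version of the run extraction (assuming GCC/Clang builtins in place of llvm::countLeadingZeros/countLeadingOnes):

    #include <cstdint>
    #include <cstdio>

    static unsigned clz32(uint32_t V) { return V ? __builtin_clz(V) : 32; }
    static unsigned clo32(uint32_t V) { return clz32(~V); }

    int main() {
      uint32_t Regs = 0x0f30u;                    // runs d4-d5 and d8-d11
      while (Regs) {
        unsigned MSB = 32 - clz32(Regs);          // one past the top set bit
        unsigned Len = clo32(Regs << (32 - MSB)); // length of the run below it
        unsigned LSB = MSB - Len;
        printf("pop d%u-d%u\n", LSB, MSB - 1);    // d8-d11, then d4-d5
        Regs &= ~(~0u << LSB);                    // clear this run and above
      }
    }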
diff --git a/lib/Target/ARM/MCTargetDesc/ARMWinCOFFStreamer.cpp b/lib/Target/ARM/MCTargetDesc/ARMWinCOFFStreamer.cpp
index b344ced..dc707dc 100644
--- a/lib/Target/ARM/MCTargetDesc/ARMWinCOFFStreamer.cpp
+++ b/lib/Target/ARM/MCTargetDesc/ARMWinCOFFStreamer.cpp
@@ -37,10 +37,10 @@ void ARMWinCOFFStreamer::EmitThumbFunc(MCSymbol *Symbol) {
}
}
-namespace llvm {
-MCStreamer *createARMWinCOFFStreamer(MCContext &Context, MCAsmBackend &MAB,
- MCCodeEmitter &Emitter, raw_ostream &OS) {
- return new ARMWinCOFFStreamer(Context, MAB, Emitter, OS);
-}
+MCStreamer *llvm::createARMWinCOFFStreamer(MCContext &Context,
+ MCAsmBackend &MAB, raw_ostream &OS,
+ MCCodeEmitter *Emitter,
+ bool RelaxAll) {
+ return new ARMWinCOFFStreamer(Context, MAB, *Emitter, OS);
}
diff --git a/lib/Target/ARM/MLxExpansionPass.cpp b/lib/Target/ARM/MLxExpansionPass.cpp
index 51e519d..ed2deea 100644
--- a/lib/Target/ARM/MLxExpansionPass.cpp
+++ b/lib/Target/ARM/MLxExpansionPass.cpp
@@ -382,6 +382,9 @@ bool MLxExpansion::runOnMachineFunction(MachineFunction &Fn) {
TRI = Fn.getSubtarget().getRegisterInfo();
MRI = &Fn.getRegInfo();
const ARMSubtarget *STI = &Fn.getSubtarget<ARMSubtarget>();
+ // Only run this for CortexA9.
+ if (!STI->isCortexA9())
+ return false;
isLikeA9 = STI->isLikeA9() || STI->isSwift();
isSwift = STI->isSwift();
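This early return is one instance of a pattern repeated below in Thumb2ITBlockPass and Thumb2SizeReduce: with per-function subtargets the pass is always in the pipeline, and each runOnMachineFunction decides for itself whether the current function's subtarget applies. The shape in miniature (plain C++, illustrative only):

    #include <cstdio>
    #include <functional>
    #include <vector>

    struct Function { bool IsCortexA9; };

    int main() {
      // The pipeline no longer knows the subtarget up front; each pass checks.
      std::vector<std::function<bool(Function &)>> Pipeline;
      Pipeline.push_back([](Function &F) {
        if (!F.IsCortexA9)
          return false;              // wrong subtarget: leave F untouched
        /* ...expand MLx here... */
        return true;
      });
      Function A{true}, B{false};
      for (auto &Pass : Pipeline)
        printf("%d %d\n", (int)Pass(A), (int)Pass(B)); // 1 0
    }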
diff --git a/lib/Target/ARM/README-Thumb.txt b/lib/Target/ARM/README-Thumb.txt
index f4d9be3..2d031d0 100644
--- a/lib/Target/ARM/README-Thumb.txt
+++ b/lib/Target/ARM/README-Thumb.txt
@@ -232,7 +232,7 @@ Make use of hi register variants of cmp: tCMPhir / tCMPZhir.
//===---------------------------------------------------------------------===//
Thumb1 immediate field sometimes keep pre-scaled values. See
-Thumb1RegisterInfo::eliminateFrameIndex. This is inconsistent from ARM and
+ThumbRegisterInfo::eliminateFrameIndex. This is inconsistent with ARM and
Thumb2.
//===---------------------------------------------------------------------===//
diff --git a/lib/Target/ARM/Thumb1FrameLowering.cpp b/lib/Target/ARM/Thumb1FrameLowering.cpp
index 7dcc64e..c496cd7 100644
--- a/lib/Target/ARM/Thumb1FrameLowering.cpp
+++ b/lib/Target/ARM/Thumb1FrameLowering.cpp
@@ -41,7 +41,7 @@ static void
emitSPUpdate(MachineBasicBlock &MBB,
MachineBasicBlock::iterator &MBBI,
const TargetInstrInfo &TII, DebugLoc dl,
- const Thumb1RegisterInfo &MRI,
+ const ThumbRegisterInfo &MRI,
int NumBytes, unsigned MIFlags = MachineInstr::NoFlags) {
emitThumbRegPlusImmediate(MBB, MBBI, dl, ARM::SP, ARM::SP, NumBytes, TII,
MRI, MIFlags);
@@ -53,8 +53,8 @@ eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
MachineBasicBlock::iterator I) const {
const Thumb1InstrInfo &TII =
*static_cast<const Thumb1InstrInfo *>(STI.getInstrInfo());
- const Thumb1RegisterInfo *RegInfo =
- static_cast<const Thumb1RegisterInfo *>(STI.getRegisterInfo());
+ const ThumbRegisterInfo *RegInfo =
+ static_cast<const ThumbRegisterInfo *>(STI.getRegisterInfo());
if (!hasReservedCallFrame(MF)) {
// If we have alloca, convert as follows:
// ADJCALLSTACKDOWN -> sub, sp, sp, amount
@@ -89,13 +89,12 @@ void Thumb1FrameLowering::emitPrologue(MachineFunction &MF) const {
ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
MachineModuleInfo &MMI = MF.getMMI();
const MCRegisterInfo *MRI = MMI.getContext().getRegisterInfo();
- const Thumb1RegisterInfo *RegInfo =
- static_cast<const Thumb1RegisterInfo *>(STI.getRegisterInfo());
+ const ThumbRegisterInfo *RegInfo =
+ static_cast<const ThumbRegisterInfo *>(STI.getRegisterInfo());
const Thumb1InstrInfo &TII =
*static_cast<const Thumb1InstrInfo *>(STI.getInstrInfo());
- unsigned Align = STI.getFrameLowering()->getStackAlignment();
- unsigned ArgRegsSaveSize = AFI->getArgRegsSaveSize(Align);
+ unsigned ArgRegsSaveSize = AFI->getArgRegsSaveSize();
unsigned NumBytes = MFI->getStackSize();
assert(NumBytes >= ArgRegsSaveSize &&
"ArgRegsSaveSize is included in NumBytes");
@@ -328,17 +327,16 @@ void Thumb1FrameLowering::emitEpilogue(MachineFunction &MF,
DebugLoc dl = MBBI->getDebugLoc();
MachineFrameInfo *MFI = MF.getFrameInfo();
ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
- const Thumb1RegisterInfo *RegInfo =
- static_cast<const Thumb1RegisterInfo *>(STI.getRegisterInfo());
+ const ThumbRegisterInfo *RegInfo =
+ static_cast<const ThumbRegisterInfo *>(STI.getRegisterInfo());
const Thumb1InstrInfo &TII =
*static_cast<const Thumb1InstrInfo *>(STI.getInstrInfo());
- unsigned Align = STI.getFrameLowering()->getStackAlignment();
- unsigned ArgRegsSaveSize = AFI->getArgRegsSaveSize(Align);
+ unsigned ArgRegsSaveSize = AFI->getArgRegsSaveSize();
int NumBytes = (int)MFI->getStackSize();
assert((unsigned)NumBytes >= ArgRegsSaveSize &&
"ArgRegsSaveSize is included in NumBytes");
- const MCPhysReg *CSRegs = RegInfo->getCalleeSavedRegs();
+ const MCPhysReg *CSRegs = RegInfo->getCalleeSavedRegs(&MF);
unsigned FramePtr = RegInfo->getFrameRegister(MF);
if (!AFI->hasStackFrame()) {
diff --git a/lib/Target/ARM/Thumb1FrameLowering.h b/lib/Target/ARM/Thumb1FrameLowering.h
index b785b28..cf93203 100644
--- a/lib/Target/ARM/Thumb1FrameLowering.h
+++ b/lib/Target/ARM/Thumb1FrameLowering.h
@@ -16,7 +16,7 @@
#include "ARMFrameLowering.h"
#include "Thumb1InstrInfo.h"
-#include "Thumb1RegisterInfo.h"
+#include "ThumbRegisterInfo.h"
#include "llvm/Target/TargetFrameLowering.h"
namespace llvm {
diff --git a/lib/Target/ARM/Thumb1InstrInfo.cpp b/lib/Target/ARM/Thumb1InstrInfo.cpp
index c24f740..29aaa15 100644
--- a/lib/Target/ARM/Thumb1InstrInfo.cpp
+++ b/lib/Target/ARM/Thumb1InstrInfo.cpp
@@ -22,8 +22,7 @@
using namespace llvm;
Thumb1InstrInfo::Thumb1InstrInfo(const ARMSubtarget &STI)
- : ARMBaseInstrInfo(STI), RI(STI) {
-}
+ : ARMBaseInstrInfo(STI), RI() {}
/// getNoopForMachoTarget - Return the noop instruction to use for a noop.
void Thumb1InstrInfo::getNoopForMachoTarget(MCInst &NopInst) const {
diff --git a/lib/Target/ARM/Thumb1InstrInfo.h b/lib/Target/ARM/Thumb1InstrInfo.h
index 9fba760..f3f493d 100644
--- a/lib/Target/ARM/Thumb1InstrInfo.h
+++ b/lib/Target/ARM/Thumb1InstrInfo.h
@@ -15,13 +15,13 @@
#define LLVM_LIB_TARGET_ARM_THUMB1INSTRINFO_H
#include "ARMBaseInstrInfo.h"
-#include "Thumb1RegisterInfo.h"
+#include "ThumbRegisterInfo.h"
namespace llvm {
class ARMSubtarget;
class Thumb1InstrInfo : public ARMBaseInstrInfo {
- Thumb1RegisterInfo RI;
+ ThumbRegisterInfo RI;
public:
explicit Thumb1InstrInfo(const ARMSubtarget &STI);
@@ -36,7 +36,7 @@ public:
/// such, whenever a client has an instance of instruction info, it should
/// always be able to get register info as well (through this method).
///
- const Thumb1RegisterInfo &getRegisterInfo() const override { return RI; }
+ const ThumbRegisterInfo &getRegisterInfo() const override { return RI; }
void copyPhysReg(MachineBasicBlock &MBB,
MachineBasicBlock::iterator I, DebugLoc DL,
diff --git a/lib/Target/ARM/Thumb2ITBlockPass.cpp b/lib/Target/ARM/Thumb2ITBlockPass.cpp
index b657f2d..7bb2265 100644
--- a/lib/Target/ARM/Thumb2ITBlockPass.cpp
+++ b/lib/Target/ARM/Thumb2ITBlockPass.cpp
@@ -255,6 +255,8 @@ bool Thumb2ITBlockPass::InsertITInstructions(MachineBasicBlock &MBB) {
bool Thumb2ITBlockPass::runOnMachineFunction(MachineFunction &Fn) {
const ARMSubtarget &STI =
static_cast<const ARMSubtarget &>(Fn.getSubtarget());
+ if (!STI.isThumb2())
+ return false;
AFI = Fn.getInfo<ARMFunctionInfo>();
TII = static_cast<const Thumb2InstrInfo *>(STI.getInstrInfo());
TRI = STI.getRegisterInfo();
diff --git a/lib/Target/ARM/Thumb2InstrInfo.cpp b/lib/Target/ARM/Thumb2InstrInfo.cpp
index 62c3752..26ca7e9 100644
--- a/lib/Target/ARM/Thumb2InstrInfo.cpp
+++ b/lib/Target/ARM/Thumb2InstrInfo.cpp
@@ -30,8 +30,7 @@ OldT2IfCvt("old-thumb2-ifcvt", cl::Hidden,
cl::init(false));
Thumb2InstrInfo::Thumb2InstrInfo(const ARMSubtarget &STI)
- : ARMBaseInstrInfo(STI), RI(STI) {
-}
+ : ARMBaseInstrInfo(STI), RI() {}
/// getNoopForMachoTarget - Return the noop instruction to use for a noop.
void Thumb2InstrInfo::getNoopForMachoTarget(MCInst &NopInst) const {
diff --git a/lib/Target/ARM/Thumb2InstrInfo.h b/lib/Target/ARM/Thumb2InstrInfo.h
index 46a1f6d..916ab06 100644
--- a/lib/Target/ARM/Thumb2InstrInfo.h
+++ b/lib/Target/ARM/Thumb2InstrInfo.h
@@ -15,14 +15,14 @@
#define LLVM_LIB_TARGET_ARM_THUMB2INSTRINFO_H
#include "ARMBaseInstrInfo.h"
-#include "Thumb2RegisterInfo.h"
+#include "ThumbRegisterInfo.h"
namespace llvm {
class ARMSubtarget;
class ScheduleHazardRecognizer;
class Thumb2InstrInfo : public ARMBaseInstrInfo {
- Thumb2RegisterInfo RI;
+ ThumbRegisterInfo RI;
public:
explicit Thumb2InstrInfo(const ARMSubtarget &STI);
@@ -60,7 +60,7 @@ public:
/// such, whenever a client has an instance of instruction info, it should
/// always be able to get register info as well (through this method).
///
- const Thumb2RegisterInfo &getRegisterInfo() const override { return RI; }
+ const ThumbRegisterInfo &getRegisterInfo() const override { return RI; }
private:
void expandLoadStackGuard(MachineBasicBlock::iterator MI,
diff --git a/lib/Target/ARM/Thumb2RegisterInfo.cpp b/lib/Target/ARM/Thumb2RegisterInfo.cpp
deleted file mode 100644
index 0d5d85a..0000000
--- a/lib/Target/ARM/Thumb2RegisterInfo.cpp
+++ /dev/null
@@ -1,53 +0,0 @@
-//===-- Thumb2RegisterInfo.cpp - Thumb-2 Register Information -------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file contains the Thumb-2 implementation of the TargetRegisterInfo
-// class.
-//
-//===----------------------------------------------------------------------===//
-
-#include "Thumb2RegisterInfo.h"
-#include "ARM.h"
-#include "ARMSubtarget.h"
-#include "llvm/CodeGen/MachineConstantPool.h"
-#include "llvm/CodeGen/MachineFunction.h"
-#include "llvm/CodeGen/MachineInstrBuilder.h"
-#include "llvm/IR/Constants.h"
-#include "llvm/IR/DerivedTypes.h"
-#include "llvm/IR/Function.h"
-#include "llvm/Target/TargetInstrInfo.h"
-#include "llvm/Target/TargetMachine.h"
-using namespace llvm;
-
-Thumb2RegisterInfo::Thumb2RegisterInfo(const ARMSubtarget &sti)
- : ARMBaseRegisterInfo(sti) {
-}
-
-/// emitLoadConstPool - Emits a load from constpool to materialize the
-/// specified immediate.
-void
-Thumb2RegisterInfo::emitLoadConstPool(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator &MBBI,
- DebugLoc dl,
- unsigned DestReg, unsigned SubIdx,
- int Val,
- ARMCC::CondCodes Pred, unsigned PredReg,
- unsigned MIFlags) const {
- MachineFunction &MF = *MBB.getParent();
- const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
- MachineConstantPool *ConstantPool = MF.getConstantPool();
- const Constant *C = ConstantInt::get(
- Type::getInt32Ty(MBB.getParent()->getFunction()->getContext()), Val);
- unsigned Idx = ConstantPool->getConstantPoolIndex(C, 4);
-
- BuildMI(MBB, MBBI, dl, TII.get(ARM::t2LDRpci))
- .addReg(DestReg, getDefRegState(true), SubIdx)
- .addConstantPoolIndex(Idx).addImm((int64_t)ARMCC::AL).addReg(0)
- .setMIFlags(MIFlags);
-}
diff --git a/lib/Target/ARM/Thumb2RegisterInfo.h b/lib/Target/ARM/Thumb2RegisterInfo.h
deleted file mode 100644
index 1dd94cc..0000000
--- a/lib/Target/ARM/Thumb2RegisterInfo.h
+++ /dev/null
@@ -1,38 +0,0 @@
-//===- Thumb2RegisterInfo.h - Thumb-2 Register Information Impl -*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file contains the Thumb-2 implementation of the TargetRegisterInfo
-// class.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_LIB_TARGET_ARM_THUMB2REGISTERINFO_H
-#define LLVM_LIB_TARGET_ARM_THUMB2REGISTERINFO_H
-
-#include "ARMBaseRegisterInfo.h"
-
-namespace llvm {
-
-class ARMSubtarget;
-
-struct Thumb2RegisterInfo : public ARMBaseRegisterInfo {
-public:
- Thumb2RegisterInfo(const ARMSubtarget &STI);
-
- /// emitLoadConstPool - Emits a load from constpool to materialize the
- /// specified immediate.
- void
- emitLoadConstPool(MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI,
- DebugLoc dl, unsigned DestReg, unsigned SubIdx, int Val,
- ARMCC::CondCodes Pred = ARMCC::AL, unsigned PredReg = 0,
- unsigned MIFlags = MachineInstr::NoFlags) const override;
-};
-}
-
-#endif
diff --git a/lib/Target/ARM/Thumb2SizeReduction.cpp b/lib/Target/ARM/Thumb2SizeReduction.cpp
index 2ee908b..e967e53 100644
--- a/lib/Target/ARM/Thumb2SizeReduction.cpp
+++ b/lib/Target/ARM/Thumb2SizeReduction.cpp
@@ -21,6 +21,7 @@
#include "llvm/IR/Function.h" // To access Function attributes
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetMachine.h"
using namespace llvm;
@@ -1002,6 +1003,9 @@ bool Thumb2SizeReduce::ReduceMBB(MachineBasicBlock &MBB) {
bool Thumb2SizeReduce::runOnMachineFunction(MachineFunction &MF) {
STI = &static_cast<const ARMSubtarget &>(MF.getSubtarget());
+ if (STI->isThumb1Only() || STI->prefers32BitThumb())
+ return false;
+
TII = static_cast<const Thumb2InstrInfo *>(STI->getInstrInfo());
// Optimizing / minimizing size?
diff --git a/lib/Target/ARM/Thumb1RegisterInfo.cpp b/lib/Target/ARM/ThumbRegisterInfo.cpp
index 5e2cbdc..b5f9d7e 100644
--- a/lib/Target/ARM/Thumb1RegisterInfo.cpp
+++ b/lib/Target/ARM/ThumbRegisterInfo.cpp
@@ -1,4 +1,4 @@
-//===-- Thumb1RegisterInfo.cpp - Thumb-1 Register Information -------------===//
+//===-- ThumbRegisterInfo.cpp - Thumb Register Information ---------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -12,7 +12,7 @@
//
//===----------------------------------------------------------------------===//
-#include "Thumb1RegisterInfo.h"
+#include "ThumbRegisterInfo.h"
#include "ARMBaseInstrInfo.h"
#include "ARMMachineFunctionInfo.h"
#include "ARMSubtarget.h"
@@ -38,39 +38,35 @@ extern cl::opt<bool> ReuseFrameIndexVals;
using namespace llvm;
-Thumb1RegisterInfo::Thumb1RegisterInfo(const ARMSubtarget &sti)
- : ARMBaseRegisterInfo(sti) {
-}
+ThumbRegisterInfo::ThumbRegisterInfo() : ARMBaseRegisterInfo() {}
+
+const TargetRegisterClass *
+ThumbRegisterInfo::getLargestLegalSuperClass(const TargetRegisterClass *RC,
+ const MachineFunction &MF) const {
+ if (!MF.getSubtarget<ARMSubtarget>().isThumb1Only())
+ return ARMBaseRegisterInfo::getLargestLegalSuperClass(RC, MF);
-const TargetRegisterClass*
-Thumb1RegisterInfo::getLargestLegalSuperClass(const TargetRegisterClass *RC)
- const {
if (ARM::tGPRRegClass.hasSubClassEq(RC))
return &ARM::tGPRRegClass;
- return ARMBaseRegisterInfo::getLargestLegalSuperClass(RC);
+ return ARMBaseRegisterInfo::getLargestLegalSuperClass(RC, MF);
}
const TargetRegisterClass *
-Thumb1RegisterInfo::getPointerRegClass(const MachineFunction &MF, unsigned Kind)
- const {
+ThumbRegisterInfo::getPointerRegClass(const MachineFunction &MF,
+ unsigned Kind) const {
+ if (!MF.getSubtarget<ARMSubtarget>().isThumb1Only())
+ return ARMBaseRegisterInfo::getPointerRegClass(MF, Kind);
return &ARM::tGPRRegClass;
}
-/// emitLoadConstPool - Emits a load from constpool to materialize the
-/// specified immediate.
-void
-Thumb1RegisterInfo::emitLoadConstPool(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator &MBBI,
- DebugLoc dl,
- unsigned DestReg, unsigned SubIdx,
- int Val,
- ARMCC::CondCodes Pred, unsigned PredReg,
- unsigned MIFlags) const {
- assert((isARMLowRegister(DestReg) ||
- isVirtualRegister(DestReg)) &&
- "Thumb1 does not have ldr to high register");
-
+static void emitThumb1LoadConstPool(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator &MBBI,
+ DebugLoc dl, unsigned DestReg,
+ unsigned SubIdx, int Val,
+ ARMCC::CondCodes Pred, unsigned PredReg,
+ unsigned MIFlags) {
MachineFunction &MF = *MBB.getParent();
+ const ARMSubtarget &STI = MF.getSubtarget<ARMSubtarget>();
const TargetInstrInfo &TII = *STI.getInstrInfo();
MachineConstantPool *ConstantPool = MF.getConstantPool();
const Constant *C = ConstantInt::get(
@@ -83,6 +79,42 @@ Thumb1RegisterInfo::emitLoadConstPool(MachineBasicBlock &MBB,
.setMIFlags(MIFlags);
}
+static void emitThumb2LoadConstPool(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator &MBBI,
+ DebugLoc dl, unsigned DestReg,
+ unsigned SubIdx, int Val,
+ ARMCC::CondCodes Pred, unsigned PredReg,
+ unsigned MIFlags) {
+ MachineFunction &MF = *MBB.getParent();
+ const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
+ MachineConstantPool *ConstantPool = MF.getConstantPool();
+ const Constant *C = ConstantInt::get(
+ Type::getInt32Ty(MBB.getParent()->getFunction()->getContext()), Val);
+ unsigned Idx = ConstantPool->getConstantPoolIndex(C, 4);
+
+ BuildMI(MBB, MBBI, dl, TII.get(ARM::t2LDRpci))
+ .addReg(DestReg, getDefRegState(true), SubIdx)
+ .addConstantPoolIndex(Idx).addImm((int64_t)ARMCC::AL).addReg(0)
+ .setMIFlags(MIFlags);
+}
+
+/// emitLoadConstPool - Emits a load from constpool to materialize the
+/// specified immediate.
+void ThumbRegisterInfo::emitLoadConstPool(
+ MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI, DebugLoc dl,
+ unsigned DestReg, unsigned SubIdx, int Val, ARMCC::CondCodes Pred,
+ unsigned PredReg, unsigned MIFlags) const {
+ MachineFunction &MF = *MBB.getParent();
+ const ARMSubtarget &STI = MF.getSubtarget<ARMSubtarget>();
+ if (STI.isThumb1Only()) {
+ assert((isARMLowRegister(DestReg) || isVirtualRegister(DestReg)) &&
+ "Thumb1 does not have ldr to high register");
+ return emitThumb1LoadConstPool(MBB, MBBI, dl, DestReg, SubIdx, Val, Pred,
+ PredReg, MIFlags);
+ }
+ return emitThumb2LoadConstPool(MBB, MBBI, dl, DestReg, SubIdx, Val, Pred,
+ PredReg, MIFlags);
+}
/// emitThumbRegPlusImmInReg - Emits a series of instructions to materialize
/// a destreg = basereg + immediate in Thumb code. Materialize the immediate
@@ -317,12 +349,14 @@ static unsigned convertToNonSPOpcode(unsigned Opcode) {
return Opcode;
}
-bool Thumb1RegisterInfo::
-rewriteFrameIndex(MachineBasicBlock::iterator II, unsigned FrameRegIdx,
- unsigned FrameReg, int &Offset,
- const ARMBaseInstrInfo &TII) const {
+bool ThumbRegisterInfo::rewriteFrameIndex(MachineBasicBlock::iterator II,
+ unsigned FrameRegIdx,
+ unsigned FrameReg, int &Offset,
+ const ARMBaseInstrInfo &TII) const {
MachineInstr &MI = *II;
MachineBasicBlock &MBB = *MI.getParent();
+ assert(MBB.getParent()->getSubtarget<ARMSubtarget>().isThumb1Only() &&
+ "This isn't needed for thumb2!");
DebugLoc dl = MI.getDebugLoc();
MachineInstrBuilder MIB(*MBB.getParent(), &MI);
unsigned Opcode = MI.getOpcode();
@@ -386,8 +420,13 @@ rewriteFrameIndex(MachineBasicBlock::iterator II, unsigned FrameRegIdx,
return Offset == 0;
}
-void Thumb1RegisterInfo::resolveFrameIndex(MachineInstr &MI, unsigned BaseReg,
+void ThumbRegisterInfo::resolveFrameIndex(MachineInstr &MI, unsigned BaseReg,
int64_t Offset) const {
+ const MachineFunction &MF = *MI.getParent()->getParent();
+ const ARMSubtarget &STI = MF.getSubtarget<ARMSubtarget>();
+ if (!STI.isThumb1Only())
+ return ARMBaseRegisterInfo::resolveFrameIndex(MI, BaseReg, Offset);
+
const ARMBaseInstrInfo &TII = *STI.getInstrInfo();
int Off = Offset; // ARM doesn't need the general 64-bit offsets
unsigned i = 0;
@@ -403,12 +442,15 @@ void Thumb1RegisterInfo::resolveFrameIndex(MachineInstr &MI, unsigned BaseReg,
/// saveScavengerRegister - Spill the register so it can be used by the
/// register scavenger. Return true.
-bool
-Thumb1RegisterInfo::saveScavengerRegister(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator I,
- MachineBasicBlock::iterator &UseMI,
- const TargetRegisterClass *RC,
- unsigned Reg) const {
+bool ThumbRegisterInfo::saveScavengerRegister(
+ MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
+ MachineBasicBlock::iterator &UseMI, const TargetRegisterClass *RC,
+ unsigned Reg) const {
+
+ const ARMSubtarget &STI = MBB.getParent()->getSubtarget<ARMSubtarget>();
+ if (!STI.isThumb1Only())
+ return ARMBaseRegisterInfo::saveScavengerRegister(MBB, I, UseMI, RC, Reg);
+
// Thumb1 can't use the emergency spill slot on the stack because
// ldr/str immediate offsets must be positive, and if we're referencing
// off the frame pointer (if, for example, there are alloca() calls in
@@ -452,14 +494,18 @@ Thumb1RegisterInfo::saveScavengerRegister(MachineBasicBlock &MBB,
return true;
}
-void
-Thumb1RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
- int SPAdj, unsigned FIOperandNum,
- RegScavenger *RS) const {
- unsigned VReg = 0;
+void ThumbRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
+ int SPAdj, unsigned FIOperandNum,
+ RegScavenger *RS) const {
MachineInstr &MI = *II;
MachineBasicBlock &MBB = *MI.getParent();
MachineFunction &MF = *MBB.getParent();
+ const ARMSubtarget &STI = MF.getSubtarget<ARMSubtarget>();
+ if (!STI.isThumb1Only())
+ return ARMBaseRegisterInfo::eliminateFrameIndex(II, SPAdj, FIOperandNum,
+ RS);
+
+ unsigned VReg = 0;
const ARMBaseInstrInfo &TII = *STI.getInstrInfo();
ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
DebugLoc dl = MI.getDebugLoc();
diff --git a/lib/Target/ARM/Thumb1RegisterInfo.h b/lib/Target/ARM/ThumbRegisterInfo.h
index 5feaf52..23aaff3 100644
--- a/lib/Target/ARM/Thumb1RegisterInfo.h
+++ b/lib/Target/ARM/ThumbRegisterInfo.h
@@ -1,4 +1,4 @@
-//===- Thumb1RegisterInfo.h - Thumb-1 Register Information Impl -*- C++ -*-===//
+//===- ThumbRegisterInfo.h - Thumb Register Information Impl -*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
@@ -7,8 +7,9 @@
//
//===----------------------------------------------------------------------===//
//
-// This file contains the Thumb-1 implementation of the TargetRegisterInfo
-// class.
+// This file contains the Thumb implementation of the TargetRegisterInfo
+// class. With the exception of emitLoadConstPool, Thumb2 tracks
+// ARMBaseRegisterInfo; Thumb1 overloads the functions below.
//
//===----------------------------------------------------------------------===//
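
The merged class works by dispatching on the subtarget at runtime: each overridden hook checks isThumb1Only() and otherwise falls straight through to the ARM base implementation, as the .cpp changes above show. A condensed sketch of the pattern (names simplified, not the real signatures):

    struct ARMBaseRI {
      virtual void resolveFrameIndex() { /* shared ARM/Thumb2 lowering */ }
      virtual ~ARMBaseRI() {}
    };
    struct ThumbRI : ARMBaseRI {
      bool Thumb1Only;
      ThumbRI(bool T1) : Thumb1Only(T1) {}
      void resolveFrameIndex() override {
        if (!Thumb1Only)
          return ARMBaseRI::resolveFrameIndex(); // Thumb2: defer to the base
        /* Thumb1-specific lowering */
      }
    };
    int main() { ThumbRI(false).resolveFrameIndex(); }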
@@ -22,12 +23,13 @@ namespace llvm {
class ARMSubtarget;
class ARMBaseInstrInfo;
-struct Thumb1RegisterInfo : public ARMBaseRegisterInfo {
+struct ThumbRegisterInfo : public ARMBaseRegisterInfo {
public:
- Thumb1RegisterInfo(const ARMSubtarget &STI);
+ ThumbRegisterInfo();
const TargetRegisterClass *
- getLargestLegalSuperClass(const TargetRegisterClass *RC) const override;
+ getLargestLegalSuperClass(const TargetRegisterClass *RC,
+ const MachineFunction &MF) const override;
const TargetRegisterClass *
getPointerRegClass(const MachineFunction &MF,
diff --git a/lib/Target/BPF/BPFISelDAGToDAG.cpp b/lib/Target/BPF/BPFISelDAGToDAG.cpp
index 07f62a9..b91b0e1 100644
--- a/lib/Target/BPF/BPFISelDAGToDAG.cpp
+++ b/lib/Target/BPF/BPFISelDAGToDAG.cpp
@@ -17,16 +17,16 @@
#include "BPFSubtarget.h"
#include "BPFTargetMachine.h"
#include "llvm/CodeGen/MachineConstantPool.h"
-#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/SelectionDAGISel.h"
-#include "llvm/Target/TargetMachine.h"
+#include "llvm/IR/IntrinsicInst.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/Target/TargetMachine.h"
using namespace llvm;
#define DEBUG_TYPE "bpf-isel"
diff --git a/lib/Target/BPF/BPFISelLowering.h b/lib/Target/BPF/BPFISelLowering.h
index 04d7908..5a6f0f7 100644
--- a/lib/Target/BPF/BPFISelLowering.h
+++ b/lib/Target/BPF/BPFISelLowering.h
@@ -20,6 +20,7 @@
#include "llvm/Target/TargetLowering.h"
namespace llvm {
+class BPFSubtarget;
namespace BPFISD {
enum {
FIRST_NUMBER = ISD::BUILTIN_OP_END,
diff --git a/lib/Target/BPF/BPFRegisterInfo.h b/lib/Target/BPF/BPFRegisterInfo.h
index 364d6f6..7072dd0 100644
--- a/lib/Target/BPF/BPFRegisterInfo.h
+++ b/lib/Target/BPF/BPFRegisterInfo.h
@@ -25,8 +25,7 @@ struct BPFRegisterInfo : public BPFGenRegisterInfo {
BPFRegisterInfo();
- const MCPhysReg *
- getCalleeSavedRegs(const MachineFunction *MF = nullptr) const override;
+ const MCPhysReg *getCalleeSavedRegs(const MachineFunction *MF) const override;
BitVector getReservedRegs(const MachineFunction &MF) const override;
diff --git a/lib/Target/BPF/BPFTargetMachine.cpp b/lib/Target/BPF/BPFTargetMachine.cpp
index 5245395..9487427 100644
--- a/lib/Target/BPF/BPFTargetMachine.cpp
+++ b/lib/Target/BPF/BPFTargetMachine.cpp
@@ -35,9 +35,9 @@ BPFTargetMachine::BPFTargetMachine(const Target &T, StringRef TT, StringRef CPU,
StringRef FS, const TargetOptions &Options,
Reloc::Model RM, CodeModel::Model CM,
CodeGenOpt::Level OL)
- : LLVMTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL),
+ : LLVMTargetMachine(T, "e-m:e-p:64:64-i64:64-n32:64-S128", TT, CPU, FS,
+ Options, RM, CM, OL),
TLOF(make_unique<TargetLoweringObjectFileELF>()),
- DL("e-m:e-p:64:64-i64:64-n32:64-S128"),
Subtarget(TT, CPU, FS, *this) {
initAsmInfo();
}
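
The string moved into the LLVMTargetMachine constructor is the module data layout: "e" marks little-endian, "m:e" ELF-style mangling, "p:64:64" 64-bit pointers with 64-bit alignment, "i64:64" naturally aligned i64, "n32:64" native 32- and 64-bit integer widths, and "S128" a 128-bit-aligned stack. A quick way to sanity-check such a string against LLVM's own parser (a sketch; assumes the LLVM headers and libraries of this era are available to build against):

    #include "llvm/IR/DataLayout.h"
    #include <cassert>
    int main() {
      llvm::DataLayout DL("e-m:e-p:64:64-i64:64-n32:64-S128");
      assert(DL.isLittleEndian());
      assert(DL.getPointerSizeInBits() == 64);
    }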
diff --git a/lib/Target/BPF/BPFTargetMachine.h b/lib/Target/BPF/BPFTargetMachine.h
index 821cffc..6aeafb9 100644
--- a/lib/Target/BPF/BPFTargetMachine.h
+++ b/lib/Target/BPF/BPFTargetMachine.h
@@ -20,7 +20,6 @@
namespace llvm {
class BPFTargetMachine : public LLVMTargetMachine {
std::unique_ptr<TargetLoweringObjectFile> TLOF;
- const DataLayout DL;
BPFSubtarget Subtarget;
public:
@@ -28,8 +27,10 @@ public:
const TargetOptions &Options, Reloc::Model RM,
CodeModel::Model CM, CodeGenOpt::Level OL);
- const DataLayout *getDataLayout() const override { return &DL; }
- const BPFSubtarget *getSubtargetImpl() const override { return &Subtarget; }
+ const BPFSubtarget *getSubtargetImpl() const { return &Subtarget; }
+ const BPFSubtarget *getSubtargetImpl(const Function &) const override {
+ return &Subtarget;
+ }
TargetPassConfig *createPassConfig(PassManagerBase &PM) override;
diff --git a/lib/Target/BPF/MCTargetDesc/BPFMCCodeEmitter.cpp b/lib/Target/BPF/MCTargetDesc/BPFMCCodeEmitter.cpp
index b94693a..9c51d66 100644
--- a/lib/Target/BPF/MCTargetDesc/BPFMCCodeEmitter.cpp
+++ b/lib/Target/BPF/MCTargetDesc/BPFMCCodeEmitter.cpp
@@ -60,7 +60,6 @@ public:
MCCodeEmitter *llvm::createBPFMCCodeEmitter(const MCInstrInfo &MCII,
const MCRegisterInfo &MRI,
- const MCSubtargetInfo &STI,
MCContext &Ctx) {
return new BPFMCCodeEmitter(MRI);
}
diff --git a/lib/Target/BPF/MCTargetDesc/BPFMCTargetDesc.cpp b/lib/Target/BPF/MCTargetDesc/BPFMCTargetDesc.cpp
index f82f009..fd04001 100644
--- a/lib/Target/BPF/MCTargetDesc/BPFMCTargetDesc.cpp
+++ b/lib/Target/BPF/MCTargetDesc/BPFMCTargetDesc.cpp
@@ -61,13 +61,11 @@ static MCCodeGenInfo *createBPFMCCodeGenInfo(StringRef TT, Reloc::Model RM,
return X;
}
-static MCStreamer *createBPFMCStreamer(const Target &T, StringRef TT,
+static MCStreamer *createBPFMCStreamer(const Triple &T,
MCContext &Ctx, MCAsmBackend &MAB,
- raw_ostream &_OS,
- MCCodeEmitter *_Emitter,
- const MCSubtargetInfo &STI,
+ raw_ostream &OS, MCCodeEmitter *Emitter,
bool RelaxAll) {
- return createELFStreamer(Ctx, MAB, _OS, _Emitter, RelaxAll);
+ return createELFStreamer(Ctx, MAB, OS, Emitter, RelaxAll);
}
static MCInstPrinter *
@@ -104,7 +102,7 @@ extern "C" void LLVMInitializeBPFTargetMC() {
TargetRegistry::RegisterMCAsmBackend(TheBPFTarget, createBPFAsmBackend);
// Register the object streamer
- TargetRegistry::RegisterMCObjectStreamer(TheBPFTarget, createBPFMCStreamer);
+ TargetRegistry::RegisterELFStreamer(TheBPFTarget, createBPFMCStreamer);
// Register the MCInstPrinter.
TargetRegistry::RegisterMCInstPrinter(TheBPFTarget, createBPFMCInstPrinter);
diff --git a/lib/Target/BPF/MCTargetDesc/BPFMCTargetDesc.h b/lib/Target/BPF/MCTargetDesc/BPFMCTargetDesc.h
index 55901cc..1fd2bec 100644
--- a/lib/Target/BPF/MCTargetDesc/BPFMCTargetDesc.h
+++ b/lib/Target/BPF/MCTargetDesc/BPFMCTargetDesc.h
@@ -33,7 +33,6 @@ extern Target TheBPFTarget;
MCCodeEmitter *createBPFMCCodeEmitter(const MCInstrInfo &MCII,
const MCRegisterInfo &MRI,
- const MCSubtargetInfo &STI,
MCContext &Ctx);
MCAsmBackend *createBPFAsmBackend(const Target &T, const MCRegisterInfo &MRI,
diff --git a/lib/Target/BPF/TargetInfo/BPFTargetInfo.cpp b/lib/Target/BPF/TargetInfo/BPFTargetInfo.cpp
index 818a992..87716e6 100644
--- a/lib/Target/BPF/TargetInfo/BPFTargetInfo.cpp
+++ b/lib/Target/BPF/TargetInfo/BPFTargetInfo.cpp
@@ -14,5 +14,5 @@ using namespace llvm;
Target llvm::TheBPFTarget;
extern "C" void LLVMInitializeBPFTargetInfo() {
- RegisterTarget<Triple::bpf> X(TheBPFTarget, "bpf", "BPF");
+ RegisterTarget<Triple::bpf, /*HasJIT=*/true> X(TheBPFTarget, "bpf", "BPF");
}
diff --git a/lib/Target/CppBackend/CPPBackend.cpp b/lib/Target/CppBackend/CPPBackend.cpp
index c7fec52..d0e2010 100644
--- a/lib/Target/CppBackend/CPPBackend.cpp
+++ b/lib/Target/CppBackend/CPPBackend.cpp
@@ -1981,7 +1981,8 @@ void CppWriter::printModule(const std::string& fname,
printEscapedString(mName);
Out << "\", getGlobalContext());";
if (!TheModule->getTargetTriple().empty()) {
- nl(Out) << "mod->setDataLayout(\"" << TheModule->getDataLayout() << "\");";
+ nl(Out) << "mod->setDataLayout(\"" << TheModule->getDataLayoutStr()
+ << "\");";
}
if (!TheModule->getTargetTriple().empty()) {
nl(Out) << "mod->setTargetTriple(\"" << TheModule->getTargetTriple()
diff --git a/lib/Target/CppBackend/CPPTargetMachine.h b/lib/Target/CppBackend/CPPTargetMachine.h
index 4bae7f8..678a932 100644
--- a/lib/Target/CppBackend/CPPTargetMachine.h
+++ b/lib/Target/CppBackend/CPPTargetMachine.h
@@ -22,20 +22,13 @@ namespace llvm {
class formatted_raw_ostream;
-class CPPSubtarget : public TargetSubtargetInfo {
-};
-
struct CPPTargetMachine : public TargetMachine {
- CPPTargetMachine(const Target &T, StringRef TT,
- StringRef CPU, StringRef FS, const TargetOptions &Options,
- Reloc::Model RM, CodeModel::Model CM,
- CodeGenOpt::Level OL)
- : TargetMachine(T, TT, CPU, FS, Options), Subtarget() {}
-private:
- CPPSubtarget Subtarget;
+ CPPTargetMachine(const Target &T, StringRef TT, StringRef CPU, StringRef FS,
+ const TargetOptions &Options, Reloc::Model RM,
+ CodeModel::Model CM, CodeGenOpt::Level OL)
+ : TargetMachine(T, "", TT, CPU, FS, Options) {}
public:
- const CPPSubtarget *getSubtargetImpl() const override { return &Subtarget; }
bool addPassesToEmitFile(PassManagerBase &PM, formatted_raw_ostream &Out,
CodeGenFileType FileType, bool DisableVerify,
AnalysisID StartAfter,
diff --git a/lib/Target/Hexagon/CMakeLists.txt b/lib/Target/Hexagon/CMakeLists.txt
index eaa8bef..c6ffb96 100644
--- a/lib/Target/Hexagon/CMakeLists.txt
+++ b/lib/Target/Hexagon/CMakeLists.txt
@@ -31,7 +31,6 @@ add_llvm_target(HexagonCodeGen
HexagonRemoveSZExtArgs.cpp
HexagonSelectionDAGInfo.cpp
HexagonSplitConst32AndConst64.cpp
- HexagonSplitTFRCondSets.cpp
HexagonSubtarget.cpp
HexagonTargetMachine.cpp
HexagonTargetObjectFile.cpp
diff --git a/lib/Target/Hexagon/Hexagon.h b/lib/Target/Hexagon/Hexagon.h
index e0a3b2f..dfe79f9 100644
--- a/lib/Target/Hexagon/Hexagon.h
+++ b/lib/Target/Hexagon/Hexagon.h
@@ -36,7 +36,6 @@ namespace llvm {
FunctionPass *createHexagonRemoveExtendArgs(const HexagonTargetMachine &TM);
FunctionPass *createHexagonCFGOptimizer();
- FunctionPass *createHexagonSplitTFRCondSets();
FunctionPass *createHexagonSplitConst32AndConst64();
FunctionPass *createHexagonExpandPredSpillCode();
FunctionPass *createHexagonHardwareLoops();
diff --git a/lib/Target/Hexagon/Hexagon.td b/lib/Target/Hexagon/Hexagon.td
index f892c9f..53a687c 100644
--- a/lib/Target/Hexagon/Hexagon.td
+++ b/lib/Target/Hexagon/Hexagon.td
@@ -28,10 +28,10 @@ def ArchV5: SubtargetFeature<"v5", "HexagonArchVersion", "V5", "Hexagon V5">;
//===----------------------------------------------------------------------===//
// Hexagon Instruction Predicate Definitions.
//===----------------------------------------------------------------------===//
-def HasV5T : Predicate<"Subtarget->hasV5TOps()">;
-def NoV5T : Predicate<"!Subtarget->hasV5TOps()">;
-def UseMEMOP : Predicate<"Subtarget->useMemOps()">;
-def IEEERndNearV5T : Predicate<"Subtarget->modeIEEERndNear()">;
+def HasV5T : Predicate<"HST->hasV5TOps()">;
+def NoV5T : Predicate<"!HST->hasV5TOps()">;
+def UseMEMOP : Predicate<"HST->useMemOps()">;
+def IEEERndNearV5T : Predicate<"HST->modeIEEERndNear()">;
//===----------------------------------------------------------------------===//
// Classes used for relation maps.
@@ -168,14 +168,6 @@ def getRegForm : InstrMapping {
let ValueCols = [["reg"]];
}
-def getRegShlForm : InstrMapping {
- let FilterClass = "ImmRegShl";
- let RowFields = ["CextOpcode", "PredSense", "PNewValue", "isNVStore"];
- let ColFields = ["InputType"];
- let KeyCol = ["imm"];
- let ValueCols = [["reg"]];
-}
-
//===----------------------------------------------------------------------===//
// Register File, Calling Conv, Instruction Descriptions
//===----------------------------------------------------------------------===//
diff --git a/lib/Target/Hexagon/HexagonCopyToCombine.cpp b/lib/Target/Hexagon/HexagonCopyToCombine.cpp
index dd193f9..5a26045 100644
--- a/lib/Target/Hexagon/HexagonCopyToCombine.cpp
+++ b/lib/Target/Hexagon/HexagonCopyToCombine.cpp
@@ -127,12 +127,21 @@ static bool isCombinableInstType(MachineInstr *MI,
case Hexagon::A2_tfrsi: {
      // A transfer-immediate can be combined if its argument is a signed 8-bit
// value.
- assert(MI->getOperand(0).isReg() && MI->getOperand(1).isImm());
- unsigned DestReg = MI->getOperand(0).getReg();
+ const MachineOperand &Op0 = MI->getOperand(0);
+ const MachineOperand &Op1 = MI->getOperand(1);
+ assert(Op0.isReg());
+
+ unsigned DestReg = Op0.getReg();
+ // Ensure that TargetFlags are MO_NO_FLAG for a global. This is a
+ // workaround for an ABI bug that prevents GOT relocations on combine
+      // instructions.
+ if (!Op1.isImm() && Op1.getTargetFlags() != HexagonII::MO_NO_FLAG)
+ return false;
- // Only combine constant extended TFRI if we are in aggressive mode.
+ // Only combine constant extended A2_tfrsi if we are in aggressive mode.
+ bool NotExt = Op1.isImm() && isInt<8>(Op1.getImm());
return Hexagon::IntRegsRegClass.contains(DestReg) &&
- (ShouldCombineAggressively || isInt<8>(MI->getOperand(1).getImm()));
+ (ShouldCombineAggressively || NotExt);
}
case Hexagon::TFRI_V4: {
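
The combinability test above hinges on whether the transfer-immediate fits in a signed 8-bit field; wider immediates need a constant extender and are only combined in aggressive mode. The check llvm::isInt<8> performs is equivalent to this standalone sketch:

    #include <cassert>
    #include <cstdint>
    static bool fitsSigned8(int64_t V) { return V >= -128 && V <= 127; }
    int main() {
      assert(fitsSigned8(-128) && fitsSigned8(127));
      assert(!fitsSigned8(128)); // would need a constant extender
    }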
diff --git a/lib/Target/Hexagon/HexagonExpandPredSpillCode.cpp b/lib/Target/Hexagon/HexagonExpandPredSpillCode.cpp
index 8176598..40059fb 100644
--- a/lib/Target/Hexagon/HexagonExpandPredSpillCode.cpp
+++ b/lib/Target/Hexagon/HexagonExpandPredSpillCode.cpp
@@ -79,7 +79,166 @@ bool HexagonExpandPredSpillCode::runOnMachineFunction(MachineFunction &Fn) {
++MII) {
MachineInstr *MI = MII;
int Opc = MI->getOpcode();
- if (Opc == Hexagon::STriw_pred) {
+ if (Opc == Hexagon::S2_storerb_pci_pseudo ||
+ Opc == Hexagon::S2_storerh_pci_pseudo ||
+ Opc == Hexagon::S2_storeri_pci_pseudo ||
+ Opc == Hexagon::S2_storerd_pci_pseudo ||
+ Opc == Hexagon::S2_storerf_pci_pseudo) {
+ unsigned Opcode;
+ if (Opc == Hexagon::S2_storerd_pci_pseudo)
+ Opcode = Hexagon::S2_storerd_pci;
+ else if (Opc == Hexagon::S2_storeri_pci_pseudo)
+ Opcode = Hexagon::S2_storeri_pci;
+ else if (Opc == Hexagon::S2_storerh_pci_pseudo)
+ Opcode = Hexagon::S2_storerh_pci;
+ else if (Opc == Hexagon::S2_storerf_pci_pseudo)
+ Opcode = Hexagon::S2_storerf_pci;
+ else if (Opc == Hexagon::S2_storerb_pci_pseudo)
+ Opcode = Hexagon::S2_storerb_pci;
+ else
+ llvm_unreachable("wrong Opc");
+ MachineOperand &Op0 = MI->getOperand(0);
+ MachineOperand &Op1 = MI->getOperand(1);
+ MachineOperand &Op2 = MI->getOperand(2);
+ MachineOperand &Op3 = MI->getOperand(3); // Modifier value.
+ MachineOperand &Op4 = MI->getOperand(4);
+ // Emit a "C6 = Rn, C6 is the control register for M0".
+ BuildMI(*MBB, MII, MI->getDebugLoc(), TII->get(Hexagon::A2_tfrrcr),
+ Hexagon::C6)->addOperand(Op3);
+      // Replace the pseudo circular store with the real one.
+ MachineInstr *NewMI = BuildMI(*MBB, MII, MI->getDebugLoc(),
+ TII->get(Opcode));
+ NewMI->addOperand(Op0);
+ NewMI->addOperand(Op1);
+ NewMI->addOperand(Op4);
+ NewMI->addOperand(MachineOperand::CreateReg(Hexagon::M0,
+ false, /*isDef*/
+ false, /*isImpl*/
+ true /*isKill*/));
+ NewMI->addOperand(Op2);
+ MII = MBB->erase(MI);
+ --MII;
+ } else if (Opc == Hexagon::L2_loadrd_pci_pseudo ||
+ Opc == Hexagon::L2_loadri_pci_pseudo ||
+ Opc == Hexagon::L2_loadrh_pci_pseudo ||
+ Opc == Hexagon::L2_loadruh_pci_pseudo||
+ Opc == Hexagon::L2_loadrb_pci_pseudo ||
+ Opc == Hexagon::L2_loadrub_pci_pseudo) {
+ unsigned Opcode;
+ if (Opc == Hexagon::L2_loadrd_pci_pseudo)
+ Opcode = Hexagon::L2_loadrd_pci;
+ else if (Opc == Hexagon::L2_loadri_pci_pseudo)
+ Opcode = Hexagon::L2_loadri_pci;
+ else if (Opc == Hexagon::L2_loadrh_pci_pseudo)
+ Opcode = Hexagon::L2_loadrh_pci;
+ else if (Opc == Hexagon::L2_loadruh_pci_pseudo)
+ Opcode = Hexagon::L2_loadruh_pci;
+ else if (Opc == Hexagon::L2_loadrb_pci_pseudo)
+ Opcode = Hexagon::L2_loadrb_pci;
+ else if (Opc == Hexagon::L2_loadrub_pci_pseudo)
+ Opcode = Hexagon::L2_loadrub_pci;
+ else
+ llvm_unreachable("wrong Opc");
+
+ MachineOperand &Op0 = MI->getOperand(0);
+ MachineOperand &Op1 = MI->getOperand(1);
+ MachineOperand &Op2 = MI->getOperand(2);
+ MachineOperand &Op4 = MI->getOperand(4); // Modifier value.
+ MachineOperand &Op5 = MI->getOperand(5);
+ // Emit a "C6 = Rn, C6 is the control register for M0".
+ BuildMI(*MBB, MII, MI->getDebugLoc(), TII->get(Hexagon::A2_tfrrcr),
+ Hexagon::C6)->addOperand(Op4);
+      // Replace the pseudo circular load with the real one.
+ MachineInstr *NewMI = BuildMI(*MBB, MII, MI->getDebugLoc(),
+ TII->get(Opcode));
+ NewMI->addOperand(Op1);
+ NewMI->addOperand(Op0);
+ NewMI->addOperand(Op2);
+ NewMI->addOperand(Op5);
+ NewMI->addOperand(MachineOperand::CreateReg(Hexagon::M0,
+ false, /*isDef*/
+ false, /*isImpl*/
+ true /*isKill*/));
+ MII = MBB->erase(MI);
+ --MII;
+ } else if (Opc == Hexagon::L2_loadrd_pbr_pseudo ||
+ Opc == Hexagon::L2_loadri_pbr_pseudo ||
+ Opc == Hexagon::L2_loadrh_pbr_pseudo ||
+ Opc == Hexagon::L2_loadruh_pbr_pseudo||
+ Opc == Hexagon::L2_loadrb_pbr_pseudo ||
+ Opc == Hexagon::L2_loadrub_pbr_pseudo) {
+ unsigned Opcode;
+ if (Opc == Hexagon::L2_loadrd_pbr_pseudo)
+ Opcode = Hexagon::L2_loadrd_pbr;
+ else if (Opc == Hexagon::L2_loadri_pbr_pseudo)
+ Opcode = Hexagon::L2_loadri_pbr;
+ else if (Opc == Hexagon::L2_loadrh_pbr_pseudo)
+ Opcode = Hexagon::L2_loadrh_pbr;
+ else if (Opc == Hexagon::L2_loadruh_pbr_pseudo)
+ Opcode = Hexagon::L2_loadruh_pbr;
+ else if (Opc == Hexagon::L2_loadrb_pbr_pseudo)
+ Opcode = Hexagon::L2_loadrb_pbr;
+ else if (Opc == Hexagon::L2_loadrub_pbr_pseudo)
+ Opcode = Hexagon::L2_loadrub_pbr;
+ else
+ llvm_unreachable("wrong Opc");
+ MachineOperand &Op0 = MI->getOperand(0);
+ MachineOperand &Op1 = MI->getOperand(1);
+ MachineOperand &Op2 = MI->getOperand(2);
+ MachineOperand &Op4 = MI->getOperand(4); // Modifier value.
+ // Emit a "C6 = Rn, C6 is the control register for M0".
+ BuildMI(*MBB, MII, MI->getDebugLoc(), TII->get(Hexagon::A2_tfrrcr),
+ Hexagon::C6)->addOperand(Op4);
+      // Replace the pseudo bit-reversed load with the real one.
+ MachineInstr *NewMI = BuildMI(*MBB, MII, MI->getDebugLoc(),
+ TII->get(Opcode));
+ NewMI->addOperand(Op1);
+ NewMI->addOperand(Op0);
+ NewMI->addOperand(Op2);
+ NewMI->addOperand(MachineOperand::CreateReg(Hexagon::M0,
+ false, /*isDef*/
+ false, /*isImpl*/
+ true /*isKill*/));
+ MII = MBB->erase(MI);
+ --MII;
+ } else if (Opc == Hexagon::S2_storerd_pbr_pseudo ||
+ Opc == Hexagon::S2_storeri_pbr_pseudo ||
+ Opc == Hexagon::S2_storerh_pbr_pseudo ||
+ Opc == Hexagon::S2_storerb_pbr_pseudo ||
+ Opc == Hexagon::S2_storerf_pbr_pseudo) {
+ unsigned Opcode;
+ if (Opc == Hexagon::S2_storerd_pbr_pseudo)
+ Opcode = Hexagon::S2_storerd_pbr;
+ else if (Opc == Hexagon::S2_storeri_pbr_pseudo)
+ Opcode = Hexagon::S2_storeri_pbr;
+ else if (Opc == Hexagon::S2_storerh_pbr_pseudo)
+ Opcode = Hexagon::S2_storerh_pbr;
+ else if (Opc == Hexagon::S2_storerf_pbr_pseudo)
+ Opcode = Hexagon::S2_storerf_pbr;
+ else if (Opc == Hexagon::S2_storerb_pbr_pseudo)
+ Opcode = Hexagon::S2_storerb_pbr;
+ else
+ llvm_unreachable("wrong Opc");
+ MachineOperand &Op0 = MI->getOperand(0);
+ MachineOperand &Op1 = MI->getOperand(1);
+ MachineOperand &Op2 = MI->getOperand(2);
+ MachineOperand &Op3 = MI->getOperand(3); // Modifier value.
+ // Emit a "C6 = Rn, C6 is the control register for M0".
+ BuildMI(*MBB, MII, MI->getDebugLoc(), TII->get(Hexagon::A2_tfrrcr),
+ Hexagon::C6)->addOperand(Op3);
+      // Replace the pseudo bit-reversed store with the real one.
+ MachineInstr *NewMI = BuildMI(*MBB, MII, MI->getDebugLoc(),
+ TII->get(Opcode));
+ NewMI->addOperand(Op0);
+ NewMI->addOperand(Op1);
+ NewMI->addOperand(MachineOperand::CreateReg(Hexagon::M0,
+ false, /*isDef*/
+ false, /*isImpl*/
+ true /*isKill*/));
+ NewMI->addOperand(Op2);
+ MII = MBB->erase(MI);
+ --MII;
+ } else if (Opc == Hexagon::STriw_pred) {
// STriw_pred [R30], ofst, SrcReg;
unsigned FP = MI->getOperand(0).getReg();
assert(FP == QST.getRegisterInfo()->getFrameRegister() &&
diff --git a/lib/Target/Hexagon/HexagonFrameLowering.cpp b/lib/Target/Hexagon/HexagonFrameLowering.cpp
index 2b1992f..65d689b 100644
--- a/lib/Target/Hexagon/HexagonFrameLowering.cpp
+++ b/lib/Target/Hexagon/HexagonFrameLowering.cpp
@@ -140,7 +140,7 @@ bool HexagonFrameLowering::hasTailCall(MachineBasicBlock &MBB) const {
MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr();
unsigned RetOpcode = MBBI->getOpcode();
- return RetOpcode == Hexagon::TCRETURNtg || RetOpcode == Hexagon::TCRETURNtext;
+ return RetOpcode == Hexagon::TCRETURNi || RetOpcode == Hexagon::TCRETURNr;
}
void HexagonFrameLowering::emitEpilogue(MachineFunction &MF,
diff --git a/lib/Target/Hexagon/HexagonHardwareLoops.cpp b/lib/Target/Hexagon/HexagonHardwareLoops.cpp
index 1577c33..c47ee9c 100644
--- a/lib/Target/Hexagon/HexagonHardwareLoops.cpp
+++ b/lib/Target/Hexagon/HexagonHardwareLoops.cpp
@@ -690,7 +690,7 @@ CountValue *HexagonHardwareLoops::computeCount(MachineLoop *Loop,
// If the induction variable bump is not a power of 2, quit.
// Othwerise we'd need a general integer division.
- if (!isPowerOf2_64(abs64(IVBump)))
+ if (!isPowerOf2_64(std::abs(IVBump)))
return nullptr;
MachineBasicBlock *PH = Loop->getLoopPreheader();
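
Replacing abs64 with std::abs keeps the same trip-count logic: the bump must be a power of two so the count can be computed with a shift rather than a general integer division. For reference, the classic single-bit test behind isPowerOf2_64:

    #include <cassert>
    #include <cstdint>
    #include <cstdlib>
    // A nonzero value is a power of two iff clearing its lowest set bit yields zero.
    static bool isPow2(uint64_t V) { return V != 0 && (V & (V - 1)) == 0; }
    int main() {
      int64_t IVBump = -4;                      // e.g. a loop counting down by 4
      assert(isPow2((uint64_t)std::abs(IVBump)));
      assert(!isPow2(12));                      // 12 would need a real division
    }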
diff --git a/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp b/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp
index fb056b5..aaccac8 100644
--- a/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp
+++ b/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp
@@ -45,37 +45,25 @@ namespace llvm {
///
namespace {
class HexagonDAGToDAGISel : public SelectionDAGISel {
- /// Subtarget - Keep a pointer to the Hexagon Subtarget around so that we can
- /// make the right decision when generating code for different targets.
- const HexagonSubtarget *Subtarget;
-
- // Keep a reference to HexagonTargetMachine.
- const HexagonTargetMachine& TM;
- DenseMap<const GlobalValue *, unsigned> GlobalAddressUseCountMap;
+ const HexagonTargetMachine& HTM;
+ const HexagonSubtarget *HST;
public:
- explicit HexagonDAGToDAGISel(HexagonTargetMachine &targetmachine,
+ explicit HexagonDAGToDAGISel(HexagonTargetMachine &tm,
CodeGenOpt::Level OptLevel)
- : SelectionDAGISel(targetmachine, OptLevel), TM(targetmachine) {
+ : SelectionDAGISel(tm, OptLevel), HTM(tm) {
initializeHexagonDAGToDAGISelPass(*PassRegistry::getPassRegistry());
}
- bool hasNumUsesBelowThresGA(SDNode *N) const;
- SDNode *Select(SDNode *N) override;
+ bool runOnMachineFunction(MachineFunction &MF) override {
+ // Reset the subtarget each time through.
+ HST = &MF.getSubtarget<HexagonSubtarget>();
+ SelectionDAGISel::runOnMachineFunction(MF);
+ return true;
+ }
- // Complex Pattern Selectors.
- inline bool foldGlobalAddress(SDValue &N, SDValue &R);
- inline bool foldGlobalAddressGP(SDValue &N, SDValue &R);
- bool foldGlobalAddressImpl(SDValue &N, SDValue &R, bool ShouldLookForGP);
- bool SelectADDRri(SDValue& N, SDValue &R1, SDValue &R2);
- bool SelectADDRriS11_0(SDValue& N, SDValue &R1, SDValue &R2);
- bool SelectADDRriS11_1(SDValue& N, SDValue &R1, SDValue &R2);
- bool SelectADDRriS11_2(SDValue& N, SDValue &R1, SDValue &R2);
- bool SelectMEMriS11_2(SDValue& Addr, SDValue &Base, SDValue &Offset);
- bool SelectADDRriS11_3(SDValue& N, SDValue &R1, SDValue &R2);
- bool SelectADDRrr(SDValue &Addr, SDValue &Base, SDValue &Offset);
- bool SelectADDRriU6_0(SDValue& N, SDValue &R1, SDValue &R2);
- bool SelectADDRriU6_1(SDValue& N, SDValue &R1, SDValue &R2);
- bool SelectADDRriU6_2(SDValue& N, SDValue &R1, SDValue &R2);
+ virtual void PreprocessISelDAG() override;
+
+ SDNode *Select(SDNode *N) override;
// Complex Pattern Selectors.
inline bool SelectAddrGA(SDValue &N, SDValue &R);
@@ -87,18 +75,12 @@ public:
return "Hexagon DAG->DAG Pattern Instruction Selection";
}
- bool runOnMachineFunction(MachineFunction &MF) override {
- Subtarget = &MF.getSubtarget<HexagonSubtarget>();
- return SelectionDAGISel::runOnMachineFunction(MF);
- }
-
+ SDNode *SelectFrameIndex(SDNode *N);
/// SelectInlineAsmMemoryOperand - Implement addressing mode selection for
/// inline asm expressions.
bool SelectInlineAsmMemoryOperand(const SDValue &Op,
- char ConstraintCode,
+ unsigned ConstraintID,
std::vector<SDValue> &OutOps) override;
- bool SelectAddr(SDNode *Op, SDValue Addr, SDValue &Base, SDValue &Offset);
-
SDNode *SelectLoad(SDNode *N);
SDNode *SelectBaseOffsetLoad(LoadSDNode *LD, SDLoc dl);
SDNode *SelectIndexedLoad(LoadSDNode *LD, SDLoc dl);
@@ -110,99 +92,98 @@ public:
SDNode *SelectIndexedStore(StoreSDNode *ST, SDLoc dl);
SDNode *SelectStore(SDNode *N);
SDNode *SelectSHL(SDNode *N);
- SDNode *SelectSelect(SDNode *N);
- SDNode *SelectTruncate(SDNode *N);
SDNode *SelectMul(SDNode *N);
SDNode *SelectZeroExtend(SDNode *N);
- SDNode *SelectIntrinsicWOChain(SDNode *N);
SDNode *SelectIntrinsicWChain(SDNode *N);
+ SDNode *SelectIntrinsicWOChain(SDNode *N);
SDNode *SelectConstant(SDNode *N);
SDNode *SelectConstantFP(SDNode *N);
SDNode *SelectAdd(SDNode *N);
- bool isConstExtProfitable(SDNode *N) const;
-
-// XformMskToBitPosU5Imm - Returns the bit position which
-// the single bit 32 bit mask represents.
-// Used in Clr and Set bit immediate memops.
-SDValue XformMskToBitPosU5Imm(uint32_t Imm) {
- int32_t bitPos;
- bitPos = Log2_32(Imm);
- assert(bitPos >= 0 && bitPos < 32 &&
- "Constant out of range for 32 BitPos Memops");
- return CurDAG->getTargetConstant(bitPos, MVT::i32);
-}
-
-// XformMskToBitPosU4Imm - Returns the bit position which the single bit 16 bit
-// mask represents. Used in Clr and Set bit immediate memops.
-SDValue XformMskToBitPosU4Imm(uint16_t Imm) {
- return XformMskToBitPosU5Imm(Imm);
-}
+ SDNode *SelectBitOp(SDNode *N);
+
+ // XformMskToBitPosU5Imm - Returns the bit position which
+ // the single bit 32 bit mask represents.
+ // Used in Clr and Set bit immediate memops.
+ SDValue XformMskToBitPosU5Imm(uint32_t Imm) {
+ int32_t bitPos;
+ bitPos = Log2_32(Imm);
+ assert(bitPos >= 0 && bitPos < 32 &&
+ "Constant out of range for 32 BitPos Memops");
+ return CurDAG->getTargetConstant(bitPos, MVT::i32);
+ }
-// XformMskToBitPosU3Imm - Returns the bit position which the single bit 8 bit
-// mask represents. Used in Clr and Set bit immediate memops.
-SDValue XformMskToBitPosU3Imm(uint8_t Imm) {
- return XformMskToBitPosU5Imm(Imm);
-}
+ // XformMskToBitPosU4Imm - Returns the bit position which the single-bit
+ // 16 bit mask represents. Used in Clr and Set bit immediate memops.
+ SDValue XformMskToBitPosU4Imm(uint16_t Imm) {
+ return XformMskToBitPosU5Imm(Imm);
+ }
-// Return true if there is exactly one bit set in V, i.e., if V is one of the
-// following integers: 2^0, 2^1, ..., 2^31.
-bool ImmIsSingleBit(uint32_t v) const {
- return isPowerOf2_32(v);
-}
+ // XformMskToBitPosU3Imm - Returns the bit position which the single-bit
+ // 8 bit mask represents. Used in Clr and Set bit immediate memops.
+ SDValue XformMskToBitPosU3Imm(uint8_t Imm) {
+ return XformMskToBitPosU5Imm(Imm);
+ }
-// XformM5ToU5Imm - Return a target constant with the specified value, of type
-// i32 where the negative literal is transformed into a positive literal for
-// use in -= memops.
-inline SDValue XformM5ToU5Imm(signed Imm) {
- assert( (Imm >= -31 && Imm <= -1) && "Constant out of range for Memops");
- return CurDAG->getTargetConstant( - Imm, MVT::i32);
-}
+ // Return true if there is exactly one bit set in V, i.e., if V is one of the
+ // following integers: 2^0, 2^1, ..., 2^31.
+ bool ImmIsSingleBit(uint32_t v) const {
+ return isPowerOf2_32(v);
+ }
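
These Xform helpers convert a single-bit mask operand into the bit-position immediate that the memop instructions expect (and, further down, shift negative or off-by-one literals into the encodable range). For example, the mask 0x80 encodes bit position 7. A standalone equivalent of the mask-to-position transform:

    #include <cassert>
    #include <cstdint>
    static bool singleBit(uint32_t V) { return V && !(V & (V - 1)); }
    static int maskToBitPos(uint32_t Mask) {
      assert(singleBit(Mask) && "memop immediate must have exactly one bit set");
      int Pos = 0;
      while (!(Mask & 1)) { Mask >>= 1; ++Pos; }
      return Pos;
    }
    int main() { assert(maskToBitPos(0x80u) == 7); }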
+ // XformM5ToU5Imm - Return a target constant with the specified value, of
+ // type i32 where the negative literal is transformed into a positive literal
+ // for use in -= memops.
+ inline SDValue XformM5ToU5Imm(signed Imm) {
+ assert( (Imm >= -31 && Imm <= -1) && "Constant out of range for Memops");
+ return CurDAG->getTargetConstant( - Imm, MVT::i32);
+ }
-// XformU7ToU7M1Imm - Return a target constant decremented by 1, in range
-// [1..128], used in cmpb.gtu instructions.
-inline SDValue XformU7ToU7M1Imm(signed Imm) {
- assert((Imm >= 1 && Imm <= 128) && "Constant out of range for cmpb op");
- return CurDAG->getTargetConstant(Imm - 1, MVT::i8);
-}
+ // XformU7ToU7M1Imm - Return a target constant decremented by 1, in range
+ // [1..128], used in cmpb.gtu instructions.
+ inline SDValue XformU7ToU7M1Imm(signed Imm) {
+ assert((Imm >= 1 && Imm <= 128) && "Constant out of range for cmpb op");
+ return CurDAG->getTargetConstant(Imm - 1, MVT::i8);
+ }
-// XformS8ToS8M1Imm - Return a target constant decremented by 1.
-inline SDValue XformSToSM1Imm(signed Imm) {
- return CurDAG->getTargetConstant(Imm - 1, MVT::i32);
-}
+ // XformS8ToS8M1Imm - Return a target constant decremented by 1.
+ inline SDValue XformSToSM1Imm(signed Imm) {
+ return CurDAG->getTargetConstant(Imm - 1, MVT::i32);
+ }
-// XformU8ToU8M1Imm - Return a target constant decremented by 1.
-inline SDValue XformUToUM1Imm(unsigned Imm) {
- assert((Imm >= 1) && "Cannot decrement unsigned int less than 1");
- return CurDAG->getTargetConstant(Imm - 1, MVT::i32);
-}
+ // XformU8ToU8M1Imm - Return a target constant decremented by 1.
+ inline SDValue XformUToUM1Imm(unsigned Imm) {
+ assert((Imm >= 1) && "Cannot decrement unsigned int less than 1");
+ return CurDAG->getTargetConstant(Imm - 1, MVT::i32);
+ }
-// XformSToSM2Imm - Return a target constant decremented by 2.
-inline SDValue XformSToSM2Imm(unsigned Imm) {
- return CurDAG->getTargetConstant(Imm - 2, MVT::i32);
-}
+ // XformSToSM2Imm - Return a target constant decremented by 2.
+ inline SDValue XformSToSM2Imm(unsigned Imm) {
+ return CurDAG->getTargetConstant(Imm - 2, MVT::i32);
+ }
-// XformSToSM3Imm - Return a target constant decremented by 3.
-inline SDValue XformSToSM3Imm(unsigned Imm) {
- return CurDAG->getTargetConstant(Imm - 3, MVT::i32);
-}
+ // XformSToSM3Imm - Return a target constant decremented by 3.
+ inline SDValue XformSToSM3Imm(unsigned Imm) {
+ return CurDAG->getTargetConstant(Imm - 3, MVT::i32);
+ }
-// Include the pieces autogenerated from the target description.
-#include "HexagonGenDAGISel.inc"
+ // Include the pieces autogenerated from the target description.
+ #include "HexagonGenDAGISel.inc"
private:
- bool isValueExtension(SDValue const &Val, unsigned FromBits, SDValue &Src);
-};
+ bool isValueExtension(const SDValue &Val, unsigned FromBits, SDValue &Src);
+}; // end HexagonDAGToDAGISel
} // end anonymous namespace
/// createHexagonISelDag - This pass converts a legalized DAG into a
/// Hexagon-specific DAG, ready for instruction scheduling.
///
-FunctionPass *llvm::createHexagonISelDag(HexagonTargetMachine &TM,
- CodeGenOpt::Level OptLevel) {
+namespace llvm {
+FunctionPass *createHexagonISelDag(HexagonTargetMachine &TM,
+ CodeGenOpt::Level OptLevel) {
return new HexagonDAGToDAGISel(TM, OptLevel);
}
+}
static void initializePassOnce(PassRegistry &Registry) {
const char *Name = "Hexagon DAG->DAG Pattern Instruction Selection";
@@ -216,76 +197,6 @@ void llvm::initializeHexagonDAGToDAGISelPass(PassRegistry &Registry) {
}
-static bool IsS11_0_Offset(SDNode * S) {
- ConstantSDNode *N = cast<ConstantSDNode>(S);
-
- // immS16 predicate - True if the immediate fits in a 16-bit sign extended
- // field.
- int64_t v = (int64_t)N->getSExtValue();
- return isInt<11>(v);
-}
-
-
-static bool IsS11_1_Offset(SDNode * S) {
- ConstantSDNode *N = cast<ConstantSDNode>(S);
-
- // immS16 predicate - True if the immediate fits in a 16-bit sign extended
- // field.
- int64_t v = (int64_t)N->getSExtValue();
- return isShiftedInt<11,1>(v);
-}
-
-
-static bool IsS11_2_Offset(SDNode * S) {
- ConstantSDNode *N = cast<ConstantSDNode>(S);
-
- // immS16 predicate - True if the immediate fits in a 16-bit sign extended
- // field.
- int64_t v = (int64_t)N->getSExtValue();
- return isShiftedInt<11,2>(v);
-}
-
-
-static bool IsS11_3_Offset(SDNode * S) {
- ConstantSDNode *N = cast<ConstantSDNode>(S);
-
- // immS16 predicate - True if the immediate fits in a 16-bit sign extended
- // field.
- int64_t v = (int64_t)N->getSExtValue();
- return isShiftedInt<11,3>(v);
-}
-
-
-static bool IsU6_0_Offset(SDNode * S) {
- ConstantSDNode *N = cast<ConstantSDNode>(S);
-
- // u6 predicate - True if the immediate fits in a 6-bit unsigned extended
- // field.
- int64_t v = (int64_t)N->getSExtValue();
- return isUInt<6>(v);
-}
-
-
-static bool IsU6_1_Offset(SDNode * S) {
- ConstantSDNode *N = cast<ConstantSDNode>(S);
-
- // u6 predicate - True if the immediate fits in a 6-bit unsigned extended
- // field.
- int64_t v = (int64_t)N->getSExtValue();
- return isShiftedUInt<6,1>(v);
-}
-
-
-static bool IsU6_2_Offset(SDNode * S) {
- ConstantSDNode *N = cast<ConstantSDNode>(S);
-
- // u6 predicate - True if the immediate fits in a 6-bit unsigned extended
- // field.
- int64_t v = (int64_t)N->getSExtValue();
- return isShiftedUInt<6,2>(v);
-}
-
-
// Intrinsics that return a predicate.
static unsigned doesIntrinsicReturnPredicate(unsigned ID)
{
@@ -332,216 +243,119 @@ static unsigned doesIntrinsicReturnPredicate(unsigned ID)
}
}
-static bool OffsetFitsS11(EVT MemType, int64_t Offset) {
- if (MemType == MVT::i64 && isShiftedInt<11,3>(Offset)) {
- return true;
- }
- if (MemType == MVT::i32 && isShiftedInt<11,2>(Offset)) {
- return true;
- }
- if (MemType == MVT::i16 && isShiftedInt<11,1>(Offset)) {
- return true;
- }
- if (MemType == MVT::i8 && isInt<11>(Offset)) {
- return true;
- }
- return false;
-}
-
-
-//
-// Try to lower loads of GlobalAdresses into base+offset loads. Custom
-// lowering for GlobalAddress nodes has already turned it into a
-// CONST32.
-//
-SDNode *HexagonDAGToDAGISel::SelectBaseOffsetLoad(LoadSDNode *LD, SDLoc dl) {
- SDValue Chain = LD->getChain();
- SDNode* Const32 = LD->getBasePtr().getNode();
- unsigned Opcode = 0;
-
- if (Const32->getOpcode() == HexagonISD::CONST32 &&
- ISD::isNormalLoad(LD)) {
- SDValue Base = Const32->getOperand(0);
- EVT LoadedVT = LD->getMemoryVT();
- int64_t Offset = cast<GlobalAddressSDNode>(Base)->getOffset();
- if (Offset != 0 && OffsetFitsS11(LoadedVT, Offset)) {
- MVT PointerTy = getTargetLowering()->getPointerTy();
- const GlobalValue* GV =
- cast<GlobalAddressSDNode>(Base)->getGlobal();
- SDValue TargAddr =
- CurDAG->getTargetGlobalAddress(GV, dl, PointerTy, 0);
- SDNode* NewBase = CurDAG->getMachineNode(Hexagon::CONST32_set,
- dl, PointerTy,
- TargAddr);
- // Figure out base + offset opcode
- if (LoadedVT == MVT::i64) Opcode = Hexagon::L2_loadrd_io;
- else if (LoadedVT == MVT::i32) Opcode = Hexagon::L2_loadri_io;
- else if (LoadedVT == MVT::i16) Opcode = Hexagon::L2_loadrh_io;
- else if (LoadedVT == MVT::i8) Opcode = Hexagon::L2_loadrb_io;
- else llvm_unreachable("unknown memory type");
-
- // Build indexed load.
- SDValue TargetConstOff = CurDAG->getTargetConstant(Offset, PointerTy);
- SDNode* Result = CurDAG->getMachineNode(Opcode, dl,
- LD->getValueType(0),
- MVT::Other,
- SDValue(NewBase,0),
- TargetConstOff,
- Chain);
- MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
- MemOp[0] = LD->getMemOperand();
- cast<MachineSDNode>(Result)->setMemRefs(MemOp, MemOp + 1);
- ReplaceUses(LD, Result);
- return Result;
- }
- }
-
- return SelectCode(LD);
-}
-
-
SDNode *HexagonDAGToDAGISel::SelectIndexedLoadSignExtend64(LoadSDNode *LD,
unsigned Opcode,
- SDLoc dl)
-{
+ SDLoc dl) {
SDValue Chain = LD->getChain();
EVT LoadedVT = LD->getMemoryVT();
SDValue Base = LD->getBasePtr();
SDValue Offset = LD->getOffset();
SDNode *OffsetNode = Offset.getNode();
int32_t Val = cast<ConstantSDNode>(OffsetNode)->getSExtValue();
- SDValue N1 = LD->getOperand(1);
- SDValue CPTmpN1_0;
- SDValue CPTmpN1_1;
-
- if (SelectADDRriS11_2(N1, CPTmpN1_0, CPTmpN1_1) &&
- N1.getNode()->getValueType(0) == MVT::i32) {
- const HexagonInstrInfo *TII = Subtarget->getInstrInfo();
- if (TII->isValidAutoIncImm(LoadedVT, Val)) {
- SDValue TargetConst = CurDAG->getTargetConstant(Val, MVT::i32);
- SDNode *Result_1 = CurDAG->getMachineNode(Opcode, dl, MVT::i32, MVT::i32,
- MVT::Other, Base, TargetConst,
- Chain);
- SDNode *Result_2 = CurDAG->getMachineNode(Hexagon::A2_sxtw, dl, MVT::i64,
- SDValue(Result_1, 0));
- MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
- MemOp[0] = LD->getMemOperand();
- cast<MachineSDNode>(Result_1)->setMemRefs(MemOp, MemOp + 1);
- const SDValue Froms[] = { SDValue(LD, 0),
- SDValue(LD, 1),
- SDValue(LD, 2)
- };
- const SDValue Tos[] = { SDValue(Result_2, 0),
- SDValue(Result_1, 1),
- SDValue(Result_1, 2)
- };
- ReplaceUses(Froms, Tos, 3);
- return Result_2;
- }
- SDValue TargetConst0 = CurDAG->getTargetConstant(0, MVT::i32);
- SDValue TargetConstVal = CurDAG->getTargetConstant(Val, MVT::i32);
- SDNode *Result_1 = CurDAG->getMachineNode(Opcode, dl, MVT::i32,
- MVT::Other, Base, TargetConst0,
+
+ const HexagonInstrInfo &TII = *HST->getInstrInfo();
+ if (TII.isValidAutoIncImm(LoadedVT, Val)) {
+ SDValue TargetConst = CurDAG->getTargetConstant(Val, MVT::i32);
+ SDNode *Result_1 = CurDAG->getMachineNode(Opcode, dl, MVT::i32, MVT::i32,
+ MVT::Other, Base, TargetConst,
Chain);
- SDNode *Result_2 = CurDAG->getMachineNode(Hexagon::A2_sxtw, dl,
- MVT::i64, SDValue(Result_1, 0));
- SDNode* Result_3 = CurDAG->getMachineNode(Hexagon::A2_addi, dl,
- MVT::i32, Base, TargetConstVal,
- SDValue(Result_1, 1));
+ SDNode *Result_2 = CurDAG->getMachineNode(Hexagon::A2_sxtw, dl, MVT::i64,
+ SDValue(Result_1, 0));
MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
MemOp[0] = LD->getMemOperand();
cast<MachineSDNode>(Result_1)->setMemRefs(MemOp, MemOp + 1);
const SDValue Froms[] = { SDValue(LD, 0),
SDValue(LD, 1),
- SDValue(LD, 2)
- };
+ SDValue(LD, 2) };
const SDValue Tos[] = { SDValue(Result_2, 0),
- SDValue(Result_3, 0),
- SDValue(Result_1, 1)
- };
+ SDValue(Result_1, 1),
+ SDValue(Result_1, 2) };
ReplaceUses(Froms, Tos, 3);
return Result_2;
}
- return SelectCode(LD);
+
+ SDValue TargetConst0 = CurDAG->getTargetConstant(0, MVT::i32);
+ SDValue TargetConstVal = CurDAG->getTargetConstant(Val, MVT::i32);
+ SDNode *Result_1 = CurDAG->getMachineNode(Opcode, dl, MVT::i32, MVT::Other,
+ Base, TargetConst0, Chain);
+ SDNode *Result_2 = CurDAG->getMachineNode(Hexagon::A2_sxtw, dl, MVT::i64,
+ SDValue(Result_1, 0));
+ SDNode* Result_3 = CurDAG->getMachineNode(Hexagon::A2_addi, dl, MVT::i32,
+ Base, TargetConstVal,
+ SDValue(Result_1, 1));
+ MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
+ MemOp[0] = LD->getMemOperand();
+ cast<MachineSDNode>(Result_1)->setMemRefs(MemOp, MemOp + 1);
+ const SDValue Froms[] = { SDValue(LD, 0),
+ SDValue(LD, 1),
+ SDValue(LD, 2) };
+ const SDValue Tos[] = { SDValue(Result_2, 0),
+ SDValue(Result_3, 0),
+ SDValue(Result_1, 1) };
+ ReplaceUses(Froms, Tos, 3);
+ return Result_2;
}
SDNode *HexagonDAGToDAGISel::SelectIndexedLoadZeroExtend64(LoadSDNode *LD,
unsigned Opcode,
- SDLoc dl)
-{
+ SDLoc dl) {
SDValue Chain = LD->getChain();
EVT LoadedVT = LD->getMemoryVT();
SDValue Base = LD->getBasePtr();
SDValue Offset = LD->getOffset();
SDNode *OffsetNode = Offset.getNode();
int32_t Val = cast<ConstantSDNode>(OffsetNode)->getSExtValue();
- SDValue N1 = LD->getOperand(1);
- SDValue CPTmpN1_0;
- SDValue CPTmpN1_1;
-
- if (SelectADDRriS11_2(N1, CPTmpN1_0, CPTmpN1_1) &&
- N1.getNode()->getValueType(0) == MVT::i32) {
- const HexagonInstrInfo *TII = Subtarget->getInstrInfo();
- if (TII->isValidAutoIncImm(LoadedVT, Val)) {
- SDValue TargetConstVal = CurDAG->getTargetConstant(Val, MVT::i32);
- SDValue TargetConst0 = CurDAG->getTargetConstant(0, MVT::i32);
- SDNode *Result_1 = CurDAG->getMachineNode(Opcode, dl, MVT::i32,
- MVT::i32, MVT::Other, Base,
- TargetConstVal, Chain);
- SDNode *Result_2 = CurDAG->getMachineNode(Hexagon::A2_tfrsi, dl, MVT::i32,
- TargetConst0);
- SDNode *Result_3 = CurDAG->getMachineNode(Hexagon::A2_combinew, dl,
- MVT::i64, MVT::Other,
- SDValue(Result_2,0),
- SDValue(Result_1,0));
- MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
- MemOp[0] = LD->getMemOperand();
- cast<MachineSDNode>(Result_1)->setMemRefs(MemOp, MemOp + 1);
- const SDValue Froms[] = { SDValue(LD, 0),
- SDValue(LD, 1),
- SDValue(LD, 2)
- };
- const SDValue Tos[] = { SDValue(Result_3, 0),
- SDValue(Result_1, 1),
- SDValue(Result_1, 2)
- };
- ReplaceUses(Froms, Tos, 3);
- return Result_3;
- }
- // Generate an indirect load.
- SDValue TargetConst0 = CurDAG->getTargetConstant(0, MVT::i32);
+ const HexagonInstrInfo &TII = *HST->getInstrInfo();
+ if (TII.isValidAutoIncImm(LoadedVT, Val)) {
SDValue TargetConstVal = CurDAG->getTargetConstant(Val, MVT::i32);
+ SDValue TargetConst0 = CurDAG->getTargetConstant(0, MVT::i32);
SDNode *Result_1 = CurDAG->getMachineNode(Opcode, dl, MVT::i32,
- MVT::Other,
- Base, TargetConst0, Chain);
- SDNode *Result_2 = CurDAG->getMachineNode(Hexagon::A2_tfrsi, dl, MVT::i32,
- TargetConst0);
- SDNode *Result_3 = CurDAG->getMachineNode(Hexagon::A2_combinew, dl,
+ MVT::i32, MVT::Other, Base,
+ TargetConstVal, Chain);
+ SDNode *Result_2 = CurDAG->getMachineNode(Hexagon::A4_combineir, dl,
MVT::i64, MVT::Other,
- SDValue(Result_2,0),
+ TargetConst0,
SDValue(Result_1,0));
- // Add offset to base.
- SDNode* Result_4 = CurDAG->getMachineNode(Hexagon::A2_addi, dl, MVT::i32,
- Base, TargetConstVal,
- SDValue(Result_1, 1));
MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
MemOp[0] = LD->getMemOperand();
cast<MachineSDNode>(Result_1)->setMemRefs(MemOp, MemOp + 1);
const SDValue Froms[] = { SDValue(LD, 0),
SDValue(LD, 1),
- SDValue(LD, 2)
- };
- const SDValue Tos[] = { SDValue(Result_3, 0), // Load value.
- SDValue(Result_4, 0), // New address.
- SDValue(Result_1, 1)
- };
+ SDValue(LD, 2) };
+ const SDValue Tos[] = { SDValue(Result_2, 0),
+ SDValue(Result_1, 1),
+ SDValue(Result_1, 2) };
ReplaceUses(Froms, Tos, 3);
- return Result_3;
+ return Result_2;
}
- return SelectCode(LD);
+ // Generate an indirect load.
+ SDValue TargetConst0 = CurDAG->getTargetConstant(0, MVT::i32);
+ SDValue TargetConstVal = CurDAG->getTargetConstant(Val, MVT::i32);
+ SDNode *Result_1 = CurDAG->getMachineNode(Opcode, dl, MVT::i32,
+ MVT::Other, Base, TargetConst0,
+ Chain);
+ SDNode *Result_2 = CurDAG->getMachineNode(Hexagon::A4_combineir, dl,
+ MVT::i64, MVT::Other,
+ TargetConst0,
+ SDValue(Result_1,0));
+ // Add offset to base.
+ SDNode* Result_3 = CurDAG->getMachineNode(Hexagon::A2_addi, dl, MVT::i32,
+ Base, TargetConstVal,
+ SDValue(Result_1, 1));
+ MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
+ MemOp[0] = LD->getMemOperand();
+ cast<MachineSDNode>(Result_1)->setMemRefs(MemOp, MemOp + 1);
+ const SDValue Froms[] = { SDValue(LD, 0),
+ SDValue(LD, 1),
+ SDValue(LD, 2) };
+ const SDValue Tos[] = { SDValue(Result_2, 0), // Load value.
+ SDValue(Result_3, 0), // New address.
+ SDValue(Result_1, 1) };
+ ReplaceUses(Froms, Tos, 3);
+ return Result_2;
}
@@ -555,45 +369,44 @@ SDNode *HexagonDAGToDAGISel::SelectIndexedLoad(LoadSDNode *LD, SDLoc dl) {
EVT LoadedVT = LD->getMemoryVT();
unsigned Opcode = 0;
- // Check for zero ext loads.
- bool zextval = (LD->getExtensionType() == ISD::ZEXTLOAD);
+ // Check for zero extended loads. Treat any-extend loads as zero extended
+ // loads.
+ ISD::LoadExtType ExtType = LD->getExtensionType();
+ bool IsZeroExt = (ExtType == ISD::ZEXTLOAD || ExtType == ISD::EXTLOAD);
// Figure out the opcode.
- const HexagonInstrInfo *TII = Subtarget->getInstrInfo();
+ const HexagonInstrInfo &TII = *HST->getInstrInfo();
if (LoadedVT == MVT::i64) {
- if (TII->isValidAutoIncImm(LoadedVT, Val))
+ if (TII.isValidAutoIncImm(LoadedVT, Val))
Opcode = Hexagon::L2_loadrd_pi;
else
Opcode = Hexagon::L2_loadrd_io;
} else if (LoadedVT == MVT::i32) {
- if (TII->isValidAutoIncImm(LoadedVT, Val))
+ if (TII.isValidAutoIncImm(LoadedVT, Val))
Opcode = Hexagon::L2_loadri_pi;
else
Opcode = Hexagon::L2_loadri_io;
} else if (LoadedVT == MVT::i16) {
- if (TII->isValidAutoIncImm(LoadedVT, Val))
- Opcode = zextval ? Hexagon::L2_loadruh_pi : Hexagon::L2_loadrh_pi;
+ if (TII.isValidAutoIncImm(LoadedVT, Val))
+ Opcode = IsZeroExt ? Hexagon::L2_loadruh_pi : Hexagon::L2_loadrh_pi;
else
- Opcode = zextval ? Hexagon::L2_loadruh_io : Hexagon::L2_loadrh_io;
+ Opcode = IsZeroExt ? Hexagon::L2_loadruh_io : Hexagon::L2_loadrh_io;
} else if (LoadedVT == MVT::i8) {
- if (TII->isValidAutoIncImm(LoadedVT, Val))
- Opcode = zextval ? Hexagon::L2_loadrub_pi : Hexagon::L2_loadrb_pi;
+ if (TII.isValidAutoIncImm(LoadedVT, Val))
+ Opcode = IsZeroExt ? Hexagon::L2_loadrub_pi : Hexagon::L2_loadrb_pi;
else
- Opcode = zextval ? Hexagon::L2_loadrub_io : Hexagon::L2_loadrb_io;
+ Opcode = IsZeroExt ? Hexagon::L2_loadrub_io : Hexagon::L2_loadrb_io;
} else
llvm_unreachable("unknown memory type");
- // For zero ext i64 loads, we need to add combine instructions.
- if (LD->getValueType(0) == MVT::i64 &&
- LD->getExtensionType() == ISD::ZEXTLOAD) {
+ // For zero extended i64 loads, we need to add combine instructions.
+ if (LD->getValueType(0) == MVT::i64 && IsZeroExt)
return SelectIndexedLoadZeroExtend64(LD, Opcode, dl);
- }
- if (LD->getValueType(0) == MVT::i64 &&
- LD->getExtensionType() == ISD::SEXTLOAD) {
- // Handle sign ext i64 loads.
+ // Handle sign extended i64 loads.
+ if (LD->getValueType(0) == MVT::i64 && ExtType == ISD::SEXTLOAD)
return SelectIndexedLoadSignExtend64(LD, Opcode, dl);
- }
- if (TII->isValidAutoIncImm(LoadedVT, Val)) {
+
+ if (TII.isValidAutoIncImm(LoadedVT, Val)) {
SDValue TargetConstVal = CurDAG->getTargetConstant(Val, MVT::i32);
SDNode* Result = CurDAG->getMachineNode(Opcode, dl,
LD->getValueType(0),
@@ -649,7 +462,7 @@ SDNode *HexagonDAGToDAGISel::SelectLoad(SDNode *N) {
if (AM != ISD::UNINDEXED) {
result = SelectIndexedLoad(LD, dl);
} else {
- result = SelectBaseOffsetLoad(LD, dl);
+ result = SelectCode(LD);
}
return result;
@@ -665,13 +478,12 @@ SDNode *HexagonDAGToDAGISel::SelectIndexedStore(StoreSDNode *ST, SDLoc dl) {
// Get the constant value.
int32_t Val = cast<ConstantSDNode>(OffsetNode)->getSExtValue();
EVT StoredVT = ST->getMemoryVT();
+ EVT ValueVT = Value.getValueType();
// Offset value must be within representable range
// and must have correct alignment properties.
- const HexagonInstrInfo *TII = Subtarget->getInstrInfo();
- if (TII->isValidAutoIncImm(StoredVT, Val)) {
- SDValue Ops[] = {Base, CurDAG->getTargetConstant(Val, MVT::i32), Value,
- Chain};
+ const HexagonInstrInfo &TII = *HST->getInstrInfo();
+ if (TII.isValidAutoIncImm(StoredVT, Val)) {
unsigned Opcode = 0;
// Figure out the post inc version of opcode.
@@ -681,6 +493,13 @@ SDNode *HexagonDAGToDAGISel::SelectIndexedStore(StoreSDNode *ST, SDLoc dl) {
else if (StoredVT == MVT::i8) Opcode = Hexagon::S2_storerb_pi;
else llvm_unreachable("unknown memory type");
+ if (ST->isTruncatingStore() && ValueVT.getSizeInBits() == 64) {
+ assert(StoredVT.getSizeInBits() < 64 && "Not a truncating store");
+ Value = CurDAG->getTargetExtractSubreg(Hexagon::subreg_loreg,
+ dl, MVT::i32, Value);
+ }
+ SDValue Ops[] = {Base, CurDAG->getTargetConstant(Val, MVT::i32), Value,
+ Chain};
// Build post increment store.
SDNode* Result = CurDAG->getMachineNode(Opcode, dl, MVT::i32,
MVT::Other, Ops);
@@ -694,7 +513,8 @@ SDNode *HexagonDAGToDAGISel::SelectIndexedStore(StoreSDNode *ST, SDLoc dl) {
}
// Note: Order of operands matches the def of instruction:
- // def STrid : STInst<(outs), (ins MEMri:$addr, DoubleRegs:$src1), ...
+ // def S2_storerd_io
+ // : STInst<(outs), (ins IntRegs:$base, imm:$offset, DoubleRegs:$src1), ...
// and it differs for POST_ST* for instance.
SDValue Ops[] = { Base, CurDAG->getTargetConstant(0, MVT::i32), Value,
Chain};
@@ -724,61 +544,6 @@ SDNode *HexagonDAGToDAGISel::SelectIndexedStore(StoreSDNode *ST, SDLoc dl) {
return Result_2;
}
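
The new truncating-store handling above extracts subreg_loreg before building the post-increment store: storing an i64 value through an i32-or-narrower memory type only ever writes the low half of the register pair. In plain C++ terms the extraction amounts to:

    #include <cassert>
    #include <cstdint>
    static uint32_t lowWord(uint64_t V) { return static_cast<uint32_t>(V); }
    int main() { assert(lowWord(0x1122334455667788ull) == 0x55667788u); }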
-
-SDNode *HexagonDAGToDAGISel::SelectBaseOffsetStore(StoreSDNode *ST,
- SDLoc dl) {
- SDValue Chain = ST->getChain();
- SDNode* Const32 = ST->getBasePtr().getNode();
- SDValue Value = ST->getValue();
- unsigned Opcode = 0;
-
- // Try to lower stores of GlobalAdresses into indexed stores. Custom
- // lowering for GlobalAddress nodes has already turned it into a
- // CONST32. Avoid truncating stores for the moment. Post-inc stores
- // do the same. Don't think there's a reason for it, so will file a
- // bug to fix.
- if ((Const32->getOpcode() == HexagonISD::CONST32) &&
- !(Value.getValueType() == MVT::i64 && ST->isTruncatingStore())) {
- SDValue Base = Const32->getOperand(0);
- if (Base.getOpcode() == ISD::TargetGlobalAddress) {
- EVT StoredVT = ST->getMemoryVT();
- int64_t Offset = cast<GlobalAddressSDNode>(Base)->getOffset();
- if (Offset != 0 && OffsetFitsS11(StoredVT, Offset)) {
- MVT PointerTy = getTargetLowering()->getPointerTy();
- const GlobalValue* GV =
- cast<GlobalAddressSDNode>(Base)->getGlobal();
- SDValue TargAddr =
- CurDAG->getTargetGlobalAddress(GV, dl, PointerTy, 0);
- SDNode* NewBase = CurDAG->getMachineNode(Hexagon::CONST32_set,
- dl, PointerTy,
- TargAddr);
-
- // Figure out base + offset opcode
- if (StoredVT == MVT::i64) Opcode = Hexagon::S2_storerd_io;
- else if (StoredVT == MVT::i32) Opcode = Hexagon::S2_storeri_io;
- else if (StoredVT == MVT::i16) Opcode = Hexagon::S2_storerh_io;
- else if (StoredVT == MVT::i8) Opcode = Hexagon::S2_storerb_io;
- else llvm_unreachable("unknown memory type");
-
- SDValue Ops[] = {SDValue(NewBase,0),
- CurDAG->getTargetConstant(Offset,PointerTy),
- Value, Chain};
- // build indexed store
- SDNode* Result = CurDAG->getMachineNode(Opcode, dl,
- MVT::Other, Ops);
- MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
- MemOp[0] = ST->getMemOperand();
- cast<MachineSDNode>(Result)->setMemRefs(MemOp, MemOp + 1);
- ReplaceUses(ST, Result);
- return Result;
- }
- }
- }
-
- return SelectCode(ST);
-}
-
-
SDNode *HexagonDAGToDAGISel::SelectStore(SDNode *N) {
SDLoc dl(N);
StoreSDNode *ST = cast<StoreSDNode>(N);
@@ -789,7 +554,7 @@ SDNode *HexagonDAGToDAGISel::SelectStore(SDNode *N) {
return SelectIndexedStore(ST, dl);
}
- return SelectBaseOffsetStore(ST, dl);
+ return SelectCode(ST);
}
SDNode *HexagonDAGToDAGISel::SelectMul(SDNode *N) {
@@ -875,187 +640,6 @@ SDNode *HexagonDAGToDAGISel::SelectMul(SDNode *N) {
return SelectCode(N);
}
-
-SDNode *HexagonDAGToDAGISel::SelectSelect(SDNode *N) {
- SDLoc dl(N);
- SDValue N0 = N->getOperand(0);
- if (N0.getOpcode() == ISD::SETCC) {
- SDValue N00 = N0.getOperand(0);
- if (N00.getOpcode() == ISD::SIGN_EXTEND_INREG) {
- SDValue N000 = N00.getOperand(0);
- SDValue N001 = N00.getOperand(1);
- if (cast<VTSDNode>(N001)->getVT() == MVT::i16) {
- SDValue N01 = N0.getOperand(1);
- SDValue N02 = N0.getOperand(2);
-
- // Pattern: (select:i32 (setcc:i1 (sext_inreg:i32 IntRegs:i32:$src2,
- // i16:Other),IntRegs:i32:$src1, SETLT:Other),IntRegs:i32:$src1,
- // IntRegs:i32:$src2)
- // Emits: (MAXh_rr:i32 IntRegs:i32:$src1, IntRegs:i32:$src2)
- // Pattern complexity = 9 cost = 1 size = 0.
- if (cast<CondCodeSDNode>(N02)->get() == ISD::SETLT) {
- SDValue N1 = N->getOperand(1);
- if (N01 == N1) {
- SDValue N2 = N->getOperand(2);
- if (N000 == N2 &&
- N0.getNode()->getValueType(N0.getResNo()) == MVT::i1 &&
- N00.getNode()->getValueType(N00.getResNo()) == MVT::i32) {
- SDNode *SextNode = CurDAG->getMachineNode(Hexagon::A2_sxth, dl,
- MVT::i32, N000);
- SDNode *Result = CurDAG->getMachineNode(Hexagon::A2_max, dl,
- MVT::i32,
- SDValue(SextNode, 0),
- N1);
- ReplaceUses(N, Result);
- return Result;
- }
- }
- }
-
- // Pattern: (select:i32 (setcc:i1 (sext_inreg:i32 IntRegs:i32:$src2,
- // i16:Other), IntRegs:i32:$src1, SETGT:Other), IntRegs:i32:$src1,
- // IntRegs:i32:$src2)
- // Emits: (MINh_rr:i32 IntRegs:i32:$src1, IntRegs:i32:$src2)
- // Pattern complexity = 9 cost = 1 size = 0.
- if (cast<CondCodeSDNode>(N02)->get() == ISD::SETGT) {
- SDValue N1 = N->getOperand(1);
- if (N01 == N1) {
- SDValue N2 = N->getOperand(2);
- if (N000 == N2 &&
- N0.getNode()->getValueType(N0.getResNo()) == MVT::i1 &&
- N00.getNode()->getValueType(N00.getResNo()) == MVT::i32) {
- SDNode *SextNode = CurDAG->getMachineNode(Hexagon::A2_sxth, dl,
- MVT::i32, N000);
- SDNode *Result = CurDAG->getMachineNode(Hexagon::A2_min, dl,
- MVT::i32,
- SDValue(SextNode, 0),
- N1);
- ReplaceUses(N, Result);
- return Result;
- }
- }
- }
- }
- }
- }
-
- return SelectCode(N);
-}
-
-
-SDNode *HexagonDAGToDAGISel::SelectTruncate(SDNode *N) {
- SDLoc dl(N);
- SDValue Shift = N->getOperand(0);
-
- //
- // %conv.i = sext i32 %tmp1 to i64
- // %conv2.i = sext i32 %add to i64
- // %mul.i = mul nsw i64 %conv2.i, %conv.i
- // %shr5.i = lshr i64 %mul.i, 32
- // %conv3.i = trunc i64 %shr5.i to i32
- //
- // --- match with the following ---
- //
- // %conv3.i = mpy (%tmp1, %add)
- //
- // Trunc to i32.
- if (N->getValueType(0) == MVT::i32) {
- // Trunc from i64.
- if (Shift.getNode()->getValueType(0) == MVT::i64) {
- // Trunc child is logical shift right.
- if (Shift.getOpcode() != ISD::SRL) {
- return SelectCode(N);
- }
-
- SDValue ShiftOp0 = Shift.getOperand(0);
- SDValue ShiftOp1 = Shift.getOperand(1);
-
- // Shift by const 32
- if (ShiftOp1.getOpcode() != ISD::Constant) {
- return SelectCode(N);
- }
-
- int32_t ShiftConst =
- cast<ConstantSDNode>(ShiftOp1.getNode())->getSExtValue();
- if (ShiftConst != 32) {
- return SelectCode(N);
- }
-
- // Shifting a i64 signed multiply
- SDValue Mul = ShiftOp0;
- if (Mul.getOpcode() != ISD::MUL) {
- return SelectCode(N);
- }
-
- SDValue MulOp0 = Mul.getOperand(0);
- SDValue MulOp1 = Mul.getOperand(1);
-
- SDValue OP0;
- SDValue OP1;
-
- // Handle sign_extend and sextload
- if (MulOp0.getOpcode() == ISD::SIGN_EXTEND) {
- SDValue Sext0 = MulOp0.getOperand(0);
- if (Sext0.getNode()->getValueType(0) != MVT::i32) {
- return SelectCode(N);
- }
-
- OP0 = Sext0;
- } else if (MulOp0.getOpcode() == ISD::LOAD) {
- LoadSDNode *LD = cast<LoadSDNode>(MulOp0.getNode());
- if (LD->getMemoryVT() != MVT::i32 ||
- LD->getExtensionType() != ISD::SEXTLOAD ||
- LD->getAddressingMode() != ISD::UNINDEXED) {
- return SelectCode(N);
- }
-
- SDValue Chain = LD->getChain();
- SDValue TargetConst0 = CurDAG->getTargetConstant(0, MVT::i32);
- OP0 = SDValue(CurDAG->getMachineNode(Hexagon::L2_loadri_io, dl, MVT::i32,
- MVT::Other,
- LD->getBasePtr(),
- TargetConst0, Chain), 0);
- } else {
- return SelectCode(N);
- }
-
- // Same goes for the second operand.
- if (MulOp1.getOpcode() == ISD::SIGN_EXTEND) {
- SDValue Sext1 = MulOp1.getOperand(0);
- if (Sext1.getNode()->getValueType(0) != MVT::i32)
- return SelectCode(N);
-
- OP1 = Sext1;
- } else if (MulOp1.getOpcode() == ISD::LOAD) {
- LoadSDNode *LD = cast<LoadSDNode>(MulOp1.getNode());
- if (LD->getMemoryVT() != MVT::i32 ||
- LD->getExtensionType() != ISD::SEXTLOAD ||
- LD->getAddressingMode() != ISD::UNINDEXED) {
- return SelectCode(N);
- }
-
- SDValue Chain = LD->getChain();
- SDValue TargetConst0 = CurDAG->getTargetConstant(0, MVT::i32);
- OP1 = SDValue(CurDAG->getMachineNode(Hexagon::L2_loadri_io, dl, MVT::i32,
- MVT::Other,
- LD->getBasePtr(),
- TargetConst0, Chain), 0);
- } else {
- return SelectCode(N);
- }
-
- // Generate a mpy instruction.
- SDNode *Result = CurDAG->getMachineNode(Hexagon::M2_mpy_up, dl, MVT::i32,
- OP0, OP1);
- ReplaceUses(N, Result);
- return Result;
- }
- }
-
- return SelectCode(N);
-}
-
-
SDNode *HexagonDAGToDAGISel::SelectSHL(SDNode *N) {
SDLoc dl(N);
if (N->getValueType(0) == MVT::i32) {
@@ -1134,6 +718,36 @@ SDNode *HexagonDAGToDAGISel::SelectSHL(SDNode *N) {
//
SDNode *HexagonDAGToDAGISel::SelectZeroExtend(SDNode *N) {
SDLoc dl(N);
+
+ SDValue Op0 = N->getOperand(0);
+ EVT OpVT = Op0.getValueType();
+ unsigned OpBW = OpVT.getSizeInBits();
+
+ // Special handling for zero-extending a vector of booleans.
+ if (OpVT.isVector() && OpVT.getVectorElementType() == MVT::i1 && OpBW <= 64) {
+ SDNode *Mask = CurDAG->getMachineNode(Hexagon::C2_mask, dl, MVT::i64, Op0);
+ unsigned NE = OpVT.getVectorNumElements();
+ EVT ExVT = N->getValueType(0);
+ unsigned ES = ExVT.getVectorElementType().getSizeInBits();
+ uint64_t MV = 0, Bit = 1;
+ for (unsigned i = 0; i < NE; ++i) {
+ MV |= Bit;
+ Bit <<= ES;
+ }
+ SDValue Ones = CurDAG->getTargetConstant(MV, MVT::i64);
+ SDNode *OnesReg = CurDAG->getMachineNode(Hexagon::CONST64_Int_Real, dl,
+ MVT::i64, Ones);
+ if (ExVT.getSizeInBits() == 32) {
+ SDNode *And = CurDAG->getMachineNode(Hexagon::A2_andp, dl, MVT::i64,
+ SDValue(Mask,0), SDValue(OnesReg,0));
+ SDValue SubR = CurDAG->getTargetConstant(Hexagon::subreg_loreg, MVT::i32);
+ return CurDAG->getMachineNode(Hexagon::EXTRACT_SUBREG, dl, ExVT,
+ SDValue(And,0), SubR);
+ }
+ return CurDAG->getMachineNode(Hexagon::A2_andp, dl, ExVT,
+ SDValue(Mask,0), SDValue(OnesReg,0));
+ }
+
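+
+The loop above builds a constant with a 1 in the lowest bit of each result lane: for a v4i1 zero-extended to v4i8, NE = 4 and ES = 8, giving the mask 0x01010101, which is then ANDed with the C2_mask result. The same computation as a standalone check:
+
+    #include <cassert>
+    #include <cstdint>
+    static uint64_t onesMask(unsigned NE, unsigned ES) {
+      uint64_t MV = 0, Bit = 1;
+      for (unsigned i = 0; i < NE; ++i) { MV |= Bit; Bit <<= ES; }
+      return MV;
+    }
+    int main() { assert(onesMask(4, 8) == 0x01010101ull); }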
SDNode *IsIntrinsic = N->getOperand(0).getNode();
if ((IsIntrinsic->getOpcode() == ISD::INTRINSIC_WO_CHAIN)) {
unsigned ID =
@@ -1141,7 +755,7 @@ SDNode *HexagonDAGToDAGISel::SelectZeroExtend(SDNode *N) {
if (doesIntrinsicReturnPredicate(ID)) {
// Now we need to differentiate target data types.
if (N->getValueType(0) == MVT::i64) {
- // Convert the zero_extend to Rs = Pd followed by COMBINE_rr(0,Rs).
+ // Convert the zero_extend to Rs = Pd followed by A2_combinew(0,Rs).
SDValue TargetConst0 = CurDAG->getTargetConstant(0, MVT::i32);
SDNode *Result_1 = CurDAG->getMachineNode(Hexagon::C2_tfrpr, dl,
MVT::i32,
@@ -1171,6 +785,203 @@ SDNode *HexagonDAGToDAGISel::SelectZeroExtend(SDNode *N) {
}
//
+// Check for circular load/store and bit-reverse load/store intrinsics in
+// order to select the correct lowered operation.
+//
+SDNode *HexagonDAGToDAGISel::SelectIntrinsicWChain(SDNode *N) {
+ unsigned IntNo = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue();
+ if (IntNo == Intrinsic::hexagon_circ_ldd ||
+ IntNo == Intrinsic::hexagon_circ_ldw ||
+ IntNo == Intrinsic::hexagon_circ_lduh ||
+ IntNo == Intrinsic::hexagon_circ_ldh ||
+ IntNo == Intrinsic::hexagon_circ_ldub ||
+ IntNo == Intrinsic::hexagon_circ_ldb) {
+ SDLoc dl(N);
+ SDValue Chain = N->getOperand(0);
+ SDValue Base = N->getOperand(2);
+ SDValue Load = N->getOperand(3);
+ SDValue ModifierExpr = N->getOperand(4);
+ SDValue Offset = N->getOperand(5);
+
+ // We need to add the return type for the load. This intrinsic has
+ // two return types, one for the load and one for the post-increment.
+ // Only the *_ld instructions push the extra return type, and bump the
+ // result node operand number correspondingly.
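+ // For example, hexagon_circ_ldw below uses ResTys = { i32, i32, Other }:
+ // the post-incremented base address, the loaded value (stored back through
+ // the Load pointer further down), and the chain.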
+ std::vector<EVT> ResTys;
+ unsigned opc;
+ unsigned memsize, align;
+ MVT MvtSize = MVT::i32;
+
+ if (IntNo == Intrinsic::hexagon_circ_ldd) {
+ ResTys.push_back(MVT::i32);
+ ResTys.push_back(MVT::i64);
+ opc = Hexagon::L2_loadrd_pci_pseudo;
+ memsize = 8;
+ align = 8;
+ } else if (IntNo == Intrinsic::hexagon_circ_ldw) {
+ ResTys.push_back(MVT::i32);
+ ResTys.push_back(MVT::i32);
+ opc = Hexagon::L2_loadri_pci_pseudo;
+ memsize = 4;
+ align = 4;
+ } else if (IntNo == Intrinsic::hexagon_circ_ldh) {
+ ResTys.push_back(MVT::i32);
+ ResTys.push_back(MVT::i32);
+ opc = Hexagon::L2_loadrh_pci_pseudo;
+ memsize = 2;
+ align = 2;
+ MvtSize = MVT::i16;
+ } else if (IntNo == Intrinsic::hexagon_circ_lduh) {
+ ResTys.push_back(MVT::i32);
+ ResTys.push_back(MVT::i32);
+ opc = Hexagon::L2_loadruh_pci_pseudo;
+ memsize = 2;
+ align = 2;
+ MvtSize = MVT::i16;
+ } else if (IntNo == Intrinsic::hexagon_circ_ldb) {
+ ResTys.push_back(MVT::i32);
+ ResTys.push_back(MVT::i32);
+ opc = Hexagon::L2_loadrb_pci_pseudo;
+ memsize = 1;
+ align = 1;
+ MvtSize = MVT::i8;
+ } else if (IntNo == Intrinsic::hexagon_circ_ldub) {
+ ResTys.push_back(MVT::i32);
+ ResTys.push_back(MVT::i32);
+ opc = Hexagon::L2_loadrub_pci_pseudo;
+ memsize = 1;
+ align = 1;
+ MvtSize = MVT::i8;
+ } else
+ llvm_unreachable("no opc");
+
+ ResTys.push_back(MVT::Other);
+
+ // Copy over the arguments, which are mostly the same.
+ SmallVector<SDValue, 5> Ops;
+ Ops.push_back(Base);
+ Ops.push_back(Load);
+ Ops.push_back(ModifierExpr);
+ int32_t Val = cast<ConstantSDNode>(Offset.getNode())->getSExtValue();
+ Ops.push_back(CurDAG->getTargetConstant(Val, MVT::i32));
+ Ops.push_back(Chain);
+ SDNode* Result = CurDAG->getMachineNode(opc, dl, ResTys, Ops);
+
+ SDValue ST;
+ MachineMemOperand *Mem =
+ MF->getMachineMemOperand(MachinePointerInfo(),
+ MachineMemOperand::MOStore, memsize, align);
+ if (MvtSize != MVT::i32)
+ ST = CurDAG->getTruncStore(Chain, dl, SDValue(Result, 1), Load,
+ MvtSize, Mem);
+ else
+ ST = CurDAG->getStore(Chain, dl, SDValue(Result, 1), Load, Mem);
+
+ SDNode* Store = SelectStore(ST.getNode());
+
+ const SDValue Froms[] = { SDValue(N, 0),
+ SDValue(N, 1) };
+ const SDValue Tos[] = { SDValue(Result, 0),
+ SDValue(Store, 0) };
+ ReplaceUses(Froms, Tos, 2);
+ return Result;
+ }
+
+ if (IntNo == Intrinsic::hexagon_brev_ldd ||
+ IntNo == Intrinsic::hexagon_brev_ldw ||
+ IntNo == Intrinsic::hexagon_brev_ldh ||
+ IntNo == Intrinsic::hexagon_brev_lduh ||
+ IntNo == Intrinsic::hexagon_brev_ldb ||
+ IntNo == Intrinsic::hexagon_brev_ldub) {
+ SDLoc dl(N);
+ SDValue Chain = N->getOperand(0);
+ SDValue Base = N->getOperand(2);
+ SDValue Load = N->getOperand(3);
+ SDValue ModifierExpr = N->getOperand(4);
+
+ // We need to add the return type for the load. This intrinsic has
+ // two return types, one for the load and one for the post-increment.
+ std::vector<EVT> ResTys;
+ unsigned opc;
+ unsigned memsize, align;
+ MVT MvtSize = MVT::i32;
+
+ if (IntNo == Intrinsic::hexagon_brev_ldd) {
+ ResTys.push_back(MVT::i32);
+ ResTys.push_back(MVT::i64);
+ opc = Hexagon::L2_loadrd_pbr_pseudo;
+ memsize = 8;
+ align = 8;
+ } else if (IntNo == Intrinsic::hexagon_brev_ldw) {
+ ResTys.push_back(MVT::i32);
+ ResTys.push_back(MVT::i32);
+ opc = Hexagon::L2_loadri_pbr_pseudo;
+ memsize = 4;
+ align = 4;
+ } else if (IntNo == Intrinsic::hexagon_brev_ldh) {
+ ResTys.push_back(MVT::i32);
+ ResTys.push_back(MVT::i32);
+ opc = Hexagon::L2_loadrh_pbr_pseudo;
+ memsize = 2;
+ align = 2;
+ MvtSize = MVT::i16;
+ } else if (IntNo == Intrinsic::hexagon_brev_lduh) {
+ ResTys.push_back(MVT::i32);
+ ResTys.push_back(MVT::i32);
+ opc = Hexagon::L2_loadruh_pbr_pseudo;
+ memsize = 2;
+ align = 2;
+ MvtSize = MVT::i16;
+ } else if (IntNo == Intrinsic::hexagon_brev_ldb) {
+ ResTys.push_back(MVT::i32);
+ ResTys.push_back(MVT::i32);
+ opc = Hexagon::L2_loadrb_pbr_pseudo;
+ memsize = 1;
+ align = 1;
+ MvtSize = MVT::i8;
+ } else if (IntNo == Intrinsic::hexagon_brev_ldub) {
+ ResTys.push_back(MVT::i32);
+ ResTys.push_back(MVT::i32);
+ opc = Hexagon::L2_loadrub_pbr_pseudo;
+ memsize = 1;
+ align = 1;
+ MvtSize = MVT::i8;
+ } else
+ llvm_unreachable("no opc");
+
+ ResTys.push_back(MVT::Other);
+
+ // Copy over the arguments, which are mostly the same.
+ SmallVector<SDValue, 4> Ops;
+ Ops.push_back(Base);
+ Ops.push_back(Load);
+ Ops.push_back(ModifierExpr);
+ Ops.push_back(Chain);
+ SDNode* Result = CurDAG->getMachineNode(opc, dl, ResTys, Ops);
+ SDValue ST;
+ MachineMemOperand *Mem =
+ MF->getMachineMemOperand(MachinePointerInfo(),
+ MachineMemOperand::MOStore, memsize, align);
+ if (MvtSize != MVT::i32)
+ ST = CurDAG->getTruncStore(Chain, dl, SDValue(Result, 1), Load,
+ MvtSize, Mem);
+ else
+ ST = CurDAG->getStore(Chain, dl, SDValue(Result, 1), Load, Mem);
+
+ SDNode* Store = SelectStore(ST.getNode());
+
+ const SDValue Froms[] = { SDValue(N, 0),
+ SDValue(N, 1) };
+ const SDValue Tos[] = { SDValue(Result, 0),
+ SDValue(Store, 0) };
+ ReplaceUses(Froms, Tos, 2);
+ return Result;
+ }
+
+ return SelectCode(N);
+}
+
+//
// Checking for intrinsics which have predicate registers as operand(s)
// and lowering to the actual intrinsic.
//
@@ -1217,37 +1028,20 @@ SDNode *HexagonDAGToDAGISel::SelectConstantFP(SDNode *N) {
return SelectCode(N);
}
-
//
// Map predicate true (encoded as -1 in LLVM) to a XOR.
//
SDNode *HexagonDAGToDAGISel::SelectConstant(SDNode *N) {
SDLoc dl(N);
if (N->getValueType(0) == MVT::i1) {
- SDNode* Result;
+ SDNode *Result = nullptr;
int32_t Val = cast<ConstantSDNode>(N)->getSExtValue();
if (Val == -1) {
- // Create the IntReg = 1 node.
- SDNode* IntRegTFR =
- CurDAG->getMachineNode(Hexagon::A2_tfrsi, dl, MVT::i32,
- CurDAG->getTargetConstant(0, MVT::i32));
-
- // Pd = IntReg
- SDNode* Pd = CurDAG->getMachineNode(Hexagon::C2_tfrrp, dl, MVT::i1,
- SDValue(IntRegTFR, 0));
-
- // not(Pd)
- SDNode* NotPd = CurDAG->getMachineNode(Hexagon::C2_not, dl, MVT::i1,
- SDValue(Pd, 0));
-
- // xor(not(Pd))
- Result = CurDAG->getMachineNode(Hexagon::C2_xor, dl, MVT::i1,
- SDValue(Pd, 0), SDValue(NotPd, 0));
-
- // We have just built:
- // Rs = Pd
- // Pd = xor(not(Pd), Pd)
-
+ Result = CurDAG->getMachineNode(Hexagon::TFR_PdTrue, dl, MVT::i1);
+ } else if (Val == 0) {
+ Result = CurDAG->getMachineNode(Hexagon::TFR_PdFalse, dl, MVT::i1);
+ }
+ if (Result) {
ReplaceUses(N, Result);
return Result;
}
@@ -1283,347 +1077,282 @@ SDNode *HexagonDAGToDAGISel::SelectAdd(SDNode *N) {
return Result;
}
-
-SDNode *HexagonDAGToDAGISel::Select(SDNode *N) {
- if (N->isMachineOpcode()) {
- N->setNodeId(-1);
- return nullptr; // Already selected.
- }
-
-
- switch (N->getOpcode()) {
- case ISD::Constant:
- return SelectConstant(N);
-
- case ISD::ConstantFP:
- return SelectConstantFP(N);
-
- case ISD::ADD:
- return SelectAdd(N);
-
- case ISD::SHL:
- return SelectSHL(N);
-
- case ISD::LOAD:
- return SelectLoad(N);
-
- case ISD::STORE:
- return SelectStore(N);
-
- case ISD::SELECT:
- return SelectSelect(N);
-
- case ISD::TRUNCATE:
- return SelectTruncate(N);
-
- case ISD::MUL:
- return SelectMul(N);
-
- case ISD::ZERO_EXTEND:
- return SelectZeroExtend(N);
-
- case ISD::INTRINSIC_WO_CHAIN:
- return SelectIntrinsicWOChain(N);
- }
-
- return SelectCode(N);
-}
-
-
//
-// Hexagon_TODO: Five functions for ADDRri?! Surely there must be a better way
-// to define these instructions.
+// Map the following, where possible.
+// AND/FABS -> clrbit
+// OR -> setbit
+// XOR/FNEG -> togglebit.
//
-bool HexagonDAGToDAGISel::SelectADDRri(SDValue& Addr, SDValue &Base,
- SDValue &Offset) {
- if (Addr.getOpcode() == ISD::TargetExternalSymbol ||
- Addr.getOpcode() == ISD::TargetGlobalAddress)
- return false; // Direct calls.
-
- if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Addr)) {
- Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), MVT::i32);
- Offset = CurDAG->getTargetConstant(0, MVT::i32);
- return true;
- }
- Base = Addr;
- Offset = CurDAG->getTargetConstant(0, MVT::i32);
- return true;
-}
+SDNode *HexagonDAGToDAGISel::SelectBitOp(SDNode *N) {
+ SDLoc dl(N);
+ EVT ValueVT = N->getValueType(0);
+ // We handle only 32- and 64-bit bit ops.
+ if (!(ValueVT == MVT::i32 || ValueVT == MVT::i64 ||
+ ValueVT == MVT::f32 || ValueVT == MVT::f64))
+ return SelectCode(N);
-bool HexagonDAGToDAGISel::SelectADDRriS11_0(SDValue& Addr, SDValue &Base,
- SDValue &Offset) {
- if (Addr.getOpcode() == ISD::TargetExternalSymbol ||
- Addr.getOpcode() == ISD::TargetGlobalAddress)
- return false; // Direct calls.
+ // We handle only fabs and fneg for V5.
+ unsigned Opc = N->getOpcode();
+ if ((Opc == ISD::FABS || Opc == ISD::FNEG) && !HST->hasV5TOps())
+ return SelectCode(N);
- if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Addr)) {
- Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), MVT::i32);
- Offset = CurDAG->getTargetConstant(0, MVT::i32);
- return (IsS11_0_Offset(Offset.getNode()));
+ int64_t Val = 0;
+ if (Opc != ISD::FABS && Opc != ISD::FNEG) {
+ if (N->getOperand(1).getOpcode() == ISD::Constant)
+ Val = cast<ConstantSDNode>((N)->getOperand(1))->getSExtValue();
+ else
+ return SelectCode(N);
}
- Base = Addr;
- Offset = CurDAG->getTargetConstant(0, MVT::i32);
- return (IsS11_0_Offset(Offset.getNode()));
-}
-
-bool HexagonDAGToDAGISel::SelectADDRriS11_1(SDValue& Addr, SDValue &Base,
- SDValue &Offset) {
- if (Addr.getOpcode() == ISD::TargetExternalSymbol ||
- Addr.getOpcode() == ISD::TargetGlobalAddress)
- return false; // Direct calls.
-
- if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Addr)) {
- Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), MVT::i32);
- Offset = CurDAG->getTargetConstant(0, MVT::i32);
- return (IsS11_1_Offset(Offset.getNode()));
+ if (Opc == ISD::AND) {
+ if (((ValueVT == MVT::i32) &&
+ (!((Val & 0x80000000) || (Val & 0x7fffffff)))) ||
+ ((ValueVT == MVT::i64) &&
+ (!((Val & 0x8000000000000000) || (Val & 0x7fffffff)))))
+ // If it's simple AND, do the normal op.
+ return SelectCode(N);
+ else
+ Val = ~Val;
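+ // For example, and(x, 0xFFFFFFFE) has Val = -2, so the complement
+ // yields Val = 1 and selects clrbit(x, 0) below.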
}
- Base = Addr;
- Offset = CurDAG->getTargetConstant(0, MVT::i32);
- return (IsS11_1_Offset(Offset.getNode()));
-}
-
-
-bool HexagonDAGToDAGISel::SelectADDRriS11_2(SDValue& Addr, SDValue &Base,
- SDValue &Offset) {
- if (Addr.getOpcode() == ISD::TargetExternalSymbol ||
- Addr.getOpcode() == ISD::TargetGlobalAddress)
- return false; // Direct calls.
- if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Addr)) {
- Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), MVT::i32);
- Offset = CurDAG->getTargetConstant(0, MVT::i32);
- return (IsS11_2_Offset(Offset.getNode()));
+ // If OR or AND is fed by shl, srl, or sra, don't do this change,
+ // because Hexagon provides |= and &= forms of shl, srl, and sra.
+ // Look at the first operand to see if it is a shl, srl, or sra.
+ if (Opc == ISD::OR || Opc == ISD::AND) {
+ switch (N->getOperand(0)->getOpcode()) {
+ default: break;
+ case ISD::SRA:
+ case ISD::SRL:
+ case ISD::SHL:
+ return SelectCode(N);
+ }
}
- Base = Addr;
- Offset = CurDAG->getTargetConstant(0, MVT::i32);
- return (IsS11_2_Offset(Offset.getNode()));
-}
+ // Make sure it's a power of 2.
+ unsigned bitpos = 0;
+ if (Opc != ISD::FABS && Opc != ISD::FNEG) {
+ if (((ValueVT == MVT::i32) && !isPowerOf2_32(Val)) ||
+ ((ValueVT == MVT::i64) && !isPowerOf2_64(Val)))
+ return SelectCode(N);
-bool HexagonDAGToDAGISel::SelectADDRriU6_0(SDValue& Addr, SDValue &Base,
- SDValue &Offset) {
- if (Addr.getOpcode() == ISD::TargetExternalSymbol ||
- Addr.getOpcode() == ISD::TargetGlobalAddress)
- return false; // Direct calls.
-
- if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Addr)) {
- Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), MVT::i32);
- Offset = CurDAG->getTargetConstant(0, MVT::i32);
- return (IsU6_0_Offset(Offset.getNode()));
+ // Get the bit position.
+ bitpos = countTrailingZeros(uint64_t(Val));
+ } else {
+ // For fabs and fneg, it's always bit 31 (the sign bit).
+ bitpos = 31;
}
- Base = Addr;
- Offset = CurDAG->getTargetConstant(0, MVT::i32);
- return (IsU6_0_Offset(Offset.getNode()));
-}
+ unsigned BitOpc = 0;
+ // Set the right opcode for bitwise operations.
+ switch(Opc) {
+ default: llvm_unreachable("Only bit-wise/abs/neg operations are allowed.");
+ case ISD::AND:
+ case ISD::FABS:
+ BitOpc = Hexagon::S2_clrbit_i;
+ break;
+ case ISD::OR:
+ BitOpc = Hexagon::S2_setbit_i;
+ break;
+ case ISD::XOR:
+ case ISD::FNEG:
+ BitOpc = Hexagon::S2_togglebit_i;
+ break;
+ }
-bool HexagonDAGToDAGISel::SelectADDRriU6_1(SDValue& Addr, SDValue &Base,
- SDValue &Offset) {
- if (Addr.getOpcode() == ISD::TargetExternalSymbol ||
- Addr.getOpcode() == ISD::TargetGlobalAddress)
- return false; // Direct calls.
+ SDNode *Result;
+ // Get the right SDVal for the opcode.
+ SDValue SDVal = CurDAG->getTargetConstant(bitpos, MVT::i32);
- if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Addr)) {
- Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), MVT::i32);
- Offset = CurDAG->getTargetConstant(0, MVT::i32);
- return (IsU6_1_Offset(Offset.getNode()));
+ if (ValueVT == MVT::i32 || ValueVT == MVT::f32) {
+ Result = CurDAG->getMachineNode(BitOpc, dl, ValueVT,
+ N->getOperand(0), SDVal);
+ } else {
+ // 64-bit gymnastics to use REG_SEQUENCE, but it's worth it.
+ EVT SubValueVT;
+ if (ValueVT == MVT::i64)
+ SubValueVT = MVT::i32;
+ else
+ SubValueVT = MVT::f32;
+
+ SDNode *Reg = N->getOperand(0).getNode();
+ SDValue RegClass = CurDAG->getTargetConstant(Hexagon::DoubleRegsRegClassID,
+ MVT::i64);
+
+ SDValue SubregHiIdx = CurDAG->getTargetConstant(Hexagon::subreg_hireg,
+ MVT::i32);
+ SDValue SubregLoIdx = CurDAG->getTargetConstant(Hexagon::subreg_loreg,
+ MVT::i32);
+
+ SDValue SubregHI = CurDAG->getTargetExtractSubreg(Hexagon::subreg_hireg, dl,
+ MVT::i32, SDValue(Reg, 0));
+
+ SDValue SubregLO = CurDAG->getTargetExtractSubreg(Hexagon::subreg_loreg, dl,
+ MVT::i32, SDValue(Reg, 0));
+
+ // Clear/set/toggle hi or lo registers depending on the bit position.
+ if (SubValueVT != MVT::f32 && bitpos < 32) {
+ SDNode *Result0 = CurDAG->getMachineNode(BitOpc, dl, SubValueVT,
+ SubregLO, SDVal);
+ const SDValue Ops[] = { RegClass, SubregHI, SubregHiIdx,
+ SDValue(Result0, 0), SubregLoIdx };
+ Result = CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE,
+ dl, ValueVT, Ops);
+ } else {
+ if (Opc != ISD::FABS && Opc != ISD::FNEG)
+ SDVal = CurDAG->getTargetConstant(bitpos-32, MVT::i32);
+ SDNode *Result0 = CurDAG->getMachineNode(BitOpc, dl, SubValueVT,
+ SubregHI, SDVal);
+ const SDValue Ops[] = { RegClass, SDValue(Result0, 0), SubregHiIdx,
+ SubregLO, SubregLoIdx };
+ Result = CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE,
+ dl, ValueVT, Ops);
+ }
}
- Base = Addr;
- Offset = CurDAG->getTargetConstant(0, MVT::i32);
- return (IsU6_1_Offset(Offset.getNode()));
+
+ ReplaceUses(N, Result);
+ return Result;
}
-bool HexagonDAGToDAGISel::SelectADDRriU6_2(SDValue& Addr, SDValue &Base,
- SDValue &Offset) {
- if (Addr.getOpcode() == ISD::TargetExternalSymbol ||
- Addr.getOpcode() == ISD::TargetGlobalAddress)
- return false; // Direct calls.
+SDNode *HexagonDAGToDAGISel::SelectFrameIndex(SDNode *N) {
+ int FX = cast<FrameIndexSDNode>(N)->getIndex();
+ SDValue FI = CurDAG->getTargetFrameIndex(FX, MVT::i32);
+ SDValue Zero = CurDAG->getTargetConstant(0, MVT::i32);
+ SDLoc DL(N);
- if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Addr)) {
- Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), MVT::i32);
- Offset = CurDAG->getTargetConstant(0, MVT::i32);
- return (IsU6_2_Offset(Offset.getNode()));
- }
- Base = Addr;
- Offset = CurDAG->getTargetConstant(0, MVT::i32);
- return (IsU6_2_Offset(Offset.getNode()));
-}
+ SDNode *R = CurDAG->getMachineNode(Hexagon::TFR_FI, DL, MVT::i32, FI, Zero);
+ if (N->getHasDebugValue())
+ CurDAG->TransferDbgValues(SDValue(N, 0), SDValue(R, 0));
+ return R;
+}
-bool HexagonDAGToDAGISel::SelectMEMriS11_2(SDValue& Addr, SDValue &Base,
- SDValue &Offset) {
- if (Addr.getOpcode() != ISD::ADD) {
- return(SelectADDRriS11_2(Addr, Base, Offset));
+SDNode *HexagonDAGToDAGISel::Select(SDNode *N) {
+ if (N->isMachineOpcode()) {
+ N->setNodeId(-1);
+ return nullptr; // Already selected.
}
- return SelectADDRriS11_2(Addr, Base, Offset);
-}
+ switch (N->getOpcode()) {
+ case ISD::Constant:
+ return SelectConstant(N);
+ case ISD::ConstantFP:
+ return SelectConstantFP(N);
-bool HexagonDAGToDAGISel::SelectADDRriS11_3(SDValue& Addr, SDValue &Base,
- SDValue &Offset) {
- if (Addr.getOpcode() == ISD::TargetExternalSymbol ||
- Addr.getOpcode() == ISD::TargetGlobalAddress)
- return false; // Direct calls.
+ case ISD::FrameIndex:
+ return SelectFrameIndex(N);
- if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Addr)) {
- Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), MVT::i32);
- Offset = CurDAG->getTargetConstant(0, MVT::i32);
- return (IsS11_3_Offset(Offset.getNode()));
- }
- Base = Addr;
- Offset = CurDAG->getTargetConstant(0, MVT::i32);
- return (IsS11_3_Offset(Offset.getNode()));
-}
+ case ISD::ADD:
+ return SelectAdd(N);
-bool HexagonDAGToDAGISel::SelectADDRrr(SDValue &Addr, SDValue &R1,
- SDValue &R2) {
- if (Addr.getOpcode() == ISD::FrameIndex) return false;
- if (Addr.getOpcode() == ISD::TargetExternalSymbol ||
- Addr.getOpcode() == ISD::TargetGlobalAddress)
- return false; // Direct calls.
-
- if (Addr.getOpcode() == ISD::ADD) {
- if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Addr.getOperand(1)))
- if (isInt<13>(CN->getSExtValue()))
- return false; // Let the reg+imm pattern catch this!
- R1 = Addr.getOperand(0);
- R2 = Addr.getOperand(1);
- return true;
- }
+ case ISD::SHL:
+ return SelectSHL(N);
- R1 = Addr;
+ case ISD::LOAD:
+ return SelectLoad(N);
- return true;
-}
+ case ISD::STORE:
+ return SelectStore(N);
+ case ISD::MUL:
+ return SelectMul(N);
-// Handle generic address case. It is accessed from inlined asm =m constraints,
-// which could have any kind of pointer.
-bool HexagonDAGToDAGISel::SelectAddr(SDNode *Op, SDValue Addr,
- SDValue &Base, SDValue &Offset) {
- if (Addr.getOpcode() == ISD::TargetExternalSymbol ||
- Addr.getOpcode() == ISD::TargetGlobalAddress)
- return false; // Direct calls.
+ case ISD::AND:
+ case ISD::OR:
+ case ISD::XOR:
+ case ISD::FABS:
+ case ISD::FNEG:
+ return SelectBitOp(N);
- if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Addr)) {
- Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), MVT::i32);
- Offset = CurDAG->getTargetConstant(0, MVT::i32);
- return true;
- }
+ case ISD::ZERO_EXTEND:
+ return SelectZeroExtend(N);
- if (Addr.getOpcode() == ISD::ADD) {
- Base = Addr.getOperand(0);
- Offset = Addr.getOperand(1);
- return true;
+ case ISD::INTRINSIC_W_CHAIN:
+ return SelectIntrinsicWChain(N);
+
+ case ISD::INTRINSIC_WO_CHAIN:
+ return SelectIntrinsicWOChain(N);
}
- Base = Addr;
- Offset = CurDAG->getTargetConstant(0, MVT::i32);
- return true;
+ return SelectCode(N);
}
bool HexagonDAGToDAGISel::
-SelectInlineAsmMemoryOperand(const SDValue &Op, char ConstraintCode,
+SelectInlineAsmMemoryOperand(const SDValue &Op, unsigned ConstraintID,
std::vector<SDValue> &OutOps) {
- SDValue Op0, Op1;
-
- switch (ConstraintCode) {
- case 'o': // Offsetable.
- case 'v': // Not offsetable.
- default: return true;
- case 'm': // Memory.
- if (!SelectAddr(Op.getNode(), Op, Op0, Op1))
- return true;
+ SDValue Inp = Op, Res;
+
+ switch (ConstraintID) {
+ default:
+ return true;
+ case InlineAsm::Constraint_i:
+ case InlineAsm::Constraint_o: // Offsetable.
+ case InlineAsm::Constraint_v: // Not offsetable.
+ case InlineAsm::Constraint_m: // Memory.
+ if (SelectAddrFI(Inp, Res))
+ OutOps.push_back(Res);
+ else
+ OutOps.push_back(Inp);
break;
}
- OutOps.push_back(Op0);
- OutOps.push_back(Op1);
+ OutOps.push_back(CurDAG->getTargetConstant(0, MVT::i32));
return false;
}
-bool HexagonDAGToDAGISel::isConstExtProfitable(SDNode *N) const {
- unsigned UseCount = 0;
- for (SDNode::use_iterator I = N->use_begin(), E = N->use_end(); I != E; ++I) {
- UseCount++;
- }
-
- return (UseCount <= 1);
-
-}
-
-//===--------------------------------------------------------------------===//
-// Return 'true' if use count of the global address is below threshold.
-//===--------------------------------------------------------------------===//
-bool HexagonDAGToDAGISel::hasNumUsesBelowThresGA(SDNode *N) const {
- assert(N->getOpcode() == ISD::TargetGlobalAddress &&
- "Expecting a target global address");
-
- // Always try to fold the address.
- if (TM.getOptLevel() == CodeGenOpt::Aggressive)
- return true;
-
- GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(N);
- DenseMap<const GlobalValue *, unsigned>::const_iterator GI =
- GlobalAddressUseCountMap.find(GA->getGlobal());
-
- if (GI == GlobalAddressUseCountMap.end())
- return false;
-
- return GI->second <= MaxNumOfUsesForConstExtenders;
-}
-
-//===--------------------------------------------------------------------===//
-// Return true if the non-GP-relative global address can be folded.
-//===--------------------------------------------------------------------===//
-inline bool HexagonDAGToDAGISel::foldGlobalAddress(SDValue &N, SDValue &R) {
- return foldGlobalAddressImpl(N, R, false);
-}
-
-//===--------------------------------------------------------------------===//
-// Return true if the GP-relative global address can be folded.
-//===--------------------------------------------------------------------===//
-inline bool HexagonDAGToDAGISel::foldGlobalAddressGP(SDValue &N, SDValue &R) {
- return foldGlobalAddressImpl(N, R, true);
-}
+void HexagonDAGToDAGISel::PreprocessISelDAG() {
+ SelectionDAG &DAG = *CurDAG;
+ std::vector<SDNode*> Nodes;
+ for (auto I = DAG.allnodes_begin(), E = DAG.allnodes_end(); I != E; ++I)
+ Nodes.push_back(I);
+
+ // Simplify: (or (select c x 0) z) -> (select c (or x z) z)
+ // (or (select c 0 y) z) -> (select c z (or y z))
+ // This may not be the right thing for all targets, so do it here.
+ for (auto I: Nodes) {
+ if (I->getOpcode() != ISD::OR)
+ continue;
+
+ auto IsZero = [] (const SDValue &V) -> bool {
+ if (ConstantSDNode *SC = dyn_cast<ConstantSDNode>(V.getNode()))
+ return SC->isNullValue();
+ return false;
+ };
+ auto IsSelect0 = [IsZero] (const SDValue &Op) -> bool {
+ if (Op.getOpcode() != ISD::SELECT)
+ return false;
+ return IsZero(Op.getOperand(1)) || IsZero(Op.getOperand(2));
+ };
-//===--------------------------------------------------------------------===//
-// Fold offset of the global address if number of uses are below threshold.
-//===--------------------------------------------------------------------===//
-bool HexagonDAGToDAGISel::foldGlobalAddressImpl(SDValue &N, SDValue &R,
- bool ShouldLookForGP) {
- if (N.getOpcode() == ISD::ADD) {
- SDValue N0 = N.getOperand(0);
- SDValue N1 = N.getOperand(1);
- if ((ShouldLookForGP && (N0.getOpcode() == HexagonISD::CONST32_GP)) ||
- (!ShouldLookForGP && (N0.getOpcode() == HexagonISD::CONST32))) {
- ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N1);
- GlobalAddressSDNode *GA =
- dyn_cast<GlobalAddressSDNode>(N0.getOperand(0));
-
- if (Const && GA &&
- (GA->getOpcode() == ISD::TargetGlobalAddress)) {
- if ((N0.getOpcode() == HexagonISD::CONST32) &&
- !hasNumUsesBelowThresGA(GA))
- return false;
- R = CurDAG->getTargetGlobalAddress(GA->getGlobal(),
- SDLoc(Const),
- N.getValueType(),
- GA->getOffset() +
- (uint64_t)Const->getSExtValue());
- return true;
+ SDValue N0 = I->getOperand(0), N1 = I->getOperand(1);
+ EVT VT = I->getValueType(0);
+ bool SelN0 = IsSelect0(N0);
+ SDValue SOp = SelN0 ? N0 : N1;
+ SDValue VOp = SelN0 ? N1 : N0;
+
+ if (SOp.getOpcode() == ISD::SELECT && SOp.getNode()->hasOneUse()) {
+ SDValue SC = SOp.getOperand(0);
+ SDValue SX = SOp.getOperand(1);
+ SDValue SY = SOp.getOperand(2);
+ SDLoc DLS = SOp;
+ if (IsZero(SY)) {
+ SDValue NewOr = DAG.getNode(ISD::OR, DLS, VT, SX, VOp);
+ SDValue NewSel = DAG.getNode(ISD::SELECT, DLS, VT, SC, NewOr, VOp);
+ DAG.ReplaceAllUsesWith(I, NewSel.getNode());
+ } else if (IsZero(SX)) {
+ SDValue NewOr = DAG.getNode(ISD::OR, DLS, VT, SY, VOp);
+ SDValue NewSel = DAG.getNode(ISD::SELECT, DLS, VT, SC, VOp, NewOr);
+ DAG.ReplaceAllUsesWith(I, NewSel.getNode());
}
}
}
- return false;
}
+
bool HexagonDAGToDAGISel::SelectAddrFI(SDValue& N, SDValue &R) {
if (N.getOpcode() != ISD::FrameIndex)
return false;
@@ -1681,8 +1410,8 @@ bool HexagonDAGToDAGISel::SelectGlobalAddress(SDValue &N, SDValue &R,
return false;
}
-bool HexagonDAGToDAGISel::isValueExtension(SDValue const &Val,
- unsigned FromBits, SDValue &Src) {
+bool HexagonDAGToDAGISel::isValueExtension(const SDValue &Val,
+ unsigned FromBits, SDValue &Src) {
unsigned Opc = Val.getOpcode();
switch (Opc) {
case ISD::SIGN_EXTEND:
diff --git a/lib/Target/Hexagon/HexagonISelLowering.cpp b/lib/Target/Hexagon/HexagonISelLowering.cpp
index 0072994..a2209ab 100644
--- a/lib/Target/Hexagon/HexagonISelLowering.cpp
+++ b/lib/Target/Hexagon/HexagonISelLowering.cpp
@@ -164,6 +164,12 @@ CC_Hexagon (unsigned ValNo, MVT ValVT,
LocInfo = CCValAssign::ZExt;
else
LocInfo = CCValAssign::AExt;
+ } else if (LocVT == MVT::v4i8 || LocVT == MVT::v2i16) {
+ LocVT = MVT::i32;
+ LocInfo = CCValAssign::BCvt;
+ } else if (LocVT == MVT::v8i8 || LocVT == MVT::v4i16 || LocVT == MVT::v2i32) {
+ LocVT = MVT::i64;
+ LocInfo = CCValAssign::BCvt;
}
if (LocVT == MVT::i32 || LocVT == MVT::f32) {
@@ -239,6 +245,12 @@ static bool RetCC_Hexagon(unsigned ValNo, MVT ValVT,
LocInfo = CCValAssign::ZExt;
else
LocInfo = CCValAssign::AExt;
+ } else if (LocVT == MVT::v4i8 || LocVT == MVT::v2i16) {
+ LocVT = MVT::i32;
+ LocInfo = CCValAssign::BCvt;
+ } else if (LocVT == MVT::v8i8 || LocVT == MVT::v4i16 || LocVT == MVT::v2i32) {
+ LocVT = MVT::i64;
+ LocInfo = CCValAssign::BCvt;
}
if (LocVT == MVT::i32 || LocVT == MVT::f32) {
@@ -764,7 +776,7 @@ LowerBR_JT(SDValue Op, SelectionDAG &DAG) const
BlockAddress::get(const_cast<BasicBlock *>(MBB->getBasicBlock()));
}
- SDValue JumpTableBase = DAG.getNode(HexagonISD::WrapperJT, dl,
+ SDValue JumpTableBase = DAG.getNode(HexagonISD::JT, dl,
getPointerTy(), TargetJT);
SDValue ShiftIndex = DAG.getNode(ISD::SHL, dl, MVT::i32, Index,
DAG.getConstant(2, MVT::i32));
@@ -944,6 +956,192 @@ HexagonTargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG) const {
false, 0);
}
+// Creates a SPLAT instruction for a constant value VAL.
+static SDValue createSplat(SelectionDAG &DAG, SDLoc dl, EVT VT, SDValue Val) {
+ if (VT.getSimpleVT() == MVT::v4i8)
+ return DAG.getNode(HexagonISD::VSPLATB, dl, VT, Val);
+
+ if (VT.getSimpleVT() == MVT::v4i16)
+ return DAG.getNode(HexagonISD::VSPLATH, dl, VT, Val);
+
+ return SDValue();
+}
+
+static bool isSExtFree(SDValue N) {
+ // A sign-extend of a truncate of a sign-extend is free.
+ if (N.getOpcode() == ISD::TRUNCATE &&
+ N.getOperand(0).getOpcode() == ISD::AssertSext)
+ return true;
+ // We have sign-extended loads.
+ if (N.getOpcode() == ISD::LOAD)
+ return true;
+ return false;
+}
+
+SDValue HexagonTargetLowering::LowerCTPOP(SDValue Op, SelectionDAG &DAG) const {
+ SDLoc dl(Op);
+ SDValue InpVal = Op.getOperand(0);
+ if (isa<ConstantSDNode>(InpVal)) {
+ uint64_t V = cast<ConstantSDNode>(InpVal)->getZExtValue();
+ return DAG.getTargetConstant(countPopulation(V), MVT::i64);
+ }
+ SDValue PopOut = DAG.getNode(HexagonISD::POPCOUNT, dl, MVT::i32, InpVal);
+ return DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i64, PopOut);
+}
+
+SDValue HexagonTargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const {
+ SDLoc dl(Op);
+
+ SDValue LHS = Op.getOperand(0);
+ SDValue RHS = Op.getOperand(1);
+ SDValue Cmp = Op.getOperand(2);
+ ISD::CondCode CC = cast<CondCodeSDNode>(Cmp)->get();
+
+ EVT VT = Op.getValueType();
+ EVT LHSVT = LHS.getValueType();
+ EVT RHSVT = RHS.getValueType();
+
+ if (LHSVT == MVT::v2i16) {
+ assert(ISD::isSignedIntSetCC(CC) || ISD::isUnsignedIntSetCC(CC));
+ unsigned ExtOpc = ISD::isSignedIntSetCC(CC) ? ISD::SIGN_EXTEND
+ : ISD::ZERO_EXTEND;
+ SDValue LX = DAG.getNode(ExtOpc, dl, MVT::v2i32, LHS);
+ SDValue RX = DAG.getNode(ExtOpc, dl, MVT::v2i32, RHS);
+ SDValue SC = DAG.getNode(ISD::SETCC, dl, MVT::v2i1, LX, RX, Cmp);
+ return SC;
+ }
+
+ // Treat all other vector types as legal.
+ if (VT.isVector())
+ return Op;
+
+ // Equals and not equals should use sign-extend, not zero-extend, since
+ // we can represent small negative values in the compare instructions.
+ // The LLVM default is to use zero-extend arbitrarily in these cases.
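+ // For example, for "(i8) x == -1", sign-extending both sides keeps the
+ // constant at -1, which the compare can represent directly; zero-extending
+ // would turn it into 255.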
+ if ((CC == ISD::SETEQ || CC == ISD::SETNE) &&
+ (RHSVT == MVT::i8 || RHSVT == MVT::i16) &&
+ (LHSVT == MVT::i8 || LHSVT == MVT::i16)) {
+ ConstantSDNode *C = dyn_cast<ConstantSDNode>(RHS);
+ if (C && C->getAPIntValue().isNegative()) {
+ LHS = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i32, LHS);
+ RHS = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i32, RHS);
+ return DAG.getNode(ISD::SETCC, dl, Op.getValueType(),
+ LHS, RHS, Op.getOperand(2));
+ }
+ if (isSExtFree(LHS) || isSExtFree(RHS)) {
+ LHS = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i32, LHS);
+ RHS = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i32, RHS);
+ return DAG.getNode(ISD::SETCC, dl, Op.getValueType(),
+ LHS, RHS, Op.getOperand(2));
+ }
+ }
+ return SDValue();
+}
+
+SDValue HexagonTargetLowering::LowerVSELECT(SDValue Op, SelectionDAG &DAG)
+ const {
+ SDValue PredOp = Op.getOperand(0);
+ SDValue Op1 = Op.getOperand(1), Op2 = Op.getOperand(2);
+ EVT OpVT = Op1.getValueType();
+ SDLoc DL(Op);
+
+ if (OpVT == MVT::v2i16) {
+ SDValue X1 = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::v2i32, Op1);
+ SDValue X2 = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::v2i32, Op2);
+ SDValue SL = DAG.getNode(ISD::VSELECT, DL, MVT::v2i32, PredOp, X1, X2);
+ SDValue TR = DAG.getNode(ISD::TRUNCATE, DL, MVT::v2i16, SL);
+ return TR;
+ }
+
+ return SDValue();
+}
+
+// Handle only specific vector loads.
+SDValue HexagonTargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
+ EVT VT = Op.getValueType();
+ SDLoc DL(Op);
+ LoadSDNode *LoadNode = cast<LoadSDNode>(Op);
+ SDValue Chain = LoadNode->getChain();
+ SDValue Ptr = Op.getOperand(1);
+ SDValue LoweredLoad;
+ SDValue Result;
+ SDValue Base = LoadNode->getBasePtr();
+ ISD::LoadExtType Ext = LoadNode->getExtensionType();
+ unsigned Alignment = LoadNode->getAlignment();
+ SDValue LoadChain;
+
+ if (Ext == ISD::NON_EXTLOAD)
+ Ext = ISD::ZEXTLOAD;
+
+ if (VT == MVT::v4i16) {
+ if (Alignment == 2) {
+ SDValue Loads[4];
+ // Base load.
+ Loads[0] = DAG.getExtLoad(Ext, DL, MVT::i32, Chain, Base,
+ LoadNode->getPointerInfo(), MVT::i16,
+ LoadNode->isVolatile(),
+ LoadNode->isNonTemporal(),
+ LoadNode->isInvariant(),
+ Alignment);
+ // Base+2 load.
+ SDValue Increment = DAG.getConstant(2, MVT::i32);
+ Ptr = DAG.getNode(ISD::ADD, DL, Base.getValueType(), Base, Increment);
+ Loads[1] = DAG.getExtLoad(Ext, DL, MVT::i32, Chain, Ptr,
+ LoadNode->getPointerInfo(), MVT::i16,
+ LoadNode->isVolatile(),
+ LoadNode->isNonTemporal(),
+ LoadNode->isInvariant(),
+ Alignment);
+ // SHL 16, then OR base and base+2.
+ SDValue ShiftAmount = DAG.getConstant(16, MVT::i32);
+ SDValue Tmp1 = DAG.getNode(ISD::SHL, DL, MVT::i32, Loads[1], ShiftAmount);
+ SDValue Tmp2 = DAG.getNode(ISD::OR, DL, MVT::i32, Tmp1, Loads[0]);
+ // Base + 4.
+ Increment = DAG.getConstant(4, MVT::i32);
+ Ptr = DAG.getNode(ISD::ADD, DL, Base.getValueType(), Base, Increment);
+ Loads[2] = DAG.getExtLoad(Ext, DL, MVT::i32, Chain, Ptr,
+ LoadNode->getPointerInfo(), MVT::i16,
+ LoadNode->isVolatile(),
+ LoadNode->isNonTemporal(),
+ LoadNode->isInvariant(),
+ Alignment);
+ // Base + 6.
+ Increment = DAG.getConstant(6, MVT::i32);
+ Ptr = DAG.getNode(ISD::ADD, DL, Base.getValueType(), Base, Increment);
+ Loads[3] = DAG.getExtLoad(Ext, DL, MVT::i32, Chain, Ptr,
+ LoadNode->getPointerInfo(), MVT::i16,
+ LoadNode->isVolatile(),
+ LoadNode->isNonTemporal(),
+ LoadNode->isInvariant(),
+ Alignment);
+ // SHL 16, then OR base+4 and base+6.
+ Tmp1 = DAG.getNode(ISD::SHL, DL, MVT::i32, Loads[3], ShiftAmount);
+ SDValue Tmp4 = DAG.getNode(ISD::OR, DL, MVT::i32, Tmp1, Loads[2]);
+ // Combine to i64. This could be optimized out later if we can
+ // affect reg allocation of this code.
+ Result = DAG.getNode(HexagonISD::COMBINE, DL, MVT::i64, Tmp4, Tmp2);
+ LoadChain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other,
+ Loads[0].getValue(1), Loads[1].getValue(1),
+ Loads[2].getValue(1), Loads[3].getValue(1));
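+ // The value built above is
+ // COMBINE((Loads[3] << 16) | Loads[2], (Loads[1] << 16) | Loads[0]),
+ // i.e. four halfword loads reassembled into one 64-bit value.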
+ } else {
+ // Perform default type expansion.
+ Result = DAG.getLoad(MVT::i64, DL, Chain, Ptr, LoadNode->getPointerInfo(),
+ LoadNode->isVolatile(), LoadNode->isNonTemporal(),
+ LoadNode->isInvariant(), LoadNode->getAlignment());
+ LoadChain = Result.getValue(1);
+ }
+ } else
+ llvm_unreachable("Custom lowering unsupported load");
+
+ Result = DAG.getNode(ISD::BITCAST, DL, VT, Result);
+ // Since we pretend to lower a load, we need the original chain
+ // info attached to the result.
+ SDValue Ops[] = { Result, LoadChain };
+
+ return DAG.getMergeValues(Ops, DL);
+}
+
+
SDValue
HexagonTargetLowering::LowerConstantPool(SDValue Op, SelectionDAG &DAG) const {
EVT ValTy = Op.getValueType();
@@ -1028,6 +1226,19 @@ SDValue HexagonTargetLowering::LowerGLOBALADDRESS(SDValue Op,
return DAG.getNode(HexagonISD::CONST32, dl, getPointerTy(), Result);
}
+// Specifies that for loads and stores VT can be promoted to PromotedLdStVT.
+void HexagonTargetLowering::promoteLdStType(EVT VT, EVT PromotedLdStVT) {
+ if (VT != PromotedLdStVT) {
+ setOperationAction(ISD::LOAD, VT.getSimpleVT(), Promote);
+ AddPromotedToType(ISD::LOAD, VT.getSimpleVT(),
+ PromotedLdStVT.getSimpleVT());
+
+ setOperationAction(ISD::STORE, VT.getSimpleVT(), Promote);
+ AddPromotedToType(ISD::STORE, VT.getSimpleVT(),
+ PromotedLdStVT.getSimpleVT());
+ }
+}
+
SDValue
HexagonTargetLowering::LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const {
const BlockAddress *BA = cast<BlockAddressSDNode>(Op)->getBlockAddress();
@@ -1045,14 +1256,105 @@ HexagonTargetLowering::HexagonTargetLowering(const TargetMachine &TM,
: TargetLowering(TM), Subtarget(&STI) {
// Set up the register classes.
+ addRegisterClass(MVT::v2i1, &Hexagon::PredRegsRegClass); // bbbbaaaa
+ addRegisterClass(MVT::v4i1, &Hexagon::PredRegsRegClass); // ddccbbaa
+ addRegisterClass(MVT::v8i1, &Hexagon::PredRegsRegClass); // hgfedcba
addRegisterClass(MVT::i32, &Hexagon::IntRegsRegClass);
- addRegisterClass(MVT::i64, &Hexagon::DoubleRegsRegClass);
+ addRegisterClass(MVT::v4i8, &Hexagon::IntRegsRegClass);
+ addRegisterClass(MVT::v2i16, &Hexagon::IntRegsRegClass);
+ promoteLdStType(MVT::v4i8, MVT::i32);
+ promoteLdStType(MVT::v2i16, MVT::i32);
if (Subtarget->hasV5TOps()) {
addRegisterClass(MVT::f32, &Hexagon::IntRegsRegClass);
addRegisterClass(MVT::f64, &Hexagon::DoubleRegsRegClass);
}
+ addRegisterClass(MVT::i64, &Hexagon::DoubleRegsRegClass);
+ addRegisterClass(MVT::v8i8, &Hexagon::DoubleRegsRegClass);
+ addRegisterClass(MVT::v4i16, &Hexagon::DoubleRegsRegClass);
+ addRegisterClass(MVT::v2i32, &Hexagon::DoubleRegsRegClass);
+ promoteLdStType(MVT::v8i8, MVT::i64);
+
+ // Custom lower v4i16 load only. Let v4i16 store to be
+ // promoted for now.
+ setOperationAction(ISD::LOAD, MVT::v4i16, Custom);
+ AddPromotedToType(ISD::LOAD, MVT::v4i16, MVT::i64);
+ setOperationAction(ISD::STORE, MVT::v4i16, Promote);
+ AddPromotedToType(ISD::STORE, MVT::v4i16, MVT::i64);
+ promoteLdStType(MVT::v2i32, MVT::i64);
+
+ for (unsigned i = (unsigned) MVT::FIRST_VECTOR_VALUETYPE;
+ i <= (unsigned) MVT::LAST_VECTOR_VALUETYPE; ++i) {
+ MVT::SimpleValueType VT = (MVT::SimpleValueType) i;
+
+ // Hexagon does not have support for the following operations,
+ // so they need to be expanded.
+ setOperationAction(ISD::SELECT, VT, Expand);
+ setOperationAction(ISD::SDIV, VT, Expand);
+ setOperationAction(ISD::SREM, VT, Expand);
+ setOperationAction(ISD::UDIV, VT, Expand);
+ setOperationAction(ISD::UREM, VT, Expand);
+ setOperationAction(ISD::ROTL, VT, Expand);
+ setOperationAction(ISD::ROTR, VT, Expand);
+ setOperationAction(ISD::FDIV, VT, Expand);
+ setOperationAction(ISD::FNEG, VT, Expand);
+ setOperationAction(ISD::UMUL_LOHI, VT, Expand);
+ setOperationAction(ISD::SMUL_LOHI, VT, Expand);
+ setOperationAction(ISD::UDIVREM, VT, Expand);
+ setOperationAction(ISD::SDIVREM, VT, Expand);
+ setOperationAction(ISD::FPOW, VT, Expand);
+ setOperationAction(ISD::CTPOP, VT, Expand);
+ setOperationAction(ISD::CTLZ, VT, Expand);
+ setOperationAction(ISD::CTLZ_ZERO_UNDEF, VT, Expand);
+ setOperationAction(ISD::CTTZ, VT, Expand);
+ setOperationAction(ISD::CTTZ_ZERO_UNDEF, VT, Expand);
+
+ // Expand all any extend loads.
+ for (unsigned j = (unsigned) MVT::FIRST_VECTOR_VALUETYPE;
+ j <= (unsigned) MVT::LAST_VECTOR_VALUETYPE; ++j)
+ setLoadExtAction(ISD::EXTLOAD, (MVT::SimpleValueType) j, VT, Expand);
+
+ // Expand all trunc stores.
+ for (unsigned TargetVT = (unsigned) MVT::FIRST_VECTOR_VALUETYPE;
+ TargetVT <= (unsigned) MVT::LAST_VECTOR_VALUETYPE; ++TargetVT)
+ setTruncStoreAction(VT, (MVT::SimpleValueType) TargetVT, Expand);
+
+ setOperationAction(ISD::VECTOR_SHUFFLE, VT, Expand);
+ setOperationAction(ISD::ConstantPool, VT, Expand);
+ setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Expand);
+ setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Expand);
+ setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Expand);
+ setOperationAction(ISD::BUILD_VECTOR, VT, Expand);
+ setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Expand);
+ setOperationAction(ISD::INSERT_SUBVECTOR, VT, Expand);
+ setOperationAction(ISD::CONCAT_VECTORS, VT, Expand);
+ setOperationAction(ISD::SRA, VT, Custom);
+ setOperationAction(ISD::SHL, VT, Custom);
+ setOperationAction(ISD::SRL, VT, Custom);
+
+ if (!isTypeLegal(VT))
+ continue;
+
+ setOperationAction(ISD::ADD, VT, Legal);
+ setOperationAction(ISD::SUB, VT, Legal);
+ setOperationAction(ISD::MUL, VT, Legal);
+
+ setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
+ setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
+ setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
+ setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Custom);
+ setOperationAction(ISD::INSERT_SUBVECTOR, VT, Custom);
+ setOperationAction(ISD::CONCAT_VECTORS, VT, Custom);
+ }
+
+ setOperationAction(ISD::SETCC, MVT::v2i16, Custom);
+ setOperationAction(ISD::VSELECT, MVT::v2i16, Custom);
+ setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v8i8, Custom);
+ setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v4i16, Custom);
+
+ setOperationAction(ISD::ConstantPool, MVT::i32, Custom);
+
addRegisterClass(MVT::i1, &Hexagon::PredRegsRegClass);
computeRegisterProperties(Subtarget->getRegisterInfo());
@@ -1308,9 +1610,14 @@ HexagonTargetLowering::HexagonTargetLowering(const TargetMachine &TM,
// Turn FP extload into load/fextend.
for (MVT VT : MVT::fp_valuetypes())
setLoadExtAction(ISD::EXTLOAD, VT, MVT::f32, Expand);
- // Hexagon has a i1 sign extending load.
- for (MVT VT : MVT::integer_valuetypes())
- setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i1, Expand);
+
+ // No extending loads from i32.
+ for (MVT VT : MVT::integer_valuetypes()) {
+ setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::i32, Expand);
+ setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i32, Expand);
+ setLoadExtAction(ISD::EXTLOAD, VT, MVT::i32, Expand);
+ }
+
// Turn FP truncstore into trunc + store.
setTruncStoreAction(MVT::f64, MVT::f32, Expand);
@@ -1358,6 +1665,10 @@ HexagonTargetLowering::HexagonTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::SELECT, MVT::f64, Expand);
}
+ // Hexagon needs to optimize cases with negative constants.
+ setOperationAction(ISD::SETCC, MVT::i16, Custom);
+ setOperationAction(ISD::SETCC, MVT::i8, Custom);
+
if (EmitJumpTables) {
setOperationAction(ISD::BR_JT, MVT::Other, Custom);
} else {
@@ -1415,9 +1726,17 @@ HexagonTargetLowering::HexagonTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::CTLZ, MVT::i64, Expand);
setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i32, Expand);
setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i64, Expand);
+
setOperationAction(ISD::ROTL, MVT::i32, Expand);
setOperationAction(ISD::ROTR, MVT::i32, Expand);
setOperationAction(ISD::BSWAP, MVT::i32, Expand);
+ setOperationAction(ISD::ROTL, MVT::i64, Expand);
+ setOperationAction(ISD::ROTR, MVT::i64, Expand);
+ setOperationAction(ISD::SHL_PARTS, MVT::i64, Expand);
+ setOperationAction(ISD::SRA_PARTS, MVT::i64, Expand);
+ setOperationAction(ISD::SRL_PARTS, MVT::i64, Expand);
+ setOperationAction(ISD::BR_CC, MVT::i64, Expand);
+
setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand);
setOperationAction(ISD::FPOW, MVT::f64, Expand);
@@ -1429,7 +1748,7 @@ HexagonTargetLowering::HexagonTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::UMUL_LOHI, MVT::i32, Expand);
setOperationAction(ISD::SMUL_LOHI, MVT::i32, Expand);
-
+
setOperationAction(ISD::MULHS, MVT::i64, Expand);
setOperationAction(ISD::SMUL_LOHI, MVT::i64, Expand);
setOperationAction(ISD::UMUL_LOHI, MVT::i64, Expand);
@@ -1463,27 +1782,63 @@ HexagonTargetLowering::HexagonTargetLowering(const TargetMachine &TM,
const char*
HexagonTargetLowering::getTargetNodeName(unsigned Opcode) const {
switch (Opcode) {
- default: return nullptr;
- case HexagonISD::CONST32: return "HexagonISD::CONST32";
- case HexagonISD::CONST32_GP: return "HexagonISD::CONST32_GP";
- case HexagonISD::CONST32_Int_Real: return "HexagonISD::CONST32_Int_Real";
- case HexagonISD::ADJDYNALLOC: return "HexagonISD::ADJDYNALLOC";
- case HexagonISD::CMPICC: return "HexagonISD::CMPICC";
- case HexagonISD::CMPFCC: return "HexagonISD::CMPFCC";
- case HexagonISD::BRICC: return "HexagonISD::BRICC";
- case HexagonISD::BRFCC: return "HexagonISD::BRFCC";
- case HexagonISD::SELECT_ICC: return "HexagonISD::SELECT_ICC";
- case HexagonISD::SELECT_FCC: return "HexagonISD::SELECT_FCC";
- case HexagonISD::Hi: return "HexagonISD::Hi";
- case HexagonISD::Lo: return "HexagonISD::Lo";
- case HexagonISD::FTOI: return "HexagonISD::FTOI";
- case HexagonISD::ITOF: return "HexagonISD::ITOF";
- case HexagonISD::CALLv3: return "HexagonISD::CALLv3";
- case HexagonISD::CALLv3nr: return "HexagonISD::CALLv3nr";
- case HexagonISD::CALLR: return "HexagonISD::CALLR";
- case HexagonISD::RET_FLAG: return "HexagonISD::RET_FLAG";
- case HexagonISD::BR_JT: return "HexagonISD::BR_JT";
- case HexagonISD::TC_RETURN: return "HexagonISD::TC_RETURN";
+ default: return nullptr;
+ case HexagonISD::CONST32: return "HexagonISD::CONST32";
+ case HexagonISD::CONST32_GP: return "HexagonISD::CONST32_GP";
+ case HexagonISD::CONST32_Int_Real: return "HexagonISD::CONST32_Int_Real";
+ case HexagonISD::ADJDYNALLOC: return "HexagonISD::ADJDYNALLOC";
+ case HexagonISD::CMPICC: return "HexagonISD::CMPICC";
+ case HexagonISD::CMPFCC: return "HexagonISD::CMPFCC";
+ case HexagonISD::BRICC: return "HexagonISD::BRICC";
+ case HexagonISD::BRFCC: return "HexagonISD::BRFCC";
+ case HexagonISD::SELECT_ICC: return "HexagonISD::SELECT_ICC";
+ case HexagonISD::SELECT_FCC: return "HexagonISD::SELECT_FCC";
+ case HexagonISD::Hi: return "HexagonISD::Hi";
+ case HexagonISD::Lo: return "HexagonISD::Lo";
+ case HexagonISD::JT: return "HexagonISD::JT";
+ case HexagonISD::CP: return "HexagonISD::CP";
+ case HexagonISD::POPCOUNT: return "HexagonISD::POPCOUNT";
+ case HexagonISD::COMBINE: return "HexagonISD::COMBINE";
+ case HexagonISD::PACKHL: return "HexagonISD::PACKHL";
+ case HexagonISD::VSPLATB: return "HexagonISD::VSPLATB";
+ case HexagonISD::VSPLATH: return "HexagonISD::VSPLATH";
+ case HexagonISD::SHUFFEB: return "HexagonISD::SHUFFEB";
+ case HexagonISD::SHUFFEH: return "HexagonISD::SHUFFEH";
+ case HexagonISD::SHUFFOB: return "HexagonISD::SHUFFOB";
+ case HexagonISD::SHUFFOH: return "HexagonISD::SHUFFOH";
+ case HexagonISD::VSXTBH: return "HexagonISD::VSXTBH";
+ case HexagonISD::VSXTBW: return "HexagonISD::VSXTBW";
+ case HexagonISD::VSRAW: return "HexagonISD::VSRAW";
+ case HexagonISD::VSRAH: return "HexagonISD::VSRAH";
+ case HexagonISD::VSRLW: return "HexagonISD::VSRLW";
+ case HexagonISD::VSRLH: return "HexagonISD::VSRLH";
+ case HexagonISD::VSHLW: return "HexagonISD::VSHLW";
+ case HexagonISD::VSHLH: return "HexagonISD::VSHLH";
+ case HexagonISD::VCMPBEQ: return "HexagonISD::VCMPBEQ";
+ case HexagonISD::VCMPBGT: return "HexagonISD::VCMPBGT";
+ case HexagonISD::VCMPBGTU: return "HexagonISD::VCMPBGTU";
+ case HexagonISD::VCMPHEQ: return "HexagonISD::VCMPHEQ";
+ case HexagonISD::VCMPHGT: return "HexagonISD::VCMPHGT";
+ case HexagonISD::VCMPHGTU: return "HexagonISD::VCMPHGTU";
+ case HexagonISD::VCMPWEQ: return "HexagonISD::VCMPWEQ";
+ case HexagonISD::VCMPWGT: return "HexagonISD::VCMPWGT";
+ case HexagonISD::VCMPWGTU: return "HexagonISD::VCMPWGTU";
+ case HexagonISD::INSERT_ri: return "HexagonISD::INSERT_ri";
+ case HexagonISD::INSERT_rd: return "HexagonISD::INSERT_rd";
+ case HexagonISD::INSERT_riv: return "HexagonISD::INSERT_riv";
+ case HexagonISD::INSERT_rdv: return "HexagonISD::INSERT_rdv";
+ case HexagonISD::EXTRACTU_ri: return "HexagonISD::EXTRACTU_ri";
+ case HexagonISD::EXTRACTU_rd: return "HexagonISD::EXTRACTU_rd";
+ case HexagonISD::EXTRACTU_riv: return "HexagonISD::EXTRACTU_riv";
+ case HexagonISD::EXTRACTU_rdv: return "HexagonISD::EXTRACTU_rdv";
+ case HexagonISD::FTOI: return "HexagonISD::FTOI";
+ case HexagonISD::ITOF: return "HexagonISD::ITOF";
+ case HexagonISD::CALLv3: return "HexagonISD::CALLv3";
+ case HexagonISD::CALLv3nr: return "HexagonISD::CALLv3nr";
+ case HexagonISD::CALLR: return "HexagonISD::CALLR";
+ case HexagonISD::RET_FLAG: return "HexagonISD::RET_FLAG";
+ case HexagonISD::BR_JT: return "HexagonISD::BR_JT";
+ case HexagonISD::TC_RETURN: return "HexagonISD::TC_RETURN";
case HexagonISD::EH_RETURN: return "HexagonISD::EH_RETURN";
}
}
@@ -1505,6 +1860,474 @@ bool HexagonTargetLowering::isTruncateFree(EVT VT1, EVT VT2) const {
return ((VT1.getSimpleVT() == MVT::i64) && (VT2.getSimpleVT() == MVT::i32));
}
+// shouldExpandBuildVectorWithShuffles
+// Should we expand the build vector with shuffles?
+bool
+HexagonTargetLowering::shouldExpandBuildVectorWithShuffles(EVT VT,
+ unsigned DefinedValues) const {
+
+ // Hexagon vector shuffle operates on element sizes of bytes or halfwords
+ EVT EltVT = VT.getVectorElementType();
+ int EltBits = EltVT.getSizeInBits();
+ if ((EltBits != 8) && (EltBits != 16))
+ return false;
+
+ return TargetLowering::shouldExpandBuildVectorWithShuffles(VT, DefinedValues);
+}
+
+// LowerVECTOR_SHUFFLE - Lower a vector shuffle (V1, V2, V3). V1 and
+// V2 are the two vectors to select data from, V3 is the permutation.
+static SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) {
+ const ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(Op);
+ SDValue V1 = Op.getOperand(0);
+ SDValue V2 = Op.getOperand(1);
+ SDLoc dl(Op);
+ EVT VT = Op.getValueType();
+
+ if (V2.getOpcode() == ISD::UNDEF)
+ V2 = V1;
+
+ if (SVN->isSplat()) {
+ int Lane = SVN->getSplatIndex();
+ if (Lane == -1) Lane = 0;
+
+ // Test if V1 is a SCALAR_TO_VECTOR.
+ if (Lane == 0 && V1.getOpcode() == ISD::SCALAR_TO_VECTOR)
+ return createSplat(DAG, dl, VT, V1.getOperand(0));
+
+ // Test if V1 is a BUILD_VECTOR which is equivalent to a SCALAR_TO_VECTOR
+ // (and probably will turn into a SCALAR_TO_VECTOR once legalization
+ // reaches it).
+ if (Lane == 0 && V1.getOpcode() == ISD::BUILD_VECTOR &&
+ !isa<ConstantSDNode>(V1.getOperand(0))) {
+ bool IsScalarToVector = true;
+ for (unsigned i = 1, e = V1.getNumOperands(); i != e; ++i)
+ if (V1.getOperand(i).getOpcode() != ISD::UNDEF) {
+ IsScalarToVector = false;
+ break;
+ }
+ if (IsScalarToVector)
+ return createSplat(DAG, dl, VT, V1.getOperand(0));
+ }
+ return createSplat(DAG, dl, VT, DAG.getConstant(Lane, MVT::i32));
+ }
+
+ // FIXME: We need to support more general vector shuffles. See the
+ // comment below, taken from the ARM backend, which deals with the
+ // general case of vector shuffles. For now, let expand handle these.
+ return SDValue();
+
+ // If the shuffle is not directly supported and it has 4 elements, use
+ // the PerfectShuffle-generated table to synthesize it from other shuffles.
+}
+
+// If the BUILD_VECTOR has the same base element repeated several times,
+// report true.
+static bool isCommonSplatElement(BuildVectorSDNode *BVN) {
+ unsigned NElts = BVN->getNumOperands();
+ SDValue V0 = BVN->getOperand(0);
+
+ for (unsigned i = 1, e = NElts; i != e; ++i) {
+ if (BVN->getOperand(i) != V0)
+ return false;
+ }
+ return true;
+}
+
+// LowerVECTOR_SHIFT - Lower a vector shift. Try to convert
+// <VT> = SHL/SRA/SRL <VT> by <VT> to the Hexagon-specific
+// <VT> = SHL/SRA/SRL <VT> by <IT/i32>.
+static SDValue LowerVECTOR_SHIFT(SDValue Op, SelectionDAG &DAG) {
+ BuildVectorSDNode *BVN = nullptr;
+ SDValue V1 = Op.getOperand(0);
+ SDValue V2 = Op.getOperand(1);
+ SDValue V3;
+ SDLoc dl(Op);
+ EVT VT = Op.getValueType();
+
+ if ((BVN = dyn_cast<BuildVectorSDNode>(V1.getNode())) &&
+ isCommonSplatElement(BVN))
+ V3 = V2;
+ else if ((BVN = dyn_cast<BuildVectorSDNode>(V2.getNode())) &&
+ isCommonSplatElement(BVN))
+ V3 = V1;
+ else
+ return SDValue();
+
+ SDValue CommonSplat = BVN->getOperand(0);
+ SDValue Result;
+
+ if (VT.getSimpleVT() == MVT::v4i16) {
+ switch (Op.getOpcode()) {
+ case ISD::SRA:
+ Result = DAG.getNode(HexagonISD::VSRAH, dl, VT, V3, CommonSplat);
+ break;
+ case ISD::SHL:
+ Result = DAG.getNode(HexagonISD::VSHLH, dl, VT, V3, CommonSplat);
+ break;
+ case ISD::SRL:
+ Result = DAG.getNode(HexagonISD::VSRLH, dl, VT, V3, CommonSplat);
+ break;
+ default:
+ return SDValue();
+ }
+ } else if (VT.getSimpleVT() == MVT::v2i32) {
+ switch (Op.getOpcode()) {
+ case ISD::SRA:
+ Result = DAG.getNode(HexagonISD::VSRAW, dl, VT, V3, CommonSplat);
+ break;
+ case ISD::SHL:
+ Result = DAG.getNode(HexagonISD::VSHLW, dl, VT, V3, CommonSplat);
+ break;
+ case ISD::SRL:
+ Result = DAG.getNode(HexagonISD::VSRLW, dl, VT, V3, CommonSplat);
+ break;
+ default:
+ return SDValue();
+ }
+ } else {
+ return SDValue();
+ }
+
+ return DAG.getNode(ISD::BITCAST, dl, VT, Result);
+}
+
+SDValue
+HexagonTargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const {
+ BuildVectorSDNode *BVN = cast<BuildVectorSDNode>(Op.getNode());
+ SDLoc dl(Op);
+ EVT VT = Op.getValueType();
+
+ unsigned Size = VT.getSizeInBits();
+
+ // A vector larger than 64 bits cannot be represented in Hexagon.
+ // Expand will split the vector.
+ if (Size > 64)
+ return SDValue();
+
+ APInt APSplatBits, APSplatUndef;
+ unsigned SplatBitSize;
+ bool HasAnyUndefs;
+ unsigned NElts = BVN->getNumOperands();
+
+ // Try to generate a SPLAT instruction.
+ if ((VT.getSimpleVT() == MVT::v4i8 || VT.getSimpleVT() == MVT::v4i16) &&
+ (BVN->isConstantSplat(APSplatBits, APSplatUndef, SplatBitSize,
+ HasAnyUndefs, 0, true) && SplatBitSize <= 16)) {
+ unsigned SplatBits = APSplatBits.getZExtValue();
+ int32_t SextVal = ((int32_t) (SplatBits << (32 - SplatBitSize)) >>
+ (32 - SplatBitSize));
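+ // For example, an 8-bit splat of 0xFF becomes SextVal = -1: shift left
+ // by 24, then arithmetic shift right by 24.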
+ return createSplat(DAG, dl, VT, DAG.getConstant(SextVal, MVT::i32));
+ }
+
+ // Try to generate COMBINE to build v2i32 vectors.
+ if (VT.getSimpleVT() == MVT::v2i32) {
+ SDValue V0 = BVN->getOperand(0);
+ SDValue V1 = BVN->getOperand(1);
+
+ if (V0.getOpcode() == ISD::UNDEF)
+ V0 = DAG.getConstant(0, MVT::i32);
+ if (V1.getOpcode() == ISD::UNDEF)
+ V1 = DAG.getConstant(0, MVT::i32);
+
+ ConstantSDNode *C0 = dyn_cast<ConstantSDNode>(V0);
+ ConstantSDNode *C1 = dyn_cast<ConstantSDNode>(V1);
+ // If the element isn't a constant, it is in a register:
+ // generate a register-register COMBINE instruction.
+ if (!C0 || !C1)
+ return DAG.getNode(HexagonISD::COMBINE, dl, VT, V1, V0);
+
+ // If one of the operands is an 8-bit integer constant, generate
+ // an immediate-immediate COMBINE instruction.
+ if (isInt<8>(C0->getSExtValue()) ||
+ isInt<8>(C1->getSExtValue()))
+ return DAG.getNode(HexagonISD::COMBINE, dl, VT, V1, V0);
+ }
+
+ // Try to generate an S2_packhl to build v2i16 vectors.
+ if (VT.getSimpleVT() == MVT::v2i16) {
+ for (unsigned i = 0, e = NElts; i != e; ++i) {
+ if (BVN->getOperand(i).getOpcode() == ISD::UNDEF)
+ continue;
+ ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(BVN->getOperand(i));
+ // If the element isn't a constant, it is in a register:
+ // generate an S2_packhl instruction.
+ if (!Cst) {
+ SDValue pack = DAG.getNode(HexagonISD::PACKHL, dl, MVT::v4i16,
+ BVN->getOperand(1), BVN->getOperand(0));
+
+ return DAG.getTargetExtractSubreg(Hexagon::subreg_loreg, dl, MVT::v2i16,
+ pack);
+ }
+ }
+ }
+
+ // In the general case, generate a CONST32 or a CONST64 for constant vectors,
+ // and insert_vector_elt for all the other cases.
+ uint64_t Res = 0;
+ unsigned EltSize = Size / NElts;
+ SDValue ConstVal;
+ uint64_t Mask = ~uint64_t(0ULL) >> (64 - EltSize);
+ bool HasNonConstantElements = false;
+
+ for (unsigned i = 0, e = NElts; i != e; ++i) {
+ // LLVM's BUILD_VECTOR operands are in little-endian order, whereas
+ // Hexagon's combine, const64, etc. are big-endian.
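+ // For example, v4i8 <a, b, c, d> packs as
+ // Res = (d << 24) | (c << 16) | (b << 8) | a,
+ // leaving element 0 in the least significant byte.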
+ unsigned OpIdx = NElts - i - 1;
+ SDValue Operand = BVN->getOperand(OpIdx);
+ if (Operand.getOpcode() == ISD::UNDEF)
+ continue;
+
+ int64_t Val = 0;
+ if (ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(Operand))
+ Val = Cst->getSExtValue();
+ else
+ HasNonConstantElements = true;
+
+ Val &= Mask;
+ Res = (Res << EltSize) | Val;
+ }
+
+ if (Size == 64)
+ ConstVal = DAG.getConstant(Res, MVT::i64);
+ else
+ ConstVal = DAG.getConstant(Res, MVT::i32);
+
+ // When there are non-constant operands, add them with INSERT_VECTOR_ELT to
+ // ConstVal, the constant part of the vector.
+ if (HasNonConstantElements) {
+ EVT EltVT = VT.getVectorElementType();
+ SDValue Width = DAG.getConstant(EltVT.getSizeInBits(), MVT::i64);
+ SDValue Shifted = DAG.getNode(ISD::SHL, dl, MVT::i64, Width,
+ DAG.getConstant(32, MVT::i64));
+
+ for (unsigned i = 0, e = NElts; i != e; ++i) {
+ // LLVM's BUILD_VECTOR operands are in little-endian order, whereas
+ // Hexagon is big-endian.
+ unsigned OpIdx = NElts - i - 1;
+ SDValue Operand = BVN->getOperand(OpIdx);
+ if (dyn_cast<ConstantSDNode>(Operand))
+ // This operand is already in ConstVal.
+ continue;
+
+ if (VT.getSizeInBits() == 64 &&
+ Operand.getValueType().getSizeInBits() == 32) {
+ SDValue C = DAG.getConstant(0, MVT::i32);
+ Operand = DAG.getNode(HexagonISD::COMBINE, dl, VT, C, Operand);
+ }
+
+ SDValue Idx = DAG.getConstant(OpIdx, MVT::i64);
+ SDValue Offset = DAG.getNode(ISD::MUL, dl, MVT::i64, Idx, Width);
+ SDValue Combined = DAG.getNode(ISD::OR, dl, MVT::i64, Shifted, Offset);
+ const SDValue Ops[] = {ConstVal, Operand, Combined};
+
+ if (VT.getSizeInBits() == 32)
+ ConstVal = DAG.getNode(HexagonISD::INSERT_riv, dl, MVT::i32, Ops);
+ else
+ ConstVal = DAG.getNode(HexagonISD::INSERT_rdv, dl, MVT::i64, Ops);
+ }
+ }
+
+ return DAG.getNode(ISD::BITCAST, dl, VT, ConstVal);
+}
+
+SDValue
+HexagonTargetLowering::LowerCONCAT_VECTORS(SDValue Op,
+ SelectionDAG &DAG) const {
+ SDLoc dl(Op);
+ EVT VT = Op.getValueType();
+ unsigned NElts = Op.getNumOperands();
+ SDValue Vec = Op.getOperand(0);
+ EVT VecVT = Vec.getValueType();
+ SDValue Width = DAG.getConstant(VecVT.getSizeInBits(), MVT::i64);
+ SDValue Shifted = DAG.getNode(ISD::SHL, dl, MVT::i64, Width,
+ DAG.getConstant(32, MVT::i64));
+ SDValue ConstVal = DAG.getConstant(0, MVT::i64);
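+ // Shifted places the subvector width in the upper 32 bits, so each
+ // (width << 32) | offset descriptor passed to INSERT_riv/INSERT_rdv below
+ // can be formed with a single OR.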
+
+ ConstantSDNode *W = dyn_cast<ConstantSDNode>(Width);
+ ConstantSDNode *S = dyn_cast<ConstantSDNode>(Shifted);
+
+ if ((VecVT.getSimpleVT() == MVT::v2i16) && (NElts == 2) && W && S) {
+ if ((W->getZExtValue() == 32) && ((S->getZExtValue() >> 32) == 32)) {
+ // We are trying to concat two v2i16 to a single v4i16.
+ SDValue Vec0 = Op.getOperand(1);
+ SDValue Combined = DAG.getNode(HexagonISD::COMBINE, dl, VT, Vec0, Vec);
+ return DAG.getNode(ISD::BITCAST, dl, VT, Combined);
+ }
+ }
+
+ if ((VecVT.getSimpleVT() == MVT::v4i8) && (NElts == 2) && W && S) {
+ if ((W->getZExtValue() == 32) && ((S->getZExtValue() >> 32) == 32)) {
+ // We are trying to concat two v4i8 to a single v8i8.
+ SDValue Vec0 = Op.getOperand(1);
+ SDValue Combined = DAG.getNode(HexagonISD::COMBINE, dl, VT, Vec0, Vec);
+ return DAG.getNode(ISD::BITCAST, dl, VT, Combined);
+ }
+ }
+
+ for (unsigned i = 0, e = NElts; i != e; ++i) {
+ unsigned OpIdx = NElts - i - 1;
+ SDValue Operand = Op.getOperand(OpIdx);
+
+ if (VT.getSizeInBits() == 64 &&
+ Operand.getValueType().getSizeInBits() == 32) {
+ SDValue C = DAG.getConstant(0, MVT::i32);
+ Operand = DAG.getNode(HexagonISD::COMBINE, dl, VT, C, Operand);
+ }
+
+ SDValue Idx = DAG.getConstant(OpIdx, MVT::i64);
+ SDValue Offset = DAG.getNode(ISD::MUL, dl, MVT::i64, Idx, Width);
+ SDValue Combined = DAG.getNode(ISD::OR, dl, MVT::i64, Shifted, Offset);
+ const SDValue Ops[] = {ConstVal, Operand, Combined};
+
+ if (VT.getSizeInBits() == 32)
+ ConstVal = DAG.getNode(HexagonISD::INSERT_riv, dl, MVT::i32, Ops);
+ else
+ ConstVal = DAG.getNode(HexagonISD::INSERT_rdv, dl, MVT::i64, Ops);
+ }
+
+ return DAG.getNode(ISD::BITCAST, dl, VT, ConstVal);
+}
+
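Editor's note: concatenating two 32-bit vectors maps to a single COMBINE, which forms a 64-bit register pair from two 32-bit registers with the first operand in the high half (hence the swapped Vec0/Vec order above, and the zero-extension via COMBINE(0, Operand) earlier). A standalone model of that operand convention, assuming first-operand-high as the code implies:

#include <cstdint>

// Model of HexagonISD::COMBINE on two 32-bit values: the first operand
// becomes the high word of the 64-bit result, the second the low word.
static uint64_t combine(uint32_t Hi, uint32_t Lo) {
  return (uint64_t(Hi) << 32) | Lo;
}

int main() {
  return combine(0xDEADBEEF, 0x12345678) == 0xDEADBEEF12345678ULL ? 0 : 1;
}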
+SDValue
+HexagonTargetLowering::LowerEXTRACT_VECTOR(SDValue Op,
+ SelectionDAG &DAG) const {
+ EVT VT = Op.getValueType();
+ int VTN = VT.isVector() ? VT.getVectorNumElements() : 1;
+ SDLoc dl(Op);
+ SDValue Idx = Op.getOperand(1);
+ SDValue Vec = Op.getOperand(0);
+ EVT VecVT = Vec.getValueType();
+ EVT EltVT = VecVT.getVectorElementType();
+ int EltSize = EltVT.getSizeInBits();
+ SDValue Width = DAG.getConstant(Op.getOpcode() == ISD::EXTRACT_VECTOR_ELT ?
+ EltSize : VTN * EltSize, MVT::i64);
+
+ // Constant element number.
+ if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Idx)) {
+ SDValue Offset = DAG.getConstant(C->getZExtValue() * EltSize, MVT::i32);
+ const SDValue Ops[] = {Vec, Width, Offset};
+
+ ConstantSDNode *W = dyn_cast<ConstantSDNode>(Width);
+ assert(W && "Non constant width in LowerEXTRACT_VECTOR");
+
+ SDValue N;
+ // For certain extracts, it is a simple _hi/_lo subreg.
+ if (VecVT.getSimpleVT() == MVT::v2i32) {
+ // v2i32 -> i32: subregister extract.
+ if (C->getZExtValue() == 0)
+ N = DAG.getTargetExtractSubreg(Hexagon::subreg_loreg, dl,
+ MVT::i32, Vec);
+ else if (C->getZExtValue() == 1)
+ N = DAG.getTargetExtractSubreg(Hexagon::subreg_hireg, dl,
+ MVT::i32, Vec);
+ else
+ llvm_unreachable("Bad offset");
+ } else if ((VecVT.getSimpleVT() == MVT::v4i16) &&
+ (W->getZExtValue() == 32)) {
+ // v4i16 -> v2i16/i32: subregister extract.
+ if (C->getZExtValue() == 0)
+ N = DAG.getTargetExtractSubreg(Hexagon::subreg_loreg, dl,
+ MVT::i32, Vec);
+ else if (C->getZExtValue() == 2)
+ N = DAG.getTargetExtractSubreg(Hexagon::subreg_hireg, dl,
+ MVT::i32, Vec);
+ else
+ llvm_unreachable("Bad offset");
+ } else if ((VecVT.getSimpleVT() == MVT::v8i8) &&
+ (W->getZExtValue() == 32)) {
+ // v8i8 -> v4i8/i32: subregister extract.
+ if (C->getZExtValue() == 0)
+ N = DAG.getTargetExtractSubreg(Hexagon::subreg_loreg, dl,
+ MVT::i32, Vec);
+ else if (C->getZExtValue() == 4)
+ N = DAG.getTargetExtractSubreg(Hexagon::subreg_hireg, dl,
+ MVT::i32, Vec);
+ else
+ llvm_unreachable("Bad offset");
+ } else if (VecVT.getSizeInBits() == 32) {
+ N = DAG.getNode(HexagonISD::EXTRACTU_ri, dl, MVT::i32, Ops);
+ } else {
+ N = DAG.getNode(HexagonISD::EXTRACTU_rd, dl, MVT::i64, Ops);
+ if (VT.getSizeInBits() == 32)
+ N = DAG.getTargetExtractSubreg(Hexagon::subreg_loreg, dl, MVT::i32, N);
+ }
+
+ return DAG.getNode(ISD::BITCAST, dl, VT, N);
+ }
+
+ // Variable element number.
+ SDValue Offset = DAG.getNode(ISD::MUL, dl, MVT::i32, Idx,
+ DAG.getConstant(EltSize, MVT::i32));
+ SDValue Shifted = DAG.getNode(ISD::SHL, dl, MVT::i64, Width,
+ DAG.getConstant(32, MVT::i64));
+ SDValue Combined = DAG.getNode(ISD::OR, dl, MVT::i64, Shifted, Offset);
+
+ const SDValue Ops[] = {Vec, Combined};
+
+ SDValue N;
+ if (VecVT.getSizeInBits() == 32) {
+ N = DAG.getNode(HexagonISD::EXTRACTU_riv, dl, MVT::i32, Ops);
+ } else {
+ N = DAG.getNode(HexagonISD::EXTRACTU_rdv, dl, MVT::i64, Ops);
+ if (VT.getSizeInBits() == 32)
+ N = DAG.getTargetExtractSubreg(Hexagon::subreg_loreg, dl, MVT::i32, N);
+ }
+ return DAG.getNode(ISD::BITCAST, dl, VT, N);
+}
+
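Editor's note: the variable-index path packs the same (width << 32) | offset descriptor and hands it to EXTRACTU_riv/EXTRACTU_rdv, which conceptually perform an unsigned bitfield extract. A plain-C++ sketch of that semantics (illustrative, not DAG code):

#include <cstdint>

// Unsigned bitfield extract: read 'width' bits of Src starting at bit
// 'offset', zero-extended -- what the EXTRACTU_* nodes above model.
static uint64_t extractField(uint64_t Src, uint32_t Width, uint32_t Offset) {
  uint64_t Mask = Width >= 64 ? ~0ULL : (1ULL << Width) - 1;
  return (Src >> Offset) & Mask;
}

int main() {
  // Read lane 1 of a v4i16 vector: width 16, offset 1 * 16.
  uint64_t Vec = 0x0004000300020001ULL;
  return extractField(Vec, 16, 16) == 2 ? 0 : 1;
}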
+SDValue
+HexagonTargetLowering::LowerINSERT_VECTOR(SDValue Op,
+ SelectionDAG &DAG) const {
+ EVT VT = Op.getValueType();
+ int VTN = VT.isVector() ? VT.getVectorNumElements() : 1;
+ SDLoc dl(Op);
+ SDValue Vec = Op.getOperand(0);
+ SDValue Val = Op.getOperand(1);
+ SDValue Idx = Op.getOperand(2);
+ EVT VecVT = Vec.getValueType();
+ EVT EltVT = VecVT.getVectorElementType();
+ int EltSize = EltVT.getSizeInBits();
+ SDValue Width = DAG.getConstant(Op.getOpcode() == ISD::INSERT_VECTOR_ELT ?
+ EltSize : VTN * EltSize, MVT::i64);
+
+ if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Idx)) {
+ SDValue Offset = DAG.getConstant(C->getSExtValue() * EltSize, MVT::i32);
+ const SDValue Ops[] = {Vec, Val, Width, Offset};
+
+ SDValue N;
+ if (VT.getSizeInBits() == 32)
+ N = DAG.getNode(HexagonISD::INSERT_ri, dl, MVT::i32, Ops);
+ else
+ N = DAG.getNode(HexagonISD::INSERT_rd, dl, MVT::i64, Ops);
+
+ return DAG.getNode(ISD::BITCAST, dl, VT, N);
+ }
+
+ // Variable element number.
+ SDValue Offset = DAG.getNode(ISD::MUL, dl, MVT::i32, Idx,
+ DAG.getConstant(EltSize, MVT::i32));
+ SDValue Shifted = DAG.getNode(ISD::SHL, dl, MVT::i64, Width,
+ DAG.getConstant(32, MVT::i64));
+ SDValue Combined = DAG.getNode(ISD::OR, dl, MVT::i64, Shifted, Offset);
+
+ if (VT.getSizeInBits() == 64 &&
+ Val.getValueType().getSizeInBits() == 32) {
+ SDValue C = DAG.getConstant(0, MVT::i32);
+ Val = DAG.getNode(HexagonISD::COMBINE, dl, VT, C, Val);
+ }
+
+ const SDValue Ops[] = {Vec, Val, Combined};
+
+ SDValue N;
+ if (VT.getSizeInBits() == 32)
+ N = DAG.getNode(HexagonISD::INSERT_riv, dl, MVT::i32, Ops);
+ else
+ N = DAG.getNode(HexagonISD::INSERT_rdv, dl, MVT::i64, Ops);
+
+ return DAG.getNode(ISD::BITCAST, dl, VT, N);
+}
+
bool
HexagonTargetLowering::allowTruncateForTailCall(Type *Ty1, Type *Ty2) const {
// Assuming the caller does not have either a signext or zeroext modifier, and
@@ -1549,7 +2372,19 @@ SDValue
HexagonTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
switch (Op.getOpcode()) {
default: llvm_unreachable("Should not custom lower this!");
- case ISD::ConstantPool: return LowerConstantPool(Op, DAG);
+ case ISD::CONCAT_VECTORS: return LowerCONCAT_VECTORS(Op, DAG);
+ case ISD::INSERT_SUBVECTOR: return LowerINSERT_VECTOR(Op, DAG);
+ case ISD::INSERT_VECTOR_ELT: return LowerINSERT_VECTOR(Op, DAG);
+ case ISD::EXTRACT_SUBVECTOR: return LowerEXTRACT_VECTOR(Op, DAG);
+ case ISD::EXTRACT_VECTOR_ELT: return LowerEXTRACT_VECTOR(Op, DAG);
+ case ISD::BUILD_VECTOR: return LowerBUILD_VECTOR(Op, DAG);
+ case ISD::VECTOR_SHUFFLE: return LowerVECTOR_SHUFFLE(Op, DAG);
+ case ISD::SRA:
+ case ISD::SHL:
+ case ISD::SRL:
+ return LowerVECTOR_SHIFT(Op, DAG);
+ case ISD::ConstantPool:
+ return LowerConstantPool(Op, DAG);
case ISD::EH_RETURN: return LowerEH_RETURN(Op, DAG);
// Frame & Return address. Currently unimplemented.
case ISD::RETURNADDR: return LowerRETURNADDR(Op, DAG);
@@ -1561,9 +2396,14 @@ HexagonTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
case ISD::BlockAddress: return LowerBlockAddress(Op, DAG);
case ISD::VASTART: return LowerVASTART(Op, DAG);
case ISD::BR_JT: return LowerBR_JT(Op, DAG);
+ // Custom lower some vector loads.
+ case ISD::LOAD: return LowerLOAD(Op, DAG);
case ISD::DYNAMIC_STACKALLOC: return LowerDYNAMIC_STACKALLOC(Op, DAG);
case ISD::SELECT: return Op;
+ case ISD::SETCC: return LowerSETCC(Op, DAG);
+ case ISD::VSELECT: return LowerVSELECT(Op, DAG);
+ case ISD::CTPOP: return LowerCTPOP(Op, DAG);
case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG);
case ISD::INLINEASM: return LowerINLINEASM(Op, DAG);
diff --git a/lib/Target/Hexagon/HexagonISelLowering.h b/lib/Target/Hexagon/HexagonISelLowering.h
index 151c28f..34b1ebb 100644
--- a/lib/Target/Hexagon/HexagonISelLowering.h
+++ b/lib/Target/Hexagon/HexagonISelLowering.h
@@ -37,6 +37,10 @@ bool isPositiveHalfWord(SDNode *N);
ADJDYNALLOC,
ARGEXTEND,
+ PIC_ADD,
+ AT_GOT,
+ AT_PCREL,
+
CMPICC, // Compare two GPR operands, set icc.
CMPFCC, // Compare two FP operands, set fcc.
BRICC, // Branch to dest on icc condition
@@ -54,23 +58,44 @@ bool isPositiveHalfWord(SDNode *N);
CALLR,
RET_FLAG, // Return with a flag operand.
- BR_JT, // Jump table.
- BARRIER, // Memory barrier
+ BR_JT, // Branch through jump table.
+ BARRIER, // Memory barrier.
+ JT, // Jump table.
+ CP, // Constant pool.
POPCOUNT,
COMBINE,
- WrapperJT,
- WrapperCP,
- WrapperCombineII,
- WrapperCombineRR,
- WrapperCombineRI_V4,
- WrapperCombineIR_V4,
- WrapperPackhl,
- WrapperSplatB,
- WrapperSplatH,
- WrapperShuffEB,
- WrapperShuffEH,
- WrapperShuffOB,
- WrapperShuffOH,
+ PACKHL,
+ VSPLATB,
+ VSPLATH,
+ SHUFFEB,
+ SHUFFEH,
+ SHUFFOB,
+ SHUFFOH,
+ VSXTBH,
+ VSXTBW,
+ VSRAW,
+ VSRAH,
+ VSRLW,
+ VSRLH,
+ VSHLW,
+ VSHLH,
+ VCMPBEQ,
+ VCMPBGT,
+ VCMPBGTU,
+ VCMPHEQ,
+ VCMPHGT,
+ VCMPHGTU,
+ VCMPWEQ,
+ VCMPWGT,
+ VCMPWGTU,
+ INSERT_ri,
+ INSERT_rd,
+ INSERT_riv,
+ INSERT_rdv,
+ EXTRACTU_ri,
+ EXTRACTU_rd,
+ EXTRACTU_riv,
+ EXTRACTU_rdv,
TC_RETURN,
EH_RETURN,
DCFETCH
@@ -85,6 +110,8 @@ bool isPositiveHalfWord(SDNode *N);
bool CanReturnSmallStruct(const Function* CalleeFn,
unsigned& RetSize) const;
+ void promoteLdStType(EVT VT, EVT PromotedLdStVT);
+
public:
const HexagonSubtarget *Subtarget;
explicit HexagonTargetLowering(const TargetMachine &TM,
@@ -110,10 +137,17 @@ bool isPositiveHalfWord(SDNode *N);
bool allowTruncateForTailCall(Type *Ty1, Type *Ty2) const override;
- SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override;
+ // Should we expand the build vector with shuffles?
+ bool shouldExpandBuildVectorWithShuffles(EVT VT,
+ unsigned DefinedValues) const override;
+ SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override;
const char *getTargetNodeName(unsigned Opcode) const override;
- SDValue LowerBR_JT(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerEXTRACT_VECTOR(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerINSERT_VECTOR(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerBR_JT(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerINLINEASM(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerEH_LABEL(SDValue Op, SelectionDAG &DAG) const;
@@ -137,9 +171,13 @@ bool isPositiveHalfWord(SDNode *N);
const SmallVectorImpl<SDValue> &OutVals,
SDValue Callee) const;
+ SDValue LowerSETCC(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerVSELECT(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerCTPOP(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerATOMIC_FENCE(SDValue Op, SelectionDAG& DAG) const;
SDValue LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerLOAD(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerReturn(SDValue Chain,
CallingConv::ID CallConv, bool isVarArg,
@@ -170,6 +208,15 @@ bool isPositiveHalfWord(SDNode *N);
const std::string &Constraint,
MVT VT) const override;
+ unsigned getInlineAsmMemConstraint(
+ const std::string &ConstraintCode) const override {
+ if (ConstraintCode == "o")
+ return InlineAsm::Constraint_o;
+ else if (ConstraintCode == "v")
+ return InlineAsm::Constraint_v;
+ return TargetLowering::getInlineAsmMemConstraint(ConstraintCode);
+ }
+
// Intrinsics
SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const;
/// isLegalAddressingMode - Return true if the addressing mode represented
diff --git a/lib/Target/Hexagon/HexagonInstrFormats.td b/lib/Target/Hexagon/HexagonInstrFormats.td
index 3d04678..36a7e9f 100644
--- a/lib/Target/Hexagon/HexagonInstrFormats.td
+++ b/lib/Target/Hexagon/HexagonInstrFormats.td
@@ -76,7 +76,7 @@ class OpcodeHexagon {
class InstHexagon<dag outs, dag ins, string asmstr, list<dag> pattern,
string cstr, InstrItinClass itin, IType type>
- : Instruction, OpcodeHexagon {
+ : Instruction {
let Namespace = "Hexagon";
dag OutOperandList = outs;
@@ -84,18 +84,18 @@ class InstHexagon<dag outs, dag ins, string asmstr, list<dag> pattern,
let AsmString = asmstr;
let Pattern = pattern;
let Constraints = cstr;
- let Itinerary = itin;
- let Size = 4;
-
- // SoftFail is a field the disassembler can use to provide a way for
- // instructions to not match without killing the whole decode process. It is
- // mainly used for ARM, but Tablegen expects this field to exist or it fails
- // to build the decode table.
- field bits<32> SoftFail = 0;
-
- // *** Must match MCTargetDesc/HexagonBaseInfo.h ***
-
- // Instruction type according to the ISA.
+ let Itinerary = itin;
+ let Size = 4;
+
+ // SoftFail is a field the disassembler can use to provide a way for
+ // instructions to not match without killing the whole decode process. It is
+ // mainly used for ARM, but Tablegen expects this field to exist or it fails
+ // to build the decode table.
+ field bits<32> SoftFail = 0;
+
+ // *** Must match MCTargetDesc/HexagonBaseInfo.h ***
+
+ // Instruction type according to the ISA.
IType Type = type;
let TSFlags{4-0} = Type.Value;
@@ -197,7 +197,7 @@ class InstHexagon<dag outs, dag ins, string asmstr, list<dag> pattern,
let mayLoad = 1 in
class LDInst<dag outs, dag ins, string asmstr, list<dag> pattern = [],
string cstr = "", InstrItinClass itin = LD_tc_ld_SLOT01>
- : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeLD>;
+ : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeLD>, OpcodeHexagon;
let mayLoad = 1 in
class LDInst2<dag outs, dag ins, string asmstr, list<dag> pattern = [],
@@ -217,7 +217,7 @@ class LDInstPost<dag outs, dag ins, string asmstr, list<dag> pattern = [],
let mayLoad = 1 in
class LD0Inst<dag outs, dag ins, string asmstr, list<dag> pattern = [],
string cstr = "", InstrItinClass itin=LD_tc_ld_SLOT0>
- : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeLD>;
+ : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeLD>, OpcodeHexagon;
// ST Instruction Class in V2/V3 can take SLOT0 only.
// ST Instruction Class in V4 can take SLOT0 & SLOT1.
@@ -225,7 +225,7 @@ class LD0Inst<dag outs, dag ins, string asmstr, list<dag> pattern = [],
let mayStore = 1 in
class STInst<dag outs, dag ins, string asmstr, list<dag> pattern = [],
string cstr = "", InstrItinClass itin = ST_tc_st_SLOT01>
- : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeST>;
+ : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeST>, OpcodeHexagon;
class STInst2<dag outs, dag ins, string asmstr, list<dag> pattern = [],
string cstr = "">
@@ -234,7 +234,7 @@ class STInst2<dag outs, dag ins, string asmstr, list<dag> pattern = [],
let mayStore = 1 in
class ST0Inst<dag outs, dag ins, string asmstr, list<dag> pattern = [],
string cstr = "", InstrItinClass itin = ST_tc_ld_SLOT0>
- : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeST>;
+ : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeST>, OpcodeHexagon;
// ST Instruction Class in V2/V3 can take SLOT0 only.
// ST Instruction Class in V4 can take SLOT0 & SLOT1.
@@ -247,13 +247,14 @@ class STInstPost<dag outs, dag ins, string asmstr, list<dag> pattern = [],
// In V2/V3 we used ST for this but in v4 ST can take SLOT0 or SLOT1.
class SYSInst<dag outs, dag ins, string asmstr, list<dag> pattern = [],
string cstr = "", InstrItinClass itin = ST_tc_3stall_SLOT0>
- : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeSYSTEM>;
+ : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeSYSTEM>,
+ OpcodeHexagon;
// ALU32 Instruction Class in V2/V3/V4.
// Definition of the instruction class NOT CHANGED.
class ALU32Inst<dag outs, dag ins, string asmstr, list<dag> pattern = [],
string cstr = "", InstrItinClass itin = ALU32_2op_tc_1_SLOT0123>
- : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeALU32>;
+ : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeALU32>, OpcodeHexagon;
// ALU64 Instruction Class in V2/V3.
// XTYPE Instruction Class in V4.
@@ -261,7 +262,8 @@ class ALU32Inst<dag outs, dag ins, string asmstr, list<dag> pattern = [],
// Name of the Instruction Class changed from ALU64 to XTYPE from V2/V3 to V4.
class ALU64Inst<dag outs, dag ins, string asmstr, list<dag> pattern = [],
string cstr = "", InstrItinClass itin = ALU64_tc_2_SLOT23>
- : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeXTYPE>;
+ : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeXTYPE>,
+ OpcodeHexagon;
class ALU64_acc<dag outs, dag ins, string asmstr, list<dag> pattern = [],
string cstr = "", InstrItinClass itin = ALU64_tc_2_SLOT23>
@@ -274,7 +276,8 @@ class ALU64_acc<dag outs, dag ins, string asmstr, list<dag> pattern = [],
// Name of the Instruction Class changed from M to XTYPE from V2/V3 to V4.
class MInst<dag outs, dag ins, string asmstr, list<dag> pattern = [],
string cstr = "", InstrItinClass itin = M_tc_3x_SLOT23>
- : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeXTYPE>;
+ : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeXTYPE>,
+ OpcodeHexagon;
// M Instruction Class in V2/V3.
// XTYPE Instruction Class in V4.
@@ -290,7 +293,8 @@ class MInst_acc<dag outs, dag ins, string asmstr, list<dag> pattern = [],
// Name of the Instruction Class changed from S to XTYPE from V2/V3 to V4.
class SInst<dag outs, dag ins, string asmstr, list<dag> pattern = [],
string cstr = "", InstrItinClass itin = S_2op_tc_1_SLOT23>
- : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeXTYPE>;
+ : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeXTYPE>,
+ OpcodeHexagon;
// S Instruction Class in V2/V3.
// XTYPE Instruction Class in V4.
@@ -304,34 +308,37 @@ class SInst_acc<dag outs, dag ins, string asmstr, list<dag> pattern = [],
// Definition of the instruction class NOT CHANGED.
class JInst<dag outs, dag ins, string asmstr, list<dag> pattern = [],
string cstr = "", InstrItinClass itin = J_tc_2early_SLOT23>
- : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeJ>;
+ : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeJ>, OpcodeHexagon;
// JR Instruction Class in V2/V3/V4.
// Definition of the instruction class NOT CHANGED.
class JRInst<dag outs, dag ins, string asmstr, list<dag> pattern = [],
string cstr = "", InstrItinClass itin = J_tc_2early_SLOT2>
- : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeJR>;
+ : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeJR>, OpcodeHexagon;
// CR Instruction Class in V2/V3/V4.
// Definition of the instruction class NOT CHANGED.
class CRInst<dag outs, dag ins, string asmstr, list<dag> pattern = [],
string cstr = "", InstrItinClass itin = CR_tc_2early_SLOT3>
- : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeCR>;
+ : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeCR>, OpcodeHexagon;
let isCodeGenOnly = 1, isPseudo = 1 in
class Endloop<dag outs, dag ins, string asmstr, list<dag> pattern = [],
string cstr = "", InstrItinClass itin = J_tc_2early_SLOT0123>
- : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeENDLOOP>;
+ : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeENDLOOP>,
+ OpcodeHexagon;
let isCodeGenOnly = 1, isPseudo = 1 in
class Pseudo<dag outs, dag ins, string asmstr, list<dag> pattern = [],
string cstr = "">
- : InstHexagon<outs, ins, asmstr, pattern, cstr, PSEUDO, TypePSEUDO>;
+ : InstHexagon<outs, ins, asmstr, pattern, cstr, PSEUDO, TypePSEUDO>,
+ OpcodeHexagon;
let isCodeGenOnly = 1, isPseudo = 1 in
class PseudoM<dag outs, dag ins, string asmstr, list<dag> pattern = [],
string cstr="">
- : InstHexagon<outs, ins, asmstr, pattern, cstr, PSEUDOM, TypePSEUDO>;
+ : InstHexagon<outs, ins, asmstr, pattern, cstr, PSEUDOM, TypePSEUDO>,
+ OpcodeHexagon;
//===----------------------------------------------------------------------===//
// Instruction Classes Definitions -
diff --git a/lib/Target/Hexagon/HexagonInstrFormatsV4.td b/lib/Target/Hexagon/HexagonInstrFormatsV4.td
index 5fec80b..7f7b2c9 100644
--- a/lib/Target/Hexagon/HexagonInstrFormatsV4.td
+++ b/lib/Target/Hexagon/HexagonInstrFormatsV4.td
@@ -17,10 +17,88 @@
// *** Must match BaseInfo.h ***
//----------------------------------------------------------------------------//
-def TypeMEMOP : IType<9>;
-def TypeNV : IType<10>;
+def TypeMEMOP : IType<9>;
+def TypeNV : IType<10>;
+def TypeDUPLEX : IType<11>;
def TypeCOMPOUND : IType<12>;
-def TypePREFIX : IType<30>;
+def TypeAG_VX : IType<28>;
+def TypeAG_VM : IType<29>;
+def TypePREFIX : IType<30>;
+
+// Duplex Instruction Class Declaration
+//===----------------------------------------------------------------------===//
+
+class OpcodeDuplex {
+ field bits<32> Inst = ?; // Default to an invalid insn.
+ bits<4> IClass = 0; // ICLASS
+ bits<13> ISubHi = 0; // High sub-insn
+ bits<13> ISubLo = 0; // Low sub-insn
+
+ let Inst{31-29} = IClass{3-1};
+ let Inst{13} = IClass{0};
+ let Inst{15-14} = 0;
+ let Inst{28-16} = ISubHi;
+ let Inst{12-0} = ISubLo;
+}
+
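Editor's note: OpcodeDuplex scatters the 4-bit ICLASS across Inst{31-29} and Inst{13}, leaves the parse bits Inst{15-14} zero (which, in the Hexagon encoding, is what marks a word as a duplex), and packs the two 13-bit sub-instructions. The same layout as a standalone encoder, with field widths copied from the class above:

#include <cstdint>

// Assemble a 32-bit duplex word from a 4-bit iclass and two 13-bit
// sub-instructions, mirroring the 'let Inst{...}' assignments above.
static uint32_t encodeDuplex(uint8_t IClass, uint16_t ISubHi, uint16_t ISubLo) {
  uint32_t Inst = 0;
  Inst |= uint32_t((IClass >> 1) & 0x7) << 29; // Inst{31-29} = IClass{3-1}
  Inst |= uint32_t(IClass & 0x1) << 13;        // Inst{13}    = IClass{0}
  Inst |= uint32_t(ISubHi & 0x1FFF) << 16;     // Inst{28-16} = high sub-insn
  Inst |= uint32_t(ISubLo & 0x1FFF);           // Inst{12-0}  = low sub-insn
  return Inst;                                 // Inst{15-14} stay 0 (duplex)
}

int main() {
  return encodeDuplex(0xA, 0, 0) == 0xA0000000u ? 0 : 1;
}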
+class InstDuplex<bits<4> iClass, list<dag> pattern = [],
+ string cstr = "">
+ : Instruction, OpcodeDuplex {
+ let Namespace = "Hexagon";
+ IType Type = TypeDUPLEX; // Uses slots 0 and 1.
+ let isCodeGenOnly = 1;
+ let hasSideEffects = 0;
+ dag OutOperandList = (outs);
+ dag InOperandList = (ins);
+ let IClass = iClass;
+ let Constraints = cstr;
+ let Itinerary = DUPLEX;
+ let Size = 4;
+
+ // SoftFail is a field the disassembler can use to provide a way for
+ // instructions to not match without killing the whole decode process. It is
+ // mainly used for ARM, but Tablegen expects this field to exist or it fails
+ // to build the decode table.
+ field bits<32> SoftFail = 0;
+
+ // *** Must match MCTargetDesc/HexagonBaseInfo.h ***
+
+ let TSFlags{4-0} = Type.Value;
+
+ // Predicated instructions.
+ bits<1> isPredicated = 0;
+ let TSFlags{6} = isPredicated;
+ bits<1> isPredicatedFalse = 0;
+ let TSFlags{7} = isPredicatedFalse;
+ bits<1> isPredicatedNew = 0;
+ let TSFlags{8} = isPredicatedNew;
+
+ // New-value insn helper fields.
+ bits<1> isNewValue = 0;
+ let TSFlags{9} = isNewValue; // New-value consumer insn.
+ bits<1> hasNewValue = 0;
+ let TSFlags{10} = hasNewValue; // New-value producer insn.
+ bits<3> opNewValue = 0;
+ let TSFlags{13-11} = opNewValue; // New-value produced operand.
+ bits<1> isNVStorable = 0;
+ let TSFlags{14} = isNVStorable; // Store that can become new-value store.
+ bits<1> isNVStore = 0;
+ let TSFlags{15} = isNVStore; // New-value store insn.
+
+ // Immediate extender helper fields.
+ bits<1> isExtendable = 0;
+ let TSFlags{16} = isExtendable; // Insn may be extended.
+ bits<1> isExtended = 0;
+ let TSFlags{17} = isExtended; // Insn must be extended.
+ bits<3> opExtendable = 0;
+ let TSFlags{20-18} = opExtendable; // Which operand may be extended.
+ bits<1> isExtentSigned = 0;
+ let TSFlags{21} = isExtentSigned; // Signed or unsigned range.
+ bits<5> opExtentBits = 0;
+ let TSFlags{26-22} = opExtentBits; // Number of bits of range before extending.
+ bits<2> opExtentAlign = 0;
+ let TSFlags{28-27} = opExtentAlign; // Alignment exponent before extending.
+}
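Editor's note: InstDuplex re-declares the same TSFlags layout as InstHexagon (which, per the comment, must match MCTargetDesc/HexagonBaseInfo.h), so generic code can decode either class uniformly. A sketch of reading those fields back out, with bit positions copied from the assignments above; the accessor name is illustrative:

#include <cstdint>

// Read TSFlags{Hi-Lo}, mirroring the 'let TSFlags{...}' packing above,
// e.g. Type = {4-0}, isPredicated = {6}, opExtendable = {20-18},
// opExtentBits = {26-22}, opExtentAlign = {28-27}.
static unsigned getTSField(uint64_t TSFlags, unsigned Lo, unsigned Hi) {
  return unsigned((TSFlags >> Lo) & ((1ULL << (Hi - Lo + 1)) - 1));
}

int main() {
  uint64_t F = 0x5; // example: Type = 5, all other fields clear
  return getTSField(F, 0, 4) == 5 ? 0 : 1;
}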
//----------------------------------------------------------------------------//
// Instruction Classes Definitions
@@ -31,7 +109,7 @@ def TypePREFIX : IType<30>;
//
class NVInst<dag outs, dag ins, string asmstr, list<dag> pattern = [],
string cstr = "", InstrItinClass itin = NCJ_tc_3or4stall_SLOT0>
- : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeNV>;
+ : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeNV>, OpcodeHexagon;
class NVInst_V4<dag outs, dag ins, string asmstr, list<dag> pattern = [],
string cstr = "", InstrItinClass itin = NCJ_tc_3or4stall_SLOT0>
@@ -56,7 +134,8 @@ class NCJInst<dag outs, dag ins, string asmstr, list<dag> pattern = [],
let mayLoad = 1, mayStore = 1 in
class MEMInst<dag outs, dag ins, string asmstr, list<dag> pattern = [],
string cstr = "", InstrItinClass itin = V4LDST_tc_st_SLOT0>
- : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeMEMOP>;
+ : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeMEMOP>,
+ OpcodeHexagon;
class MEMInst_V4<dag outs, dag ins, string asmstr, list<dag> pattern = [],
string cstr = "", InstrItinClass itin = V4LDST_tc_st_SLOT0>
@@ -65,8 +144,9 @@ class MEMInst_V4<dag outs, dag ins, string asmstr, list<dag> pattern = [],
let isCodeGenOnly = 1 in
class EXTENDERInst<dag outs, dag ins, string asmstr, list<dag> pattern = []>
: InstHexagon<outs, ins, asmstr, pattern, "", EXTENDER_tc_1_SLOT0123,
- TypePREFIX>;
+ TypePREFIX>, OpcodeHexagon;
class CJInst<dag outs, dag ins, string asmstr, list<dag> pattern = [],
string cstr = "">
- : InstHexagon<outs, ins, asmstr, pattern, cstr, COMPOUND, TypeCOMPOUND>;
+ : InstHexagon<outs, ins, asmstr, pattern, cstr, COMPOUND, TypeCOMPOUND>,
+ OpcodeHexagon;
diff --git a/lib/Target/Hexagon/HexagonInstrInfo.cpp b/lib/Target/Hexagon/HexagonInstrInfo.cpp
index 9bae12c..fbf1ca9 100644
--- a/lib/Target/Hexagon/HexagonInstrInfo.cpp
+++ b/lib/Target/Hexagon/HexagonInstrInfo.cpp
@@ -62,10 +62,8 @@ const int Hexagon_MEMB_AUTOINC_MIN = -8;
void HexagonInstrInfo::anchor() {}
HexagonInstrInfo::HexagonInstrInfo(HexagonSubtarget &ST)
- : HexagonGenInstrInfo(Hexagon::ADJCALLSTACKDOWN, Hexagon::ADJCALLSTACKUP),
- RI(ST), Subtarget(ST) {
-}
-
+ : HexagonGenInstrInfo(Hexagon::ADJCALLSTACKDOWN, Hexagon::ADJCALLSTACKUP),
+ RI(), Subtarget(ST) {}
/// isLoadFromStackSlot - If the specified machine instruction is a direct
/// load from a stack slot, return the virtual or physical register number of
@@ -159,15 +157,19 @@ HexagonInstrInfo::InsertBranch(MachineBasicBlock &MBB,MachineBasicBlock *TBB,
}
BuildMI(&MBB, DL, get(BOpc)).addMBB(TBB);
} else {
- BuildMI(&MBB, DL,
- get(BccOpc)).addReg(Cond[regPos].getReg()).addMBB(TBB);
+ // If Cond[0] is a basic block, insert ENDLOOP0.
+ if (Cond[0].isMBB())
+ BuildMI(&MBB, DL, get(Hexagon::ENDLOOP0)).addMBB(Cond[0].getMBB());
+ else
+ BuildMI(&MBB, DL,
+ get(BccOpc)).addReg(Cond[regPos].getReg()).addMBB(TBB);
}
return 1;
}
+ // We don't handle ENDLOOP0 with a conditional branch in AnalyzeBranch.
BuildMI(&MBB, DL, get(BccOpc)).addReg(Cond[regPos].getReg()).addMBB(TBB);
BuildMI(&MBB, DL, get(BOpc)).addMBB(FBB);
-
return 2;
}
@@ -211,9 +213,11 @@ bool HexagonInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,
return false;
--I;
}
-
+
+ bool JumpToBlock = I->getOpcode() == Hexagon::J2_jump &&
+ I->getOperand(0).isMBB();
// Delete the JMP if it's equivalent to a fall-through.
- if (AllowModify && I->getOpcode() == Hexagon::J2_jump &&
+ if (AllowModify && JumpToBlock &&
MBB.isLayoutSuccessor(I->getOperand(0).getMBB())) {
DEBUG(dbgs()<< "\nErasing the jump to successor block\n";);
I->eraseFromParent();
@@ -243,6 +247,14 @@ bool HexagonInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,
} while(I);
int LastOpcode = LastInst->getOpcode();
+ int SecLastOpcode = SecondLastInst ? SecondLastInst->getOpcode() : 0;
+ // If the branch target is not a basic block, it could be a tail call
+ // (it is one if the target is a function).
+ if (LastOpcode == Hexagon::J2_jump && !LastInst->getOperand(0).isMBB())
+ return true;
+ if (SecLastOpcode == Hexagon::J2_jump &&
+ !SecondLastInst->getOperand(0).isMBB())
+ return true;
bool LastOpcodeHasJMP_c = PredOpcodeHasJMP_c(LastOpcode);
bool LastOpcodeHasNot = PredOpcodeHasNot(LastOpcode);
@@ -270,8 +282,6 @@ bool HexagonInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,
return true;
}
- int SecLastOpcode = SecondLastInst->getOpcode();
-
bool SecLastOpcodeHasJMP_c = PredOpcodeHasJMP_c(SecLastOpcode);
bool SecLastOpcodeHasNot = PredOpcodeHasNot(SecLastOpcode);
if (SecLastOpcodeHasJMP_c && (LastOpcode == Hexagon::J2_jump)) {
@@ -308,30 +318,35 @@ bool HexagonInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,
unsigned HexagonInstrInfo::RemoveBranch(MachineBasicBlock &MBB) const {
- int BOpc = Hexagon::J2_jump;
- int BccOpc = Hexagon::J2_jumpt;
- int BccOpcNot = Hexagon::J2_jumpf;
-
MachineBasicBlock::iterator I = MBB.end();
if (I == MBB.begin()) return 0;
--I;
- if (I->getOpcode() != BOpc && I->getOpcode() != BccOpc &&
- I->getOpcode() != BccOpcNot)
- return 0;
-
- // Remove the branch.
- I->eraseFromParent();
+ unsigned Opc1 = I->getOpcode();
+ switch (Opc1) {
+ case Hexagon::J2_jump:
+ case Hexagon::J2_jumpt:
+ case Hexagon::J2_jumpf:
+ case Hexagon::ENDLOOP0:
+ I->eraseFromParent();
+ break;
+ default:
+ return 0;
+ }
I = MBB.end();
if (I == MBB.begin()) return 1;
--I;
- if (I->getOpcode() != BccOpc && I->getOpcode() != BccOpcNot)
- return 1;
-
- // Remove the branch.
- I->eraseFromParent();
- return 2;
+ unsigned Opc2 = I->getOpcode();
+ switch (Opc2) {
+ case Hexagon::J2_jumpt:
+ case Hexagon::J2_jumpf:
+ case Hexagon::ENDLOOP0:
+ I->eraseFromParent();
+ return 2;
+ default:
+ return 1;
+ }
}
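Editor's note: the rewritten RemoveBranch removes up to two terminators: the block's last instruction if it is an unconditional jump, a conditional jump, or ENDLOOP0, and then a preceding conditional jump or ENDLOOP0 if present. The same logic restated over a plain opcode list (illustrative enum, not MachineInstr):

#include <vector>

enum Opc { J2_jump, J2_jumpt, J2_jumpf, ENDLOOP0, OTHER };

// Pop an unconditional jump, conditional jump, or ENDLOOP0 off the end of
// the block; then pop a second conditional jump or ENDLOOP0 if one precedes
// it. Returns the number of terminators removed, as RemoveBranch does.
static unsigned removeBranch(std::vector<Opc> &MBB) {
  if (MBB.empty())
    return 0;
  Opc Last = MBB.back();
  if (Last != J2_jump && Last != J2_jumpt && Last != J2_jumpf &&
      Last != ENDLOOP0)
    return 0;
  MBB.pop_back();
  if (MBB.empty())
    return 1;
  Opc Prev = MBB.back();
  if (Prev == J2_jumpt || Prev == J2_jumpf || Prev == ENDLOOP0) {
    MBB.pop_back();
    return 2;
  }
  return 1;
}

int main() {
  std::vector<Opc> B = {OTHER, J2_jumpt, J2_jump};
  return removeBranch(B) == 2 ? 0 : 1;
}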
@@ -549,12 +564,95 @@ void HexagonInstrInfo::loadRegFromAddr(MachineFunction &MF, unsigned DestReg,
SmallVectorImpl<MachineInstr*> &NewMIs) const {
llvm_unreachable("Unimplemented");
}
+bool
+HexagonInstrInfo::expandPostRAPseudo(MachineBasicBlock::iterator MI) const {
+ const HexagonRegisterInfo &TRI = getRegisterInfo();
+ MachineRegisterInfo &MRI = MI->getParent()->getParent()->getRegInfo();
+ MachineBasicBlock &MBB = *MI->getParent();
+ DebugLoc DL = MI->getDebugLoc();
+ unsigned Opc = MI->getOpcode();
+ switch (Opc) {
+ case Hexagon::TFR_PdTrue: {
+ unsigned Reg = MI->getOperand(0).getReg();
+ BuildMI(MBB, MI, DL, get(Hexagon::C2_orn), Reg)
+ .addReg(Reg, RegState::Undef)
+ .addReg(Reg, RegState::Undef);
+ MBB.erase(MI);
+ return true;
+ }
+ case Hexagon::TFR_PdFalse: {
+ unsigned Reg = MI->getOperand(0).getReg();
+ BuildMI(MBB, MI, DL, get(Hexagon::C2_andn), Reg)
+ .addReg(Reg, RegState::Undef)
+ .addReg(Reg, RegState::Undef);
+ MBB.erase(MI);
+ return true;
+ }
+ case Hexagon::VMULW: {
+ // Expand a 64-bit vector multiply into 2 32-bit scalar multiplies.
+ unsigned DstReg = MI->getOperand(0).getReg();
+ unsigned Src1Reg = MI->getOperand(1).getReg();
+ unsigned Src2Reg = MI->getOperand(2).getReg();
+ unsigned Src1SubHi = TRI.getSubReg(Src1Reg, Hexagon::subreg_hireg);
+ unsigned Src1SubLo = TRI.getSubReg(Src1Reg, Hexagon::subreg_loreg);
+ unsigned Src2SubHi = TRI.getSubReg(Src2Reg, Hexagon::subreg_hireg);
+ unsigned Src2SubLo = TRI.getSubReg(Src2Reg, Hexagon::subreg_loreg);
+ BuildMI(MBB, MI, MI->getDebugLoc(), get(Hexagon::M2_mpyi),
+ TRI.getSubReg(DstReg, Hexagon::subreg_hireg)).addReg(Src1SubHi)
+ .addReg(Src2SubHi);
+ BuildMI(MBB, MI, MI->getDebugLoc(), get(Hexagon::M2_mpyi),
+ TRI.getSubReg(DstReg, Hexagon::subreg_loreg)).addReg(Src1SubLo)
+ .addReg(Src2SubLo);
+ MBB.erase(MI);
+ MRI.clearKillFlags(Src1SubHi);
+ MRI.clearKillFlags(Src1SubLo);
+ MRI.clearKillFlags(Src2SubHi);
+ MRI.clearKillFlags(Src2SubLo);
+ return true;
+ }
+ case Hexagon::VMULW_ACC: {
+ // Expand a 64-bit vector multiply-accumulate into two scalar
+ // multiply-accumulates.
+ unsigned DstReg = MI->getOperand(0).getReg();
+ unsigned Src1Reg = MI->getOperand(1).getReg();
+ unsigned Src2Reg = MI->getOperand(2).getReg();
+ unsigned Src3Reg = MI->getOperand(3).getReg();
+ unsigned Src1SubHi = TRI.getSubReg(Src1Reg, Hexagon::subreg_hireg);
+ unsigned Src1SubLo = TRI.getSubReg(Src1Reg, Hexagon::subreg_loreg);
+ unsigned Src2SubHi = TRI.getSubReg(Src2Reg, Hexagon::subreg_hireg);
+ unsigned Src2SubLo = TRI.getSubReg(Src2Reg, Hexagon::subreg_loreg);
+ unsigned Src3SubHi = TRI.getSubReg(Src3Reg, Hexagon::subreg_hireg);
+ unsigned Src3SubLo = TRI.getSubReg(Src3Reg, Hexagon::subreg_loreg);
+ BuildMI(MBB, MI, MI->getDebugLoc(), get(Hexagon::M2_maci),
+ TRI.getSubReg(DstReg, Hexagon::subreg_hireg)).addReg(Src1SubHi)
+ .addReg(Src2SubHi).addReg(Src3SubHi);
+ BuildMI(MBB, MI, MI->getDebugLoc(), get(Hexagon::M2_maci),
+ TRI.getSubReg(DstReg, Hexagon::subreg_loreg)).addReg(Src1SubLo)
+ .addReg(Src2SubLo).addReg(Src3SubLo);
+ MBB.erase(MI);
+ MRI.clearKillFlags(Src1SubHi);
+ MRI.clearKillFlags(Src1SubLo);
+ MRI.clearKillFlags(Src2SubHi);
+ MRI.clearKillFlags(Src2SubLo);
+ MRI.clearKillFlags(Src3SubHi);
+ MRI.clearKillFlags(Src3SubLo);
+ return true;
+ }
+ case Hexagon::TCRETURNi:
+ MI->setDesc(get(Hexagon::J2_jump));
+ return true;
+ case Hexagon::TCRETURNr:
+ MI->setDesc(get(Hexagon::J2_jumpr));
+ return true;
+ }
+
+ return false;
+}
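Editor's note: VMULW splits a v2i32 multiply on a 64-bit register pair into two independent 32-bit M2_mpyi operations on the high and low subregisters (VMULW_ACC does the same with M2_maci). A scalar model of the VMULW expansion, assuming v2i32 lanes:

#include <cstdint>

// Per-lane model of the VMULW expansion: multiply the high words and the
// low words independently; lanes do not carry into each other.
static uint64_t vmulw(uint64_t A, uint64_t B) {
  uint32_t Hi = uint32_t(A >> 32) * uint32_t(B >> 32); // M2_mpyi, subreg_hireg
  uint32_t Lo = uint32_t(A) * uint32_t(B);             // M2_mpyi, subreg_loreg
  return (uint64_t(Hi) << 32) | Lo;
}

int main() {
  return vmulw(0x0000000300000002ULL, 0x0000000400000005ULL) ==
             0x0000000C0000000AULL ? 0 : 1;
}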
MachineInstr *HexagonInstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
- MachineInstr* MI,
- const SmallVectorImpl<unsigned> &Ops,
- int FI) const {
+ MachineInstr *MI,
+ ArrayRef<unsigned> Ops,
+ int FI) const {
// Hexagon_TODO: Implement.
return nullptr;
}
@@ -641,7 +739,7 @@ bool HexagonInstrInfo::isPredicable(MachineInstr *MI) const {
switch(Opc) {
case Hexagon::A2_tfrsi:
- return isInt<12>(MI->getOperand(1).getImm());
+ return (isOperandExtended(MI, 1) && isConstExtended(MI)) ||
+ isInt<12>(MI->getOperand(1).getImm());
case Hexagon::S2_storerd_io:
return isShiftedUInt<6,3>(MI->getOperand(1).getImm());
@@ -1036,6 +1134,8 @@ SubsumesPredicate(const SmallVectorImpl<MachineOperand> &Pred1,
//
bool HexagonInstrInfo::
ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const {
+ if (!Cond.empty() && Cond[0].isMBB())
+ return true;
if (!Cond.empty() && Cond[0].isImm() && Cond[0].getImm() == 0) {
Cond.erase(Cond.begin());
} else {
@@ -1521,7 +1621,6 @@ int HexagonInstrInfo::GetDotNewOp(const MachineInstr* MI) const {
switch (MI->getOpcode()) {
default: llvm_unreachable("Unknown .new type");
- // store new value byte
case Hexagon::S4_storerb_ur:
return Hexagon::S4_storerbnew_ur;
@@ -1531,6 +1630,20 @@ int HexagonInstrInfo::GetDotNewOp(const MachineInstr* MI) const {
case Hexagon::S4_storeri_ur:
return Hexagon::S4_storerinew_ur;
+ case Hexagon::S2_storerb_pci:
+ return Hexagon::S2_storerb_pci;
+
+ case Hexagon::S2_storeri_pci:
+ return Hexagon::S2_storeri_pci;
+
+ case Hexagon::S2_storerh_pci:
+ return Hexagon::S2_storerh_pci;
+
+ case Hexagon::S2_storerd_pci:
+ return Hexagon::S2_storerd_pci;
+
+ case Hexagon::S2_storerf_pci:
+ return Hexagon::S2_storerf_pci;
}
return 0;
}
@@ -1647,7 +1760,7 @@ bool HexagonInstrInfo::isConstExtended(MachineInstr *MI) const {
// We currently only handle isGlobal() because it is the only kind of
// object we are going to end up with here for now.
// In the future we probably should add isSymbol(), etc.
- if (MO.isGlobal() || MO.isSymbol())
+ if (MO.isGlobal() || MO.isSymbol() || MO.isBlockAddress())
return true;
// If the extendable operand is not 'Immediate' type, the instruction should
diff --git a/lib/Target/Hexagon/HexagonInstrInfo.h b/lib/Target/Hexagon/HexagonInstrInfo.h
index 6acfbec..2644248 100644
--- a/lib/Target/Hexagon/HexagonInstrInfo.h
+++ b/lib/Target/Hexagon/HexagonInstrInfo.h
@@ -26,7 +26,7 @@
namespace llvm {
struct EVT;
-
+class HexagonSubtarget;
class HexagonInstrInfo : public HexagonGenInstrInfo {
virtual void anchor();
const HexagonRegisterInfo RI;
@@ -102,15 +102,21 @@ public:
const TargetRegisterClass *RC,
SmallVectorImpl<MachineInstr*> &NewMIs) const;
- MachineInstr* foldMemoryOperandImpl(MachineFunction &MF,
- MachineInstr* MI,
- const SmallVectorImpl<unsigned> &Ops,
+ /// expandPostRAPseudo - This function is called for all pseudo instructions
+ /// that remain after register allocation. Many pseudo instructions are
+ /// created to help register allocation. This is the place to convert them
+ /// into real instructions. The target can edit MI in place, or it can insert
+ /// new instructions and erase MI. The function should return true if
+ /// anything was changed.
+ bool expandPostRAPseudo(MachineBasicBlock::iterator MI) const override;
+
+ MachineInstr *foldMemoryOperandImpl(MachineFunction &MF, MachineInstr *MI,
+ ArrayRef<unsigned> Ops,
int FrameIndex) const override;
- MachineInstr* foldMemoryOperandImpl(MachineFunction &MF,
- MachineInstr* MI,
- const SmallVectorImpl<unsigned> &Ops,
- MachineInstr* LoadMI) const override {
+ MachineInstr *foldMemoryOperandImpl(MachineFunction &MF, MachineInstr *MI,
+ ArrayRef<unsigned> Ops,
+ MachineInstr *LoadMI) const override {
return nullptr;
}
diff --git a/lib/Target/Hexagon/HexagonInstrInfo.td b/lib/Target/Hexagon/HexagonInstrInfo.td
index 60635cf..19cf993 100644
--- a/lib/Target/Hexagon/HexagonInstrInfo.td
+++ b/lib/Target/Hexagon/HexagonInstrInfo.td
@@ -104,10 +104,16 @@ def : T_CMP_pat <C2_cmpgtui, setugt, u9ImmPred>;
//===----------------------------------------------------------------------===//
// ALU32/ALU +
//===----------------------------------------------------------------------===//
+// Add.
+
+def SDT_Int32Leaf : SDTypeProfile<1, 0, [SDTCisVT<0, i32>]>;
+def SDT_Int32Unary : SDTypeProfile<1, 1, [SDTCisVT<0, i32>, SDTCisVT<1, i32>]>;
+
def SDTHexagonI64I32I32 : SDTypeProfile<1, 2,
[SDTCisVT<0, i64>, SDTCisVT<1, i32>, SDTCisSameAs<1, 2>]>;
def HexagonCOMBINE : SDNode<"HexagonISD::COMBINE", SDTHexagonI64I32I32>;
+def HexagonPACKHL : SDNode<"HexagonISD::PACKHL", SDTHexagonI64I32I32>;
let hasSideEffects = 0, hasNewValue = 1, InputType = "reg" in
class T_ALU32_3op<string mnemonic, bits<3> MajOp, bits<3> MinOp, bit OpsRev,
@@ -243,6 +249,9 @@ let OutOperandList = (outs DoubleRegs:$Rd), hasNewValue = 0 in {
def C2_ccombinewnewf : T_ALU32_3op_pred<"combine", 0b101, 0b000, 0, 1, 1>;
}
+def: BinOp32_pat<HexagonCOMBINE, A2_combinew, i64>;
+def: BinOp32_pat<HexagonPACKHL, S2_packhl, i64>;
+
let hasSideEffects = 0, hasNewValue = 1, isCompare = 1, InputType = "reg" in
class T_ALU32_3op_cmp<string mnemonic, bits<2> MinOp, bit IsNeg, bit IsComm>
: ALU32_rr<(outs PredRegs:$Pd), (ins IntRegs:$Rs, IntRegs:$Rt),
@@ -321,7 +330,7 @@ let isReMaterializable = 1, isMoveImm = 1, isAsCheapAsAMove = 1,
def A2_combineii: ALU32Inst <(outs DoubleRegs:$Rdd), (ins s8Ext:$s8, s8Imm:$S8),
"$Rdd = combine(#$s8, #$S8)",
[(set (i64 DoubleRegs:$Rdd),
- (i64 (HexagonCOMBINE(i32 s8ExtPred:$s8), (i32 s8ImmPred:$S8))))]> {
+ (i64 (HexagonCOMBINE(i32 s32ImmPred:$s8), (i32 s8ImmPred:$S8))))]> {
bits<5> Rdd;
bits<8> s8;
bits<8> S8;
@@ -406,7 +415,7 @@ multiclass Addri_base<string mnemonic, SDNode OpNode> {
defm addi : Addri_base<"add", add>, ImmRegRel, PredNewRel;
-def: Pat<(i32 (add I32:$Rs, s16ExtPred:$s16)),
+def: Pat<(i32 (add I32:$Rs, s32ImmPred:$s16)),
(i32 (A2_addi I32:$Rs, imm:$s16))>;
//===----------------------------------------------------------------------===//
@@ -420,7 +429,7 @@ class T_ALU32ri_logical <string mnemonic, SDNode OpNode, bits<2> MinOp>
: ALU32_ri <(outs IntRegs:$Rd),
(ins IntRegs:$Rs, s10Ext:$s10),
"$Rd = "#mnemonic#"($Rs, #$s10)" ,
- [(set (i32 IntRegs:$Rd), (OpNode (i32 IntRegs:$Rs), s10ExtPred:$s10))]> {
+ [(set (i32 IntRegs:$Rd), (OpNode (i32 IntRegs:$Rs), s32ImmPred:$s10))]> {
bits<5> Rd;
bits<5> Rs;
bits<10> s10;
@@ -465,7 +474,7 @@ def A2_nop: ALU32Inst <(outs), (ins), "nop" > {
let Inst{27-24} = 0b1111;
}
-def: Pat<(sub s10ExtPred:$s10, IntRegs:$Rs),
+def: Pat<(sub s32ImmPred:$s10, IntRegs:$Rs),
(A2_subri imm:$s10, IntRegs:$Rs)>;
// Rd = not(Rs) gets mapped to Rd=sub(#-1, Rs).
@@ -613,7 +622,7 @@ let InputType = "imm", isExtendable = 1, isExtentSigned = 1,
isAsCheapAsAMove = 1 , opExtendable = 1, opExtentBits = 16, isMoveImm = 1,
isPredicated = 0, isPredicable = 1, isReMaterializable = 1 in
def A2_tfrsi : ALU32Inst<(outs IntRegs:$Rd), (ins s16Ext:$s16), "$Rd = #$s16",
- [(set (i32 IntRegs:$Rd), s16ExtPred:$s16)], "", ALU32_2op_tc_1_SLOT0123>,
+ [(set (i32 IntRegs:$Rd), s32ImmPred:$s16)], "", ALU32_2op_tc_1_SLOT0123>,
ImmRegRel, PredRel {
bits<5> Rd;
bits<16> s16;
@@ -637,9 +646,13 @@ def A2_tfrpi : ALU64_rr<(outs DoubleRegs:$dst), (ins s8Imm64:$src1),
// TODO: see if this instruction can be deleted..
let isExtendable = 1, opExtendable = 1, opExtentBits = 6,
- isAsmParserOnly = 1 in
-def TFRI64_V4 : ALU64_rr<(outs DoubleRegs:$dst), (ins u6Ext:$src1),
+ isAsmParserOnly = 1 in {
+def TFRI64_V4 : ALU64_rr<(outs DoubleRegs:$dst), (ins u64Imm:$src1),
"$dst = #$src1">;
+def TFRI64_V2_ext : ALU64_rr<(outs DoubleRegs:$dst),
+ (ins s8Ext:$src1, s8Imm:$src2),
+ "$dst = combine(##$src1, #$src2)">;
+}
//===----------------------------------------------------------------------===//
// ALU32/ALU -
@@ -677,11 +690,11 @@ let opExtendable = 3 in
def C2_muxir : T_MUX1<0b0, (ins PredRegs:$Pu, IntRegs:$Rs, s8Ext:$s8),
"$Rd = mux($Pu, $Rs, #$s8)">;
-def : Pat<(i32 (select I1:$Pu, s8ExtPred:$s8, I32:$Rs)),
- (C2_muxri I1:$Pu, s8ExtPred:$s8, I32:$Rs)>;
+def : Pat<(i32 (select I1:$Pu, s32ImmPred:$s8, I32:$Rs)),
+ (C2_muxri I1:$Pu, s32ImmPred:$s8, I32:$Rs)>;
-def : Pat<(i32 (select I1:$Pu, I32:$Rs, s8ExtPred:$s8)),
- (C2_muxir I1:$Pu, I32:$Rs, s8ExtPred:$s8)>;
+def : Pat<(i32 (select I1:$Pu, I32:$Rs, s32ImmPred:$s8)),
+ (C2_muxir I1:$Pu, I32:$Rs, s32ImmPred:$s8)>;
// C2_muxii: Scalar mux immediates.
let isExtentSigned = 1, hasNewValue = 1, isExtendable = 1,
@@ -690,7 +703,7 @@ def C2_muxii: ALU32Inst <(outs IntRegs:$Rd),
(ins PredRegs:$Pu, s8Ext:$s8, s8Imm:$S8),
"$Rd = mux($Pu, #$s8, #$S8)" ,
[(set (i32 IntRegs:$Rd),
- (i32 (select I1:$Pu, s8ExtPred:$s8, s8ImmPred:$S8)))] > {
+ (i32 (select I1:$Pu, s32ImmPred:$s8, s8ImmPred:$S8)))] > {
bits<5> Rd;
bits<2> Pu;
bits<8> s8;
@@ -706,6 +719,12 @@ def C2_muxii: ALU32Inst <(outs IntRegs:$Rd),
let Inst{4-0} = Rd;
}
+let isCodeGenOnly = 1, isPseudo = 1 in
+def MUX64_rr : ALU64_rr<(outs DoubleRegs:$Rd),
+ (ins PredRegs:$Pu, DoubleRegs:$Rs, DoubleRegs:$Rt),
+ ".error \"should not emit\" ", []>;
+
+
//===----------------------------------------------------------------------===//
// template class for non-predicated alu32_2op instructions
// - aslh, asrh, sxtb, sxth, zxth
@@ -987,6 +1006,17 @@ def: T_vcmp_pat<A2_vcmpwgtu, setugt, v2i32>;
//===----------------------------------------------------------------------===//
// ALU32/PRED +
//===----------------------------------------------------------------------===//
+// No encoding bits needed. If cmp.ge is found, the asm parser will
+// transform it to cmp.gt, subtracting 1 from the immediate.
+let isPseudo = 1 in {
+def C2_cmpgei: ALU32Inst <
+ (outs PredRegs:$Pd), (ins IntRegs:$Rs, s8Ext:$s8),
+ "$Pd = cmp.ge($Rs, #$s8)">;
+def C2_cmpgeui: ALU32Inst <
+ (outs PredRegs:$Pd), (ins IntRegs:$Rs, u8Ext:$s8),
+ "$Pd = cmp.geu($Rs, #$s8)">;
+}
+
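Editor's note: the parser rewrite described above relies on x >= imm being equivalent to x > imm - 1. For the unsigned form, imm == 0 needs care, since geu #0 is always true while the decrement would wrap. A minimal model of both rewrites (assumed semantics of the transformation):

#include <cstdint>

// cmp.ge(Rs, #s8)  ->  cmp.gt(Rs, #s8 - 1).
// Imm is an s8 here, so Imm - 1 cannot underflow int32.
static bool cmpGeViaGt(int32_t X, int32_t Imm) {
  return X > Imm - 1;
}

// cmp.geu(Rs, #u8) ->  cmp.gtu(Rs, #u8 - 1), except #0, which is
// always true (the decrement would wrap around).
static bool cmpGeuViaGtu(uint32_t X, uint32_t Imm) {
  return Imm == 0 || X > Imm - 1;
}

int main() {
  return (cmpGeViaGt(-3, -3) && cmpGeuViaGtu(0, 0)) ? 0 : 1;
}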
//===----------------------------------------------------------------------===//
// ALU32/PRED -
@@ -1742,27 +1772,29 @@ def L2_loadalignb_io: T_loadalign_io <"memb_fifo", 0b0100, s11_0Ext>;
multiclass Loadx_pat<PatFrag Load, ValueType VT, PatLeaf ImmPred,
InstHexagon MI> {
def: Pat<(VT (Load AddrFI:$fi)), (VT (MI AddrFI:$fi, 0))>;
+ def: Pat<(VT (Load (add (i32 AddrFI:$fi), ImmPred:$Off))),
+ (VT (MI AddrFI:$fi, imm:$Off))>;
def: Pat<(VT (Load (add (i32 IntRegs:$Rs), ImmPred:$Off))),
(VT (MI IntRegs:$Rs, imm:$Off))>;
def: Pat<(VT (Load (i32 IntRegs:$Rs))), (VT (MI IntRegs:$Rs, 0))>;
}
let AddedComplexity = 20 in {
- defm: Loadx_pat<load, i32, s11_2ExtPred, L2_loadri_io>;
- defm: Loadx_pat<load, i64, s11_3ExtPred, L2_loadrd_io>;
- defm: Loadx_pat<atomic_load_8 , i32, s11_0ExtPred, L2_loadrub_io>;
- defm: Loadx_pat<atomic_load_16, i32, s11_1ExtPred, L2_loadruh_io>;
- defm: Loadx_pat<atomic_load_32, i32, s11_2ExtPred, L2_loadri_io>;
- defm: Loadx_pat<atomic_load_64, i64, s11_3ExtPred, L2_loadrd_io>;
-
- defm: Loadx_pat<extloadi1, i32, s11_0ExtPred, L2_loadrub_io>;
- defm: Loadx_pat<extloadi8, i32, s11_0ExtPred, L2_loadrub_io>;
- defm: Loadx_pat<extloadi16, i32, s11_1ExtPred, L2_loadruh_io>;
- defm: Loadx_pat<sextloadi8, i32, s11_0ExtPred, L2_loadrb_io>;
- defm: Loadx_pat<sextloadi16, i32, s11_1ExtPred, L2_loadrh_io>;
- defm: Loadx_pat<zextloadi1, i32, s11_0ExtPred, L2_loadrub_io>;
- defm: Loadx_pat<zextloadi8, i32, s11_0ExtPred, L2_loadrub_io>;
- defm: Loadx_pat<zextloadi16, i32, s11_1ExtPred, L2_loadruh_io>;
+ defm: Loadx_pat<load, i32, s30_2ImmPred, L2_loadri_io>;
+ defm: Loadx_pat<load, i64, s29_3ImmPred, L2_loadrd_io>;
+ defm: Loadx_pat<atomic_load_8 , i32, s32_0ImmPred, L2_loadrub_io>;
+ defm: Loadx_pat<atomic_load_16, i32, s31_1ImmPred, L2_loadruh_io>;
+ defm: Loadx_pat<atomic_load_32, i32, s30_2ImmPred, L2_loadri_io>;
+ defm: Loadx_pat<atomic_load_64, i64, s29_3ImmPred, L2_loadrd_io>;
+
+ defm: Loadx_pat<extloadi1, i32, s32_0ImmPred, L2_loadrub_io>;
+ defm: Loadx_pat<extloadi8, i32, s32_0ImmPred, L2_loadrub_io>;
+ defm: Loadx_pat<extloadi16, i32, s31_1ImmPred, L2_loadruh_io>;
+ defm: Loadx_pat<sextloadi8, i32, s32_0ImmPred, L2_loadrb_io>;
+ defm: Loadx_pat<sextloadi16, i32, s31_1ImmPred, L2_loadrh_io>;
+ defm: Loadx_pat<zextloadi1, i32, s32_0ImmPred, L2_loadrub_io>;
+ defm: Loadx_pat<zextloadi8, i32, s32_0ImmPred, L2_loadrub_io>;
+ defm: Loadx_pat<zextloadi16, i32, s31_1ImmPred, L2_loadruh_io>;
// No sextloadi1.
}
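Editor's note: the renamed predicates encode alignment as well as range: an sN_KImmPred accepts a signed value that fits in N+K bits and has its K low bits clear, matching the scaled offset fields of the load/store encodings. A model of that check (assumed semantics, consistent with LLVM's isShiftedInt):

#include <cstdint>

// sN_KImmPred: V fits in a signed (N+K)-bit field and V's low K bits are
// zero, i.e. V is a K-shifted N-bit signed value.
static bool isShiftedIntNK(int64_t V, unsigned N, unsigned K) {
  if (V & ((1LL << K) - 1))
    return false;                       // must be 2^K-aligned
  int64_t S = V >> K;                   // the N-bit payload
  return S >= -(1LL << (N - 1)) && S < (1LL << (N - 1));
}

int main() {
  // s30_2ImmPred-style check: word-aligned offset in a 32-bit signed range.
  return (isShiftedIntNK(1024, 30, 2) && !isShiftedIntNK(1026, 30, 2)) ? 0 : 1;
}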
@@ -2707,7 +2739,7 @@ class T_MType_mpy_ri <bit isNeg, Operand ImmOp, list<dag> pattern>
let isExtendable = 1, opExtentBits = 8, opExtendable = 2 in
def M2_mpysip : T_MType_mpy_ri <0, u8Ext,
- [(set (i32 IntRegs:$Rd), (mul IntRegs:$Rs, u8ExtPred:$u8))]>;
+ [(set (i32 IntRegs:$Rd), (mul IntRegs:$Rs, u32ImmPred:$u8))]>;
def M2_mpysin : T_MType_mpy_ri <1, u8Imm,
[(set (i32 IntRegs:$Rd), (ineg (mul IntRegs:$Rs,
@@ -2729,7 +2761,7 @@ let isExtendable = 1, opExtendable = 2, isExtentSigned = 1, opExtentBits = 9,
def M2_mpysmi : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, s9Ext:$src2),
"$dst = mpyi($src1, #$src2)",
[(set (i32 IntRegs:$dst), (mul (i32 IntRegs:$src1),
- s9ExtPred:$src2))]>, ImmRegRel;
+ s32ImmPred:$src2))]>, ImmRegRel;
let hasNewValue = 1, isExtendable = 1, opExtentBits = 8, opExtendable = 3,
InputType = "imm" in
@@ -2780,7 +2812,7 @@ class T_MType_acc_rr <string mnemonic, bits<3> MajOp, bits<3> MinOp,
let CextOpcode = "MPYI_acc", Itinerary = M_tc_3x_SLOT23 in {
def M2_macsip : T_MType_acc_ri <"+= mpyi", 0b010, u8Ext,
[(set (i32 IntRegs:$dst),
- (add (mul IntRegs:$src2, u8ExtPred:$src3),
+ (add (mul IntRegs:$src2, u32ImmPred:$src3),
IntRegs:$src1))]>, ImmRegRel;
def M2_maci : T_MType_acc_rr <"+= mpyi", 0b000, 0b000, 0,
@@ -2793,7 +2825,7 @@ let CextOpcode = "ADD_acc" in {
let isExtentSigned = 1 in
def M2_accii : T_MType_acc_ri <"+= add", 0b100, s8Ext,
[(set (i32 IntRegs:$dst),
- (add (add (i32 IntRegs:$src2), s8_16ExtPred:$src3),
+ (add (add (i32 IntRegs:$src2), s16_16ImmPred:$src3),
(i32 IntRegs:$src1)))]>, ImmRegRel;
def M2_acci : T_MType_acc_rr <"+= add", 0b000, 0b001, 0,
@@ -2825,9 +2857,9 @@ class T_MType_acc_pat2 <InstHexagon MI, SDNode firstOp, SDNode secOp>
(MI IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>;
def : T_MType_acc_pat2 <M2_xor_xacc, xor, xor>;
-def : T_MType_acc_pat1 <M2_macsin, mul, sub, u8ExtPred>;
+def : T_MType_acc_pat1 <M2_macsin, mul, sub, u32ImmPred>;
-def : T_MType_acc_pat1 <M2_naccii, add, sub, s8_16ExtPred>;
+def : T_MType_acc_pat1 <M2_naccii, add, sub, s16_16ImmPred>;
def : T_MType_acc_pat2 <M2_nacci, add, sub>;
//===----------------------------------------------------------------------===//
@@ -3514,7 +3546,8 @@ let addrMode = BaseImmOffset, InputType = "imm" in {
}
// Patterns for generating stores, where the address takes different forms:
-// - frameindex,,
+// - frameindex,
+// - frameindex + offset,
// - base + offset,
// - simple (base address without offset).
// These would usually be used together (via Storex_pat defined below), but
@@ -3522,6 +3555,10 @@ let addrMode = BaseImmOffset, InputType = "imm" in {
// AddedComplexity) to the individual patterns.
class Storex_fi_pat<PatFrag Store, PatFrag Value, InstHexagon MI>
: Pat<(Store Value:$Rs, AddrFI:$fi), (MI AddrFI:$fi, 0, Value:$Rs)>;
+class Storex_fi_add_pat<PatFrag Store, PatFrag Value, PatFrag ImmPred,
+ InstHexagon MI>
+ : Pat<(Store Value:$Rs, (add (i32 AddrFI:$fi), ImmPred:$Off)),
+ (MI AddrFI:$fi, imm:$Off, Value:$Rs)>;
class Storex_add_pat<PatFrag Store, PatFrag Value, PatFrag ImmPred,
InstHexagon MI>
: Pat<(Store Value:$Rt, (add (i32 IntRegs:$Rs), ImmPred:$Off)),
@@ -3537,6 +3574,10 @@ class Storexm_fi_pat<PatFrag Store, PatFrag Value, PatFrag ValueMod,
InstHexagon MI>
: Pat<(Store Value:$Rs, AddrFI:$fi),
(MI AddrFI:$fi, 0, (ValueMod Value:$Rs))>;
+class Storexm_fi_add_pat<PatFrag Store, PatFrag Value, PatFrag ImmPred,
+ PatFrag ValueMod, InstHexagon MI>
+ : Pat<(Store Value:$Rs, (add (i32 AddrFI:$fi), ImmPred:$Off)),
+ (MI AddrFI:$fi, imm:$Off, (ValueMod Value:$Rs))>;
class Storexm_add_pat<PatFrag Store, PatFrag Value, PatFrag ImmPred,
PatFrag ValueMod, InstHexagon MI>
: Pat<(Store Value:$Rt, (add (i32 IntRegs:$Rs), ImmPred:$Off)),
@@ -3548,14 +3589,16 @@ class Storexm_simple_pat<PatFrag Store, PatFrag Value, PatFrag ValueMod,
multiclass Storex_pat<PatFrag Store, PatFrag Value, PatLeaf ImmPred,
InstHexagon MI> {
- def: Storex_fi_pat <Store, Value, MI>;
- def: Storex_add_pat <Store, Value, ImmPred, MI>;
+ def: Storex_fi_pat <Store, Value, MI>;
+ def: Storex_fi_add_pat <Store, Value, ImmPred, MI>;
+ def: Storex_add_pat <Store, Value, ImmPred, MI>;
}
multiclass Storexm_pat<PatFrag Store, PatFrag Value, PatLeaf ImmPred,
PatFrag ValueMod, InstHexagon MI> {
- def: Storexm_fi_pat <Store, Value, ValueMod, MI>;
- def: Storexm_add_pat <Store, Value, ImmPred, ValueMod, MI>;
+ def: Storexm_fi_pat <Store, Value, ValueMod, MI>;
+ def: Storexm_fi_add_pat <Store, Value, ImmPred, ValueMod, MI>;
+ def: Storexm_add_pat <Store, Value, ImmPred, ValueMod, MI>;
}
// Regular stores in the DAG have two operands: value and address.
@@ -3567,15 +3610,15 @@ class SwapSt<PatFrag F>
: PatFrag<(ops node:$val, node:$ptr), F.Fragment>;
let AddedComplexity = 20 in {
- defm: Storex_pat<truncstorei8, I32, s11_0ExtPred, S2_storerb_io>;
- defm: Storex_pat<truncstorei16, I32, s11_1ExtPred, S2_storerh_io>;
- defm: Storex_pat<store, I32, s11_2ExtPred, S2_storeri_io>;
- defm: Storex_pat<store, I64, s11_3ExtPred, S2_storerd_io>;
+ defm: Storex_pat<truncstorei8, I32, s32_0ImmPred, S2_storerb_io>;
+ defm: Storex_pat<truncstorei16, I32, s31_1ImmPred, S2_storerh_io>;
+ defm: Storex_pat<store, I32, s30_2ImmPred, S2_storeri_io>;
+ defm: Storex_pat<store, I64, s29_3ImmPred, S2_storerd_io>;
- defm: Storex_pat<SwapSt<atomic_store_8>, I32, s11_0ExtPred, S2_storerb_io>;
- defm: Storex_pat<SwapSt<atomic_store_16>, I32, s11_1ExtPred, S2_storerh_io>;
- defm: Storex_pat<SwapSt<atomic_store_32>, I32, s11_2ExtPred, S2_storeri_io>;
- defm: Storex_pat<SwapSt<atomic_store_64>, I64, s11_3ExtPred, S2_storerd_io>;
+ defm: Storex_pat<SwapSt<atomic_store_8>, I32, s32_0ImmPred, S2_storerb_io>;
+ defm: Storex_pat<SwapSt<atomic_store_16>, I32, s31_1ImmPred, S2_storerh_io>;
+ defm: Storex_pat<SwapSt<atomic_store_32>, I32, s30_2ImmPred, S2_storeri_io>;
+ defm: Storex_pat<SwapSt<atomic_store_64>, I64, s29_3ImmPred, S2_storerd_io>;
}
// Simple patterns should be tried with the least priority.
@@ -3590,9 +3633,9 @@ def: Storex_simple_pat<SwapSt<atomic_store_32>, I32, S2_storeri_io>;
def: Storex_simple_pat<SwapSt<atomic_store_64>, I64, S2_storerd_io>;
let AddedComplexity = 20 in {
- defm: Storexm_pat<truncstorei8, I64, s11_0ExtPred, LoReg, S2_storerb_io>;
- defm: Storexm_pat<truncstorei16, I64, s11_1ExtPred, LoReg, S2_storerh_io>;
- defm: Storexm_pat<truncstorei32, I64, s11_2ExtPred, LoReg, S2_storeri_io>;
+ defm: Storexm_pat<truncstorei8, I64, s32_0ImmPred, LoReg, S2_storerb_io>;
+ defm: Storexm_pat<truncstorei16, I64, s31_1ImmPred, LoReg, S2_storerh_io>;
+ defm: Storexm_pat<truncstorei32, I64, s30_2ImmPred, LoReg, S2_storeri_io>;
}
def: Storexm_simple_pat<truncstorei8, I64, LoReg, S2_storerb_io>;
@@ -4321,6 +4364,14 @@ def: Pat<(i1 (seteq (and (i32 IntRegs:$Rs), (i32 IntRegs:$Rt)), IntRegs:$Rt)),
// XTYPE/PERM +
//===----------------------------------------------------------------------===//
+def: Pat<(or (or (shl (or (shl (i32 (extloadi8 (add (i32 IntRegs:$b), 3))),
+ (i32 8)),
+ (i32 (zextloadi8 (add (i32 IntRegs:$b), 2)))),
+ (i32 16)),
+ (shl (i32 (zextloadi8 (add (i32 IntRegs:$b), 1))), (i32 8))),
+ (zextloadi8 (i32 IntRegs:$b))),
+ (A2_swiz (L2_loadri_io IntRegs:$b, 0))>;
+
//===----------------------------------------------------------------------===//
// XTYPE/PERM -
//===----------------------------------------------------------------------===//
@@ -4364,7 +4415,7 @@ def C2_pxfer_map: SInst<(outs PredRegs:$dst), (ins PredRegs:$src),
// Patterns for loads of i1:
def: Pat<(i1 (load AddrFI:$fi)),
(C2_tfrrp (L2_loadrub_io AddrFI:$fi, 0))>;
-def: Pat<(i1 (load (add (i32 IntRegs:$Rs), s11_0ExtPred:$Off))),
+def: Pat<(i1 (load (add (i32 IntRegs:$Rs), s32ImmPred:$Off))),
(C2_tfrrp (L2_loadrub_io IntRegs:$Rs, imm:$Off))>;
def: Pat<(i1 (load (i32 IntRegs:$Rs))),
(C2_tfrrp (L2_loadrub_io IntRegs:$Rs, 0))>;
@@ -4375,7 +4426,7 @@ def I1toI32: OutPatFrag<(ops node:$Rs),
def I32toI1: OutPatFrag<(ops node:$Rs),
(i1 (C2_tfrrp (i32 $Rs)))>;
-defm: Storexm_pat<store, I1, s11_0ExtPred, I1toI32, S2_storerb_io>;
+defm: Storexm_pat<store, I1, s32ImmPred, I1toI32, S2_storerb_io>;
def: Storexm_simple_pat<store, I1, I1toI32, S2_storerb_io>;
//===----------------------------------------------------------------------===//
@@ -4474,6 +4525,12 @@ def Y2_barrier : SYSInst<(outs), (ins),
//===----------------------------------------------------------------------===//
// SYSTEM/SUPER -
//===----------------------------------------------------------------------===//
+
+// Generate frameindex addresses.
+let isMoveImm = 1, isAsCheapAsAMove = 1, isReMaterializable = 1,
+ isPseudo = 1, isCodeGenOnly = 1, hasSideEffects = 0 in
+def TFR_FI: ALU32_ri<(outs IntRegs:$Rd), (ins IntRegs:$fi, s32Imm:$Off), "">;
+
//===----------------------------------------------------------------------===//
// CRUSER - Type.
//===----------------------------------------------------------------------===//
@@ -4519,6 +4576,11 @@ class LOOP_rBase<string mnemonic, Operand brOp, bit mustExtend = 0>
multiclass LOOP_ri<string mnemonic> {
def i : LOOP_iBase<mnemonic, brtarget>;
def r : LOOP_rBase<mnemonic, brtarget>;
+
+ let isCodeGenOnly = 1, isExtended = 1, opExtendable = 0 in {
+ def iext: LOOP_iBase<mnemonic, brtargetExt, 1>;
+ def rext: LOOP_rBase<mnemonic, brtargetExt, 1>;
+ }
}
@@ -4676,36 +4738,6 @@ def Y4_trace: CRInst <(outs), (ins IntRegs:$Rs),
let Inst{20-16} = Rs;
}
-let AddedComplexity = 100, isPredicated = 1, isCodeGenOnly = 1 in
-def TFR_condset_ri : ALU32_rr<(outs IntRegs:$dst),
- (ins PredRegs:$src1, IntRegs:$src2, s12Imm:$src3),
- "Error; should not emit",
- [(set (i32 IntRegs:$dst),
- (i32 (select (i1 PredRegs:$src1), (i32 IntRegs:$src2),
- s12ImmPred:$src3)))]>;
-
-let AddedComplexity = 100, isPredicated = 1, isCodeGenOnly = 1 in
-def TFR_condset_ir : ALU32_rr<(outs IntRegs:$dst),
- (ins PredRegs:$src1, s12Imm:$src2, IntRegs:$src3),
- "Error; should not emit",
- [(set (i32 IntRegs:$dst),
- (i32 (select (i1 PredRegs:$src1), s12ImmPred:$src2,
- (i32 IntRegs:$src3))))]>;
-
-let AddedComplexity = 100, isPredicated = 1, isCodeGenOnly = 1 in
-def TFR_condset_ii : ALU32_rr<(outs IntRegs:$dst),
- (ins PredRegs:$src1, s12Imm:$src2, s12Imm:$src3),
- "Error; should not emit",
- [(set (i32 IntRegs:$dst),
- (i32 (select (i1 PredRegs:$src1), s12ImmPred:$src2,
- s12ImmPred:$src3)))]>;
-
-// Generate frameindex addresses.
-let isReMaterializable = 1, isCodeGenOnly = 1 in
-def TFR_FI : ALU32_ri<(outs IntRegs:$dst), (ins FrameIndex:$src1),
- "$dst = add($src1)",
- [(set (i32 IntRegs:$dst), ADDRri:$src1)]>;
-
// Support for generating global address.
// Taken from X86InstrInfo.td.
def SDTHexagonCONST32 : SDTypeProfile<1, 1, [SDTCisVT<0, i32>,
@@ -4750,30 +4782,29 @@ def HI_PIC : ALU32_ri<(outs IntRegs:$dst), (ins bblabel:$label),
"$dst.h = #HI($label@GOTREL)",
[]>;
-let isReMaterializable = 1, isMoveImm = 1, hasSideEffects = 0,
- isAsmParserOnly = 1 in
-def LOi : ALU32_ri<(outs IntRegs:$dst), (ins i32imm:$imm_value),
- "$dst.l = #LO($imm_value)",
- []>;
-
+let isReMaterializable = 1, isMoveImm = 1,
+ isCodeGenOnly = 1, hasSideEffects = 0 in
+def HI_GOT : ALU32_ri<(outs IntRegs:$dst), (ins globaladdress:$global),
+ "$dst.h = #HI($global@GOT)",
+ []>;
-let isReMaterializable = 1, isMoveImm = 1, hasSideEffects = 0,
- isAsmParserOnly = 1 in
-def HIi : ALU32_ri<(outs IntRegs:$dst), (ins i32imm:$imm_value),
- "$dst.h = #HI($imm_value)",
- []>;
+let isReMaterializable = 1, isMoveImm = 1,
+ isCodeGenOnly = 1, hasSideEffects = 0 in
+def LO_GOT : ALU32_ri<(outs IntRegs:$dst), (ins globaladdress:$global),
+ "$dst.l = #LO($global@GOT)",
+ []>;
-let isReMaterializable = 1, isMoveImm = 1, hasSideEffects = 0,
- isAsmParserOnly = 1 in
-def LO_jt : ALU32_ri<(outs IntRegs:$dst), (ins jumptablebase:$jt),
- "$dst.l = #LO($jt)",
- []>;
+let isReMaterializable = 1, isMoveImm = 1,
+ isCodeGenOnly = 1, hasSideEffects = 0 in
+def HI_GOTREL : ALU32_ri<(outs IntRegs:$dst), (ins globaladdress:$global),
+ "$dst.h = #HI($global@GOTREL)",
+ []>;
-let isReMaterializable = 1, isMoveImm = 1, hasSideEffects = 0,
- isAsmParserOnly = 1 in
-def HI_jt : ALU32_ri<(outs IntRegs:$dst), (ins jumptablebase:$jt),
- "$dst.h = #HI($jt)",
- []>;
+let isReMaterializable = 1, isMoveImm = 1,
+ isCodeGenOnly = 1, hasSideEffects = 0 in
+def LO_GOTREL : ALU32_ri<(outs IntRegs:$dst), (ins globaladdress:$global),
+ "$dst.l = #LO($global@GOTREL)",
+ []>;
// This pattern is incorrect. When we add small data, we should change
// this pattern to use memw(#foo).
@@ -4785,31 +4816,19 @@ def CONST32 : CONSTLDInst<(outs IntRegs:$dst), (ins globaladdress:$global),
(load (HexagonCONST32 tglobaltlsaddr:$global)))]>;
let isReMaterializable = 1, isMoveImm = 1 in
-def CONST32_set : LDInst2<(outs IntRegs:$dst), (ins globaladdress:$global),
- "$dst = CONST32(#$global)",
- [(set (i32 IntRegs:$dst),
- (HexagonCONST32 tglobaladdr:$global))]>;
-
-let isReMaterializable = 1, isMoveImm = 1, isAsmParserOnly = 1 in
def CONST32_set_jt : CONSTLDInst<(outs IntRegs:$dst), (ins jumptablebase:$jt),
"$dst = CONST32(#$jt)",
[(set (i32 IntRegs:$dst),
(HexagonCONST32 tjumptable:$jt))]>;
let isReMaterializable = 1, isMoveImm = 1, isAsmParserOnly = 1 in
-def CONST32GP_set : LDInst2<(outs IntRegs:$dst), (ins globaladdress:$global),
- "$dst = CONST32(#$global)",
- [(set (i32 IntRegs:$dst),
- (HexagonCONST32_GP tglobaladdr:$global))]>;
-
-let isReMaterializable = 1, isMoveImm = 1, isAsmParserOnly = 1 in
def CONST32_Int_Real : CONSTLDInst<(outs IntRegs:$dst), (ins i32imm:$global),
"$dst = CONST32(#$global)",
[(set (i32 IntRegs:$dst), imm:$global) ]>;
-// Map BlockAddress lowering to CONST32_Int_Real
-def : Pat<(HexagonCONST32_GP tblockaddress:$addr),
- (CONST32_Int_Real tblockaddress:$addr)>;
+// Map TLS addresses and basic-block labels to a transfer-immediate.
+def: Pat<(HexagonCONST32 tglobaltlsaddr:$addr), (A2_tfrsi s16Ext:$addr)>;
+def: Pat<(HexagonCONST32 bbl:$label), (A2_tfrsi s16Ext:$label)>;
let isReMaterializable = 1, isMoveImm = 1, isAsmParserOnly = 1 in
def CONST32_Label : LDInst2<(outs IntRegs:$dst), (ins bblabel:$label),
@@ -4869,21 +4888,17 @@ let isPseudo = 1, isCall = 1, isReturn = 1, isBarrier = 1, isPredicable = 0,
def TCRETURNr : T_JMPr;
// Direct tail-calls.
-let isCall = 1, isReturn = 1, isBarrier = 1, isPredicable = 0,
-isTerminator = 1, isCodeGenOnly = 1 in {
- def TCRETURNtg : JInst<(outs), (ins calltarget:$dst), "jump $dst",
- [], "", J_tc_2early_SLOT23>;
- def TCRETURNtext : JInst<(outs), (ins calltarget:$dst), "jump $dst",
- [], "", J_tc_2early_SLOT23>;
-}
+let isPseudo = 1, isCall = 1, isReturn = 1, isBarrier = 1, isPredicable = 0,
+ isTerminator = 1, isCodeGenOnly = 1 in
+def TCRETURNi : JInst<(outs), (ins calltarget:$dst), "", []>;
//Tail calls.
def: Pat<(HexagonTCRet tglobaladdr:$dst),
- (TCRETURNtg tglobaladdr:$dst)>;
+ (TCRETURNi tglobaladdr:$dst)>;
def: Pat<(HexagonTCRet texternalsym:$dst),
- (TCRETURNtext texternalsym:$dst)>;
+ (TCRETURNi texternalsym:$dst)>;
def: Pat<(HexagonTCRet (i32 IntRegs:$dst)),
- (TCRETURNr (i32 IntRegs:$dst))>;
+ (TCRETURNr IntRegs:$dst)>;
// Map from r0 = and(r1, 65535) to r0 = zxth(r1)
def: Pat<(and (i32 IntRegs:$src1), 65535),
@@ -4900,19 +4915,19 @@ def: Pat<(add (i1 PredRegs:$src1), -1),
(C2_not PredRegs:$src1)>;
// Map from p0 = pnot(p0); r0 = mux(p0, #i, #j) => r0 = mux(p0, #j, #i).
-def: Pat<(select (not (i1 PredRegs:$src1)), s8ImmPred:$src2, s8ExtPred:$src3),
- (C2_muxii PredRegs:$src1, s8ExtPred:$src3, s8ImmPred:$src2)>;
+def: Pat<(select (not (i1 PredRegs:$src1)), s8ImmPred:$src2, s32ImmPred:$src3),
+ (C2_muxii PredRegs:$src1, s32ImmPred:$src3, s8ImmPred:$src2)>;
// Map from p0 = pnot(p0); r0 = select(p0, #i, r1)
// => r0 = C2_muxir(p0, r1, #i)
-def: Pat<(select (not (i1 PredRegs:$src1)), s8ExtPred:$src2,
+def: Pat<(select (not (i1 PredRegs:$src1)), s32ImmPred:$src2,
(i32 IntRegs:$src3)),
- (C2_muxir PredRegs:$src1, IntRegs:$src3, s8ExtPred:$src2)>;
+ (C2_muxir PredRegs:$src1, IntRegs:$src3, s32ImmPred:$src2)>;
// Map from p0 = pnot(p0); r0 = mux(p0, r1, #i)
// => r0 = C2_muxri (p0, #i, r1)
-def: Pat<(select (not (i1 PredRegs:$src1)), IntRegs:$src2, s8ExtPred:$src3),
- (C2_muxri PredRegs:$src1, s8ExtPred:$src3, IntRegs:$src2)>;
+def: Pat<(select (not (i1 PredRegs:$src1)), IntRegs:$src2, s32ImmPred:$src3),
+ (C2_muxri PredRegs:$src1, s32ImmPred:$src3, IntRegs:$src2)>;
// Map from p0 = pnot(p0); if (p0) jump => if (!p0) jump.
def: Pat<(brcond (not (i1 PredRegs:$src1)), bb:$offset),
@@ -4952,26 +4967,6 @@ def: Pat<(brcond (i1 (setlt (i32 IntRegs:$src1), s8ImmPred:$src2)), bb:$offset),
(J2_jumpf (C2_cmpgti IntRegs:$src1, (DEC_CONST_SIGNED s8ImmPred:$src2)),
bb:$offset)>;
-// cmp.lt(r0, r1) -> cmp.gt(r1, r0)
-def : Pat <(brcond (i1 (setlt (i32 IntRegs:$src1), (i32 IntRegs:$src2))),
- bb:$offset),
- (J2_jumpt (C2_cmpgt (i32 IntRegs:$src2), (i32 IntRegs:$src1)), bb:$offset)>;
-
-def : Pat <(brcond (i1 (setuge (i64 DoubleRegs:$src1), (i64 DoubleRegs:$src2))),
- bb:$offset),
- (J2_jumpf (C2_cmpgtup (i64 DoubleRegs:$src2), (i64 DoubleRegs:$src1)),
- bb:$offset)>;
-
-def : Pat <(brcond (i1 (setule (i32 IntRegs:$src1), (i32 IntRegs:$src2))),
- bb:$offset),
- (J2_jumpf (C2_cmpgtu (i32 IntRegs:$src1), (i32 IntRegs:$src2)),
- bb:$offset)>;
-
-def : Pat <(brcond (i1 (setule (i64 DoubleRegs:$src1), (i64 DoubleRegs:$src2))),
- bb:$offset),
- (J2_jumpf (C2_cmpgtup (i64 DoubleRegs:$src1), (i64 DoubleRegs:$src2)),
- bb:$offset)>;
-
// Map from a 64-bit select to an emulated 64-bit mux.
// Hexagon does not support 64-bit MUXes; so emulate with combines.
def: Pat<(select (i1 PredRegs:$src1), (i64 DoubleRegs:$src2),
@@ -4987,10 +4982,6 @@ def: Pat<(select (i1 PredRegs:$src1), (i1 PredRegs:$src2), (i1 PredRegs:$src3)),
(C2_or (C2_and PredRegs:$src1, PredRegs:$src2),
(C2_and (C2_not PredRegs:$src1), PredRegs:$src3))>;
-// Map Pd = load(addr) -> Rs = load(addr); Pd = Rs.
-def : Pat<(i1 (load ADDRriS11_2:$addr)),
- (i1 (C2_tfrrp (i32 (L2_loadrb_io AddrFI:$addr, 0))))>;
-
// Map for truncating from 64 immediates to 32 bit immediates.
def: Pat<(i32 (trunc (i64 DoubleRegs:$src))),
(LoReg DoubleRegs:$src)>;
@@ -4999,42 +4990,10 @@ def: Pat<(i32 (trunc (i64 DoubleRegs:$src))),
def: Pat<(i1 (trunc (i64 DoubleRegs:$src))),
(C2_tfrrp (LoReg DoubleRegs:$src))>;
-// Map memb(Rs) = Rdd -> memb(Rs) = Rt.
-def : Pat<(truncstorei8 (i64 DoubleRegs:$src), ADDRriS11_0:$addr),
- (S2_storerb_io AddrFI:$addr, 0, (i32 (EXTRACT_SUBREG (i64 DoubleRegs:$src),
- subreg_loreg)))>;
-
-// Map memh(Rs) = Rdd -> memh(Rs) = Rt.
-def : Pat<(truncstorei16 (i64 DoubleRegs:$src), ADDRriS11_0:$addr),
- (S2_storerh_io AddrFI:$addr, 0, (i32 (EXTRACT_SUBREG (i64 DoubleRegs:$src),
- subreg_loreg)))>;
-// Map memw(Rs) = Rdd -> memw(Rs) = Rt
-def : Pat<(truncstorei32 (i64 DoubleRegs:$src), ADDRriS11_0:$addr),
- (S2_storeri_io AddrFI:$addr, 0, (i32 (EXTRACT_SUBREG (i64 DoubleRegs:$src),
- subreg_loreg)))>;
-
-// Map memw(Rs) = Rdd -> memw(Rs) = Rt.
-def : Pat<(truncstorei32 (i64 DoubleRegs:$src), ADDRriS11_0:$addr),
- (S2_storeri_io AddrFI:$addr, 0, (i32 (EXTRACT_SUBREG (i64 DoubleRegs:$src),
- subreg_loreg)))>;
-
-// Map from i1 = constant<-1>; memw(addr) = i1 -> r0 = 1; memw(addr) = r0.
-def : Pat<(store (i1 -1), ADDRriS11_2:$addr),
- (S2_storerb_io AddrFI:$addr, 0, (A2_tfrsi 1))>;
-
-
-// Map from i1 = constant<-1>; store i1 -> r0 = 1; store r0.
-def : Pat<(store (i1 -1), ADDRriS11_2:$addr),
- (S2_storerb_io AddrFI:$addr, 0, (A2_tfrsi 1))>;
-
-// Map from memb(Rs) = Pd -> Rt = mux(Pd, #0, #1); store Rt.
-def : Pat<(store (i1 PredRegs:$src1), ADDRriS11_2:$addr),
- (S2_storerb_io AddrFI:$addr, 0, (i32 (C2_muxii (i1 PredRegs:$src1), 1, 0)) )>;
-
// rs <= rt -> !(rs > rt).
let AddedComplexity = 30 in
-def: Pat<(i1 (setle (i32 IntRegs:$src1), s10ExtPred:$src2)),
- (C2_not (C2_cmpgti IntRegs:$src1, s10ExtPred:$src2))>;
+def: Pat<(i1 (setle (i32 IntRegs:$src1), s32ImmPred:$src2)),
+ (C2_not (C2_cmpgti IntRegs:$src1, s32ImmPred:$src2))>;
// rs <= rt -> !(rs > rt).
def : Pat<(i1 (setle (i32 IntRegs:$src1), (i32 IntRegs:$src2))),
@@ -5048,13 +5007,8 @@ def: Pat<(i1 (setle (i64 DoubleRegs:$src1), (i64 DoubleRegs:$src2))),
// Hexagon_TODO: We should improve on this.
// rs != rt -> !(rs == rt).
let AddedComplexity = 30 in
-def: Pat<(i1 (setne (i32 IntRegs:$src1), s10ExtPred:$src2)),
- (C2_not (C2_cmpeqi IntRegs:$src1, s10ExtPred:$src2))>;
-
-// Map cmpne(Rs) -> !cmpeqe(Rs).
-// rs != rt -> !(rs == rt).
-def : Pat <(i1 (setne (i32 IntRegs:$src1), (i32 IntRegs:$src2))),
- (i1 (C2_not (i1 (C2_cmpeq (i32 IntRegs:$src1), (i32 IntRegs:$src2)))))>;
+def: Pat<(i1 (setne (i32 IntRegs:$src1), s32ImmPred:$src2)),
+ (C2_not (C2_cmpeqi IntRegs:$src1, s32ImmPred:$src2))>;
// Convert setne back to xor for hexagon since we compute w/ pred registers.
def: Pat<(i1 (setne (i1 PredRegs:$src1), (i1 PredRegs:$src2))),
@@ -5072,8 +5026,8 @@ def : Pat <(i1 (setge (i32 IntRegs:$src1), (i32 IntRegs:$src2))),
// cmpge(Rs, Imm) -> cmpgt(Rs, Imm-1)
let AddedComplexity = 30 in
-def: Pat<(i1 (setge (i32 IntRegs:$src1), s8ExtPred:$src2)),
- (C2_cmpgti IntRegs:$src1, (DEC_CONST_SIGNED s8ExtPred:$src2))>;
+def: Pat<(i1 (setge (i32 IntRegs:$src1), s32ImmPred:$src2)),
+ (C2_cmpgti IntRegs:$src1, (DEC_CONST_SIGNED s32ImmPred:$src2))>;
// Map cmpge(Rss, Rtt) -> !cmpgt(Rtt, Rss).
// rss >= rtt -> !(rtt > rss).
@@ -5084,20 +5038,21 @@ def: Pat<(i1 (setge (i64 DoubleRegs:$src1), (i64 DoubleRegs:$src2))),
// !cmpge(Rs, Imm) -> !cmpgt(Rs, Imm-1).
// rs < rt -> !(rs >= rt).
let AddedComplexity = 30 in
-def: Pat<(i1 (setlt (i32 IntRegs:$src1), s8ExtPred:$src2)),
- (C2_not (C2_cmpgti IntRegs:$src1, (DEC_CONST_SIGNED s8ExtPred:$src2)))>;
+def: Pat<(i1 (setlt (i32 IntRegs:$src1), s32ImmPred:$src2)),
+ (C2_not (C2_cmpgti IntRegs:$src1,
+ (DEC_CONST_SIGNED s32ImmPred:$src2)))>;
// Generate cmpgeu(Rs, #0) -> cmpeq(Rs, Rs)
def: Pat<(i1 (setuge (i32 IntRegs:$src1), 0)),
(C2_cmpeq IntRegs:$src1, IntRegs:$src1)>;
// Generate cmpgeu(Rs, #u8) -> cmpgtu(Rs, #u8 -1)
-def: Pat<(i1 (setuge (i32 IntRegs:$src1), u8ExtPred:$src2)),
- (C2_cmpgtui IntRegs:$src1, (DEC_CONST_UNSIGNED u8ExtPred:$src2))>;
+def: Pat<(i1 (setuge (i32 IntRegs:$src1), u32ImmPred:$src2)),
+ (C2_cmpgtui IntRegs:$src1, (DEC_CONST_UNSIGNED u32ImmPred:$src2))>;
// Generate cmpgtu(Rs, #u9)
-def: Pat<(i1 (setugt (i32 IntRegs:$src1), u9ExtPred:$src2)),
- (C2_cmpgtui IntRegs:$src1, u9ExtPred:$src2)>;
+def: Pat<(i1 (setugt (i32 IntRegs:$src1), u32ImmPred:$src2)),
+ (C2_cmpgtui IntRegs:$src1, u32ImmPred:$src2)>;
// Map from Rs >= Rt -> !(Rt > Rs).
// rs >= rt -> !(rt > rs).
@@ -5118,11 +5073,6 @@ def: Pat<(i32 (sext (i1 PredRegs:$src1))),
def: Pat<(i64 (sext (i1 PredRegs:$src1))),
(A2_combinew (A2_tfrsi -1), (C2_muxii PredRegs:$src1, -1, 0))>;
-// Convert sign-extended load back to load and sign extend.
-// i32 -> i64
-def: Pat <(i64 (sextloadi32 ADDRriS11_2:$src1)),
- (i64 (A2_sxtw (L2_loadri_io AddrFI:$src1, 0)))>;
-
// Zero extends.
// i1 -> i32
def: Pat<(i32 (zext (i1 PredRegs:$src1))),
@@ -5136,12 +5086,6 @@ def: Pat<(i32 (anyext (i1 PredRegs:$src1))),
def: Pat<(i64 (anyext (i1 PredRegs:$src1))),
(A2_sxtw (C2_muxii PredRegs:$src1, 1, 0))>;
-def: Pat<(i64 (or (i64 (shl (i64 DoubleRegs:$srcHigh),
- (i32 32))),
- (i64 (zextloadi32 ADDRriS11_2:$srcLow)))),
- (i64 (A2_combinew (EXTRACT_SUBREG (i64 DoubleRegs:$srcHigh), subreg_loreg),
- (L2_loadri_io AddrFI:$srcLow, 0)))>;
-
// Multiply 64-bit unsigned and use upper result.
def : Pat <(mulhu (i64 DoubleRegs:$src1), (i64 DoubleRegs:$src2)),
(A2_addp
@@ -5186,10 +5130,13 @@ let AddedComplexity = 100 in
def: Pat<(i32 (sext_inreg (Hexagon_ARGEXTEND (i32 IntRegs:$src1)), i16)),
(i32 IntRegs:$src1)>;
-def HexagonWrapperJT: SDNode<"HexagonISD::WrapperJT", SDTIntUnaryOp>;
+def HexagonJT: SDNode<"HexagonISD::JT", SDTIntUnaryOp>;
+def HexagonCP: SDNode<"HexagonISD::CP", SDTIntUnaryOp>;
-def : Pat<(HexagonWrapperJT tjumptable:$dst),
- (i32 (CONST32_set_jt tjumptable:$dst))>;
+def: Pat<(HexagonJT tjumptable:$dst),
+ (CONST32_set_jt tjumptable:$dst)>;
+def: Pat<(HexagonCP tconstpool :$dst),
+ (CONST32_set_jt tconstpool:$dst)>;
// XTYPE/SHIFT
//
@@ -5626,6 +5573,43 @@ let hasNewValue = 1 in {
def S2_insertp_rp : T_S3op_insert<"insert", DoubleRegs>;
def S2_insertp : T_S2op_insert <0b0011, DoubleRegs, u6Imm>;
+
+def SDTHexagonINSERT_ri : SDTypeProfile<1, 4, [SDTCisVT<0, i32>,
+ SDTCisVT<1, i32>,
+ SDTCisVT<2, i32>,
+ SDTCisVT<3, i32>,
+ SDTCisVT<4, i32>]>;
+def SDTHexagonINSERT_rd : SDTypeProfile<1, 4, [SDTCisVT<0, i64>,
+ SDTCisVT<1, i64>,
+ SDTCisVT<2, i64>,
+ SDTCisVT<3, i32>,
+ SDTCisVT<4, i32>]>;
+def SDTHexagonINSERT_riv : SDTypeProfile<1, 3, [SDTCisVT<0, i32>,
+ SDTCisVT<1, i32>,
+ SDTCisVT<2, i32>,
+ SDTCisVT<3, i64>]>;
+def SDTHexagonINSERT_rdv : SDTypeProfile<1, 3, [SDTCisVT<0, i64>,
+ SDTCisVT<1, i64>,
+ SDTCisVT<2, i64>,
+ SDTCisVT<3, i64>]>;
+def HexagonINSERT_ri : SDNode<"HexagonISD::INSERT_ri", SDTHexagonINSERT_ri>;
+def HexagonINSERT_rd : SDNode<"HexagonISD::INSERT_rd", SDTHexagonINSERT_rd>;
+def HexagonINSERT_riv: SDNode<"HexagonISD::INSERT_riv", SDTHexagonINSERT_riv>;
+def HexagonINSERT_rdv: SDNode<"HexagonISD::INSERT_rdv", SDTHexagonINSERT_rdv>;
+
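+// Map the insert nodes onto the S2_insert* instructions: the _ri/_rd forms
+// take the bitfield width and offset as immediates, while the _riv/_rdv
+// forms take the pair packed in a 64-bit register.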
+def: Pat<(HexagonINSERT_ri I32:$Rs, I32:$Rt, u5ImmPred:$u1, u5ImmPred:$u2),
+ (S2_insert I32:$Rs, I32:$Rt, u5ImmPred:$u1, u5ImmPred:$u2)>;
+
+def: Pat<(HexagonINSERT_rd I64:$Rs, I64:$Rt, u6ImmPred:$u1, u6ImmPred:$u2),
+ (S2_insertp I64:$Rs, I64:$Rt, u6ImmPred:$u1, u6ImmPred:$u2)>;
+
+def: Pat<(HexagonINSERT_riv I32:$Rs, I32:$Rt, I64:$Ru),
+ (S2_insert_rp I32:$Rs, I32:$Rt, I64:$Ru)>;
+
+def: Pat<(HexagonINSERT_rdv I64:$Rs, I64:$Rt, I64:$Ru),
+ (S2_insertp_rp I64:$Rs, I64:$Rt, I64:$Ru)>;
+
+
//===----------------------------------------------------------------------===//
// Template class for 'extract bitfield' instructions
//===----------------------------------------------------------------------===//
@@ -5692,6 +5676,37 @@ let hasNewValue = 1 in {
def S2_extractu : T_S2op_extract <"extractu", 0b1101, IntRegs, u5Imm>;
}
+def SDTHexagonEXTRACTU_ri : SDTypeProfile<1, 3, [SDTCisVT<0, i32>,
+ SDTCisVT<1, i32>,
+ SDTCisVT<2, i32>,
+ SDTCisVT<3, i32>]>;
+def SDTHexagonEXTRACTU_rd : SDTypeProfile<1, 3, [SDTCisVT<0, i64>,
+ SDTCisVT<1, i64>,
+ SDTCisVT<2, i32>,
+ SDTCisVT<3, i32>]>;
+def SDTHexagonEXTRACTU_riv : SDTypeProfile<1, 2, [SDTCisVT<0, i32>,
+ SDTCisVT<1, i32>,
+ SDTCisVT<2, i64>]>;
+def SDTHexagonEXTRACTU_rdv : SDTypeProfile<1, 2, [SDTCisVT<0, i64>,
+ SDTCisVT<1, i64>,
+ SDTCisVT<2, i64>]>;
+def HexagonEXTRACTU_ri : SDNode<"HexagonISD::EXTRACTU_ri", SDTHexagonEXTRACTU_ri>;
+def HexagonEXTRACTU_rd : SDNode<"HexagonISD::EXTRACTU_rd", SDTHexagonEXTRACTU_rd>;
+def HexagonEXTRACTU_riv: SDNode<"HexagonISD::EXTRACTU_riv", SDTHexagonEXTRACTU_riv>;
+def HexagonEXTRACTU_rdv: SDNode<"HexagonISD::EXTRACTU_rdv", SDTHexagonEXTRACTU_rdv>;
+
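+// Likewise map the unsigned-extract nodes: immediate forms go to
+// S2_extractu/S2_extractup, register forms to the _rp variants.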
+def: Pat<(HexagonEXTRACTU_ri I32:$src1, u5ImmPred:$src2, u5ImmPred:$src3),
+ (S2_extractu I32:$src1, u5ImmPred:$src2, u5ImmPred:$src3)>;
+
+def: Pat<(HexagonEXTRACTU_rd I64:$src1, u6ImmPred:$src2, u6ImmPred:$src3),
+ (S2_extractup I64:$src1, u6ImmPred:$src2, u6ImmPred:$src3)>;
+
+def: Pat<(HexagonEXTRACTU_riv I32:$src1, I64:$src2),
+ (S2_extractu_rp I32:$src1, I64:$src2)>;
+
+def: Pat<(HexagonEXTRACTU_rdv I64:$src1, I64:$src2),
+ (S2_extractup_rp I64:$src1, I64:$src2)>;
+
// Change the sign of the immediate for Rd=-mpyi(Rs,#u8)
def: Pat<(mul (i32 IntRegs:$src1), (ineg n8ImmPred:$src2)),
(M2_mpysin IntRegs:$src1, u8ImmPred:$src2)>;
@@ -5728,6 +5743,22 @@ def S2_tableidxw : tableidxRaw<"tableidxw", 0b10>;
def S2_tableidxd : tableidxRaw<"tableidxd", 0b11>;
//===----------------------------------------------------------------------===//
+// Template class for 'table index' instructions which are assembler-mapped
+// to their :raw format.
+//===----------------------------------------------------------------------===//
+let isPseudo = 1 in
+class tableidx_goodsyntax <string mnemonic>
+ : SInst <(outs IntRegs:$Rx),
+ (ins IntRegs:$_dst_, IntRegs:$Rs, u4Imm:$u4, u5Imm:$u5),
+ "$Rx = "#mnemonic#"($Rs, #$u4, #$u5)",
+ [], "$Rx = $_dst_" >;
+
+def S2_tableidxb_goodsyntax : tableidx_goodsyntax<"tableidxb">;
+def S2_tableidxh_goodsyntax : tableidx_goodsyntax<"tableidxh">;
+def S2_tableidxw_goodsyntax : tableidx_goodsyntax<"tableidxw">;
+def S2_tableidxd_goodsyntax : tableidx_goodsyntax<"tableidxd">;
+
+//===----------------------------------------------------------------------===//
// V3 Instructions +
//===----------------------------------------------------------------------===//
@@ -5761,4 +5792,4 @@ include "HexagonInstrInfoV5.td"
// ALU32/64/Vector +
//===----------------------------------------------------------------------===///
-include "HexagonInstrInfoVector.td" \ No newline at end of file
+include "HexagonInstrInfoVector.td"
diff --git a/lib/Target/Hexagon/HexagonInstrInfoV4.td b/lib/Target/Hexagon/HexagonInstrInfoV4.td
index 0e4dde3..918b482 100644
--- a/lib/Target/Hexagon/HexagonInstrInfoV4.td
+++ b/lib/Target/Hexagon/HexagonInstrInfoV4.td
@@ -11,6 +11,25 @@
//
//===----------------------------------------------------------------------===//
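+// Duplex instruction classes 0-F; classes 3-7 may take a constant
+// extender, hence the isExtendable wrapper below.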
+def DuplexIClass0: InstDuplex < 0 >;
+def DuplexIClass1: InstDuplex < 1 >;
+def DuplexIClass2: InstDuplex < 2 >;
+let isExtendable = 1 in {
+ def DuplexIClass3: InstDuplex < 3 >;
+ def DuplexIClass4: InstDuplex < 4 >;
+ def DuplexIClass5: InstDuplex < 5 >;
+ def DuplexIClass6: InstDuplex < 6 >;
+ def DuplexIClass7: InstDuplex < 7 >;
+}
+def DuplexIClass8: InstDuplex < 8 >;
+def DuplexIClass9: InstDuplex < 9 >;
+def DuplexIClassA: InstDuplex < 0xA >;
+def DuplexIClassB: InstDuplex < 0xB >;
+def DuplexIClassC: InstDuplex < 0xC >;
+def DuplexIClassD: InstDuplex < 0xD >;
+def DuplexIClassE: InstDuplex < 0xE >;
+def DuplexIClassF: InstDuplex < 0xF >;
+
def addrga: PatLeaf<(i32 AddrGA:$Addr)>;
def addrgp: PatLeaf<(i32 AddrGP:$Addr)>;
@@ -137,6 +156,9 @@ def: T_cmp32_rr_pat<A4_rcmpeq, CmpInReg<seteq>, i32>;
def: T_cmp32_rr_pat<A4_rcmpneq, CmpInReg<setne>, i32>;
def: T_cmp32_rr_pat<C4_cmpneq, setne, i1>;
+def: T_cmp32_rr_pat<C4_cmplteu, setule, i1>;
+
+def: T_cmp32_rr_pat<C4_cmplteu, RevCmp<setuge>, i1>;
class T_CMP_rrbh<string mnemonic, bits<3> MinOp, bit IsComm>
: SInst<(outs PredRegs:$Pd), (ins IntRegs:$Rs, IntRegs:$Rt),
@@ -247,10 +269,10 @@ class T_RCMP_EQ_ri<string mnemonic, bit IsNeg>
def A4_rcmpeqi : T_RCMP_EQ_ri<"cmp.eq", 0>;
def A4_rcmpneqi : T_RCMP_EQ_ri<"!cmp.eq", 1>;
-def: Pat<(i32 (zext (i1 (seteq (i32 IntRegs:$Rs), s8ExtPred:$s8)))),
- (A4_rcmpeqi IntRegs:$Rs, s8ExtPred:$s8)>;
-def: Pat<(i32 (zext (i1 (setne (i32 IntRegs:$Rs), s8ExtPred:$s8)))),
- (A4_rcmpneqi IntRegs:$Rs, s8ExtPred:$s8)>;
+def: Pat<(i32 (zext (i1 (seteq (i32 IntRegs:$Rs), s32ImmPred:$s8)))),
+ (A4_rcmpeqi IntRegs:$Rs, s32ImmPred:$s8)>;
+def: Pat<(i32 (zext (i1 (setne (i32 IntRegs:$Rs), s32ImmPred:$s8)))),
+ (A4_rcmpneqi IntRegs:$Rs, s32ImmPred:$s8)>;
// Preserve the S2_tstbit_r generation
def: Pat<(i32 (zext (i1 (setne (i32 (and (i32 (shl 1, (i32 IntRegs:$src2))),
@@ -292,16 +314,15 @@ let opExtendable = 1 in
def A4_combineir : T_Combine1<0b01, (ins s8Ext:$s8, IntRegs:$Rs),
"$Rdd = combine(#$s8, $Rs)">;
-def HexagonWrapperCombineRI_V4 :
- SDNode<"HexagonISD::WrapperCombineRI_V4", SDTHexagonI64I32I32>;
-def HexagonWrapperCombineIR_V4 :
- SDNode<"HexagonISD::WrapperCombineIR_V4", SDTHexagonI64I32I32>;
+// The complexity of the combines involving immediates should be greater
+// than the complexity of the combine with two registers.
+let AddedComplexity = 50 in {
+def: Pat<(HexagonCOMBINE IntRegs:$r, s32ImmPred:$i),
+ (A4_combineri IntRegs:$r, s32ImmPred:$i)>;
-def : Pat <(HexagonWrapperCombineRI_V4 IntRegs:$r, s8ExtPred:$i),
- (A4_combineri IntRegs:$r, s8ExtPred:$i)>;
-
-def : Pat <(HexagonWrapperCombineIR_V4 s8ExtPred:$i, IntRegs:$r),
- (A4_combineir s8ExtPred:$i, IntRegs:$r)>;
+def: Pat<(HexagonCOMBINE s32ImmPred:$i, IntRegs:$r),
+ (A4_combineir s32ImmPred:$i, IntRegs:$r)>;
+}
// A4_combineii: Set two small immediates.
let hasSideEffects = 0, isExtendable = 1, opExtentBits = 6, opExtendable = 2 in
@@ -322,7 +343,7 @@ def A4_combineii: ALU32Inst<(outs DoubleRegs:$Rdd), (ins s8Imm:$s8, u6Ext:$U6),
// The complexity of the combine with two immediates should be greater than
// the complexity of a combine involving a register.
let AddedComplexity = 75 in
-def: Pat<(HexagonCOMBINE s8ImmPred:$s8, u6ExtPred:$u6),
+def: Pat<(HexagonCOMBINE s8ImmPred:$s8, u32ImmPred:$u6),
(A4_combineii imm:$s8, imm:$u6)>;
//===----------------------------------------------------------------------===//
@@ -346,20 +367,22 @@ multiclass Loadxm_pat<PatFrag Load, ValueType VT, PatFrag ValueMod,
PatLeaf ImmPred, InstHexagon MI> {
def: Pat<(VT (Load AddrFI:$fi)),
(VT (ValueMod (MI AddrFI:$fi, 0)))>;
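+  // Also match a frame-index base with an immediate offset.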
+ def: Pat<(VT (Load (add AddrFI:$fi, ImmPred:$Off))),
+ (VT (ValueMod (MI AddrFI:$fi, imm:$Off)))>;
def: Pat<(VT (Load (add IntRegs:$Rs, ImmPred:$Off))),
(VT (ValueMod (MI IntRegs:$Rs, imm:$Off)))>;
def: Pat<(VT (Load (i32 IntRegs:$Rs))),
(VT (ValueMod (MI IntRegs:$Rs, 0)))>;
}
-defm: Loadxm_pat<extloadi1, i64, Zext64, s11_0ExtPred, L2_loadrub_io>;
-defm: Loadxm_pat<extloadi8, i64, Zext64, s11_0ExtPred, L2_loadrub_io>;
-defm: Loadxm_pat<extloadi16, i64, Zext64, s11_1ExtPred, L2_loadruh_io>;
-defm: Loadxm_pat<zextloadi1, i64, Zext64, s11_0ExtPred, L2_loadrub_io>;
-defm: Loadxm_pat<zextloadi8, i64, Zext64, s11_0ExtPred, L2_loadrub_io>;
-defm: Loadxm_pat<zextloadi16, i64, Zext64, s11_1ExtPred, L2_loadruh_io>;
-defm: Loadxm_pat<sextloadi8, i64, Sext64, s11_0ExtPred, L2_loadrb_io>;
-defm: Loadxm_pat<sextloadi16, i64, Sext64, s11_1ExtPred, L2_loadrh_io>;
+defm: Loadxm_pat<extloadi1, i64, Zext64, s32_0ImmPred, L2_loadrub_io>;
+defm: Loadxm_pat<extloadi8, i64, Zext64, s32_0ImmPred, L2_loadrub_io>;
+defm: Loadxm_pat<extloadi16, i64, Zext64, s31_1ImmPred, L2_loadruh_io>;
+defm: Loadxm_pat<zextloadi1, i64, Zext64, s32_0ImmPred, L2_loadrub_io>;
+defm: Loadxm_pat<zextloadi8, i64, Zext64, s32_0ImmPred, L2_loadrub_io>;
+defm: Loadxm_pat<zextloadi16, i64, Zext64, s31_1ImmPred, L2_loadruh_io>;
+defm: Loadxm_pat<sextloadi8, i64, Sext64, s32_0ImmPred, L2_loadrb_io>;
+defm: Loadxm_pat<sextloadi16, i64, Sext64, s31_1ImmPred, L2_loadrh_io>;
// Map Rdd = anyext(Rs) -> Rdd = combine(#0, Rs).
def: Pat<(i64 (anyext (i32 IntRegs:$src1))), (Zext64 IntRegs:$src1)>;
@@ -635,19 +658,6 @@ def: Pat<(i64 (zext (i1 PredRegs:$src1))),
def: Pat<(i64 (zext (i32 IntRegs:$src1))),
(Zext64 IntRegs:$src1)>;
-// zext i32->i64
-def: Pat <(i64 (zextloadi32 ADDRriS11_2:$src1)),
- (i64 (A4_combineir 0, (L2_loadri_io AddrFI:$src1, 0)))>;
-
-let AddedComplexity = 100 in
-def: Pat <(i64 (zextloadi32 (i32 (add IntRegs:$src1, s11_2ExtPred:$offset)))),
- (i64 (A4_combineir 0, (L2_loadri_io IntRegs:$src1,
- s11_2ExtPred:$offset)))>;
-
-// anyext i32->i64
-def: Pat <(i64 (extloadi32 ADDRriS11_2:$src1)),
- (i64 (A4_combineir 0, (L2_loadri_io AddrFI:$src1, 0)))>;
-
//===----------------------------------------------------------------------===//
// LD -
//===----------------------------------------------------------------------===//
@@ -768,8 +778,8 @@ multiclass T_StoreAbsReg_Pats <InstHexagon MI, RegisterClass RC, ValueType VT,
PatFrag stOp> {
def : Pat<(stOp (VT RC:$src4),
(add (shl (i32 IntRegs:$src1), u2ImmPred:$src2),
- u0AlwaysExtPred:$src3)),
- (MI IntRegs:$src1, u2ImmPred:$src2, u0AlwaysExtPred:$src3, RC:$src4)>;
+ u32ImmPred:$src3)),
+ (MI IntRegs:$src1, u2ImmPred:$src2, u32ImmPred:$src3, RC:$src4)>;
def : Pat<(stOp (VT RC:$src4),
(add (shl IntRegs:$src1, u2ImmPred:$src2),
@@ -1157,17 +1167,17 @@ let AddedComplexity = 40 in {
// is not extendable. This could cause problems during removing the frame
// indices, since the offset with respect to R29/R30 may not fit in the
// u6 field.
- def: Storexm_add_pat<truncstorei8, s8ExtPred, u6_0ImmPred, ToImmByte,
+ def: Storexm_add_pat<truncstorei8, s32ImmPred, u6_0ImmPred, ToImmByte,
S4_storeirb_io>;
- def: Storexm_add_pat<truncstorei16, s8ExtPred, u6_1ImmPred, ToImmHalf,
+ def: Storexm_add_pat<truncstorei16, s32ImmPred, u6_1ImmPred, ToImmHalf,
S4_storeirh_io>;
- def: Storexm_add_pat<store, s8ExtPred, u6_2ImmPred, ToImmWord,
+ def: Storexm_add_pat<store, s32ImmPred, u6_2ImmPred, ToImmWord,
S4_storeiri_io>;
}
-def: Storexm_simple_pat<truncstorei8, s8ExtPred, ToImmByte, S4_storeirb_io>;
-def: Storexm_simple_pat<truncstorei16, s8ExtPred, ToImmHalf, S4_storeirh_io>;
-def: Storexm_simple_pat<store, s8ExtPred, ToImmWord, S4_storeiri_io>;
+def: Storexm_simple_pat<truncstorei8, s32ImmPred, ToImmByte, S4_storeirb_io>;
+def: Storexm_simple_pat<truncstorei16, s32ImmPred, ToImmHalf, S4_storeirh_io>;
+def: Storexm_simple_pat<store, s32ImmPred, ToImmWord, S4_storeiri_io>;
// memb(Rx++#s4:0:circ(Mu))=Rt
// memb(Rx++I:circ(Mu))=Rt
@@ -1798,6 +1808,49 @@ def: LogLogNot_pat<or, and, C4_or_andn>;
def: LogLogNot_pat<or, or, C4_or_orn>;
//===----------------------------------------------------------------------===//
+// PIC: Support for PIC compilations. The patterns and SD nodes defined
+// below are needed to support position-independent code generation.
+//===----------------------------------------------------------------------===//
+
+def SDT_HexagonPICAdd
+ : SDTypeProfile<1, 1, [SDTCisVT<0, i32>, SDTCisVT<1, i32>]>;
+def SDT_HexagonGOTAdd
+ : SDTypeProfile<1, 2, [SDTCisVT<0, i32>, SDTCisVT<1, i32>]>;
+
+def SDT_HexagonGOTAddInternal : SDTypeProfile<1, 1, [SDTCisVT<0, i32>]>;
+def SDT_HexagonGOTAddInternalJT : SDTypeProfile<1, 1, [SDTCisVT<0, i32>]>;
+def SDT_HexagonGOTAddInternalBA : SDTypeProfile<1, 1, [SDTCisVT<0, i32>]>;
+
+def Hexagonpic_add : SDNode<"HexagonISD::PIC_ADD", SDT_HexagonPICAdd>;
+def Hexagonat_got : SDNode<"HexagonISD::AT_GOT", SDT_HexagonGOTAdd>;
+def Hexagongat_pcrel : SDNode<"HexagonISD::AT_PCREL",
+ SDT_HexagonGOTAddInternal>;
+def Hexagongat_pcrel_jt : SDNode<"HexagonISD::AT_PCREL",
+ SDT_HexagonGOTAddInternalJT>;
+def Hexagongat_pcrel_ba : SDNode<"HexagonISD::AT_PCREL",
+ SDT_HexagonGOTAddInternalBA>;
+
+// PIC: Map from a block address computation to a PC-relative add
+def: Pat<(Hexagongat_pcrel_ba tblockaddress:$src1),
+ (C4_addipc u32ImmPred:$src1)>;
+
+// PIC: Map the GOT-pointer computation to a PC-relative add
+def: Pat<(Hexagonpic_add texternalsym:$src1),
+ (C4_addipc u32ImmPred:$src1)>;
+
+// PIC: Map from a jump table address computation to a PC-relative add
+def: Pat<(Hexagongat_pcrel_jt tjumptable:$src1),
+ (C4_addipc u32ImmPred:$src1)>;
+
+// PIC: Map from a GOT-relative symbol reference to a load
+def: Pat<(Hexagonat_got (i32 IntRegs:$src1), tglobaladdr:$src2),
+ (L2_loadri_io IntRegs:$src1, s30_2ImmPred:$src2)>;
+
+// PIC: Map from a static symbol reference to a PC-relative add
+def: Pat<(Hexagongat_pcrel tglobaladdr:$src1),
+ (C4_addipc u32ImmPred:$src1)>;
+
+//===----------------------------------------------------------------------===//
// CR -
//===----------------------------------------------------------------------===//
@@ -1836,7 +1889,7 @@ def S4_addaddi : ALU64Inst <(outs IntRegs:$Rd),
(ins IntRegs:$Rs, IntRegs:$Ru, s6Ext:$s6),
"$Rd = add($Rs, add($Ru, #$s6))" ,
[(set (i32 IntRegs:$Rd), (add (i32 IntRegs:$Rs),
- (add (i32 IntRegs:$Ru), s6_16ExtPred:$s6)))],
+ (add (i32 IntRegs:$Ru), s16_16ImmPred:$s6)))],
"", ALU64_tc_2_SLOT23> {
bits<5> Rd;
bits<5> Rs;
@@ -1877,19 +1930,19 @@ def S4_subaddi: ALU64Inst <(outs IntRegs:$Rd),
}
// Rd=add(Rs,sub(#s6,Ru))
-def: Pat<(add (i32 IntRegs:$src1), (sub s6_10ExtPred:$src2,
+def: Pat<(add (i32 IntRegs:$src1), (sub s32ImmPred:$src2,
(i32 IntRegs:$src3))),
- (S4_subaddi IntRegs:$src1, s6_10ExtPred:$src2, IntRegs:$src3)>;
+ (S4_subaddi IntRegs:$src1, s32ImmPred:$src2, IntRegs:$src3)>;
// Rd=sub(add(Rs,#s6),Ru)
-def: Pat<(sub (add (i32 IntRegs:$src1), s6_10ExtPred:$src2),
+def: Pat<(sub (add (i32 IntRegs:$src1), s32ImmPred:$src2),
(i32 IntRegs:$src3)),
- (S4_subaddi IntRegs:$src1, s6_10ExtPred:$src2, IntRegs:$src3)>;
+ (S4_subaddi IntRegs:$src1, s32ImmPred:$src2, IntRegs:$src3)>;
// Rd=add(sub(Rs,Ru),#s6)
def: Pat<(add (sub (i32 IntRegs:$src1), (i32 IntRegs:$src3)),
- (s6_10ExtPred:$src2)),
- (S4_subaddi IntRegs:$src1, s6_10ExtPred:$src2, IntRegs:$src3)>;
+ (s32ImmPred:$src2)),
+ (S4_subaddi IntRegs:$src1, s32ImmPred:$src2, IntRegs:$src3)>;
// Add or subtract doublewords with carry.
@@ -2042,7 +2095,7 @@ def S4_or_andix:
(ins IntRegs:$Ru, IntRegs:$_src_, s10Ext:$s10),
"$Rx = or($Ru, and($_src_, #$s10))" ,
[(set (i32 IntRegs:$Rx),
- (or (i32 IntRegs:$Ru), (and (i32 IntRegs:$_src_), s10ExtPred:$s10)))] ,
+ (or (i32 IntRegs:$Ru), (and (i32 IntRegs:$_src_), s32ImmPred:$s10)))] ,
"$_src_ = $Rx", ALU64_tc_2_SLOT23> {
bits<5> Rx;
bits<5> Ru;
@@ -2187,7 +2240,7 @@ class T_CompOR <string mnemonic, bits<2> MajOp, SDNode OpNode>
(ins IntRegs:$src1, IntRegs:$Rs, s10Ext:$s10),
"$Rx |= "#mnemonic#"($Rs, #$s10)",
[(set (i32 IntRegs:$Rx), (or (i32 IntRegs:$src1),
- (OpNode (i32 IntRegs:$Rs), s10ExtPred:$s10)))],
+ (OpNode (i32 IntRegs:$Rs), s32ImmPred:$s10)))],
"$src1 = $Rx", ALU64_tc_2_SLOT23>, ImmRegRel {
bits<5> Rx;
bits<5> Rs;
@@ -2349,7 +2402,7 @@ def M4_mpyri_addi : MInst<(outs IntRegs:$Rd),
"$Rd = add(#$u6, mpyi($Rs, #$U6))" ,
[(set (i32 IntRegs:$Rd),
(add (mul (i32 IntRegs:$Rs), u6ImmPred:$U6),
- u6ExtPred:$u6))] ,"",ALU64_tc_3x_SLOT23> {
+ u32ImmPred:$u6))] ,"",ALU64_tc_3x_SLOT23> {
bits<5> Rd;
bits<6> u6;
bits<5> Rs;
@@ -2374,7 +2427,7 @@ def M4_mpyrr_addi : MInst <(outs IntRegs:$Rd),
(ins u6Ext:$u6, IntRegs:$Rs, IntRegs:$Rt),
"$Rd = add(#$u6, mpyi($Rs, $Rt))" ,
[(set (i32 IntRegs:$Rd),
- (add (mul (i32 IntRegs:$Rs), (i32 IntRegs:$Rt)), u6ExtPred:$u6))],
+ (add (mul (i32 IntRegs:$Rs), (i32 IntRegs:$Rt)), u32ImmPred:$u6))],
"", ALU64_tc_3x_SLOT23>, ImmRegRel {
bits<5> Rd;
bits<6> u6;
@@ -2424,7 +2477,7 @@ def M4_mpyri_addr_u2 : T_AddMpy<0b0, u6_2ImmPred,
let isExtendable = 1, opExtentBits = 6, opExtendable = 3,
CextOpcode = "ADD_MPY", InputType = "imm" in
-def M4_mpyri_addr : T_AddMpy<0b1, u6ExtPred,
+def M4_mpyri_addr : T_AddMpy<0b1, u32ImmPred,
(ins IntRegs:$src1, IntRegs:$src3, u6Ext:$src2)>, ImmRegRel;
// Rx=add(Ru,mpyi(Rx,Rs))
@@ -2447,17 +2500,6 @@ def M4_mpyrr_addr: MInst_acc <(outs IntRegs:$Rx),
let Inst{20-16} = Rs;
}
-// Rd=add(##,mpyi(Rs,#U6))
-def : Pat <(add (mul (i32 IntRegs:$src2), u6ImmPred:$src3),
- (HexagonCONST32 tglobaladdr:$src1)),
- (i32 (M4_mpyri_addi tglobaladdr:$src1, IntRegs:$src2,
- u6ImmPred:$src3))>;
-
-// Rd=add(##,mpyi(Rs,Rt))
-def : Pat <(add (mul (i32 IntRegs:$src2), (i32 IntRegs:$src3)),
- (HexagonCONST32 tglobaladdr:$src1)),
- (i32 (M4_mpyrr_addi tglobaladdr:$src1, IntRegs:$src2,
- IntRegs:$src3))>;
// Vector reduce multiply word by signed half (32x16)
//Rdd=vrmpyweh(Rss,Rtt)[:<<1]
@@ -2569,7 +2611,7 @@ class T_S4_ShiftOperate<string MnOp, string MnSh, SDNode Op, SDNode Sh,
: MInst_acc<(outs IntRegs:$Rd), (ins u8Ext:$u8, IntRegs:$Rx, u5Imm:$U5),
"$Rd = "#MnOp#"(#$u8, "#MnSh#"($Rx, #$U5))",
[(set (i32 IntRegs:$Rd),
- (Op (Sh I32:$Rx, u5ImmPred:$U5), u8ExtPred:$u8))],
+ (Op (Sh I32:$Rx, u5ImmPred:$U5), u32ImmPred:$u8))],
"$Rd = $Rx", Itin> {
bits<5> Rd;
@@ -2904,7 +2946,7 @@ let isExtendable = 1, opExtendable = 1, isExtentSigned = 0 in {
// mem[bh](Rs+#u6) += #U5
//===----------------------------------------------------------------------===//
-multiclass MemOpi_u5Pats <PatFrag ldOp, PatFrag stOp, PatLeaf ExtPred,
+multiclass MemOpi_u5Pats <PatFrag ldOp, PatFrag stOp, PatLeaf ImmPred,
InstHexagon MI, SDNode OpNode> {
let AddedComplexity = 180 in
def: Pat<(stOp (OpNode (ldOp IntRegs:$addr), u5ImmPred:$addend),
@@ -2912,24 +2954,24 @@ multiclass MemOpi_u5Pats <PatFrag ldOp, PatFrag stOp, PatLeaf ExtPred,
(MI IntRegs:$addr, 0, u5ImmPred:$addend)>;
let AddedComplexity = 190 in
- def: Pat<(stOp (OpNode (ldOp (add IntRegs:$base, ExtPred:$offset)),
+ def: Pat<(stOp (OpNode (ldOp (add IntRegs:$base, ImmPred:$offset)),
u5ImmPred:$addend),
- (add IntRegs:$base, ExtPred:$offset)),
- (MI IntRegs:$base, ExtPred:$offset, u5ImmPred:$addend)>;
+ (add IntRegs:$base, ImmPred:$offset)),
+ (MI IntRegs:$base, ImmPred:$offset, u5ImmPred:$addend)>;
}
-multiclass MemOpi_u5ALUOp<PatFrag ldOp, PatFrag stOp, PatLeaf ExtPred,
+multiclass MemOpi_u5ALUOp<PatFrag ldOp, PatFrag stOp, PatLeaf ImmPred,
InstHexagon addMI, InstHexagon subMI> {
- defm: MemOpi_u5Pats<ldOp, stOp, ExtPred, addMI, add>;
- defm: MemOpi_u5Pats<ldOp, stOp, ExtPred, subMI, sub>;
+ defm: MemOpi_u5Pats<ldOp, stOp, ImmPred, addMI, add>;
+ defm: MemOpi_u5Pats<ldOp, stOp, ImmPred, subMI, sub>;
}
multiclass MemOpi_u5ExtType<PatFrag ldOpByte, PatFrag ldOpHalf > {
// Half Word
- defm: MemOpi_u5ALUOp <ldOpHalf, truncstorei16, u6_1ExtPred,
+ defm: MemOpi_u5ALUOp <ldOpHalf, truncstorei16, u31_1ImmPred,
L4_iadd_memoph_io, L4_isub_memoph_io>;
// Byte
- defm: MemOpi_u5ALUOp <ldOpByte, truncstorei8, u6ExtPred,
+ defm: MemOpi_u5ALUOp <ldOpByte, truncstorei8, u32ImmPred,
L4_iadd_memopb_io, L4_isub_memopb_io>;
}
@@ -2939,7 +2981,7 @@ let Predicates = [UseMEMOP] in {
defm: MemOpi_u5ExtType<extloadi8, extloadi16>; // any extend
// Word
- defm: MemOpi_u5ALUOp <load, store, u6_2ExtPred, L4_iadd_memopw_io,
+ defm: MemOpi_u5ALUOp <load, store, u30_2ImmPred, L4_iadd_memopw_io,
L4_isub_memopw_io>;
}
@@ -2950,7 +2992,7 @@ let Predicates = [UseMEMOP] in {
// mem[bh](Rs+#u6) += #m5
//===----------------------------------------------------------------------===//
-multiclass MemOpi_m5Pats <PatFrag ldOp, PatFrag stOp, PatLeaf extPred,
+multiclass MemOpi_m5Pats <PatFrag ldOp, PatFrag stOp, PatLeaf ImmPred,
PatLeaf immPred, SDNodeXForm xformFunc,
InstHexagon MI> {
let AddedComplexity = 190 in
@@ -2958,18 +3000,18 @@ multiclass MemOpi_m5Pats <PatFrag ldOp, PatFrag stOp, PatLeaf extPred,
(MI IntRegs:$addr, 0, (xformFunc immPred:$subend))>;
let AddedComplexity = 195 in
- def: Pat<(stOp (add (ldOp (add IntRegs:$base, extPred:$offset)),
+ def: Pat<(stOp (add (ldOp (add IntRegs:$base, ImmPred:$offset)),
immPred:$subend),
- (add IntRegs:$base, extPred:$offset)),
- (MI IntRegs:$base, extPred:$offset, (xformFunc immPred:$subend))>;
+ (add IntRegs:$base, ImmPred:$offset)),
+ (MI IntRegs:$base, ImmPred:$offset, (xformFunc immPred:$subend))>;
}
multiclass MemOpi_m5ExtType<PatFrag ldOpByte, PatFrag ldOpHalf > {
// Half Word
- defm: MemOpi_m5Pats <ldOpHalf, truncstorei16, u6_1ExtPred, m5HImmPred,
+ defm: MemOpi_m5Pats <ldOpHalf, truncstorei16, u31_1ImmPred, m5HImmPred,
MEMOPIMM_HALF, L4_isub_memoph_io>;
// Byte
- defm: MemOpi_m5Pats <ldOpByte, truncstorei8, u6ExtPred, m5BImmPred,
+ defm: MemOpi_m5Pats <ldOpByte, truncstorei8, u32ImmPred, m5BImmPred,
MEMOPIMM_BYTE, L4_isub_memopb_io>;
}
@@ -2979,7 +3021,7 @@ let Predicates = [UseMEMOP] in {
defm: MemOpi_m5ExtType<extloadi8, extloadi16>; // any extend
// Word
- defm: MemOpi_m5Pats <load, store, u6_2ExtPred, m5ImmPred,
+ defm: MemOpi_m5Pats <load, store, u30_2ImmPred, m5ImmPred,
MEMOPIMM, L4_isub_memopw_io>;
}
@@ -3008,16 +3050,16 @@ multiclass MemOpi_bitPats <PatFrag ldOp, PatFrag stOp, PatLeaf immPred,
multiclass MemOpi_bitExtType<PatFrag ldOpByte, PatFrag ldOpHalf> {
// Byte - clrbit
- defm: MemOpi_bitPats<ldOpByte, truncstorei8, Clr3ImmPred, u6ExtPred,
+ defm: MemOpi_bitPats<ldOpByte, truncstorei8, Clr3ImmPred, u32ImmPred,
CLRMEMIMM_BYTE, L4_iand_memopb_io, and>;
// Byte - setbit
- defm: MemOpi_bitPats<ldOpByte, truncstorei8, Set3ImmPred, u6ExtPred,
+ defm: MemOpi_bitPats<ldOpByte, truncstorei8, Set3ImmPred, u32ImmPred,
SETMEMIMM_BYTE, L4_ior_memopb_io, or>;
// Half Word - clrbit
- defm: MemOpi_bitPats<ldOpHalf, truncstorei16, Clr4ImmPred, u6_1ExtPred,
+ defm: MemOpi_bitPats<ldOpHalf, truncstorei16, Clr4ImmPred, u31_1ImmPred,
CLRMEMIMM_SHORT, L4_iand_memoph_io, and>;
// Half Word - setbit
- defm: MemOpi_bitPats<ldOpHalf, truncstorei16, Set4ImmPred, u6_1ExtPred,
+ defm: MemOpi_bitPats<ldOpHalf, truncstorei16, Set4ImmPred, u31_1ImmPred,
SETMEMIMM_SHORT, L4_ior_memoph_io, or>;
}
@@ -3030,9 +3072,9 @@ let Predicates = [UseMEMOP] in {
// memw(Rs+#0) = [clrbit|setbit](#U5)
// memw(Rs+#u6:2) = [clrbit|setbit](#U5)
- defm: MemOpi_bitPats<load, store, Clr5ImmPred, u6_2ExtPred, CLRMEMIMM,
+ defm: MemOpi_bitPats<load, store, Clr5ImmPred, u30_2ImmPred, CLRMEMIMM,
L4_iand_memopw_io, and>;
- defm: MemOpi_bitPats<load, store, Set5ImmPred, u6_2ExtPred, SETMEMIMM,
+ defm: MemOpi_bitPats<load, store, Set5ImmPred, u30_2ImmPred, SETMEMIMM,
L4_ior_memopw_io, or>;
}
@@ -3070,11 +3112,11 @@ multiclass MemOPr_ALUOp<PatFrag ldOp, PatFrag stOp, PatLeaf extPred,
multiclass MemOPr_ExtType<PatFrag ldOpByte, PatFrag ldOpHalf > {
// Half Word
- defm: MemOPr_ALUOp <ldOpHalf, truncstorei16, u6_1ExtPred,
+ defm: MemOPr_ALUOp <ldOpHalf, truncstorei16, u31_1ImmPred,
L4_add_memoph_io, L4_sub_memoph_io,
L4_and_memoph_io, L4_or_memoph_io>;
// Byte
- defm: MemOPr_ALUOp <ldOpByte, truncstorei8, u6ExtPred,
+ defm: MemOPr_ALUOp <ldOpByte, truncstorei8, u32ImmPred,
L4_add_memopb_io, L4_sub_memopb_io,
L4_and_memopb_io, L4_or_memopb_io>;
}
@@ -3086,7 +3128,7 @@ let Predicates = [UseMEMOP] in {
defm: MemOPr_ExtType<sextloadi8, sextloadi16>; // sign extend
defm: MemOPr_ExtType<extloadi8, extloadi16>; // any extend
// Word
- defm: MemOPr_ALUOp <load, store, u6_2ExtPred, L4_add_memopw_io,
+ defm: MemOPr_ALUOp <load, store, u30_2ImmPred, L4_add_memopw_io,
L4_sub_memopw_io, L4_and_memopw_io, L4_or_memopw_io>;
}
@@ -3110,23 +3152,23 @@ def C4_cmpneqi : T_CMP <"cmp.eq", 0b00, 1, s10Ext>;
def C4_cmpltei : T_CMP <"cmp.gt", 0b01, 1, s10Ext>;
def C4_cmplteui : T_CMP <"cmp.gtu", 0b10, 1, u9Ext>;
-def : T_CMP_pat <C4_cmpneqi, setne, s10ExtPred>;
-def : T_CMP_pat <C4_cmpltei, setle, s10ExtPred>;
+def : T_CMP_pat <C4_cmpneqi, setne, s32ImmPred>;
+def : T_CMP_pat <C4_cmpltei, setle, s32ImmPred>;
def : T_CMP_pat <C4_cmplteui, setule, u9ImmPred>;
// rs <= rt -> !(rs > rt).
/*
-def: Pat<(i1 (setle (i32 IntRegs:$src1), s10ExtPred:$src2)),
- (C2_not (C2_cmpgti IntRegs:$src1, s10ExtPred:$src2))>;
-// (C4_cmpltei IntRegs:$src1, s10ExtPred:$src2)>;
+def: Pat<(i1 (setle (i32 IntRegs:$src1), s32ImmPred:$src2)),
+ (C2_not (C2_cmpgti IntRegs:$src1, s32ImmPred:$src2))>;
+// (C4_cmpltei IntRegs:$src1, s32ImmPred:$src2)>;
*/
// Map cmplt(Rs, Imm) -> !cmpgt(Rs, Imm-1).
-def: Pat<(i1 (setlt (i32 IntRegs:$src1), s8ExtPred:$src2)),
- (C4_cmpltei IntRegs:$src1, (DEC_CONST_SIGNED s8ExtPred:$src2))>;
+def: Pat<(i1 (setlt (i32 IntRegs:$src1), s32ImmPred:$src2)),
+ (C4_cmpltei IntRegs:$src1, (DEC_CONST_SIGNED s32ImmPred:$src2))>;
// rs != rt -> !(rs == rt).
-def: Pat<(i1 (setne (i32 IntRegs:$src1), s10ExtPred:$src2)),
- (C4_cmpneqi IntRegs:$src1, s10ExtPred:$src2)>;
+def: Pat<(i1 (setne (i32 IntRegs:$src1), s32ImmPred:$src2)),
+ (C4_cmpneqi IntRegs:$src1, s32ImmPred:$src2)>;
// SDNode for converting immediate C to C-1.
def DEC_CONST_BYTE : SDNodeXForm<imm, [{
@@ -3136,168 +3178,6 @@ def DEC_CONST_BYTE : SDNodeXForm<imm, [{
}]>;
// For the sequence
-// zext( seteq ( and(Rs, 255), u8))
-// Generate
-// Pd=cmpb.eq(Rs, #u8)
-// if (Pd.new) Rd=#1
-// if (!Pd.new) Rd=#0
-def : Pat <(i32 (zext (i1 (seteq (i32 (and (i32 IntRegs:$Rs), 255)),
- u8ExtPred:$u8)))),
- (i32 (TFR_condset_ii (i1 (A4_cmpbeqi (i32 IntRegs:$Rs),
- (u8ExtPred:$u8))),
- 1, 0))>;
-
-// For the sequence
-// zext( setne ( and(Rs, 255), u8))
-// Generate
-// Pd=cmpb.eq(Rs, #u8)
-// if (Pd.new) Rd=#0
-// if (!Pd.new) Rd=#1
-def : Pat <(i32 (zext (i1 (setne (i32 (and (i32 IntRegs:$Rs), 255)),
- u8ExtPred:$u8)))),
- (i32 (TFR_condset_ii (i1 (A4_cmpbeqi (i32 IntRegs:$Rs),
- (u8ExtPred:$u8))),
- 0, 1))>;
-
-// For the sequence
-// zext( seteq (Rs, and(Rt, 255)))
-// Generate
-// Pd=cmpb.eq(Rs, Rt)
-// if (Pd.new) Rd=#1
-// if (!Pd.new) Rd=#0
-def : Pat <(i32 (zext (i1 (seteq (i32 IntRegs:$Rt),
- (i32 (and (i32 IntRegs:$Rs), 255)))))),
- (i32 (TFR_condset_ii (i1 (A4_cmpbeq (i32 IntRegs:$Rs),
- (i32 IntRegs:$Rt))),
- 1, 0))>;
-
-// For the sequence
-// zext( setne (Rs, and(Rt, 255)))
-// Generate
-// Pd=cmpb.eq(Rs, Rt)
-// if (Pd.new) Rd=#0
-// if (!Pd.new) Rd=#1
-def : Pat <(i32 (zext (i1 (setne (i32 IntRegs:$Rt),
- (i32 (and (i32 IntRegs:$Rs), 255)))))),
- (i32 (TFR_condset_ii (i1 (A4_cmpbeq (i32 IntRegs:$Rs),
- (i32 IntRegs:$Rt))),
- 0, 1))>;
-
-// For the sequence
-// zext( setugt ( and(Rs, 255), u8))
-// Generate
-// Pd=cmpb.gtu(Rs, #u8)
-// if (Pd.new) Rd=#1
-// if (!Pd.new) Rd=#0
-def : Pat <(i32 (zext (i1 (setugt (i32 (and (i32 IntRegs:$Rs), 255)),
- u8ExtPred:$u8)))),
- (i32 (TFR_condset_ii (i1 (A4_cmpbgtui (i32 IntRegs:$Rs),
- (u8ExtPred:$u8))),
- 1, 0))>;
-
-// For the sequence
-// zext( setugt ( and(Rs, 254), u8))
-// Generate
-// Pd=cmpb.gtu(Rs, #u8)
-// if (Pd.new) Rd=#1
-// if (!Pd.new) Rd=#0
-def : Pat <(i32 (zext (i1 (setugt (i32 (and (i32 IntRegs:$Rs), 254)),
- u8ExtPred:$u8)))),
- (i32 (TFR_condset_ii (i1 (A4_cmpbgtui (i32 IntRegs:$Rs),
- (u8ExtPred:$u8))),
- 1, 0))>;
-
-// For the sequence
-// zext( setult ( Rs, Rt))
-// Generate
-// Pd=cmp.ltu(Rs, Rt)
-// if (Pd.new) Rd=#1
-// if (!Pd.new) Rd=#0
-// cmp.ltu(Rs, Rt) -> cmp.gtu(Rt, Rs)
-def : Pat <(i32 (zext (i1 (setult (i32 IntRegs:$Rs), (i32 IntRegs:$Rt))))),
- (i32 (TFR_condset_ii (i1 (C2_cmpgtu (i32 IntRegs:$Rt),
- (i32 IntRegs:$Rs))),
- 1, 0))>;
-
-// For the sequence
-// zext( setlt ( Rs, Rt))
-// Generate
-// Pd=cmp.lt(Rs, Rt)
-// if (Pd.new) Rd=#1
-// if (!Pd.new) Rd=#0
-// cmp.lt(Rs, Rt) -> cmp.gt(Rt, Rs)
-def : Pat <(i32 (zext (i1 (setlt (i32 IntRegs:$Rs), (i32 IntRegs:$Rt))))),
- (i32 (TFR_condset_ii (i1 (C2_cmpgt (i32 IntRegs:$Rt),
- (i32 IntRegs:$Rs))),
- 1, 0))>;
-
-// For the sequence
-// zext( setugt ( Rs, Rt))
-// Generate
-// Pd=cmp.gtu(Rs, Rt)
-// if (Pd.new) Rd=#1
-// if (!Pd.new) Rd=#0
-def : Pat <(i32 (zext (i1 (setugt (i32 IntRegs:$Rs), (i32 IntRegs:$Rt))))),
- (i32 (TFR_condset_ii (i1 (C2_cmpgtu (i32 IntRegs:$Rs),
- (i32 IntRegs:$Rt))),
- 1, 0))>;
-
-// This pattern interferes with CoreMark performance, so it is not
-// implemented at this time.
-// For the sequence
-// zext( setgt ( Rs, Rt))
-// Generate
-// Pd=cmp.gt(Rs, Rt)
-// if (Pd.new) Rd=#1
-// if (!Pd.new) Rd=#0
-
-// For the sequence
-// zext( setuge ( Rs, Rt))
-// Generate
-// Pd=cmp.ltu(Rs, Rt)
-// if (Pd.new) Rd=#0
-// if (!Pd.new) Rd=#1
-// cmp.ltu(Rs, Rt) -> cmp.gtu(Rt, Rs)
-def : Pat <(i32 (zext (i1 (setuge (i32 IntRegs:$Rs), (i32 IntRegs:$Rt))))),
- (i32 (TFR_condset_ii (i1 (C2_cmpgtu (i32 IntRegs:$Rt),
- (i32 IntRegs:$Rs))),
- 0, 1))>;
-
-// For the sequence
-// zext( setge ( Rs, Rt))
-// Generate
-// Pd=cmp.lt(Rs, Rt)
-// if (Pd.new) Rd=#0
-// if (!Pd.new) Rd=#1
-// cmp.lt(Rs, Rt) -> cmp.gt(Rt, Rs)
-def : Pat <(i32 (zext (i1 (setge (i32 IntRegs:$Rs), (i32 IntRegs:$Rt))))),
- (i32 (TFR_condset_ii (i1 (C2_cmpgt (i32 IntRegs:$Rt),
- (i32 IntRegs:$Rs))),
- 0, 1))>;
-
-// For the sequence
-// zext( setule ( Rs, Rt))
-// Generate
-// Pd=cmp.gtu(Rs, Rt)
-// if (Pd.new) Rd=#0
-// if (!Pd.new) Rd=#1
-def : Pat <(i32 (zext (i1 (setule (i32 IntRegs:$Rs), (i32 IntRegs:$Rt))))),
- (i32 (TFR_condset_ii (i1 (C2_cmpgtu (i32 IntRegs:$Rs),
- (i32 IntRegs:$Rt))),
- 0, 1))>;
-
-// For the sequence
-// zext( setle ( Rs, Rt))
-// Generate
-// Pd=cmp.gt(Rs, Rt)
-// if (Pd.new) Rd=#0
-// if (!Pd.new) Rd=#1
-def : Pat <(i32 (zext (i1 (setle (i32 IntRegs:$Rs), (i32 IntRegs:$Rt))))),
- (i32 (TFR_condset_ii (i1 (C2_cmpgt (i32 IntRegs:$Rs),
- (i32 IntRegs:$Rt))),
- 0, 1))>;
-
-// For the sequence
// zext( setult ( and(Rs, 255), u8))
// Use the isdigit transformation below
@@ -3381,26 +3261,17 @@ defm L4_return: LD_MISC_L4_RETURN <"dealloc_return">, PredNewRel;
// Restore registers and dealloc return function call.
let isCall = 1, isBarrier = 1, isReturn = 1, isTerminator = 1,
Defs = [R29, R30, R31, PC], isPredicable = 0, isAsmParserOnly = 1 in {
- def RESTORE_DEALLOC_RET_JMP_V4 : JInst<(outs),
- (ins calltarget:$dst),
- "jump $dst",
- []>;
+ def RESTORE_DEALLOC_RET_JMP_V4 : T_JMP<"">;
}
// Restore registers and dealloc frame before a tail call.
let isCall = 1, Defs = [R29, R30, R31, PC], isAsmParserOnly = 1 in {
- def RESTORE_DEALLOC_BEFORE_TAILCALL_V4 : JInst<(outs),
- (ins calltarget:$dst),
- "call $dst",
- []>;
+ def RESTORE_DEALLOC_BEFORE_TAILCALL_V4 : T_Call<"">, PredRel;
}
// Save registers function call.
let isCall = 1, Uses = [R29, R31], isAsmParserOnly = 1 in {
- def SAVE_REGISTERS_CALL_V4 : JInst<(outs),
- (ins calltarget:$dst),
- "call $dst // Save_calle_saved_registers",
- []>;
+ def SAVE_REGISTERS_CALL_V4 : T_Call<"">, PredRel;
}
//===----------------------------------------------------------------------===//
@@ -3472,7 +3343,7 @@ class T_StoreAbs_Pred <string mnemonic, RegisterClass RC, bits<2> MajOp,
//===----------------------------------------------------------------------===//
class T_StoreAbs <string mnemonic, RegisterClass RC, Operand ImmOp,
bits<2> MajOp, bit isHalf>
- : T_StoreAbsGP <mnemonic, RC, ImmOp, MajOp, u0AlwaysExt, 1, isHalf>,
+ : T_StoreAbsGP <mnemonic, RC, ImmOp, MajOp, u32Imm, 1, isHalf>,
AddrModeRel {
string ImmOpStr = !cast<string>(ImmOp);
let opExtentBits = !if (!eq(ImmOpStr, "u16_3Imm"), 19,
@@ -3513,7 +3384,7 @@ multiclass ST_Abs<string mnemonic, string CextOp, RegisterClass RC,
let hasSideEffects = 0, isPredicable = 1, mayStore = 1, isNVStore = 1,
isNewValue = 1, opNewValue = 1 in
class T_StoreAbsGP_NV <string mnemonic, Operand ImmOp, bits<2>MajOp, bit isAbs>
- : NVInst_V4<(outs), (ins u0AlwaysExt:$addr, IntRegs:$src),
+ : NVInst_V4<(outs), (ins u32Imm:$addr, IntRegs:$src),
mnemonic # !if(isAbs, "(##", "(#")#"$addr) = $src.new",
[], "", V2LDST_tc_st_SLOT0> {
bits<19> addr;
@@ -3743,7 +3614,7 @@ class T_LoadAbsGP <string mnemonic, RegisterClass RC, Operand ImmOp,
class T_LoadAbs <string mnemonic, RegisterClass RC, Operand ImmOp,
bits<3> MajOp>
- : T_LoadAbsGP <mnemonic, RC, ImmOp, MajOp, u0AlwaysExt, 1>, AddrModeRel {
+ : T_LoadAbsGP <mnemonic, RC, ImmOp, MajOp, u32Imm, 1>, AddrModeRel {
string ImmOpStr = !cast<string>(ImmOp);
let opExtentBits = !if (!eq(ImmOpStr, "u16_3Imm"), 19,
@@ -3903,17 +3774,17 @@ def: Pat<(i64 (ctlz I64:$src1)), (Zext64 (S2_cl0p I64:$src1))>;
def: Pat<(i64 (cttz I64:$src1)), (Zext64 (S2_ct0p I64:$src1))>;
let AddedComplexity = 30 in {
- def: Storea_pat<truncstorei8, I32, u0AlwaysExtPred, S2_storerbabs>;
- def: Storea_pat<truncstorei16, I32, u0AlwaysExtPred, S2_storerhabs>;
- def: Storea_pat<store, I32, u0AlwaysExtPred, S2_storeriabs>;
+ def: Storea_pat<truncstorei8, I32, u32ImmPred, S2_storerbabs>;
+ def: Storea_pat<truncstorei16, I32, u32ImmPred, S2_storerhabs>;
+ def: Storea_pat<store, I32, u32ImmPred, S2_storeriabs>;
}
let AddedComplexity = 30 in {
- def: Loada_pat<load, i32, u0AlwaysExtPred, L4_loadri_abs>;
- def: Loada_pat<sextloadi8, i32, u0AlwaysExtPred, L4_loadrb_abs>;
- def: Loada_pat<zextloadi8, i32, u0AlwaysExtPred, L4_loadrub_abs>;
- def: Loada_pat<sextloadi16, i32, u0AlwaysExtPred, L4_loadrh_abs>;
- def: Loada_pat<zextloadi16, i32, u0AlwaysExtPred, L4_loadruh_abs>;
+ def: Loada_pat<load, i32, u32ImmPred, L4_loadri_abs>;
+ def: Loada_pat<sextloadi8, i32, u32ImmPred, L4_loadrb_abs>;
+ def: Loada_pat<zextloadi8, i32, u32ImmPred, L4_loadrub_abs>;
+ def: Loada_pat<sextloadi16, i32, u32ImmPred, L4_loadrh_abs>;
+ def: Loada_pat<zextloadi16, i32, u32ImmPred, L4_loadruh_abs>;
}
// Indexed store word - global address.
@@ -4012,6 +3883,18 @@ def: Storea_pat<SwapSt<atomic_store_16>, I32, addrgp, S2_storerhabs>;
def: Storea_pat<SwapSt<atomic_store_32>, I32, addrgp, S2_storeriabs>;
def: Storea_pat<SwapSt<atomic_store_64>, I64, addrgp, S2_storerdabs>;
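+
+// Insert4: build a 64-bit value from the low halfwords of four registers
+// ($a -> bits 15:0, $b -> 31:16, $c -> 47:32, $d -> 63:48); matched as a
+// pseudo that must be eliminated before emission (hence the .error string).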
+let Constraints = "@earlyclobber $dst" in
+def Insert4 : PseudoM<(outs DoubleRegs:$dst), (ins IntRegs:$a, IntRegs:$b,
+ IntRegs:$c, IntRegs:$d),
+ ".error \"Should never try to emit Insert4\"",
+ [(set (i64 DoubleRegs:$dst),
+ (or (or (or (shl (i64 (zext (i32 (and (i32 IntRegs:$b), (i32 65535))))),
+ (i32 16)),
+ (i64 (zext (i32 (and (i32 IntRegs:$a), (i32 65535)))))),
+ (shl (i64 (anyext (i32 (and (i32 IntRegs:$c), (i32 65535))))),
+ (i32 32))),
+ (shl (i64 (anyext (i32 IntRegs:$d))), (i32 48))))]>;
+
//===----------------------------------------------------------------------===//
// :raw form of boundscheck:hi:lo insns
//===----------------------------------------------------------------------===//
@@ -4116,7 +3999,7 @@ class CJInst_tstbit_R0<string px, bit np, string tnt>
: InstHexagon<(outs), (ins IntRegs:$Rs, brtarget:$r9_2),
""#px#" = tstbit($Rs, #0); if ("
#!if(np, "!","")#""#px#".new) jump:"#tnt#" $r9_2",
- [], "", COMPOUND, TypeCOMPOUND> {
+ [], "", COMPOUND, TypeCOMPOUND>, OpcodeHexagon {
bits<4> Rs;
bits<11> r9_2;
@@ -4162,7 +4045,7 @@ class CJInst_RR<string px, string op, bit np, string tnt>
: InstHexagon<(outs), (ins IntRegs:$Rs, IntRegs:$Rt, brtarget:$r9_2),
""#px#" = cmp."#op#"($Rs, $Rt); if ("
#!if(np, "!","")#""#px#".new) jump:"#tnt#" $r9_2",
- [], "", COMPOUND, TypeCOMPOUND> {
+ [], "", COMPOUND, TypeCOMPOUND>, OpcodeHexagon {
bits<4> Rs;
bits<4> Rt;
bits<11> r9_2;
@@ -4216,7 +4099,7 @@ class CJInst_RU5<string px, string op, bit np, string tnt>
: InstHexagon<(outs), (ins IntRegs:$Rs, u5Imm:$U5, brtarget:$r9_2),
""#px#" = cmp."#op#"($Rs, #$U5); if ("
#!if(np, "!","")#""#px#".new) jump:"#tnt#" $r9_2",
- [], "", COMPOUND, TypeCOMPOUND> {
+ [], "", COMPOUND, TypeCOMPOUND>, OpcodeHexagon {
bits<4> Rs;
bits<5> U5;
bits<11> r9_2;
@@ -4271,7 +4154,7 @@ class CJInst_Rn1<string px, string op, bit np, string tnt>
: InstHexagon<(outs), (ins IntRegs:$Rs, brtarget:$r9_2),
""#px#" = cmp."#op#"($Rs,#-1); if ("
#!if(np, "!","")#""#px#".new) jump:"#tnt#" $r9_2",
- [], "", COMPOUND, TypeCOMPOUND> {
+ [], "", COMPOUND, TypeCOMPOUND>, OpcodeHexagon {
bits<4> Rs;
bits<11> r9_2;
diff --git a/lib/Target/Hexagon/HexagonInstrInfoV5.td b/lib/Target/Hexagon/HexagonInstrInfoV5.td
index 19b0935..337f4ea 100644
--- a/lib/Target/Hexagon/HexagonInstrInfoV5.td
+++ b/lib/Target/Hexagon/HexagonInstrInfoV5.td
@@ -139,11 +139,11 @@ def S5_popcountp : ALU64_rr<(outs IntRegs:$Rd), (ins DoubleRegs:$Rss),
let Inst{20-16} = Rss;
}
-defm: Loadx_pat<load, f32, s11_2ExtPred, L2_loadri_io>;
-defm: Loadx_pat<load, f64, s11_3ExtPred, L2_loadrd_io>;
+defm: Loadx_pat<load, f32, s30_2ImmPred, L2_loadri_io>;
+defm: Loadx_pat<load, f64, s29_3ImmPred, L2_loadrd_io>;
-defm: Storex_pat<store, F32, s11_2ExtPred, S2_storeri_io>;
-defm: Storex_pat<store, F64, s11_3ExtPred, S2_storerd_io>;
+defm: Storex_pat<store, F32, s30_2ImmPred, S2_storeri_io>;
+defm: Storex_pat<store, F64, s29_3ImmPred, S2_storerd_io>;
def: Storex_simple_pat<store, F32, S2_storeri_io>;
def: Storex_simple_pat<store, F64, S2_storerd_io>;
diff --git a/lib/Target/Hexagon/HexagonInstrInfoVector.td b/lib/Target/Hexagon/HexagonInstrInfoVector.td
index 6e67b6e..f4fb946 100644
--- a/lib/Target/Hexagon/HexagonInstrInfoVector.td
+++ b/lib/Target/Hexagon/HexagonInstrInfoVector.td
@@ -20,6 +20,34 @@ def V8I8: PatLeaf<(v8i8 DoubleRegs:$R)>;
def V4I16: PatLeaf<(v4i16 DoubleRegs:$R)>;
def V2I32: PatLeaf<(v2i32 DoubleRegs:$R)>;
+
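+// Bitcasts between same-width scalar and vector types are no-ops on
+// Hexagon, so map both directions to a plain register use.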
+multiclass bitconvert_32<ValueType a, ValueType b> {
+ def : Pat <(b (bitconvert (a IntRegs:$src))),
+ (b IntRegs:$src)>;
+ def : Pat <(a (bitconvert (b IntRegs:$src))),
+ (a IntRegs:$src)>;
+}
+
+multiclass bitconvert_64<ValueType a, ValueType b> {
+ def : Pat <(b (bitconvert (a DoubleRegs:$src))),
+ (b DoubleRegs:$src)>;
+ def : Pat <(a (bitconvert (b DoubleRegs:$src))),
+ (a DoubleRegs:$src)>;
+}
+
+// Bit convert vector types.
+defm : bitconvert_32<v4i8, i32>;
+defm : bitconvert_32<v2i16, i32>;
+defm : bitconvert_32<v2i16, v4i8>;
+
+defm : bitconvert_64<v8i8, i64>;
+defm : bitconvert_64<v4i16, i64>;
+defm : bitconvert_64<v2i32, i64>;
+defm : bitconvert_64<v8i8, v4i16>;
+defm : bitconvert_64<v8i8, v2i32>;
+defm : bitconvert_64<v4i16, v2i32>;
+
+
// Vector shift support. Vector shifting in Hexagon is rather different
// from internal representation of LLVM.
// LLVM assumes all shifts (in vector case) will have the form
@@ -44,6 +72,12 @@ class vshift_v2i32<SDNode Op, string Str, bits<3>MajOp, bits<3>MinOp>
let Inst{12-8} = src2;
}
+def : Pat<(v2i16 (add (v2i16 IntRegs:$src1), (v2i16 IntRegs:$src2))),
+ (A2_svaddh IntRegs:$src1, IntRegs:$src2)>;
+
+def : Pat<(v2i16 (sub (v2i16 IntRegs:$src1), (v2i16 IntRegs:$src2))),
+ (A2_svsubh IntRegs:$src1, IntRegs:$src2)>;
+
def S2_asr_i_vw : vshift_v2i32<sra, "vasrw", 0b010, 0b000>;
def S2_lsr_i_vw : vshift_v2i32<srl, "vlsrw", 0b010, 0b001>;
def S2_asl_i_vw : vshift_v2i32<shl, "vaslw", 0b010, 0b010>;
@@ -52,6 +86,87 @@ def S2_asr_i_vh : vshift_v4i16<sra, "vasrh", 0b100, 0b000>;
def S2_lsr_i_vh : vshift_v4i16<srl, "vlsrh", 0b100, 0b001>;
def S2_asl_i_vh : vshift_v4i16<shl, "vaslh", 0b100, 0b010>;
+
+def HexagonVSPLATB: SDNode<"HexagonISD::VSPLATB", SDTUnaryOp>;
+def HexagonVSPLATH: SDNode<"HexagonISD::VSPLATH", SDTUnaryOp>;
+
+// Replicate the low 8 bits of the 32-bit input register into each of the
+// four bytes of the 32-bit destination register.
+def: Pat<(v4i8 (HexagonVSPLATB I32:$Rs)), (S2_vsplatrb I32:$Rs)>;
+
+// Replicate the low 16 bits of the 32-bit input register into each of the
+// four halfwords of the 64-bit destination register.
+def: Pat<(v4i16 (HexagonVSPLATH I32:$Rs)), (S2_vsplatrh I32:$Rs)>;
+
+
+class VArith_pat <InstHexagon MI, SDNode Op, PatFrag Type>
+ : Pat <(Op Type:$Rss, Type:$Rtt),
+ (MI Type:$Rss, Type:$Rtt)>;
+
+def: VArith_pat <A2_vaddub, add, V8I8>;
+def: VArith_pat <A2_vaddh, add, V4I16>;
+def: VArith_pat <A2_vaddw, add, V2I32>;
+def: VArith_pat <A2_vsubub, sub, V8I8>;
+def: VArith_pat <A2_vsubh, sub, V4I16>;
+def: VArith_pat <A2_vsubw, sub, V2I32>;
+
+def: VArith_pat <A2_and, and, V2I16>;
+def: VArith_pat <A2_xor, xor, V2I16>;
+def: VArith_pat <A2_or, or, V2I16>;
+
+def: VArith_pat <A2_andp, and, V8I8>;
+def: VArith_pat <A2_andp, and, V4I16>;
+def: VArith_pat <A2_andp, and, V2I32>;
+def: VArith_pat <A2_orp, or, V8I8>;
+def: VArith_pat <A2_orp, or, V4I16>;
+def: VArith_pat <A2_orp, or, V2I32>;
+def: VArith_pat <A2_xorp, xor, V8I8>;
+def: VArith_pat <A2_xorp, xor, V4I16>;
+def: VArith_pat <A2_xorp, xor, V2I32>;
+
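+// A vector shift by a uniform amount reaches ISel as a shift by
+// combine(#c,#c) (v2i32) or by a VSPLATH of the amount (v4i16); match
+// these directly to the shift-by-immediate instructions.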
+def: Pat<(v2i32 (sra V2I32:$b, (i64 (HexagonCOMBINE (i32 u5ImmPred:$c),
+ (i32 u5ImmPred:$c))))),
+ (S2_asr_i_vw V2I32:$b, imm:$c)>;
+def: Pat<(v2i32 (srl V2I32:$b, (i64 (HexagonCOMBINE (i32 u5ImmPred:$c),
+ (i32 u5ImmPred:$c))))),
+ (S2_lsr_i_vw V2I32:$b, imm:$c)>;
+def: Pat<(v2i32 (shl V2I32:$b, (i64 (HexagonCOMBINE (i32 u5ImmPred:$c),
+ (i32 u5ImmPred:$c))))),
+ (S2_asl_i_vw V2I32:$b, imm:$c)>;
+
+def: Pat<(v4i16 (sra V4I16:$b, (v4i16 (HexagonVSPLATH (i32 (u4ImmPred:$c)))))),
+ (S2_asr_i_vh V4I16:$b, imm:$c)>;
+def: Pat<(v4i16 (srl V4I16:$b, (v4i16 (HexagonVSPLATH (i32 (u4ImmPred:$c)))))),
+ (S2_lsr_i_vh V4I16:$b, imm:$c)>;
+def: Pat<(v4i16 (shl V4I16:$b, (v4i16 (HexagonVSPLATH (i32 (u4ImmPred:$c)))))),
+ (S2_asl_i_vh V4I16:$b, imm:$c)>;
+
+
+def SDTHexagon_v2i32_v2i32_i32 : SDTypeProfile<1, 2,
+ [SDTCisSameAs<0, 1>, SDTCisVT<0, v2i32>, SDTCisInt<2>]>;
+def SDTHexagon_v4i16_v4i16_i32 : SDTypeProfile<1, 2,
+ [SDTCisSameAs<0, 1>, SDTCisVT<0, v4i16>, SDTCisInt<2>]>;
+
+def HexagonVSRAW: SDNode<"HexagonISD::VSRAW", SDTHexagon_v2i32_v2i32_i32>;
+def HexagonVSRAH: SDNode<"HexagonISD::VSRAH", SDTHexagon_v4i16_v4i16_i32>;
+def HexagonVSRLW: SDNode<"HexagonISD::VSRLW", SDTHexagon_v2i32_v2i32_i32>;
+def HexagonVSRLH: SDNode<"HexagonISD::VSRLH", SDTHexagon_v4i16_v4i16_i32>;
+def HexagonVSHLW: SDNode<"HexagonISD::VSHLW", SDTHexagon_v2i32_v2i32_i32>;
+def HexagonVSHLH: SDNode<"HexagonISD::VSHLH", SDTHexagon_v4i16_v4i16_i32>;
+
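+// Vector shift-by-immediate nodes produced during lowering; map them
+// straight onto the immediate shift instructions.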
+def: Pat<(v2i32 (HexagonVSRAW V2I32:$Rs, u5ImmPred:$u5)),
+ (S2_asr_i_vw V2I32:$Rs, imm:$u5)>;
+def: Pat<(v4i16 (HexagonVSRAH V4I16:$Rs, u4ImmPred:$u4)),
+ (S2_asr_i_vh V4I16:$Rs, imm:$u4)>;
+def: Pat<(v2i32 (HexagonVSRLW V2I32:$Rs, u5ImmPred:$u5)),
+ (S2_lsr_i_vw V2I32:$Rs, imm:$u5)>;
+def: Pat<(v4i16 (HexagonVSRLH V4I16:$Rs, u4ImmPred:$u4)),
+ (S2_lsr_i_vh V4I16:$Rs, imm:$u4)>;
+def: Pat<(v2i32 (HexagonVSHLW V2I32:$Rs, u5ImmPred:$u5)),
+ (S2_asl_i_vw V2I32:$Rs, imm:$u5)>;
+def: Pat<(v4i16 (HexagonVSHLH V4I16:$Rs, u4ImmPred:$u4)),
+ (S2_asl_i_vh V4I16:$Rs, imm:$u4)>;
+
// Vector shift words by register
def S2_asr_r_vw : T_S3op_shiftVect < "vasrw", 0b00, 0b00>;
def S2_lsr_r_vw : T_S3op_shiftVect < "vlsrw", 0b00, 0b01>;
@@ -63,3 +178,306 @@ def S2_asr_r_vh : T_S3op_shiftVect < "vasrh", 0b01, 0b00>;
def S2_lsr_r_vh : T_S3op_shiftVect < "vlsrh", 0b01, 0b01>;
def S2_asl_r_vh : T_S3op_shiftVect < "vaslh", 0b01, 0b10>;
def S2_lsl_r_vh : T_S3op_shiftVect < "vlslh", 0b01, 0b11>;
+
+class vshift_rr_pat<InstHexagon MI, SDNode Op, PatFrag Value>
+ : Pat <(Op Value:$Rs, I32:$Rt),
+ (MI Value:$Rs, I32:$Rt)>;
+
+def: vshift_rr_pat <S2_asr_r_vw, HexagonVSRAW, V2I32>;
+def: vshift_rr_pat <S2_asr_r_vh, HexagonVSRAH, V4I16>;
+def: vshift_rr_pat <S2_lsr_r_vw, HexagonVSRLW, V2I32>;
+def: vshift_rr_pat <S2_lsr_r_vh, HexagonVSRLH, V4I16>;
+def: vshift_rr_pat <S2_asl_r_vw, HexagonVSHLW, V2I32>;
+def: vshift_rr_pat <S2_asl_r_vh, HexagonVSHLH, V4I16>;
+
+
+def SDTHexagonVecCompare_v8i8 : SDTypeProfile<1, 2,
+ [SDTCisSameAs<1, 2>, SDTCisVT<0, i1>, SDTCisVT<1, v8i8>]>;
+def SDTHexagonVecCompare_v4i16 : SDTypeProfile<1, 2,
+ [SDTCisSameAs<1, 2>, SDTCisVT<0, i1>, SDTCisVT<1, v4i16>]>;
+def SDTHexagonVecCompare_v2i32 : SDTypeProfile<1, 2,
+ [SDTCisSameAs<1, 2>, SDTCisVT<0, i1>, SDTCisVT<1, v2i32>]>;
+
+def HexagonVCMPBEQ: SDNode<"HexagonISD::VCMPBEQ", SDTHexagonVecCompare_v8i8>;
+def HexagonVCMPBGT: SDNode<"HexagonISD::VCMPBGT", SDTHexagonVecCompare_v8i8>;
+def HexagonVCMPBGTU: SDNode<"HexagonISD::VCMPBGTU", SDTHexagonVecCompare_v8i8>;
+def HexagonVCMPHEQ: SDNode<"HexagonISD::VCMPHEQ", SDTHexagonVecCompare_v4i16>;
+def HexagonVCMPHGT: SDNode<"HexagonISD::VCMPHGT", SDTHexagonVecCompare_v4i16>;
+def HexagonVCMPHGTU: SDNode<"HexagonISD::VCMPHGTU", SDTHexagonVecCompare_v4i16>;
+def HexagonVCMPWEQ: SDNode<"HexagonISD::VCMPWEQ", SDTHexagonVecCompare_v2i32>;
+def HexagonVCMPWGT: SDNode<"HexagonISD::VCMPWGT", SDTHexagonVecCompare_v2i32>;
+def HexagonVCMPWGTU: SDNode<"HexagonISD::VCMPWGTU", SDTHexagonVecCompare_v2i32>;
+
+
+class vcmp_i1_pat<InstHexagon MI, SDNode Op, PatFrag Value>
+ : Pat <(i1 (Op Value:$Rs, Value:$Rt)),
+ (MI Value:$Rs, Value:$Rt)>;
+
+def: vcmp_i1_pat<A2_vcmpbeq, HexagonVCMPBEQ, V8I8>;
+def: vcmp_i1_pat<A4_vcmpbgt, HexagonVCMPBGT, V8I8>;
+def: vcmp_i1_pat<A2_vcmpbgtu, HexagonVCMPBGTU, V8I8>;
+
+def: vcmp_i1_pat<A2_vcmpheq, HexagonVCMPHEQ, V4I16>;
+def: vcmp_i1_pat<A2_vcmphgt, HexagonVCMPHGT, V4I16>;
+def: vcmp_i1_pat<A2_vcmphgtu, HexagonVCMPHGTU, V4I16>;
+
+def: vcmp_i1_pat<A2_vcmpweq, HexagonVCMPWEQ, V2I32>;
+def: vcmp_i1_pat<A2_vcmpwgt, HexagonVCMPWGT, V2I32>;
+def: vcmp_i1_pat<A2_vcmpwgtu, HexagonVCMPWGTU, V2I32>;
+
+
+class vcmp_vi1_pat<InstHexagon MI, PatFrag Op, PatFrag InVal, ValueType OutTy>
+ : Pat <(OutTy (Op InVal:$Rs, InVal:$Rt)),
+ (MI InVal:$Rs, InVal:$Rt)>;
+
+def: vcmp_vi1_pat<A2_vcmpweq, seteq, V2I32, v2i1>;
+def: vcmp_vi1_pat<A2_vcmpwgt, setgt, V2I32, v2i1>;
+def: vcmp_vi1_pat<A2_vcmpwgtu, setugt, V2I32, v2i1>;
+
+def: vcmp_vi1_pat<A2_vcmpheq, seteq, V4I16, v4i1>;
+def: vcmp_vi1_pat<A2_vcmphgt, setgt, V4I16, v4i1>;
+def: vcmp_vi1_pat<A2_vcmphgtu, setugt, V4I16, v4i1>;
+
+
+// Hexagon doesn't have a vector multiply with C semantics.
+// Instead, generate a pseudo instruction that gets expanded into two
+// scalar MPYI instructions by ExpandPostRAPseudos.
+let isPseudo = 1 in
+def VMULW : PseudoM<(outs DoubleRegs:$Rd),
+ (ins DoubleRegs:$Rs, DoubleRegs:$Rt),
+ ".error \"Should never try to emit VMULW\"",
+ [(set V2I32:$Rd, (mul V2I32:$Rs, V2I32:$Rt))]>;
+
+let isPseudo = 1 in
+def VMULW_ACC : PseudoM<(outs DoubleRegs:$Rd),
+ (ins DoubleRegs:$Rx, DoubleRegs:$Rs, DoubleRegs:$Rt),
+ ".error \"Should never try to emit VMULW_ACC\"",
+ [(set V2I32:$Rd, (add V2I32:$Rx, (mul V2I32:$Rs, V2I32:$Rt)))],
+ "$Rd = $Rx">;
+
+// Add two v4i8 vectors: Hexagon has no instruction for this, so widen to
+// v8i8, use the doubleword add, and keep only the low half of the result.
+def: Pat<(v4i8 (add (v4i8 IntRegs:$Rs), (v4i8 IntRegs:$Rt))),
+ (LoReg (A2_vaddub (Zext64 $Rs), (Zext64 $Rt)))>;
+
+// Subtract two v4i8 vectors: Hexagon has no instruction for this, so widen
+// to v8i8, use the doubleword subtract, and keep only the low half of the
+// result.
+def: Pat<(v4i8 (sub (v4i8 IntRegs:$Rs), (v4i8 IntRegs:$Rt))),
+ (LoReg (A2_vsubub (Zext64 $Rs), (Zext64 $Rt)))>;
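Why the widening trick is correct: byte lanes add modulo 256, and the 64-bit byte add never carries across lanes, so the low half of the widened sum is exactly the v4i8 sum. A C++ sketch (illustrative name):

    #include <cstdint>

    // v4i8 add via the 64-bit byte add: zero-extend both 32-bit vectors
    // into the low half of a 64-bit register, add byte lanes, keep the
    // low 32 bits.
    uint32_t add_v4i8(uint32_t rs, uint32_t rt) {
      uint32_t result = 0;
      for (int lane = 0; lane < 4; ++lane) {
        uint8_t a = (rs >> (8 * lane)) & 0xFF;
        uint8_t b = (rt >> (8 * lane)) & 0xFF;
        result |= (uint32_t)((uint8_t)(a + b)) << (8 * lane); // wraps mod 256
      }
      return result;
    }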
+
+//
+// No 32-bit vector mux.
+//
+def: Pat<(v4i8 (select I1:$Pu, V4I8:$Rs, V4I8:$Rt)),
+ (LoReg (C2_vmux I1:$Pu, (Zext64 $Rs), (Zext64 $Rt)))>;
+def: Pat<(v2i16 (select I1:$Pu, V2I16:$Rs, V2I16:$Rt)),
+ (LoReg (C2_vmux I1:$Pu, (Zext64 $Rs), (Zext64 $Rt)))>;
+
+//
+// 64-bit vector mux.
+//
+def: Pat<(v8i8 (vselect V8I1:$Pu, V8I8:$Rs, V8I8:$Rt)),
+ (C2_vmux V8I1:$Pu, V8I8:$Rs, V8I8:$Rt)>;
+def: Pat<(v4i16 (vselect V4I1:$Pu, V4I16:$Rs, V4I16:$Rt)),
+ (C2_vmux V4I1:$Pu, V4I16:$Rs, V4I16:$Rt)>;
+def: Pat<(v2i32 (vselect V2I1:$Pu, V2I32:$Rs, V2I32:$Rt)),
+ (C2_vmux V2I1:$Pu, V2I32:$Rs, V2I32:$Rt)>;
+
+//
+// No 32-bit vector compare.
+//
+def: Pat<(i1 (seteq V4I8:$Rs, V4I8:$Rt)),
+ (A2_vcmpbeq (Zext64 $Rs), (Zext64 $Rt))>;
+def: Pat<(i1 (setgt V4I8:$Rs, V4I8:$Rt)),
+ (A4_vcmpbgt (Zext64 $Rs), (Zext64 $Rt))>;
+def: Pat<(i1 (setugt V4I8:$Rs, V4I8:$Rt)),
+ (A2_vcmpbgtu (Zext64 $Rs), (Zext64 $Rt))>;
+
+def: Pat<(i1 (seteq V2I16:$Rs, V2I16:$Rt)),
+ (A2_vcmpheq (Zext64 $Rs), (Zext64 $Rt))>;
+def: Pat<(i1 (setgt V2I16:$Rs, V2I16:$Rt)),
+ (A2_vcmphgt (Zext64 $Rs), (Zext64 $Rt))>;
+def: Pat<(i1 (setugt V2I16:$Rs, V2I16:$Rt)),
+ (A2_vcmphgtu (Zext64 $Rs), (Zext64 $Rt))>;
+
+
+class InvertCmp_pat<InstHexagon InvMI, PatFrag CmpOp, PatFrag Value,
+ ValueType CmpTy>
+ : Pat<(CmpTy (CmpOp Value:$Rs, Value:$Rt)),
+ (InvMI Value:$Rt, Value:$Rs)>;
+
+// Map from a compare operation to the corresponding instruction with the
+// order of operands reversed, e.g. x > y --> cmp.lt(y,x).
+def: InvertCmp_pat<A4_vcmpbgt, setlt, V8I8, i1>;
+def: InvertCmp_pat<A4_vcmpbgt, setlt, V8I8, v8i1>;
+def: InvertCmp_pat<A2_vcmphgt, setlt, V4I16, i1>;
+def: InvertCmp_pat<A2_vcmphgt, setlt, V4I16, v4i1>;
+def: InvertCmp_pat<A2_vcmpwgt, setlt, V2I32, i1>;
+def: InvertCmp_pat<A2_vcmpwgt, setlt, V2I32, v2i1>;
+
+def: InvertCmp_pat<A2_vcmpbgtu, setult, V8I8, i1>;
+def: InvertCmp_pat<A2_vcmpbgtu, setult, V8I8, v8i1>;
+def: InvertCmp_pat<A2_vcmphgtu, setult, V4I16, i1>;
+def: InvertCmp_pat<A2_vcmphgtu, setult, V4I16, v4i1>;
+def: InvertCmp_pat<A2_vcmpwgtu, setult, V2I32, i1>;
+def: InvertCmp_pat<A2_vcmpwgtu, setult, V2I32, v2i1>;
+
+// Map from vcmpne(Rss) -> !vcmpew(Rss).
+// rs != rt -> !(rs == rt).
+def: Pat<(v2i1 (setne V2I32:$Rs, V2I32:$Rt)),
+ (C2_not (v2i1 (A2_vcmpweq V2I32:$Rs, V2I32:$Rt)))>;
+
+
+// Truncate: from vector B copy all 'E'ven 'B'yte elements:
+// A[0] = B[0]; A[1] = B[2]; A[2] = B[4]; A[3] = B[6];
+def: Pat<(v4i8 (trunc V4I16:$Rs)),
+ (S2_vtrunehb V4I16:$Rs)>;
+
+// Truncate: from vector B copy all 'O'dd 'B'yte elements:
+// A[0] = B[1]; A[1] = B[3]; A[2] = B[5]; A[3] = B[7];
+// S2_vtrunohb
+
+// Truncate: from vectors B and C copy all 'E'ven 'H'alf-word elements:
+// A[0] = B[0]; A[1] = B[2]; A[2] = C[0]; A[3] = C[2];
+// S2_vtrunewh
+
+def: Pat<(v2i16 (trunc V2I32:$Rs)),
+ (LoReg (S2_packhl (HiReg $Rs), (LoReg $Rs)))>;
+
+
+def HexagonVSXTBH : SDNode<"HexagonISD::VSXTBH", SDTUnaryOp>;
+def HexagonVSXTBW : SDNode<"HexagonISD::VSXTBW", SDTUnaryOp>;
+
+def: Pat<(i64 (HexagonVSXTBH I32:$Rs)), (S2_vsxtbh I32:$Rs)>;
+def: Pat<(i64 (HexagonVSXTBW I32:$Rs)), (S2_vsxthw I32:$Rs)>;
+
+def: Pat<(v4i16 (zext V4I8:$Rs)), (S2_vzxtbh V4I8:$Rs)>;
+def: Pat<(v2i32 (zext V2I16:$Rs)), (S2_vzxthw V2I16:$Rs)>;
+def: Pat<(v4i16 (anyext V4I8:$Rs)), (S2_vzxtbh V4I8:$Rs)>;
+def: Pat<(v2i32 (anyext V2I16:$Rs)), (S2_vzxthw V2I16:$Rs)>;
+def: Pat<(v4i16 (sext V4I8:$Rs)), (S2_vsxtbh V4I8:$Rs)>;
+def: Pat<(v2i32 (sext V2I16:$Rs)), (S2_vsxthw V2I16:$Rs)>;
+
+// Sign extends a v2i8 into a v2i32.
+def: Pat<(v2i32 (sext_inreg V2I32:$Rs, v2i8)),
+ (A2_combinew (A2_sxtb (HiReg $Rs)), (A2_sxtb (LoReg $Rs)))>;
+
+// Sign extends a v2i16 into a v2i32.
+def: Pat<(v2i32 (sext_inreg V2I32:$Rs, v2i16)),
+ (A2_combinew (A2_sxth (HiReg $Rs)), (A2_sxth (LoReg $Rs)))>;
+
+
+// Multiply two v2i16 vectors and return a v2i32. We use the saturating
+// multiply here because Hexagon does not provide a non-saturating vector
+// multiply, and saturation cannot affect a result held in double the
+// precision of the operands.
+
+// Multiply two v2i16 vectors: Hexagon has no multiply with C (wrap-around)
+// semantics for this, so this pattern uses the halfword multiply vmpyh,
+// which takes two v2i16 and returns a v2i32. The result is then truncated
+// back to v2i16 to simulate C's unsigned wrap-around semantics.
+def vmpyh: OutPatFrag<(ops node:$Rs, node:$Rt),
+ (M2_vmpy2s_s0 (i32 $Rs), (i32 $Rt))>;
+
+def: Pat<(v2i16 (mul V2I16:$Rs, V2I16:$Rt)),
+ (LoReg (S2_vtrunewh (v2i32 (A2_combineii 0, 0)),
+ (v2i32 (vmpyh V2I16:$Rs, V2I16:$Rt))))>;
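Why the saturating multiply is safe here: every signed 16x16 product fits in 32 bits, so the saturating result never actually saturates, and truncating the low 16 bits reproduces C's wrap-around product. A sketch of one lane:

    #include <cstdint>

    // 16x16 -> 32 multiply, then truncate to 16 bits: the full product is
    // exact (no saturation can fire), so the low 16 bits equal C's
    // wrap-around result.
    uint16_t mul_lane(int16_t a, int16_t b) {
      int32_t wide = (int32_t)a * (int32_t)b;  // always representable
      return (uint16_t)wide;                   // truncation == wrap-around
    }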
+
+// Multiplies two v4i16 vectors.
+def: Pat<(v4i16 (mul V4I16:$Rs, V4I16:$Rt)),
+ (S2_vtrunewh (vmpyh (HiReg $Rs), (HiReg $Rt)),
+ (vmpyh (LoReg $Rs), (LoReg $Rt)))>;
+
+def VMPYB_no_V5: OutPatFrag<(ops node:$Rs, node:$Rt),
+ (S2_vtrunewh (vmpyh (HiReg (S2_vsxtbh $Rs)), (HiReg (S2_vsxtbh $Rt))),
+ (vmpyh (LoReg (S2_vsxtbh $Rs)), (LoReg (S2_vsxtbh $Rt))))>;
+
+// Multiplies two v4i8 vectors.
+def: Pat<(v4i8 (mul V4I8:$Rs, V4I8:$Rt)),
+ (S2_vtrunehb (M5_vmpybsu V4I8:$Rs, V4I8:$Rt))>,
+ Requires<[HasV5T]>;
+
+def: Pat<(v4i8 (mul V4I8:$Rs, V4I8:$Rt)),
+ (S2_vtrunehb (VMPYB_no_V5 V4I8:$Rs, V4I8:$Rt))>;
+
+// Multiplies two v8i8 vectors.
+def: Pat<(v8i8 (mul V8I8:$Rs, V8I8:$Rt)),
+ (A2_combinew (S2_vtrunehb (M5_vmpybsu (HiReg $Rs), (HiReg $Rt))),
+ (S2_vtrunehb (M5_vmpybsu (LoReg $Rs), (LoReg $Rt))))>,
+ Requires<[HasV5T]>;
+
+def: Pat<(v8i8 (mul V8I8:$Rs, V8I8:$Rt)),
+ (A2_combinew (S2_vtrunehb (VMPYB_no_V5 (HiReg $Rs), (HiReg $Rt))),
+ (S2_vtrunehb (VMPYB_no_V5 (LoReg $Rs), (LoReg $Rt))))>;
+
+
+class shuffler<SDNode Op, string Str>
+ : SInst<(outs DoubleRegs:$a), (ins DoubleRegs:$b, DoubleRegs:$c),
+ "$a = " # Str # "($b, $c)",
+ [(set (i64 DoubleRegs:$a),
+ (i64 (Op (i64 DoubleRegs:$b), (i64 DoubleRegs:$c))))],
+ "", S_3op_tc_1_SLOT23>;
+
+def SDTHexagonBinOp64 : SDTypeProfile<1, 2,
+ [SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2>, SDTCisVT<0, i64>]>;
+
+def HexagonSHUFFEB: SDNode<"HexagonISD::SHUFFEB", SDTHexagonBinOp64>;
+def HexagonSHUFFEH: SDNode<"HexagonISD::SHUFFEH", SDTHexagonBinOp64>;
+def HexagonSHUFFOB: SDNode<"HexagonISD::SHUFFOB", SDTHexagonBinOp64>;
+def HexagonSHUFFOH: SDNode<"HexagonISD::SHUFFOH", SDTHexagonBinOp64>;
+
+class ShufflePat<InstHexagon MI, SDNode Op>
+ : Pat<(i64 (Op DoubleRegs:$src1, DoubleRegs:$src2)),
+ (i64 (MI DoubleRegs:$src1, DoubleRegs:$src2))>;
+
+// Shuffles even bytes for i=0..3: A[2*i].b = C[2*i].b; A[2*i+1].b = B[2*i].b
+def: ShufflePat<S2_shuffeb, HexagonSHUFFEB>;
+
+// Shuffles odd bytes for i=0..3: A[2*i].b = C[2*i+1].b; A[2*i+1].b = B[2*i+1].b
+def: ShufflePat<S2_shuffob, HexagonSHUFFOB>;
+
+// Shuffles even half for i=0,1: A[2*i].h = C[2*i].h; A[2*i+1].h = B[2*i].h
+def: ShufflePat<S2_shuffeh, HexagonSHUFFEH>;
+
+// Shuffles odd half for i=0,1: A[2*i].h = C[2*i+1].h; A[2*i+1].h = B[2*i+1].h
+def: ShufflePat<S2_shuffoh, HexagonSHUFFOH>;
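The array notation above, as a runnable C++ sketch of the byte variant (illustrative function name):

    #include <cstdint>

    // S2_shuffeb per the comment: A[2*i] = C[2*i]; A[2*i+1] = B[2*i].
    void shuffeb(const uint8_t b[8], const uint8_t c[8], uint8_t a[8]) {
      for (int i = 0; i < 4; ++i) {
        a[2 * i]     = c[2 * i];
        a[2 * i + 1] = b[2 * i];
      }
    }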
+
+
+// Truncated store from v4i16 to v4i8.
+def truncstorev4i8: PatFrag<(ops node:$val, node:$ptr),
+ (truncstore node:$val, node:$ptr),
+ [{ return cast<StoreSDNode>(N)->getMemoryVT() == MVT::v4i8; }]>;
+
+// Truncated store from v2i32 to v2i16.
+def truncstorev2i16: PatFrag<(ops node:$val, node:$ptr),
+ (truncstore node:$val, node:$ptr),
+ [{ return cast<StoreSDNode>(N)->getMemoryVT() == MVT::v2i16; }]>;
+
+def: Pat<(truncstorev2i16 V2I32:$Rs, I32:$Rt),
+ (S2_storeri_io I32:$Rt, 0, (LoReg (S2_packhl (HiReg $Rs),
+ (LoReg $Rs))))>;
+
+def: Pat<(truncstorev4i8 V4I16:$Rs, I32:$Rt),
+ (S2_storeri_io I32:$Rt, 0, (S2_vtrunehb V4I16:$Rs))>;
+
+
+// Zero and sign extended load from v2i8 into v2i16.
+def zextloadv2i8: PatFrag<(ops node:$ptr), (zextload node:$ptr),
+ [{ return cast<LoadSDNode>(N)->getMemoryVT() == MVT::v2i8; }]>;
+
+def sextloadv2i8: PatFrag<(ops node:$ptr), (sextload node:$ptr),
+ [{ return cast<LoadSDNode>(N)->getMemoryVT() == MVT::v2i8; }]>;
+
+def: Pat<(v2i16 (zextloadv2i8 I32:$Rs)),
+ (LoReg (v4i16 (S2_vzxtbh (L2_loadruh_io I32:$Rs, 0))))>;
+
+def: Pat<(v2i16 (sextloadv2i8 I32:$Rs)),
+ (LoReg (v4i16 (S2_vsxtbh (L2_loadrh_io I32:$Rs, 0))))>;
+
+def: Pat<(v2i32 (zextloadv2i8 I32:$Rs)),
+ (S2_vzxthw (LoReg (v4i16 (S2_vzxtbh (L2_loadruh_io I32:$Rs, 0)))))>;
+
+def: Pat<(v2i32 (sextloadv2i8 I32:$Rs)),
+ (S2_vsxthw (LoReg (v4i16 (S2_vsxtbh (L2_loadrh_io I32:$Rs, 0)))))>;
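A plain C++ sketch of how these v2i8 loads bottom out, assuming little-endian byte order as on Hexagon (illustrative name):

    #include <cstdint>
    #include <cstring>

    // zextloadv2i8: load the two bytes as one halfword (L2_loadruh_io),
    // then zero-extend each byte into its own 16-bit lane.
    uint32_t zextload_v2i8(const uint8_t *p) {
      uint16_t half;
      std::memcpy(&half, p, sizeof half);
      uint16_t lane0 = (uint8_t)(half & 0xFF);  // p[0] widened
      uint16_t lane1 = (uint8_t)(half >> 8);    // p[1] widened
      return ((uint32_t)lane1 << 16) | lane0;   // v2i16 in a 32-bit reg
    }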
diff --git a/lib/Target/Hexagon/HexagonIntrinsics.td b/lib/Target/Hexagon/HexagonIntrinsics.td
index c0551e8..4275230 100644
--- a/lib/Target/Hexagon/HexagonIntrinsics.td
+++ b/lib/Target/Hexagon/HexagonIntrinsics.td
@@ -690,16 +690,15 @@ def: T_RR_pat<A2_combine_hl, int_hexagon_A2_combine_hl>;
def: T_RR_pat<A2_combine_lh, int_hexagon_A2_combine_lh>;
def: T_RR_pat<A2_combine_ll, int_hexagon_A2_combine_ll>;
-def: T_II_pat<A2_combineii, int_hexagon_A2_combineii, s8ExtPred, s8ImmPred>;
+def: T_II_pat<A2_combineii, int_hexagon_A2_combineii, s32ImmPred, s8ImmPred>;
-def: Pat<(i32 (int_hexagon_C2_mux (I32:$Rp), (I32:$Rs),
- (I32:$Rt))),
+def: Pat<(i32 (int_hexagon_C2_mux (I32:$Rp), (I32:$Rs), (I32:$Rt))),
(i32 (C2_mux (C2_tfrrp IntRegs:$Rp), IntRegs:$Rs, IntRegs:$Rt))>;
// Mux
-def : T_QRI_pat<C2_muxir, int_hexagon_C2_muxir, s8ExtPred>;
-def : T_QIR_pat<C2_muxri, int_hexagon_C2_muxri, s8ExtPred>;
-def : T_QII_pat<C2_muxii, int_hexagon_C2_muxii, s8ExtPred, s8ImmPred>;
+def : T_QRI_pat<C2_muxir, int_hexagon_C2_muxir, s32ImmPred>;
+def : T_QIR_pat<C2_muxri, int_hexagon_C2_muxri, s32ImmPred>;
+def : T_QII_pat<C2_muxii, int_hexagon_C2_muxii, s32ImmPred, s8ImmPred>;
// Shift halfword
def : T_R_pat<A2_aslh, int_hexagon_A2_aslh>;
@@ -720,17 +719,17 @@ def : T_RR_pat<C2_cmpeq, int_hexagon_C2_cmpeq>;
def : T_RR_pat<C2_cmpgt, int_hexagon_C2_cmpgt>;
def : T_RR_pat<C2_cmpgtu, int_hexagon_C2_cmpgtu>;
-def : T_RI_pat<C2_cmpeqi, int_hexagon_C2_cmpeqi, s10ExtPred>;
-def : T_RI_pat<C2_cmpgti, int_hexagon_C2_cmpgti, s10ExtPred>;
-def : T_RI_pat<C2_cmpgtui, int_hexagon_C2_cmpgtui, u9ExtPred>;
+def : T_RI_pat<C2_cmpeqi, int_hexagon_C2_cmpeqi, s32ImmPred>;
+def : T_RI_pat<C2_cmpgti, int_hexagon_C2_cmpgti, s32ImmPred>;
+def : T_RI_pat<C2_cmpgtui, int_hexagon_C2_cmpgtui, u32ImmPred>;
-def : Pat <(i32 (int_hexagon_C2_cmpgei (I32:$src1), s8ExtPred:$src2)),
+def : Pat <(i32 (int_hexagon_C2_cmpgei (I32:$src1), s32ImmPred:$src2)),
(i32 (C2_cmpgti (I32:$src1),
- (DEC_CONST_SIGNED s8ExtPred:$src2)))>;
+ (DEC_CONST_SIGNED s32ImmPred:$src2)))>;
-def : Pat <(i32 (int_hexagon_C2_cmpgeui (I32:$src1), u8ExtPred:$src2)),
+def : Pat <(i32 (int_hexagon_C2_cmpgeui (I32:$src1), u32ImmPred:$src2)),
(i32 (C2_cmpgtui (I32:$src1),
- (DEC_CONST_UNSIGNED u8ExtPred:$src2)))>;
+ (DEC_CONST_UNSIGNED u32ImmPred:$src2)))>;
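These two patterns rest on the identity x >= c <=> x > c - 1. A minimal C++ statement of the signed case (valid only when c - 1 does not underflow, which the DEC_CONST_SIGNED rewrite presupposes):

    #include <cstdint>

    // cmp.ge(x, c) lowered as cmp.gt(x, c - 1); same truth table as x >= c
    // whenever c - 1 does not underflow.
    bool cmpgei(int32_t x, int32_t c) { return x > c - 1; }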
// The instruction, Pd=cmp.geu(Rs, #u8) -> Pd=cmp.eq(Rs,Rs) when #u8 == 0.
def : Pat <(i32 (int_hexagon_C2_cmpgeui (I32:$src1), 0)),
@@ -1258,6 +1257,30 @@ def: Pat<(i32 (int_hexagon_S2_storew_locked (I32:$Rs), (I32:$Rt))),
def: Pat<(i32 (int_hexagon_S4_stored_locked (I32:$Rs), (I64:$Rt))),
(i32 (C2_tfrpr (S4_stored_locked (I32:$Rs), (I64:$Rt))))>;
+/********************************************************************
+* ST
+*********************************************************************/
+
+class T_stb_pat <InstHexagon MI, Intrinsic IntID, PatLeaf Val>
+ : Pat<(IntID I32:$Rs, Val:$Rt, I32:$Ru),
+ (MI I32:$Rs, Val:$Rt, I32:$Ru)>;
+
+def : T_stb_pat <S2_storerh_pbr_pseudo, int_hexagon_brev_sth, I32>;
+def : T_stb_pat <S2_storerb_pbr_pseudo, int_hexagon_brev_stb, I32>;
+def : T_stb_pat <S2_storeri_pbr_pseudo, int_hexagon_brev_stw, I32>;
+def : T_stb_pat <S2_storerf_pbr_pseudo, int_hexagon_brev_sthhi, I32>;
+def : T_stb_pat <S2_storerd_pbr_pseudo, int_hexagon_brev_std, I64>;
+
+class T_stc_pat <InstHexagon MI, Intrinsic IntID, PatLeaf Imm, PatLeaf Val>
+ : Pat<(IntID I32:$Rs, Val:$Rt, I32:$Ru, Imm:$s),
+ (MI I32:$Rs, Val:$Rt, I32:$Ru, Imm:$s)>;
+
+def: T_stc_pat<S2_storerb_pci_pseudo, int_hexagon_circ_stb, s4_0ImmPred, I32>;
+def: T_stc_pat<S2_storerh_pci_pseudo, int_hexagon_circ_sth, s4_1ImmPred, I32>;
+def: T_stc_pat<S2_storeri_pci_pseudo, int_hexagon_circ_stw, s4_2ImmPred, I32>;
+def: T_stc_pat<S2_storerd_pci_pseudo, int_hexagon_circ_std, s4_3ImmPred, I64>;
+def: T_stc_pat<S2_storerf_pci_pseudo, int_hexagon_circ_sthhi, s4_1ImmPred, I32>;
+
include "HexagonIntrinsicsV3.td"
include "HexagonIntrinsicsV4.td"
include "HexagonIntrinsicsV5.td"
diff --git a/lib/Target/Hexagon/HexagonIntrinsicsV4.td b/lib/Target/Hexagon/HexagonIntrinsicsV4.td
index 8d068eb..c80a188 100644
--- a/lib/Target/Hexagon/HexagonIntrinsicsV4.td
+++ b/lib/Target/Hexagon/HexagonIntrinsicsV4.td
@@ -234,17 +234,17 @@ def: T_RR_pat<A4_orn, int_hexagon_A4_orn>;
*********************************************************************/
// Combine Words Into Doublewords.
-def: T_RI_pat<A4_combineri, int_hexagon_A4_combineri, s8ExtPred>;
-def: T_IR_pat<A4_combineir, int_hexagon_A4_combineir, s8ExtPred>;
+def: T_RI_pat<A4_combineri, int_hexagon_A4_combineri, s32ImmPred>;
+def: T_IR_pat<A4_combineir, int_hexagon_A4_combineir, s32ImmPred>;
/********************************************************************
* ALU32/PRED *
*********************************************************************/
// Compare
-def : T_RI_pat<C4_cmpneqi, int_hexagon_C4_cmpneqi, s10ExtPred>;
-def : T_RI_pat<C4_cmpltei, int_hexagon_C4_cmpltei, s10ExtPred>;
-def : T_RI_pat<C4_cmplteui, int_hexagon_C4_cmplteui, u9ExtPred>;
+def : T_RI_pat<C4_cmpneqi, int_hexagon_C4_cmpneqi, s32ImmPred>;
+def : T_RI_pat<C4_cmpltei, int_hexagon_C4_cmpltei, s32ImmPred>;
+def : T_RI_pat<C4_cmplteui, int_hexagon_C4_cmplteui, u32ImmPred>;
def: T_RR_pat<A4_rcmpeq, int_hexagon_A4_rcmpeq>;
def: T_RR_pat<A4_rcmpneq, int_hexagon_A4_rcmpneq>;
diff --git a/lib/Target/Hexagon/HexagonNewValueJump.cpp b/lib/Target/Hexagon/HexagonNewValueJump.cpp
index 806d448..81af4db 100644
--- a/lib/Target/Hexagon/HexagonNewValueJump.cpp
+++ b/lib/Target/Hexagon/HexagonNewValueJump.cpp
@@ -40,6 +40,7 @@
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetRegisterInfo.h"
@@ -199,10 +200,7 @@ static bool commonChecksToProhibitNewValueJump(bool afterRA,
// of registers by individual passes in the backend. At this time,
// we don't know the scope of usage and definitions of these
// instructions.
- if (MII->getOpcode() == Hexagon::TFR_condset_ii ||
- MII->getOpcode() == Hexagon::TFR_condset_ri ||
- MII->getOpcode() == Hexagon::TFR_condset_ir ||
- MII->getOpcode() == Hexagon::LDriw_pred ||
+ if (MII->getOpcode() == Hexagon::LDriw_pred ||
MII->getOpcode() == Hexagon::STriw_pred)
return false;
}
diff --git a/lib/Target/Hexagon/HexagonOperands.td b/lib/Target/Hexagon/HexagonOperands.td
index 318ca72..450f594 100644
--- a/lib/Target/Hexagon/HexagonOperands.td
+++ b/lib/Target/Hexagon/HexagonOperands.td
@@ -66,162 +66,131 @@ def nOneImm : Operand<i32>;
// Immediate predicates
//
def s32ImmPred : PatLeaf<(i32 imm), [{
- // s32ImmPred predicate - True if the immediate fits in a 32-bit sign extended
- // field.
int64_t v = (int64_t)N->getSExtValue();
return isInt<32>(v);
}]>;
-def s32_24ImmPred : PatLeaf<(i32 imm), [{
- // s32_24ImmPred predicate - True if the immediate fits in a 32-bit sign
- // extended field that is a multiple of 0x1000000.
+def s32_0ImmPred : PatLeaf<(i32 imm), [{
+ int64_t v = (int64_t)N->getSExtValue();
+ return isInt<32>(v);
+}]>;
+
+def s31_1ImmPred : PatLeaf<(i32 imm), [{
+ int64_t v = (int64_t)N->getSExtValue();
+ return isShiftedInt<31,1>(v);
+}]>;
+
+def s30_2ImmPred : PatLeaf<(i32 imm), [{
+ int64_t v = (int64_t)N->getSExtValue();
+ return isShiftedInt<30,2>(v);
+}]>;
+
+def s29_3ImmPred : PatLeaf<(i32 imm), [{
+ int64_t v = (int64_t)N->getSExtValue();
+ return isShiftedInt<29,3>(v);
+}]>;
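For reference, isShiftedInt<N,S> comes from llvm/Support/MathExtras.h; a self-contained sketch of its check (the template name here is illustrative):

    #include <cstdint>

    // isShiftedInt<N, S>(v): true when v is a multiple of 2^S whose value
    // fits in an (N+S)-bit signed field, i.e. v >> S fits in N signed bits.
    template <unsigned N, unsigned S>
    bool isShiftedIntSketch(int64_t v) {
      bool aligned = (v % (int64_t(1) << S)) == 0;
      bool fits = v >= -(int64_t(1) << (N + S - 1)) &&
                  v <   (int64_t(1) << (N + S - 1));
      return aligned && fits;
    }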
+
+def s22_10ImmPred : PatLeaf<(i32 imm), [{
+ int64_t v = (int64_t)N->getSExtValue();
+ return isShiftedInt<22,10>(v);
+}]>;
+
+def s8_24ImmPred : PatLeaf<(i32 imm), [{
int64_t v = (int64_t)N->getSExtValue();
- return isShiftedInt<32,24>(v);
+ return isShiftedInt<8,24>(v);
}]>;
-def s32_16s8ImmPred : PatLeaf<(i32 imm), [{
- // s32_16s8ImmPred predicate - True if the immediate fits in a 32-bit sign
- // extended field that is a multiple of 0x10000.
+def s16_16ImmPred : PatLeaf<(i32 imm), [{
int64_t v = (int64_t)N->getSExtValue();
- return isShiftedInt<24,16>(v);
+ return isShiftedInt<16,16>(v);
}]>;
def s26_6ImmPred : PatLeaf<(i32 imm), [{
- // s26_6ImmPred predicate - True if the immediate fits in a 32-bit
- // sign extended field.
int64_t v = (int64_t)N->getSExtValue();
return isShiftedInt<26,6>(v);
}]>;
-
def s16ImmPred : PatLeaf<(i32 imm), [{
- // s16ImmPred predicate - True if the immediate fits in a 16-bit sign extended
- // field.
int64_t v = (int64_t)N->getSExtValue();
return isInt<16>(v);
}]>;
-
def s13ImmPred : PatLeaf<(i32 imm), [{
- // s13ImmPred predicate - True if the immediate fits in a 13-bit sign extended
- // field.
int64_t v = (int64_t)N->getSExtValue();
return isInt<13>(v);
}]>;
-
def s12ImmPred : PatLeaf<(i32 imm), [{
- // s12ImmPred predicate - True if the immediate fits in a 12-bit
- // sign extended field.
int64_t v = (int64_t)N->getSExtValue();
return isInt<12>(v);
}]>;
def s11_0ImmPred : PatLeaf<(i32 imm), [{
- // s11_0ImmPred predicate - True if the immediate fits in a 11-bit
- // sign extended field.
int64_t v = (int64_t)N->getSExtValue();
return isInt<11>(v);
}]>;
-
def s11_1ImmPred : PatLeaf<(i32 imm), [{
- // s11_1ImmPred predicate - True if the immediate fits in a 12-bit
- // sign extended field and is a multiple of 2.
int64_t v = (int64_t)N->getSExtValue();
return isShiftedInt<11,1>(v);
}]>;
-
def s11_2ImmPred : PatLeaf<(i32 imm), [{
- // s11_2ImmPred predicate - True if the immediate fits in a 13-bit
- // sign extended field and is a multiple of 4.
int64_t v = (int64_t)N->getSExtValue();
return isShiftedInt<11,2>(v);
}]>;
-
def s11_3ImmPred : PatLeaf<(i32 imm), [{
- // s11_3ImmPred predicate - True if the immediate fits in a 14-bit
- // sign extended field and is a multiple of 8.
int64_t v = (int64_t)N->getSExtValue();
return isShiftedInt<11,3>(v);
}]>;
-
def s10ImmPred : PatLeaf<(i32 imm), [{
- // s10ImmPred predicate - True if the immediate fits in a 10-bit sign extended
- // field.
int64_t v = (int64_t)N->getSExtValue();
return isInt<10>(v);
}]>;
-
def s9ImmPred : PatLeaf<(i32 imm), [{
- // s9ImmPred predicate - True if the immediate fits in a 9-bit sign extended
- // field.
int64_t v = (int64_t)N->getSExtValue();
return isInt<9>(v);
}]>;
def m9ImmPred : PatLeaf<(i32 imm), [{
- // m9ImmPred predicate - True if the immediate fits in a 9-bit magnitude
- // field. The range of m9 is -255 to 255.
int64_t v = (int64_t)N->getSExtValue();
return isInt<9>(v) && (v != -256);
}]>;
def s8ImmPred : PatLeaf<(i32 imm), [{
- // s8ImmPred predicate - True if the immediate fits in a 8-bit sign extended
- // field.
int64_t v = (int64_t)N->getSExtValue();
return isInt<8>(v);
}]>;
-
def s8Imm64Pred : PatLeaf<(i64 imm), [{
- // s8ImmPred predicate - True if the immediate fits in a 8-bit sign extended
- // field.
int64_t v = (int64_t)N->getSExtValue();
return isInt<8>(v);
}]>;
-
def s6ImmPred : PatLeaf<(i32 imm), [{
- // s6ImmPred predicate - True if the immediate fits in a 6-bit sign extended
- // field.
int64_t v = (int64_t)N->getSExtValue();
return isInt<6>(v);
}]>;
-
def s4_0ImmPred : PatLeaf<(i32 imm), [{
- // s4_0ImmPred predicate - True if the immediate fits in a 4-bit sign extended
- // field.
int64_t v = (int64_t)N->getSExtValue();
return isInt<4>(v);
}]>;
-
def s4_1ImmPred : PatLeaf<(i32 imm), [{
- // s4_1ImmPred predicate - True if the immediate fits in a 4-bit sign extended
- // field of 2.
int64_t v = (int64_t)N->getSExtValue();
return isShiftedInt<4,1>(v);
}]>;
-
def s4_2ImmPred : PatLeaf<(i32 imm), [{
- // s4_2ImmPred predicate - True if the immediate fits in a 4-bit sign extended
- // field that is a multiple of 4.
int64_t v = (int64_t)N->getSExtValue();
return isShiftedInt<4,2>(v);
}]>;
-
def s4_3ImmPred : PatLeaf<(i32 imm), [{
- // s4_3ImmPred predicate - True if the immediate fits in a 4-bit sign extended
- // field that is a multiple of 8.
int64_t v = (int64_t)N->getSExtValue();
return isShiftedInt<4,3>(v);
}]>;
@@ -233,56 +202,61 @@ def u64ImmPred : PatLeaf<(i64 imm), [{
}]>;
def u32ImmPred : PatLeaf<(i32 imm), [{
- // u32ImmPred predicate - True if the immediate fits in a 32-bit field.
int64_t v = (int64_t)N->getSExtValue();
return isUInt<32>(v);
}]>;
+def u32_0ImmPred : PatLeaf<(i32 imm), [{
+ int64_t v = (int64_t)N->getSExtValue();
+ return isUInt<32>(v);
+}]>;
+
+def u31_1ImmPred : PatLeaf<(i32 imm), [{
+ int64_t v = (int64_t)N->getSExtValue();
+ return isShiftedUInt<31,1>(v);
+}]>;
+
+def u30_2ImmPred : PatLeaf<(i32 imm), [{
+ int64_t v = (int64_t)N->getSExtValue();
+ return isShiftedUInt<30,2>(v);
+}]>;
+
+def u29_3ImmPred : PatLeaf<(i32 imm), [{
+ int64_t v = (int64_t)N->getSExtValue();
+ return isShiftedUInt<29,3>(v);
+}]>;
+
def u26_6ImmPred : PatLeaf<(i32 imm), [{
- // u26_6ImmPred - True if the immediate fits in a 32-bit field and
- // is a multiple of 64.
int64_t v = (int64_t)N->getSExtValue();
return isShiftedUInt<26,6>(v);
}]>;
def u16ImmPred : PatLeaf<(i32 imm), [{
- // u16ImmPred predicate - True if the immediate fits in a 16-bit unsigned
- // field.
int64_t v = (int64_t)N->getSExtValue();
return isUInt<16>(v);
}]>;
def u16_s8ImmPred : PatLeaf<(i32 imm), [{
- // u16_s8ImmPred predicate - True if the immediate fits in a 16-bit sign
- // extended s8 field.
int64_t v = (int64_t)N->getSExtValue();
return isShiftedUInt<16,8>(v);
}]>;
def u16_0ImmPred : PatLeaf<(i32 imm), [{
- // True if the immediate fits in a 16-bit unsigned field.
int64_t v = (int64_t)N->getSExtValue();
return isUInt<16>(v);
}]>;
def u11_3ImmPred : PatLeaf<(i32 imm), [{
- // True if the immediate fits in a 14-bit unsigned field, and the lowest
- // three bits are 0.
int64_t v = (int64_t)N->getSExtValue();
return isShiftedUInt<11,3>(v);
}]>;
def u9ImmPred : PatLeaf<(i32 imm), [{
- // u9ImmPred predicate - True if the immediate fits in a 9-bit unsigned
- // field.
int64_t v = (int64_t)N->getSExtValue();
return isUInt<9>(v);
}]>;
-
def u8ImmPred : PatLeaf<(i32 imm), [{
- // u8ImmPred predicate - True if the immediate fits in a 8-bit unsigned
- // field.
int64_t v = (int64_t)N->getSExtValue();
return isUInt<8>(v);
}]>;
@@ -294,81 +268,56 @@ def u7StrictPosImmPred : ImmLeaf<i32, [{
}]>;
def u7ImmPred : PatLeaf<(i32 imm), [{
- // u7ImmPred predicate - True if the immediate fits in a 7-bit unsigned
- // field.
int64_t v = (int64_t)N->getSExtValue();
return isUInt<7>(v);
}]>;
-
def u6ImmPred : PatLeaf<(i32 imm), [{
- // u6ImmPred predicate - True if the immediate fits in a 6-bit unsigned
- // field.
int64_t v = (int64_t)N->getSExtValue();
return isUInt<6>(v);
}]>;
def u6_0ImmPred : PatLeaf<(i32 imm), [{
- // u6_0ImmPred predicate - True if the immediate fits in a 6-bit unsigned
- // field. Same as u6ImmPred.
int64_t v = (int64_t)N->getSExtValue();
return isUInt<6>(v);
}]>;
def u6_1ImmPred : PatLeaf<(i32 imm), [{
- // u6_1ImmPred predicate - True if the immediate fits in a 7-bit unsigned
- // field that is 1 bit alinged - multiple of 2.
int64_t v = (int64_t)N->getSExtValue();
return isShiftedUInt<6,1>(v);
}]>;
def u6_2ImmPred : PatLeaf<(i32 imm), [{
- // u6_2ImmPred predicate - True if the immediate fits in a 8-bit unsigned
- // field that is 2 bits alinged - multiple of 4.
int64_t v = (int64_t)N->getSExtValue();
return isShiftedUInt<6,2>(v);
}]>;
def u6_3ImmPred : PatLeaf<(i32 imm), [{
- // u6_3ImmPred predicate - True if the immediate fits in a 9-bit unsigned
- // field that is 3 bits alinged - multiple of 8.
int64_t v = (int64_t)N->getSExtValue();
return isShiftedUInt<6,3>(v);
}]>;
def u5ImmPred : PatLeaf<(i32 imm), [{
- // u5ImmPred predicate - True if the immediate fits in a 5-bit unsigned
- // field.
int64_t v = (int64_t)N->getSExtValue();
return isUInt<5>(v);
}]>;
def u4ImmPred : PatLeaf<(i32 imm), [{
- // u4ImmPred predicate - True if the immediate fits in a 4-bit unsigned
- // field.
int64_t v = (int64_t)N->getSExtValue();
return isUInt<4>(v);
}]>;
def u3ImmPred : PatLeaf<(i32 imm), [{
- // u3ImmPred predicate - True if the immediate fits in a 3-bit unsigned
- // field.
int64_t v = (int64_t)N->getSExtValue();
return isUInt<3>(v);
}]>;
-
def u2ImmPred : PatLeaf<(i32 imm), [{
- // u2ImmPred predicate - True if the immediate fits in a 2-bit unsigned
- // field.
int64_t v = (int64_t)N->getSExtValue();
return isUInt<2>(v);
}]>;
-
def u1ImmPred : PatLeaf<(i1 imm), [{
- // u1ImmPred predicate - True if the immediate fits in a 1-bit unsigned
- // field.
int64_t v = (int64_t)N->getSExtValue();
return isUInt<1>(v);
}]>;
@@ -511,212 +460,6 @@ let PrintMethod = "printExtOperand" in {
def u6_3Ext : Operand<i32>;
}
-let PrintMethod = "printImmOperand" in
-def u0AlwaysExt : Operand<i32>;
-
-// Predicates for constant extendable operands
-def s16ExtPred : PatLeaf<(i32 imm), [{
- int64_t v = (int64_t)N->getSExtValue();
- if (isInt<16>(v))
- return true;
-
- // Return true if extending this immediate is profitable and the value
- // can fit in a 32-bit signed field.
- return isConstExtProfitable(Node) && isInt<32>(v);
-}]>;
-
-def s10ExtPred : PatLeaf<(i32 imm), [{
- int64_t v = (int64_t)N->getSExtValue();
- if (isInt<10>(v))
- return true;
-
- // Return true if extending this immediate is profitable and the value
- // can fit in a 32-bit signed field.
- return isConstExtProfitable(Node) && isInt<32>(v);
-}]>;
-
-def s9ExtPred : PatLeaf<(i32 imm), [{
- int64_t v = (int64_t)N->getSExtValue();
- if (isInt<9>(v))
- return true;
-
- // Return true if extending this immediate is profitable and the value
- // can fit in a 32-bit unsigned field.
- return isConstExtProfitable(Node) && isInt<32>(v);
-}]>;
-
-def s8ExtPred : PatLeaf<(i32 imm), [{
- int64_t v = (int64_t)N->getSExtValue();
- if (isInt<8>(v))
- return true;
-
- // Return true if extending this immediate is profitable and the value
- // can fit in a 32-bit signed field.
- return isConstExtProfitable(Node) && isInt<32>(v);
-}]>;
-
-def s8_16ExtPred : PatLeaf<(i32 imm), [{
- int64_t v = (int64_t)N->getSExtValue();
- if (isInt<8>(v))
- return true;
-
- // Return true if extending this immediate is profitable and the value
- // can't fit in a 16-bit signed field. This is required to avoid
- // unnecessary constant extenders.
- return isConstExtProfitable(Node) && !isInt<16>(v);
-}]>;
-
-def s6ExtPred : PatLeaf<(i32 imm), [{
- int64_t v = (int64_t)N->getSExtValue();
- if (isInt<6>(v))
- return true;
-
- // Return true if extending this immediate is profitable and the value
- // can fit in a 32-bit unsigned field.
- return isConstExtProfitable(Node) && isInt<32>(v);
-}]>;
-
-def s6_16ExtPred : PatLeaf<(i32 imm), [{
- int64_t v = (int64_t)N->getSExtValue();
- if (isInt<6>(v))
- return true;
-
- // Return true if extending this immediate is profitable and the value
- // can't fit in a 16-bit signed field. This is required to avoid
- // unnecessary constant extenders.
- return isConstExtProfitable(Node) && !isInt<16>(v);
-}]>;
-
-def s6_10ExtPred : PatLeaf<(i32 imm), [{
- int64_t v = (int64_t)N->getSExtValue();
- if (isInt<6>(v))
- return true;
-
- // Return true if extending this immediate is profitable and the value
- // can't fit in a 10-bit signed field. This is required to avoid
- // unnecessary constant extenders.
- return isConstExtProfitable(Node) && !isInt<10>(v);
-}]>;
-
-def s11_0ExtPred : PatLeaf<(i32 imm), [{
- int64_t v = (int64_t)N->getSExtValue();
- if (isInt<11>(v))
- return true;
-
- // Return true if extending this immediate is profitable and the value
- // can fit in a 32-bit signed field.
- return isConstExtProfitable(Node) && isInt<32>(v);
-}]>;
-
-def s11_1ExtPred : PatLeaf<(i32 imm), [{
- int64_t v = (int64_t)N->getSExtValue();
- if (isInt<12>(v))
- return isShiftedInt<11,1>(v);
-
- // Return true if extending this immediate is profitable and the low 1 bit
- // is zero (2-byte aligned).
- return isConstExtProfitable(Node) && isInt<32>(v) && ((v % 2) == 0);
-}]>;
-
-def s11_2ExtPred : PatLeaf<(i32 imm), [{
- int64_t v = (int64_t)N->getSExtValue();
- if (isInt<13>(v))
- return isShiftedInt<11,2>(v);
-
- // Return true if extending this immediate is profitable and the low 2-bits
- // are zero (4-byte aligned).
- return isConstExtProfitable(Node) && isInt<32>(v) && ((v % 4) == 0);
-}]>;
-
-def s11_3ExtPred : PatLeaf<(i32 imm), [{
- int64_t v = (int64_t)N->getSExtValue();
- if (isInt<14>(v))
- return isShiftedInt<11,3>(v);
-
- // Return true if extending this immediate is profitable and the low 3-bits
- // are zero (8-byte aligned).
- return isConstExtProfitable(Node) && isInt<32>(v) && ((v % 8) == 0);
-}]>;
-
-def u0AlwaysExtPred : PatLeaf<(i32 imm), [{
- // Predicate for an unsigned 32-bit value that always needs to be extended.
- if (isConstExtProfitable(Node)) {
- int64_t v = (int64_t)N->getSExtValue();
- return isUInt<32>(v);
- }
- return false;
-}]>;
-
-def u6ExtPred : PatLeaf<(i32 imm), [{
- int64_t v = (int64_t)N->getSExtValue();
- if (isUInt<6>(v))
- return true;
-
- // Return true if extending this immediate is profitable and the value
- // can fit in a 32-bit unsigned field.
- return isConstExtProfitable(Node) && isUInt<32>(v);
-}]>;
-
-def u7ExtPred : PatLeaf<(i32 imm), [{
- int64_t v = (int64_t)N->getSExtValue();
- if (isUInt<7>(v))
- return true;
-
- // Return true if extending this immediate is profitable and the value
- // can fit in a 32-bit unsigned field.
- return isConstExtProfitable(Node) && isUInt<32>(v);
-}]>;
-
-def u8ExtPred : PatLeaf<(i32 imm), [{
- int64_t v = (int64_t)N->getSExtValue();
- if (isUInt<8>(v))
- return true;
-
- // Return true if extending this immediate is profitable and the value
- // can fit in a 32-bit unsigned field.
- return isConstExtProfitable(Node) && isUInt<32>(v);
-}]>;
-
-def u9ExtPred : PatLeaf<(i32 imm), [{
- int64_t v = (int64_t)N->getSExtValue();
- if (isUInt<9>(v))
- return true;
-
- // Return true if extending this immediate is profitable and the value
- // can fit in a 32-bit unsigned field.
- return isConstExtProfitable(Node) && isUInt<32>(v);
-}]>;
-
-def u6_1ExtPred : PatLeaf<(i32 imm), [{
- int64_t v = (int64_t)N->getSExtValue();
- if (isUInt<7>(v))
- return isShiftedUInt<6,1>(v);
-
- // Return true if extending this immediate is profitable and the value
- // can fit in a 32-bit unsigned field.
- return isConstExtProfitable(Node) && isUInt<32>(v) && ((v % 2) == 0);
-}]>;
-
-def u6_2ExtPred : PatLeaf<(i32 imm), [{
- int64_t v = (int64_t)N->getSExtValue();
- if (isUInt<8>(v))
- return isShiftedUInt<6,2>(v);
-
- // Return true if extending this immediate is profitable and the value
- // can fit in a 32-bit unsigned field.
- return isConstExtProfitable(Node) && isUInt<32>(v) && ((v % 4) == 0);
-}]>;
-
-def u6_3ExtPred : PatLeaf<(i32 imm), [{
- int64_t v = (int64_t)N->getSExtValue();
- if (isUInt<9>(v))
- return isShiftedUInt<6,3>(v);
-
- // Return true if extending this immediate is profitable and the value
- // can fit in a 32-bit unsigned field.
- return isConstExtProfitable(Node) && isUInt<32>(v) && ((v % 8) == 0);
-}]>;
-
// This complex pattern exists only to create a machine instruction operand
// of type "frame index". There doesn't seem to be a way to do that directly
@@ -729,41 +472,8 @@ def AddrFI : ComplexPattern<i32, 1, "SelectAddrFI", [frameindex], []>;
def AddrGA : ComplexPattern<i32, 1, "SelectAddrGA", [], []>;
def AddrGP : ComplexPattern<i32, 1, "SelectAddrGP", [], []>;
-// Addressing modes.
-
-def ADDRrr : ComplexPattern<i32, 2, "SelectADDRrr", [], []>;
-def ADDRri : ComplexPattern<i32, 2, "SelectADDRri", [frameindex], []>;
-def ADDRriS11_0 : ComplexPattern<i32, 2, "SelectADDRriS11_0", [frameindex], []>;
-def ADDRriS11_1 : ComplexPattern<i32, 2, "SelectADDRriS11_1", [frameindex], []>;
-def ADDRriS11_2 : ComplexPattern<i32, 2, "SelectADDRriS11_2", [frameindex], []>;
-def ADDRriS11_3 : ComplexPattern<i32, 2, "SelectADDRriS11_3", [frameindex], []>;
-def ADDRriU6_0 : ComplexPattern<i32, 2, "SelectADDRriU6_0", [frameindex], []>;
-def ADDRriU6_1 : ComplexPattern<i32, 2, "SelectADDRriU6_1", [frameindex], []>;
-def ADDRriU6_2 : ComplexPattern<i32, 2, "SelectADDRriU6_2", [frameindex], []>;
-
// Address operands.
-def MEMrr : Operand<i32> {
- let PrintMethod = "printMEMrrOperand";
- let MIOperandInfo = (ops IntRegs, IntRegs);
-}
-
-def MEMri : Operand<i32> {
- let PrintMethod = "printMEMriOperand";
- let MIOperandInfo = (ops IntRegs, IntRegs);
-}
-
-def MEMri_s11_2 : Operand<i32>,
- ComplexPattern<i32, 2, "SelectMEMriS11_2", []> {
- let PrintMethod = "printMEMriOperand";
- let MIOperandInfo = (ops IntRegs, s11Imm);
-}
-
-def FrameIndex : Operand<i32> {
- let PrintMethod = "printFrameIndexOperand";
- let MIOperandInfo = (ops IntRegs, s11Imm);
-}
-
let PrintMethod = "printGlobalOperand" in {
def globaladdress : Operand<i32>;
def globaladdressExt : Operand<i32>;
diff --git a/lib/Target/Hexagon/HexagonPeephole.cpp b/lib/Target/Hexagon/HexagonPeephole.cpp
index afd3a17..503bfdb 100644
--- a/lib/Target/Hexagon/HexagonPeephole.cpp
+++ b/lib/Target/Hexagon/HexagonPeephole.cpp
@@ -271,15 +271,8 @@ bool HexagonPeephole::runOnMachineFunction(MachineFunction &MF) {
switch (Op) {
case Hexagon::C2_mux:
case Hexagon::C2_muxii:
- case Hexagon::TFR_condset_ii:
NewOp = Op;
break;
- case Hexagon::TFR_condset_ri:
- NewOp = Hexagon::TFR_condset_ir;
- break;
- case Hexagon::TFR_condset_ir:
- NewOp = Hexagon::TFR_condset_ri;
- break;
case Hexagon::C2_muxri:
NewOp = Hexagon::C2_muxir;
break;
diff --git a/lib/Target/Hexagon/HexagonRegisterInfo.cpp b/lib/Target/Hexagon/HexagonRegisterInfo.cpp
index 3df98d6..86eaee8 100644
--- a/lib/Target/Hexagon/HexagonRegisterInfo.cpp
+++ b/lib/Target/Hexagon/HexagonRegisterInfo.cpp
@@ -37,11 +37,8 @@
using namespace llvm;
-
-HexagonRegisterInfo::HexagonRegisterInfo(HexagonSubtarget &st)
- : HexagonGenRegisterInfo(Hexagon::R31),
- Subtarget(st) {
-}
+HexagonRegisterInfo::HexagonRegisterInfo()
+ : HexagonGenRegisterInfo(Hexagon::R31) {}
const MCPhysReg *
HexagonRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
@@ -51,7 +48,7 @@ HexagonRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
Hexagon::R24, Hexagon::R25, Hexagon::R26, Hexagon::R27, 0
};
- switch(Subtarget.getHexagonArchVersion()) {
+ switch (MF->getSubtarget<HexagonSubtarget>().getHexagonArchVersion()) {
case HexagonSubtarget::V4:
case HexagonSubtarget::V5:
return CalleeSavedRegsV3;
@@ -89,7 +86,7 @@ HexagonRegisterInfo::getCalleeSavedRegClasses(const MachineFunction *MF) const {
&Hexagon::IntRegsRegClass, &Hexagon::IntRegsRegClass,
};
- switch(Subtarget.getHexagonArchVersion()) {
+ switch (MF->getSubtarget<HexagonSubtarget>().getHexagonArchVersion()) {
case HexagonSubtarget::V4:
case HexagonSubtarget::V5:
return CalleeSavedRegClassesV3;
@@ -122,7 +119,9 @@ void HexagonRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
Offset -= 2 * Hexagon_WordSize;
}
- const unsigned FrameSize = MFI.getStackSize();
+ unsigned FrameSize = MFI.getStackSize();
+ if (MI.getOpcode() == Hexagon::TFR_FI)
+ MI.setDesc(TII.get(Hexagon::A2_addi));
if (!MFI.hasVarSizedObjects() &&
TII.isValidOffset(MI.getOpcode(), (FrameSize+Offset)) &&
diff --git a/lib/Target/Hexagon/HexagonRegisterInfo.h b/lib/Target/Hexagon/HexagonRegisterInfo.h
index a83b502..dc6dd2a 100644
--- a/lib/Target/Hexagon/HexagonRegisterInfo.h
+++ b/lib/Target/Hexagon/HexagonRegisterInfo.h
@@ -37,19 +37,11 @@
#define HEXAGON_RESERVED_REG_2 Hexagon::R11
namespace llvm {
-
-class HexagonSubtarget;
-class HexagonInstrInfo;
-class Type;
-
struct HexagonRegisterInfo : public HexagonGenRegisterInfo {
- HexagonSubtarget &Subtarget;
-
- HexagonRegisterInfo(HexagonSubtarget &st);
+ HexagonRegisterInfo();
/// Code Generation virtual methods...
- const MCPhysReg *
- getCalleeSavedRegs(const MachineFunction *MF = nullptr) const override;
+ const MCPhysReg *getCalleeSavedRegs(const MachineFunction *MF) const override;
const TargetRegisterClass* const*
getCalleeSavedRegClasses(const MachineFunction *MF = nullptr) const;
diff --git a/lib/Target/Hexagon/HexagonSplitConst32AndConst64.cpp b/lib/Target/Hexagon/HexagonSplitConst32AndConst64.cpp
index ce6a39a..1a4c7ae 100644
--- a/lib/Target/Hexagon/HexagonSplitConst32AndConst64.cpp
+++ b/lib/Target/Hexagon/HexagonSplitConst32AndConst64.cpp
@@ -71,6 +71,7 @@ bool HexagonSplitConst32AndConst64::runOnMachineFunction(MachineFunction &Fn) {
return true;
const TargetInstrInfo *TII = Fn.getSubtarget().getInstrInfo();
+ const TargetRegisterInfo *TRI = Fn.getSubtarget().getRegisterInfo();
// Loop over all of the basic blocks
for (MachineFunction::iterator MBBb = Fn.begin(), MBBe = Fn.end();
@@ -82,82 +83,78 @@ bool HexagonSplitConst32AndConst64::runOnMachineFunction(MachineFunction &Fn) {
while (MII != MIE) {
MachineInstr *MI = MII;
int Opc = MI->getOpcode();
- if (Opc == Hexagon::CONST32_set) {
+ if (Opc == Hexagon::CONST32_set_jt) {
int DestReg = MI->getOperand(0).getReg();
MachineOperand &Symbol = MI->getOperand (1);
-
- BuildMI (*MBB, MII, MI->getDebugLoc(),
- TII->get(Hexagon::LO), DestReg).addOperand(Symbol);
BuildMI (*MBB, MII, MI->getDebugLoc(),
- TII->get(Hexagon::HI), DestReg).addOperand(Symbol);
- // MBB->erase returns the iterator to the next instruction, which is the
- // one we want to process next
- MII = MBB->erase (MI);
- continue;
- }
- else if (Opc == Hexagon::CONST32_set_jt) {
- int DestReg = MI->getOperand(0).getReg();
- MachineOperand &Symbol = MI->getOperand (1);
+ TII->get(Hexagon::A2_tfrsi), DestReg).addOperand(Symbol);
- BuildMI (*MBB, MII, MI->getDebugLoc(),
- TII->get(Hexagon::LO_jt), DestReg).addOperand(Symbol);
- BuildMI (*MBB, MII, MI->getDebugLoc(),
- TII->get(Hexagon::HI_jt), DestReg).addOperand(Symbol);
// MBB->erase returns the iterator to the next instruction, which is the
// one we want to process next
MII = MBB->erase (MI);
continue;
}
- else if (Opc == Hexagon::CONST32_Label) {
+ else if (Opc == Hexagon::CONST32_Int_Real &&
+ MI->getOperand(1).isBlockAddress()) {
int DestReg = MI->getOperand(0).getReg();
MachineOperand &Symbol = MI->getOperand (1);
BuildMI (*MBB, MII, MI->getDebugLoc(),
- TII->get(Hexagon::LO_PIC), DestReg).addOperand(Symbol);
+ TII->get(Hexagon::LO), DestReg).addOperand(Symbol);
BuildMI (*MBB, MII, MI->getDebugLoc(),
- TII->get(Hexagon::HI_PIC), DestReg).addOperand(Symbol);
+ TII->get(Hexagon::HI), DestReg).addOperand(Symbol);
// MBB->erase returns the iterator to the next instruction, which is the
// one we want to process next
MII = MBB->erase (MI);
continue;
}
- else if (Opc == Hexagon::CONST32_Int_Real) {
+
+ else if (Opc == Hexagon::CONST32_Int_Real ||
+ Opc == Hexagon::CONST32_Float_Real) {
int DestReg = MI->getOperand(0).getReg();
- int64_t ImmValue = MI->getOperand(1).getImm ();
- BuildMI (*MBB, MII, MI->getDebugLoc(),
- TII->get(Hexagon::LOi), DestReg).addImm(ImmValue);
- BuildMI (*MBB, MII, MI->getDebugLoc(),
- TII->get(Hexagon::HIi), DestReg).addImm(ImmValue);
+ // We have to convert an FP immediate into its corresponding integer
+ // representation
+ int64_t ImmValue;
+ if (Opc == Hexagon::CONST32_Float_Real) {
+ APFloat Val = MI->getOperand(1).getFPImm()->getValueAPF();
+ ImmValue = *Val.bitcastToAPInt().getRawData();
+ }
+ else
+ ImmValue = MI->getOperand(1).getImm();
+
+ BuildMI(*MBB, MII, MI->getDebugLoc(),
+ TII->get(Hexagon::A2_tfrsi), DestReg).addImm(ImmValue);
MII = MBB->erase (MI);
continue;
}
- else if (Opc == Hexagon::CONST64_Int_Real) {
+ else if (Opc == Hexagon::CONST64_Int_Real ||
+ Opc == Hexagon::CONST64_Float_Real) {
int DestReg = MI->getOperand(0).getReg();
- int64_t ImmValue = MI->getOperand(1).getImm ();
- unsigned DestLo = Fn.getSubtarget().getRegisterInfo()->getSubReg(
- DestReg, Hexagon::subreg_loreg);
- unsigned DestHi = Fn.getSubtarget().getRegisterInfo()->getSubReg(
- DestReg, Hexagon::subreg_hireg);
+
+ // We have to convert an FP immediate into its corresponding integer
+ // representation
+ int64_t ImmValue;
+ if (Opc == Hexagon::CONST64_Float_Real) {
+ APFloat Val = MI->getOperand(1).getFPImm()->getValueAPF();
+ ImmValue = *Val.bitcastToAPInt().getRawData();
+ }
+ else
+ ImmValue = MI->getOperand(1).getImm();
+
+ unsigned DestLo = TRI->getSubReg(DestReg, Hexagon::subreg_loreg);
+ unsigned DestHi = TRI->getSubReg(DestReg, Hexagon::subreg_hireg);
int32_t LowWord = (ImmValue & 0xFFFFFFFF);
int32_t HighWord = (ImmValue >> 32) & 0xFFFFFFFF;
- // Lower Registers Lower Half
- BuildMI (*MBB, MII, MI->getDebugLoc(),
- TII->get(Hexagon::LOi), DestLo).addImm(LowWord);
- // Lower Registers Higher Half
+ BuildMI(*MBB, MII, MI->getDebugLoc(),
+ TII->get(Hexagon::A2_tfrsi), DestLo).addImm(LowWord);
BuildMI (*MBB, MII, MI->getDebugLoc(),
- TII->get(Hexagon::HIi), DestLo).addImm(LowWord);
- // Higher Registers Lower Half
- BuildMI (*MBB, MII, MI->getDebugLoc(),
- TII->get(Hexagon::LOi), DestHi).addImm(HighWord);
- // Higher Registers Higher Half.
- BuildMI (*MBB, MII, MI->getDebugLoc(),
- TII->get(Hexagon::HIi), DestHi).addImm(HighWord);
+ TII->get(Hexagon::A2_tfrsi), DestHi).addImm(HighWord);
MII = MBB->erase (MI);
continue;
- }
+ }
++MII;
}
}
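The bit-level idea behind the new CONST64_Float_Real handling, sketched in portable C++ (the pass itself goes through APFloat and getRawData; names here are illustrative):

    #include <cstdint>
    #include <cstring>

    // Reinterpret a double as its 64-bit pattern, then split it into the
    // two 32-bit words destined for the low and high subregisters.
    void splitConst64(double d, uint32_t &lowWord, uint32_t &highWord) {
      uint64_t bits;
      std::memcpy(&bits, &d, sizeof bits);  // bit-cast without UB
      lowWord  = (uint32_t)(bits & 0xFFFFFFFF);
      highWord = (uint32_t)(bits >> 32);
    }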
diff --git a/lib/Target/Hexagon/HexagonSplitTFRCondSets.cpp b/lib/Target/Hexagon/HexagonSplitTFRCondSets.cpp
deleted file mode 100644
index 8873bb9..0000000
--- a/lib/Target/Hexagon/HexagonSplitTFRCondSets.cpp
+++ /dev/null
@@ -1,172 +0,0 @@
-//===-- HexagonSplitTFRCondSets.cpp - split TFR condsets into xfers -------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//
-//===----------------------------------------------------------------------===//
-// This pass tries to provide opportunities for better optimization of muxes.
-// The default code generated for something like: flag = (a == b) ? 1 : 3;
-// would be:
-//
-// {p0 = cmp.eq(r0,r1)}
-// {r3 = mux(p0,#1,#3)}
-//
-// This requires two packets. If we use .new predicated immediate transfers,
-// then we can do this in a single packet, e.g.:
-//
-// {p0 = cmp.eq(r0,r1)
-// if (p0.new) r3 = #1
-// if (!p0.new) r3 = #3}
-//
-// Note that the conditional assignments are not generated in .new form here.
-// We assume opptimisically that they will be formed later.
-//
-//===----------------------------------------------------------------------===//
-
-#include "Hexagon.h"
-#include "HexagonMachineFunctionInfo.h"
-#include "HexagonSubtarget.h"
-#include "HexagonTargetMachine.h"
-#include "llvm/CodeGen/LatencyPriorityQueue.h"
-#include "llvm/CodeGen/MachineDominators.h"
-#include "llvm/CodeGen/MachineFunctionPass.h"
-#include "llvm/CodeGen/MachineInstrBuilder.h"
-#include "llvm/CodeGen/MachineLoopInfo.h"
-#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/CodeGen/Passes.h"
-#include "llvm/CodeGen/ScheduleHazardRecognizer.h"
-#include "llvm/CodeGen/SchedulerRegistry.h"
-#include "llvm/Support/Compiler.h"
-#include "llvm/Support/Debug.h"
-#include "llvm/Support/MathExtras.h"
-#include "llvm/Target/TargetInstrInfo.h"
-#include "llvm/Target/TargetMachine.h"
-#include "llvm/Target/TargetRegisterInfo.h"
-
-using namespace llvm;
-
-#define DEBUG_TYPE "xfer"
-
-namespace llvm {
- void initializeHexagonSplitTFRCondSetsPass(PassRegistry&);
-}
-
-
-namespace {
-
-class HexagonSplitTFRCondSets : public MachineFunctionPass {
- public:
- static char ID;
- HexagonSplitTFRCondSets() : MachineFunctionPass(ID) {
- initializeHexagonSplitTFRCondSetsPass(*PassRegistry::getPassRegistry());
- }
-
- const char *getPassName() const override {
- return "Hexagon Split TFRCondSets";
- }
- bool runOnMachineFunction(MachineFunction &Fn) override;
-};
-
-
-char HexagonSplitTFRCondSets::ID = 0;
-
-
-bool HexagonSplitTFRCondSets::runOnMachineFunction(MachineFunction &Fn) {
-
- const TargetInstrInfo *TII = Fn.getSubtarget().getInstrInfo();
-
- // Loop over all of the basic blocks.
- for (MachineFunction::iterator MBBb = Fn.begin(), MBBe = Fn.end();
- MBBb != MBBe; ++MBBb) {
- MachineBasicBlock* MBB = MBBb;
- // Traverse the basic block.
- for (MachineBasicBlock::iterator MII = MBB->begin(); MII != MBB->end();
- ++MII) {
- MachineInstr *MI = MII;
- switch(MI->getOpcode()) {
- case Hexagon::TFR_condset_ri: {
- int DestReg = MI->getOperand(0).getReg();
- int SrcReg1 = MI->getOperand(2).getReg();
-
- // Do not emit the predicated copy if the source and the destination
- // is the same register.
- if (DestReg != SrcReg1) {
- BuildMI(*MBB, MII, MI->getDebugLoc(),
- TII->get(Hexagon::A2_tfrt), DestReg).
- addReg(MI->getOperand(1).getReg()).addReg(SrcReg1);
- }
- BuildMI(*MBB, MII, MI->getDebugLoc(),
- TII->get(Hexagon::C2_cmoveif), DestReg).
- addReg(MI->getOperand(1).getReg()).
- addImm(MI->getOperand(3).getImm());
-
- MII = MBB->erase(MI);
- --MII;
- break;
- }
- case Hexagon::TFR_condset_ir: {
- int DestReg = MI->getOperand(0).getReg();
- int SrcReg2 = MI->getOperand(3).getReg();
-
- BuildMI(*MBB, MII, MI->getDebugLoc(),
- TII->get(Hexagon::C2_cmoveit), DestReg).
- addReg(MI->getOperand(1).getReg()).
- addImm(MI->getOperand(2).getImm());
-
- // Do not emit the predicated copy if the source and
- // the destination is the same register.
- if (DestReg != SrcReg2) {
- BuildMI(*MBB, MII, MI->getDebugLoc(),
- TII->get(Hexagon::A2_tfrf), DestReg).
- addReg(MI->getOperand(1).getReg()).addReg(SrcReg2);
- }
- MII = MBB->erase(MI);
- --MII;
- break;
- }
- case Hexagon::TFR_condset_ii: {
- int DestReg = MI->getOperand(0).getReg();
- int SrcReg1 = MI->getOperand(1).getReg();
-
- int Immed1 = MI->getOperand(2).getImm();
- int Immed2 = MI->getOperand(3).getImm();
- BuildMI(*MBB, MII, MI->getDebugLoc(),
- TII->get(Hexagon::C2_cmoveit),
- DestReg).addReg(SrcReg1).addImm(Immed1);
- BuildMI(*MBB, MII, MI->getDebugLoc(),
- TII->get(Hexagon::C2_cmoveif),
- DestReg).addReg(SrcReg1).addImm(Immed2);
- MII = MBB->erase(MI);
- --MII;
- break;
- }
- }
- }
- }
- return true;
-}
-
-}
-
-//===----------------------------------------------------------------------===//
-// Public Constructor Functions
-//===----------------------------------------------------------------------===//
-
-static void initializePassOnce(PassRegistry &Registry) {
- const char *Name = "Hexagon Split TFRCondSets";
- PassInfo *PI = new PassInfo(Name, "hexagon-split-tfr",
- &HexagonSplitTFRCondSets::ID, nullptr, false,
- false);
- Registry.registerPass(*PI, true);
-}
-
-void llvm::initializeHexagonSplitTFRCondSetsPass(PassRegistry &Registry) {
- CALL_ONCE_INITIALIZATION(initializePassOnce)
-}
-
-FunctionPass *llvm::createHexagonSplitTFRCondSets() {
- return new HexagonSplitTFRCondSets();
-}
diff --git a/lib/Target/Hexagon/HexagonSubtarget.cpp b/lib/Target/Hexagon/HexagonSubtarget.cpp
index 380f023..1717ae3 100644
--- a/lib/Target/Hexagon/HexagonSubtarget.cpp
+++ b/lib/Target/Hexagon/HexagonSubtarget.cpp
@@ -48,6 +48,10 @@ EnableIEEERndNear(
cl::Hidden, cl::ZeroOrMore, cl::init(false),
cl::desc("Generate non-chopped conversion from fp to int."));
+static cl::opt<bool> DisableHexagonMISched("disable-hexagon-misched",
+ cl::Hidden, cl::ZeroOrMore, cl::init(false),
+ cl::desc("Disable Hexagon MI Scheduling"));
+
HexagonSubtarget &
HexagonSubtarget::initializeSubtargetDependencies(StringRef CPU, StringRef FS) {
// If the programmer has not specified a Hexagon version, default to -mv4.
@@ -91,3 +95,9 @@ HexagonSubtarget::HexagonSubtarget(StringRef TT, StringRef CPU, StringRef FS,
// Pin the vtable to this file.
void HexagonSubtarget::anchor() {}
+
+bool HexagonSubtarget::enableMachineScheduler() const {
+ if (DisableHexagonMISched.getNumOccurrences())
+ return !DisableHexagonMISched;
+ return true;
+}
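getNumOccurrences() makes the option effectively tri-state: unset means default-on, while an explicit value wins either way. The same logic in a standalone C++ sketch (illustrative signature, not the cl::opt API):

    #include <optional>

    // Stand-in for the cl::opt occurrence check: an explicit command-line
    // setting wins; otherwise the MI scheduler defaults to enabled.
    bool enableMachineScheduler(std::optional<bool> disableFlag) {
      if (disableFlag.has_value())
        return !*disableFlag;
      return true;
    }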
diff --git a/lib/Target/Hexagon/HexagonSubtarget.h b/lib/Target/Hexagon/HexagonSubtarget.h
index 57de546..780567b 100644
--- a/lib/Target/Hexagon/HexagonSubtarget.h
+++ b/lib/Target/Hexagon/HexagonSubtarget.h
@@ -85,6 +85,11 @@ public:
bool hasV5TOps() const { return getHexagonArchVersion() >= V5; }
bool hasV5TOpsOnly() const { return getHexagonArchVersion() == V5; }
bool modeIEEERndNear() const { return ModeIEEERndNear; }
+ bool enableMachineScheduler() const override;
+ // Always use the TargetLowering default scheduler.
+ // FIXME: This will use the vliw scheduler which is probably just hurting
+ // compile time and will be removed eventually anyway.
+ bool enableMachineSchedDefaultSched() const override { return false; }
const std::string &getCPUString () const { return CPUString; }
diff --git a/lib/Target/Hexagon/HexagonTargetMachine.cpp b/lib/Target/Hexagon/HexagonTargetMachine.cpp
index 64f75a3..48b0bc8 100644
--- a/lib/Target/Hexagon/HexagonTargetMachine.cpp
+++ b/lib/Target/Hexagon/HexagonTargetMachine.cpp
@@ -29,10 +29,6 @@ using namespace llvm;
static cl:: opt<bool> DisableHardwareLoops("disable-hexagon-hwloops",
cl::Hidden, cl::desc("Disable Hardware Loops for Hexagon target"));
-static cl::opt<bool> DisableHexagonMISched("disable-hexagon-misched",
- cl::Hidden, cl::ZeroOrMore, cl::init(false),
- cl::desc("Disable Hexagon MI Scheduling"));
-
static cl::opt<bool> DisableHexagonCFGOpt("disable-hexagon-cfgopt",
cl::Hidden, cl::ZeroOrMore, cl::init(false),
cl::desc("Disable Hexagon CFG Optimization"));
@@ -69,9 +65,10 @@ HexagonTargetMachine::HexagonTargetMachine(const Target &T, StringRef TT,
const TargetOptions &Options,
Reloc::Model RM, CodeModel::Model CM,
CodeGenOpt::Level OL)
- : LLVMTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL),
+ : LLVMTargetMachine(T, "e-m:e-p:32:32-i1:32-i64:64-a:0-n32", TT, CPU, FS,
+ Options, RM, CM, OL),
TLOF(make_unique<HexagonTargetObjectFile>()),
- DL("e-m:e-p:32:32-i1:32-i64:64-a:0-n32"), Subtarget(TT, CPU, FS, *this) {
+ Subtarget(TT, CPU, FS, *this) {
initAsmInfo();
}
@@ -82,16 +79,7 @@ namespace {
class HexagonPassConfig : public TargetPassConfig {
public:
HexagonPassConfig(HexagonTargetMachine *TM, PassManagerBase &PM)
- : TargetPassConfig(TM, PM) {
- // FIXME: Rather than calling enablePass(&MachineSchedulerID) below, define
- // HexagonSubtarget::enableMachineScheduler() { return true; }.
- // That will bypass the SelectionDAG VLIW scheduler, which is probably just
- // hurting compile time and will be removed eventually anyway.
- if (DisableHexagonMISched)
- disablePass(&MachineSchedulerID);
- else
- enablePass(&MachineSchedulerID);
- }
+ : TargetPassConfig(TM, PM) {}
HexagonTargetMachine &getHexagonTargetMachine() const {
return getTM<HexagonTargetMachine>();
@@ -159,9 +147,6 @@ void HexagonPassConfig::addPreEmitPass() {
// Expand Spill code for predicate registers.
addPass(createHexagonExpandPredSpillCode(), false);
- // Split up TFRcondsets into conditional transfers.
- addPass(createHexagonSplitTFRCondSets(), false);
-
// Create Packets.
if (!NoOpt) {
if (!DisableHardwareLoops)
diff --git a/lib/Target/Hexagon/HexagonTargetMachine.h b/lib/Target/Hexagon/HexagonTargetMachine.h
index e0b3a9b..5774f7e 100644
--- a/lib/Target/Hexagon/HexagonTargetMachine.h
+++ b/lib/Target/Hexagon/HexagonTargetMachine.h
@@ -24,7 +24,6 @@ class Module;
class HexagonTargetMachine : public LLVMTargetMachine {
std::unique_ptr<TargetLoweringObjectFile> TLOF;
- const DataLayout DL; // Calculates type size & alignment.
HexagonSubtarget Subtarget;
public:
@@ -33,8 +32,7 @@ public:
Reloc::Model RM, CodeModel::Model CM,
CodeGenOpt::Level OL);
~HexagonTargetMachine() override;
- const DataLayout *getDataLayout() const override { return &DL; }
- const HexagonSubtarget *getSubtargetImpl() const override {
+ const HexagonSubtarget *getSubtargetImpl(const Function &) const override {
return &Subtarget;
}
static unsigned getModuleMatchQuality(const Module &M);
diff --git a/lib/Target/Hexagon/HexagonVLIWPacketizer.cpp b/lib/Target/Hexagon/HexagonVLIWPacketizer.cpp
index c123640..4ca628e 100644
--- a/lib/Target/Hexagon/HexagonVLIWPacketizer.cpp
+++ b/lib/Target/Hexagon/HexagonVLIWPacketizer.cpp
@@ -389,7 +389,9 @@ static bool IsLoopN(MachineInstr *MI) {
/// callee-saved register.
static bool DoesModifyCalleeSavedReg(MachineInstr *MI,
const TargetRegisterInfo *TRI) {
- for (const MCPhysReg *CSR = TRI->getCalleeSavedRegs(); *CSR; ++CSR) {
+ for (const MCPhysReg *CSR =
+ TRI->getCalleeSavedRegs(MI->getParent()->getParent());
+ *CSR; ++CSR) {
unsigned CalleeSavedReg = *CSR;
if (MI->modifiesRegister(CalleeSavedReg, TRI))
return true;
@@ -401,10 +403,7 @@ static bool DoesModifyCalleeSavedReg(MachineInstr *MI,
// or new-value store.
bool HexagonPacketizerList::isNewifiable(MachineInstr* MI) {
const HexagonInstrInfo *QII = (const HexagonInstrInfo *) TII;
- if ( isCondInst(MI) || QII->mayBeNewStore(MI))
- return true;
- else
- return false;
+ return isCondInst(MI) || QII->mayBeNewStore(MI);
}
bool HexagonPacketizerList::isCondInst (MachineInstr* MI) {
diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonELFObjectWriter.cpp b/lib/Target/Hexagon/MCTargetDesc/HexagonELFObjectWriter.cpp
index 56c9dc7..4a3ac8c 100644
--- a/lib/Target/Hexagon/MCTargetDesc/HexagonELFObjectWriter.cpp
+++ b/lib/Target/Hexagon/MCTargetDesc/HexagonELFObjectWriter.cpp
@@ -11,6 +11,7 @@
#include "llvm/MC/MCAssembler.h"
#include "llvm/MC/MCELFObjectWriter.h"
#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
#define DEBUG_TYPE "hexagon-elf-writer"
diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonMCCodeEmitter.cpp b/lib/Target/Hexagon/MCTargetDesc/HexagonMCCodeEmitter.cpp
index a5a09ba..eac7d6d 100644
--- a/lib/Target/Hexagon/MCTargetDesc/HexagonMCCodeEmitter.cpp
+++ b/lib/Target/Hexagon/MCTargetDesc/HexagonMCCodeEmitter.cpp
@@ -49,9 +49,8 @@ void emitLittleEndian(uint64_t Binary, raw_ostream &OS) {
}
HexagonMCCodeEmitter::HexagonMCCodeEmitter(MCInstrInfo const &aMII,
- MCSubtargetInfo const &aMST,
MCContext &aMCT)
- : MST(aMST), MCT(aMCT), MCII (aMII) {}
+ : MCT(aMCT), MCII(aMII) {}
void HexagonMCCodeEmitter::EncodeInstruction(MCInst const &MI, raw_ostream &OS,
SmallVectorImpl<MCFixup> &Fixups,
@@ -75,15 +74,10 @@ HexagonMCCodeEmitter::getMachineOpValue(MCInst const &MI, MCOperand const &MO,
llvm_unreachable("Only Immediates and Registers implemented right now");
}
-MCSubtargetInfo const &HexagonMCCodeEmitter::getSubtargetInfo() const {
- return MST;
-}
-
MCCodeEmitter *llvm::createHexagonMCCodeEmitter(MCInstrInfo const &MII,
MCRegisterInfo const &MRI,
- MCSubtargetInfo const &MST,
MCContext &MCT) {
- return new HexagonMCCodeEmitter(MII, MST, MCT);
+ return new HexagonMCCodeEmitter(MII, MCT);
}
#include "HexagonGenMCCodeEmitter.inc"
diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonMCCodeEmitter.h b/lib/Target/Hexagon/MCTargetDesc/HexagonMCCodeEmitter.h
index db1d707..768c10e 100644
--- a/lib/Target/Hexagon/MCTargetDesc/HexagonMCCodeEmitter.h
+++ b/lib/Target/Hexagon/MCTargetDesc/HexagonMCCodeEmitter.h
@@ -26,13 +26,11 @@
namespace llvm {
class HexagonMCCodeEmitter : public MCCodeEmitter {
- MCSubtargetInfo const &MST;
MCContext &MCT;
MCInstrInfo const &MCII;
public:
- HexagonMCCodeEmitter(MCInstrInfo const &aMII, MCSubtargetInfo const &aMST,
- MCContext &aMCT);
+ HexagonMCCodeEmitter(MCInstrInfo const &aMII, MCContext &aMCT);
MCSubtargetInfo const &getSubtargetInfo() const;
diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.cpp b/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.cpp
index 09a305b..c63bf32 100644
--- a/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.cpp
+++ b/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.cpp
@@ -47,15 +47,6 @@ static MCRegisterInfo *createHexagonMCRegisterInfo(StringRef TT) {
return X;
}
-static MCStreamer *
-createHexagonELFStreamer(MCContext &Context, MCAsmBackend &MAB,
- raw_ostream &OS, MCCodeEmitter *CE,
- bool RelaxAll) {
- MCELFStreamer *ES = new MCELFStreamer(Context, MAB, OS, CE);
- return ES;
-}
-
-
static MCSubtargetInfo *
createHexagonMCSubtargetInfo(StringRef TT, StringRef CPU, StringRef FS) {
MCSubtargetInfo *X = new MCSubtargetInfo();
@@ -75,16 +66,6 @@ static MCAsmInfo *createHexagonMCAsmInfo(const MCRegisterInfo &MRI,
return MAI;
}
-static MCStreamer *createMCStreamer(Target const &T, StringRef TT,
- MCContext &Context, MCAsmBackend &MAB,
- raw_ostream &OS, MCCodeEmitter *Emitter,
- MCSubtargetInfo const &STI, bool RelaxAll) {
- MCStreamer *ES = createHexagonELFStreamer(Context, MAB, OS, Emitter, RelaxAll);
- new MCTargetStreamer(*ES);
- return ES;
-}
-
-
static MCCodeGenInfo *createHexagonMCCodeGenInfo(StringRef TT, Reloc::Model RM,
CodeModel::Model CM,
CodeGenOpt::Level OL) {
@@ -135,7 +116,4 @@ extern "C" void LLVMInitializeHexagonTargetMC() {
// Register the asm backend
TargetRegistry::RegisterMCAsmBackend(TheHexagonTarget,
createHexagonAsmBackend);
-
- // Register the obj streamer
- TargetRegistry::RegisterMCObjectStreamer(TheHexagonTarget, createMCStreamer);
}
diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.h b/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.h
index f074b65..17072d9 100644
--- a/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.h
+++ b/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.h
@@ -34,7 +34,6 @@ MCInstrInfo *createHexagonMCInstrInfo();
MCCodeEmitter *createHexagonMCCodeEmitter(MCInstrInfo const &MCII,
MCRegisterInfo const &MRI,
- MCSubtargetInfo const &MST,
MCContext &MCT);
MCAsmBackend *createHexagonAsmBackend(Target const &T,
diff --git a/lib/Target/MSP430/MCTargetDesc/MSP430MCTargetDesc.h b/lib/Target/MSP430/MCTargetDesc/MSP430MCTargetDesc.h
index 586f5d9..241f1d6 100644
--- a/lib/Target/MSP430/MCTargetDesc/MSP430MCTargetDesc.h
+++ b/lib/Target/MSP430/MCTargetDesc/MSP430MCTargetDesc.h
@@ -14,6 +14,8 @@
#ifndef LLVM_LIB_TARGET_MSP430_MCTARGETDESC_MSP430MCTARGETDESC_H
#define LLVM_LIB_TARGET_MSP430_MCTARGETDESC_MSP430MCTARGETDESC_H
+#include "llvm/Support/DataTypes.h"
+
namespace llvm {
class Target;
diff --git a/lib/Target/MSP430/MSP430ISelDAGToDAG.cpp b/lib/Target/MSP430/MSP430ISelDAGToDAG.cpp
index 2f70cde..591ceb5 100644
--- a/lib/Target/MSP430/MSP430ISelDAGToDAG.cpp
+++ b/lib/Target/MSP430/MSP430ISelDAGToDAG.cpp
@@ -104,7 +104,7 @@ namespace {
bool MatchWrapper(SDValue N, MSP430ISelAddressMode &AM);
bool MatchAddressBase(SDValue N, MSP430ISelAddressMode &AM);
- bool SelectInlineAsmMemoryOperand(const SDValue &Op, char ConstraintCode,
+ bool SelectInlineAsmMemoryOperand(const SDValue &Op, unsigned ConstraintID,
std::vector<SDValue> &OutOps) override;
// Include the pieces autogenerated from the target description.
@@ -280,12 +280,12 @@ bool MSP430DAGToDAGISel::SelectAddr(SDValue N,
}
bool MSP430DAGToDAGISel::
-SelectInlineAsmMemoryOperand(const SDValue &Op, char ConstraintCode,
+SelectInlineAsmMemoryOperand(const SDValue &Op, unsigned ConstraintID,
std::vector<SDValue> &OutOps) {
SDValue Op0, Op1;
- switch (ConstraintCode) {
+ switch (ConstraintID) {
default: return true;
- case 'm': // memory
+ case InlineAsm::Constraint_m: // memory
if (!SelectAddr(Op, Op0, Op1))
return true;
break;
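This MSP430 change tracks a migration from raw constraint characters to integer constraint IDs, so the switch compares against a named enumerator instead of 'm'. A standalone sketch of the two dispatch styles (the enum values here are illustrative, not LLVM's):

    enum ConstraintIDSketch { Constraint_Unknown = 0, Constraint_m = 1 };

    // Old style: dispatch on the raw constraint character.
    static bool selectOldStyle(char ConstraintCode) {
      switch (ConstraintCode) {
      default: return true;   // unknown constraint -> error
      case 'm': return false; // memory operand handled
      }
    }

    // New style: dispatch on a pre-decoded enum ID.
    static bool selectNewStyle(unsigned ID) {
      switch (ID) {
      default: return true;
      case Constraint_m: return false;
      }
    }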
diff --git a/lib/Target/MSP430/MSP430ISelLowering.h b/lib/Target/MSP430/MSP430ISelLowering.h
index 9266c3b..68868b6 100644
--- a/lib/Target/MSP430/MSP430ISelLowering.h
+++ b/lib/Target/MSP430/MSP430ISelLowering.h
@@ -102,6 +102,12 @@ namespace llvm {
const std::string &Constraint,
MVT VT) const override;
+ unsigned getInlineAsmMemConstraint(
+ const std::string &ConstraintCode) const override {
+ // FIXME: Map different constraints differently.
+ return InlineAsm::Constraint_m;
+ }
+
/// isTruncateFree - Return true if it's free to truncate a value of type
/// Ty1 to type Ty2. e.g. On msp430 it's free to truncate a i16 value in
/// register R15W to i8 by referencing its sub-register R15B.
diff --git a/lib/Target/MSP430/MSP430RegisterInfo.h b/lib/Target/MSP430/MSP430RegisterInfo.h
index 3f88a69..0cfa4a4 100644
--- a/lib/Target/MSP430/MSP430RegisterInfo.h
+++ b/lib/Target/MSP430/MSP430RegisterInfo.h
@@ -26,8 +26,7 @@ public:
MSP430RegisterInfo();
/// Code Generation virtual methods...
- const MCPhysReg *
- getCalleeSavedRegs(const MachineFunction *MF = nullptr) const override;
+ const MCPhysReg *getCalleeSavedRegs(const MachineFunction *MF) const override;
BitVector getReservedRegs(const MachineFunction &MF) const override;
const TargetRegisterClass*
diff --git a/lib/Target/MSP430/MSP430Subtarget.cpp b/lib/Target/MSP430/MSP430Subtarget.cpp
index 7468519..3dda3bf 100644
--- a/lib/Target/MSP430/MSP430Subtarget.cpp
+++ b/lib/Target/MSP430/MSP430Subtarget.cpp
@@ -25,7 +25,8 @@ using namespace llvm;
void MSP430Subtarget::anchor() { }
-MSP430Subtarget &MSP430Subtarget::initializeSubtargetDependencies(StringRef CPU, StringRef FS) {
+MSP430Subtarget &
+MSP430Subtarget::initializeSubtargetDependencies(StringRef CPU, StringRef FS) {
ParseSubtargetFeatures("generic", FS);
return *this;
}
diff --git a/lib/Target/MSP430/MSP430TargetMachine.cpp b/lib/Target/MSP430/MSP430TargetMachine.cpp
index 348e672..d6cc4ae 100644
--- a/lib/Target/MSP430/MSP430TargetMachine.cpp
+++ b/lib/Target/MSP430/MSP430TargetMachine.cpp
@@ -30,10 +30,11 @@ MSP430TargetMachine::MSP430TargetMachine(const Target &T, StringRef TT,
const TargetOptions &Options,
Reloc::Model RM, CodeModel::Model CM,
CodeGenOpt::Level OL)
- : LLVMTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL),
+ : LLVMTargetMachine(T, "e-m:e-p:16:16-i32:16:32-a:16-n8:16", TT, CPU, FS,
+ Options, RM, CM, OL),
TLOF(make_unique<TargetLoweringObjectFileELF>()),
// FIXME: Check DataLayout string.
- DL("e-m:e-p:16:16-i32:16:32-a:16-n8:16"), Subtarget(TT, CPU, FS, *this) {
+ Subtarget(TT, CPU, FS, *this) {
initAsmInfo();
}
diff --git a/lib/Target/MSP430/MSP430TargetMachine.h b/lib/Target/MSP430/MSP430TargetMachine.h
index c6a6a70..6ccd30d 100644
--- a/lib/Target/MSP430/MSP430TargetMachine.h
+++ b/lib/Target/MSP430/MSP430TargetMachine.h
@@ -25,7 +25,6 @@ namespace llvm {
///
class MSP430TargetMachine : public LLVMTargetMachine {
std::unique_ptr<TargetLoweringObjectFile> TLOF;
- const DataLayout DL; // Calculates type size & alignment
MSP430Subtarget Subtarget;
public:
@@ -35,8 +34,7 @@ public:
CodeGenOpt::Level OL);
~MSP430TargetMachine() override;
- const DataLayout *getDataLayout() const override { return &DL; }
- const MSP430Subtarget *getSubtargetImpl() const override {
+ const MSP430Subtarget *getSubtargetImpl(const Function &F) const override {
return &Subtarget;
}
TargetPassConfig *createPassConfig(PassManagerBase &PM) override;
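The two MSP430TargetMachine hunks above implement a single refactor: the target no longer keeps its own DataLayout member; the layout string is handed to the LLVMTargetMachine base constructor, which now owns it. A minimal sketch of that ownership move, using simplified types rather than the LLVM classes:

    #include <string>

    // Base class owns the layout description.
    class TargetMachineBaseSketch {
      std::string DataLayout;
    public:
      explicit TargetMachineBaseSketch(std::string DL)
          : DataLayout(std::move(DL)) {}
      const std::string &getDataLayout() const { return DataLayout; }
    };

    // Derived target passes its string up instead of duplicating the member.
    class MSP430LikeTarget : public TargetMachineBaseSketch {
    public:
      MSP430LikeTarget()
          : TargetMachineBaseSketch("e-m:e-p:16:16-i32:16:32-a:16-n8:16") {}
    };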
diff --git a/lib/Target/Mips/AsmParser/MipsAsmParser.cpp b/lib/Target/Mips/AsmParser/MipsAsmParser.cpp
index 1040bf7..6401bc1 100644
--- a/lib/Target/Mips/AsmParser/MipsAsmParser.cpp
+++ b/lib/Target/Mips/AsmParser/MipsAsmParser.cpp
@@ -29,6 +29,7 @@
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/SourceMgr.h"
#include "llvm/Support/TargetRegistry.h"
+#include "llvm/Support/raw_ostream.h"
#include <memory>
using namespace llvm;
@@ -53,7 +54,13 @@ public:
}
unsigned getATRegNum() const { return ATReg; }
- bool setATReg(unsigned Reg);
+ bool setATReg(unsigned Reg) {
+ if (Reg > 31)
+ return false;
+
+ ATReg = Reg;
+ return true;
+ }
bool isReorder() const { return Reorder; }
void setReorder() { Reorder = true; }
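Inlining setATReg() keeps the range check next to the data it guards: MIPS has 32 general-purpose registers, so any value above 31 is rejected and the stored register is left untouched. The same guard-then-assign shape as a tiny standalone sketch:

    class AssemblerOptionsSketch {
      unsigned ATReg = 1;
    public:
      // Returns false (and changes nothing) for out-of-range registers.
      bool setATReg(unsigned Reg) {
        if (Reg > 31)
          return false;
        ATReg = Reg;
        return true;
      }
      unsigned getATRegNum() const { return ATReg; }
    };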
@@ -193,6 +200,9 @@ class MipsAsmParser : public MCTargetAsmParser {
bool expandLoadStoreMultiple(MCInst &Inst, SMLoc IDLoc,
SmallVectorImpl<MCInst> &Instructions);
+ void createNop(bool hasShortDelaySlot, SMLoc IDLoc,
+ SmallVectorImpl<MCInst> &Instructions);
+
bool reportParseError(Twine ErrorMsg);
bool reportParseError(SMLoc Loc, Twine ErrorMsg);
@@ -236,6 +246,8 @@ class MipsAsmParser : public MCTargetAsmParser {
bool parseFpABIValue(MipsABIFlagsSection::FpABIKind &FpABI,
StringRef Directive);
+ bool parseInternalDirectiveReallowModule();
+
MCSymbolRefExpr::VariantKind getVariantKind(StringRef Symbol);
bool eatComma(StringRef ErrorStr);
@@ -1365,22 +1377,11 @@ bool MipsAsmParser::processInstruction(MCInst &Inst, SMLoc IDLoc,
}
}
+ // If this instruction has a delay slot and .set reorder is active,
+ // emit a NOP after it.
if (MCID.hasDelaySlot() && AssemblerOptions.back()->isReorder()) {
- // If this instruction has a delay slot and .set reorder is active,
- // emit a NOP after it.
Instructions.push_back(Inst);
- MCInst NopInst;
- if (hasShortDelaySlot(Inst.getOpcode())) {
- NopInst.setOpcode(Mips::MOVE16_MM);
- NopInst.addOperand(MCOperand::CreateReg(Mips::ZERO));
- NopInst.addOperand(MCOperand::CreateReg(Mips::ZERO));
- } else {
- NopInst.setOpcode(Mips::SLL);
- NopInst.addOperand(MCOperand::CreateReg(Mips::ZERO));
- NopInst.addOperand(MCOperand::CreateReg(Mips::ZERO));
- NopInst.addOperand(MCOperand::CreateImm(0));
- }
- Instructions.push_back(NopInst);
+ createNop(hasShortDelaySlot(Inst.getOpcode()), IDLoc, Instructions);
return false;
}
@@ -1584,10 +1585,10 @@ bool MipsAsmParser::processInstruction(MCInst &Inst, SMLoc IDLoc,
bool MipsAsmParser::needsExpansion(MCInst &Inst) {
switch (Inst.getOpcode()) {
- case Mips::LoadImm32Reg:
- case Mips::LoadAddr32Imm:
- case Mips::LoadAddr32Reg:
- case Mips::LoadImm64Reg:
+ case Mips::LoadImm32:
+ case Mips::LoadImm64:
+ case Mips::LoadAddrImm32:
+ case Mips::LoadAddrReg32:
case Mips::B_MM_Pseudo:
case Mips::LWM_MM:
case Mips::SWM_MM:
@@ -1603,17 +1604,17 @@ bool MipsAsmParser::expandInstruction(MCInst &Inst, SMLoc IDLoc,
SmallVectorImpl<MCInst> &Instructions) {
switch (Inst.getOpcode()) {
default: llvm_unreachable("unimplemented expansion");
- case Mips::LoadImm32Reg:
+ case Mips::LoadImm32:
return expandLoadImm(Inst, IDLoc, Instructions);
- case Mips::LoadImm64Reg:
+ case Mips::LoadImm64:
if (!isGP64bit()) {
Error(IDLoc, "instruction requires a 64-bit architecture");
return true;
}
return expandLoadImm(Inst, IDLoc, Instructions);
- case Mips::LoadAddr32Imm:
+ case Mips::LoadAddrImm32:
return expandLoadAddressImm(Inst, IDLoc, Instructions);
- case Mips::LoadAddr32Reg:
+ case Mips::LoadAddrReg32:
return expandLoadAddressReg(Inst, IDLoc, Instructions);
case Mips::B_MM_Pseudo:
return expandUncondBranchMMPseudo(Inst, IDLoc, Instructions);
@@ -1982,14 +1983,10 @@ bool MipsAsmParser::expandUncondBranchMMPseudo(
}
Instructions.push_back(Inst);
- if (AssemblerOptions.back()->isReorder()) {
- // If .set reorder is active, emit a NOP after the branch instruction.
- MCInst NopInst;
- NopInst.setOpcode(Mips::MOVE16_MM);
- NopInst.addOperand(MCOperand::CreateReg(Mips::ZERO));
- NopInst.addOperand(MCOperand::CreateReg(Mips::ZERO));
- Instructions.push_back(NopInst);
- }
+ // If .set reorder is active, emit a NOP after the branch instruction.
+ if (AssemblerOptions.back()->isReorder())
+ createNop(true, IDLoc, Instructions);
+
return false;
}
@@ -2132,6 +2129,22 @@ MipsAsmParser::expandLoadStoreMultiple(MCInst &Inst, SMLoc IDLoc,
return false;
}
+void MipsAsmParser::createNop(bool hasShortDelaySlot, SMLoc IDLoc,
+ SmallVectorImpl<MCInst> &Instructions) {
+ MCInst NopInst;
+ if (hasShortDelaySlot) {
+ NopInst.setOpcode(Mips::MOVE16_MM);
+ NopInst.addOperand(MCOperand::CreateReg(Mips::ZERO));
+ NopInst.addOperand(MCOperand::CreateReg(Mips::ZERO));
+ } else {
+ NopInst.setOpcode(Mips::SLL);
+ NopInst.addOperand(MCOperand::CreateReg(Mips::ZERO));
+ NopInst.addOperand(MCOperand::CreateReg(Mips::ZERO));
+ NopInst.addOperand(MCOperand::CreateImm(0));
+ }
+ Instructions.push_back(NopInst);
+}
+
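The createNop() change is a duplicate-code extraction: the NOP-building sequence that appeared in both processInstruction() and expandUncondBranchMMPseudo() now lives in one helper keyed on whether the delay slot is short. A self-contained sketch of that shape, with a plain struct standing in for MCInst:

    #include <cstdio>
    #include <vector>

    struct InstSketch { const char *Mnemonic; }; // stand-in for MCInst

    // One helper builds the right NOP for the slot type, so callers
    // stop duplicating the two operand-building sequences.
    static void createNopSketch(bool HasShortDelaySlot,
                                std::vector<InstSketch> &Out) {
      if (HasShortDelaySlot)
        Out.push_back({"move16 $zero, $zero"}); // microMIPS short-slot NOP
      else
        Out.push_back({"sll $zero, $zero, 0"}); // canonical MIPS NOP
    }

    int main() {
      std::vector<InstSketch> Insts;
      createNopSketch(/*HasShortDelaySlot=*/false, Insts);
      std::printf("%s\n", Insts.back().Mnemonic);
    }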
unsigned MipsAsmParser::checkTargetMatchPredicate(MCInst &Inst) {
// As described by the Mips32r2 spec, the registers Rd and Rs for
// jalr.hb must be different.
@@ -2370,14 +2383,6 @@ int MipsAsmParser::matchMSA128CtrlRegisterName(StringRef Name) {
return CC;
}
-bool MipsAssemblerOptions::setATReg(unsigned Reg) {
- if (Reg > 31)
- return false;
-
- ATReg = Reg;
- return true;
-}
-
int MipsAsmParser::getATReg(SMLoc Loc) {
int AT = AssemblerOptions.back()->getATRegNum();
if (AT == 0)
@@ -4429,9 +4434,25 @@ bool MipsAsmParser::ParseDirective(AsmToken DirectiveID) {
if (IDVal == ".module")
return parseDirectiveModule();
+ if (IDVal == ".llvm_internal_mips_reallow_module_directive")
+ return parseInternalDirectiveReallowModule();
+
return true;
}
+bool MipsAsmParser::parseInternalDirectiveReallowModule() {
+ // If this is not the end of the statement, report an error.
+ if (getLexer().isNot(AsmToken::EndOfStatement)) {
+ reportParseError("unexpected token, expected end of statement");
+ return false;
+ }
+
+ getTargetStreamer().reallowModuleDirective();
+
+ getParser().Lex(); // Eat EndOfStatement token.
+ return false;
+}
+
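parseInternalDirectiveReallowModule() follows the parser's standard no-argument directive shape: verify nothing follows the directive name, perform the side effect, then consume the end-of-statement token. A minimal token-stream sketch of that control flow (the lexer here is a stub, not LLVM's):

    #include <vector>

    enum class Tok { EndOfStatement, Other };

    struct LexerStub {
      std::vector<Tok> Toks; std::size_t Pos = 0;
      Tok peek() const { return Toks[Pos]; }
      void lex() { ++Pos; } // consume one token
    };

    static bool parseNoArgDirective(LexerStub &L, bool &Reallowed) {
      // If this is not the end of the statement, report an error (the hunk
      // above does the same, then returns false after diagnosing).
      if (L.peek() != Tok::EndOfStatement)
        return false;
      Reallowed = true; // the real directive calls reallowModuleDirective()
      L.lex();          // eat the EndOfStatement token
      return false;
    }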
extern "C" void LLVMInitializeMipsAsmParser() {
RegisterMCAsmParser<MipsAsmParser> X(TheMipsTarget);
RegisterMCAsmParser<MipsAsmParser> Y(TheMipselTarget);
diff --git a/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.h b/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.h
index dd0e54c..243b73d 100644
--- a/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.h
+++ b/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.h
@@ -32,10 +32,9 @@ class MipsAsmBackend : public MCAsmBackend {
bool Is64Bit; // 32 or 64 bit words
public:
- MipsAsmBackend(const Target &T, Triple::OSType _OSType, bool _isLittle,
- bool _is64Bit)
- : MCAsmBackend(), OSType(_OSType), IsLittle(_isLittle),
- Is64Bit(_is64Bit) {}
+ MipsAsmBackend(const Target &T, Triple::OSType OSType, bool IsLittle,
+ bool Is64Bit)
+ : MCAsmBackend(), OSType(OSType), IsLittle(IsLittle), Is64Bit(Is64Bit) {}
MCObjectWriter *createObjectWriter(raw_ostream &OS) const override;
diff --git a/lib/Target/Mips/MCTargetDesc/MipsELFObjectWriter.cpp b/lib/Target/Mips/MCTargetDesc/MipsELFObjectWriter.cpp
index e14dc8d..a68bf16 100644
--- a/lib/Target/Mips/MCTargetDesc/MipsELFObjectWriter.cpp
+++ b/lib/Target/Mips/MCTargetDesc/MipsELFObjectWriter.cpp
@@ -38,9 +38,9 @@ namespace {
MipsELFObjectWriter::MipsELFObjectWriter(bool _is64Bit, uint8_t OSABI,
bool _isN64, bool IsLittleEndian)
- : MCELFObjectTargetWriter(_is64Bit, OSABI, ELF::EM_MIPS,
- /*HasRelocationAddend*/ (_isN64) ? true : false,
- /*IsN64*/ _isN64) {}
+ : MCELFObjectTargetWriter(_is64Bit, OSABI, ELF::EM_MIPS,
+ /*HasRelocationAddend*/ _isN64,
+ /*IsN64*/ _isN64) {}
MipsELFObjectWriter::~MipsELFObjectWriter() {}
@@ -54,9 +54,11 @@ unsigned MipsELFObjectWriter::GetRelocType(const MCValue &Target,
switch (Kind) {
default:
llvm_unreachable("invalid fixup kind!");
+ case Mips::fixup_Mips_32:
case FK_Data_4:
Type = ELF::R_MIPS_32;
break;
+ case Mips::fixup_Mips_64:
case FK_Data_8:
Type = ELF::R_MIPS_64;
break;
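This relocation hunk makes the target-specific fixup kinds (fixup_Mips_32/64) resolve to the same ELF relocation types as the generic FK_Data_4/8 kinds by grouping their cases. A compact sketch of that grouped-case mapping; the relocation numbers are the standard ELF MIPS ABI values:

    enum FixupKindSketch { FK_Data_4, FK_Data_8, fixup_Mips_32, fixup_Mips_64 };
    enum RelocTypeSketch { R_MIPS_32 = 2, R_MIPS_64 = 18 };

    static RelocTypeSketch getRelocTypeSketch(FixupKindSketch Kind) {
      switch (Kind) {
      case fixup_Mips_32: // target fixup and generic kind share one relocation
      case FK_Data_4:
        return R_MIPS_32;
      case fixup_Mips_64:
      case FK_Data_8:
        return R_MIPS_64;
      }
      return R_MIPS_32; // unreachable for the kinds above
    }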
@@ -262,12 +264,10 @@ MipsELFObjectWriter::needsRelocateWithSymbol(const MCSymbolData &SD,
}
}
-MCObjectWriter *llvm::createMipsELFObjectWriter(raw_ostream &OS,
- uint8_t OSABI,
+MCObjectWriter *llvm::createMipsELFObjectWriter(raw_ostream &OS, uint8_t OSABI,
bool IsLittleEndian,
bool Is64Bit) {
- MCELFObjectTargetWriter *MOTW = new MipsELFObjectWriter(Is64Bit, OSABI,
- (Is64Bit) ? true : false,
- IsLittleEndian);
+ MCELFObjectTargetWriter *MOTW =
+ new MipsELFObjectWriter(Is64Bit, OSABI, Is64Bit, IsLittleEndian);
return createELFObjectWriter(MOTW, OS, IsLittleEndian);
}
diff --git a/lib/Target/Mips/MCTargetDesc/MipsELFStreamer.cpp b/lib/Target/Mips/MCTargetDesc/MipsELFStreamer.cpp
index 18c4a20..93f60df 100644
--- a/lib/Target/Mips/MCTargetDesc/MipsELFStreamer.cpp
+++ b/lib/Target/Mips/MCTargetDesc/MipsELFStreamer.cpp
@@ -69,11 +69,9 @@ void MipsELFStreamer::EmitMipsOptionRecords() {
I->EmitMipsOptionRecord();
}
-namespace llvm {
-MCELFStreamer *createMipsELFStreamer(MCContext &Context, MCAsmBackend &MAB,
- raw_ostream &OS, MCCodeEmitter *Emitter,
- const MCSubtargetInfo &STI,
- bool RelaxAll) {
- return new MipsELFStreamer(Context, MAB, OS, Emitter, STI);
-}
+MCELFStreamer *llvm::createMipsELFStreamer(MCContext &Context,
+ MCAsmBackend &MAB, raw_ostream &OS,
+ MCCodeEmitter *Emitter,
+ bool RelaxAll) {
+ return new MipsELFStreamer(Context, MAB, OS, Emitter);
}
diff --git a/lib/Target/Mips/MCTargetDesc/MipsELFStreamer.h b/lib/Target/Mips/MCTargetDesc/MipsELFStreamer.h
index bc76d8a..6b834c6 100644
--- a/lib/Target/Mips/MCTargetDesc/MipsELFStreamer.h
+++ b/lib/Target/Mips/MCTargetDesc/MipsELFStreamer.h
@@ -34,7 +34,7 @@ class MipsELFStreamer : public MCELFStreamer {
public:
MipsELFStreamer(MCContext &Context, MCAsmBackend &MAB, raw_ostream &OS,
- MCCodeEmitter *Emitter, const MCSubtargetInfo &STI)
+ MCCodeEmitter *Emitter)
: MCELFStreamer(Context, MAB, OS, Emitter) {
RegInfoRecord = new MipsRegInfoRecord(this, Context);
@@ -69,6 +69,6 @@ public:
MCELFStreamer *createMipsELFStreamer(MCContext &Context, MCAsmBackend &MAB,
raw_ostream &OS, MCCodeEmitter *Emitter,
- const MCSubtargetInfo &STI, bool RelaxAll);
+ bool RelaxAll);
} // namespace llvm.
#endif
diff --git a/lib/Target/Mips/MCTargetDesc/MipsFixupKinds.h b/lib/Target/Mips/MCTargetDesc/MipsFixupKinds.h
index fa8d6a6..e601963 100644
--- a/lib/Target/Mips/MCTargetDesc/MipsFixupKinds.h
+++ b/lib/Target/Mips/MCTargetDesc/MipsFixupKinds.h
@@ -18,7 +18,7 @@ namespace Mips {
// one can have multiple fixup types for a given relocation and thus need
// to be uniquely named.
//
- // This table *must* be in the save order of
+ // This table *must* be in the same order of
// MCFixupKindInfo Infos[Mips::NumTargetFixupKinds]
// in MipsAsmBackend.cpp.
//
diff --git a/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.cpp b/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.cpp
index 8208725..1c2f2da 100644
--- a/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.cpp
+++ b/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.cpp
@@ -35,14 +35,12 @@
namespace llvm {
MCCodeEmitter *createMipsMCCodeEmitterEB(const MCInstrInfo &MCII,
const MCRegisterInfo &MRI,
- const MCSubtargetInfo &STI,
MCContext &Ctx) {
return new MipsMCCodeEmitter(MCII, Ctx, false);
}
MCCodeEmitter *createMipsMCCodeEmitterEL(const MCInstrInfo &MCII,
const MCRegisterInfo &MRI,
- const MCSubtargetInfo &STI,
MCContext &Ctx) {
return new MipsMCCodeEmitter(MCII, Ctx, true);
}
@@ -451,7 +449,7 @@ getSImm9AddiuspValue(const MCInst &MI, unsigned OpNo,
}
unsigned MipsMCCodeEmitter::
-getExprOpValue(const MCExpr *Expr,SmallVectorImpl<MCFixup> &Fixups,
+getExprOpValue(const MCExpr *Expr, SmallVectorImpl<MCFixup> &Fixups,
const MCSubtargetInfo &STI) const {
int64_t Res;
@@ -500,6 +498,9 @@ getExprOpValue(const MCExpr *Expr,SmallVectorImpl<MCFixup> &Fixups,
switch(cast<MCSymbolRefExpr>(Expr)->getKind()) {
default: llvm_unreachable("Unknown fixup kind!");
break;
+ case MCSymbolRefExpr::VK_None:
+ FixupKind = Mips::fixup_Mips_32; // FIXME: This is ok for O32/N32 but not N64.
+ break;
case MCSymbolRefExpr::VK_Mips_GPOFF_HI :
FixupKind = Mips::fixup_Mips_GPOFF_HI;
break;
diff --git a/lib/Target/Mips/MCTargetDesc/MipsMCNaCl.h b/lib/Target/Mips/MCTargetDesc/MipsMCNaCl.h
index e756b47..e6b5be7 100644
--- a/lib/Target/Mips/MCTargetDesc/MipsMCNaCl.h
+++ b/lib/Target/Mips/MCTargetDesc/MipsMCNaCl.h
@@ -25,7 +25,6 @@ bool baseRegNeedsLoadStoreMask(unsigned Reg);
MCELFStreamer *createMipsNaClELFStreamer(MCContext &Context, MCAsmBackend &TAB,
raw_ostream &OS,
MCCodeEmitter *Emitter,
- const MCSubtargetInfo &STI,
bool RelaxAll);
}
diff --git a/lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.cpp b/lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.cpp
index 9b56067..6f3f37b 100644
--- a/lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.cpp
+++ b/lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.cpp
@@ -106,96 +106,73 @@ static MCInstPrinter *createMipsMCInstPrinter(const Target &T,
return new MipsInstPrinter(MAI, MII, MRI);
}
-static MCStreamer *createMCStreamer(const Target &T, StringRef TT,
- MCContext &Context, MCAsmBackend &MAB,
- raw_ostream &OS, MCCodeEmitter *Emitter,
- const MCSubtargetInfo &STI, bool RelaxAll) {
+static MCStreamer *createMCStreamer(const Triple &T, MCContext &Context,
+ MCAsmBackend &MAB, raw_ostream &OS,
+ MCCodeEmitter *Emitter, bool RelaxAll) {
MCStreamer *S;
- if (!Triple(TT).isOSNaCl())
- S = createMipsELFStreamer(Context, MAB, OS, Emitter, STI, RelaxAll);
+ if (!T.isOSNaCl())
+ S = createMipsELFStreamer(Context, MAB, OS, Emitter, RelaxAll);
else
- S = createMipsNaClELFStreamer(Context, MAB, OS, Emitter, STI, RelaxAll);
- new MipsTargetELFStreamer(*S, STI);
+ S = createMipsNaClELFStreamer(Context, MAB, OS, Emitter, RelaxAll);
return S;
}
-static MCStreamer *
-createMCAsmStreamer(MCContext &Ctx, formatted_raw_ostream &OS,
- bool isVerboseAsm, bool useDwarfDirectory,
- MCInstPrinter *InstPrint, MCCodeEmitter *CE,
- MCAsmBackend *TAB, bool ShowInst) {
- MCStreamer *S = llvm::createAsmStreamer(
- Ctx, OS, isVerboseAsm, useDwarfDirectory, InstPrint, CE, TAB, ShowInst);
- new MipsTargetAsmStreamer(*S, OS);
- return S;
+static MCTargetStreamer *createMipsAsmTargetStreamer(MCStreamer &S,
+ formatted_raw_ostream &OS,
+ MCInstPrinter *InstPrint,
+ bool isVerboseAsm) {
+ return new MipsTargetAsmStreamer(S, OS);
}
static MCTargetStreamer *createMipsNullTargetStreamer(MCStreamer &S) {
return new MipsTargetStreamer(S);
}
+static MCTargetStreamer *
+createMipsObjectTargetStreamer(MCStreamer &S, const MCSubtargetInfo &STI) {
+ return new MipsTargetELFStreamer(S, STI);
+}
+
extern "C" void LLVMInitializeMipsTargetMC() {
- // Register the MC asm info.
- RegisterMCAsmInfoFn X(TheMipsTarget, createMipsMCAsmInfo);
- RegisterMCAsmInfoFn Y(TheMipselTarget, createMipsMCAsmInfo);
- RegisterMCAsmInfoFn A(TheMips64Target, createMipsMCAsmInfo);
- RegisterMCAsmInfoFn B(TheMips64elTarget, createMipsMCAsmInfo);
-
- // Register the MC codegen info.
- TargetRegistry::RegisterMCCodeGenInfo(TheMipsTarget,
- createMipsMCCodeGenInfo);
- TargetRegistry::RegisterMCCodeGenInfo(TheMipselTarget,
- createMipsMCCodeGenInfo);
- TargetRegistry::RegisterMCCodeGenInfo(TheMips64Target,
- createMipsMCCodeGenInfo);
- TargetRegistry::RegisterMCCodeGenInfo(TheMips64elTarget,
- createMipsMCCodeGenInfo);
-
- // Register the MC instruction info.
- TargetRegistry::RegisterMCInstrInfo(TheMipsTarget, createMipsMCInstrInfo);
- TargetRegistry::RegisterMCInstrInfo(TheMipselTarget, createMipsMCInstrInfo);
- TargetRegistry::RegisterMCInstrInfo(TheMips64Target, createMipsMCInstrInfo);
- TargetRegistry::RegisterMCInstrInfo(TheMips64elTarget,
- createMipsMCInstrInfo);
-
- // Register the MC register info.
- TargetRegistry::RegisterMCRegInfo(TheMipsTarget, createMipsMCRegisterInfo);
- TargetRegistry::RegisterMCRegInfo(TheMipselTarget, createMipsMCRegisterInfo);
- TargetRegistry::RegisterMCRegInfo(TheMips64Target, createMipsMCRegisterInfo);
- TargetRegistry::RegisterMCRegInfo(TheMips64elTarget,
- createMipsMCRegisterInfo);
+ for (Target *T : {&TheMipsTarget, &TheMipselTarget, &TheMips64Target,
+ &TheMips64elTarget}) {
+ // Register the MC asm info.
+ RegisterMCAsmInfoFn X(*T, createMipsMCAsmInfo);
+
+ // Register the MC codegen info.
+ TargetRegistry::RegisterMCCodeGenInfo(*T, createMipsMCCodeGenInfo);
+
+ // Register the MC instruction info.
+ TargetRegistry::RegisterMCInstrInfo(*T, createMipsMCInstrInfo);
+
+ // Register the MC register info.
+ TargetRegistry::RegisterMCRegInfo(*T, createMipsMCRegisterInfo);
+
+ // Register the elf streamer.
+ TargetRegistry::RegisterELFStreamer(*T, createMCStreamer);
+
+ // Register the asm target streamer.
+ TargetRegistry::RegisterAsmTargetStreamer(*T, createMipsAsmTargetStreamer);
+
+ TargetRegistry::RegisterNullTargetStreamer(*T,
+ createMipsNullTargetStreamer);
+
+ // Register the MC subtarget info.
+ TargetRegistry::RegisterMCSubtargetInfo(*T, createMipsMCSubtargetInfo);
+
+ // Register the MCInstPrinter.
+ TargetRegistry::RegisterMCInstPrinter(*T, createMipsMCInstPrinter);
+
+ TargetRegistry::RegisterObjectTargetStreamer(
+ *T, createMipsObjectTargetStreamer);
+ }
// Register the MC Code Emitter
- TargetRegistry::RegisterMCCodeEmitter(TheMipsTarget,
- createMipsMCCodeEmitterEB);
- TargetRegistry::RegisterMCCodeEmitter(TheMipselTarget,
- createMipsMCCodeEmitterEL);
- TargetRegistry::RegisterMCCodeEmitter(TheMips64Target,
- createMipsMCCodeEmitterEB);
- TargetRegistry::RegisterMCCodeEmitter(TheMips64elTarget,
- createMipsMCCodeEmitterEL);
-
- // Register the object streamer.
- TargetRegistry::RegisterMCObjectStreamer(TheMipsTarget, createMCStreamer);
- TargetRegistry::RegisterMCObjectStreamer(TheMipselTarget, createMCStreamer);
- TargetRegistry::RegisterMCObjectStreamer(TheMips64Target, createMCStreamer);
- TargetRegistry::RegisterMCObjectStreamer(TheMips64elTarget,
- createMCStreamer);
-
- // Register the asm streamer.
- TargetRegistry::RegisterAsmStreamer(TheMipsTarget, createMCAsmStreamer);
- TargetRegistry::RegisterAsmStreamer(TheMipselTarget, createMCAsmStreamer);
- TargetRegistry::RegisterAsmStreamer(TheMips64Target, createMCAsmStreamer);
- TargetRegistry::RegisterAsmStreamer(TheMips64elTarget, createMCAsmStreamer);
-
- TargetRegistry::RegisterNullTargetStreamer(TheMipsTarget,
- createMipsNullTargetStreamer);
- TargetRegistry::RegisterNullTargetStreamer(TheMipselTarget,
- createMipsNullTargetStreamer);
- TargetRegistry::RegisterNullTargetStreamer(TheMips64Target,
- createMipsNullTargetStreamer);
- TargetRegistry::RegisterNullTargetStreamer(TheMips64elTarget,
- createMipsNullTargetStreamer);
+ for (Target *T : {&TheMipsTarget, &TheMips64Target})
+ TargetRegistry::RegisterMCCodeEmitter(*T, createMipsMCCodeEmitterEB);
+
+ for (Target *T : {&TheMipselTarget, &TheMips64elTarget})
+ TargetRegistry::RegisterMCCodeEmitter(*T, createMipsMCCodeEmitterEL);
// Register the asm backend.
TargetRegistry::RegisterMCAsmBackend(TheMipsTarget,
@@ -207,23 +184,4 @@ extern "C" void LLVMInitializeMipsTargetMC() {
TargetRegistry::RegisterMCAsmBackend(TheMips64elTarget,
createMipsAsmBackendEL64);
- // Register the MC subtarget info.
- TargetRegistry::RegisterMCSubtargetInfo(TheMipsTarget,
- createMipsMCSubtargetInfo);
- TargetRegistry::RegisterMCSubtargetInfo(TheMipselTarget,
- createMipsMCSubtargetInfo);
- TargetRegistry::RegisterMCSubtargetInfo(TheMips64Target,
- createMipsMCSubtargetInfo);
- TargetRegistry::RegisterMCSubtargetInfo(TheMips64elTarget,
- createMipsMCSubtargetInfo);
-
- // Register the MCInstPrinter.
- TargetRegistry::RegisterMCInstPrinter(TheMipsTarget,
- createMipsMCInstPrinter);
- TargetRegistry::RegisterMCInstPrinter(TheMipselTarget,
- createMipsMCInstPrinter);
- TargetRegistry::RegisterMCInstPrinter(TheMips64Target,
- createMipsMCInstPrinter);
- TargetRegistry::RegisterMCInstPrinter(TheMips64elTarget,
- createMipsMCInstPrinter);
}
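The bulk of this file's diff replaces four near-identical registration stanzas with one loop over the four Mips Target objects, then registers the endianness-specific code emitters in two smaller loops. The same deduplication pattern as a standalone sketch (the registry and targets are stand-ins, not the TargetRegistry API):

    #include <functional>
    #include <map>
    #include <string>

    struct TargetSketch { std::string Name; };
    static TargetSketch Mips{"mips"}, Mipsel{"mipsel"},
                        Mips64{"mips64"}, Mips64el{"mips64el"};

    static std::map<std::string, std::function<void()>> Registry;

    static void registerCommon(TargetSketch &T) {
      Registry[T.Name + ".asminfo"] = [] {};
      Registry[T.Name + ".instrinfo"] = [] {};
    }

    int main() {
      // One loop over all targets instead of four copies of each call.
      for (TargetSketch *T : {&Mips, &Mipsel, &Mips64, &Mips64el})
        registerCommon(*T);
      // Endian-specific pieces get their own, smaller loops.
      for (TargetSketch *T : {&Mips, &Mips64})
        Registry[T->Name + ".emitterEB"] = [] {};
      for (TargetSketch *T : {&Mipsel, &Mips64el})
        Registry[T->Name + ".emitterEL"] = [] {};
    }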
diff --git a/lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.h b/lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.h
index 9528b4e..92f394a 100644
--- a/lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.h
+++ b/lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.h
@@ -35,11 +35,9 @@ extern Target TheMips64elTarget;
MCCodeEmitter *createMipsMCCodeEmitterEB(const MCInstrInfo &MCII,
const MCRegisterInfo &MRI,
- const MCSubtargetInfo &STI,
MCContext &Ctx);
MCCodeEmitter *createMipsMCCodeEmitterEL(const MCInstrInfo &MCII,
const MCRegisterInfo &MRI,
- const MCSubtargetInfo &STI,
MCContext &Ctx);
MCAsmBackend *createMipsAsmBackendEB32(const Target &T,
diff --git a/lib/Target/Mips/MCTargetDesc/MipsNaClELFStreamer.cpp b/lib/Target/Mips/MCTargetDesc/MipsNaClELFStreamer.cpp
index 92b8455..1adfdf9 100644
--- a/lib/Target/Mips/MCTargetDesc/MipsNaClELFStreamer.cpp
+++ b/lib/Target/Mips/MCTargetDesc/MipsNaClELFStreamer.cpp
@@ -37,8 +37,8 @@ const unsigned LoadStoreStackMaskReg = Mips::T7;
class MipsNaClELFStreamer : public MipsELFStreamer {
public:
MipsNaClELFStreamer(MCContext &Context, MCAsmBackend &TAB, raw_ostream &OS,
- MCCodeEmitter *Emitter, const MCSubtargetInfo &STI)
- : MipsELFStreamer(Context, TAB, OS, Emitter, STI), PendingCall(false) {}
+ MCCodeEmitter *Emitter)
+ : MipsELFStreamer(Context, TAB, OS, Emitter), PendingCall(false) {}
~MipsNaClELFStreamer() {}
@@ -254,10 +254,8 @@ bool baseRegNeedsLoadStoreMask(unsigned Reg) {
MCELFStreamer *createMipsNaClELFStreamer(MCContext &Context, MCAsmBackend &TAB,
raw_ostream &OS,
MCCodeEmitter *Emitter,
- const MCSubtargetInfo &STI,
bool RelaxAll) {
- MipsNaClELFStreamer *S = new MipsNaClELFStreamer(Context, TAB, OS, Emitter,
- STI);
+ MipsNaClELFStreamer *S = new MipsNaClELFStreamer(Context, TAB, OS, Emitter);
if (RelaxAll)
S->getAssembler().setRelaxAll(true);
diff --git a/lib/Target/Mips/MCTargetDesc/MipsTargetStreamer.cpp b/lib/Target/Mips/MCTargetDesc/MipsTargetStreamer.cpp
index 64d7cab..5790a5c 100644
--- a/lib/Target/Mips/MCTargetDesc/MipsTargetStreamer.cpp
+++ b/lib/Target/Mips/MCTargetDesc/MipsTargetStreamer.cpp
@@ -62,7 +62,7 @@ void MipsTargetStreamer::emitFMask(unsigned FPUBitmask, int FPUTopSavedRegOff) {
void MipsTargetStreamer::emitDirectiveSetArch(StringRef Arch) {
forbidModuleDirective();
}
-void MipsTargetStreamer::emitDirectiveSetMips0() {}
+void MipsTargetStreamer::emitDirectiveSetMips0() { forbidModuleDirective(); }
void MipsTargetStreamer::emitDirectiveSetMips1() { forbidModuleDirective(); }
void MipsTargetStreamer::emitDirectiveSetMips2() { forbidModuleDirective(); }
void MipsTargetStreamer::emitDirectiveSetMips3() { forbidModuleDirective(); }
@@ -78,8 +78,8 @@ void MipsTargetStreamer::emitDirectiveSetMips64R2() { forbidModuleDirective(); }
void MipsTargetStreamer::emitDirectiveSetMips64R3() { forbidModuleDirective(); }
void MipsTargetStreamer::emitDirectiveSetMips64R5() { forbidModuleDirective(); }
void MipsTargetStreamer::emitDirectiveSetMips64R6() { forbidModuleDirective(); }
-void MipsTargetStreamer::emitDirectiveSetPop() {}
-void MipsTargetStreamer::emitDirectiveSetPush() {}
+void MipsTargetStreamer::emitDirectiveSetPop() { forbidModuleDirective(); }
+void MipsTargetStreamer::emitDirectiveSetPush() { forbidModuleDirective(); }
void MipsTargetStreamer::emitDirectiveSetDsp() { forbidModuleDirective(); }
void MipsTargetStreamer::emitDirectiveSetNoDsp() { forbidModuleDirective(); }
void MipsTargetStreamer::emitDirectiveCpLoad(unsigned RegNo) {}
@@ -91,6 +91,10 @@ void MipsTargetStreamer::emitDirectiveModuleOddSPReg(bool Enabled,
if (!Enabled && !IsO32ABI)
report_fatal_error("+nooddspreg is only valid for O32");
}
+void MipsTargetStreamer::emitDirectiveSetFp(
+ MipsABIFlagsSection::FpABIKind Value) {
+ forbidModuleDirective();
+}
MipsTargetAsmStreamer::MipsTargetAsmStreamer(MCStreamer &S,
formatted_raw_ostream &OS)
@@ -198,7 +202,10 @@ void MipsTargetAsmStreamer::emitDirectiveSetArch(StringRef Arch) {
MipsTargetStreamer::emitDirectiveSetArch(Arch);
}
-void MipsTargetAsmStreamer::emitDirectiveSetMips0() { OS << "\t.set\tmips0\n"; }
+void MipsTargetAsmStreamer::emitDirectiveSetMips0() {
+ OS << "\t.set\tmips0\n";
+ MipsTargetStreamer::emitDirectiveSetMips0();
+}
void MipsTargetAsmStreamer::emitDirectiveSetMips1() {
OS << "\t.set\tmips1\n";
@@ -285,9 +292,15 @@ void MipsTargetAsmStreamer::emitDirectiveSetNoDsp() {
MipsTargetStreamer::emitDirectiveSetNoDsp();
}
-void MipsTargetAsmStreamer::emitDirectiveSetPop() { OS << "\t.set\tpop\n"; }
+void MipsTargetAsmStreamer::emitDirectiveSetPop() {
+ OS << "\t.set\tpop\n";
+ MipsTargetStreamer::emitDirectiveSetPop();
+}
-void MipsTargetAsmStreamer::emitDirectiveSetPush() { OS << "\t.set\tpush\n"; }
+void MipsTargetAsmStreamer::emitDirectiveSetPush() {
+ OS << "\t.set\tpush\n";
+ MipsTargetStreamer::emitDirectiveSetPush();
+}
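The .set mips0/push/pop changes all follow one rule: an override that prints a directive must still call the base-class version so the forbidModuleDirective() bookkeeping runs. A minimal sketch of that print-then-delegate shape:

    #include <iostream>

    class TargetStreamerBaseSketch {
      bool ModuleDirectiveAllowed = true;
    public:
      virtual ~TargetStreamerBaseSketch() = default;
      virtual void emitDirectiveSetPop() { ModuleDirectiveAllowed = false; }
      bool moduleDirectiveAllowed() const { return ModuleDirectiveAllowed; }
    };

    class AsmStreamerSketch : public TargetStreamerBaseSketch {
    public:
      void emitDirectiveSetPop() override {
        std::cout << "\t.set\tpop\n";
        TargetStreamerBaseSketch::emitDirectiveSetPop(); // keep the bookkeeping
      }
    };

Skipping the base call is exactly the bug the pop/push/mips0 overrides had before this patch: the directive printed, but the module-directive state never updated.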
// Print a 32 bit hex number with all numbers.
static void printHex32(unsigned Value, raw_ostream &OS) {
@@ -346,15 +359,13 @@ void MipsTargetAsmStreamer::emitDirectiveModuleFP(
void MipsTargetAsmStreamer::emitDirectiveSetFp(
MipsABIFlagsSection::FpABIKind Value) {
+ MipsTargetStreamer::emitDirectiveSetFp(Value);
+
StringRef ModuleValue;
OS << "\t.set\tfp=";
OS << ABIFlagsSection.getFpABIString(Value) << "\n";
}
-void MipsTargetAsmStreamer::emitMipsAbiFlags() {
- // No action required for text output.
-}
-
void MipsTargetAsmStreamer::emitDirectiveModuleOddSPReg(bool Enabled,
bool IsO32ABI) {
MipsTargetStreamer::emitDirectiveModuleOddSPReg(Enabled, IsO32ABI);
@@ -367,10 +378,7 @@ MipsTargetELFStreamer::MipsTargetELFStreamer(MCStreamer &S,
const MCSubtargetInfo &STI)
: MipsTargetStreamer(S), MicroMipsEnabled(false), STI(STI) {
MCAssembler &MCA = getStreamer().getAssembler();
- Triple T(STI.getTargetTriple());
- Pic = (MCA.getContext().getObjectFileInfo()->getRelocM() == Reloc::PIC_)
- ? true
- : false;
+ Pic = MCA.getContext().getObjectFileInfo()->getRelocM() == Reloc::PIC_;
uint64_t Features = STI.getFeatureBits();
diff --git a/lib/Target/Mips/MicroMipsInstrInfo.td b/lib/Target/Mips/MicroMipsInstrInfo.td
index e20df2f..2aab739 100644
--- a/lib/Target/Mips/MicroMipsInstrInfo.td
+++ b/lib/Target/Mips/MicroMipsInstrInfo.td
@@ -642,8 +642,10 @@ let DecoderNamespace = "MicroMips", Predicates = [InMicroMips] in {
LW_FM_MM<0xc>;
/// Arithmetic Instructions (3-Operand, R-Type)
- def ADDu_MM : MMRel, ArithLogicR<"addu", GPR32Opnd>, ADD_FM_MM<0, 0x150>;
- def SUBu_MM : MMRel, ArithLogicR<"subu", GPR32Opnd>, ADD_FM_MM<0, 0x1d0>;
+ def ADDu_MM : MMRel, ArithLogicR<"addu", GPR32Opnd, 1, II_ADDU, add>,
+ ADD_FM_MM<0, 0x150>;
+ def SUBu_MM : MMRel, ArithLogicR<"subu", GPR32Opnd, 0, II_SUBU, sub>,
+ ADD_FM_MM<0, 0x1d0>;
def MUL_MM : MMRel, ArithLogicR<"mul", GPR32Opnd>, ADD_FM_MM<0, 0x210>;
def ADD_MM : MMRel, ArithLogicR<"add", GPR32Opnd>, ADD_FM_MM<0, 0x110>;
def SUB_MM : MMRel, ArithLogicR<"sub", GPR32Opnd>, ADD_FM_MM<0, 0x190>;
@@ -883,6 +885,8 @@ def : MipsPat<(i32 immSExt16:$imm),
(ADDiu_MM ZERO, immSExt16:$imm)>;
def : MipsPat<(i32 immZExt16:$imm),
(ORi_MM ZERO, immZExt16:$imm)>;
+def : MipsPat<(not GPR32:$in),
+ (NOR_MM GPR32Opnd:$in, ZERO)>;
def : MipsPat<(add GPRMM16:$src, immSExtAddiur2:$imm),
(ADDIUR2_MM GPRMM16:$src, immSExtAddiur2:$imm)>;
diff --git a/lib/Target/Mips/Mips.h b/lib/Target/Mips/Mips.h
index cb09c1a..671d7a8 100644
--- a/lib/Target/Mips/Mips.h
+++ b/lib/Target/Mips/Mips.h
@@ -20,8 +20,13 @@
namespace llvm {
class MipsTargetMachine;
+ class ModulePass;
class FunctionPass;
+ ModulePass *createMipsOs16Pass(MipsTargetMachine &TM);
+ ModulePass *createMips16HardFloatPass(MipsTargetMachine &TM);
+
+ FunctionPass *createMipsModuleISelDagPass(MipsTargetMachine &TM);
FunctionPass *createMipsOptimizePICCallPass(MipsTargetMachine &TM);
FunctionPass *createMipsDelaySlotFillerPass(MipsTargetMachine &TM);
FunctionPass *createMipsLongBranchPass(MipsTargetMachine &TM);
diff --git a/lib/Target/Mips/Mips.td b/lib/Target/Mips/Mips.td
index 01c548e..ca24741 100644
--- a/lib/Target/Mips/Mips.td
+++ b/lib/Target/Mips/Mips.td
@@ -58,22 +58,22 @@ def MipsInstrInfo : InstrInfo;
//===----------------------------------------------------------------------===//
def FeatureNoABICalls : SubtargetFeature<"noabicalls", "NoABICalls", "true",
- "Disable SVR4-style position-independent code.">;
+ "Disable SVR4-style position-independent code">;
def FeatureGP64Bit : SubtargetFeature<"gp64", "IsGP64bit", "true",
- "General Purpose Registers are 64-bit wide.">;
+ "General Purpose Registers are 64-bit wide">;
def FeatureFP64Bit : SubtargetFeature<"fp64", "IsFP64bit", "true",
- "Support 64-bit FP registers.">;
+ "Support 64-bit FP registers">;
def FeatureFPXX : SubtargetFeature<"fpxx", "IsFPXX", "true",
- "Support for FPXX.">;
+ "Support for FPXX">;
def FeatureNaN2008 : SubtargetFeature<"nan2008", "IsNaN2008bit", "true",
- "IEEE 754-2008 NaN encoding.">;
+ "IEEE 754-2008 NaN encoding">;
def FeatureSingleFloat : SubtargetFeature<"single-float", "IsSingleFloat",
"true", "Only supports single precision float">;
def FeatureNoOddSPReg : SubtargetFeature<"nooddspreg", "UseOddSPReg", "false",
"Disable odd numbered single-precision "
"registers">;
def FeatureVFPU : SubtargetFeature<"vfpu", "HasVFPU",
- "true", "Enable vector FPU instructions.">;
+ "true", "Enable vector FPU instructions">;
def FeatureMips1 : SubtargetFeature<"mips1", "MipsArchVersion", "Mips1",
"Mips I ISA Support [highly experimental]">;
def FeatureMips2 : SubtargetFeature<"mips2", "MipsArchVersion", "Mips2",
diff --git a/lib/Target/Mips/Mips16HardFloat.cpp b/lib/Target/Mips/Mips16HardFloat.cpp
index 32dc90a..893fc7c 100644
--- a/lib/Target/Mips/Mips16HardFloat.cpp
+++ b/lib/Target/Mips/Mips16HardFloat.cpp
@@ -11,7 +11,7 @@
//
//===----------------------------------------------------------------------===//
-#include "Mips16HardFloat.h"
+#include "MipsTargetMachine.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Value.h"
#include "llvm/Support/Debug.h"
@@ -19,38 +19,51 @@
#include <algorithm>
#include <string>
-#define DEBUG_TYPE "mips16-hard-float"
+using namespace llvm;
-static void inlineAsmOut
- (LLVMContext &C, StringRef AsmString, BasicBlock *BB ) {
- std::vector<llvm::Type *> AsmArgTypes;
- std::vector<llvm::Value*> AsmArgs;
- llvm::FunctionType *AsmFTy =
- llvm::FunctionType::get(Type::getVoidTy(C),
- AsmArgTypes, false);
- llvm::InlineAsm *IA =
- llvm::InlineAsm::get(AsmFTy, AsmString, "", true,
- /* IsAlignStack */ false,
- llvm::InlineAsm::AD_ATT);
- CallInst::Create(IA, AsmArgs, "", BB);
-}
+#define DEBUG_TYPE "mips16-hard-float"
namespace {
+ class Mips16HardFloat : public ModulePass {
+ public:
+ static char ID;
-class InlineAsmHelper {
- LLVMContext &C;
- BasicBlock *BB;
-public:
- InlineAsmHelper(LLVMContext &C_, BasicBlock *BB_) :
- C(C_), BB(BB_) {
- }
+ Mips16HardFloat(MipsTargetMachine &TM_) : ModulePass(ID), TM(TM_) {}
- void Out(StringRef AsmString) {
- inlineAsmOut(C, AsmString, BB);
- }
+ const char *getPassName() const override {
+ return "MIPS16 Hard Float Pass";
+ }
-};
+ bool runOnModule(Module &M) override;
+
+ protected:
+ const MipsTargetMachine &TM;
+ };
+
+ class InlineAsmHelper {
+ LLVMContext &C;
+ BasicBlock *BB;
+ public:
+ InlineAsmHelper(LLVMContext &C_, BasicBlock *BB_) :
+ C(C_), BB(BB_) {
+ }
+
+ void Out(StringRef AsmString) {
+ std::vector<llvm::Type *> AsmArgTypes;
+ std::vector<llvm::Value*> AsmArgs;
+
+ llvm::FunctionType *AsmFTy = llvm::FunctionType::get(Type::getVoidTy(C),
+ AsmArgTypes, false);
+ llvm::InlineAsm *IA = llvm::InlineAsm::get(AsmFTy, AsmString, "", true,
+ /* IsAlignStack */ false,
+ llvm::InlineAsm::AD_ATT);
+ CallInst::Create(IA, AsmArgs, "", BB);
+ }
+ };
+
+ char Mips16HardFloat::ID = 0;
}
+
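The Mips16HardFloat rewrite folds the pass class out of its now-deleted header into an anonymous namespace in the .cpp, the common shape for LLVM legacy passes of this era. A hedged skeleton of that shape, assuming the 3.6-era legacy pass headers:

    #include "llvm/IR/Module.h"
    #include "llvm/Pass.h"

    using namespace llvm;

    namespace {
    // Internal linkage: the pass type is visible only to this file; the
    // rest of the backend reaches it through a create*Pass() factory.
    class SketchModulePass : public ModulePass {
    public:
      static char ID;
      SketchModulePass() : ModulePass(ID) {}
      const char *getPassName() const override { return "Sketch Module Pass"; }
      bool runOnModule(Module &M) override { return false; } // no change made
    };
    char SketchModulePass::ID = 0;
    } // end anonymous namespace

    ModulePass *createSketchModulePass() { return new SketchModulePass(); }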
//
// Return types that matter for hard float are:
// float, double, complex float, and complex double
@@ -154,11 +167,11 @@ static bool needsFPStubFromParams(Function &F) {
if (F.arg_size() >=1) {
Type *ArgType = F.getFunctionType()->getParamType(0);
switch (ArgType->getTypeID()) {
- case Type::FloatTyID:
- case Type::DoubleTyID:
- return true;
- default:
- break;
+ case Type::FloatTyID:
+ case Type::DoubleTyID:
+ return true;
+ default:
+ break;
}
}
return false;
@@ -182,10 +195,8 @@ static bool needsFPHelperFromSig(Function &F) {
// We swap between FP and Integer registers to allow Mips16 and Mips32 to
// interoperate
//
-
-static void swapFPIntParams
- (FPParamVariant PV, Module *M, InlineAsmHelper &IAH,
- bool LE, bool ToFP) {
+static void swapFPIntParams(FPParamVariant PV, Module *M, InlineAsmHelper &IAH,
+ bool LE, bool ToFP) {
//LLVMContext &Context = M->getContext();
std::string MI = ToFP? "mtc1 ": "mfc1 ";
switch (PV) {
@@ -242,6 +253,7 @@ static void swapFPIntParams
return;
}
}
+
//
// Make sure that we know we already need a stub for this function.
// Having called needsFPHelperFromSig
@@ -297,8 +309,8 @@ static void assureFPCallStub(Function &F, Module *M,
break;
case CFRet:
if (LE) {
- IAH.Out("mfc1 $$2,$$f0");
- IAH.Out("mfc1 $$3,$$f2");
+ IAH.Out("mfc1 $$2,$$f0");
+ IAH.Out("mfc1 $$3,$$f2");
} else {
IAH.Out("mfc1 $$3,$$f0");
IAH.Out("mfc1 $$3,$$f2");
@@ -331,28 +343,27 @@ static void assureFPCallStub(Function &F, Module *M,
//
// Functions that are llvm intrinsics and don't need helpers.
//
-static const char *IntrinsicInline[] =
- {"fabs",
- "fabsf",
- "llvm.ceil.f32", "llvm.ceil.f64",
- "llvm.copysign.f32", "llvm.copysign.f64",
- "llvm.cos.f32", "llvm.cos.f64",
- "llvm.exp.f32", "llvm.exp.f64",
- "llvm.exp2.f32", "llvm.exp2.f64",
- "llvm.fabs.f32", "llvm.fabs.f64",
- "llvm.floor.f32", "llvm.floor.f64",
- "llvm.fma.f32", "llvm.fma.f64",
- "llvm.log.f32", "llvm.log.f64",
- "llvm.log10.f32", "llvm.log10.f64",
- "llvm.nearbyint.f32", "llvm.nearbyint.f64",
- "llvm.pow.f32", "llvm.pow.f64",
- "llvm.powi.f32", "llvm.powi.f64",
- "llvm.rint.f32", "llvm.rint.f64",
- "llvm.round.f32", "llvm.round.f64",
- "llvm.sin.f32", "llvm.sin.f64",
- "llvm.sqrt.f32", "llvm.sqrt.f64",
- "llvm.trunc.f32", "llvm.trunc.f64",
- };
+static const char *IntrinsicInline[] = {
+ "fabs", "fabsf",
+ "llvm.ceil.f32", "llvm.ceil.f64",
+ "llvm.copysign.f32", "llvm.copysign.f64",
+ "llvm.cos.f32", "llvm.cos.f64",
+ "llvm.exp.f32", "llvm.exp.f64",
+ "llvm.exp2.f32", "llvm.exp2.f64",
+ "llvm.fabs.f32", "llvm.fabs.f64",
+ "llvm.floor.f32", "llvm.floor.f64",
+ "llvm.fma.f32", "llvm.fma.f64",
+ "llvm.log.f32", "llvm.log.f64",
+ "llvm.log10.f32", "llvm.log10.f64",
+ "llvm.nearbyint.f32", "llvm.nearbyint.f64",
+ "llvm.pow.f32", "llvm.pow.f64",
+ "llvm.powi.f32", "llvm.powi.f64",
+ "llvm.rint.f32", "llvm.rint.f64",
+ "llvm.round.f32", "llvm.round.f64",
+ "llvm.sin.f32", "llvm.sin.f64",
+ "llvm.sqrt.f32", "llvm.sqrt.f64",
+ "llvm.trunc.f32", "llvm.trunc.f64",
+};
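The reflowed table only works because isIntrinsicInline() below uses std::binary_search, which requires the array to stay sorted; for C strings that also means supplying a strcmp-based comparator rather than comparing pointers. A standalone sketch of the lookup (the comparator is the key detail; the table is abbreviated):

    #include <algorithm>
    #include <cstring>
    #include <iterator>

    // Must stay sorted in strcmp order for binary_search to be valid.
    static const char *SortedNames[] = {"fabs", "fabsf", "llvm.ceil.f32",
                                        "llvm.sqrt.f64"};

    static bool isListed(const char *Name) {
      return std::binary_search(std::begin(SortedNames), std::end(SortedNames),
                                Name, [](const char *L, const char *R) {
                                  return std::strcmp(L, R) < 0;
                                });
    }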
static bool isIntrinsicInline(Function *F) {
return std::binary_search(std::begin(IntrinsicInline),
@@ -384,9 +395,10 @@ static bool fixupFPReturnAndCall(Function &F, Module *M,
Type *T = RVal->getType();
FPReturnVariant RV = whichFPReturnVariant(T);
if (RV == NoFPRet) continue;
- static const char* Helper[NoFPRet] =
- {"__mips16_ret_sf", "__mips16_ret_df", "__mips16_ret_sc",
- "__mips16_ret_dc"};
+ static const char* Helper[NoFPRet] = {
+ "__mips16_ret_sf", "__mips16_ret_df", "__mips16_ret_sc",
+ "__mips16_ret_dc"
+ };
const char *Name = Helper[RV];
AttributeSet A;
Value *Params[] = {RVal};
@@ -406,33 +418,33 @@ static bool fixupFPReturnAndCall(Function &F, Module *M,
Value *F = (M->getOrInsertFunction(Name, A, MyVoid, T, nullptr));
CallInst::Create(F, Params, "", &Inst );
} else if (const CallInst *CI = dyn_cast<CallInst>(I)) {
- const Value* V = CI->getCalledValue();
- const Type* T = nullptr;
- if (V) T = V->getType();
- const PointerType *PFT=nullptr;
- if (T) PFT = dyn_cast<PointerType>(T);
- const FunctionType *FT=nullptr;
- if (PFT) FT = dyn_cast<FunctionType>(PFT->getElementType());
- Function *F_ = CI->getCalledFunction();
- if (FT && needsFPReturnHelper(*FT) &&
- !(F_ && isIntrinsicInline(F_))) {
+ const Value* V = CI->getCalledValue();
+ const Type* T = nullptr;
+ if (V) T = V->getType();
+ const PointerType *PFT=nullptr;
+ if (T) PFT = dyn_cast<PointerType>(T);
+ const FunctionType *FT=nullptr;
+ if (PFT) FT = dyn_cast<FunctionType>(PFT->getElementType());
+ Function *F_ = CI->getCalledFunction();
+ if (FT && needsFPReturnHelper(*FT) &&
+ !(F_ && isIntrinsicInline(F_))) {
+ Modified=true;
+ F.addFnAttr("saveS2");
+ }
+ if (F_ && !isIntrinsicInline(F_)) {
+ // pic mode calls are handled by already defined
+ // helper functions
+ if (needsFPReturnHelper(*F_)) {
Modified=true;
F.addFnAttr("saveS2");
}
- if (F_ && !isIntrinsicInline(F_)) {
- // pic mode calls are handled by already defined
- // helper functions
- if (needsFPReturnHelper(*F_)) {
+ if (TM.getRelocationModel() != Reloc::PIC_ ) {
+ if (needsFPHelperFromSig(*F_)) {
+ assureFPCallStub(*F_, M, TM);
Modified=true;
- F.addFnAttr("saveS2");
- }
- if (TM.getRelocationModel() != Reloc::PIC_ ) {
- if (needsFPHelperFromSig(*F_)) {
- assureFPCallStub(*F_, M, TM);
- Modified=true;
- }
}
}
+ }
}
}
return Modified;
@@ -489,7 +501,6 @@ static void removeUseSoftFloat(Function &F) {
F.addAttributes(AttributeSet::FunctionIndex, A);
}
-namespace llvm {
//
// This pass only makes sense when the underlying chip has floating point but
@@ -530,11 +541,7 @@ bool Mips16HardFloat::runOnModule(Module &M) {
return Modified;
}
-char Mips16HardFloat::ID = 0;
-
-}
-ModulePass *llvm::createMips16HardFloat(MipsTargetMachine &TM) {
+ModulePass *llvm::createMips16HardFloatPass(MipsTargetMachine &TM) {
return new Mips16HardFloat(TM);
}
-
diff --git a/lib/Target/Mips/Mips16HardFloat.h b/lib/Target/Mips/Mips16HardFloat.h
deleted file mode 100644
index 586cc25..0000000
--- a/lib/Target/Mips/Mips16HardFloat.h
+++ /dev/null
@@ -1,43 +0,0 @@
-//===---- Mips16HardFloat.h for Mips16 Hard Float --------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file defines a phase which implements part of the floating point
-// interoperability between Mips16 and Mips32 code.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_LIB_TARGET_MIPS_MIPS16HARDFLOAT_H
-#define LLVM_LIB_TARGET_MIPS_MIPS16HARDFLOAT_H
-
-#include "MCTargetDesc/MipsMCTargetDesc.h"
-#include "MipsTargetMachine.h"
-#include "llvm/Pass.h"
-#include "llvm/Target/TargetMachine.h"
-
-using namespace llvm;
-
-namespace llvm {
-
-class Mips16HardFloat : public ModulePass {
-public:
- static char ID;
-
- Mips16HardFloat(MipsTargetMachine &TM_) : ModulePass(ID), TM(TM_) {}
-
- const char *getPassName() const override { return "MIPS16 Hard Float Pass"; }
- bool runOnModule(Module &M) override;
-
-protected:
- const MipsTargetMachine &TM;
-};
-
-ModulePass *createMips16HardFloat(MipsTargetMachine &TM);
-
-}
-#endif
diff --git a/lib/Target/Mips/Mips16InstrInfo.cpp b/lib/Target/Mips/Mips16InstrInfo.cpp
index 976becc..00d4495 100644
--- a/lib/Target/Mips/Mips16InstrInfo.cpp
+++ b/lib/Target/Mips/Mips16InstrInfo.cpp
@@ -1,4 +1,3 @@
-
//===-- Mips16InstrInfo.cpp - Mips16 Instruction Information --------------===//
//
// The LLVM Compiler Infrastructure
@@ -25,6 +24,7 @@
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/TargetRegistry.h"
+#include "llvm/Support/raw_ostream.h"
#include <cctype>
using namespace llvm;
@@ -32,7 +32,7 @@ using namespace llvm;
#define DEBUG_TYPE "mips16-instrinfo"
Mips16InstrInfo::Mips16InstrInfo(const MipsSubtarget &STI)
- : MipsInstrInfo(STI, Mips::Bimm16), RI(STI) {}
+ : MipsInstrInfo(STI, Mips::Bimm16), RI() {}
const MipsRegisterInfo &Mips16InstrInfo::getRegisterInfo() const {
return RI;
diff --git a/lib/Target/Mips/Mips16InstrInfo.h b/lib/Target/Mips/Mips16InstrInfo.h
index e7d0c07..f9b7387 100644
--- a/lib/Target/Mips/Mips16InstrInfo.h
+++ b/lib/Target/Mips/Mips16InstrInfo.h
@@ -18,7 +18,7 @@
#include "MipsInstrInfo.h"
namespace llvm {
-
+class MipsSubtarget;
class Mips16InstrInfo : public MipsInstrInfo {
const Mips16RegisterInfo RI;
diff --git a/lib/Target/Mips/Mips16RegisterInfo.cpp b/lib/Target/Mips/Mips16RegisterInfo.cpp
index c45acc4..ebd51d7 100644
--- a/lib/Target/Mips/Mips16RegisterInfo.cpp
+++ b/lib/Target/Mips/Mips16RegisterInfo.cpp
@@ -41,8 +41,7 @@ using namespace llvm;
#define DEBUG_TYPE "mips16-registerinfo"
-Mips16RegisterInfo::Mips16RegisterInfo(const MipsSubtarget &ST)
- : MipsRegisterInfo(ST) {}
+Mips16RegisterInfo::Mips16RegisterInfo() : MipsRegisterInfo() {}
bool Mips16RegisterInfo::requiresRegisterScavenging
(const MachineFunction &MF) const {
@@ -65,7 +64,7 @@ bool Mips16RegisterInfo::saveScavengerRegister
const TargetRegisterClass *RC,
unsigned Reg) const {
DebugLoc DL;
- const TargetInstrInfo &TII = *Subtarget.getInstrInfo();
+ const TargetInstrInfo &TII = *MBB.getParent()->getSubtarget().getInstrInfo();
TII.copyPhysReg(MBB, I, DL, Mips::T0, Reg, true);
TII.copyPhysReg(MBB, UseMI, DL, Reg, Mips::T0, true);
return true;
@@ -106,7 +105,7 @@ void Mips16RegisterInfo::eliminateFI(MachineBasicBlock::iterator II,
if (FrameIndex >= MinCSFI && FrameIndex <= MaxCSFI)
FrameReg = Mips::SP;
else {
- const TargetFrameLowering *TFI = Subtarget.getFrameLowering();
+ const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering();
if (TFI->hasFP(MF)) {
FrameReg = Mips::S0;
}
@@ -140,7 +139,7 @@ void Mips16RegisterInfo::eliminateFI(MachineBasicBlock::iterator II,
DebugLoc DL = II->getDebugLoc();
unsigned NewImm;
const Mips16InstrInfo &TII =
- *static_cast<const Mips16InstrInfo *>(Subtarget.getInstrInfo());
+ *static_cast<const Mips16InstrInfo *>(MF.getSubtarget().getInstrInfo());
FrameReg = TII.loadImmediate(FrameReg, Offset, MBB, II, DL, NewImm);
Offset = SignExtend64<16>(NewImm);
IsKill = true;
diff --git a/lib/Target/Mips/Mips16RegisterInfo.h b/lib/Target/Mips/Mips16RegisterInfo.h
index 3cdf836..d67a79b 100644
--- a/lib/Target/Mips/Mips16RegisterInfo.h
+++ b/lib/Target/Mips/Mips16RegisterInfo.h
@@ -21,7 +21,7 @@ class Mips16InstrInfo;
class Mips16RegisterInfo : public MipsRegisterInfo {
public:
- Mips16RegisterInfo(const MipsSubtarget &Subtarget);
+ Mips16RegisterInfo();
bool requiresRegisterScavenging(const MachineFunction &MF) const override;
diff --git a/lib/Target/Mips/Mips64InstrInfo.td b/lib/Target/Mips/Mips64InstrInfo.td
index 776e473..b1cb7f7 100644
--- a/lib/Target/Mips/Mips64InstrInfo.td
+++ b/lib/Target/Mips/Mips64InstrInfo.td
@@ -604,7 +604,7 @@ def : MipsInstAlias<"syncws", (SYNC 0x5), 0>;
// Assembler Pseudo Instructions
//===----------------------------------------------------------------------===//
-class LoadImm64<string instr_asm, Operand Od, RegisterOperand RO> :
+class LoadImmediate64<string instr_asm, Operand Od, RegisterOperand RO> :
MipsAsmPseudoInst<(outs RO:$rt), (ins Od:$imm64),
!strconcat(instr_asm, "\t$rt, $imm64")> ;
-def LoadImm64Reg : LoadImm64<"dli", imm64, GPR64Opnd>;
+def LoadImm64 : LoadImmediate64<"dli", imm64, GPR64Opnd>;
diff --git a/lib/Target/Mips/MipsAsmPrinter.cpp b/lib/Target/Mips/MipsAsmPrinter.cpp
index c662e13..1eb3b2c 100644
--- a/lib/Target/Mips/MipsAsmPrinter.cpp
+++ b/lib/Target/Mips/MipsAsmPrinter.cpp
@@ -252,6 +252,7 @@ void MipsAsmPrinter::printSavedRegsBitmask() {
// Set the CPU and FPU Bitmasks
const MachineFrameInfo *MFI = MF->getFrameInfo();
+ const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo();
const std::vector<CalleeSavedInfo> &CSI = MFI->getCalleeSavedInfo();
// size of stack area to which FP callee-saved regs are saved.
unsigned CPURegSize = Mips::GPR32RegClass.getSize();
@@ -267,8 +268,7 @@ void MipsAsmPrinter::printSavedRegsBitmask() {
if (Mips::GPR32RegClass.contains(Reg))
break;
- unsigned RegNum =
- TM.getSubtargetImpl()->getRegisterInfo()->getEncodingValue(Reg);
+ unsigned RegNum = TRI->getEncodingValue(Reg);
if (Mips::AFGR64RegClass.contains(Reg)) {
FPUBitmask |= (3 << RegNum);
CSFPRegsSize += AFGR64RegSize;
@@ -283,8 +283,7 @@ void MipsAsmPrinter::printSavedRegsBitmask() {
// Set CPU Bitmask.
for (; i != e; ++i) {
unsigned Reg = CSI[i].getReg();
- unsigned RegNum =
- TM.getSubtargetImpl()->getRegisterInfo()->getEncodingValue(Reg);
+ unsigned RegNum = TRI->getEncodingValue(Reg);
CPUBitmask |= (1 << RegNum);
}
@@ -309,7 +308,7 @@ void MipsAsmPrinter::printSavedRegsBitmask() {
/// Frame Directive
void MipsAsmPrinter::emitFrameDirective() {
- const TargetRegisterInfo &RI = *TM.getSubtargetImpl()->getRegisterInfo();
+ const TargetRegisterInfo &RI = *MF->getSubtarget().getRegisterInfo();
unsigned stackReg = RI.getFrameRegister(*MF);
unsigned returnReg = RI.getRARegister();
@@ -438,7 +437,7 @@ bool MipsAsmPrinter::isBlockOnlyReachableByFallthrough(const MachineBasicBlock*
// Print out an operand for an inline asm expression.
bool MipsAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNum,
- unsigned AsmVariant,const char *ExtraCode,
+ unsigned AsmVariant, const char *ExtraCode,
raw_ostream &O) {
// Does this asm operand have a single letter operand modifier?
if (ExtraCode && ExtraCode[0]) {
@@ -540,18 +539,24 @@ bool MipsAsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI,
unsigned OpNum, unsigned AsmVariant,
const char *ExtraCode,
raw_ostream &O) {
- int Offset = 0;
+ assert(OpNum + 1 < MI->getNumOperands() && "Insufficient operands");
+ const MachineOperand &BaseMO = MI->getOperand(OpNum);
+ const MachineOperand &OffsetMO = MI->getOperand(OpNum + 1);
+ assert(BaseMO.isReg() && "Unexpected base pointer for inline asm memory operand.");
+ assert(OffsetMO.isImm() && "Unexpected offset for inline asm memory operand.");
+ int Offset = OffsetMO.getImm();
+
// Currently we are expecting either no ExtraCode or 'D'
if (ExtraCode) {
if (ExtraCode[0] == 'D')
- Offset = 4;
+ Offset += 4;
else
return true; // Unknown modifier.
+ // FIXME: M = high order bits
+ // FIXME: L = low order bits
}
- const MachineOperand &MO = MI->getOperand(OpNum);
- assert(MO.isReg() && "unexpected inline asm memory operand");
- O << Offset << "($" << MipsInstPrinter::getRegisterName(MO.getReg()) << ")";
+ O << Offset << "($" << MipsInstPrinter::getRegisterName(BaseMO.getReg()) << ")";
return false;
}
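The PrintAsmMemoryOperand rewrite reads the offset from the second machine operand instead of assuming zero, so the 'D' modifier now adds 4 to a real offset rather than replacing it. A standalone sketch of the offset(base) printing logic with stub operand types:

    #include <cstddef>
    #include <cstdio>

    struct OperandStub { bool IsReg; long Value; }; // reg number or immediate

    // Prints "offset($reg)"; 'D' selects the second word of a 64-bit value.
    static bool printMemOperandSketch(const OperandStub &Base,
                                      const OperandStub &Off, char Modifier,
                                      char *Buf, std::size_t Len) {
      if (!Base.IsReg || Off.IsReg)
        return true; // unexpected operand shapes -> error
      long Offset = Off.Value;
      if (Modifier == 'D')
        Offset += 4;   // added to the real offset, no longer a replacement
      else if (Modifier)
        return true;   // unknown modifier
      std::snprintf(Buf, Len, "%ld($%ld)", Offset, Base.Value);
      return false;
    }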
diff --git a/lib/Target/Mips/MipsCallingConv.td b/lib/Target/Mips/MipsCallingConv.td
index abee185..dcd88f2 100644
--- a/lib/Target/Mips/MipsCallingConv.td
+++ b/lib/Target/Mips/MipsCallingConv.td
@@ -123,7 +123,7 @@ def CC_MipsN_SoftFloat : CallingConv<[
]>;
def CC_MipsN : CallingConv<[
- CCIfType<[i8, i16, i32],
+ CCIfType<[i8, i16, i32, i64],
CCIfSubtargetNot<"isLittle()",
CCIfInReg<CCPromoteToUpperBitsInType<i64>>>>,
diff --git a/lib/Target/Mips/MipsDelaySlotFiller.cpp b/lib/Target/Mips/MipsDelaySlotFiller.cpp
index ac03c0b..606964d 100644
--- a/lib/Target/Mips/MipsDelaySlotFiller.cpp
+++ b/lib/Target/Mips/MipsDelaySlotFiller.cpp
@@ -140,7 +140,7 @@ namespace {
/// memory instruction can be moved to a delay slot.
class MemDefsUses : public InspectMemInstr {
public:
- MemDefsUses(const MachineFrameInfo *MFI);
+ MemDefsUses(const DataLayout &DL, const MachineFrameInfo *MFI);
private:
typedef PointerUnion<const Value *, const PseudoSourceValue *> ValueType;
@@ -158,6 +158,7 @@ namespace {
const MachineFrameInfo *MFI;
SmallPtrSet<ValueType, 4> Uses, Defs;
+ const DataLayout &DL;
/// Flags indicating whether loads or stores with no underlying objects have
/// been seen.
@@ -212,8 +213,8 @@ namespace {
/// moved to the delay slot. Returns true on success.
template<typename IterTy>
bool searchRange(MachineBasicBlock &MBB, IterTy Begin, IterTy End,
- RegDefsUses &RegDU, InspectMemInstr &IM,
- IterTy &Filler, Iter Slot) const;
+ RegDefsUses &RegDU, InspectMemInstr &IM, Iter Slot,
+ IterTy &Filler) const;
/// This function searches in the backward direction for an instruction that
/// can be moved to the delay slot. Returns true on success.
@@ -320,7 +321,8 @@ void RegDefsUses::setCallerSaved(const MachineInstr &MI) {
CallerSavedRegs.reset(Mips::ZERO);
CallerSavedRegs.reset(Mips::ZERO_64);
- for (const MCPhysReg *R = TRI.getCalleeSavedRegs(); *R; ++R)
+ for (const MCPhysReg *R = TRI.getCalleeSavedRegs(MI.getParent()->getParent());
+ *R; ++R)
for (MCRegAliasIterator AI(*R, &TRI, true); AI.isValid(); ++AI)
CallerSavedRegs.reset(*AI);
@@ -427,9 +429,9 @@ bool LoadFromStackOrConst::hasHazard_(const MachineInstr &MI) {
return true;
}
-MemDefsUses::MemDefsUses(const MachineFrameInfo *MFI_)
- : InspectMemInstr(false), MFI(MFI_), SeenNoObjLoad(false),
- SeenNoObjStore(false) {}
+MemDefsUses::MemDefsUses(const DataLayout &DL, const MachineFrameInfo *MFI_)
+ : InspectMemInstr(false), MFI(MFI_), DL(DL), SeenNoObjLoad(false),
+ SeenNoObjStore(false) {}
bool MemDefsUses::hasHazard_(const MachineInstr &MI) {
bool HasHazard = false;
@@ -482,7 +484,7 @@ getUnderlyingObjects(const MachineInstr &MI,
const Value *V = (*MI.memoperands_begin())->getValue();
SmallVector<Value *, 4> Objs;
- GetUnderlyingObjects(const_cast<Value *>(V), Objs);
+ GetUnderlyingObjects(const_cast<Value *>(V), Objs, DL);
for (SmallVectorImpl<Value *>::iterator I = Objs.begin(), E = Objs.end();
I != E; ++I) {
@@ -639,8 +641,8 @@ FunctionPass *llvm::createMipsDelaySlotFillerPass(MipsTargetMachine &tm) {
template<typename IterTy>
bool Filler::searchRange(MachineBasicBlock &MBB, IterTy Begin, IterTy End,
- RegDefsUses &RegDU, InspectMemInstr& IM,
- IterTy &Filler, Iter Slot) const {
+ RegDefsUses &RegDU, InspectMemInstr& IM, Iter Slot,
+ IterTy &Filler) const {
for (IterTy I = Begin; I != End; ++I) {
// skip debug value
if (I->isDebugValue())
@@ -688,13 +690,13 @@ bool Filler::searchBackward(MachineBasicBlock &MBB, Iter Slot) const {
return false;
RegDefsUses RegDU(*MBB.getParent()->getSubtarget().getRegisterInfo());
- MemDefsUses MemDU(MBB.getParent()->getFrameInfo());
+ MemDefsUses MemDU(*TM.getDataLayout(), MBB.getParent()->getFrameInfo());
ReverseIter Filler;
RegDU.init(*Slot);
- if (!searchRange(MBB, ReverseIter(Slot), MBB.rend(), RegDU, MemDU, Filler,
- Slot))
+ if (!searchRange(MBB, ReverseIter(Slot), MBB.rend(), RegDU, MemDU, Slot,
+ Filler))
return false;
MBB.splice(std::next(Slot), &MBB, std::next(Filler).base());
@@ -714,7 +716,7 @@ bool Filler::searchForward(MachineBasicBlock &MBB, Iter Slot) const {
RegDU.setCallerSaved(*Slot);
- if (!searchRange(MBB, std::next(Slot), MBB.end(), RegDU, NM, Filler, Slot))
+ if (!searchRange(MBB, std::next(Slot), MBB.end(), RegDU, NM, Slot, Filler))
return false;
MBB.splice(std::next(Slot), &MBB, Filler);
@@ -754,11 +756,11 @@ bool Filler::searchSuccBBs(MachineBasicBlock &MBB, Iter Slot) const {
IM.reset(new LoadFromStackOrConst());
} else {
const MachineFrameInfo *MFI = MBB.getParent()->getFrameInfo();
- IM.reset(new MemDefsUses(MFI));
+ IM.reset(new MemDefsUses(*TM.getDataLayout(), MFI));
}
- if (!searchRange(MBB, SuccBB->begin(), SuccBB->end(), RegDU, *IM, Filler,
- Slot))
+ if (!searchRange(MBB, SuccBB->begin(), SuccBB->end(), RegDU, *IM, Slot,
+ Filler))
return false;
insertDelayFiller(Filler, BrMap);
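The MemDefsUses changes above all serve one refactor: GetUnderlyingObjects now takes the DataLayout explicitly instead of reaching it through a global, so the inspector carries a const DataLayout & from the TargetMachine down to the query (the searchRange reorder just moves the Slot argument ahead of the Filler out-parameter). A minimal sketch of the threading pattern, assuming the 3.7-era LLVM headers:

  // Sketch only: an analysis helper grows a DataLayout parameter, so every
  // object on the call path stores or forwards one at construction time.
  struct Inspector {
    const DataLayout &DL;
    explicit Inspector(const DataLayout &DL) : DL(DL) {}
    void scan(const Value *V) {
      SmallVector<Value *, 4> Objs;
      GetUnderlyingObjects(const_cast<Value *>(V), Objs, DL); // new signature
    }
  };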
diff --git a/lib/Target/Mips/MipsFastISel.cpp b/lib/Target/Mips/MipsFastISel.cpp
index 7d69659..7de0081 100644
--- a/lib/Target/Mips/MipsFastISel.cpp
+++ b/lib/Target/Mips/MipsFastISel.cpp
@@ -89,6 +89,7 @@ class MipsFastISel final : public FastISel {
private:
// Selection routines.
+ bool selectLogicalOp(const Instruction *I);
bool selectLoad(const Instruction *I);
bool selectStore(const Instruction *I);
bool selectBranch(const Instruction *I);
@@ -102,6 +103,7 @@ private:
// Utility helper routines.
bool isTypeLegal(Type *Ty, MVT &VT);
+ bool isTypeSupported(Type *Ty, MVT &VT);
bool isLoadTypeLegal(Type *Ty, MVT &VT);
bool computeAddress(const Value *Obj, Address &Addr);
bool computeCallAddress(const Value *V, Address &Addr);
@@ -129,6 +131,9 @@ private:
unsigned getRegEnsuringSimpleIntegerWidening(const Value *, bool IsUnsigned);
+ unsigned emitLogicalOp(unsigned ISDOpc, MVT RetVT, const Value *LHS,
+ const Value *RHS);
+
unsigned materializeFP(const ConstantFP *CFP, MVT VT);
unsigned materializeGV(const GlobalValue *GV, MVT VT);
unsigned materializeInt(const Constant *C, MVT VT);
@@ -210,6 +215,43 @@ CCAssignFn *MipsFastISel::CCAssignFnForCall(CallingConv::ID CC) const {
return CC_MipsO32;
}
+unsigned MipsFastISel::emitLogicalOp(unsigned ISDOpc, MVT RetVT,
+ const Value *LHS, const Value *RHS) {
+ // Canonicalize immediates to the RHS first.
+ if (isa<ConstantInt>(LHS) && !isa<ConstantInt>(RHS))
+ std::swap(LHS, RHS);
+
+ unsigned Opc;
+ if (ISDOpc == ISD::AND) {
+ Opc = Mips::AND;
+ } else if (ISDOpc == ISD::OR) {
+ Opc = Mips::OR;
+ } else if (ISDOpc == ISD::XOR) {
+ Opc = Mips::XOR;
+ } else
+ llvm_unreachable("unexpected opcode");
+
+ unsigned LHSReg = getRegForValue(LHS);
+ unsigned ResultReg = createResultReg(&Mips::GPR32RegClass);
+ if (!ResultReg)
+ return 0;
+
+ unsigned RHSReg;
+ if (!LHSReg)
+ return 0;
+
+ if (const auto *C = dyn_cast<ConstantInt>(RHS))
+ RHSReg = materializeInt(C, MVT::i32);
+ else
+ RHSReg = getRegForValue(RHS);
+
+ if (!RHSReg)
+ return 0;
+
+ emitInst(Opc, ResultReg).addReg(LHSReg).addReg(RHSReg);
+ return ResultReg;
+}
+
unsigned MipsFastISel::materializeInt(const Constant *C, MVT VT) {
if (VT != MVT::i32 && VT != MVT::i16 && VT != MVT::i8 && VT != MVT::i1)
return 0;
@@ -421,6 +463,21 @@ bool MipsFastISel::isTypeLegal(Type *Ty, MVT &VT) {
return TLI.isTypeLegal(VT);
}
+bool MipsFastISel::isTypeSupported(Type *Ty, MVT &VT) {
+ if (Ty->isVectorTy())
+ return false;
+
+ if (isTypeLegal(Ty, VT))
+ return true;
+
+ // If this is a type that can be sign- or zero-extended to a basic operation,
+ // go ahead and accept it now.
+ if (VT == MVT::i1 || VT == MVT::i8 || VT == MVT::i16)
+ return true;
+
+ return false;
+}
+
bool MipsFastISel::isLoadTypeLegal(Type *Ty, MVT &VT) {
if (isTypeLegal(Ty, VT))
return true;
@@ -671,6 +728,33 @@ bool MipsFastISel::emitStore(MVT VT, unsigned SrcReg, Address &Addr,
return false;
}
+bool MipsFastISel::selectLogicalOp(const Instruction *I) {
+ MVT VT;
+ if (!isTypeSupported(I->getType(), VT))
+ return false;
+
+ unsigned ResultReg;
+ switch (I->getOpcode()) {
+ default:
+ llvm_unreachable("Unexpected instruction.");
+ case Instruction::And:
+ ResultReg = emitLogicalOp(ISD::AND, VT, I->getOperand(0), I->getOperand(1));
+ break;
+ case Instruction::Or:
+ ResultReg = emitLogicalOp(ISD::OR, VT, I->getOperand(0), I->getOperand(1));
+ break;
+ case Instruction::Xor:
+ ResultReg = emitLogicalOp(ISD::XOR, VT, I->getOperand(0), I->getOperand(1));
+ break;
+ }
+
+ if (!ResultReg)
+ return false;
+
+ updateValueMap(I, ResultReg);
+ return true;
+}
+
bool MipsFastISel::selectLoad(const Instruction *I) {
// Atomic loads need special handling.
if (cast<LoadInst>(I)->isAtomic())
@@ -1083,7 +1167,7 @@ bool MipsFastISel::fastLowerCall(CallLoweringInfo &CLI) {
// Add a register mask with the call-preserved registers.
// Proper defs for return values will be added by setPhysRegsDeadExcept().
- MIB.addRegMask(TRI.getCallPreservedMask(CC));
+ MIB.addRegMask(TRI.getCallPreservedMask(*FuncInfo.MF, CC));
CLI.Call = MIB;
@@ -1312,6 +1396,10 @@ bool MipsFastISel::fastSelectInstruction(const Instruction *I) {
return selectLoad(I);
case Instruction::Store:
return selectStore(I);
+ case Instruction::And:
+ case Instruction::Or:
+ case Instruction::Xor:
+ return selectLogicalOp(I);
case Instruction::Br:
return selectBranch(I);
case Instruction::Ret:
@@ -1354,7 +1442,7 @@ unsigned MipsFastISel::getRegEnsuringSimpleIntegerWidening(const Value *V,
void MipsFastISel::simplifyAddress(Address &Addr) {
if (!isInt<16>(Addr.getOffset())) {
unsigned TempReg =
- materialize32BitInt(Addr.getOffset(), &Mips::GPR32RegClass);
+ materialize32BitInt(Addr.getOffset(), &Mips::GPR32RegClass);
unsigned DestReg = createResultReg(&Mips::GPR32RegClass);
emitInst(Mips::ADDu, DestReg).addReg(TempReg).addReg(Addr.getReg());
Addr.setReg(DestReg);
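selectLogicalOp wires integer and/or/xor into the FastISel path, so straight-line bitwise IR at -O0 no longer falls back to SelectionDAG; emitLogicalOp canonicalizes a constant onto the RHS so it can be fed through materializeInt. A hypothetical input (not from the patch) that now stays entirely on the fast path:

  // All three bitwise forms are selected by MipsFastISel at -O0:
  unsigned mask(unsigned a, unsigned b) {
    return (a & b) | (a ^ 0xffu); // and, xor-with-immediate, or
  }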
diff --git a/lib/Target/Mips/MipsISelDAGToDAG.cpp b/lib/Target/Mips/MipsISelDAGToDAG.cpp
index 21fc8ce..c78c329 100644
--- a/lib/Target/Mips/MipsISelDAGToDAG.cpp
+++ b/lib/Target/Mips/MipsISelDAGToDAG.cpp
@@ -230,9 +230,18 @@ SDNode* MipsDAGToDAGISel::Select(SDNode *Node) {
}
bool MipsDAGToDAGISel::
-SelectInlineAsmMemoryOperand(const SDValue &Op, char ConstraintCode,
+SelectInlineAsmMemoryOperand(const SDValue &Op, unsigned ConstraintID,
std::vector<SDValue> &OutOps) {
- assert(ConstraintCode == 'm' && "unexpected asm memory constraint");
- OutOps.push_back(Op);
- return false;
+ // All memory constraints can at least accept raw pointers.
+ switch(ConstraintID) {
+ default:
+ llvm_unreachable("Unexpected asm memory constraint");
+ case InlineAsm::Constraint_i:
+ case InlineAsm::Constraint_m:
+ case InlineAsm::Constraint_R:
+ case InlineAsm::Constraint_ZC:
+ OutOps.push_back(Op);
+ return false;
+ }
+ return true;
}
diff --git a/lib/Target/Mips/MipsISelDAGToDAG.h b/lib/Target/Mips/MipsISelDAGToDAG.h
index 6b72877..aec731e 100644
--- a/lib/Target/Mips/MipsISelDAGToDAG.h
+++ b/lib/Target/Mips/MipsISelDAGToDAG.h
@@ -125,7 +125,7 @@ private:
virtual void processFunctionAfterISel(MachineFunction &MF) = 0;
bool SelectInlineAsmMemoryOperand(const SDValue &Op,
- char ConstraintCode,
+ unsigned ConstraintID,
std::vector<SDValue> &OutOps) override;
};
}
diff --git a/lib/Target/Mips/MipsISelLowering.cpp b/lib/Target/Mips/MipsISelLowering.cpp
index 9253b2e..e4bae03 100644
--- a/lib/Target/Mips/MipsISelLowering.cpp
+++ b/lib/Target/Mips/MipsISelLowering.cpp
@@ -617,6 +617,33 @@ static SDValue performSELECTCombine(SDNode *N, SelectionDAG &DAG,
return SDValue();
}
+static SDValue performCMovFPCombine(SDNode *N, SelectionDAG &DAG,
+ TargetLowering::DAGCombinerInfo &DCI,
+ const MipsSubtarget &Subtarget) {
+ if (DCI.isBeforeLegalizeOps())
+ return SDValue();
+
+ SDValue ValueIfTrue = N->getOperand(0), ValueIfFalse = N->getOperand(2);
+
+ ConstantSDNode *FalseC = dyn_cast<ConstantSDNode>(ValueIfFalse);
+ if (!FalseC || FalseC->getZExtValue())
+ return SDValue();
+
+ // Since RHS (False) is 0, we swap the order of the True/False operands
+ // (obviously also inverting the condition) so that we can
+ // take advantage of conditional moves using the $0 register.
+ // Example:
+ // return (a != 0) ? x : 0;
+ // load $reg, x
+ // movz $reg, $0, a
+ unsigned Opc = (N->getOpcode() == MipsISD::CMovFP_T) ? MipsISD::CMovFP_F :
+ MipsISD::CMovFP_T;
+
+ SDValue FCC = N->getOperand(1), Glue = N->getOperand(3);
+ return DAG.getNode(Opc, SDLoc(N), ValueIfFalse.getValueType(),
+ ValueIfFalse, FCC, ValueIfTrue, Glue);
+}
+
static SDValue performANDCombine(SDNode *N, SelectionDAG &DAG,
TargetLowering::DAGCombinerInfo &DCI,
const MipsSubtarget &Subtarget) {
@@ -750,6 +777,9 @@ SDValue MipsTargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI)
return performDivRemCombine(N, DAG, DCI, Subtarget);
case ISD::SELECT:
return performSELECTCombine(N, DAG, DCI, Subtarget);
+ case MipsISD::CMovFP_F:
+ case MipsISD::CMovFP_T:
+ return performCMovFPCombine(N, DAG, DCI, Subtarget);
case ISD::AND:
return performANDCombine(N, DAG, DCI, Subtarget);
case ISD::OR:
@@ -2451,7 +2481,8 @@ getOpndList(SmallVectorImpl<SDValue> &Ops,
// Add a register mask operand representing the call-preserved registers.
const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
- const uint32_t *Mask = TRI->getCallPreservedMask(CLI.CallConv);
+ const uint32_t *Mask =
+ TRI->getCallPreservedMask(CLI.DAG.getMachineFunction(), CLI.CallConv);
assert(Mask && "Missing call preserved mask for calling convention");
if (Subtarget.inMips16HardFloat()) {
if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(CLI.Callee)) {
@@ -3001,6 +3032,15 @@ MipsTargetLowering::CanLowerReturn(CallingConv::ID CallConv,
return CCInfo.CheckReturn(Outs, RetCC_Mips);
}
+bool
+MipsTargetLowering::shouldSignExtendTypeInLibCall(EVT Type, bool IsSigned) const {
+ if (Subtarget.hasMips3() && Subtarget.abiUsesSoftFloat()) {
+ if (Type == MVT::i32)
+ return true;
+ }
+ return IsSigned;
+}
+
SDValue
MipsTargetLowering::LowerReturn(SDValue Chain,
CallingConv::ID CallConv, bool IsVarArg,
@@ -3133,6 +3173,10 @@ getConstraintType(const std::string &Constraint) const
return C_Memory;
}
}
+
+ if (Constraint == "ZC")
+ return C_Memory;
+
return TargetLowering::getConstraintType(Constraint);
}
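performCMovFPCombine only fires when the false operand is a constant zero: it swaps the true/false values and flips CMovFP_T/CMovFP_F so the zero can be read straight from $zero instead of being materialized. A hedged illustration (the source below is an assumption, not from the patch):

  // Any FP compare feeding a select-against-zero has this shape:
  int pick(double a, double b, int x) { return (a < b) ? x : 0; }
  // Conceptually: load x into $result, then "movf $result, $zero, $fcc0"
  // zeroes it when the condition is false; one materialization saved.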
diff --git a/lib/Target/Mips/MipsISelLowering.h b/lib/Target/Mips/MipsISelLowering.h
index 9f86a43..40b6661 100644
--- a/lib/Target/Mips/MipsISelLowering.h
+++ b/lib/Target/Mips/MipsISelLowering.h
@@ -475,6 +475,8 @@ namespace llvm {
const SmallVectorImpl<SDValue> &OutVals,
SDLoc dl, SelectionDAG &DAG) const override;
+ bool shouldSignExtendTypeInLibCall(EVT Type, bool IsSigned) const override;
+
// Inline asm support
ConstraintType
getConstraintType(const std::string &Constraint) const override;
@@ -503,6 +505,15 @@ namespace llvm {
std::vector<SDValue> &Ops,
SelectionDAG &DAG) const override;
+ unsigned getInlineAsmMemConstraint(
+ const std::string &ConstraintCode) const override {
+ if (ConstraintCode == "R")
+ return InlineAsm::Constraint_R;
+ else if (ConstraintCode == "ZC")
+ return InlineAsm::Constraint_ZC;
+ return TargetLowering::getInlineAsmMemConstraint(ConstraintCode);
+ }
+
bool isLegalAddressingMode(const AddrMode &AM, Type *Ty) const override;
bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const override;
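The new getInlineAsmMemConstraint override is the target half of the char-to-unsigned migration visible in MipsISelDAGToDAG.cpp above: multi-letter memory constraints such as "ZC" cannot fit in a single char, so targets now map constraint strings to InlineAsm::Constraint_* IDs. A hedged usage sketch from the C side ("ZC" constrains the operand to an address form that ll/sc/pref accept on the selected subtarget):

  int load_linked(volatile int *p) {
    int v;
    asm volatile("ll %0, %1" : "=r"(v) : "ZC"(*p));
    return v;
  }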
diff --git a/lib/Target/Mips/MipsInstrInfo.h b/lib/Target/Mips/MipsInstrInfo.h
index db149d4..7b2b289 100644
--- a/lib/Target/Mips/MipsInstrInfo.h
+++ b/lib/Target/Mips/MipsInstrInfo.h
@@ -29,7 +29,7 @@
#include "MipsGenInstrInfo.inc"
namespace llvm {
-
+class MipsSubtarget;
class MipsInstrInfo : public MipsGenInstrInfo {
virtual void anchor();
protected:
diff --git a/lib/Target/Mips/MipsInstrInfo.td b/lib/Target/Mips/MipsInstrInfo.td
index 04a16b3..c937d2b 100644
--- a/lib/Target/Mips/MipsInstrInfo.td
+++ b/lib/Target/Mips/MipsInstrInfo.td
@@ -1000,7 +1000,7 @@ class ExtBase<string opstr, RegisterOperand RO, Operand PosOpnd,
SDPatternOperator Op = null_frag>:
InstSE<(outs RO:$rt), (ins RO:$rs, PosOpnd:$pos, size_ext:$size),
!strconcat(opstr, " $rt, $rs, $pos, $size"),
- [(set RO:$rt, (Op RO:$rs, imm:$pos, imm:$size))], NoItinerary,
+ [(set RO:$rt, (Op RO:$rs, imm:$pos, imm:$size))], II_EXT,
FrmR, opstr>, ISA_MIPS32R2;
class InsBase<string opstr, RegisterOperand RO, Operand PosOpnd,
@@ -1008,7 +1008,7 @@ class InsBase<string opstr, RegisterOperand RO, Operand PosOpnd,
InstSE<(outs RO:$rt), (ins RO:$rs, PosOpnd:$pos, size_ins:$size, RO:$src),
!strconcat(opstr, " $rt, $rs, $pos, $size"),
[(set RO:$rt, (Op RO:$rs, imm:$pos, imm:$size, RO:$src))],
- NoItinerary, FrmR, opstr>, ISA_MIPS32R2 {
+ II_INS, FrmR, opstr>, ISA_MIPS32R2 {
let Constraints = "$src = $rt";
}
@@ -1140,12 +1140,13 @@ def XORi : MMRel, ArithLogicI<"xori", uimm16, GPR32Opnd, II_XORI, immZExt16,
xor>,
ADDI_FM<0xe>;
def LUi : MMRel, LoadUpper<"lui", GPR32Opnd, uimm16>, LUI_FM;
-
+let AdditionalPredicates = [NotInMicroMips] in {
/// Arithmetic Instructions (3-Operand, R-Type)
def ADDu : MMRel, ArithLogicR<"addu", GPR32Opnd, 1, II_ADDU, add>,
ADD_FM<0, 0x21>;
def SUBu : MMRel, ArithLogicR<"subu", GPR32Opnd, 0, II_SUBU, sub>,
ADD_FM<0, 0x23>;
+}
let Defs = [HI0, LO0] in
def MUL : MMRel, ArithLogicR<"mul", GPR32Opnd, 1, II_MUL, mul>,
ADD_FM<0x1c, 2>, ISA_MIPS32_NOT_32R6_64R6;
@@ -1579,6 +1580,8 @@ def : MipsInstAlias<"sltu $rt, $rs, $imm",
(SLTiu GPR32Opnd:$rt, GPR32Opnd:$rs, simm16:$imm), 0>;
def : MipsInstAlias<"xor $rs, $rt, $imm",
(XORi GPR32Opnd:$rs, GPR32Opnd:$rt, uimm16:$imm), 0>;
+def : MipsInstAlias<"xor $rs, $imm",
+ (XORi GPR32Opnd:$rs, GPR32Opnd:$rs, uimm16:$imm), 0>;
def : MipsInstAlias<"or $rs, $rt, $imm",
(ORi GPR32Opnd:$rs, GPR32Opnd:$rt, uimm16:$imm), 0>;
def : MipsInstAlias<"or $rs, $imm",
@@ -1639,20 +1642,21 @@ def : MipsInstAlias<"sync",
// Assembler Pseudo Instructions
//===----------------------------------------------------------------------===//
-class LoadImm32<string instr_asm, Operand Od, RegisterOperand RO> :
+class LoadImmediate32<string instr_asm, Operand Od, RegisterOperand RO> :
MipsAsmPseudoInst<(outs RO:$rt), (ins Od:$imm32),
!strconcat(instr_asm, "\t$rt, $imm32")> ;
-def LoadImm32Reg : LoadImm32<"li", uimm5, GPR32Opnd>;
+def LoadImm32 : LoadImmediate32<"li", uimm5, GPR32Opnd>;
-class LoadAddress<string instr_asm, Operand MemOpnd, RegisterOperand RO> :
+class LoadAddressFromReg32<string instr_asm, Operand MemOpnd,
+ RegisterOperand RO> :
MipsAsmPseudoInst<(outs RO:$rt), (ins MemOpnd:$addr),
!strconcat(instr_asm, "\t$rt, $addr")> ;
-def LoadAddr32Reg : LoadAddress<"la", mem, GPR32Opnd>;
+def LoadAddrReg32 : LoadAddressFromReg32<"la", mem, GPR32Opnd>;
-class LoadAddressImm<string instr_asm, Operand Od, RegisterOperand RO> :
+class LoadAddressFromImm32<string instr_asm, Operand Od, RegisterOperand RO> :
MipsAsmPseudoInst<(outs RO:$rt), (ins Od:$imm32),
!strconcat(instr_asm, "\t$rt, $imm32")> ;
-def LoadAddr32Imm : LoadAddressImm<"la", uimm5, GPR32Opnd>;
+def LoadAddrImm32 : LoadAddressFromImm32<"la", uimm5, GPR32Opnd>;
def JalTwoReg : MipsAsmPseudoInst<(outs GPR32Opnd:$rd), (ins GPR32Opnd:$rs),
"jal\t$rd, $rs"> ;
@@ -1761,9 +1765,11 @@ def : WrapperPat<tblockaddress, ADDiu, GPR32>;
def : WrapperPat<tjumptable, ADDiu, GPR32>;
def : WrapperPat<tglobaltlsaddr, ADDiu, GPR32>;
+let AdditionalPredicates = [NotInMicroMips] in {
// Mips does not have "not", so we expand our way
def : MipsPat<(not GPR32:$in),
(NOR GPR32Opnd:$in, ZERO)>;
+}
// extended loads
def : MipsPat<(i32 (extloadi1 addr:$src)), (LBu addr:$src)>;
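Three independent .td cleanups land here: ext and ins get real itinerary classes (II_EXT, II_INS) in place of NoItinerary, so scheduling models can assign them a latency; a two-operand alias lets assemblers accept `xor $4, 100` as shorthand for `xori $4, $4, 100`, mirroring the existing `or $rs, $imm` alias; and ADDu, SUBu, and the `not`-to-NOR expansion move behind the NotInMicroMips predicate so that the microMIPS encodings can match first.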
diff --git a/lib/Target/Mips/MipsMCInstLower.cpp b/lib/Target/Mips/MipsMCInstLower.cpp
index 821392e..5258181 100644
--- a/lib/Target/Mips/MipsMCInstLower.cpp
+++ b/lib/Target/Mips/MipsMCInstLower.cpp
@@ -22,6 +22,7 @@
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCStreamer.h"
using namespace llvm;
diff --git a/lib/Target/Mips/MipsMachineFunction.cpp b/lib/Target/Mips/MipsMachineFunction.cpp
index 30b93dc..09e722d 100644
--- a/lib/Target/Mips/MipsMachineFunction.cpp
+++ b/lib/Target/Mips/MipsMachineFunction.cpp
@@ -79,14 +79,19 @@ unsigned MipsFunctionInfo::getGlobalBaseReg() {
if (GlobalBaseReg)
return GlobalBaseReg;
+ MipsSubtarget const &STI =
+ static_cast<const MipsSubtarget &>(MF.getSubtarget());
+
const TargetRegisterClass *RC =
- static_cast<const MipsSubtarget &>(MF.getSubtarget()).inMips16Mode()
+ STI.inMips16Mode()
? &Mips::CPU16RegsRegClass
- : static_cast<const MipsTargetMachine &>(MF.getTarget())
- .getABI()
- .IsN64()
- ? &Mips::GPR64RegClass
- : &Mips::GPR32RegClass;
+ : STI.inMicroMipsMode()
+ ? &Mips::GPRMM16RegClass
+ : static_cast<const MipsTargetMachine &>(MF.getTarget())
+ .getABI()
+ .IsN64()
+ ? &Mips::GPR64RegClass
+ : &Mips::GPR32RegClass;
return GlobalBaseReg = MF.getRegInfo().createVirtualRegister(RC);
}
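The register-class choice above gains a third arm for microMIPS. An equivalent if/else reading of the nested conditional (a restatement, not new logic; IsN64 stands for the TargetMachine ABI query):

  const TargetRegisterClass *RC;
  if (STI.inMips16Mode())
    RC = &Mips::CPU16RegsRegClass;   // MIPS16 register file
  else if (STI.inMicroMipsMode())
    RC = &Mips::GPRMM16RegClass;     // new: 16-bit-encodable microMIPS GPRs
  else if (IsN64)
    RC = &Mips::GPR64RegClass;
  else
    RC = &Mips::GPR32RegClass;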
diff --git a/lib/Target/Mips/MipsModuleISelDAGToDAG.cpp b/lib/Target/Mips/MipsModuleISelDAGToDAG.cpp
index b011e8f..b18a673 100644
--- a/lib/Target/Mips/MipsModuleISelDAGToDAG.cpp
+++ b/lib/Target/Mips/MipsModuleISelDAGToDAG.cpp
@@ -8,15 +8,36 @@
//
//===----------------------------------------------------------------------===//
-#include "MipsISelDAGToDAG.h"
-#include "MipsModuleISelDAGToDAG.h"
-#include "llvm/Support/Casting.h"
+#include "Mips.h"
+#include "MipsTargetMachine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
#define DEBUG_TYPE "mips-isel"
-namespace llvm {
+namespace {
+ class MipsModuleDAGToDAGISel : public MachineFunctionPass {
+ public:
+ static char ID;
+
+ explicit MipsModuleDAGToDAGISel(MipsTargetMachine &TM_)
+ : MachineFunctionPass(ID), TM(TM_) {}
+
+ // Pass Name
+ const char *getPassName() const override {
+ return "MIPS DAG->DAG Pattern Instruction Selection";
+ }
+
+ bool runOnMachineFunction(MachineFunction &MF) override;
+
+ protected:
+ MipsTargetMachine &TM;
+ };
+
+ char MipsModuleDAGToDAGISel::ID = 0;
+}
bool MipsModuleDAGToDAGISel::runOnMachineFunction(MachineFunction &MF) {
DEBUG(errs() << "In MipsModuleDAGToDAGISel::runMachineFunction\n");
@@ -24,13 +45,6 @@ bool MipsModuleDAGToDAGISel::runOnMachineFunction(MachineFunction &MF) {
return false;
}
-char MipsModuleDAGToDAGISel::ID = 0;
-
-}
-
-
-llvm::FunctionPass *llvm::createMipsModuleISelDag(MipsTargetMachine &TM) {
+llvm::FunctionPass *llvm::createMipsModuleISelDagPass(MipsTargetMachine &TM) {
return new MipsModuleDAGToDAGISel(TM);
}
-
-
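This restructure is the standard LLVM idiom for passes with no public interface: the class definition moves from a header into an anonymous namespace in the .cpp, and only the create*Pass factory is exported (MipsOs16 below gets the same treatment). The skeleton, with hypothetical names:

  #include "llvm/IR/Module.h"
  #include "llvm/Pass.h"
  using namespace llvm;

  namespace {
  struct ExamplePass : ModulePass {       // hypothetical pass name
    static char ID;
    ExamplePass() : ModulePass(ID) {}
    bool runOnModule(Module &) override { return false; }
  };
  char ExamplePass::ID = 0;
  } // namespace

  ModulePass *createExamplePass() { return new ExamplePass; } // sole export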
diff --git a/lib/Target/Mips/MipsModuleISelDAGToDAG.h b/lib/Target/Mips/MipsModuleISelDAGToDAG.h
deleted file mode 100644
index 85bae47..0000000
--- a/lib/Target/Mips/MipsModuleISelDAGToDAG.h
+++ /dev/null
@@ -1,58 +0,0 @@
-//===---- MipsModuleISelDAGToDAG.h - Change Subtarget --------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file defines a pass used to change the subtarget for the
-// Mips Instruction selector.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_LIB_TARGET_MIPS_MIPSMODULEISELDAGTODAG_H
-#define LLVM_LIB_TARGET_MIPS_MIPSMODULEISELDAGTODAG_H
-
-#include "Mips.h"
-#include "MipsSubtarget.h"
-#include "MipsTargetMachine.h"
-#include "llvm/CodeGen/SelectionDAGISel.h"
-
-
-//===----------------------------------------------------------------------===//
-// Instruction Selector Implementation
-//===----------------------------------------------------------------------===//
-
-//===----------------------------------------------------------------------===//
-// MipsModuleDAGToDAGISel - MIPS specific code to select MIPS machine
-// instructions for SelectionDAG operations.
-//===----------------------------------------------------------------------===//
-namespace llvm {
-
-class MipsModuleDAGToDAGISel : public MachineFunctionPass {
-public:
-
- static char ID;
-
- explicit MipsModuleDAGToDAGISel(MipsTargetMachine &TM_)
- : MachineFunctionPass(ID), TM(TM_) {}
-
- // Pass Name
- const char *getPassName() const override {
- return "MIPS DAG->DAG Pattern Instruction Selection";
- }
-
- bool runOnMachineFunction(MachineFunction &MF) override;
-
-protected:
- MipsTargetMachine &TM;
-};
-
-/// createMipsISelDag - This pass converts a legalized DAG into a
-/// MIPS-specific DAG, ready for instruction scheduling.
-FunctionPass *createMipsModuleISelDag(MipsTargetMachine &TM);
-}
-
-#endif
diff --git a/lib/Target/Mips/MipsOs16.cpp b/lib/Target/Mips/MipsOs16.cpp
index 7aae964..b6cd791 100644
--- a/lib/Target/Mips/MipsOs16.cpp
+++ b/lib/Target/Mips/MipsOs16.cpp
@@ -11,14 +11,16 @@
//
//===----------------------------------------------------------------------===//
-#include "MipsOs16.h"
+#include "llvm/IR/Instructions.h"
+#include "Mips.h"
#include "llvm/IR/Module.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
-#define DEBUG_TYPE "mips-os16"
+using namespace llvm;
+#define DEBUG_TYPE "mips-os16"
static cl::opt<std::string> Mips32FunctionMask(
"mips32-function-mask",
@@ -27,70 +29,83 @@ static cl::opt<std::string> Mips32FunctionMask(
cl::Hidden);
namespace {
+ class MipsOs16 : public ModulePass {
+ public:
+ static char ID;
+
+ MipsOs16() : ModulePass(ID) {}
+
+ const char *getPassName() const override {
+ return "MIPS Os16 Optimization";
+ }
+
+ bool runOnModule(Module &M) override;
+ };
+
+ char MipsOs16::ID = 0;
+}
- // Figure out if we need float point based on the function signature.
- // We need to move variables in and/or out of floating point
- // registers because of the ABI
- //
- bool needsFPFromSig(Function &F) {
- Type* RetType = F.getReturnType();
- switch (RetType->getTypeID()) {
+// Figure out if we need floating point based on the function signature.
+// We need to move variables in and/or out of floating-point
+// registers because of the ABI.
+//
+static bool needsFPFromSig(Function &F) {
+ Type* RetType = F.getReturnType();
+ switch (RetType->getTypeID()) {
+ case Type::FloatTyID:
+ case Type::DoubleTyID:
+ return true;
+ default:
+ ;
+ }
+ if (F.arg_size() >=1) {
+ Argument &Arg = F.getArgumentList().front();
+ switch (Arg.getType()->getTypeID()) {
case Type::FloatTyID:
case Type::DoubleTyID:
return true;
default:
;
}
- if (F.arg_size() >=1) {
- Argument &Arg = F.getArgumentList().front();
- switch (Arg.getType()->getTypeID()) {
- case Type::FloatTyID:
- case Type::DoubleTyID:
- return true;
- default:
- ;
- }
- }
- return false;
}
+ return false;
+}
- // Figure out if the function will need floating point operations
- //
- bool needsFP(Function &F) {
- if (needsFPFromSig(F))
- return true;
- for (Function::const_iterator BB = F.begin(), E = F.end(); BB != E; ++BB)
- for (BasicBlock::const_iterator I = BB->begin(), E = BB->end();
+// Figure out if the function will need floating point operations
+//
+static bool needsFP(Function &F) {
+ if (needsFPFromSig(F))
+ return true;
+ for (Function::const_iterator BB = F.begin(), E = F.end(); BB != E; ++BB)
+ for (BasicBlock::const_iterator I = BB->begin(), E = BB->end();
I != E; ++I) {
- const Instruction &Inst = *I;
- switch (Inst.getOpcode()) {
- case Instruction::FAdd:
- case Instruction::FSub:
- case Instruction::FMul:
- case Instruction::FDiv:
- case Instruction::FRem:
- case Instruction::FPToUI:
- case Instruction::FPToSI:
- case Instruction::UIToFP:
- case Instruction::SIToFP:
- case Instruction::FPTrunc:
- case Instruction::FPExt:
- case Instruction::FCmp:
+ const Instruction &Inst = *I;
+ switch (Inst.getOpcode()) {
+ case Instruction::FAdd:
+ case Instruction::FSub:
+ case Instruction::FMul:
+ case Instruction::FDiv:
+ case Instruction::FRem:
+ case Instruction::FPToUI:
+ case Instruction::FPToSI:
+ case Instruction::UIToFP:
+ case Instruction::SIToFP:
+ case Instruction::FPTrunc:
+ case Instruction::FPExt:
+ case Instruction::FCmp:
+ return true;
+ default:
+ ;
+ }
+ if (const CallInst *CI = dyn_cast<CallInst>(I)) {
+ DEBUG(dbgs() << "Working on call" << "\n");
+ Function &F_ = *CI->getCalledFunction();
+ if (needsFPFromSig(F_))
return true;
- default:
- ;
- }
- if (const CallInst *CI = dyn_cast<CallInst>(I)) {
- DEBUG(dbgs() << "Working on call" << "\n");
- Function &F_ = *CI->getCalledFunction();
- if (needsFPFromSig(F_))
- return true;
- }
}
- return false;
- }
+ }
+ return false;
}
-namespace llvm {
bool MipsOs16::runOnModule(Module &M) {
@@ -136,12 +151,6 @@ bool MipsOs16::runOnModule(Module &M) {
return modified;
}
-char MipsOs16::ID = 0;
-
-}
-
-ModulePass *llvm::createMipsOs16(MipsTargetMachine &TM) {
+ModulePass *llvm::createMipsOs16Pass(MipsTargetMachine &TM) {
return new MipsOs16;
}
-
-
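With needsFPFromSig and needsFP demoted to file-local statics, the pass logic is unchanged: a function is only a MIPS16 candidate under -Os16 if neither its signature nor its body touches floating point. Hedged examples of each trigger (not from the patch):

  double scale(double x) { return x * 2.0; } // body trigger: FMul opcode
  float  ident(float x)  { return x; }       // signature trigger: float arg/ret
  int    add(int a, int b) { return a + b; } // neither: stays MIPS16-eligible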
diff --git a/lib/Target/Mips/MipsOs16.h b/lib/Target/Mips/MipsOs16.h
deleted file mode 100644
index 77183ec..0000000
--- a/lib/Target/Mips/MipsOs16.h
+++ /dev/null
@@ -1,47 +0,0 @@
-//===---- MipsOs16.h for Mips Option -Os16 --------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file defines an optimization phase for the MIPS target.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_LIB_TARGET_MIPS_MIPSOS16_H
-#define LLVM_LIB_TARGET_MIPS_MIPSOS16_H
-
-#include "MCTargetDesc/MipsMCTargetDesc.h"
-#include "MipsTargetMachine.h"
-#include "llvm/Pass.h"
-#include "llvm/Target/TargetMachine.h"
-
-using namespace llvm;
-
-namespace llvm {
-
-class MipsOs16 : public ModulePass {
-
-public:
- static char ID;
-
- MipsOs16() : ModulePass(ID) {
-
- }
-
- const char *getPassName() const override {
- return "MIPS Os16 Optimization";
- }
-
- bool runOnModule(Module &M) override;
-
-};
-
-ModulePass *createMipsOs16(MipsTargetMachine &TM);
-
-}
-
-#endif
diff --git a/lib/Target/Mips/MipsRegisterInfo.cpp b/lib/Target/Mips/MipsRegisterInfo.cpp
index 2110c03..0ea48b1 100644
--- a/lib/Target/Mips/MipsRegisterInfo.cpp
+++ b/lib/Target/Mips/MipsRegisterInfo.cpp
@@ -43,14 +43,14 @@ using namespace llvm;
#define GET_REGINFO_TARGET_DESC
#include "MipsGenRegisterInfo.inc"
-MipsRegisterInfo::MipsRegisterInfo(const MipsSubtarget &ST)
- : MipsGenRegisterInfo(Mips::RA), Subtarget(ST) {}
+MipsRegisterInfo::MipsRegisterInfo() : MipsGenRegisterInfo(Mips::RA) {}
unsigned MipsRegisterInfo::getPICCallReg() { return Mips::T9; }
const TargetRegisterClass *
MipsRegisterInfo::getPointerRegClass(const MachineFunction &MF,
unsigned Kind) const {
+ const MipsSubtarget &Subtarget = MF.getSubtarget<MipsSubtarget>();
return Subtarget.isABI_N64() ? &Mips::GPR64RegClass : &Mips::GPR32RegClass;
}
@@ -63,7 +63,7 @@ MipsRegisterInfo::getRegPressureLimit(const TargetRegisterClass *RC,
case Mips::GPR32RegClassID:
case Mips::GPR64RegClassID:
case Mips::DSPRRegClassID: {
- const TargetFrameLowering *TFI = Subtarget.getFrameLowering();
+ const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering();
return 28 - TFI->hasFP(MF);
}
case Mips::FGR32RegClassID:
@@ -82,6 +82,7 @@ MipsRegisterInfo::getRegPressureLimit(const TargetRegisterClass *RC,
/// Mips Callee Saved Registers
const MCPhysReg *
MipsRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
+ const MipsSubtarget &Subtarget = MF->getSubtarget<MipsSubtarget>();
if (Subtarget.isSingleFloat())
return CSR_SingleFloatOnly_SaveList;
@@ -100,8 +101,10 @@ MipsRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
return CSR_O32_SaveList;
}
-const uint32_t*
-MipsRegisterInfo::getCallPreservedMask(CallingConv::ID) const {
+const uint32_t *
+MipsRegisterInfo::getCallPreservedMask(const MachineFunction &MF,
+ CallingConv::ID) const {
+ const MipsSubtarget &Subtarget = MF.getSubtarget<MipsSubtarget>();
if (Subtarget.isSingleFloat())
return CSR_SingleFloatOnly_RegMask;
@@ -135,6 +138,7 @@ getReservedRegs(const MachineFunction &MF) const {
};
BitVector Reserved(getNumRegs());
+ const MipsSubtarget &Subtarget = MF.getSubtarget<MipsSubtarget>();
typedef TargetRegisterClass::const_iterator RegIter;
for (unsigned I = 0; I < array_lengthof(ReservedGPR32); ++I)
@@ -257,6 +261,7 @@ eliminateFrameIndex(MachineBasicBlock::iterator II, int SPAdj,
unsigned MipsRegisterInfo::
getFrameRegister(const MachineFunction &MF) const {
+ const MipsSubtarget &Subtarget = MF.getSubtarget<MipsSubtarget>();
const TargetFrameLowering *TFI = Subtarget.getFrameLowering();
bool IsN64 =
static_cast<const MipsTargetMachine &>(MF.getTarget()).getABI().IsN64();
diff --git a/lib/Target/Mips/MipsRegisterInfo.h b/lib/Target/Mips/MipsRegisterInfo.h
index 9ec4a38..031b93e 100644
--- a/lib/Target/Mips/MipsRegisterInfo.h
+++ b/lib/Target/Mips/MipsRegisterInfo.h
@@ -21,15 +21,9 @@
#include "MipsGenRegisterInfo.inc"
namespace llvm {
-class MipsSubtarget;
-class Type;
-
class MipsRegisterInfo : public MipsGenRegisterInfo {
-protected:
- const MipsSubtarget &Subtarget;
-
public:
- MipsRegisterInfo(const MipsSubtarget &Subtarget);
+ MipsRegisterInfo();
/// getRegisterNumbering - Given the enum value for some register, e.g.
/// Mips::RA, return the number that it corresponds to (e.g. 31).
@@ -47,9 +41,9 @@ public:
unsigned getRegPressureLimit(const TargetRegisterClass *RC,
MachineFunction &MF) const override;
- const MCPhysReg *
- getCalleeSavedRegs(const MachineFunction *MF = nullptr) const override;
- const uint32_t *getCallPreservedMask(CallingConv::ID) const override;
+ const MCPhysReg *getCalleeSavedRegs(const MachineFunction *MF) const override;
+ const uint32_t *getCallPreservedMask(const MachineFunction &MF,
+ CallingConv::ID) const override;
static const uint32_t *getMips16RetHelperMask();
BitVector getReservedRegs(const MachineFunction &MF) const override;
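Dropping the cached Subtarget member is what makes per-function code generation workable: with function attributes able to flip mips16/nomips16, one MipsRegisterInfo instance must serve functions with different feature sets, so every query re-derives the subtarget from the MachineFunction it is handed. The pattern, abridged from the .cpp hunks above:

  const MCPhysReg *
  MipsRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
    const MipsSubtarget &ST = MF->getSubtarget<MipsSubtarget>();
    return ST.isSingleFloat() ? CSR_SingleFloatOnly_SaveList
                              : CSR_O32_SaveList; // abridged; more cases exist
  }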
diff --git a/lib/Target/Mips/MipsSEISelDAGToDAG.cpp b/lib/Target/Mips/MipsSEISelDAGToDAG.cpp
index 0761ded..a598c3f 100644
--- a/lib/Target/Mips/MipsSEISelDAGToDAG.cpp
+++ b/lib/Target/Mips/MipsSEISelDAGToDAG.cpp
@@ -258,8 +258,12 @@ SDNode *MipsSEDAGToDAGISel::selectAddESubE(unsigned MOp, SDValue InFlag,
CurDAG->getTargetConstant(Mips::sub_32, VT));
}
- SDNode *AddCarry = CurDAG->getMachineNode(ADDuOp, DL, VT,
- SDValue(Carry, 0), RHS);
+ // Generate a second addition only if we know that RHS is not a
+ // constant-zero node.
+ SDNode *AddCarry = Carry;
+ ConstantSDNode *C = dyn_cast<ConstantSDNode>(RHS);
+ if (!C || C->getZExtValue())
+ AddCarry = CurDAG->getMachineNode(ADDuOp, DL, VT, SDValue(Carry, 0), RHS);
return CurDAG->SelectNodeTo(Node, MOp, VT, MVT::Glue, LHS,
SDValue(AddCarry, 0));
@@ -378,6 +382,17 @@ bool MipsSEDAGToDAGISel::selectIntAddr(SDValue Addr, SDValue &Base,
selectAddrDefault(Addr, Base, Offset);
}
+bool MipsSEDAGToDAGISel::selectAddrRegImm9(SDValue Addr, SDValue &Base,
+ SDValue &Offset) const {
+ if (selectAddrFrameIndex(Addr, Base, Offset))
+ return true;
+
+ if (selectAddrFrameIndexOffset(Addr, Base, Offset, 9))
+ return true;
+
+ return false;
+}
+
bool MipsSEDAGToDAGISel::selectAddrRegImm10(SDValue Addr, SDValue &Base,
SDValue &Offset) const {
if (selectAddrFrameIndex(Addr, Base, Offset))
@@ -401,6 +416,17 @@ bool MipsSEDAGToDAGISel::selectAddrRegImm12(SDValue Addr, SDValue &Base,
return false;
}
+bool MipsSEDAGToDAGISel::selectAddrRegImm16(SDValue Addr, SDValue &Base,
+ SDValue &Offset) const {
+ if (selectAddrFrameIndex(Addr, Base, Offset))
+ return true;
+
+ if (selectAddrFrameIndexOffset(Addr, Base, Offset, 16))
+ return true;
+
+ return false;
+}
+
bool MipsSEDAGToDAGISel::selectIntAddrMM(SDValue Addr, SDValue &Base,
SDValue &Offset) const {
return selectAddrRegImm12(Addr, Base, Offset) ||
@@ -912,6 +938,60 @@ std::pair<bool, SDNode*> MipsSEDAGToDAGISel::selectNode(SDNode *Node) {
return std::make_pair(false, nullptr);
}
+bool MipsSEDAGToDAGISel::
+SelectInlineAsmMemoryOperand(const SDValue &Op, unsigned ConstraintID,
+ std::vector<SDValue> &OutOps) {
+ SDValue Base, Offset;
+
+ switch(ConstraintID) {
+ default:
+ llvm_unreachable("Unexpected asm memory constraint");
+ // All memory constraints can at least accept raw pointers.
+ case InlineAsm::Constraint_i:
+ case InlineAsm::Constraint_R:
+ OutOps.push_back(Op);
+ OutOps.push_back(CurDAG->getTargetConstant(0, MVT::i32));
+ return false;
+ case InlineAsm::Constraint_m:
+ if (selectAddrRegImm16(Op, Base, Offset)) {
+ OutOps.push_back(Base);
+ OutOps.push_back(Offset);
+ return false;
+ }
+ OutOps.push_back(Op);
+ OutOps.push_back(CurDAG->getTargetConstant(0, MVT::i32));
+ return false;
+ case InlineAsm::Constraint_ZC:
+ // ZC matches whatever the pref, ll, and sc instructions can handle for the
+ // given subtarget.
+ if (Subtarget->inMicroMipsMode()) {
+ // On microMIPS, they can handle 12-bit offsets.
+ if (selectAddrRegImm12(Op, Base, Offset)) {
+ OutOps.push_back(Base);
+ OutOps.push_back(Offset);
+ return false;
+ }
+ } else if (Subtarget->hasMips32r6()) {
+ // On MIPS32r6/MIPS64r6, they can only handle 9-bit offsets.
+ if (selectAddrRegImm9(Op, Base, Offset)) {
+ OutOps.push_back(Base);
+ OutOps.push_back(Offset);
+ return false;
+ }
+ } else if (selectAddrRegImm16(Op, Base, Offset)) {
+ // Prior to MIPS32r6/MIPS64r6, they can handle 16-bit offsets.
+ OutOps.push_back(Base);
+ OutOps.push_back(Offset);
+ return false;
+ }
+ // In all cases, 0-bit offsets are acceptable.
+ OutOps.push_back(Op);
+ OutOps.push_back(CurDAG->getTargetConstant(0, MVT::i32));
+ return false;
+ }
+ return true;
+}
+
FunctionPass *llvm::createMipsSEISelDag(MipsTargetMachine &TM) {
return new MipsSEDAGToDAGISel(TM);
}
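The ZC arm above picks the widest base+offset form the subtarget's ll/sc/pref encodings allow, then falls back to a bare pointer with a zero offset. The widths it encodes, restated as a sketch:

  // Hedged restatement of what selectAddrRegImm{12,9,16} encode above:
  static unsigned zcOffsetBits(bool InMicroMips, bool HasR6) {
    if (InMicroMips) return 12; // microMIPS ll/sc: signed 12-bit offsets
    if (HasR6)       return 9;  // MIPS32r6/MIPS64r6: signed 9-bit offsets
    return 16;                  // pre-R6 encodings: full 16-bit offsets
  }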
diff --git a/lib/Target/Mips/MipsSEISelDAGToDAG.h b/lib/Target/Mips/MipsSEISelDAGToDAG.h
index 2d24eb4..a11fcf4 100644
--- a/lib/Target/Mips/MipsSEISelDAGToDAG.h
+++ b/lib/Target/Mips/MipsSEISelDAGToDAG.h
@@ -56,12 +56,18 @@ private:
bool selectIntAddr(SDValue Addr, SDValue &Base,
SDValue &Offset) const override;
+ bool selectAddrRegImm9(SDValue Addr, SDValue &Base,
+ SDValue &Offset) const;
+
bool selectAddrRegImm10(SDValue Addr, SDValue &Base,
SDValue &Offset) const;
bool selectAddrRegImm12(SDValue Addr, SDValue &Base,
SDValue &Offset) const;
+ bool selectAddrRegImm16(SDValue Addr, SDValue &Base,
+ SDValue &Offset) const;
+
bool selectIntAddrMM(SDValue Addr, SDValue &Base,
SDValue &Offset) const override;
@@ -111,6 +117,10 @@ private:
// Insert instructions to initialize the global base register in the
// first MBB of the function.
void initGlobalBaseReg(MachineFunction &MF);
+
+ bool SelectInlineAsmMemoryOperand(const SDValue &Op,
+ unsigned ConstraintID,
+ std::vector<SDValue> &OutOps) override;
};
FunctionPass *createMipsSEISelDag(MipsTargetMachine &TM);
diff --git a/lib/Target/Mips/MipsSEInstrInfo.cpp b/lib/Target/Mips/MipsSEInstrInfo.cpp
index 74f291f..b992579 100644
--- a/lib/Target/Mips/MipsSEInstrInfo.cpp
+++ b/lib/Target/Mips/MipsSEInstrInfo.cpp
@@ -27,7 +27,7 @@ using namespace llvm;
MipsSEInstrInfo::MipsSEInstrInfo(const MipsSubtarget &STI)
: MipsInstrInfo(STI, STI.getRelocationModel() == Reloc::PIC_ ? Mips::B
: Mips::J),
- RI(STI) {}
+ RI() {}
const MipsRegisterInfo &MipsSEInstrInfo::getRegisterInfo() const {
return RI;
diff --git a/lib/Target/Mips/MipsSERegisterInfo.cpp b/lib/Target/Mips/MipsSERegisterInfo.cpp
index 55c6638..b89207e 100644
--- a/lib/Target/Mips/MipsSERegisterInfo.cpp
+++ b/lib/Target/Mips/MipsSERegisterInfo.cpp
@@ -18,6 +18,7 @@
#include "MipsMachineFunction.h"
#include "MipsSEInstrInfo.h"
#include "MipsSubtarget.h"
+#include "MipsTargetMachine.h"
#include "llvm/ADT/BitVector.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
@@ -41,8 +42,7 @@ using namespace llvm;
#define DEBUG_TYPE "mips-reg-info"
-MipsSERegisterInfo::MipsSERegisterInfo(const MipsSubtarget &ST)
- : MipsRegisterInfo(ST) {}
+MipsSERegisterInfo::MipsSERegisterInfo() : MipsRegisterInfo() {}
bool MipsSERegisterInfo::
requiresRegisterScavenging(const MachineFunction &MF) const {
@@ -110,6 +110,8 @@ void MipsSERegisterInfo::eliminateFI(MachineBasicBlock::iterator II,
MachineFunction &MF = *MI.getParent()->getParent();
MachineFrameInfo *MFI = MF.getFrameInfo();
MipsFunctionInfo *MipsFI = MF.getInfo<MipsFunctionInfo>();
+ bool isN64 =
+ static_cast<const MipsTargetMachine &>(MF.getTarget()).getABI().IsN64();
const std::vector<CalleeSavedInfo> &CSI = MFI->getCalleeSavedInfo();
int MinCSFI = 0;
@@ -132,7 +134,7 @@ void MipsSERegisterInfo::eliminateFI(MachineBasicBlock::iterator II,
unsigned FrameReg;
if ((FrameIndex >= MinCSFI && FrameIndex <= MaxCSFI) || EhDataRegFI)
- FrameReg = Subtarget.isABI_N64() ? Mips::SP_64 : Mips::SP;
+ FrameReg = isN64 ? Mips::SP_64 : Mips::SP;
else
FrameReg = getFrameRegister(MF);
@@ -165,9 +167,9 @@ void MipsSERegisterInfo::eliminateFI(MachineBasicBlock::iterator II,
// (where n < 16) and doesn't, but does fit into 16-bits then use an ADDiu
MachineBasicBlock &MBB = *MI.getParent();
DebugLoc DL = II->getDebugLoc();
- unsigned ADDiu = Subtarget.isABI_N64() ? Mips::DADDiu : Mips::ADDiu;
+ unsigned ADDiu = isN64 ? Mips::DADDiu : Mips::ADDiu;
const TargetRegisterClass *RC =
- Subtarget.isABI_N64() ? &Mips::GPR64RegClass : &Mips::GPR32RegClass;
+ isN64 ? &Mips::GPR64RegClass : &Mips::GPR32RegClass;
MachineRegisterInfo &RegInfo = MBB.getParent()->getRegInfo();
unsigned Reg = RegInfo.createVirtualRegister(RC);
const MipsSEInstrInfo &TII =
@@ -183,7 +185,7 @@ void MipsSERegisterInfo::eliminateFI(MachineBasicBlock::iterator II,
// instructions.
MachineBasicBlock &MBB = *MI.getParent();
DebugLoc DL = II->getDebugLoc();
- unsigned ADDu = Subtarget.isABI_N64() ? Mips::DADDu : Mips::ADDu;
+ unsigned ADDu = isN64 ? Mips::DADDu : Mips::ADDu;
unsigned NewImm = 0;
const MipsSEInstrInfo &TII =
*static_cast<const MipsSEInstrInfo *>(
diff --git a/lib/Target/Mips/MipsSERegisterInfo.h b/lib/Target/Mips/MipsSERegisterInfo.h
index 6b70d07..ebae190 100644
--- a/lib/Target/Mips/MipsSERegisterInfo.h
+++ b/lib/Target/Mips/MipsSERegisterInfo.h
@@ -22,7 +22,7 @@ class MipsSEInstrInfo;
class MipsSERegisterInfo : public MipsRegisterInfo {
public:
- MipsSERegisterInfo(const MipsSubtarget &Subtarget);
+ MipsSERegisterInfo();
bool requiresRegisterScavenging(const MachineFunction &MF) const override;
diff --git a/lib/Target/Mips/MipsSchedule.td b/lib/Target/Mips/MipsSchedule.td
index ea98199..54b5d28 100644
--- a/lib/Target/Mips/MipsSchedule.td
+++ b/lib/Target/Mips/MipsSchedule.td
@@ -65,7 +65,9 @@ def II_DSRL32 : InstrItinClass;
def II_DSRLV : InstrItinClass;
def II_DSUBU : InstrItinClass;
def II_DSUB : InstrItinClass;
+def II_EXT : InstrItinClass; // Any EXT instruction
def II_FLOOR : InstrItinClass;
+def II_INS : InstrItinClass; // Any INS instruction
def II_LB : InstrItinClass;
def II_LBU : InstrItinClass;
def II_LD : InstrItinClass;
@@ -198,6 +200,8 @@ def MipsGenericItineraries : ProcessorItineraries<[ALU, IMULDIV], [], [
InstrItinData<II_DSUB , [InstrStage<1, [ALU]>]>,
InstrItinData<II_DROTR , [InstrStage<1, [ALU]>]>,
InstrItinData<II_DROTRV , [InstrStage<1, [ALU]>]>,
+ InstrItinData<II_EXT , [InstrStage<1, [ALU]>]>,
+ InstrItinData<II_INS , [InstrStage<1, [ALU]>]>,
InstrItinData<II_LUI , [InstrStage<1, [ALU]>]>,
InstrItinData<II_MOVF , [InstrStage<1, [ALU]>]>,
InstrItinData<II_MOVN , [InstrStage<1, [ALU]>]>,
diff --git a/lib/Target/Mips/MipsTargetMachine.cpp b/lib/Target/Mips/MipsTargetMachine.cpp
index 86c8931..79f6617 100644
--- a/lib/Target/Mips/MipsTargetMachine.cpp
+++ b/lib/Target/Mips/MipsTargetMachine.cpp
@@ -14,14 +14,11 @@
#include "MipsTargetMachine.h"
#include "Mips.h"
#include "Mips16FrameLowering.h"
-#include "Mips16HardFloat.h"
#include "Mips16ISelDAGToDAG.h"
#include "Mips16ISelLowering.h"
#include "Mips16InstrInfo.h"
#include "MipsFrameLowering.h"
#include "MipsInstrInfo.h"
-#include "MipsModuleISelDAGToDAG.h"
-#include "MipsOs16.h"
#include "MipsSEFrameLowering.h"
#include "MipsSEISelDAGToDAG.h"
#include "MipsSEISelLowering.h"
@@ -34,6 +31,7 @@
#include "llvm/Support/TargetRegistry.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Scalar.h"
+
using namespace llvm;
#define DEBUG_TYPE "mips"
@@ -46,8 +44,12 @@ extern "C" void LLVMInitializeMipsTarget() {
RegisterTargetMachine<MipselTargetMachine> B(TheMips64elTarget);
}
-static std::string computeDataLayout(bool isLittle, MipsABIInfo &ABI) {
+static std::string computeDataLayout(StringRef TT, StringRef CPU,
+ const TargetOptions &Options,
+ bool isLittle) {
std::string Ret = "";
+ MipsABIInfo ABI =
+ MipsABIInfo::computeTargetABI(Triple(TT), CPU, Options.MCOptions);
// There are both little and big endian mips.
if (isLittle)
@@ -86,11 +88,11 @@ MipsTargetMachine::MipsTargetMachine(const Target &T, StringRef TT,
const TargetOptions &Options,
Reloc::Model RM, CodeModel::Model CM,
CodeGenOpt::Level OL, bool isLittle)
- : LLVMTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL),
+ : LLVMTargetMachine(T, computeDataLayout(TT, CPU, Options, isLittle), TT,
+ CPU, FS, Options, RM, CM, OL),
isLittle(isLittle), TLOF(make_unique<MipsTargetObjectFile>()),
ABI(MipsABIInfo::computeTargetABI(Triple(TT), CPU, Options.MCOptions)),
- DL(computeDataLayout(isLittle, ABI)), Subtarget(nullptr),
- DefaultSubtarget(TT, CPU, FS, isLittle, *this),
+ Subtarget(nullptr), DefaultSubtarget(TT, CPU, FS, isLittle, *this),
NoMips16Subtarget(TT, CPU, FS.empty() ? "-mips16" : FS.str() + ",-mips16",
isLittle, *this),
Mips16Subtarget(TT, CPU, FS.empty() ? "+mips16" : FS.str() + ",+mips16",
@@ -209,14 +211,14 @@ void MipsPassConfig::addIRPasses() {
TargetPassConfig::addIRPasses();
addPass(createAtomicExpandPass(&getMipsTargetMachine()));
if (getMipsSubtarget().os16())
- addPass(createMipsOs16(getMipsTargetMachine()));
+ addPass(createMipsOs16Pass(getMipsTargetMachine()));
if (getMipsSubtarget().inMips16HardFloat())
- addPass(createMips16HardFloat(getMipsTargetMachine()));
+ addPass(createMips16HardFloatPass(getMipsTargetMachine()));
}
// Install an instruction selector pass using
// the ISelDag to gen Mips code.
bool MipsPassConfig::addInstSelector() {
- addPass(createMipsModuleISelDag(getMipsTargetMachine()));
+ addPass(createMipsModuleISelDagPass(getMipsTargetMachine()));
addPass(createMips16ISelDag(getMipsTargetMachine()));
addPass(createMipsSEISelDag(getMipsTargetMachine()));
return false;
diff --git a/lib/Target/Mips/MipsTargetMachine.h b/lib/Target/Mips/MipsTargetMachine.h
index afd0cea..5427d6a 100644
--- a/lib/Target/Mips/MipsTargetMachine.h
+++ b/lib/Target/Mips/MipsTargetMachine.h
@@ -31,7 +31,6 @@ class MipsTargetMachine : public LLVMTargetMachine {
std::unique_ptr<TargetLoweringObjectFile> TLOF;
// Selected ABI
MipsABIInfo ABI;
- const DataLayout DL; // Calculates type size & alignment
MipsSubtarget *Subtarget;
MipsSubtarget DefaultSubtarget;
MipsSubtarget NoMips16Subtarget;
@@ -47,8 +46,7 @@ public:
TargetIRAnalysis getTargetIRAnalysis() override;
- const DataLayout *getDataLayout() const override { return &DL; }
- const MipsSubtarget *getSubtargetImpl() const override {
+ const MipsSubtarget *getSubtargetImpl() const {
if (Subtarget)
return Subtarget;
return &DefaultSubtarget;
diff --git a/lib/Target/Mips/MipsTargetObjectFile.cpp b/lib/Target/Mips/MipsTargetObjectFile.cpp
index c07693e..723b63b 100644
--- a/lib/Target/Mips/MipsTargetObjectFile.cpp
+++ b/lib/Target/Mips/MipsTargetObjectFile.cpp
@@ -9,6 +9,7 @@
#include "MipsTargetObjectFile.h"
#include "MipsSubtarget.h"
+#include "MipsTargetMachine.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/GlobalVariable.h"
@@ -44,7 +45,7 @@ void MipsTargetObjectFile::Initialize(MCContext &Ctx, const TargetMachine &TM){
SmallBSSSection = getContext().getELFSection(".sbss", ELF::SHT_NOBITS,
ELF::SHF_WRITE | ELF::SHF_ALLOC);
- this->TM = &TM;
+ this->TM = &static_cast<const MipsTargetMachine &>(TM);
}
// An address must be loaded from a small section if its size is less than the
@@ -84,7 +85,8 @@ IsGlobalInSmallSection(const GlobalValue *GV, const TargetMachine &TM,
bool MipsTargetObjectFile::
IsGlobalInSmallSectionImpl(const GlobalValue *GV,
const TargetMachine &TM) const {
- const MipsSubtarget &Subtarget = TM.getSubtarget<MipsSubtarget>();
+ const MipsSubtarget &Subtarget =
+ *static_cast<const MipsTargetMachine &>(TM).getSubtargetImpl();
// Return if small section is not available.
if (!Subtarget.useSmallSection())
@@ -127,9 +129,11 @@ SelectSectionForGlobal(const GlobalValue *GV, SectionKind Kind,
/// Return true if this constant should be placed into small data section.
bool MipsTargetObjectFile::
IsConstantInSmallSection(const Constant *CN, const TargetMachine &TM) const {
- return (
- TM.getSubtarget<MipsSubtarget>().useSmallSection() && LocalSData &&
- IsInSmallSection(TM.getDataLayout()->getTypeAllocSize(CN->getType())));
+ return (static_cast<const MipsTargetMachine &>(TM)
+ .getSubtargetImpl()
+ ->useSmallSection() &&
+ LocalSData && IsInSmallSection(TM.getDataLayout()->getTypeAllocSize(
+ CN->getType())));
}
const MCSection *MipsTargetObjectFile::
diff --git a/lib/Target/Mips/MipsTargetObjectFile.h b/lib/Target/Mips/MipsTargetObjectFile.h
index 3a2b298..45ed9d0 100644
--- a/lib/Target/Mips/MipsTargetObjectFile.h
+++ b/lib/Target/Mips/MipsTargetObjectFile.h
@@ -13,11 +13,11 @@
#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
namespace llvm {
-
+class MipsTargetMachine;
class MipsTargetObjectFile : public TargetLoweringObjectFileELF {
const MCSection *SmallDataSection;
const MCSection *SmallBSSSection;
- const TargetMachine *TM;
+ const MipsTargetMachine *TM;
public:
void Initialize(MCContext &Ctx, const TargetMachine &TM) override;
diff --git a/lib/Target/Mips/MipsTargetStreamer.h b/lib/Target/Mips/MipsTargetStreamer.h
index b3b8296..1ff041d 100644
--- a/lib/Target/Mips/MipsTargetStreamer.h
+++ b/lib/Target/Mips/MipsTargetStreamer.h
@@ -92,9 +92,9 @@ public:
}
virtual void emitDirectiveModuleOddSPReg(bool Enabled, bool IsO32ABI);
- virtual void emitDirectiveSetFp(MipsABIFlagsSection::FpABIKind Value){};
- virtual void emitMipsAbiFlags(){};
+ virtual void emitDirectiveSetFp(MipsABIFlagsSection::FpABIKind Value);
void forbidModuleDirective() { ModuleDirectiveAllowed = false; }
+ void reallowModuleDirective() { ModuleDirectiveAllowed = true; }
bool isModuleDirectiveAllowed() { return ModuleDirectiveAllowed; }
// This method enables template classes to set internal abi flags
@@ -197,7 +197,6 @@ public:
bool Is32BitABI) override;
void emitDirectiveModuleOddSPReg(bool Enabled, bool IsO32ABI) override;
void emitDirectiveSetFp(MipsABIFlagsSection::FpABIKind Value) override;
- void emitMipsAbiFlags() override;
};
// This part is for ELF object output
@@ -240,7 +239,7 @@ public:
// ABI Flags
void emitDirectiveModuleOddSPReg(bool Enabled, bool IsO32ABI) override;
- void emitMipsAbiFlags() override;
+ void emitMipsAbiFlags();
};
}
#endif
diff --git a/lib/Target/NVPTX/CMakeLists.txt b/lib/Target/NVPTX/CMakeLists.txt
index 3a4a19d..cdd2f1f 100644
--- a/lib/Target/NVPTX/CMakeLists.txt
+++ b/lib/Target/NVPTX/CMakeLists.txt
@@ -29,7 +29,6 @@ set(NVPTXCodeGen_sources
NVPTXTargetMachine.cpp
NVPTXTargetTransformInfo.cpp
NVPTXUtilities.cpp
- NVPTXutil.cpp
NVVMReflect.cpp
)
diff --git a/lib/Target/NVPTX/MCTargetDesc/NVPTXMCAsmInfo.cpp b/lib/Target/NVPTX/MCTargetDesc/NVPTXMCAsmInfo.cpp
index 11d737e..b9df3d1 100644
--- a/lib/Target/NVPTX/MCTargetDesc/NVPTXMCAsmInfo.cpp
+++ b/lib/Target/NVPTX/MCTargetDesc/NVPTXMCAsmInfo.cpp
@@ -39,6 +39,8 @@ NVPTXMCAsmInfo::NVPTXMCAsmInfo(StringRef TT) {
InlineAsmEnd = " inline asm";
SupportsDebugInformation = CompileForDebugging;
+ // PTX does not allow .align on functions.
+ HasFunctionAlignment = false;
HasDotTypeDotSizeDirective = false;
Data8bitsDirective = " .b8 ";
diff --git a/lib/Target/NVPTX/MCTargetDesc/NVPTXMCTargetDesc.cpp b/lib/Target/NVPTX/MCTargetDesc/NVPTXMCTargetDesc.cpp
index 158ca90..2b4d864 100644
--- a/lib/Target/NVPTX/MCTargetDesc/NVPTXMCTargetDesc.cpp
+++ b/lib/Target/NVPTX/MCTargetDesc/NVPTXMCTargetDesc.cpp
@@ -71,35 +71,23 @@ static MCInstPrinter *createNVPTXMCInstPrinter(const Target &T,
// Force static initialization.
extern "C" void LLVMInitializeNVPTXTargetMC() {
- // Register the MC asm info.
- RegisterMCAsmInfo<NVPTXMCAsmInfo> X(TheNVPTXTarget32);
- RegisterMCAsmInfo<NVPTXMCAsmInfo> Y(TheNVPTXTarget64);
-
- // Register the MC codegen info.
- TargetRegistry::RegisterMCCodeGenInfo(TheNVPTXTarget32,
- createNVPTXMCCodeGenInfo);
- TargetRegistry::RegisterMCCodeGenInfo(TheNVPTXTarget64,
- createNVPTXMCCodeGenInfo);
-
- // Register the MC instruction info.
- TargetRegistry::RegisterMCInstrInfo(TheNVPTXTarget32, createNVPTXMCInstrInfo);
- TargetRegistry::RegisterMCInstrInfo(TheNVPTXTarget64, createNVPTXMCInstrInfo);
-
- // Register the MC register info.
- TargetRegistry::RegisterMCRegInfo(TheNVPTXTarget32,
- createNVPTXMCRegisterInfo);
- TargetRegistry::RegisterMCRegInfo(TheNVPTXTarget64,
- createNVPTXMCRegisterInfo);
-
- // Register the MC subtarget info.
- TargetRegistry::RegisterMCSubtargetInfo(TheNVPTXTarget32,
- createNVPTXMCSubtargetInfo);
- TargetRegistry::RegisterMCSubtargetInfo(TheNVPTXTarget64,
- createNVPTXMCSubtargetInfo);
-
- // Register the MCInstPrinter.
- TargetRegistry::RegisterMCInstPrinter(TheNVPTXTarget32,
- createNVPTXMCInstPrinter);
- TargetRegistry::RegisterMCInstPrinter(TheNVPTXTarget64,
- createNVPTXMCInstPrinter);
+ for (Target *T : {&TheNVPTXTarget32, &TheNVPTXTarget64}) {
+ // Register the MC asm info.
+ RegisterMCAsmInfo<NVPTXMCAsmInfo> X(*T);
+
+ // Register the MC codegen info.
+ TargetRegistry::RegisterMCCodeGenInfo(*T, createNVPTXMCCodeGenInfo);
+
+ // Register the MC instruction info.
+ TargetRegistry::RegisterMCInstrInfo(*T, createNVPTXMCInstrInfo);
+
+ // Register the MC register info.
+ TargetRegistry::RegisterMCRegInfo(*T, createNVPTXMCRegisterInfo);
+
+ // Register the MC subtarget info.
+ TargetRegistry::RegisterMCSubtargetInfo(*T, createNVPTXMCSubtargetInfo);
+
+ // Register the MCInstPrinter.
+ TargetRegistry::RegisterMCInstPrinter(*T, createNVPTXMCInstPrinter);
+ }
}
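The braced list in the rewritten loop is a std::initializer_list<Target *>, so one body registers the MC objects for both the 32- and 64-bit targets. The same idiom in isolation:

  #include <initializer_list>
  int x = 0, y = 0;
  void bumpBoth() {
    for (int *p : {&x, &y}) // iterates a std::initializer_list<int *>
      ++*p;
  }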
diff --git a/lib/Target/NVPTX/MCTargetDesc/NVPTXMCTargetDesc.h b/lib/Target/NVPTX/MCTargetDesc/NVPTXMCTargetDesc.h
index 98821d2..bfd5123 100644
--- a/lib/Target/NVPTX/MCTargetDesc/NVPTXMCTargetDesc.h
+++ b/lib/Target/NVPTX/MCTargetDesc/NVPTXMCTargetDesc.h
@@ -14,6 +14,8 @@
#ifndef LLVM_LIB_TARGET_NVPTX_MCTARGETDESC_NVPTXMCTARGETDESC_H
#define LLVM_LIB_TARGET_NVPTX_MCTARGETDESC_NVPTXMCTARGETDESC_H
+#include <stdint.h>
+
namespace llvm {
class Target;
diff --git a/lib/Target/NVPTX/NVPTXAllocaHoisting.cpp b/lib/Target/NVPTX/NVPTXAllocaHoisting.cpp
index 1f37696..4f3ccf4 100644
--- a/lib/Target/NVPTX/NVPTXAllocaHoisting.cpp
+++ b/lib/Target/NVPTX/NVPTXAllocaHoisting.cpp
@@ -12,11 +12,33 @@
//===----------------------------------------------------------------------===//
#include "NVPTXAllocaHoisting.h"
+#include "llvm/CodeGen/MachineFunctionAnalysis.h"
+#include "llvm/CodeGen/StackProtector.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Instructions.h"
+using namespace llvm;
-namespace llvm {
+namespace {
+// Hoisting the alloca instructions in the non-entry blocks to the entry
+// block.
+class NVPTXAllocaHoisting : public FunctionPass {
+public:
+ static char ID; // Pass ID
+ NVPTXAllocaHoisting() : FunctionPass(ID) {}
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.addPreserved<MachineFunctionAnalysis>();
+ AU.addPreserved<StackProtector>();
+ }
+
+ const char *getPassName() const override {
+ return "NVPTX specific alloca hoisting";
+ }
+
+ bool runOnFunction(Function &function) override;
+};
+} // namespace
bool NVPTXAllocaHoisting::runOnFunction(Function &function) {
bool functionModified = false;
@@ -36,11 +58,15 @@ bool NVPTXAllocaHoisting::runOnFunction(Function &function) {
return functionModified;
}
-char NVPTXAllocaHoisting::ID = 1;
-static RegisterPass<NVPTXAllocaHoisting>
-X("alloca-hoisting", "Hoisting alloca instructions in non-entry "
- "blocks to the entry block");
+char NVPTXAllocaHoisting::ID = 0;
+
+namespace llvm {
+void initializeNVPTXAllocaHoistingPass(PassRegistry &);
+}
-FunctionPass *createAllocaHoisting() { return new NVPTXAllocaHoisting(); }
+INITIALIZE_PASS(
+ NVPTXAllocaHoisting, "alloca-hoisting",
+ "Hoisting alloca instructions in non-entry blocks to the entry block",
+ false, false)
-} // end namespace llvm
+FunctionPass *llvm::createAllocaHoisting() { return new NVPTXAllocaHoisting; }
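Registration moves from a static RegisterPass<> object to the INITIALIZE_PASS macro plus the forward-declared initializeNVPTXAllocaHoistingPass, turning pass registration into an explicit call rather than a static initializer. Hedged usage sketch:

  // Typically invoked from the target's initialization hook:
  llvm::initializeNVPTXAllocaHoistingPass(*llvm::PassRegistry::getPassRegistry());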
diff --git a/lib/Target/NVPTX/NVPTXAllocaHoisting.h b/lib/Target/NVPTX/NVPTXAllocaHoisting.h
index c343980..7a6fc7d 100644
--- a/lib/Target/NVPTX/NVPTXAllocaHoisting.h
+++ b/lib/Target/NVPTX/NVPTXAllocaHoisting.h
@@ -14,38 +14,10 @@
#ifndef LLVM_LIB_TARGET_NVPTX_NVPTXALLOCAHOISTING_H
#define LLVM_LIB_TARGET_NVPTX_NVPTXALLOCAHOISTING_H
-#include "llvm/CodeGen/MachineFunctionAnalysis.h"
-#include "llvm/CodeGen/StackProtector.h"
-#include "llvm/IR/DataLayout.h"
-#include "llvm/Pass.h"
-
namespace llvm {
-
class FunctionPass;
-class Function;
-
-// Hoisting the alloca instructions in the non-entry blocks to the entry
-// block.
-class NVPTXAllocaHoisting : public FunctionPass {
-public:
- static char ID; // Pass ID
- NVPTXAllocaHoisting() : FunctionPass(ID) {}
-
- void getAnalysisUsage(AnalysisUsage &AU) const override {
- AU.addRequired<DataLayoutPass>();
- AU.addPreserved<MachineFunctionAnalysis>();
- AU.addPreserved<StackProtector>();
- }
-
- const char *getPassName() const override {
- return "NVPTX specific alloca hoisting";
- }
-
- bool runOnFunction(Function &function) override;
-};
extern FunctionPass *createAllocaHoisting();
-
} // end namespace llvm
#endif
diff --git a/lib/Target/NVPTX/NVPTXAsmPrinter.cpp b/lib/Target/NVPTX/NVPTXAsmPrinter.cpp
index 833db04..cc58b07 100644
--- a/lib/Target/NVPTX/NVPTXAsmPrinter.cpp
+++ b/lib/Target/NVPTX/NVPTXAsmPrinter.cpp
@@ -504,8 +504,7 @@ void NVPTXAsmPrinter::EmitFunctionBodyEnd() {
void NVPTXAsmPrinter::emitImplicitDef(const MachineInstr *MI) const {
unsigned RegNo = MI->getOperand(0).getReg();
- const TargetRegisterInfo *TRI = nvptxSubtarget->getRegisterInfo();
- if (TRI->isVirtualRegister(RegNo)) {
+ if (TargetRegisterInfo::isVirtualRegister(RegNo)) {
OutStreamer.AddComment(Twine("implicit-def: ") +
getVirtualRegisterName(RegNo));
} else {
@@ -522,15 +521,15 @@ void NVPTXAsmPrinter::emitKernelFunctionDirectives(const Function &F,
// If none of reqntid* is specified, don't output reqntid directive.
unsigned reqntidx, reqntidy, reqntidz;
bool specified = false;
- if (llvm::getReqNTIDx(F, reqntidx) == false)
+ if (!llvm::getReqNTIDx(F, reqntidx))
reqntidx = 1;
else
specified = true;
- if (llvm::getReqNTIDy(F, reqntidy) == false)
+ if (!llvm::getReqNTIDy(F, reqntidy))
reqntidy = 1;
else
specified = true;
- if (llvm::getReqNTIDz(F, reqntidz) == false)
+ if (!llvm::getReqNTIDz(F, reqntidz))
reqntidz = 1;
else
specified = true;
@@ -544,15 +543,15 @@ void NVPTXAsmPrinter::emitKernelFunctionDirectives(const Function &F,
// If none of maxntid* is specified, don't output maxntid directive.
unsigned maxntidx, maxntidy, maxntidz;
specified = false;
- if (llvm::getMaxNTIDx(F, maxntidx) == false)
+ if (!llvm::getMaxNTIDx(F, maxntidx))
maxntidx = 1;
else
specified = true;
- if (llvm::getMaxNTIDy(F, maxntidy) == false)
+ if (!llvm::getMaxNTIDy(F, maxntidy))
maxntidy = 1;
else
specified = true;
- if (llvm::getMaxNTIDz(F, maxntidz) == false)
+ if (!llvm::getMaxNTIDz(F, maxntidz))
maxntidz = 1;
else
specified = true;
@@ -673,7 +672,7 @@ static bool usedInOneFunc(const User *U, Function const *&oneFunc) {
}
for (const User *UU : U->users())
- if (usedInOneFunc(UU, oneFunc) == false)
+ if (!usedInOneFunc(UU, oneFunc))
return false;
return true;
@@ -687,7 +686,7 @@ static bool usedInOneFunc(const User *U, Function const *&oneFunc) {
* 3. Is the global variable referenced only in one function?
*/
static bool canDemoteGlobalVar(const GlobalVariable *gv, Function const *&f) {
- if (gv->hasInternalLinkage() == false)
+ if (!gv->hasInternalLinkage())
return false;
const PointerType *Pty = gv->getType();
if (Pty->getAddressSpace() != llvm::ADDRESS_SPACE_SHARED)
@@ -696,7 +695,7 @@ static bool canDemoteGlobalVar(const GlobalVariable *gv, Function const *&f) {
const Function *oneFunc = nullptr;
bool flag = usedInOneFunc(gv, oneFunc);
- if (flag == false)
+ if (!flag)
return false;
if (!oneFunc)
return false;
@@ -1472,7 +1471,7 @@ void NVPTXAsmPrinter::emitFunctionParamList(const Function *F, raw_ostream &O) {
}
}
- if (PAL.hasAttribute(paramIndex + 1, Attribute::ByVal) == false) {
+ if (!PAL.hasAttribute(paramIndex + 1, Attribute::ByVal)) {
if (Ty->isAggregateType() || Ty->isVectorTy()) {
// Just print .param .align <a> .b8 .param[size];
// <a> = PAL.getparamalignment
@@ -1788,7 +1787,7 @@ void NVPTXAsmPrinter::bufferLEByte(const Constant *CPV, int Bytes,
break;
} else if (const ConstantExpr *Cexpr = dyn_cast<ConstantExpr>(CPV)) {
if (const ConstantInt *constInt = dyn_cast<ConstantInt>(
- ConstantFoldConstantExpression(Cexpr, TD))) {
+ ConstantFoldConstantExpression(Cexpr, *TD))) {
int int32 = (int)(constInt->getZExtValue());
ptr = (unsigned char *)&int32;
aggBuffer->addBytes(ptr, 4, Bytes);
@@ -1810,7 +1809,7 @@ void NVPTXAsmPrinter::bufferLEByte(const Constant *CPV, int Bytes,
break;
} else if (const ConstantExpr *Cexpr = dyn_cast<ConstantExpr>(CPV)) {
if (const ConstantInt *constInt = dyn_cast<ConstantInt>(
- ConstantFoldConstantExpression(Cexpr, TD))) {
+ ConstantFoldConstantExpression(Cexpr, *TD))) {
long long int64 = (long long)(constInt->getZExtValue());
ptr = (unsigned char *)&int64;
aggBuffer->addBytes(ptr, 8, Bytes);
@@ -2085,13 +2084,6 @@ void NVPTXAsmPrinter::printMemOperand(const MachineInstr *MI, int opNum,
}
}
-
-// Force static initialization.
-extern "C" void LLVMInitializeNVPTXBackendAsmPrinter() {
- RegisterAsmPrinter<NVPTXAsmPrinter> X(TheNVPTXTarget32);
- RegisterAsmPrinter<NVPTXAsmPrinter> Y(TheNVPTXTarget64);
-}
-
void NVPTXAsmPrinter::emitSrcInText(StringRef filename, unsigned line) {
std::stringstream temp;
LineReader *reader = this->getReader(filename.str());
diff --git a/lib/Target/NVPTX/NVPTXAsmPrinter.h b/lib/Target/NVPTX/NVPTXAsmPrinter.h
index 7e6b5e8..9b11e70 100644
--- a/lib/Target/NVPTX/NVPTXAsmPrinter.h
+++ b/lib/Target/NVPTX/NVPTXAsmPrinter.h
@@ -92,8 +92,8 @@ class LLVM_LIBRARY_VISIBILITY NVPTXAsmPrinter : public AsmPrinter {
bool EmitGeneric;
public:
- AggBuffer(unsigned _size, raw_ostream &_O, NVPTXAsmPrinter &_AP)
- : size(_size), buffer(_size), O(_O), AP(_AP) {
+ AggBuffer(unsigned size, raw_ostream &O, NVPTXAsmPrinter &AP)
+ : size(size), buffer(size), O(O), AP(AP) {
curpos = 0;
numSymbols = 0;
EmitGeneric = AP.EmitGeneric;
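
Several hunks in this commit drop the `_Name` parameter prefixes. That is safe because, inside a member initializer list, the name before the parentheses always resolves to the member and the name inside resolves to the parameter, so `size(size)` is unambiguous. A self-contained illustration:

```cpp
// In ": size(size)", the outer name is looked up as the member and the
// inner one as the constructor parameter; no underscore prefix is needed.
class AggBufferLike {
  unsigned size;

public:
  explicit AggBufferLike(unsigned size) : size(size) {}
  unsigned getSize() const { return size; }
};

int main() { return AggBufferLike(8).getSize() == 8 ? 0 : 1; }
```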
diff --git a/lib/Target/NVPTX/NVPTXFavorNonGenericAddrSpaces.cpp b/lib/Target/NVPTX/NVPTXFavorNonGenericAddrSpaces.cpp
index f3a095d..6d7c99c 100644
--- a/lib/Target/NVPTX/NVPTXFavorNonGenericAddrSpaces.cpp
+++ b/lib/Target/NVPTX/NVPTXFavorNonGenericAddrSpaces.cpp
@@ -123,10 +123,9 @@ bool NVPTXFavorNonGenericAddrSpaces::hoistAddrSpaceCastFromGEP(
// =>
// %0 = gep X, indices
// %1 = addrspacecast %0
- GetElementPtrInst *NewGEPI = GetElementPtrInst::Create(Cast->getOperand(0),
- Indices,
- GEP->getName(),
- GEPI);
+ GetElementPtrInst *NewGEPI = GetElementPtrInst::Create(
+ GEP->getSourceElementType(), Cast->getOperand(0), Indices,
+ GEP->getName(), GEPI);
NewGEPI->setIsInBounds(GEP->isInBounds());
GEP->replaceAllUsesWith(
new AddrSpaceCastInst(NewGEPI, GEP->getType(), "", GEPI));
diff --git a/lib/Target/NVPTX/NVPTXGenericToNVVM.cpp b/lib/Target/NVPTX/NVPTXGenericToNVVM.cpp
index 86d134b..850c020 100644
--- a/lib/Target/NVPTX/NVPTXGenericToNVVM.cpp
+++ b/lib/Target/NVPTX/NVPTXGenericToNVVM.cpp
@@ -343,6 +343,7 @@ Value *GenericToNVVM::remapConstantExpr(Module *M, Function *F, ConstantExpr *C,
// GetElementPtrConstantExpr
return cast<GEPOperator>(C)->isInBounds()
? Builder.CreateGEP(
+ cast<GEPOperator>(C)->getSourceElementType(),
NewOperands[0],
makeArrayRef(&NewOperands[1], NumOperands - 1))
: Builder.CreateInBoundsGEP(
diff --git a/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp b/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp
index e01c780..52c5e1b 100644
--- a/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp
+++ b/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp
@@ -78,10 +78,7 @@ bool NVPTXDAGToDAGISel::usePrecSqrtF32() const {
return UsePrecSqrtF32;
} else {
// Otherwise, use sqrt.approx if fast math is enabled
- if (TM.Options.UnsafeFPMath)
- return false;
- else
- return true;
+ return !TM.Options.UnsafeFPMath;
}
}
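
Like the `== false` cleanups earlier in the patch, this hunk folds an if/else that returns true/false into a direct boolean expression. In miniature:

```cpp
#include <cassert>

// was: if (unsafeFPMath) return false; else return true;
static bool useApproxSqrt(bool unsafeFPMath) { return !unsafeFPMath; }

int main() {
  assert(useApproxSqrt(false));
  assert(!useApproxSqrt(true));
}
```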
@@ -5044,12 +5041,12 @@ bool NVPTXDAGToDAGISel::ChkMemSDNodeAddressSpace(SDNode *N,
/// SelectInlineAsmMemoryOperand - Implement addressing mode selection for
/// inline asm expressions.
bool NVPTXDAGToDAGISel::SelectInlineAsmMemoryOperand(
- const SDValue &Op, char ConstraintCode, std::vector<SDValue> &OutOps) {
+ const SDValue &Op, unsigned ConstraintID, std::vector<SDValue> &OutOps) {
SDValue Op0, Op1;
- switch (ConstraintCode) {
+ switch (ConstraintID) {
default:
return true;
- case 'm': // memory
+ case InlineAsm::Constraint_m: // memory
if (SelectDirectAddr(Op, Op0)) {
OutOps.push_back(Op0);
OutOps.push_back(CurDAG->getTargetConstant(0, MVT::i32));
diff --git a/lib/Target/NVPTX/NVPTXISelDAGToDAG.h b/lib/Target/NVPTX/NVPTXISelDAGToDAG.h
index ca432b5..6d845c9 100644
--- a/lib/Target/NVPTX/NVPTXISelDAGToDAG.h
+++ b/lib/Target/NVPTX/NVPTXISelDAGToDAG.h
@@ -48,7 +48,7 @@ public:
const NVPTXSubtarget *Subtarget;
bool SelectInlineAsmMemoryOperand(const SDValue &Op,
- char ConstraintCode,
+ unsigned ConstraintID,
std::vector<SDValue> &OutOps) override;
private:
// Include the pieces autogenerated from the target description.
diff --git a/lib/Target/NVPTX/NVPTXISelLowering.cpp b/lib/Target/NVPTX/NVPTXISelLowering.cpp
index 1dc81f7..ff74e6e 100644
--- a/lib/Target/NVPTX/NVPTXISelLowering.cpp
+++ b/lib/Target/NVPTX/NVPTXISelLowering.cpp
@@ -930,7 +930,7 @@ NVPTXTargetLowering::getPrototype(Type *retTy, const ArgListTy &Args,
}
first = false;
- if (Outs[OIdx].Flags.isByVal() == false) {
+ if (!Outs[OIdx].Flags.isByVal()) {
if (Ty->isAggregateType() || Ty->isVectorTy()) {
unsigned align = 0;
const CallInst *CallI = cast<CallInst>(CS->getInstruction());
@@ -1075,7 +1075,7 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
EVT VT = Outs[OIdx].VT;
Type *Ty = Args[i].Ty;
- if (Outs[OIdx].Flags.isByVal() == false) {
+ if (!Outs[OIdx].Flags.isByVal()) {
if (Ty->isAggregateType()) {
// aggregate
SmallVector<EVT, 16> vtparts;
@@ -1459,7 +1459,7 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
ObjectVT) == NumElts &&
"Vector was not scalarized");
unsigned sz = EltVT.getSizeInBits();
- bool needTruncate = sz < 8 ? true : false;
+ bool needTruncate = sz < 8;
if (NumElts == 1) {
// Just a simple load
@@ -1577,7 +1577,7 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
for (unsigned i = 0, e = Ins.size(); i != e; ++i) {
unsigned sz = VTs[i].getSizeInBits();
unsigned AlignI = GreatestCommonDivisor64(RetAlign, Offsets[i]);
- bool needTruncate = sz < 8 ? true : false;
+ bool needTruncate = sz < 8;
if (VTs[i].isInteger() && (sz < 8))
sz = 8;
@@ -1940,9 +1940,7 @@ NVPTXTargetLowering::LowerSTOREVector(SDValue Op, SelectionDAG &DAG) const {
}
// Then any remaining arguments
- for (unsigned i = 2, e = N->getNumOperands(); i != e; ++i) {
- Ops.push_back(N->getOperand(i));
- }
+ Ops.append(N->op_begin() + 2, N->op_end());
SDValue NewSt = DAG.getMemIntrinsicNode(
Opcode, DL, DAG.getVTList(MVT::Other), Ops,
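
Replacing the operand-copy loop with `append` is the usual iterator-range idiom; with a standard container the equivalent is a single `insert` call, as in this sketch:

```cpp
#include <cassert>
#include <vector>

int main() {
  std::vector<int> Ops = {100, 200};    // first two operands already added
  std::vector<int> N = {0, 1, 2, 3, 4}; // source node's operands
  // Equivalent of Ops.append(N->op_begin() + 2, N->op_end()).
  Ops.insert(Ops.end(), N.begin() + 2, N.end());
  assert(Ops.size() == 5 && Ops.back() == 4);
}
```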
@@ -2118,7 +2116,7 @@ SDValue NVPTXTargetLowering::LowerFormalArguments(
// to newly created nodes. The SDNodes for params have to
// appear in the same order as their order of appearance
// in the original function. "idx+1" holds that order.
- if (PAL.hasAttribute(i + 1, Attribute::ByVal) == false) {
+ if (!PAL.hasAttribute(i + 1, Attribute::ByVal)) {
if (Ty->isAggregateType()) {
SmallVector<EVT, 16> vtparts;
SmallVector<uint64_t, 16> offsets;
@@ -4494,7 +4492,6 @@ NVPTXTargetObjectFile::~NVPTXTargetObjectFile() {
delete DwarfLocSection;
delete DwarfARangesSection;
delete DwarfRangesSection;
- delete DwarfMacroInfoSection;
}
const MCSection *
diff --git a/lib/Target/NVPTX/NVPTXISelLowering.h b/lib/Target/NVPTX/NVPTXISelLowering.h
index 1b4da2c..8594364 100644
--- a/lib/Target/NVPTX/NVPTXISelLowering.h
+++ b/lib/Target/NVPTX/NVPTXISelLowering.h
@@ -497,6 +497,12 @@ public:
std::vector<SDValue> &Ops,
SelectionDAG &DAG) const override;
+ unsigned getInlineAsmMemConstraint(
+ const std::string &ConstraintCode) const override {
+ // FIXME: Map different constraints differently.
+ return InlineAsm::Constraint_m;
+ }
+
const NVPTXTargetMachine *nvTM;
// PTX always uses 32-bit shift amounts
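
The new override collapses every memory constraint to `Constraint_m`, with a FIXME to differentiate later. A hypothetical fuller mapping might dispatch on the constraint string; the enum values below are stand-ins, not the real `InlineAsm::Constraint_*` IDs:

```cpp
#include <string>

enum ConstraintID { Constraint_Unknown, Constraint_m, Constraint_o };

// Hypothetical: map an inline-asm memory-constraint string to an ID.
static unsigned mapMemConstraint(const std::string &Code) {
  if (Code == "m") return Constraint_m; // plain memory operand
  if (Code == "o") return Constraint_o; // offsettable memory operand
  return Constraint_Unknown;
}

int main() { return mapMemConstraint("m") == Constraint_m ? 0 : 1; }
```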
diff --git a/lib/Target/NVPTX/NVPTXLowerAggrCopies.cpp b/lib/Target/NVPTX/NVPTXLowerAggrCopies.cpp
index f0c3663..578401a 100644
--- a/lib/Target/NVPTX/NVPTXLowerAggrCopies.cpp
+++ b/lib/Target/NVPTX/NVPTXLowerAggrCopies.cpp
@@ -12,6 +12,8 @@
//===----------------------------------------------------------------------===//
#include "NVPTXLowerAggrCopies.h"
+#include "llvm/CodeGen/MachineFunctionAnalysis.h"
+#include "llvm/CodeGen/StackProtector.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Function.h"
@@ -22,10 +24,33 @@
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Module.h"
+#include "llvm/Support/Debug.h"
+
+#define DEBUG_TYPE "nvptx"
using namespace llvm;
-namespace llvm { FunctionPass *createLowerAggrCopies(); }
+namespace {
+// Actual analysis class, which is a FunctionPass.
+struct NVPTXLowerAggrCopies : public FunctionPass {
+ static char ID;
+
+ NVPTXLowerAggrCopies() : FunctionPass(ID) {}
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.addPreserved<MachineFunctionAnalysis>();
+ AU.addPreserved<StackProtector>();
+ }
+
+ bool runOnFunction(Function &F) override;
+
+ static const unsigned MaxAggrCopySize = 128;
+
+ const char *getPassName() const override {
+ return "Lower aggregate copies/intrinsics into loops";
+ }
+};
+} // namespace
char NVPTXLowerAggrCopies::ID = 0;
@@ -104,7 +129,7 @@ bool NVPTXLowerAggrCopies::runOnFunction(Function &F) {
SmallVector<MemTransferInst *, 4> aggrMemcpys;
SmallVector<MemSetInst *, 4> aggrMemsets;
- const DataLayout *DL = &getAnalysis<DataLayoutPass>().getDataLayout();
+ const DataLayout &DL = F.getParent()->getDataLayout();
LLVMContext &Context = F.getParent()->getContext();
//
@@ -117,10 +142,10 @@ bool NVPTXLowerAggrCopies::runOnFunction(Function &F) {
++II) {
if (LoadInst *load = dyn_cast<LoadInst>(II)) {
- if (load->hasOneUse() == false)
+ if (!load->hasOneUse())
continue;
- if (DL->getTypeStoreSize(load->getType()) < MaxAggrCopySize)
+ if (DL.getTypeStoreSize(load->getType()) < MaxAggrCopySize)
continue;
User *use = load->user_back();
@@ -166,7 +191,7 @@ bool NVPTXLowerAggrCopies::runOnFunction(Function &F) {
StoreInst *store = dyn_cast<StoreInst>(*load->user_begin());
Value *srcAddr = load->getOperand(0);
Value *dstAddr = store->getOperand(1);
- unsigned numLoads = DL->getTypeStoreSize(load->getType());
+ unsigned numLoads = DL.getTypeStoreSize(load->getType());
Value *len = ConstantInt::get(Type::getInt32Ty(Context), numLoads);
convertTransferToLoop(store, srcAddr, dstAddr, len, load->isVolatile(),
diff --git a/lib/Target/NVPTX/NVPTXLowerAggrCopies.h b/lib/Target/NVPTX/NVPTXLowerAggrCopies.h
index da301d5..3c39f53 100644
--- a/lib/Target/NVPTX/NVPTXLowerAggrCopies.h
+++ b/lib/Target/NVPTX/NVPTXLowerAggrCopies.h
@@ -15,35 +15,10 @@
#ifndef LLVM_LIB_TARGET_NVPTX_NVPTXLOWERAGGRCOPIES_H
#define LLVM_LIB_TARGET_NVPTX_NVPTXLOWERAGGRCOPIES_H
-#include "llvm/CodeGen/MachineFunctionAnalysis.h"
-#include "llvm/CodeGen/StackProtector.h"
-#include "llvm/IR/DataLayout.h"
-#include "llvm/Pass.h"
-
namespace llvm {
+class FunctionPass;
-// actual analysis class, which is a functionpass
-struct NVPTXLowerAggrCopies : public FunctionPass {
- static char ID;
-
- NVPTXLowerAggrCopies() : FunctionPass(ID) {}
-
- void getAnalysisUsage(AnalysisUsage &AU) const override {
- AU.addRequired<DataLayoutPass>();
- AU.addPreserved<MachineFunctionAnalysis>();
- AU.addPreserved<StackProtector>();
- }
-
- bool runOnFunction(Function &F) override;
-
- static const unsigned MaxAggrCopySize = 128;
-
- const char *getPassName() const override {
- return "Lower aggregate copies/intrinsics into loops";
- }
-};
-
-extern FunctionPass *createLowerAggrCopies();
+FunctionPass *createLowerAggrCopies();
}
#endif
diff --git a/lib/Target/NVPTX/NVPTXLowerStructArgs.cpp b/lib/Target/NVPTX/NVPTXLowerStructArgs.cpp
index 3149399..68dfbb7 100644
--- a/lib/Target/NVPTX/NVPTXLowerStructArgs.cpp
+++ b/lib/Target/NVPTX/NVPTXLowerStructArgs.cpp
@@ -35,7 +35,8 @@ namespace llvm {
void initializeNVPTXLowerStructArgsPass(PassRegistry &);
}
-class LLVM_LIBRARY_VISIBILITY NVPTXLowerStructArgs : public FunctionPass {
+namespace {
+class NVPTXLowerStructArgs : public FunctionPass {
bool runOnFunction(Function &F) override;
void handleStructPtrArgs(Function &);
@@ -48,6 +49,7 @@ public:
return "Copy structure (byval *) arguments to stack";
}
};
+} // namespace
char NVPTXLowerStructArgs::ID = 1;
diff --git a/lib/Target/NVPTX/NVPTXMCExpr.h b/lib/Target/NVPTX/NVPTXMCExpr.h
index d39a394..f075b8b 100644
--- a/lib/Target/NVPTX/NVPTXMCExpr.h
+++ b/lib/Target/NVPTX/NVPTXMCExpr.h
@@ -29,8 +29,8 @@ private:
const VariantKind Kind;
const APFloat Flt;
- explicit NVPTXFloatMCExpr(VariantKind _Kind, APFloat _Flt)
- : Kind(_Kind), Flt(_Flt) {}
+ explicit NVPTXFloatMCExpr(VariantKind Kind, APFloat Flt)
+ : Kind(Kind), Flt(Flt) {}
public:
/// @name Construction
diff --git a/lib/Target/NVPTX/NVPTXRegisterInfo.cpp b/lib/Target/NVPTX/NVPTXRegisterInfo.cpp
index 5ca96e4..6e97f9ef 100644
--- a/lib/Target/NVPTX/NVPTXRegisterInfo.cpp
+++ b/lib/Target/NVPTX/NVPTXRegisterInfo.cpp
@@ -78,7 +78,7 @@ NVPTXRegisterInfo::NVPTXRegisterInfo() : NVPTXGenRegisterInfo(0) {}
/// NVPTX Callee Saved Registers
const MCPhysReg *
-NVPTXRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
+NVPTXRegisterInfo::getCalleeSavedRegs(const MachineFunction *) const {
static const MCPhysReg CalleeSavedRegs[] = { 0 };
return CalleeSavedRegs;
}
diff --git a/lib/Target/NVPTX/NVPTXRegisterInfo.h b/lib/Target/NVPTX/NVPTXRegisterInfo.h
index 75b8f15..c310a9c 100644
--- a/lib/Target/NVPTX/NVPTXRegisterInfo.h
+++ b/lib/Target/NVPTX/NVPTXRegisterInfo.h
@@ -35,8 +35,7 @@ public:
//------------------------------------------------------
// NVPTX callee saved registers
- const MCPhysReg *
- getCalleeSavedRegs(const MachineFunction *MF = nullptr) const override;
+ const MCPhysReg *getCalleeSavedRegs(const MachineFunction *MF) const override;
BitVector getReservedRegs(const MachineFunction &MF) const override;
diff --git a/lib/Target/NVPTX/NVPTXSection.h b/lib/Target/NVPTX/NVPTXSection.h
index f1d3cb4..0d2627d 100644
--- a/lib/Target/NVPTX/NVPTXSection.h
+++ b/lib/Target/NVPTX/NVPTXSection.h
@@ -26,7 +26,7 @@ namespace llvm {
class NVPTXSection : public MCSection {
virtual void anchor();
public:
- NVPTXSection(SectionVariant V, SectionKind K) : MCSection(V, K) {}
+ NVPTXSection(SectionVariant V, SectionKind K) : MCSection(V, K, nullptr) {}
virtual ~NVPTXSection() {}
/// Override this as NVPTX has its own way of printing switching
@@ -36,11 +36,8 @@ public:
const MCExpr *Subsection) const override {}
/// Base address of PTX sections is zero.
- bool isBaseAddressKnownZero() const override { return true; }
bool UseCodeAlign() const override { return false; }
bool isVirtualSection() const override { return false; }
- std::string getLabelBeginName() const override { return ""; }
- std::string getLabelEndName() const override { return ""; }
};
} // end namespace llvm
diff --git a/lib/Target/NVPTX/NVPTXTargetMachine.cpp b/lib/Target/NVPTX/NVPTXTargetMachine.cpp
index 1a267a6..1b6bc71 100644
--- a/lib/Target/NVPTX/NVPTXTargetMachine.cpp
+++ b/lib/Target/NVPTX/NVPTXTargetMachine.cpp
@@ -50,6 +50,7 @@ using namespace llvm;
namespace llvm {
void initializeNVVMReflectPass(PassRegistry&);
void initializeGenericToNVVMPass(PassRegistry&);
+void initializeNVPTXAllocaHoistingPass(PassRegistry &);
void initializeNVPTXAssignValidGlobalNamesPass(PassRegistry&);
void initializeNVPTXFavorNonGenericAddrSpacesPass(PassRegistry &);
void initializeNVPTXLowerStructArgsPass(PassRegistry &);
@@ -64,6 +65,7 @@ extern "C" void LLVMInitializeNVPTXTarget() {
// but it's very NVPTX-specific.
initializeNVVMReflectPass(*PassRegistry::getPassRegistry());
initializeGenericToNVVMPass(*PassRegistry::getPassRegistry());
+ initializeNVPTXAllocaHoistingPass(*PassRegistry::getPassRegistry());
initializeNVPTXAssignValidGlobalNamesPass(*PassRegistry::getPassRegistry());
initializeNVPTXFavorNonGenericAddrSpacesPass(
*PassRegistry::getPassRegistry());
@@ -86,9 +88,10 @@ NVPTXTargetMachine::NVPTXTargetMachine(const Target &T, StringRef TT,
const TargetOptions &Options,
Reloc::Model RM, CodeModel::Model CM,
CodeGenOpt::Level OL, bool is64bit)
- : LLVMTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL), is64bit(is64bit),
- TLOF(make_unique<NVPTXTargetObjectFile>()),
- DL(computeDataLayout(is64bit)), Subtarget(TT, CPU, FS, *this) {
+ : LLVMTargetMachine(T, computeDataLayout(is64bit), TT, CPU, FS, Options, RM,
+ CM, OL),
+ is64bit(is64bit), TLOF(make_unique<NVPTXTargetObjectFile>()),
+ Subtarget(TT, CPU, FS, *this) {
if (Triple(TT).getOS() == Triple::NVCL)
drvInterface = NVPTX::NVCL;
else
@@ -183,8 +186,7 @@ void NVPTXPassConfig::addIRPasses() {
}
bool NVPTXPassConfig::addInstSelector() {
- const NVPTXSubtarget &ST =
- getTM<NVPTXTargetMachine>().getSubtarget<NVPTXSubtarget>();
+ const NVPTXSubtarget &ST = *getTM<NVPTXTargetMachine>().getSubtargetImpl();
addPass(createLowerAggrCopies());
addPass(createAllocaHoisting());
diff --git a/lib/Target/NVPTX/NVPTXTargetMachine.h b/lib/Target/NVPTX/NVPTXTargetMachine.h
index a81abfe..b8df5af 100644
--- a/lib/Target/NVPTX/NVPTXTargetMachine.h
+++ b/lib/Target/NVPTX/NVPTXTargetMachine.h
@@ -27,7 +27,6 @@ namespace llvm {
class NVPTXTargetMachine : public LLVMTargetMachine {
bool is64bit;
std::unique_ptr<TargetLoweringObjectFile> TLOF;
- const DataLayout DL; // Calculates type size & alignment
NVPTX::DrvInterface drvInterface;
NVPTXSubtarget Subtarget;
@@ -40,8 +39,10 @@ public:
CodeModel::Model CM, CodeGenOpt::Level OP, bool is64bit);
~NVPTXTargetMachine() override;
- const DataLayout *getDataLayout() const override { return &DL; }
- const NVPTXSubtarget *getSubtargetImpl() const override { return &Subtarget; }
+ const NVPTXSubtarget *getSubtargetImpl(const Function &) const override {
+ return &Subtarget;
+ }
+ const NVPTXSubtarget *getSubtargetImpl() const { return &Subtarget; }
bool is64Bit() const { return is64bit; }
NVPTX::DrvInterface getDrvInterface() const { return drvInterface; }
ManagedStringPool *getManagedStrPool() const {
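
The DataLayout member disappears from the target machine because the base class now owns the layout; the derived constructor just forwards the computed layout string. Schematically, with toy types rather than the LLVM classes:

```cpp
#include <string>
#include <utility>

struct BaseTM {
  std::string DL; // single owner of the layout
  explicit BaseTM(std::string DL) : DL(std::move(DL)) {}
};

static std::string computeDataLayout(bool is64bit) {
  return is64bit ? "e-i64:64" : "e-p:32:32"; // placeholder strings
}

// No duplicate DL member: the value is computed once, handed to the base.
struct NVPTXLikeTM : BaseTM {
  explicit NVPTXLikeTM(bool is64bit) : BaseTM(computeDataLayout(is64bit)) {}
};

int main() { return NVPTXLikeTM(true).DL.empty() ? 1 : 0; }
```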
diff --git a/lib/Target/NVPTX/NVPTXTargetObjectFile.h b/lib/Target/NVPTX/NVPTXTargetObjectFile.h
index 00ceca5..5d9ab0d 100644
--- a/lib/Target/NVPTX/NVPTXTargetObjectFile.h
+++ b/lib/Target/NVPTX/NVPTXTargetObjectFile.h
@@ -41,7 +41,6 @@ public:
DwarfLocSection = nullptr;
DwarfARangesSection = nullptr;
DwarfRangesSection = nullptr;
- DwarfMacroInfoSection = nullptr;
}
virtual ~NVPTXTargetObjectFile();
@@ -83,8 +82,6 @@ public:
new NVPTXSection(MCSection::SV_ELF, SectionKind::getMetadata());
DwarfRangesSection =
new NVPTXSection(MCSection::SV_ELF, SectionKind::getMetadata());
- DwarfMacroInfoSection =
- new NVPTXSection(MCSection::SV_ELF, SectionKind::getMetadata());
}
const MCSection *getSectionForConstant(SectionKind Kind,
diff --git a/lib/Target/NVPTX/NVPTXUtilities.cpp b/lib/Target/NVPTX/NVPTXUtilities.cpp
index cf1feac..1f178af 100644
--- a/lib/Target/NVPTX/NVPTXUtilities.cpp
+++ b/lib/Target/NVPTX/NVPTXUtilities.cpp
@@ -293,12 +293,9 @@ bool llvm::isKernelFunction(const Function &F) {
unsigned x = 0;
bool retval = llvm::findOneNVVMAnnotation(
&F, llvm::PropertyAnnotationNames[llvm::PROPERTY_ISKERNEL_FUNCTION], x);
- if (retval == false) {
+ if (!retval) {
// There is no NVVM metadata, check the calling convention
- if (F.getCallingConv() == llvm::CallingConv::PTX_Kernel)
- return true;
- else
- return false;
+ return F.getCallingConv() == llvm::CallingConv::PTX_Kernel;
}
return (x == 1);
}
@@ -307,7 +304,7 @@ bool llvm::getAlign(const Function &F, unsigned index, unsigned &align) {
std::vector<unsigned> Vs;
bool retval = llvm::findAllNVVMAnnotation(
&F, llvm::PropertyAnnotationNames[llvm::PROPERTY_ALIGN], Vs);
- if (retval == false)
+ if (!retval)
return false;
for (int i = 0, e = Vs.size(); i < e; i++) {
unsigned v = Vs[i];
diff --git a/lib/Target/NVPTX/NVPTXutil.cpp b/lib/Target/NVPTX/NVPTXutil.cpp
deleted file mode 100644
index 5f074b3..0000000
--- a/lib/Target/NVPTX/NVPTXutil.cpp
+++ /dev/null
@@ -1,90 +0,0 @@
-//===-- NVPTXutil.cpp - Functions exported to CodeGen --*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file contains the functions that can be used in CodeGen.
-//
-//===----------------------------------------------------------------------===//
-
-#include "NVPTXutil.h"
-#include "NVPTX.h"
-
-using namespace llvm;
-
-namespace llvm {
-
-bool isParamLoad(const MachineInstr *MI) {
- if ((MI->getOpcode() != NVPTX::LD_i32_avar) &&
- (MI->getOpcode() != NVPTX::LD_i64_avar))
- return false;
- if (MI->getOperand(2).isImm() == false)
- return false;
- if (MI->getOperand(2).getImm() != NVPTX::PTXLdStInstCode::PARAM)
- return false;
- return true;
-}
-
-#define DATA_MASK 0x7f
-#define DIGIT_WIDTH 7
-#define MORE_BYTES 0x80
-
-static int encode_leb128(uint64_t val, int *nbytes, char *space, int splen) {
- char *a;
- char *end = space + splen;
-
- a = space;
- do {
- unsigned char uc;
-
- if (a >= end)
- return 1;
- uc = val & DATA_MASK;
- val >>= DIGIT_WIDTH;
- if (val != 0)
- uc |= MORE_BYTES;
- *a = uc;
- a++;
- } while (val);
- *nbytes = a - space;
- return 0;
-}
-
-#undef DATA_MASK
-#undef DIGIT_WIDTH
-#undef MORE_BYTES
-
-uint64_t encode_leb128(const char *str) {
- union {
- uint64_t x;
- char a[8];
- } temp64;
-
- temp64.x = 0;
-
- for (unsigned i = 0, e = strlen(str); i != e; ++i)
- temp64.a[i] = str[e - 1 - i];
-
- char encoded[16];
- int nbytes;
-
- int retval = encode_leb128(temp64.x, &nbytes, encoded, 16);
-
- (void) retval;
- assert(retval == 0 && "Encoding to leb128 failed");
-
- assert(nbytes <= 8 &&
- "Cannot support register names with leb128 encoding > 8 bytes");
-
- temp64.x = 0;
- for (int i = 0; i < nbytes; ++i)
- temp64.a[i] = encoded[i];
-
- return temp64.x;
-}
-
-} // end namespace llvm
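
For reference, the deleted helper implemented standard ULEB128: seven data bits per byte, high bit set on every byte except the last (the DATA_MASK, DIGIT_WIDTH, and MORE_BYTES constants above). A minimal standalone encoder of the same scheme:

```cpp
#include <cstdint>
#include <cstdio>

// Encode Val as ULEB128 into Out; returns the byte count, or -1 on overflow.
static int encodeULEB128(uint64_t Val, unsigned char *Out, int OutLen) {
  int N = 0;
  do {
    if (N >= OutLen)
      return -1;
    unsigned char Byte = Val & 0x7f; // low seven data bits
    Val >>= 7;
    if (Val)
      Byte |= 0x80;                  // more bytes follow
    Out[N++] = Byte;
  } while (Val);
  return N;
}

int main() {
  unsigned char Buf[10];
  int N = encodeULEB128(624485, Buf, 10); // classic example: e5 8e 26
  for (int I = 0; I < N; ++I)
    std::printf("%02x ", Buf[I]);
  std::printf("\n");
}
```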
diff --git a/lib/Target/NVPTX/NVPTXutil.h b/lib/Target/NVPTX/NVPTXutil.h
deleted file mode 100644
index 1915dac..0000000
--- a/lib/Target/NVPTX/NVPTXutil.h
+++ /dev/null
@@ -1,25 +0,0 @@
-//===-- NVPTXutil.h - Functions exported to CodeGen --*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file contains the functions that can be used in CodeGen.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_LIB_TARGET_NVPTX_NVPTXUTIL_H
-#define LLVM_LIB_TARGET_NVPTX_NVPTXUTIL_H
-
-#include "llvm/CodeGen/MachineFunction.h"
-#include "llvm/CodeGen/MachineInstr.h"
-
-namespace llvm {
-bool isParamLoad(const MachineInstr *);
-uint64_t encode_leb128(const char *str);
-}
-
-#endif
diff --git a/lib/Target/NVPTX/NVVMReflect.cpp b/lib/Target/NVPTX/NVVMReflect.cpp
index a8d6b95..5e375b7 100644
--- a/lib/Target/NVPTX/NVVMReflect.cpp
+++ b/lib/Target/NVPTX/NVVMReflect.cpp
@@ -29,6 +29,7 @@
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_os_ostream.h"
+#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Scalar.h"
#include <map>
#include <sstream>
@@ -137,6 +138,26 @@ bool NVVMReflect::handleFunction(Function *ReflectFunction) {
// ConstantArray can be found successfully, see if it can be
// found in VarMap. If so, replace the uses of CallInst with the
// value found in VarMap. If not, replace the use with value 0.
+
+ // IR for __nvvm_reflect calls differs between CUDA versions:
+ // CUDA 6.5 and earlier uses this sequence:
+ // %ptr = tail call i8* @llvm.nvvm.ptr.constant.to.gen.p0i8.p4i8
+ // (i8 addrspace(4)* getelementptr inbounds
+ // ([8 x i8], [8 x i8] addrspace(4)* @str, i32 0, i32 0))
+ // %reflect = tail call i32 @__nvvm_reflect(i8* %ptr)
+ //
+ // Value returned by Sym->getOperand(0) is a Constant with a
+ // ConstantDataSequential operand which can be converted to string and used
+ // for lookup.
+ //
+ // CUDA 7.0 does it slightly differently:
+ // %reflect = call i32 @__nvvm_reflect(i8* addrspacecast
+ // (i8 addrspace(1)* getelementptr inbounds
+ // ([8 x i8], [8 x i8] addrspace(1)* @str, i32 0, i32 0) to i8*))
+ //
+ // In this case, we get a Constant with a GlobalVariable operand and we need
 + // to dig deeper to find its initializer, which holds the string used for lookup.
+
for (User *U : ReflectFunction->users()) {
assert(isa<CallInst>(U) && "Only a call instruction can use _reflect");
CallInst *Reflect = cast<CallInst>(U);
@@ -158,16 +179,23 @@ bool NVVMReflect::handleFunction(Function *ReflectFunction) {
const Value *Sym = GEP->getOperand(0);
assert(isa<Constant>(Sym) && "Format of _reflect function not recognized");
- const Constant *SymStr = cast<Constant>(Sym);
+ const Value *Operand = cast<Constant>(Sym)->getOperand(0);
+ if (const GlobalVariable *GV = dyn_cast<GlobalVariable>(Operand)) {
+ // For CUDA-7.0 style __nvvm_reflect calls we need to find operand's
+ // initializer.
+ assert(GV->hasInitializer() &&
+ "Format of _reflect function not recognized");
+ const Constant *Initializer = GV->getInitializer();
+ Operand = Initializer;
+ }
- assert(isa<ConstantDataSequential>(SymStr->getOperand(0)) &&
+ assert(isa<ConstantDataSequential>(Operand) &&
"Format of _reflect function not recognized");
-
- assert(cast<ConstantDataSequential>(SymStr->getOperand(0))->isCString() &&
+ assert(cast<ConstantDataSequential>(Operand)->isCString() &&
"Format of _reflect function not recognized");
std::string ReflectArg =
- cast<ConstantDataSequential>(SymStr->getOperand(0))->getAsString();
+ cast<ConstantDataSequential>(Operand)->getAsString();
ReflectArg = ReflectArg.substr(0, ReflectArg.size() - 1);
DEBUG(dbgs() << "Arg of _reflect : " << ReflectArg << "\n");
diff --git a/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp b/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp
index bf00e73..99a1633 100644
--- a/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp
+++ b/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp
@@ -271,9 +271,9 @@ class PPCAsmParser : public MCTargetAsmParser {
public:
- PPCAsmParser(MCSubtargetInfo &_STI, MCAsmParser &_Parser,
- const MCInstrInfo &_MII, const MCTargetOptions &Options)
- : MCTargetAsmParser(), STI(_STI), MII(_MII) {
+ PPCAsmParser(MCSubtargetInfo &STI, MCAsmParser &, const MCInstrInfo &MII,
+ const MCTargetOptions &Options)
+ : MCTargetAsmParser(), STI(STI), MII(MII) {
// Check for 64-bit vs. 32-bit pointer mode.
Triple TheTriple(STI.getTargetTriple());
IsPPC64 = (TheTriple.getArch() == Triple::ppc64 ||
@@ -425,7 +425,9 @@ public:
bool isToken() const override { return Kind == Token; }
bool isImm() const override { return Kind == Immediate || Kind == Expression; }
+ bool isU1Imm() const { return Kind == Immediate && isUInt<1>(getImm()); }
bool isU2Imm() const { return Kind == Immediate && isUInt<2>(getImm()); }
+ bool isU3Imm() const { return Kind == Immediate && isUInt<3>(getImm()); }
bool isU4Imm() const { return Kind == Immediate && isUInt<4>(getImm()); }
bool isU5Imm() const { return Kind == Immediate && isUInt<5>(getImm()); }
bool isS5Imm() const { return Kind == Immediate && isInt<5>(getImm()); }
diff --git a/lib/Target/PowerPC/Disassembler/PPCDisassembler.cpp b/lib/Target/PowerPC/Disassembler/PPCDisassembler.cpp
index 0ed0723..a9f5fc7 100644
--- a/lib/Target/PowerPC/Disassembler/PPCDisassembler.cpp
+++ b/lib/Target/PowerPC/Disassembler/PPCDisassembler.cpp
@@ -189,6 +189,12 @@ static DecodeStatus DecodeCRRCRegisterClass(MCInst &Inst, uint64_t RegNo,
return decodeRegisterClass(Inst, RegNo, CRRegs);
}
+static DecodeStatus DecodeCRRC0RegisterClass(MCInst &Inst, uint64_t RegNo,
+ uint64_t Address,
+ const void *Decoder) {
+ return decodeRegisterClass(Inst, RegNo, CRRegs);
+}
+
static DecodeStatus DecodeCRBITRCRegisterClass(MCInst &Inst, uint64_t RegNo,
uint64_t Address,
const void *Decoder) {
diff --git a/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp b/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp
index c287fbe..311a4f2 100644
--- a/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp
+++ b/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp
@@ -214,6 +214,13 @@ void PPCInstPrinter::printPredicateOperand(const MCInst *MI, unsigned OpNo,
printOperand(MI, OpNo+1, O);
}
+void PPCInstPrinter::printU1ImmOperand(const MCInst *MI, unsigned OpNo,
+ raw_ostream &O) {
+ unsigned int Value = MI->getOperand(OpNo).getImm();
+ assert(Value <= 1 && "Invalid u1imm argument!");
+ O << (unsigned int)Value;
+}
+
void PPCInstPrinter::printU2ImmOperand(const MCInst *MI, unsigned OpNo,
raw_ostream &O) {
unsigned int Value = MI->getOperand(OpNo).getImm();
@@ -221,6 +228,13 @@ void PPCInstPrinter::printU2ImmOperand(const MCInst *MI, unsigned OpNo,
O << (unsigned int)Value;
}
+void PPCInstPrinter::printU3ImmOperand(const MCInst *MI, unsigned OpNo,
+ raw_ostream &O) {
+ unsigned int Value = MI->getOperand(OpNo).getImm();
 + assert(Value <= 7 && "Invalid u3imm argument!");
+ O << (unsigned int)Value;
+}
+
void PPCInstPrinter::printU4ImmOperand(const MCInst *MI, unsigned OpNo,
raw_ostream &O) {
unsigned int Value = MI->getOperand(OpNo).getImm();
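
The new printers all follow one template: assert the immediate fits in N bits, then print it (a u3imm ranges over 0 to 7). The generic shape, standalone:

```cpp
#include <cassert>
#include <cstdio>

// Generic form of printU1/U2/U3/U4ImmOperand: a uNimm must satisfy
// Value < 2^N, e.g. 0-7 for a u3imm.
static void printUImm(unsigned Value, unsigned NBits) {
  assert(Value < (1u << NBits) && "invalid uNimm argument");
  std::printf("%u", Value);
}

int main() {
  printUImm(7, 3); // largest legal u3imm
  std::printf("\n");
}
```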
diff --git a/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.h b/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.h
index 6ead19b..8718743 100644
--- a/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.h
+++ b/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.h
@@ -43,7 +43,9 @@ public:
void printPredicateOperand(const MCInst *MI, unsigned OpNo,
raw_ostream &O, const char *Modifier = nullptr);
+ void printU1ImmOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
void printU2ImmOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+ void printU3ImmOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
void printU4ImmOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
void printS5ImmOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
void printU5ImmOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.cpp b/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.cpp
index 2b4f2d8..d8fab5b 100644
--- a/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.cpp
+++ b/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.cpp
@@ -45,6 +45,10 @@ PPCMCAsmInfoDarwin::PPCMCAsmInfoDarwin(bool is64Bit, const Triple& T) {
void PPCELFMCAsmInfo::anchor() { }
PPCELFMCAsmInfo::PPCELFMCAsmInfo(bool is64Bit, const Triple& T) {
+ // FIXME: This is not always needed. For example, it is not needed in the
+ // v2 abi.
+ NeedsLocalForSize = true;
+
if (is64Bit) {
PointerSize = CalleeSaveStackSlotSize = 8;
}
diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp b/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp
index 06d380e..b9f0afb 100644
--- a/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp
+++ b/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp
@@ -14,6 +14,7 @@
#include "MCTargetDesc/PPCMCTargetDesc.h"
#include "MCTargetDesc/PPCFixupKinds.h"
#include "llvm/ADT/Statistic.h"
+#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCCodeEmitter.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCExpr.h"
@@ -39,10 +40,10 @@ class PPCMCCodeEmitter : public MCCodeEmitter {
bool IsLittleEndian;
public:
- PPCMCCodeEmitter(const MCInstrInfo &mcii, MCContext &ctx, bool isLittle)
- : MCII(mcii), CTX(ctx), IsLittleEndian(isLittle) {
- }
-
+ PPCMCCodeEmitter(const MCInstrInfo &mcii, MCContext &ctx)
+ : MCII(mcii), CTX(ctx),
+ IsLittleEndian(ctx.getAsmInfo()->isLittleEndian()) {}
+
~PPCMCCodeEmitter() {}
unsigned getDirectBrEncoding(const MCInst &MI, unsigned OpNo,
@@ -158,14 +159,11 @@ public:
};
} // end anonymous namespace
-
+
MCCodeEmitter *llvm::createPPCMCCodeEmitter(const MCInstrInfo &MCII,
const MCRegisterInfo &MRI,
- const MCSubtargetInfo &STI,
MCContext &Ctx) {
- Triple TT(STI.getTargetTriple());
- bool IsLittleEndian = TT.getArch() == Triple::ppc64le;
- return new PPCMCCodeEmitter(MCII, Ctx, IsLittleEndian);
+ return new PPCMCCodeEmitter(MCII, Ctx);
}
unsigned PPCMCCodeEmitter::
diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCMCExpr.h b/lib/Target/PowerPC/MCTargetDesc/PPCMCExpr.h
index f0a6bb9..1c840d9 100644
--- a/lib/Target/PowerPC/MCTargetDesc/PPCMCExpr.h
+++ b/lib/Target/PowerPC/MCTargetDesc/PPCMCExpr.h
@@ -36,9 +36,8 @@ private:
int64_t EvaluateAsInt64(int64_t Value) const;
- explicit PPCMCExpr(VariantKind _Kind, const MCExpr *_Expr,
- bool _IsDarwin)
- : Kind(_Kind), Expr(_Expr), IsDarwin(_IsDarwin) {}
+ explicit PPCMCExpr(VariantKind Kind, const MCExpr *Expr, bool IsDarwin)
+ : Kind(Kind), Expr(Expr), IsDarwin(IsDarwin) {}
public:
/// @name Construction
diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp b/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp
index f2da389..2f7a768 100644
--- a/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp
+++ b/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp
@@ -145,6 +145,7 @@ public:
}
void emitTCEntry(const MCSymbol &S) override {
// Creates a R_PPC64_TOC relocation
+ Streamer.EmitValueToAlignment(8);
Streamer.EmitSymbolValue(&S, 8);
}
void emitMachine(StringRef CPU) override {
@@ -222,32 +223,19 @@ public:
};
}
-// This is duplicated code. Refactor this.
-static MCStreamer *createMCStreamer(const Target &T, StringRef TT,
- MCContext &Ctx, MCAsmBackend &MAB,
- raw_ostream &OS, MCCodeEmitter *Emitter,
- const MCSubtargetInfo &STI, bool RelaxAll) {
- if (Triple(TT).isOSDarwin()) {
- MCStreamer *S = createMachOStreamer(Ctx, MAB, OS, Emitter, RelaxAll);
- new PPCTargetMachOStreamer(*S);
- return S;
- }
-
- MCStreamer *S = createELFStreamer(Ctx, MAB, OS, Emitter, RelaxAll);
- new PPCTargetELFStreamer(*S);
- return S;
+static MCTargetStreamer *createAsmTargetStreamer(MCStreamer &S,
+ formatted_raw_ostream &OS,
+ MCInstPrinter *InstPrint,
+ bool isVerboseAsm) {
+ return new PPCTargetAsmStreamer(S, OS);
}
-static MCStreamer *
-createMCAsmStreamer(MCContext &Ctx, formatted_raw_ostream &OS,
- bool isVerboseAsm, bool useDwarfDirectory,
- MCInstPrinter *InstPrint, MCCodeEmitter *CE,
- MCAsmBackend *TAB, bool ShowInst) {
-
- MCStreamer *S = llvm::createAsmStreamer(
- Ctx, OS, isVerboseAsm, useDwarfDirectory, InstPrint, CE, TAB, ShowInst);
- new PPCTargetAsmStreamer(*S, OS);
- return S;
+static MCTargetStreamer *
+createObjectTargetStreamer(MCStreamer &S, const MCSubtargetInfo &STI) {
+ Triple TT(STI.getTargetTriple());
+ if (TT.getObjectFormat() == Triple::ELF)
+ return new PPCTargetELFStreamer(S);
+ return new PPCTargetMachOStreamer(S);
}
static MCInstPrinter *createPPCMCInstPrinter(const Target &T,
@@ -261,60 +249,36 @@ static MCInstPrinter *createPPCMCInstPrinter(const Target &T,
}
extern "C" void LLVMInitializePowerPCTargetMC() {
- // Register the MC asm info.
- RegisterMCAsmInfoFn C(ThePPC32Target, createPPCMCAsmInfo);
- RegisterMCAsmInfoFn D(ThePPC64Target, createPPCMCAsmInfo);
- RegisterMCAsmInfoFn E(ThePPC64LETarget, createPPCMCAsmInfo);
-
- // Register the MC codegen info.
- TargetRegistry::RegisterMCCodeGenInfo(ThePPC32Target, createPPCMCCodeGenInfo);
- TargetRegistry::RegisterMCCodeGenInfo(ThePPC64Target, createPPCMCCodeGenInfo);
- TargetRegistry::RegisterMCCodeGenInfo(ThePPC64LETarget,
- createPPCMCCodeGenInfo);
-
- // Register the MC instruction info.
- TargetRegistry::RegisterMCInstrInfo(ThePPC32Target, createPPCMCInstrInfo);
- TargetRegistry::RegisterMCInstrInfo(ThePPC64Target, createPPCMCInstrInfo);
- TargetRegistry::RegisterMCInstrInfo(ThePPC64LETarget,
- createPPCMCInstrInfo);
-
- // Register the MC register info.
- TargetRegistry::RegisterMCRegInfo(ThePPC32Target, createPPCMCRegisterInfo);
- TargetRegistry::RegisterMCRegInfo(ThePPC64Target, createPPCMCRegisterInfo);
- TargetRegistry::RegisterMCRegInfo(ThePPC64LETarget, createPPCMCRegisterInfo);
-
- // Register the MC subtarget info.
- TargetRegistry::RegisterMCSubtargetInfo(ThePPC32Target,
- createPPCMCSubtargetInfo);
- TargetRegistry::RegisterMCSubtargetInfo(ThePPC64Target,
- createPPCMCSubtargetInfo);
- TargetRegistry::RegisterMCSubtargetInfo(ThePPC64LETarget,
- createPPCMCSubtargetInfo);
-
- // Register the MC Code Emitter
- TargetRegistry::RegisterMCCodeEmitter(ThePPC32Target, createPPCMCCodeEmitter);
- TargetRegistry::RegisterMCCodeEmitter(ThePPC64Target, createPPCMCCodeEmitter);
- TargetRegistry::RegisterMCCodeEmitter(ThePPC64LETarget,
- createPPCMCCodeEmitter);
-
+ for (Target *T : {&ThePPC32Target, &ThePPC64Target, &ThePPC64LETarget}) {
+ // Register the MC asm info.
+ RegisterMCAsmInfoFn C(*T, createPPCMCAsmInfo);
+
+ // Register the MC codegen info.
+ TargetRegistry::RegisterMCCodeGenInfo(*T, createPPCMCCodeGenInfo);
+
+ // Register the MC instruction info.
+ TargetRegistry::RegisterMCInstrInfo(*T, createPPCMCInstrInfo);
+
+ // Register the MC register info.
+ TargetRegistry::RegisterMCRegInfo(*T, createPPCMCRegisterInfo);
+
+ // Register the MC subtarget info.
+ TargetRegistry::RegisterMCSubtargetInfo(*T, createPPCMCSubtargetInfo);
+
+ // Register the MC Code Emitter
+ TargetRegistry::RegisterMCCodeEmitter(*T, createPPCMCCodeEmitter);
+
// Register the asm backend.
- TargetRegistry::RegisterMCAsmBackend(ThePPC32Target, createPPCAsmBackend);
- TargetRegistry::RegisterMCAsmBackend(ThePPC64Target, createPPCAsmBackend);
- TargetRegistry::RegisterMCAsmBackend(ThePPC64LETarget, createPPCAsmBackend);
-
- // Register the object streamer.
- TargetRegistry::RegisterMCObjectStreamer(ThePPC32Target, createMCStreamer);
- TargetRegistry::RegisterMCObjectStreamer(ThePPC64Target, createMCStreamer);
- TargetRegistry::RegisterMCObjectStreamer(ThePPC64LETarget, createMCStreamer);
-
- // Register the asm streamer.
- TargetRegistry::RegisterAsmStreamer(ThePPC32Target, createMCAsmStreamer);
- TargetRegistry::RegisterAsmStreamer(ThePPC64Target, createMCAsmStreamer);
- TargetRegistry::RegisterAsmStreamer(ThePPC64LETarget, createMCAsmStreamer);
-
- // Register the MCInstPrinter.
- TargetRegistry::RegisterMCInstPrinter(ThePPC32Target, createPPCMCInstPrinter);
- TargetRegistry::RegisterMCInstPrinter(ThePPC64Target, createPPCMCInstPrinter);
- TargetRegistry::RegisterMCInstPrinter(ThePPC64LETarget,
- createPPCMCInstPrinter);
+ TargetRegistry::RegisterMCAsmBackend(*T, createPPCAsmBackend);
+
+ // Register the object target streamer.
+ TargetRegistry::RegisterObjectTargetStreamer(*T,
+ createObjectTargetStreamer);
+
+ // Register the asm target streamer.
+ TargetRegistry::RegisterAsmTargetStreamer(*T, createAsmTargetStreamer);
+
+ // Register the MCInstPrinter.
+ TargetRegistry::RegisterMCInstPrinter(*T, createPPCMCInstPrinter);
+ }
}
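
The rewritten initializer trades three near-identical registration blocks for one loop over the targets; a braced initializer list of pointers makes the range-for work. The pattern in isolation:

```cpp
#include <cstdio>

struct Target { const char *Name; };
static Target PPC32{"ppc32"}, PPC64{"ppc64"}, PPC64LE{"ppc64le"};

static void registerTarget(Target &T) {
  std::printf("registered %s\n", T.Name);
}

int main() {
  // One loop instead of three copy-pasted registration blocks.
  for (Target *T : {&PPC32, &PPC64, &PPC64LE})
    registerTarget(*T);
}
```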
diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.h b/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.h
index 68f7f7a..8b1e3b4 100644
--- a/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.h
+++ b/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.h
@@ -34,10 +34,9 @@ class raw_ostream;
extern Target ThePPC32Target;
extern Target ThePPC64Target;
extern Target ThePPC64LETarget;
-
+
MCCodeEmitter *createPPCMCCodeEmitter(const MCInstrInfo &MCII,
const MCRegisterInfo &MRI,
- const MCSubtargetInfo &STI,
MCContext &Ctx);
MCAsmBackend *createPPCAsmBackend(const Target &T, const MCRegisterInfo &MRI,
diff --git a/lib/Target/PowerPC/PPC.td b/lib/Target/PowerPC/PPC.td
index f53add5..f175f6d 100644
--- a/lib/Target/PowerPC/PPC.td
+++ b/lib/Target/PowerPC/PPC.td
@@ -18,7 +18,7 @@ include "llvm/Target/Target.td"
//===----------------------------------------------------------------------===//
// PowerPC Subtarget features.
//
-
+
//===----------------------------------------------------------------------===//
// CPU Directives //
//===----------------------------------------------------------------------===//
@@ -112,14 +112,21 @@ def FeatureVSX : SubtargetFeature<"vsx","HasVSX", "true",
def FeatureP8Altivec : SubtargetFeature<"power8-altivec", "HasP8Altivec", "true",
"Enable POWER8 Altivec instructions",
[FeatureAltivec]>;
+def FeatureP8Crypto : SubtargetFeature<"crypto", "HasP8Crypto", "true",
+ "Enable POWER8 Crypto instructions",
+ [FeatureP8Altivec]>;
def FeatureP8Vector : SubtargetFeature<"power8-vector", "HasP8Vector", "true",
"Enable POWER8 vector instructions",
[FeatureVSX, FeatureP8Altivec]>;
-
+def FeaturePartwordAtomic : SubtargetFeature<"partword-atomics",
+ "HasPartwordAtomics", "true",
+ "Enable l[bh]arx and st[bh]cx.">;
def FeatureInvariantFunctionDescriptors :
SubtargetFeature<"invariant-function-descriptors",
"HasInvariantFunctionDescriptors", "true",
"Assume function descriptors are invariant">;
+def FeatureHTM : SubtargetFeature<"htm", "HasHTM", "true",
+ "Enable Hardware Transactional Memory instructions">;
def DeprecatedMFTB : SubtargetFeature<"", "DeprecatedMFTB", "true",
"Treat mftb as deprecated">;
@@ -256,11 +263,11 @@ def ProcessorFeatures {
[DirectivePwr8, FeatureAltivec, FeatureP8Altivec, FeatureVSX,
FeatureP8Vector, FeatureMFOCRF, FeatureFCPSGN, FeatureFSqrt,
FeatureFRE, FeatureFRES, FeatureFRSQRTE, FeatureFRSQRTES,
- FeatureRecipPrec, FeatureSTFIWX, FeatureLFIWAX,
+ FeatureRecipPrec, FeatureSTFIWX, FeatureLFIWAX, FeatureHTM,
FeatureFPRND, FeatureFPCVT, FeatureISEL,
- FeaturePOPCNTD, FeatureCMPB, FeatureLDBRX,
+ FeaturePOPCNTD, FeatureCMPB, FeatureLDBRX, FeatureP8Crypto,
Feature64Bit /*, Feature64BitRegs */, FeatureICBT,
- DeprecatedMFTB, DeprecatedDST];
+ FeaturePartwordAtomic, DeprecatedMFTB, DeprecatedDST];
}
def : ProcessorModel<"970", G5Model,
@@ -339,7 +346,7 @@ def : ProcessorModel<"pwr7", P7Model,
FeatureRecipPrec, FeatureSTFIWX, FeatureLFIWAX,
FeatureFPRND, FeatureFPCVT, FeatureISEL,
FeaturePOPCNTD, FeatureCMPB, FeatureLDBRX,
- Feature64Bit /*, Feature64BitRegs */,
+ Feature64Bit /*, Feature64BitRegs */, FeaturePartwordAtomic,
DeprecatedMFTB, DeprecatedDST]>;
def : ProcessorModel<"pwr8", P8Model, ProcessorFeatures.Power8FeatureList>;
def : Processor<"ppc", G3Itineraries, [Directive32]>;
diff --git a/lib/Target/PowerPC/PPCAsmPrinter.cpp b/lib/Target/PowerPC/PPCAsmPrinter.cpp
index 1327290..cd60906 100644
--- a/lib/Target/PowerPC/PPCAsmPrinter.cpp
+++ b/lib/Target/PowerPC/PPCAsmPrinter.cpp
@@ -69,12 +69,11 @@ namespace {
protected:
MapVector<MCSymbol*, MCSymbol*> TOC;
const PPCSubtarget *Subtarget;
- uint64_t TOCLabelID;
StackMaps SM;
public:
explicit PPCAsmPrinter(TargetMachine &TM,
std::unique_ptr<MCStreamer> Streamer)
- : AsmPrinter(TM, std::move(Streamer)), TOCLabelID(0), SM(*this) {}
+ : AsmPrinter(TM, std::move(Streamer)), SM(*this) {}
const char *getPassName() const override {
return "PowerPC Assembly Printer";
@@ -321,17 +320,9 @@ bool PPCAsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI, unsigned OpNo,
/// exists for it. If not, create one. Then return a symbol that references
/// the TOC entry.
MCSymbol *PPCAsmPrinter::lookUpOrCreateTOCEntry(MCSymbol *Sym) {
- const DataLayout *DL = TM.getDataLayout();
MCSymbol *&TOCEntry = TOC[Sym];
-
- // To avoid name clash check if the name already exists.
- while (!TOCEntry) {
- if (OutContext.LookupSymbol(Twine(DL->getPrivateGlobalPrefix()) +
- "C" + Twine(TOCLabelID++)) == nullptr) {
- TOCEntry = GetTempSymbol("C", TOCLabelID);
- }
- }
-
+ if (!TOCEntry)
+ TOCEntry = createTempSymbol("C");
return TOCEntry;
}
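
The rewritten lookup is plain memoization: indexing the map default-constructs the mapped value on first access, and the temp symbol is created only when that reference is still empty, so the old clash-avoidance loop becomes unnecessary. With a standard map and strings standing in for MCSymbols:

```cpp
#include <map>
#include <string>

static std::map<std::string, std::string> TOC;
static unsigned TempCounter;

// Stand-in for createTempSymbol("C"): fresh names, no clash checking needed.
static std::string createTempSymbol() {
  return ".LC" + std::to_string(TempCounter++);
}

static const std::string &lookUpOrCreateTOCEntry(const std::string &Sym) {
  std::string &Entry = TOC[Sym]; // default-constructed (empty) on first use
  if (Entry.empty())
    Entry = createTempSymbol();
  return Entry;
}

int main() {
  // Same symbol twice yields the same TOC entry.
  return &lookUpOrCreateTOCEntry("a") == &lookUpOrCreateTOCEntry("a") ? 0 : 1;
}
```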
@@ -1068,8 +1059,7 @@ void PPCLinuxAsmPrinter::EmitFunctionEntryLabel() {
OutStreamer.SwitchSection(Section);
OutStreamer.EmitLabel(CurrentFnSym);
OutStreamer.EmitValueToAlignment(8);
- MCSymbol *Symbol1 =
- OutContext.GetOrCreateSymbol(".L." + Twine(CurrentFnSym->getName()));
+ MCSymbol *Symbol1 = CurrentFnSymForSize;
// Generates a R_PPC64_ADDR64 (from FK_DATA_8) relocation for the function
// entry point.
OutStreamer.EmitValue(MCSymbolRefExpr::Create(Symbol1, OutContext),
@@ -1082,11 +1072,6 @@ void PPCLinuxAsmPrinter::EmitFunctionEntryLabel() {
// Emit a null environment pointer.
OutStreamer.EmitIntValue(0, 8 /* size */);
OutStreamer.SwitchSection(Current.first, Current.second);
-
- MCSymbol *RealFnSym = OutContext.GetOrCreateSymbol(
- ".L." + Twine(CurrentFnSym->getName()));
- OutStreamer.EmitLabel(RealFnSym);
- CurrentFnSymForSize = RealFnSym;
}
diff --git a/lib/Target/PowerPC/PPCCTRLoops.cpp b/lib/Target/PowerPC/PPCCTRLoops.cpp
index 5af8aab..c595f44 100644
--- a/lib/Target/PowerPC/PPCCTRLoops.cpp
+++ b/lib/Target/PowerPC/PPCCTRLoops.cpp
@@ -171,8 +171,7 @@ bool PPCCTRLoops::runOnFunction(Function &F) {
LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
SE = &getAnalysis<ScalarEvolution>();
DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
- DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>();
- DL = DLP ? &DLP->getDataLayout() : nullptr;
+ DL = &F.getParent()->getDataLayout();
auto *TLIP = getAnalysisIfAvailable<TargetLibraryInfoWrapperPass>();
LibInfo = TLIP ? &TLIP->getTLI() : nullptr;
@@ -533,7 +532,7 @@ bool PPCCTRLoops::convertToCTRLoop(Loop *L) {
// selected branch.
MadeChange = true;
- SCEVExpander SCEVE(*SE, "loopcnt");
+ SCEVExpander SCEVE(*SE, Preheader->getModule()->getDataLayout(), "loopcnt");
LLVMContext &C = SE->getContext();
Type *CountType = TT.isArch64Bit() ? Type::getInt64Ty(C) :
Type::getInt32Ty(C);
diff --git a/lib/Target/PowerPC/PPCFastISel.cpp b/lib/Target/PowerPC/PPCFastISel.cpp
index 54532b5..fbd7b6d 100644
--- a/lib/Target/PowerPC/PPCFastISel.cpp
+++ b/lib/Target/PowerPC/PPCFastISel.cpp
@@ -675,8 +675,18 @@ bool PPCFastISel::PPCEmitStore(MVT VT, unsigned SrcReg, Address &Addr) {
case PPC::STFS: Opc = PPC::STFSX; break;
case PPC::STFD: Opc = IsVSFRC ? PPC::STXSDX : PPC::STFDX; break;
}
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc))
- .addReg(SrcReg).addReg(Addr.Base.Reg).addReg(IndexReg);
+
+ auto MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc))
+ .addReg(SrcReg);
+
+ // If we have an index register defined we use it in the store inst,
+ // otherwise we use X0 as base as it makes the vector instructions to
+ // use zero in the computation of the effective address regardless the
+ // content of the register.
+ if (IndexReg)
+ MIB.addReg(Addr.Base.Reg).addReg(IndexReg);
+ else
+ MIB.addReg(PPC::ZERO8).addReg(Addr.Base.Reg);
}
return true;
@@ -1532,7 +1542,7 @@ bool PPCFastISel::fastLowerCall(CallLoweringInfo &CLI) {
// Add a register mask with the call-preserved registers. Proper
// defs for return values will be added by setPhysRegsDeadExcept().
- MIB.addRegMask(TRI.getCallPreservedMask(CC));
+ MIB.addRegMask(TRI.getCallPreservedMask(*FuncInfo.MF, CC));
CLI.Call = MIB;
diff --git a/lib/Target/PowerPC/PPCISelDAGToDAG.cpp b/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
index b10e854..3ac8e94 100644
--- a/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
+++ b/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
@@ -186,20 +186,34 @@ namespace {
/// register can be improved, but it is wrong to substitute Reg+Reg for
/// Reg in an asm, because the load or store opcode would have to change.
bool SelectInlineAsmMemoryOperand(const SDValue &Op,
- char ConstraintCode,
+ unsigned ConstraintID,
std::vector<SDValue> &OutOps) override {
- // We need to make sure that this one operand does not end up in r0
- // (because we might end up lowering this as 0(%op)).
- const TargetRegisterInfo *TRI = PPCSubTarget->getRegisterInfo();
- const TargetRegisterClass *TRC = TRI->getPointerRegClass(*MF, /*Kind=*/1);
- SDValue RC = CurDAG->getTargetConstant(TRC->getID(), MVT::i32);
- SDValue NewOp =
- SDValue(CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS,
- SDLoc(Op), Op.getValueType(),
- Op, RC), 0);
-
- OutOps.push_back(NewOp);
- return false;
+
+ switch(ConstraintID) {
+ default:
+ errs() << "ConstraintID: " << ConstraintID << "\n";
+ llvm_unreachable("Unexpected asm memory constraint");
+ case InlineAsm::Constraint_es:
+ case InlineAsm::Constraint_i:
+ case InlineAsm::Constraint_m:
+ case InlineAsm::Constraint_o:
+ case InlineAsm::Constraint_Q:
+ case InlineAsm::Constraint_Z:
+ case InlineAsm::Constraint_Zy:
+ // We need to make sure that this one operand does not end up in r0
+ // (because we might end up lowering this as 0(%op)).
+ const TargetRegisterInfo *TRI = PPCSubTarget->getRegisterInfo();
+ const TargetRegisterClass *TRC = TRI->getPointerRegClass(*MF, /*Kind=*/1);
+ SDValue RC = CurDAG->getTargetConstant(TRC->getID(), MVT::i32);
+ SDValue NewOp =
+ SDValue(CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS,
+ SDLoc(Op), Op.getValueType(),
+ Op, RC), 0);
+
+ OutOps.push_back(NewOp);
+ return false;
+ }
+ return true;
}
void InsertVRSaveCode(MachineFunction &MF);
@@ -2105,7 +2119,7 @@ static unsigned getCRIdxForSetCC(ISD::CondCode CC, bool &Invert) {
// getVCmpInst: return the vector compare instruction for the specified
// vector type and condition code. Since this is for altivec specific code,
-// only support the altivec types (v16i8, v8i16, v4i32, and v4f32).
+// only support the altivec types (v16i8, v8i16, v4i32, v2i64, and v4f32).
static unsigned int getVCmpInst(MVT VecVT, ISD::CondCode CC,
bool HasVSX, bool &Swap, bool &Negate) {
Swap = false;
@@ -2184,6 +2198,8 @@ static unsigned int getVCmpInst(MVT VecVT, ISD::CondCode CC,
return PPC::VCMPEQUH;
else if (VecVT == MVT::v4i32)
return PPC::VCMPEQUW;
+ else if (VecVT == MVT::v2i64)
+ return PPC::VCMPEQUD;
break;
case ISD::SETGT:
if (VecVT == MVT::v16i8)
@@ -2192,6 +2208,8 @@ static unsigned int getVCmpInst(MVT VecVT, ISD::CondCode CC,
return PPC::VCMPGTSH;
else if (VecVT == MVT::v4i32)
return PPC::VCMPGTSW;
+ else if (VecVT == MVT::v2i64)
+ return PPC::VCMPGTSD;
break;
case ISD::SETUGT:
if (VecVT == MVT::v16i8)
@@ -2200,6 +2218,8 @@ static unsigned int getVCmpInst(MVT VecVT, ISD::CondCode CC,
return PPC::VCMPGTUH;
else if (VecVT == MVT::v4i32)
return PPC::VCMPGTUW;
+ else if (VecVT == MVT::v2i64)
+ return PPC::VCMPGTUD;
break;
default:
break;
diff --git a/lib/Target/PowerPC/PPCISelLowering.cpp b/lib/Target/PowerPC/PPCISelLowering.cpp
index 147e94b..871531e 100644
--- a/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -516,7 +516,12 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
setOperationAction(ISD::FSQRT, MVT::v4f32, Legal);
}
- setOperationAction(ISD::MUL, MVT::v4i32, Custom);
+
+ if (Subtarget.hasP8Altivec())
+ setOperationAction(ISD::MUL, MVT::v4i32, Legal);
+ else
+ setOperationAction(ISD::MUL, MVT::v4i32, Custom);
+
setOperationAction(ISD::MUL, MVT::v8i16, Custom);
setOperationAction(ISD::MUL, MVT::v16i8, Custom);
@@ -574,15 +579,24 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
addRegisterClass(MVT::v4f32, &PPC::VSRCRegClass);
addRegisterClass(MVT::v2f64, &PPC::VSRCRegClass);
- // VSX v2i64 only supports non-arithmetic operations.
- setOperationAction(ISD::ADD, MVT::v2i64, Expand);
- setOperationAction(ISD::SUB, MVT::v2i64, Expand);
+ if (Subtarget.hasP8Altivec()) {
+ setOperationAction(ISD::SHL, MVT::v2i64, Legal);
+ setOperationAction(ISD::SRA, MVT::v2i64, Legal);
+ setOperationAction(ISD::SRL, MVT::v2i64, Legal);
- setOperationAction(ISD::SHL, MVT::v2i64, Expand);
- setOperationAction(ISD::SRA, MVT::v2i64, Expand);
- setOperationAction(ISD::SRL, MVT::v2i64, Expand);
+ setOperationAction(ISD::SETCC, MVT::v2i64, Legal);
+ }
+ else {
+ setOperationAction(ISD::SHL, MVT::v2i64, Expand);
+ setOperationAction(ISD::SRA, MVT::v2i64, Expand);
+ setOperationAction(ISD::SRL, MVT::v2i64, Expand);
- setOperationAction(ISD::SETCC, MVT::v2i64, Custom);
+ setOperationAction(ISD::SETCC, MVT::v2i64, Custom);
+
+ // VSX v2i64 only supports non-arithmetic operations.
+ setOperationAction(ISD::ADD, MVT::v2i64, Expand);
+ setOperationAction(ISD::SUB, MVT::v2i64, Expand);
+ }
setOperationAction(ISD::LOAD, MVT::v2i64, Promote);
AddPromotedToType (ISD::LOAD, MVT::v2i64, MVT::v2f64);
@@ -892,6 +906,13 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
MaxStoresPerMemcpyOptSize = 8;
MaxStoresPerMemmove = 32;
MaxStoresPerMemmoveOptSize = 8;
+ } else if (Subtarget.getDarwinDirective() == PPC::DIR_A2) {
+ // The A2 also benefits from (very) aggressive inlining of memcpy and
 + // friends. The overhead of the function call, even when warm, can be
+ // over one hundred cycles.
+ MaxStoresPerMemset = 128;
+ MaxStoresPerMemcpy = 128;
+ MaxStoresPerMemmove = 128;
}
}
@@ -981,8 +1002,6 @@ const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const {
case PPCISD::STBRX: return "PPCISD::STBRX";
case PPCISD::LFIWAX: return "PPCISD::LFIWAX";
case PPCISD::LFIWZX: return "PPCISD::LFIWZX";
- case PPCISD::LARX: return "PPCISD::LARX";
- case PPCISD::STCX: return "PPCISD::STCX";
case PPCISD::COND_BRANCH: return "PPCISD::COND_BRANCH";
case PPCISD::BDNZ: return "PPCISD::BDNZ";
case PPCISD::BDZ: return "PPCISD::BDZ";
@@ -1384,17 +1403,10 @@ SDValue PPC::get_VSPLTI_elt(SDNode *N, unsigned ByteSize, SelectionDAG &DAG) {
// immediate field for would be zero, and we prefer to use vxor for it.
if (ValSizeInBytes < ByteSize) return SDValue();
- // If the element value is larger than the splat value, cut it in half and
- // check to see if the two halves are equal. Continue doing this until we
- // get to ByteSize. This allows us to handle 0x01010101 as 0x01.
- while (ValSizeInBytes > ByteSize) {
- ValSizeInBytes >>= 1;
-
- // If the top half equals the bottom half, we're still ok.
- if (((Value >> (ValSizeInBytes*8)) & ((1 << (8*ValSizeInBytes))-1)) !=
- (Value & ((1 << (8*ValSizeInBytes))-1)))
- return SDValue();
- }
+ // If the element value is larger than the splat value, check if it consists
+ // of a repeated bit pattern of size ByteSize.
+ if (!APInt(ValSizeInBytes * 8, Value).isSplat(ByteSize * 8))
+ return SDValue();
// Properly sign extend the value.
int MaskVal = SignExtend32(Value, ByteSize * 8);
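
The halving loop is replaced by `APInt::isSplat`, which asks whether the value is a whole-number repetition of a ByteSize-wide pattern (so 0x01010101 splats as 0x01). The check it performs, written out for 32 bits:

```cpp
#include <cassert>
#include <cstdint>

// True if V consists of the same SplatBits-wide pattern repeated across
// all 32 bits, the property APInt::isSplat(SplatBits) tests.
static bool isSplat32(uint32_t V, unsigned SplatBits) {
  uint32_t Mask = (SplatBits < 32) ? ((1u << SplatBits) - 1) : ~0u;
  uint32_t Pattern = V & Mask;
  for (unsigned Shift = SplatBits; Shift < 32; Shift += SplatBits)
    if (((V >> Shift) & Mask) != Pattern)
      return false;
  return true;
}

int main() {
  assert(isSplat32(0x01010101u, 8));  // 0x01 repeated
  assert(isSplat32(0x01020102u, 16)); // 0x0102 repeated
  assert(!isSplat32(0x01020304u, 8));
}
```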
@@ -2436,27 +2448,16 @@ bool llvm::CC_PPC32_SVR4_Custom_AlignFPArgRegs(unsigned &ValNo, MVT &ValVT,
return false;
}
-/// GetFPR - Get the set of FP registers that should be allocated for arguments,
+/// FPR - The set of FP registers that should be allocated for arguments,
/// on Darwin.
-static const MCPhysReg *GetFPR() {
- static const MCPhysReg FPR[] = {
- PPC::F1, PPC::F2, PPC::F3, PPC::F4, PPC::F5, PPC::F6, PPC::F7,
- PPC::F8, PPC::F9, PPC::F10, PPC::F11, PPC::F12, PPC::F13
- };
+static const MCPhysReg FPR[] = {PPC::F1, PPC::F2, PPC::F3, PPC::F4, PPC::F5,
+ PPC::F6, PPC::F7, PPC::F8, PPC::F9, PPC::F10,
+ PPC::F11, PPC::F12, PPC::F13};
- return FPR;
-}
-
-/// GetQFPR - Get the set of QPX registers that should be allocated for
-/// arguments.
-static const MCPhysReg *GetQFPR() {
- static const MCPhysReg QFPR[] = {
- PPC::QF1, PPC::QF2, PPC::QF3, PPC::QF4, PPC::QF5, PPC::QF6, PPC::QF7,
- PPC::QF8, PPC::QF9, PPC::QF10, PPC::QF11, PPC::QF12, PPC::QF13
- };
-
- return QFPR;
-}
+/// QFPR - The set of QPX registers that should be allocated for arguments.
+static const MCPhysReg QFPR[] = {
+ PPC::QF1, PPC::QF2, PPC::QF3, PPC::QF4, PPC::QF5, PPC::QF6, PPC::QF7,
+ PPC::QF8, PPC::QF9, PPC::QF10, PPC::QF11, PPC::QF12, PPC::QF13};
/// CalculateStackSlotSize - Calculates the size reserved for this argument on
/// the stack.
@@ -2880,9 +2881,6 @@ PPCTargetLowering::LowerFormalArguments_64SVR4(
PPC::X3, PPC::X4, PPC::X5, PPC::X6,
PPC::X7, PPC::X8, PPC::X9, PPC::X10,
};
-
- static const MCPhysReg *FPR = GetFPR();
-
static const MCPhysReg VR[] = {
PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8,
PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13
@@ -2892,8 +2890,6 @@ PPCTargetLowering::LowerFormalArguments_64SVR4(
PPC::VSH9, PPC::VSH10, PPC::VSH11, PPC::VSH12, PPC::VSH13
};
- static const MCPhysReg *QFPR = GetQFPR();
-
const unsigned Num_GPR_Regs = array_lengthof(GPR);
const unsigned Num_FPR_Regs = 13;
const unsigned Num_VR_Regs = array_lengthof(VR);
@@ -3291,9 +3287,6 @@ PPCTargetLowering::LowerFormalArguments_Darwin(
PPC::X3, PPC::X4, PPC::X5, PPC::X6,
PPC::X7, PPC::X8, PPC::X9, PPC::X10,
};
-
- static const MCPhysReg *FPR = GetFPR();
-
static const MCPhysReg VR[] = {
PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8,
PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13
@@ -4187,7 +4180,8 @@ PPCTargetLowering::FinishCall(CallingConv::ID CallConv, SDLoc dl,
// Add a register mask operand representing the call-preserved registers.
const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
- const uint32_t *Mask = TRI->getCallPreservedMask(CallConv);
+ const uint32_t *Mask =
+ TRI->getCallPreservedMask(DAG.getMachineFunction(), CallConv);
assert(Mask && "Missing call preserved mask for calling convention");
Ops.push_back(DAG.getRegisterMask(Mask));
@@ -4582,8 +4576,6 @@ PPCTargetLowering::LowerCall_64SVR4(SDValue Chain, SDValue Callee,
PPC::X3, PPC::X4, PPC::X5, PPC::X6,
PPC::X7, PPC::X8, PPC::X9, PPC::X10,
};
- static const MCPhysReg *FPR = GetFPR();
-
static const MCPhysReg VR[] = {
PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8,
PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13
@@ -4593,8 +4585,6 @@ PPCTargetLowering::LowerCall_64SVR4(SDValue Chain, SDValue Callee,
PPC::VSH9, PPC::VSH10, PPC::VSH11, PPC::VSH12, PPC::VSH13
};
- static const MCPhysReg *QFPR = GetQFPR();
-
const unsigned NumGPRs = array_lengthof(GPR);
const unsigned NumFPRs = 13;
const unsigned NumVRs = array_lengthof(VR);
@@ -5280,8 +5270,6 @@ PPCTargetLowering::LowerCall_Darwin(SDValue Chain, SDValue Callee,
PPC::X3, PPC::X4, PPC::X5, PPC::X6,
PPC::X7, PPC::X8, PPC::X9, PPC::X10,
};
- static const MCPhysReg *FPR = GetFPR();
-
static const MCPhysReg VR[] = {
PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8,
PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13
@@ -6418,7 +6406,7 @@ static SDValue BuildSplatI(int Val, unsigned SplatSize, EVT VT,
SelectionDAG &DAG, SDLoc dl) {
assert(Val >= -16 && Val <= 15 && "vsplti is out of range!");
- static const EVT VTys[] = { // canonical VT to use for each size.
+ static const MVT VTys[] = { // canonical VT to use for each size.
MVT::v16i8, MVT::v8i16, MVT::Other, MVT::v4i32
};
@@ -7045,7 +7033,7 @@ SDValue PPCTargetLowering::LowerVECTOR_SHUFFLE(SDValue Op,
/// altivec comparison. If it is, return true and fill in Opc/isDot with
/// information about the intrinsic.
static bool getAltivecCompareInfo(SDValue Intrin, int &CompareOpc,
- bool &isDot) {
+ bool &isDot, const PPCSubtarget &Subtarget) {
unsigned IntrinsicID =
cast<ConstantSDNode>(Intrin.getOperand(0))->getZExtValue();
CompareOpc = -1;
@@ -7058,29 +7046,83 @@ static bool getAltivecCompareInfo(SDValue Intrin, int &CompareOpc,
case Intrinsic::ppc_altivec_vcmpequb_p: CompareOpc = 6; isDot = 1; break;
case Intrinsic::ppc_altivec_vcmpequh_p: CompareOpc = 70; isDot = 1; break;
case Intrinsic::ppc_altivec_vcmpequw_p: CompareOpc = 134; isDot = 1; break;
+ case Intrinsic::ppc_altivec_vcmpequd_p:
+   if (Subtarget.hasP8Altivec()) {
+     CompareOpc = 199;
+     isDot = 1;
+   } else
+     return false;
+   break;
case Intrinsic::ppc_altivec_vcmpgefp_p: CompareOpc = 454; isDot = 1; break;
case Intrinsic::ppc_altivec_vcmpgtfp_p: CompareOpc = 710; isDot = 1; break;
case Intrinsic::ppc_altivec_vcmpgtsb_p: CompareOpc = 774; isDot = 1; break;
case Intrinsic::ppc_altivec_vcmpgtsh_p: CompareOpc = 838; isDot = 1; break;
case Intrinsic::ppc_altivec_vcmpgtsw_p: CompareOpc = 902; isDot = 1; break;
+ case Intrinsic::ppc_altivec_vcmpgtsd_p:
+   if (Subtarget.hasP8Altivec()) {
+     CompareOpc = 967;
+     isDot = 1;
+   } else
+     return false;
+   break;
case Intrinsic::ppc_altivec_vcmpgtub_p: CompareOpc = 518; isDot = 1; break;
case Intrinsic::ppc_altivec_vcmpgtuh_p: CompareOpc = 582; isDot = 1; break;
case Intrinsic::ppc_altivec_vcmpgtuw_p: CompareOpc = 646; isDot = 1; break;
+ case Intrinsic::ppc_altivec_vcmpgtud_p:
+   if (Subtarget.hasP8Altivec()) {
+     CompareOpc = 711;
+     isDot = 1;
+   } else
+     return false;
+   break;
+
// Normal Comparisons.
case Intrinsic::ppc_altivec_vcmpbfp: CompareOpc = 966; isDot = 0; break;
case Intrinsic::ppc_altivec_vcmpeqfp: CompareOpc = 198; isDot = 0; break;
case Intrinsic::ppc_altivec_vcmpequb: CompareOpc = 6; isDot = 0; break;
case Intrinsic::ppc_altivec_vcmpequh: CompareOpc = 70; isDot = 0; break;
case Intrinsic::ppc_altivec_vcmpequw: CompareOpc = 134; isDot = 0; break;
+ case Intrinsic::ppc_altivec_vcmpequd:
+   if (Subtarget.hasP8Altivec()) {
+     CompareOpc = 199;
+     isDot = 0;
+   } else
+     return false;
+   break;
case Intrinsic::ppc_altivec_vcmpgefp: CompareOpc = 454; isDot = 0; break;
case Intrinsic::ppc_altivec_vcmpgtfp: CompareOpc = 710; isDot = 0; break;
case Intrinsic::ppc_altivec_vcmpgtsb: CompareOpc = 774; isDot = 0; break;
case Intrinsic::ppc_altivec_vcmpgtsh: CompareOpc = 838; isDot = 0; break;
case Intrinsic::ppc_altivec_vcmpgtsw: CompareOpc = 902; isDot = 0; break;
+ case Intrinsic::ppc_altivec_vcmpgtsd:
+   if (Subtarget.hasP8Altivec()) {
+     CompareOpc = 967;
+     isDot = 0;
+   } else
+     return false;
+   break;
case Intrinsic::ppc_altivec_vcmpgtub: CompareOpc = 518; isDot = 0; break;
case Intrinsic::ppc_altivec_vcmpgtuh: CompareOpc = 582; isDot = 0; break;
case Intrinsic::ppc_altivec_vcmpgtuw: CompareOpc = 646; isDot = 0; break;
+ case Intrinsic::ppc_altivec_vcmpgtud:
+   if (Subtarget.hasP8Altivec()) {
+     CompareOpc = 711;
+     isDot = 0;
+   } else
+     return false;
+   break;
}
return true;
}
@@ -7094,7 +7136,7 @@ SDValue PPCTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
SDLoc dl(Op);
int CompareOpc;
bool isDot;
- if (!getAltivecCompareInfo(Op, CompareOpc, isDot))
+ if (!getAltivecCompareInfo(Op, CompareOpc, isDot, Subtarget))
return SDValue(); // Don't custom lower most intrinsics.
// If this is a non-dot comparison, make the VCMP node and we are done.
@@ -7738,10 +7780,36 @@ Instruction* PPCTargetLowering::emitTrailingFence(IRBuilder<> &Builder,
MachineBasicBlock *
PPCTargetLowering::EmitAtomicBinary(MachineInstr *MI, MachineBasicBlock *BB,
- bool is64bit, unsigned BinOpcode) const {
+ unsigned AtomicSize,
+ unsigned BinOpcode) const {
// This also handles ATOMIC_SWAP, indicated by BinOpcode==0.
const TargetInstrInfo *TII = Subtarget.getInstrInfo();
+ auto LoadMnemonic = PPC::LDARX;
+ auto StoreMnemonic = PPC::STDCX;
+ switch (AtomicSize) {
+ default:
+ llvm_unreachable("Unexpected size of atomic entity");
+ case 1:
+ LoadMnemonic = PPC::LBARX;
+ StoreMnemonic = PPC::STBCX;
+ assert(Subtarget.hasPartwordAtomics() && "Call this only with size >=4");
+ break;
+ case 2:
+ LoadMnemonic = PPC::LHARX;
+ StoreMnemonic = PPC::STHCX;
+ assert(Subtarget.hasPartwordAtomics() && "Call this only with size >=4");
+ break;
+ case 4:
+ LoadMnemonic = PPC::LWARX;
+ StoreMnemonic = PPC::STWCX;
+ break;
+ case 8:
+ LoadMnemonic = PPC::LDARX;
+ StoreMnemonic = PPC::STDCX;
+ break;
+ }
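+ // E.g. (a sketch) for AtomicSize == 4 this selects lwarx/stwcx., so the
+ // loop built below is:
+ //   loop: lwarx   dest, ptrA, ptrB
+ //         <binop> tmp, incr, dest
+ //         stwcx.  tmp, ptrA, ptrB
+ //         bne-    loop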
+
const BasicBlock *LLVM_BB = BB->getBasicBlock();
MachineFunction *F = BB->getParent();
MachineFunction::iterator It = BB;
@@ -7763,7 +7831,7 @@ PPCTargetLowering::EmitAtomicBinary(MachineInstr *MI, MachineBasicBlock *BB,
MachineRegisterInfo &RegInfo = F->getRegInfo();
unsigned TmpReg = (!BinOpcode) ? incr :
- RegInfo.createVirtualRegister( is64bit ? &PPC::G8RCRegClass
+ RegInfo.createVirtualRegister( AtomicSize == 8 ? &PPC::G8RCRegClass
: &PPC::GPRCRegClass);
// thisMBB:
@@ -7778,11 +7846,11 @@ PPCTargetLowering::EmitAtomicBinary(MachineInstr *MI, MachineBasicBlock *BB,
// bne- loopMBB
// fallthrough --> exitMBB
BB = loopMBB;
- BuildMI(BB, dl, TII->get(is64bit ? PPC::LDARX : PPC::LWARX), dest)
+ BuildMI(BB, dl, TII->get(LoadMnemonic), dest)
.addReg(ptrA).addReg(ptrB);
if (BinOpcode)
BuildMI(BB, dl, TII->get(BinOpcode), TmpReg).addReg(incr).addReg(dest);
- BuildMI(BB, dl, TII->get(is64bit ? PPC::STDCX : PPC::STWCX))
+ BuildMI(BB, dl, TII->get(StoreMnemonic))
.addReg(TmpReg).addReg(ptrA).addReg(ptrB);
BuildMI(BB, dl, TII->get(PPC::BCC))
.addImm(PPC::PRED_NE).addReg(PPC::CR0).addMBB(loopMBB);
@@ -7800,6 +7868,10 @@ PPCTargetLowering::EmitPartwordAtomicBinary(MachineInstr *MI,
MachineBasicBlock *BB,
bool is8bit, // operation
unsigned BinOpcode) const {
+ // If we support part-word atomic mnemonics, just use them
+ if (Subtarget.hasPartwordAtomics())
+ return EmitAtomicBinary(MI, BB, is8bit ? 1 : 2, BinOpcode);
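+ // (Sketch) on such subtargets an i8 atomic op expands directly to a
+ // lbarx/stbcx. loop via EmitAtomicBinary, skipping the word-sized
+ // shift-and-mask expansion below.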
+
// This also handles ATOMIC_SWAP, indicated by BinOpcode==0.
const TargetInstrInfo *TII = Subtarget.getInstrInfo();
// In 64 bit mode we have to use 64 bits for addresses, even though the
@@ -8365,68 +8437,96 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
else if (MI->getOpcode() == PPC::ATOMIC_LOAD_ADD_I16)
BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::ADD4);
else if (MI->getOpcode() == PPC::ATOMIC_LOAD_ADD_I32)
- BB = EmitAtomicBinary(MI, BB, false, PPC::ADD4);
+ BB = EmitAtomicBinary(MI, BB, 4, PPC::ADD4);
else if (MI->getOpcode() == PPC::ATOMIC_LOAD_ADD_I64)
- BB = EmitAtomicBinary(MI, BB, true, PPC::ADD8);
+ BB = EmitAtomicBinary(MI, BB, 8, PPC::ADD8);
else if (MI->getOpcode() == PPC::ATOMIC_LOAD_AND_I8)
BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::AND);
else if (MI->getOpcode() == PPC::ATOMIC_LOAD_AND_I16)
BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::AND);
else if (MI->getOpcode() == PPC::ATOMIC_LOAD_AND_I32)
- BB = EmitAtomicBinary(MI, BB, false, PPC::AND);
+ BB = EmitAtomicBinary(MI, BB, 4, PPC::AND);
else if (MI->getOpcode() == PPC::ATOMIC_LOAD_AND_I64)
- BB = EmitAtomicBinary(MI, BB, true, PPC::AND8);
+ BB = EmitAtomicBinary(MI, BB, 8, PPC::AND8);
else if (MI->getOpcode() == PPC::ATOMIC_LOAD_OR_I8)
BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::OR);
else if (MI->getOpcode() == PPC::ATOMIC_LOAD_OR_I16)
BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::OR);
else if (MI->getOpcode() == PPC::ATOMIC_LOAD_OR_I32)
- BB = EmitAtomicBinary(MI, BB, false, PPC::OR);
+ BB = EmitAtomicBinary(MI, BB, 4, PPC::OR);
else if (MI->getOpcode() == PPC::ATOMIC_LOAD_OR_I64)
- BB = EmitAtomicBinary(MI, BB, true, PPC::OR8);
+ BB = EmitAtomicBinary(MI, BB, 8, PPC::OR8);
else if (MI->getOpcode() == PPC::ATOMIC_LOAD_XOR_I8)
BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::XOR);
else if (MI->getOpcode() == PPC::ATOMIC_LOAD_XOR_I16)
BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::XOR);
else if (MI->getOpcode() == PPC::ATOMIC_LOAD_XOR_I32)
- BB = EmitAtomicBinary(MI, BB, false, PPC::XOR);
+ BB = EmitAtomicBinary(MI, BB, 4, PPC::XOR);
else if (MI->getOpcode() == PPC::ATOMIC_LOAD_XOR_I64)
- BB = EmitAtomicBinary(MI, BB, true, PPC::XOR8);
+ BB = EmitAtomicBinary(MI, BB, 8, PPC::XOR8);
else if (MI->getOpcode() == PPC::ATOMIC_LOAD_NAND_I8)
BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::NAND);
else if (MI->getOpcode() == PPC::ATOMIC_LOAD_NAND_I16)
BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::NAND);
else if (MI->getOpcode() == PPC::ATOMIC_LOAD_NAND_I32)
- BB = EmitAtomicBinary(MI, BB, false, PPC::NAND);
+ BB = EmitAtomicBinary(MI, BB, 4, PPC::NAND);
else if (MI->getOpcode() == PPC::ATOMIC_LOAD_NAND_I64)
- BB = EmitAtomicBinary(MI, BB, true, PPC::NAND8);
+ BB = EmitAtomicBinary(MI, BB, 8, PPC::NAND8);
else if (MI->getOpcode() == PPC::ATOMIC_LOAD_SUB_I8)
BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::SUBF);
else if (MI->getOpcode() == PPC::ATOMIC_LOAD_SUB_I16)
BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::SUBF);
else if (MI->getOpcode() == PPC::ATOMIC_LOAD_SUB_I32)
- BB = EmitAtomicBinary(MI, BB, false, PPC::SUBF);
+ BB = EmitAtomicBinary(MI, BB, 4, PPC::SUBF);
else if (MI->getOpcode() == PPC::ATOMIC_LOAD_SUB_I64)
- BB = EmitAtomicBinary(MI, BB, true, PPC::SUBF8);
+ BB = EmitAtomicBinary(MI, BB, 8, PPC::SUBF8);
else if (MI->getOpcode() == PPC::ATOMIC_SWAP_I8)
BB = EmitPartwordAtomicBinary(MI, BB, true, 0);
else if (MI->getOpcode() == PPC::ATOMIC_SWAP_I16)
BB = EmitPartwordAtomicBinary(MI, BB, false, 0);
else if (MI->getOpcode() == PPC::ATOMIC_SWAP_I32)
- BB = EmitAtomicBinary(MI, BB, false, 0);
+ BB = EmitAtomicBinary(MI, BB, 4, 0);
else if (MI->getOpcode() == PPC::ATOMIC_SWAP_I64)
- BB = EmitAtomicBinary(MI, BB, true, 0);
+ BB = EmitAtomicBinary(MI, BB, 8, 0);
else if (MI->getOpcode() == PPC::ATOMIC_CMP_SWAP_I32 ||
- MI->getOpcode() == PPC::ATOMIC_CMP_SWAP_I64) {
+ MI->getOpcode() == PPC::ATOMIC_CMP_SWAP_I64 ||
+ (Subtarget.hasPartwordAtomics() &&
+ MI->getOpcode() == PPC::ATOMIC_CMP_SWAP_I8) ||
+ (Subtarget.hasPartwordAtomics() &&
+ MI->getOpcode() == PPC::ATOMIC_CMP_SWAP_I16)) {
bool is64bit = MI->getOpcode() == PPC::ATOMIC_CMP_SWAP_I64;
+ auto LoadMnemonic = PPC::LDARX;
+ auto StoreMnemonic = PPC::STDCX;
+ switch(MI->getOpcode()) {
+ default:
+ llvm_unreachable("Compare and swap of unknown size");
+ case PPC::ATOMIC_CMP_SWAP_I8:
+ LoadMnemonic = PPC::LBARX;
+ StoreMnemonic = PPC::STBCX;
+ assert(Subtarget.hasPartwordAtomics() && "No support partword atomics.");
+ break;
+ case PPC::ATOMIC_CMP_SWAP_I16:
+ LoadMnemonic = PPC::LHARX;
+ StoreMnemonic = PPC::STHCX;
+ assert(Subtarget.hasPartwordAtomics() && "No support partword atomics.");
+ break;
+ case PPC::ATOMIC_CMP_SWAP_I32:
+ LoadMnemonic = PPC::LWARX;
+ StoreMnemonic = PPC::STWCX;
+ break;
+ case PPC::ATOMIC_CMP_SWAP_I64:
+ LoadMnemonic = PPC::LDARX;
+ StoreMnemonic = PPC::STDCX;
+ break;
+ }
unsigned dest = MI->getOperand(0).getReg();
unsigned ptrA = MI->getOperand(1).getReg();
unsigned ptrB = MI->getOperand(2).getReg();
@@ -8452,18 +8552,18 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
BB->addSuccessor(loop1MBB);
// loop1MBB:
- // l[wd]arx dest, ptr
+ // l[bhwd]arx dest, ptr
// cmp[wd] dest, oldval
// bne- midMBB
// loop2MBB:
- // st[wd]cx. newval, ptr
+ // st[bhwd]cx. newval, ptr
// bne- loop1MBB
// b exitBB
// midMBB:
- // st[wd]cx. dest, ptr
+ // st[bhwd]cx. dest, ptr
// exitBB:
BB = loop1MBB;
- BuildMI(BB, dl, TII->get(is64bit ? PPC::LDARX : PPC::LWARX), dest)
+ BuildMI(BB, dl, TII->get(LoadMnemonic), dest)
.addReg(ptrA).addReg(ptrB);
BuildMI(BB, dl, TII->get(is64bit ? PPC::CMPD : PPC::CMPW), PPC::CR0)
.addReg(oldval).addReg(dest);
@@ -8473,7 +8573,7 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
BB->addSuccessor(midMBB);
BB = loop2MBB;
- BuildMI(BB, dl, TII->get(is64bit ? PPC::STDCX : PPC::STWCX))
+ BuildMI(BB, dl, TII->get(StoreMnemonic))
.addReg(newval).addReg(ptrA).addReg(ptrB);
BuildMI(BB, dl, TII->get(PPC::BCC))
.addImm(PPC::PRED_NE).addReg(PPC::CR0).addMBB(loop1MBB);
@@ -8482,7 +8582,7 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
BB->addSuccessor(exitMBB);
BB = midMBB;
- BuildMI(BB, dl, TII->get(is64bit ? PPC::STDCX : PPC::STWCX))
+ BuildMI(BB, dl, TII->get(StoreMnemonic))
.addReg(dest).addReg(ptrA).addReg(ptrB);
BB->addSuccessor(exitMBB);
@@ -8682,6 +8782,12 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
BuildMI(*BB, MI, dl, TII->get(TargetOpcode::COPY),
MI->getOperand(0).getReg())
.addReg(isEQ ? PPC::CR0EQ : PPC::CR0GT);
+ } else if (MI->getOpcode() == PPC::TCHECK_RET) {
+ DebugLoc Dl = MI->getDebugLoc();
+ MachineRegisterInfo &RegInfo = F->getRegInfo();
+ unsigned CRReg = RegInfo.createVirtualRegister(&PPC::CRRCRegClass);
+ BuildMI(*BB, MI, Dl, TII->get(PPC::TCHECK), CRReg);
+ return BB;
} else {
llvm_unreachable("Unexpected instr type to insert");
}
@@ -10184,7 +10290,7 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
if (LHS.getOpcode() == ISD::INTRINSIC_WO_CHAIN &&
isa<ConstantSDNode>(RHS) && (CC == ISD::SETEQ || CC == ISD::SETNE) &&
- getAltivecCompareInfo(LHS, CompareOpc, isDot)) {
+ getAltivecCompareInfo(LHS, CompareOpc, isDot, Subtarget)) {
assert(isDot && "Can't compare against a vector result!");
// If this is a comparison against something other than 0/1, then we know
@@ -10297,14 +10403,17 @@ void PPCTargetLowering::computeKnownBitsForTargetNode(const SDValue Op,
case Intrinsic::ppc_altivec_vcmpequb_p:
case Intrinsic::ppc_altivec_vcmpequh_p:
case Intrinsic::ppc_altivec_vcmpequw_p:
+ case Intrinsic::ppc_altivec_vcmpequd_p:
case Intrinsic::ppc_altivec_vcmpgefp_p:
case Intrinsic::ppc_altivec_vcmpgtfp_p:
case Intrinsic::ppc_altivec_vcmpgtsb_p:
case Intrinsic::ppc_altivec_vcmpgtsh_p:
case Intrinsic::ppc_altivec_vcmpgtsw_p:
+ case Intrinsic::ppc_altivec_vcmpgtsd_p:
case Intrinsic::ppc_altivec_vcmpgtub_p:
case Intrinsic::ppc_altivec_vcmpgtuh_p:
case Intrinsic::ppc_altivec_vcmpgtuw_p:
+ case Intrinsic::ppc_altivec_vcmpgtud_p:
KnownZero = ~1U; // All bits but the low one are known to be zero.
break;
}
@@ -10914,11 +11023,27 @@ EVT PPCTargetLowering::getOptimalMemOpType(uint64_t Size,
bool IsMemset, bool ZeroMemset,
bool MemcpyStrSrc,
MachineFunction &MF) const {
+ const Function *F = MF.getFunction();
+ // When expanding a memset, require at least two QPX instructions to cover
+ // the cost of loading the value to be stored from the constant pool.
+ if (Subtarget.hasQPX() && Size >= 32 && (!IsMemset || Size >= 64) &&
+ (!SrcAlign || SrcAlign >= 32) && (!DstAlign || DstAlign >= 32) &&
+ !F->hasFnAttribute(Attribute::NoImplicitFloat)) {
+ return MVT::v4f64;
+ }
+
+ // We should use Altivec/VSX loads and stores when available. For unaligned
+ // addresses, unaligned VSX loads are only fast starting with the P8.
+ if (Subtarget.hasAltivec() && Size >= 16 &&
+ (((!SrcAlign || SrcAlign >= 16) && (!DstAlign || DstAlign >= 16)) ||
+ ((IsMemset && Subtarget.hasVSX()) || Subtarget.hasP8Vector())))
+ return MVT::v4i32;
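+ // For instance (illustrative): a 32-byte-aligned memcpy on a QPX subtarget
+ // gets v4f64 accesses, a 16-byte-aligned copy with Altivec gets v4i32, and
+ // everything else falls back to i64/i32 below.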
+
if (Subtarget.isPPC64()) {
return MVT::i64;
- } else {
- return MVT::i32;
}
+
+ return MVT::i32;
}
/// \brief Returns true if it is beneficial to convert a load of a constant
diff --git a/lib/Target/PowerPC/PPCISelLowering.h b/lib/Target/PowerPC/PPCISelLowering.h
index 04afe88..8afd7ef 100644
--- a/lib/Target/PowerPC/PPCISelLowering.h
+++ b/lib/Target/PowerPC/PPCISelLowering.h
@@ -166,14 +166,6 @@ namespace llvm {
/// F8RC = MFFS - This moves the FPSCR (not modeled) into the register.
MFFS,
- /// LARX = This corresponds to PPC l{w|d}arx instrcution: load and
- /// reserve indexed. This is used to implement atomic operations.
- LARX,
-
- /// STCX = This corresponds to PPC stcx. instrcution: store conditional
- /// indexed. This is used to implement atomic operations.
- STCX,
-
/// TC_RETURN - A tail call return.
/// operand #0 chain
/// operand #1 callee (register or absolute)
@@ -489,7 +481,8 @@ namespace llvm {
EmitInstrWithCustomInserter(MachineInstr *MI,
MachineBasicBlock *MBB) const override;
MachineBasicBlock *EmitAtomicBinary(MachineInstr *MI,
- MachineBasicBlock *MBB, bool is64Bit,
+ MachineBasicBlock *MBB,
+ unsigned AtomicSize,
unsigned BinOpcode) const;
MachineBasicBlock *EmitPartwordAtomicBinary(MachineInstr *MI,
MachineBasicBlock *MBB,
@@ -526,6 +519,21 @@ namespace llvm {
std::vector<SDValue> &Ops,
SelectionDAG &DAG) const override;
+ unsigned getInlineAsmMemConstraint(
+ const std::string &ConstraintCode) const override {
+ if (ConstraintCode == "es")
+ return InlineAsm::Constraint_es;
+ else if (ConstraintCode == "o")
+ return InlineAsm::Constraint_o;
+ else if (ConstraintCode == "Q")
+ return InlineAsm::Constraint_Q;
+ else if (ConstraintCode == "Z")
+ return InlineAsm::Constraint_Z;
+ else if (ConstraintCode == "Zy")
+ return InlineAsm::Constraint_Zy;
+ return TargetLowering::getInlineAsmMemConstraint(ConstraintCode);
+ }
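+ // Illustrative use (names hypothetical): the "Z" constraint in a GCC-style
+ // inline asm maps to Constraint_Z above, e.g.
+ //   unsigned V;
+ //   asm("lwbrx %0, %y1" : "=r"(V) : "Z"(*Ptr));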
+
/// isLegalAddressingMode - Return true if the addressing mode represented
/// by AM is legal for this target, for a load/store of the specified type.
bool isLegalAddressingMode(const AddrMode &AM, Type *Ty) const override;
diff --git a/lib/Target/PowerPC/PPCInstr64Bit.td b/lib/Target/PowerPC/PPCInstr64Bit.td
index 69c0d7d..183d088 100644
--- a/lib/Target/PowerPC/PPCInstr64Bit.td
+++ b/lib/Target/PowerPC/PPCInstr64Bit.td
@@ -235,15 +235,19 @@ let usesCustomInserter = 1 in {
}
// Instructions to support atomic operations
+let mayLoad = 1, hasSideEffects = 0 in {
def LDARX : XForm_1<31, 84, (outs g8rc:$rD), (ins memrr:$ptr),
- "ldarx $rD, $ptr", IIC_LdStLDARX,
- [(set i64:$rD, (PPClarx xoaddr:$ptr))]>;
+ "ldarx $rD, $ptr", IIC_LdStLDARX, []>;
+
+// Instruction to support lock versions of atomics
+// (EH=1 - see Power ISA 2.07 Book II 4.4.2)
+def LDARXL : XForm_1<31, 84, (outs g8rc:$rD), (ins memrr:$ptr),
+ "ldarx $rD, $ptr, 1", IIC_LdStLDARX, []>, isDOT;
+}
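+// Note: the *L variants reuse isDOT only to set instruction bit 31, which in
+// the larx encodings is the EH hint field rather than a CR0 update (as we
+// read the encoding; see Power ISA 2.07 Book II 4.4.2).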
-let Defs = [CR0] in
+let Defs = [CR0], mayStore = 1, hasSideEffects = 0 in
def STDCX : XForm_1<31, 214, (outs), (ins g8rc:$rS, memrr:$dst),
- "stdcx. $rS, $dst", IIC_LdStSTDCX,
- [(PPCstcx i64:$rS, xoaddr:$dst)]>,
- isDOT;
+ "stdcx. $rS, $dst", IIC_LdStSTDCX, []>, isDOT;
let Interpretation64Bit = 1, isCodeGenOnly = 1 in {
let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1, Uses = [RM] in
@@ -325,6 +329,12 @@ let hasSideEffects = 1, isBarrier = 1, usesCustomInserter = 1 in {
Requires<[In64BitMode]>;
}
+def MFSPR8 : XFXForm_1<31, 339, (outs g8rc:$RT), (ins i32imm:$SPR),
+ "mfspr $RT, $SPR", IIC_SprMFSPR>;
+def MTSPR8 : XFXForm_1<31, 467, (outs), (ins i32imm:$SPR, g8rc:$RT),
+ "mtspr $SPR, $RT", IIC_SprMTSPR>;
+
+
//===----------------------------------------------------------------------===//
// 64-bit SPR manipulation instrs.
@@ -696,7 +706,7 @@ def ISEL8 : AForm_4<31, 15,
// Sign extending loads.
-let canFoldAsLoad = 1, PPC970_Unit = 2 in {
+let PPC970_Unit = 2 in {
let Interpretation64Bit = 1, isCodeGenOnly = 1 in
def LHA8: DForm_1<42, (outs g8rc:$rD), (ins memri:$src),
"lha $rD, $src", IIC_LdStLHA,
@@ -752,7 +762,7 @@ def LWAUX : XForm_1<31, 373, (outs g8rc:$rD, ptr_rc_nor0:$ea_result),
let Interpretation64Bit = 1, isCodeGenOnly = 1 in {
// Zero extending loads.
-let canFoldAsLoad = 1, PPC970_Unit = 2 in {
+let PPC970_Unit = 2 in {
def LBZ8 : DForm_1<34, (outs g8rc:$rD), (ins memri:$src),
"lbz $rD, $src", IIC_LdStLoad,
[(set i64:$rD, (zextloadi8 iaddr:$src))]>;
@@ -810,7 +820,7 @@ def LWZUX8 : XForm_1<31, 55, (outs g8rc:$rD, ptr_rc_nor0:$ea_result),
// Full 8-byte loads.
-let canFoldAsLoad = 1, PPC970_Unit = 2 in {
+let PPC970_Unit = 2 in {
def LD : DSForm_1<58, 0, (outs g8rc:$rD), (ins memrix:$src),
"ld $rD, $src", IIC_LdStLD,
[(set i64:$rD, (aligned4load ixaddr:$src))]>, isPPC64;
diff --git a/lib/Target/PowerPC/PPCInstrAltivec.td b/lib/Target/PowerPC/PPCInstrAltivec.td
index f6acd6e..123808b 100644
--- a/lib/Target/PowerPC/PPCInstrAltivec.td
+++ b/lib/Target/PowerPC/PPCInstrAltivec.td
@@ -269,6 +269,16 @@ class VX2_Int_Ty2<bits<11> xo, string opc, Intrinsic IntID, ValueType OutTy,
!strconcat(opc, " $vD, $vB"), IIC_VecFP,
[(set OutTy:$vD, (IntID InTy:$vB))]>;
+class VXBX_Int_Ty<bits<11> xo, string opc, Intrinsic IntID, ValueType Ty>
+ : VXForm_BX<xo, (outs vrrc:$vD), (ins vrrc:$vA),
+ !strconcat(opc, " $vD, $vA"), IIC_VecFP,
+ [(set Ty:$vD, (IntID Ty:$vA))]>;
+
+class VXCR_Int_Ty<bits<11> xo, string opc, Intrinsic IntID, ValueType Ty>
+ : VXForm_CR<xo, (outs vrrc:$vD), (ins vrrc:$vA, u1imm:$ST, u4imm:$SIX),
+ !strconcat(opc, " $vD, $vA, $ST, $SIX"), IIC_VecFP,
+ [(set Ty:$vD, (IntID Ty:$vA, imm:$ST, imm:$SIX))]>;
+
//===----------------------------------------------------------------------===//
// Instruction Definitions.
@@ -342,7 +352,7 @@ def MTVSCR : VXForm_5<1604, (outs), (ins vrrc:$vB),
"mtvscr $vB", IIC_LdStLoad,
[(int_ppc_altivec_mtvscr v4i32:$vB)]>;
-let canFoldAsLoad = 1, PPC970_Unit = 2 in { // Loads.
+let PPC970_Unit = 2 in { // Loads.
def LVEBX: XForm_1<31, 7, (outs vrrc:$vD), (ins memrr:$src),
"lvebx $vD, $src", IIC_LdStLoad,
[(set v16i8:$vD, (int_ppc_altivec_lvebx xoaddr:$src))]>;
@@ -750,7 +760,7 @@ def VCMPGTSW : VCMP <902, "vcmpgtsw $vD, $vA, $vB" , v4i32>;
def VCMPGTSWo : VCMPo<902, "vcmpgtsw. $vD, $vA, $vB", v4i32>;
def VCMPGTUW : VCMP <646, "vcmpgtuw $vD, $vA, $vB" , v4i32>;
def VCMPGTUWo : VCMPo<646, "vcmpgtuw. $vD, $vA, $vB", v4i32>;
-
+
let isCodeGenOnly = 1 in {
def V_SET0B : VXForm_setzero<1220, (outs vrrc:$vD), (ins),
"vxor $vD, $vD, $vD", IIC_VecFP,
@@ -939,8 +949,50 @@ def : Pat<(v4f32 (fnearbyint v4f32:$vA)),
} // end HasAltivec
def HasP8Altivec : Predicate<"PPCSubTarget->hasP8Altivec()">;
+def HasP8Crypto : Predicate<"PPCSubTarget->hasP8Crypto()">;
let Predicates = [HasP8Altivec] in {
+let isCommutable = 1 in {
+def VMULESW : VX1_Int_Ty2<904, "vmulesw", int_ppc_altivec_vmulesw,
+ v2i64, v4i32>;
+def VMULEUW : VX1_Int_Ty2<648, "vmuleuw", int_ppc_altivec_vmuleuw,
+ v2i64, v4i32>;
+def VMULOSW : VX1_Int_Ty2<392, "vmulosw", int_ppc_altivec_vmulosw,
+ v2i64, v4i32>;
+def VMULOUW : VX1_Int_Ty2<136, "vmulouw", int_ppc_altivec_vmulouw,
+ v2i64, v4i32>;
+def VMULUWM : VXForm_1<137, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
+ "vmuluwm $vD, $vA, $vB", IIC_VecGeneral,
+ [(set v4i32:$vD, (mul v4i32:$vA, v4i32:$vB))]>;
+def VMAXSD : VX1_Int_Ty<450, "vmaxsd", int_ppc_altivec_vmaxsd, v2i64>;
+def VMAXUD : VX1_Int_Ty<194, "vmaxud", int_ppc_altivec_vmaxud, v2i64>;
+def VMINSD : VX1_Int_Ty<962, "vminsd", int_ppc_altivec_vminsd, v2i64>;
+def VMINUD : VX1_Int_Ty<706, "vminud", int_ppc_altivec_vminud, v2i64>;
+} // isCommutable
+
+// Vector shifts
+def VRLD : VX1_Int_Ty<196, "vrld", int_ppc_altivec_vrld, v2i64>;
+def VSLD : VXForm_1<1476, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
+ "vsld $vD, $vA, $vB", IIC_VecGeneral,
+ [(set v2i64:$vD, (shl v2i64:$vA, v2i64:$vB))]>;
+def VSRD : VXForm_1<1732, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
+ "vsrd $vD, $vA, $vB", IIC_VecGeneral,
+ [(set v2i64:$vD, (srl v2i64:$vA, v2i64:$vB))]>;
+def VSRAD : VXForm_1<964, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
+ "vsrad $vD, $vA, $vB", IIC_VecGeneral,
+ [(set v2i64:$vD, (sra v2i64:$vA, v2i64:$vB))]>;
+
+// Vector Integer Arithmetic Instructions
+let isCommutable = 1 in {
+def VADDUDM : VXForm_1<192, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
+ "vaddudm $vD, $vA, $vB", IIC_VecGeneral,
+ [(set v2i64:$vD, (add v2i64:$vA, v2i64:$vB))]>;
+} // isCommutable
+
+def VSUBUDM : VXForm_1<1216, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
+ "vsubudm $vD, $vA, $vB", IIC_VecGeneral,
+ [(set v2i64:$vD, (sub v2i64:$vA, v2i64:$vB))]>;
+
// Count Leading Zeros
def VCLZB : VXForm_2<1794, (outs vrrc:$vD), (ins vrrc:$vB),
"vclzb $vD, $vB", IIC_VecGeneral,
@@ -992,4 +1044,42 @@ def VORC : VXForm_1<1348, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
"vorc $vD, $vA, $vB", IIC_VecGeneral,
[(set v4i32:$vD, (or v4i32:$vA,
(vnot_ppc v4i32:$vB)))]>;
+
+// i64 element comparisons.
+def VCMPEQUD : VCMP <199, "vcmpequd $vD, $vA, $vB" , v2i64>;
+def VCMPEQUDo : VCMPo<199, "vcmpequd. $vD, $vA, $vB", v2i64>;
+def VCMPGTSD : VCMP <967, "vcmpgtsd $vD, $vA, $vB" , v2i64>;
+def VCMPGTSDo : VCMPo<967, "vcmpgtsd. $vD, $vA, $vB", v2i64>;
+def VCMPGTUD : VCMP <711, "vcmpgtud $vD, $vA, $vB" , v2i64>;
+def VCMPGTUDo : VCMPo<711, "vcmpgtud. $vD, $vA, $vB", v2i64>;
+
+// The cryptography instructions that do not require Category:Vector.Crypto
+def VPMSUMB : VX1_Int_Ty<1032, "vpmsumb",
+ int_ppc_altivec_crypto_vpmsumb, v16i8>;
+def VPMSUMH : VX1_Int_Ty<1096, "vpmsumh",
+ int_ppc_altivec_crypto_vpmsumh, v8i16>;
+def VPMSUMW : VX1_Int_Ty<1160, "vpmsumw",
+ int_ppc_altivec_crypto_vpmsumw, v4i32>;
+def VPMSUMD : VX1_Int_Ty<1224, "vpmsumd",
+ int_ppc_altivec_crypto_vpmsumd, v2i64>;
+def VPERMXOR : VA1a_Int_Ty<45, "vpermxor",
+ int_ppc_altivec_crypto_vpermxor, v16i8>;
+
} // end HasP8Altivec
+
+// Crypto instructions (from builtins)
+let Predicates = [HasP8Crypto] in {
+def VSHASIGMAW : VXCR_Int_Ty<1666, "vshasigmaw",
+ int_ppc_altivec_crypto_vshasigmaw, v4i32>;
+def VSHASIGMAD : VXCR_Int_Ty<1730, "vshasigmad",
+ int_ppc_altivec_crypto_vshasigmad, v2i64>;
+def VCIPHER : VX1_Int_Ty<1288, "vcipher", int_ppc_altivec_crypto_vcipher,
+ v2i64>;
+def VCIPHERLAST : VX1_Int_Ty<1289, "vcipherlast",
+ int_ppc_altivec_crypto_vcipherlast, v2i64>;
+def VNCIPHER : VX1_Int_Ty<1352, "vncipher",
+ int_ppc_altivec_crypto_vncipher, v2i64>;
+def VNCIPHERLAST : VX1_Int_Ty<1353, "vncipherlast",
+ int_ppc_altivec_crypto_vncipherlast, v2i64>;
+def VSBOX : VXBX_Int_Ty<1480, "vsbox", int_ppc_altivec_crypto_vsbox, v2i64>;
+} // HasP8Crypto
diff --git a/lib/Target/PowerPC/PPCInstrFormats.td b/lib/Target/PowerPC/PPCInstrFormats.td
index 506a2d0..b7a7a1f 100644
--- a/lib/Target/PowerPC/PPCInstrFormats.td
+++ b/lib/Target/PowerPC/PPCInstrFormats.td
@@ -693,6 +693,60 @@ class XForm_16b<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr,
let A = 0;
}
+class XForm_htm0<bits<6> opcode, bits<10> xo, dag OOL, dag IOL,
+ string asmstr, InstrItinClass itin, list<dag> pattern>
+ : I<opcode, OOL, IOL, asmstr, itin> {
+ bit R;
+
+ bit RC = 1;
+
+ let Inst{6-9} = 0;
+ let Inst{10} = R;
+ let Inst{11-20} = 0;
+ let Inst{21-30} = xo;
+ let Inst{31} = RC;
+}
+
+class XForm_htm1<bits<6> opcode, bits<10> xo, dag OOL, dag IOL,
+ string asmstr, InstrItinClass itin, list<dag> pattern>
+ : I<opcode, OOL, IOL, asmstr, itin> {
+ bit A;
+
+ bit RC = 1;
+
+ let Inst{6} = A;
+ let Inst{7-20} = 0;
+ let Inst{21-30} = xo;
+ let Inst{31} = RC;
+}
+
+class XForm_htm2<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : I<opcode, OOL, IOL, asmstr, itin> {
+ bit L;
+
+ bit RC = 0; // set by isDOT
+
+ let Inst{7-9} = 0;
+ let Inst{10} = L;
+ let Inst{11-20} = 0;
+ let Inst{21-30} = xo;
+ let Inst{31} = RC;
+}
+
+class XForm_htm3<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : I<opcode, OOL, IOL, asmstr, itin> {
+ bits<3> BF;
+
+ bit RC = 0;
+
+ let Inst{6-8} = BF;
+ let Inst{9-20} = 0;
+ let Inst{21-30} = xo;
+ let Inst{31} = RC;
+}
+
// XX*-Form (VSX)
class XX1Form<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr,
InstrItinClass itin, list<dag> pattern>
@@ -1470,6 +1524,39 @@ class VXForm_5<bits<11> xo, dag OOL, dag IOL, string asmstr,
let Inst{21-31} = xo;
}
+/// VXForm_CR - VX crypto instructions with "VRT, VRA, ST, SIX"
+class VXForm_CR<bits<11> xo, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : I<4, OOL, IOL, asmstr, itin> {
+ bits<5> VD;
+ bits<5> VA;
+ bits<1> ST;
+ bits<4> SIX;
+
+ let Pattern = pattern;
+
+ let Inst{6-10} = VD;
+ let Inst{11-15} = VA;
+ let Inst{16} = ST;
+ let Inst{17-20} = SIX;
+ let Inst{21-31} = xo;
+}
+
+/// VXForm_BX - VX crypto instructions with "VRT, VRA" and a zeroed field, like vsbox
+class VXForm_BX<bits<11> xo, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : I<4, OOL, IOL, asmstr, itin> {
+ bits<5> VD;
+ bits<5> VA;
+
+ let Pattern = pattern;
+
+ let Inst{6-10} = VD;
+ let Inst{11-15} = VA;
+ let Inst{16-20} = 0;
+ let Inst{21-31} = xo;
+}
+
// E-4 VXR-Form
class VXRForm_1<bits<10> xo, dag OOL, dag IOL, string asmstr,
InstrItinClass itin, list<dag> pattern>
diff --git a/lib/Target/PowerPC/PPCInstrHTM.td b/lib/Target/PowerPC/PPCInstrHTM.td
new file mode 100644
index 0000000..20e6a62
--- /dev/null
+++ b/lib/Target/PowerPC/PPCInstrHTM.td
@@ -0,0 +1,172 @@
+//===-- PPCInstrHTM.td - The PowerPC Hardware Transactional Memory -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file describes the Hardware Transactional Memory extension to the
+// PowerPC instruction set.
+//
+//===----------------------------------------------------------------------===//
+
+
+
+def HasHTM : Predicate<"PPCSubTarget->hasHTM()">;
+
+def HTM_get_imm : SDNodeXForm<imm, [{
+ return getI32Imm (N->getZExtValue());
+}]>;
+
+let hasSideEffects = 1, usesCustomInserter = 1 in {
+def TCHECK_RET : Pseudo<(outs crrc:$out), (ins), "#TCHECK_RET", []>;
+}
+
+
+let Predicates = [HasHTM] in {
+
+def TBEGIN : XForm_htm0 <31, 654,
+ (outs crrc0:$ret), (ins u1imm:$R), "tbegin. $R", IIC_SprMTSPR, []>;
+
+def TEND : XForm_htm1 <31, 686,
+ (outs crrc0:$ret), (ins u1imm:$A), "tend. $A", IIC_SprMTSPR, []>;
+
+def TABORT : XForm_base_r3xo <31, 910,
+ (outs crrc0:$ret), (ins gprc:$A), "tabort. $A", IIC_SprMTSPR,
+ []>, isDOT {
+ let RST = 0;
+ let B = 0;
+}
+
+def TABORTWC : XForm_base_r3xo <31, 782,
+ (outs crrc0:$ret), (ins u5imm:$RTS, gprc:$A, gprc:$B),
+ "tabortwc. $RTS, $A, $B", IIC_SprMTSPR, []>,
+ isDOT;
+
+def TABORTWCI : XForm_base_r3xo <31, 846,
+ (outs crrc0:$ret), (ins u5imm:$RTS, gprc:$A, u5imm:$B),
+ "tabortwci. $RTS, $A, $B", IIC_SprMTSPR, []>,
+ isDOT;
+
+def TABORTDC : XForm_base_r3xo <31, 814,
+ (outs crrc0:$ret), (ins u5imm:$RTS, gprc:$A, gprc:$B),
+ "tabortdc. $RTS, $A, $B", IIC_SprMTSPR, []>,
+ isDOT;
+
+def TABORTDCI : XForm_base_r3xo <31, 878,
+ (outs crrc0:$ret), (ins u5imm:$RTS, gprc:$A, u5imm:$B),
+ "tabortdci. $RTS, $A, $B", IIC_SprMTSPR, []>,
+ isDOT;
+
+def TSR : XForm_htm2 <31, 750,
+ (outs crrc0:$ret), (ins u1imm:$L), "tsr. $L", IIC_SprMTSPR, []>,
+ isDOT;
+
+def TCHECK : XForm_htm3 <31, 718,
+ (outs), (ins crrc:$BF), "tcheck $BF", IIC_SprMTSPR, []>;
+
+
+def TRECLAIM : XForm_base_r3xo <31, 942,
+ (outs crrc:$ret), (ins gprc:$A), "treclaim. $A",
+ IIC_SprMTSPR, []>,
+ isDOT {
+ let RST = 0;
+ let B = 0;
+}
+
+def TRECHKPT : XForm_base_r3xo <31, 1006,
+ (outs crrc:$ret), (ins), "trechkpt.", IIC_SprMTSPR, []>,
+ isDOT {
+ let RST = 0;
+ let A = 0;
+ let B = 0;
+}
+
+// Builtins
+
+// All HTM instructions, with the exception of tcheck, set CR0 with the
+// value of the MSR Transaction State (TS) bits that exist before the
+// instruction is executed. For tbegin., the EQ bit in CR0 can be used
+// to determine whether the transaction was successfully started (0) or
+// failed (1). We use an XORI pattern to 'flip' the bit to match the
+// tbegin builtin API, which defines a return value of 1 as success.
+
+def : Pat<(int_ppc_tbegin i32:$R),
+ (XORI
+ (EXTRACT_SUBREG (
+ TBEGIN (HTM_get_imm imm:$R)), sub_eq),
+ 1)>;
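+// In other words (a sketch of the expansion): EXTRACT_SUBREG pulls the EQ
+// bit out of TBEGIN's CR0 result and XORI inverts it, so the builtin
+// (__builtin_tbegin) yields 1 on a successful start and 0 on failure.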
+
+def : Pat<(int_ppc_tend i32:$R),
+ (TEND (HTM_get_imm imm:$R))>;
+
+
+def : Pat<(int_ppc_tabort i32:$R),
+ (TABORT $R)>;
+
+def : Pat<(int_ppc_tabortwc i32:$TO, i32:$RA, i32:$RB),
+ (TABORTWC (HTM_get_imm imm:$TO), $RA, $RB)>;
+
+def : Pat<(int_ppc_tabortwci i32:$TO, i32:$RA, i32:$SI),
+ (TABORTWCI (HTM_get_imm imm:$TO), $RA, (HTM_get_imm imm:$SI))>;
+
+def : Pat<(int_ppc_tabortdc i32:$TO, i32:$RA, i32:$RB),
+ (TABORTDC (HTM_get_imm imm:$TO), $RA, $RB)>;
+
+def : Pat<(int_ppc_tabortdci i32:$TO, i32:$RA, i32:$SI),
+ (TABORTDCI (HTM_get_imm imm:$TO), $RA, (HTM_get_imm imm:$SI))>;
+
+def : Pat<(int_ppc_tcheck),
+ (TCHECK_RET)>;
+
+def : Pat<(int_ppc_treclaim i32:$RA),
+ (TRECLAIM $RA)>;
+
+def : Pat<(int_ppc_trechkpt),
+ (TRECHKPT)>;
+
+def : Pat<(int_ppc_tsr i32:$L),
+ (TSR (HTM_get_imm imm:$L))>;
+
+def : Pat<(int_ppc_get_texasr),
+ (MFSPR8 130)>;
+
+def : Pat<(int_ppc_get_texasru),
+ (MFSPR8 131)>;
+
+def : Pat<(int_ppc_get_tfhar),
+ (MFSPR8 128)>;
+
+def : Pat<(int_ppc_get_tfiar),
+ (MFSPR8 129)>;
+
+
+def : Pat<(int_ppc_set_texasr i64:$V),
+ (MTSPR8 130, $V)>;
+
+def : Pat<(int_ppc_set_texasru i64:$V),
+ (MTSPR8 131, $V)>;
+
+def : Pat<(int_ppc_set_tfhar i64:$V),
+ (MTSPR8 128, $V)>;
+
+def : Pat<(int_ppc_set_tfiar i64:$V),
+ (MTSPR8 129, $V)>;
+
+
+// Extended mnemonics
+def : Pat<(int_ppc_tendall),
+ (TEND 1)>;
+
+def : Pat<(int_ppc_tresume),
+ (TSR 1)>;
+
+def : Pat<(int_ppc_tsuspend),
+ (TSR 0)>;
+
+def : Pat<(i64 (int_ppc_ttest)),
+ (RLDICL (i64 (COPY (TABORTWCI 0, ZERO, 0))), 36, 28)>;
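+// (Best-effort reading) tabortwci. 0, r0, 0 never actually aborts, but it
+// still samples the transaction-state bits into CR0; the RLDICL then rotates
+// and masks them into the low bits of the i64 result.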
+
+} // [HasHTM]
diff --git a/lib/Target/PowerPC/PPCInstrInfo.cpp b/lib/Target/PowerPC/PPCInstrInfo.cpp
index fe9474a..c9c2949 100644
--- a/lib/Target/PowerPC/PPCInstrInfo.cpp
+++ b/lib/Target/PowerPC/PPCInstrInfo.cpp
@@ -61,7 +61,7 @@ void PPCInstrInfo::anchor() {}
PPCInstrInfo::PPCInstrInfo(PPCSubtarget &STI)
: PPCGenInstrInfo(PPC::ADJCALLSTACKDOWN, PPC::ADJCALLSTACKUP),
- Subtarget(STI), RI(STI) {}
+ Subtarget(STI), RI(STI.getTargetMachine()) {}
/// CreateTargetHazardRecognizer - Return the hazard recognizer to use for
/// this target when scheduling the DAG.
@@ -113,9 +113,8 @@ int PPCInstrInfo::getOperandLatency(const InstrItineraryData *ItinData,
const MachineOperand &DefMO = DefMI->getOperand(DefIdx);
unsigned Reg = DefMO.getReg();
- const TargetRegisterInfo *TRI = &getRegisterInfo();
bool IsRegCR;
- if (TRI->isVirtualRegister(Reg)) {
+ if (TargetRegisterInfo::isVirtualRegister(Reg)) {
const MachineRegisterInfo *MRI =
&DefMI->getParent()->getParent()->getRegInfo();
IsRegCR = MRI->getRegClass(Reg)->hasSuperClassEq(&PPC::CRRCRegClass) ||
@@ -697,6 +696,33 @@ void PPCInstrInfo::insertSelect(MachineBasicBlock &MBB,
.addReg(Cond[1].getReg(), 0, SubIdx);
}
+static unsigned getCRBitValue(unsigned CRBit) {
+ unsigned Ret = 4;
+ if (CRBit == PPC::CR0LT || CRBit == PPC::CR1LT ||
+ CRBit == PPC::CR2LT || CRBit == PPC::CR3LT ||
+ CRBit == PPC::CR4LT || CRBit == PPC::CR5LT ||
+ CRBit == PPC::CR6LT || CRBit == PPC::CR7LT)
+ Ret = 3;
+ if (CRBit == PPC::CR0GT || CRBit == PPC::CR1GT ||
+ CRBit == PPC::CR2GT || CRBit == PPC::CR3GT ||
+ CRBit == PPC::CR4GT || CRBit == PPC::CR5GT ||
+ CRBit == PPC::CR6GT || CRBit == PPC::CR7GT)
+ Ret = 2;
+ if (CRBit == PPC::CR0EQ || CRBit == PPC::CR1EQ ||
+ CRBit == PPC::CR2EQ || CRBit == PPC::CR3EQ ||
+ CRBit == PPC::CR4EQ || CRBit == PPC::CR5EQ ||
+ CRBit == PPC::CR6EQ || CRBit == PPC::CR7EQ)
+ Ret = 1;
+ if (CRBit == PPC::CR0UN || CRBit == PPC::CR1UN ||
+ CRBit == PPC::CR2UN || CRBit == PPC::CR3UN ||
+ CRBit == PPC::CR4UN || CRBit == PPC::CR5UN ||
+ CRBit == PPC::CR6UN || CRBit == PPC::CR7UN)
+ Ret = 0;
+
+ assert(Ret != 4 && "Invalid CR bit register");
+ return Ret;
+}
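+// The value returned is the bit's offset from the low end of its 4-bit CR
+// field (UN = 0 ... LT = 3). copyPhysReg relies on this: rotating the 32-bit
+// CR image left by Enc(CRField) * 4 + (4 - getCRBitValue(Bit)) moves the
+// selected bit into bit 31 (the LSB), matching the RLWINM immediate in the
+// CR-bit-to-GPR copy below.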
+
void PPCInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
MachineBasicBlock::iterator I, DebugLoc DL,
unsigned DestReg, unsigned SrcReg,
@@ -742,6 +768,32 @@ void PPCInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
SrcReg = SuperReg;
}
+ // Different class register copy
+ if (PPC::CRBITRCRegClass.contains(SrcReg) &&
+ PPC::GPRCRegClass.contains(DestReg)) {
+ unsigned CRReg = getCRFromCRBit(SrcReg);
+ BuildMI(MBB, I, DL, get(PPC::MFOCRF), DestReg)
+   .addReg(CRReg, getKillRegState(KillSrc));
+ // Rotate the CR bit in the CR fields to be the least significant bit and
+ // then mask with 0x1 (MB = ME = 31).
+ BuildMI(MBB, I, DL, get(PPC::RLWINM), DestReg)
+ .addReg(DestReg, RegState::Kill)
+ .addImm(TRI->getEncodingValue(CRReg) * 4 + (4 - getCRBitValue(SrcReg)))
+ .addImm(31)
+ .addImm(31);
+ return;
+ } else if (PPC::CRRCRegClass.contains(SrcReg) &&
+ PPC::G8RCRegClass.contains(DestReg)) {
+ BuildMI(MBB, I, DL, get(PPC::MFOCRF8), DestReg)
+   .addReg(SrcReg, getKillRegState(KillSrc));
+ return;
+ } else if (PPC::CRRCRegClass.contains(SrcReg) &&
+ PPC::GPRCRegClass.contains(DestReg)) {
+ BuildMI(MBB, I, DL, get(PPC::MFOCRF), DestReg)
+   .addReg(SrcReg, getKillRegState(KillSrc));
+ return;
+ }
+
unsigned Opc;
if (PPC::GPRCRegClass.contains(DestReg, SrcReg))
Opc = PPC::OR;
diff --git a/lib/Target/PowerPC/PPCInstrInfo.h b/lib/Target/PowerPC/PPCInstrInfo.h
index 4add6f9..7fd076a 100644
--- a/lib/Target/PowerPC/PPCInstrInfo.h
+++ b/lib/Target/PowerPC/PPCInstrInfo.h
@@ -63,7 +63,7 @@ enum PPC970_Unit {
};
} // end namespace PPCII
-
+class PPCSubtarget;
class PPCInstrInfo : public PPCGenInstrInfo {
PPCSubtarget &Subtarget;
const PPCRegisterInfo RI;
diff --git a/lib/Target/PowerPC/PPCInstrInfo.td b/lib/Target/PowerPC/PPCInstrInfo.td
index 1a045b1..5eff156 100644
--- a/lib/Target/PowerPC/PPCInstrInfo.td
+++ b/lib/Target/PowerPC/PPCInstrInfo.td
@@ -46,13 +46,6 @@ def SDT_PPCstbrx : SDTypeProfile<0, 3, [
SDTCisInt<0>, SDTCisPtrTy<1>, SDTCisVT<2, OtherVT>
]>;
-def SDT_PPClarx : SDTypeProfile<1, 1, [
- SDTCisInt<0>, SDTCisPtrTy<1>
-]>;
-def SDT_PPCstcx : SDTypeProfile<0, 2, [
- SDTCisInt<0>, SDTCisPtrTy<1>
-]>;
-
def SDT_PPCTC_ret : SDTypeProfile<0, 2, [
SDTCisPtrTy<0>, SDTCisVT<1, i32>
]>;
@@ -225,12 +218,6 @@ def PPCcr6set : SDNode<"PPCISD::CR6SET", SDTNone,
def PPCcr6unset : SDNode<"PPCISD::CR6UNSET", SDTNone,
[SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>;
-// Instructions to support atomic operations
-def PPClarx : SDNode<"PPCISD::LARX", SDT_PPClarx,
- [SDNPHasChain, SDNPMayLoad]>;
-def PPCstcx : SDNode<"PPCISD::STCX", SDT_PPCstcx,
- [SDNPHasChain, SDNPMayStore]>;
-
// Instructions to support dynamic alloca.
def SDTDynOp : SDTypeProfile<1, 2, []>;
def PPCdynalloc : SDNode<"PPCISD::DYNALLOC", SDTDynOp, [SDNPHasChain]>;
@@ -445,6 +432,18 @@ def PPCRegCRRCAsmOperand : AsmOperandClass {
def crrc : RegisterOperand<CRRC> {
let ParserMatchClass = PPCRegCRRCAsmOperand;
}
+def crrc0 : RegisterOperand<CRRC0> {
+ let ParserMatchClass = PPCRegCRRCAsmOperand;
+}
+
+def PPCU1ImmAsmOperand : AsmOperandClass {
+ let Name = "U1Imm"; let PredicateMethod = "isU1Imm";
+ let RenderMethod = "addImmOperands";
+}
+def u1imm : Operand<i32> {
+ let PrintMethod = "printU1ImmOperand";
+ let ParserMatchClass = PPCU1ImmAsmOperand;
+}
def PPCU2ImmAsmOperand : AsmOperandClass {
let Name = "U2Imm"; let PredicateMethod = "isU2Imm";
@@ -455,6 +454,15 @@ def u2imm : Operand<i32> {
let ParserMatchClass = PPCU2ImmAsmOperand;
}
+def PPCU3ImmAsmOperand : AsmOperandClass {
+ let Name = "U3Imm"; let PredicateMethod = "isU3Imm";
+ let RenderMethod = "addImmOperands";
+}
+def u3imm : Operand<i32> {
+ let PrintMethod = "printU3ImmOperand";
+ let ParserMatchClass = PPCU3ImmAsmOperand;
+}
+
def PPCU4ImmAsmOperand : AsmOperandClass {
let Name = "U4Imm"; let PredicateMethod = "isU4Imm";
let RenderMethod = "addImmOperands";
@@ -715,7 +723,7 @@ def IsPPC6xx : Predicate<"PPCSubTarget->isPPC6xx()">;
def IsE500 : Predicate<"PPCSubTarget->isE500()">;
def HasSPE : Predicate<"PPCSubTarget->HasSPE()">;
def HasICBT : Predicate<"PPCSubTarget->hasICBT()">;
-
+def HasPartwordAtomics : Predicate<"PPCSubTarget->hasPartwordAtomics()">;
def NoNaNsFPMath : Predicate<"TM.Options.NoNaNsFPMath">;
def NaNsFPMath : Predicate<"!TM.Options.NoNaNsFPMath">;
@@ -1446,15 +1454,44 @@ let usesCustomInserter = 1 in {
}
// Instructions to support atomic operations
+let mayLoad = 1, hasSideEffects = 0 in {
+def LBARX : XForm_1<31, 52, (outs gprc:$rD), (ins memrr:$src),
+ "lbarx $rD, $src", IIC_LdStLWARX, []>,
+ Requires<[HasPartwordAtomics]>;
+
+def LHARX : XForm_1<31, 116, (outs gprc:$rD), (ins memrr:$src),
+ "lharx $rD, $src", IIC_LdStLWARX, []>,
+ Requires<[HasPartwordAtomics]>;
+
def LWARX : XForm_1<31, 20, (outs gprc:$rD), (ins memrr:$src),
- "lwarx $rD, $src", IIC_LdStLWARX,
- [(set i32:$rD, (PPClarx xoaddr:$src))]>;
+ "lwarx $rD, $src", IIC_LdStLWARX, []>;
+
+// Instructions to support lock versions of atomics
+// (EH=1 - see Power ISA 2.07 Book II 4.4.2)
+def LBARXL : XForm_1<31, 52, (outs gprc:$rD), (ins memrr:$src),
+ "lbarx $rD, $src, 1", IIC_LdStLWARX, []>, isDOT,
+ Requires<[HasPartwordAtomics]>;
+
+def LHARXL : XForm_1<31, 116, (outs gprc:$rD), (ins memrr:$src),
+ "lharx $rD, $src, 1", IIC_LdStLWARX, []>, isDOT,
+ Requires<[HasPartwordAtomics]>;
+
+def LWARXL : XForm_1<31, 20, (outs gprc:$rD), (ins memrr:$src),
+ "lwarx $rD, $src, 1", IIC_LdStLWARX, []>, isDOT;
+}
+
+let Defs = [CR0], mayStore = 1, hasSideEffects = 0 in {
+def STBCX : XForm_1<31, 694, (outs), (ins gprc:$rS, memrr:$dst),
+ "stbcx. $rS, $dst", IIC_LdStSTWCX, []>,
+ isDOT, Requires<[HasPartwordAtomics]>;
+
+def STHCX : XForm_1<31, 726, (outs), (ins gprc:$rS, memrr:$dst),
+ "sthcx. $rS, $dst", IIC_LdStSTWCX, []>,
+ isDOT, Requires<[HasPartwordAtomics]>;
-let Defs = [CR0] in
def STWCX : XForm_1<31, 150, (outs), (ins gprc:$rS, memrr:$dst),
- "stwcx. $rS, $dst", IIC_LdStSTWCX,
- [(PPCstcx i32:$rS, xoaddr:$dst)]>,
- isDOT;
+ "stwcx. $rS, $dst", IIC_LdStSTWCX, []>, isDOT;
+}
let isTerminator = 1, isBarrier = 1, hasCtrlDep = 1 in
def TRAP : XForm_24<31, 4, (outs), (ins), "trap", IIC_LdStLoad, [(trap)]>;
@@ -1473,7 +1510,7 @@ def TD : XForm_1<31, 68, (outs), (ins u5imm:$to, g8rc:$rA, g8rc:$rB),
//
// Unindexed (r+i) Loads.
-let canFoldAsLoad = 1, PPC970_Unit = 2 in {
+let PPC970_Unit = 2 in {
def LBZ : DForm_1<34, (outs gprc:$rD), (ins memri:$src),
"lbz $rD, $src", IIC_LdStLoad,
[(set i32:$rD, (zextloadi8 iaddr:$src))]>;
@@ -1570,7 +1607,7 @@ def LFDUX : XForm_1<31, 631, (outs f8rc:$rD, ptr_rc_nor0:$ea_result),
// Indexed (r+r) Loads.
//
-let canFoldAsLoad = 1, PPC970_Unit = 2 in {
+let PPC970_Unit = 2 in {
def LBZX : XForm_1<31, 87, (outs gprc:$rD), (ins memrr:$src),
"lbzx $rD, $src", IIC_LdStLoad,
[(set i32:$rD, (zextloadi8 xaddr:$src))]>;
@@ -2683,6 +2720,7 @@ include "PPCInstrSPE.td"
include "PPCInstr64Bit.td"
include "PPCInstrVSX.td"
include "PPCInstrQPX.td"
+include "PPCInstrHTM.td"
def crnot : OutPatFrag<(ops node:$in),
(CRNOR $in, $in)>;
diff --git a/lib/Target/PowerPC/PPCInstrQPX.td b/lib/Target/PowerPC/PPCInstrQPX.td
index c984d46..5c66b42 100644
--- a/lib/Target/PowerPC/PPCInstrQPX.td
+++ b/lib/Target/PowerPC/PPCInstrQPX.td
@@ -501,7 +501,7 @@ let Uses = [RM] in {
"qvflogical $FRT, $FRA, $FRB, $tttt", IIC_VecPerm, []>;
// Load indexed instructions
- let mayLoad = 1, canFoldAsLoad = 1 in {
+ let mayLoad = 1 in {
def QVLFDX : XForm_1<31, 583,
(outs qfrc:$FRT), (ins memrr:$src),
"qvlfdx $FRT, $src", IIC_LdStLFD,
diff --git a/lib/Target/PowerPC/PPCInstrVSX.td b/lib/Target/PowerPC/PPCInstrVSX.td
index d6cb3a0..ec04da4 100644
--- a/lib/Target/PowerPC/PPCInstrVSX.td
+++ b/lib/Target/PowerPC/PPCInstrVSX.td
@@ -66,7 +66,7 @@ let hasSideEffects = 0 in { // VSX instructions don't have side effects.
let Uses = [RM] in {
// Load indexed instructions
- let mayLoad = 1, canFoldAsLoad = 1 in {
+ let mayLoad = 1 in {
def LXSDX : XX1Form<31, 588,
(outs vsfrc:$XT), (ins memrr:$src),
"lxsdx $XT, $src", IIC_LdStLFD,
diff --git a/lib/Target/PowerPC/PPCLoopDataPrefetch.cpp b/lib/Target/PowerPC/PPCLoopDataPrefetch.cpp
index efd2d92..005bcaf 100644
--- a/lib/Target/PowerPC/PPCLoopDataPrefetch.cpp
+++ b/lib/Target/PowerPC/PPCLoopDataPrefetch.cpp
@@ -104,7 +104,7 @@ FunctionPass *llvm::createPPCLoopDataPrefetchPass() { return new PPCLoopDataPref
bool PPCLoopDataPrefetch::runOnFunction(Function &F) {
LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
SE = &getAnalysis<ScalarEvolution>();
- DL = F.getParent()->getDataLayout();
+ DL = &F.getParent()->getDataLayout();
AC = &getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
TTI = &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
@@ -192,7 +192,7 @@ bool PPCLoopDataPrefetch::runOnLoop(Loop *L) {
const SCEV *PtrDiff = SE->getMinusSCEV(LSCEVAddRec, K->second);
if (const SCEVConstant *ConstPtrDiff =
dyn_cast<SCEVConstant>(PtrDiff)) {
- int64_t PD = abs64(ConstPtrDiff->getValue()->getSExtValue());
+ int64_t PD = std::abs(ConstPtrDiff->getValue()->getSExtValue());
if (PD < (int64_t) CacheLineSize) {
DupPref = true;
break;
@@ -211,7 +211,7 @@ bool PPCLoopDataPrefetch::runOnLoop(Loop *L) {
PrefLoads.push_back(std::make_pair(MemI, LSCEVAddRec));
Type *I8Ptr = Type::getInt8PtrTy((*I)->getContext(), PtrAddrSpace);
- SCEVExpander SCEVE(*SE, "prefaddr");
+ SCEVExpander SCEVE(*SE, J->getModule()->getDataLayout(), "prefaddr");
Value *PrefPtrValue = SCEVE.expandCodeFor(NextLSCEV, I8Ptr, MemI);
IRBuilder<> Builder(MemI);
diff --git a/lib/Target/PowerPC/PPCLoopPreIncPrep.cpp b/lib/Target/PowerPC/PPCLoopPreIncPrep.cpp
index df65227..092a4ef 100644
--- a/lib/Target/PowerPC/PPCLoopPreIncPrep.cpp
+++ b/lib/Target/PowerPC/PPCLoopPreIncPrep.cpp
@@ -36,6 +36,7 @@
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/Module.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Transforms/Scalar.h"
@@ -84,7 +85,6 @@ namespace {
PPCTargetMachine *TM;
LoopInfo *LI;
ScalarEvolution *SE;
- const DataLayout *DL;
};
}
@@ -141,9 +141,6 @@ bool PPCLoopPreIncPrep::runOnFunction(Function &F) {
LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
SE = &getAnalysis<ScalarEvolution>();
- DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>();
- DL = DLP ? &DLP->getDataLayout() : 0;
-
bool MadeChange = false;
for (LoopInfo::iterator I = LI->begin(), E = LI->end();
@@ -158,9 +155,6 @@ bool PPCLoopPreIncPrep::runOnFunction(Function &F) {
bool PPCLoopPreIncPrep::runOnLoop(Loop *L) {
bool MadeChange = false;
- if (!DL)
- return MadeChange;
-
// Only prep. the inner-most loop
if (!L->empty())
return MadeChange;
@@ -261,6 +255,7 @@ bool PPCLoopPreIncPrep::runOnLoop(Loop *L) {
Value *BasePtr = GetPointerOperand(MemI);
assert(BasePtr && "No pointer operand");
+ Type *I8Ty = Type::getInt8Ty(MemI->getParent()->getContext());
Type *I8PtrTy = Type::getInt8PtrTy(MemI->getParent()->getContext(),
BasePtr->getType()->getPointerAddressSpace());
@@ -280,7 +275,7 @@ bool PPCLoopPreIncPrep::runOnLoop(Loop *L) {
MemI->hasName() ? MemI->getName() + ".phi" : "",
Header->getFirstNonPHI());
- SCEVExpander SCEVE(*SE, "pistart");
+ SCEVExpander SCEVE(*SE, Header->getModule()->getDataLayout(), "pistart");
Value *BasePtrStart = SCEVE.expandCodeFor(BasePtrStartSCEV, I8PtrTy,
LoopPredecessor->getTerminator());
@@ -295,8 +290,8 @@ bool PPCLoopPreIncPrep::runOnLoop(Loop *L) {
}
Instruction *InsPoint = Header->getFirstInsertionPt();
- GetElementPtrInst *PtrInc =
- GetElementPtrInst::Create(NewPHI, BasePtrIncSCEV->getValue(),
+ GetElementPtrInst *PtrInc = GetElementPtrInst::Create(
+ I8Ty, NewPHI, BasePtrIncSCEV->getValue(),
MemI->hasName() ? MemI->getName() + ".inc" : "", InsPoint);
PtrInc->setIsInBounds(IsPtrInBounds(BasePtr));
for (pred_iterator PI = pred_begin(Header), PE = pred_end(Header);
@@ -341,9 +336,9 @@ bool PPCLoopPreIncPrep::runOnLoop(Loop *L) {
PtrIP = PtrIP->getParent()->getFirstInsertionPt();
else if (!PtrIP)
PtrIP = I->second;
-
- GetElementPtrInst *NewPtr =
- GetElementPtrInst::Create(PtrInc, Diff->getValue(),
+
+ GetElementPtrInst *NewPtr = GetElementPtrInst::Create(
+ I8Ty, PtrInc, Diff->getValue(),
I->second->hasName() ? I->second->getName() + ".off" : "", PtrIP);
if (!PtrIP)
NewPtr->insertAfter(cast<Instruction>(PtrInc));
diff --git a/lib/Target/PowerPC/PPCMCInstLower.cpp b/lib/Target/PowerPC/PPCMCInstLower.cpp
index 819738b..0965cb3 100644
--- a/lib/Target/PowerPC/PPCMCInstLower.cpp
+++ b/lib/Target/PowerPC/PPCMCInstLower.cpp
@@ -184,6 +184,9 @@ void llvm::LowerPPCMachineInstrToMCInst(const MachineInstr *MI, MCInst &OutMI,
llvm_unreachable("unknown operand type");
case MachineOperand::MO_Register:
assert(!MO.getSubReg() && "Subregs should be eliminated!");
+ assert(MO.getReg() > PPC::NoRegister &&
+ MO.getReg() < PPC::NUM_TARGET_REGS &&
+ "Invalid register for this target!");
MCOp = MCOperand::CreateReg(MO.getReg());
break;
case MachineOperand::MO_Immediate:
diff --git a/lib/Target/PowerPC/PPCRegisterInfo.cpp b/lib/Target/PowerPC/PPCRegisterInfo.cpp
index c9a9684..0e568d3 100644
--- a/lib/Target/PowerPC/PPCRegisterInfo.cpp
+++ b/lib/Target/PowerPC/PPCRegisterInfo.cpp
@@ -18,6 +18,7 @@
#include "PPCInstrBuilder.h"
#include "PPCMachineFunctionInfo.h"
#include "PPCSubtarget.h"
+#include "PPCTargetMachine.h"
#include "llvm/ADT/BitVector.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
@@ -56,11 +57,11 @@ static cl::opt<bool>
AlwaysBasePointer("ppc-always-use-base-pointer", cl::Hidden, cl::init(false),
cl::desc("Force the use of a base pointer in every function"));
-PPCRegisterInfo::PPCRegisterInfo(const PPCSubtarget &ST)
- : PPCGenRegisterInfo(ST.isPPC64() ? PPC::LR8 : PPC::LR,
- ST.isPPC64() ? 0 : 1,
- ST.isPPC64() ? 0 : 1),
- Subtarget(ST) {
+PPCRegisterInfo::PPCRegisterInfo(const PPCTargetMachine &TM)
+ : PPCGenRegisterInfo(TM.isPPC64() ? PPC::LR8 : PPC::LR,
+ TM.isPPC64() ? 0 : 1,
+ TM.isPPC64() ? 0 : 1),
+ TM(TM) {
ImmToIdxMap[PPC::LD] = PPC::LDX; ImmToIdxMap[PPC::STD] = PPC::STDX;
ImmToIdxMap[PPC::LBZ] = PPC::LBZX; ImmToIdxMap[PPC::STB] = PPC::STBX;
ImmToIdxMap[PPC::LHZ] = PPC::LHZX; ImmToIdxMap[PPC::LHA] = PPC::LHAX;
@@ -87,18 +88,19 @@ PPCRegisterInfo::getPointerRegClass(const MachineFunction &MF, unsigned Kind)
// Note that PPCInstrInfo::FoldImmediate also directly uses this Kind value
// when it checks for ZERO folding.
if (Kind == 1) {
- if (Subtarget.isPPC64())
+ if (TM.isPPC64())
return &PPC::G8RC_NOX0RegClass;
return &PPC::GPRC_NOR0RegClass;
}
- if (Subtarget.isPPC64())
+ if (TM.isPPC64())
return &PPC::G8RCRegClass;
return &PPC::GPRCRegClass;
}
const MCPhysReg*
PPCRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
+ const PPCSubtarget &Subtarget = MF->getSubtarget<PPCSubtarget>();
if (MF->getFunction()->getCallingConv() == CallingConv::AnyReg) {
if (Subtarget.hasVSX())
return CSR_64_AllRegs_VSX_SaveList;
@@ -108,28 +110,28 @@ PPCRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
}
if (Subtarget.isDarwinABI())
- return Subtarget.isPPC64() ? (Subtarget.hasAltivec() ?
- CSR_Darwin64_Altivec_SaveList :
- CSR_Darwin64_SaveList) :
- (Subtarget.hasAltivec() ?
- CSR_Darwin32_Altivec_SaveList :
- CSR_Darwin32_SaveList);
+ return TM.isPPC64()
+ ? (Subtarget.hasAltivec() ? CSR_Darwin64_Altivec_SaveList
+ : CSR_Darwin64_SaveList)
+ : (Subtarget.hasAltivec() ? CSR_Darwin32_Altivec_SaveList
+ : CSR_Darwin32_SaveList);
// On PPC64, we might need to save r2 (but only if it is not reserved).
bool SaveR2 = MF->getRegInfo().isAllocatable(PPC::X2);
- return Subtarget.isPPC64() ? (Subtarget.hasAltivec() ?
- (SaveR2 ? CSR_SVR464_R2_Altivec_SaveList :
- CSR_SVR464_Altivec_SaveList) :
- (SaveR2 ? CSR_SVR464_R2_SaveList :
- CSR_SVR464_SaveList)) :
- (Subtarget.hasAltivec() ?
- CSR_SVR432_Altivec_SaveList :
- CSR_SVR432_SaveList);
+ return TM.isPPC64()
+ ? (Subtarget.hasAltivec()
+ ? (SaveR2 ? CSR_SVR464_R2_Altivec_SaveList
+ : CSR_SVR464_Altivec_SaveList)
+ : (SaveR2 ? CSR_SVR464_R2_SaveList : CSR_SVR464_SaveList))
+ : (Subtarget.hasAltivec() ? CSR_SVR432_Altivec_SaveList
+ : CSR_SVR432_SaveList);
}
-const uint32_t*
-PPCRegisterInfo::getCallPreservedMask(CallingConv::ID CC) const {
+const uint32_t *
+PPCRegisterInfo::getCallPreservedMask(const MachineFunction &MF,
+ CallingConv::ID CC) const {
+ const PPCSubtarget &Subtarget = MF.getSubtarget<PPCSubtarget>();
if (CC == CallingConv::AnyReg) {
if (Subtarget.hasVSX())
return CSR_64_AllRegs_VSX_RegMask;
@@ -139,19 +141,15 @@ PPCRegisterInfo::getCallPreservedMask(CallingConv::ID CC) const {
}
if (Subtarget.isDarwinABI())
- return Subtarget.isPPC64() ? (Subtarget.hasAltivec() ?
- CSR_Darwin64_Altivec_RegMask :
- CSR_Darwin64_RegMask) :
- (Subtarget.hasAltivec() ?
- CSR_Darwin32_Altivec_RegMask :
- CSR_Darwin32_RegMask);
-
- return Subtarget.isPPC64() ? (Subtarget.hasAltivec() ?
- CSR_SVR464_Altivec_RegMask :
- CSR_SVR464_RegMask) :
- (Subtarget.hasAltivec() ?
- CSR_SVR432_Altivec_RegMask :
- CSR_SVR432_RegMask);
+ return TM.isPPC64() ? (Subtarget.hasAltivec() ? CSR_Darwin64_Altivec_RegMask
+ : CSR_Darwin64_RegMask)
+ : (Subtarget.hasAltivec() ? CSR_Darwin32_Altivec_RegMask
+ : CSR_Darwin32_RegMask);
+
+ return TM.isPPC64() ? (Subtarget.hasAltivec() ? CSR_SVR464_Altivec_RegMask
+ : CSR_SVR464_RegMask)
+ : (Subtarget.hasAltivec() ? CSR_SVR432_Altivec_RegMask
+ : CSR_SVR432_RegMask);
}
const uint32_t*
@@ -160,15 +158,13 @@ PPCRegisterInfo::getNoPreservedMask() const {
}
void PPCRegisterInfo::adjustStackMapLiveOutMask(uint32_t *Mask) const {
- unsigned PseudoRegs[] = { PPC::ZERO, PPC::ZERO8, PPC::RM };
- for (unsigned i = 0, ie = array_lengthof(PseudoRegs); i != ie; ++i) {
- unsigned Reg = PseudoRegs[i];
- Mask[Reg / 32] &= ~(1u << (Reg % 32));
- }
+ for (unsigned PseudoReg : {PPC::ZERO, PPC::ZERO8, PPC::RM})
+ Mask[PseudoReg / 32] &= ~(1u << (PseudoReg % 32));
}
BitVector PPCRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
BitVector Reserved(getNumRegs());
+ const PPCSubtarget &Subtarget = MF.getSubtarget<PPCSubtarget>();
const PPCFrameLowering *PPCFI =
static_cast<const PPCFrameLowering *>(Subtarget.getFrameLowering());
@@ -207,7 +203,7 @@ BitVector PPCRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
}
// On PPC64, r13 is the thread pointer. Never allocate this register.
- if (Subtarget.isPPC64()) {
+ if (TM.isPPC64()) {
Reserved.set(PPC::R13);
Reserved.set(PPC::X1);
@@ -238,15 +234,15 @@ BitVector PPCRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
Reserved.set(PPC::R31);
if (hasBasePointer(MF)) {
- if (Subtarget.isSVR4ABI() && !Subtarget.isPPC64() &&
- MF.getTarget().getRelocationModel() == Reloc::PIC_)
+ if (Subtarget.isSVR4ABI() && !TM.isPPC64() &&
+ TM.getRelocationModel() == Reloc::PIC_)
Reserved.set(PPC::R29);
else
Reserved.set(PPC::R30);
}
- if (Subtarget.isSVR4ABI() && !Subtarget.isPPC64() &&
- MF.getTarget().getRelocationModel() == Reloc::PIC_)
+ if (Subtarget.isSVR4ABI() && !TM.isPPC64() &&
+ TM.getRelocationModel() == Reloc::PIC_)
Reserved.set(PPC::R30);
// Reserve Altivec registers when Altivec is unavailable.
@@ -260,6 +256,7 @@ BitVector PPCRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
unsigned PPCRegisterInfo::getRegPressureLimit(const TargetRegisterClass *RC,
MachineFunction &MF) const {
+ const PPCSubtarget &Subtarget = MF.getSubtarget<PPCSubtarget>();
const TargetFrameLowering *TFI = Subtarget.getFrameLowering();
const unsigned DefaultSafety = 1;
@@ -291,8 +288,10 @@ unsigned PPCRegisterInfo::getRegPressureLimit(const TargetRegisterClass *RC,
}
}
-const TargetRegisterClass *PPCRegisterInfo::getLargestLegalSuperClass(
- const TargetRegisterClass *RC) const {
+const TargetRegisterClass *
+PPCRegisterInfo::getLargestLegalSuperClass(const TargetRegisterClass *RC,
+ const MachineFunction &MF) const {
+ const PPCSubtarget &Subtarget = MF.getSubtarget<PPCSubtarget>();
if (Subtarget.hasVSX()) {
// With VSX, we can inflate various sub-register classes to the full VSX
// register set.
@@ -303,7 +302,7 @@ const TargetRegisterClass *PPCRegisterInfo::getLargestLegalSuperClass(
return &PPC::VSRCRegClass;
}
- return TargetRegisterInfo::getLargestLegalSuperClass(RC);
+ return TargetRegisterInfo::getLargestLegalSuperClass(RC, MF);
}
//===----------------------------------------------------------------------===//
@@ -326,10 +325,11 @@ void PPCRegisterInfo::lowerDynamicAlloc(MachineBasicBlock::iterator II) const {
MachineFunction &MF = *MBB.getParent();
// Get the frame info.
MachineFrameInfo *MFI = MF.getFrameInfo();
+ const PPCSubtarget &Subtarget = MF.getSubtarget<PPCSubtarget>();
// Get the instruction info.
const TargetInstrInfo &TII = *Subtarget.getInstrInfo();
// Determine whether 64-bit pointers are used.
- bool LP64 = Subtarget.isPPC64();
+ bool LP64 = TM.isPPC64();
DebugLoc dl = MI.getDebugLoc();
// Get the maximum call stack size.
@@ -443,10 +443,11 @@ void PPCRegisterInfo::lowerCRSpilling(MachineBasicBlock::iterator II,
// Get the instruction's basic block.
MachineBasicBlock &MBB = *MI.getParent();
MachineFunction &MF = *MBB.getParent();
+ const PPCSubtarget &Subtarget = MF.getSubtarget<PPCSubtarget>();
const TargetInstrInfo &TII = *Subtarget.getInstrInfo();
DebugLoc dl = MI.getDebugLoc();
- bool LP64 = Subtarget.isPPC64();
+ bool LP64 = TM.isPPC64();
const TargetRegisterClass *G8RC = &PPC::G8RCRegClass;
const TargetRegisterClass *GPRC = &PPC::GPRCRegClass;
@@ -487,10 +488,11 @@ void PPCRegisterInfo::lowerCRRestore(MachineBasicBlock::iterator II,
// Get the instruction's basic block.
MachineBasicBlock &MBB = *MI.getParent();
MachineFunction &MF = *MBB.getParent();
+ const PPCSubtarget &Subtarget = MF.getSubtarget<PPCSubtarget>();
const TargetInstrInfo &TII = *Subtarget.getInstrInfo();
DebugLoc dl = MI.getDebugLoc();
- bool LP64 = Subtarget.isPPC64();
+ bool LP64 = TM.isPPC64();
const TargetRegisterClass *G8RC = &PPC::G8RCRegClass;
const TargetRegisterClass *GPRC = &PPC::GPRCRegClass;
@@ -522,37 +524,6 @@ void PPCRegisterInfo::lowerCRRestore(MachineBasicBlock::iterator II,
MBB.erase(II);
}
-static unsigned getCRFromCRBit(unsigned SrcReg) {
- unsigned Reg = 0;
- if (SrcReg == PPC::CR0LT || SrcReg == PPC::CR0GT ||
- SrcReg == PPC::CR0EQ || SrcReg == PPC::CR0UN)
- Reg = PPC::CR0;
- else if (SrcReg == PPC::CR1LT || SrcReg == PPC::CR1GT ||
- SrcReg == PPC::CR1EQ || SrcReg == PPC::CR1UN)
- Reg = PPC::CR1;
- else if (SrcReg == PPC::CR2LT || SrcReg == PPC::CR2GT ||
- SrcReg == PPC::CR2EQ || SrcReg == PPC::CR2UN)
- Reg = PPC::CR2;
- else if (SrcReg == PPC::CR3LT || SrcReg == PPC::CR3GT ||
- SrcReg == PPC::CR3EQ || SrcReg == PPC::CR3UN)
- Reg = PPC::CR3;
- else if (SrcReg == PPC::CR4LT || SrcReg == PPC::CR4GT ||
- SrcReg == PPC::CR4EQ || SrcReg == PPC::CR4UN)
- Reg = PPC::CR4;
- else if (SrcReg == PPC::CR5LT || SrcReg == PPC::CR5GT ||
- SrcReg == PPC::CR5EQ || SrcReg == PPC::CR5UN)
- Reg = PPC::CR5;
- else if (SrcReg == PPC::CR6LT || SrcReg == PPC::CR6GT ||
- SrcReg == PPC::CR6EQ || SrcReg == PPC::CR6UN)
- Reg = PPC::CR6;
- else if (SrcReg == PPC::CR7LT || SrcReg == PPC::CR7GT ||
- SrcReg == PPC::CR7EQ || SrcReg == PPC::CR7UN)
- Reg = PPC::CR7;
-
- assert(Reg != 0 && "Invalid CR bit register");
- return Reg;
-}
-
void PPCRegisterInfo::lowerCRBitSpilling(MachineBasicBlock::iterator II,
unsigned FrameIndex) const {
// Get the instruction.
@@ -560,10 +531,11 @@ void PPCRegisterInfo::lowerCRBitSpilling(MachineBasicBlock::iterator II,
// Get the instruction's basic block.
MachineBasicBlock &MBB = *MI.getParent();
MachineFunction &MF = *MBB.getParent();
+ const PPCSubtarget &Subtarget = MF.getSubtarget<PPCSubtarget>();
const TargetInstrInfo &TII = *Subtarget.getInstrInfo();
DebugLoc dl = MI.getDebugLoc();
- bool LP64 = Subtarget.isPPC64();
+ bool LP64 = TM.isPPC64();
const TargetRegisterClass *G8RC = &PPC::G8RCRegClass;
const TargetRegisterClass *GPRC = &PPC::GPRCRegClass;
@@ -603,10 +575,11 @@ void PPCRegisterInfo::lowerCRBitRestore(MachineBasicBlock::iterator II,
// Get the instruction's basic block.
MachineBasicBlock &MBB = *MI.getParent();
MachineFunction &MF = *MBB.getParent();
+ const PPCSubtarget &Subtarget = MF.getSubtarget<PPCSubtarget>();
const TargetInstrInfo &TII = *Subtarget.getInstrInfo();
DebugLoc dl = MI.getDebugLoc();
- bool LP64 = Subtarget.isPPC64();
+ bool LP64 = TM.isPPC64();
const TargetRegisterClass *G8RC = &PPC::G8RCRegClass;
const TargetRegisterClass *GPRC = &PPC::GPRCRegClass;
@@ -650,6 +623,7 @@ void PPCRegisterInfo::lowerVRSAVESpilling(MachineBasicBlock::iterator II,
// Get the instruction's basic block.
MachineBasicBlock &MBB = *MI.getParent();
MachineFunction &MF = *MBB.getParent();
+ const PPCSubtarget &Subtarget = MF.getSubtarget<PPCSubtarget>();
const TargetInstrInfo &TII = *Subtarget.getInstrInfo();
DebugLoc dl = MI.getDebugLoc();
@@ -675,6 +649,7 @@ void PPCRegisterInfo::lowerVRSAVERestore(MachineBasicBlock::iterator II,
// Get the instruction's basic block.
MachineBasicBlock &MBB = *MI.getParent();
MachineFunction &MF = *MBB.getParent();
+ const PPCSubtarget &Subtarget = MF.getSubtarget<PPCSubtarget>();
const TargetInstrInfo &TII = *Subtarget.getInstrInfo();
DebugLoc dl = MI.getDebugLoc();
@@ -697,14 +672,14 @@ void PPCRegisterInfo::lowerVRSAVERestore(MachineBasicBlock::iterator II,
bool
PPCRegisterInfo::hasReservedSpillSlot(const MachineFunction &MF,
unsigned Reg, int &FrameIdx) const {
-
+ const PPCSubtarget &Subtarget = MF.getSubtarget<PPCSubtarget>();
// For the nonvolatile condition registers (CR2, CR3, CR4) in an SVR4
// ABI, return true to prevent allocating an additional frame slot.
// For 64-bit, the CR save area is at SP+8; the value of FrameIdx = 0
// is arbitrary and will be subsequently ignored. For 32-bit, we have
// previously created the stack slot if needed, so return its FrameIdx.
if (Subtarget.isSVR4ABI() && PPC::CR2 <= Reg && Reg <= PPC::CR4) {
- if (Subtarget.isPPC64())
+ if (TM.isPPC64())
FrameIdx = 0;
else {
const PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
@@ -757,6 +732,7 @@ PPCRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
MachineBasicBlock &MBB = *MI.getParent();
// Get the basic block's function.
MachineFunction &MF = *MBB.getParent();
+ const PPCSubtarget &Subtarget = MF.getSubtarget<PPCSubtarget>();
// Get the instruction info.
const TargetInstrInfo &TII = *Subtarget.getInstrInfo();
// Get the frame info.
@@ -847,7 +823,7 @@ PPCRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
// The offset doesn't fit into a single register, scavenge one to build the
// offset in.
- bool is64Bit = Subtarget.isPPC64();
+ bool is64Bit = TM.isPPC64();
const TargetRegisterClass *G8RC = &PPC::G8RCRegClass;
const TargetRegisterClass *GPRC = &PPC::GPRCRegClass;
const TargetRegisterClass *RC = is64Bit ? G8RC : GPRC;
@@ -885,23 +861,25 @@ PPCRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
}
unsigned PPCRegisterInfo::getFrameRegister(const MachineFunction &MF) const {
+ const PPCSubtarget &Subtarget = MF.getSubtarget<PPCSubtarget>();
const TargetFrameLowering *TFI = Subtarget.getFrameLowering();
- if (!Subtarget.isPPC64())
+ if (!TM.isPPC64())
return TFI->hasFP(MF) ? PPC::R31 : PPC::R1;
else
return TFI->hasFP(MF) ? PPC::X31 : PPC::X1;
}
unsigned PPCRegisterInfo::getBaseRegister(const MachineFunction &MF) const {
+ const PPCSubtarget &Subtarget = MF.getSubtarget<PPCSubtarget>();
if (!hasBasePointer(MF))
return getFrameRegister(MF);
- if (Subtarget.isPPC64())
+ if (TM.isPPC64())
return PPC::X30;
if (Subtarget.isSVR4ABI() &&
- MF.getTarget().getRelocationModel() == Reloc::PIC_)
+ TM.getRelocationModel() == Reloc::PIC_)
return PPC::R29;
return PPC::R30;
@@ -927,6 +905,7 @@ bool PPCRegisterInfo::canRealignStack(const MachineFunction &MF) const {
}
bool PPCRegisterInfo::needsStackRealignment(const MachineFunction &MF) const {
+ const PPCSubtarget &Subtarget = MF.getSubtarget<PPCSubtarget>();
const MachineFrameInfo *MFI = MF.getFrameInfo();
const Function *F = MF.getFunction();
unsigned StackAlign = Subtarget.getFrameLowering()->getStackAlignment();
@@ -964,7 +943,7 @@ needsFrameBaseReg(MachineInstr *MI, int64_t Offset) const {
MachineBasicBlock &MBB = *MI->getParent();
MachineFunction &MF = *MBB.getParent();
-
+ const PPCSubtarget &Subtarget = MF.getSubtarget<PPCSubtarget>();
const PPCFrameLowering *PPCFI =
static_cast<const PPCFrameLowering *>(Subtarget.getFrameLowering());
unsigned StackEst =
@@ -983,7 +962,7 @@ needsFrameBaseReg(MachineInstr *MI, int64_t Offset) const {
// The frame pointer will point to the end of the stack, so estimate the
// offset as the difference between the object offset and the FP location.
- return !isFrameOffsetLegal(MI, Offset);
+ return !isFrameOffsetLegal(MI, getBaseRegister(MF), Offset);
}
/// Insert defining instruction(s) for BaseReg to
@@ -992,7 +971,7 @@ void PPCRegisterInfo::
materializeFrameBaseRegister(MachineBasicBlock *MBB,
unsigned BaseReg, int FrameIdx,
int64_t Offset) const {
- unsigned ADDriOpc = Subtarget.isPPC64() ? PPC::ADDI8 : PPC::ADDI;
+ unsigned ADDriOpc = TM.isPPC64() ? PPC::ADDI8 : PPC::ADDI;
MachineBasicBlock::iterator Ins = MBB->begin();
DebugLoc DL; // Defaults to "unknown"
@@ -1000,6 +979,7 @@ materializeFrameBaseRegister(MachineBasicBlock *MBB,
DL = Ins->getDebugLoc();
const MachineFunction &MF = *MBB->getParent();
+ const PPCSubtarget &Subtarget = MF.getSubtarget<PPCSubtarget>();
const TargetInstrInfo &TII = *Subtarget.getInstrInfo();
const MCInstrDesc &MCID = TII.get(ADDriOpc);
MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo();
@@ -1025,6 +1005,7 @@ void PPCRegisterInfo::resolveFrameIndex(MachineInstr &MI, unsigned BaseReg,
MachineBasicBlock &MBB = *MI.getParent();
MachineFunction &MF = *MBB.getParent();
+ const PPCSubtarget &Subtarget = MF.getSubtarget<PPCSubtarget>();
const TargetInstrInfo &TII = *Subtarget.getInstrInfo();
const MCInstrDesc &MCID = MI.getDesc();
MachineRegisterInfo &MRI = MF.getRegInfo();
@@ -1033,6 +1014,7 @@ void PPCRegisterInfo::resolveFrameIndex(MachineInstr &MI, unsigned BaseReg,
}
bool PPCRegisterInfo::isFrameOffsetLegal(const MachineInstr *MI,
+ unsigned BaseReg,
int64_t Offset) const {
unsigned FIOperandNum = 0;
while (!MI->getOperand(FIOperandNum).isFI()) {
diff --git a/lib/Target/PowerPC/PPCRegisterInfo.h b/lib/Target/PowerPC/PPCRegisterInfo.h
index 4c2ef90..d304e1d 100644
--- a/lib/Target/PowerPC/PPCRegisterInfo.h
+++ b/lib/Target/PowerPC/PPCRegisterInfo.h
@@ -22,15 +22,44 @@
#include "PPCGenRegisterInfo.inc"
namespace llvm {
-class PPCSubtarget;
-class TargetInstrInfo;
-class Type;
+
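+// Map a CR bit register (e.g. PPC::CR0EQ) to the CR field register that
+// contains it (e.g. PPC::CR0). Asserts if SrcReg is not a CR bit.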
+inline static unsigned getCRFromCRBit(unsigned SrcReg) {
+ unsigned Reg = 0;
+ if (SrcReg == PPC::CR0LT || SrcReg == PPC::CR0GT ||
+ SrcReg == PPC::CR0EQ || SrcReg == PPC::CR0UN)
+ Reg = PPC::CR0;
+ else if (SrcReg == PPC::CR1LT || SrcReg == PPC::CR1GT ||
+ SrcReg == PPC::CR1EQ || SrcReg == PPC::CR1UN)
+ Reg = PPC::CR1;
+ else if (SrcReg == PPC::CR2LT || SrcReg == PPC::CR2GT ||
+ SrcReg == PPC::CR2EQ || SrcReg == PPC::CR2UN)
+ Reg = PPC::CR2;
+ else if (SrcReg == PPC::CR3LT || SrcReg == PPC::CR3GT ||
+ SrcReg == PPC::CR3EQ || SrcReg == PPC::CR3UN)
+ Reg = PPC::CR3;
+ else if (SrcReg == PPC::CR4LT || SrcReg == PPC::CR4GT ||
+ SrcReg == PPC::CR4EQ || SrcReg == PPC::CR4UN)
+ Reg = PPC::CR4;
+ else if (SrcReg == PPC::CR5LT || SrcReg == PPC::CR5GT ||
+ SrcReg == PPC::CR5EQ || SrcReg == PPC::CR5UN)
+ Reg = PPC::CR5;
+ else if (SrcReg == PPC::CR6LT || SrcReg == PPC::CR6GT ||
+ SrcReg == PPC::CR6EQ || SrcReg == PPC::CR6UN)
+ Reg = PPC::CR6;
+ else if (SrcReg == PPC::CR7LT || SrcReg == PPC::CR7GT ||
+ SrcReg == PPC::CR7EQ || SrcReg == PPC::CR7UN)
+ Reg = PPC::CR7;
+
+ assert(Reg != 0 && "Invalid CR bit register");
+ return Reg;
+}
+
class PPCRegisterInfo : public PPCGenRegisterInfo {
DenseMap<unsigned, unsigned> ImmToIdxMap;
- const PPCSubtarget &Subtarget;
+ const PPCTargetMachine &TM;
public:
- PPCRegisterInfo(const PPCSubtarget &SubTarget);
+ PPCRegisterInfo(const PPCTargetMachine &TM);
/// getPointerRegClass - Return the register class to use to hold pointers.
/// This is used for addressing modes.
@@ -40,13 +69,14 @@ public:
unsigned getRegPressureLimit(const TargetRegisterClass *RC,
MachineFunction &MF) const override;
- const TargetRegisterClass*
- getLargestLegalSuperClass(const TargetRegisterClass *RC) const override;
+ const TargetRegisterClass *
+ getLargestLegalSuperClass(const TargetRegisterClass *RC,
+ const MachineFunction &MF) const override;
/// Code Generation virtual methods...
- const MCPhysReg *
- getCalleeSavedRegs(const MachineFunction* MF =nullptr) const override;
- const uint32_t *getCallPreservedMask(CallingConv::ID CC) const override;
+ const MCPhysReg *getCalleeSavedRegs(const MachineFunction *MF) const override;
+ const uint32_t *getCallPreservedMask(const MachineFunction &MF,
+ CallingConv::ID CC) const override;
const uint32_t *getNoPreservedMask() const;
void adjustStackMapLiveOutMask(uint32_t *Mask) const override;
@@ -97,7 +127,7 @@ public:
int64_t Offset) const override;
void resolveFrameIndex(MachineInstr &MI, unsigned BaseReg,
int64_t Offset) const override;
- bool isFrameOffsetLegal(const MachineInstr *MI,
+ bool isFrameOffsetLegal(const MachineInstr *MI, unsigned BaseReg,
int64_t Offset) const override;
// Debug information queries.
diff --git a/lib/Target/PowerPC/PPCRegisterInfo.td b/lib/Target/PowerPC/PPCRegisterInfo.td
index 9a7df96..6ca68ed 100644
--- a/lib/Target/PowerPC/PPCRegisterInfo.td
+++ b/lib/Target/PowerPC/PPCRegisterInfo.td
@@ -341,6 +341,8 @@ def CRBITRC : RegisterClass<"PPC", [i1], 32,
def CRRC : RegisterClass<"PPC", [i32], 32, (add CR0, CR1, CR5, CR6,
CR7, CR2, CR3, CR4)>;
+def CRRC0 : RegisterClass<"PPC", [i32], 32, (add CR0)>;
+
// The CTR registers are not allocatable because they're used by the
// decrement-and-branch instructions, and thus need to stay live across
// multiple basic blocks.
diff --git a/lib/Target/PowerPC/PPCSchedule.td b/lib/Target/PowerPC/PPCSchedule.td
index 2f3a1f9..d0954a1 100644
--- a/lib/Target/PowerPC/PPCSchedule.td
+++ b/lib/Target/PowerPC/PPCSchedule.td
@@ -124,401 +124,3 @@ include "PPCScheduleP8.td"
include "PPCScheduleA2.td"
include "PPCScheduleE500mc.td"
include "PPCScheduleE5500.td"
-
-//===----------------------------------------------------------------------===//
-// Instruction to itinerary class map - When add new opcodes to the supported
-// set, refer to the following table to determine which itinerary class the
-// opcode belongs.
-//
-// opcode itinerary class
-// ====== ===============
-// add IIC_IntSimple
-// addc IIC_IntGeneral
-// adde IIC_IntGeneral
-// addi IIC_IntSimple
-// addic IIC_IntGeneral
-// addic. IIC_IntGeneral
-// addis IIC_IntSimple
-// addme IIC_IntGeneral
-// addze IIC_IntGeneral
-// and IIC_IntSimple
-// andc IIC_IntSimple
-// andi. IIC_IntGeneral
-// andis. IIC_IntGeneral
-// b IIC_BrB
-// bc IIC_BrB
-// bcctr IIC_BrB
-// bclr IIC_BrB
-// cmp IIC_IntCompare
-// cmpi IIC_IntCompare
-// cmpl IIC_IntCompare
-// cmpli IIC_IntCompare
-// cntlzd IIC_IntRotateD
-// cntlzw IIC_IntGeneral
-// crand IIC_BrCR
-// crandc IIC_BrCR
-// creqv IIC_BrCR
-// crnand IIC_BrCR
-// crnor IIC_BrCR
-// cror IIC_BrCR
-// crorc IIC_BrCR
-// crxor IIC_BrCR
-// dcba IIC_LdStDCBA
-// dcbf IIC_LdStDCBF
-// dcbi IIC_LdStDCBI
-// dcbst IIC_LdStDCBF
-// dcbt IIC_LdStLoad
-// dcbtst IIC_LdStLoad
-// dcbz IIC_LdStDCBF
-// divd IIC_IntDivD
-// divdu IIC_IntDivD
-// divw IIC_IntDivW
-// divwu IIC_IntDivW
-// dss IIC_LdStDSS
-// dst IIC_LdStDSS
-// dstst IIC_LdStDSS
-// eciwx IIC_LdStLoad
-// ecowx IIC_LdStLoad
-// eieio IIC_LdStLoad
-// eqv IIC_IntSimple
-// extsb IIC_IntSimple
-// extsh IIC_IntSimple
-// extsw IIC_IntSimple
-// fabs IIC_FPGeneral
-// fadd IIC_FPAddSub
-// fadds IIC_FPGeneral
-// fcfid IIC_FPGeneral
-// fcmpo IIC_FPCompare
-// fcmpu IIC_FPCompare
-// fctid IIC_FPGeneral
-// fctidz IIC_FPGeneral
-// fctiw IIC_FPGeneral
-// fctiwz IIC_FPGeneral
-// fdiv IIC_FPDivD
-// fdivs IIC_FPDivS
-// fmadd IIC_FPFused
-// fmadds IIC_FPGeneral
-// fmr IIC_FPGeneral
-// fmsub IIC_FPFused
-// fmsubs IIC_FPGeneral
-// fmul IIC_FPFused
-// fmuls IIC_FPGeneral
-// fnabs IIC_FPGeneral
-// fneg IIC_FPGeneral
-// fnmadd IIC_FPFused
-// fnmadds IIC_FPGeneral
-// fnmsub IIC_FPFused
-// fnmsubs IIC_FPGeneral
-// fres IIC_FPRes
-// frsp IIC_FPGeneral
-// frsqrte IIC_FPGeneral
-// fsel IIC_FPGeneral
-// fsqrt IIC_FPSqrtD
-// fsqrts IIC_FPSqrtS
-// fsub IIC_FPAddSub
-// fsubs IIC_FPGeneral
-// icbi IIC_LdStICBI
-// isel IIC_IntISEL
-// isync IIC_SprISYNC
-// lbz IIC_LdStLoad
-// lbzu IIC_LdStLoadUpd
-// lbzux IIC_LdStLoadUpdX
-// lbzx IIC_LdStLoad
-// ld IIC_LdStLD
-// ldarx IIC_LdStLDARX
-// ldu IIC_LdStLDU
-// ldux IIC_LdStLDUX
-// ldx IIC_LdStLD
-// lfd IIC_LdStLFD
-// lfdu IIC_LdStLFDU
-// lfdux IIC_LdStLFDUX
-// lfdx IIC_LdStLFD
-// lfs IIC_LdStLFD
-// lfsu IIC_LdStLFDU
-// lfsux IIC_LdStLFDUX
-// lfsx IIC_LdStLFD
-// lha IIC_LdStLHA
-// lhau IIC_LdStLHAU
-// lhaux IIC_LdStLHAUX
-// lhax IIC_LdStLHA
-// lhbrx IIC_LdStLoad
-// lhz IIC_LdStLoad
-// lhzu IIC_LdStLoadUpd
-// lhzux IIC_LdStLoadUpdX
-// lhzx IIC_LdStLoad
-// lmw IIC_LdStLMW
-// lswi IIC_LdStLMW
-// lswx IIC_LdStLMW
-// lvebx IIC_LdStLVecX
-// lvehx IIC_LdStLVecX
-// lvewx IIC_LdStLVecX
-// lvsl IIC_LdStLVecX
-// lvsr IIC_LdStLVecX
-// lvx IIC_LdStLVecX
-// lvxl IIC_LdStLVecX
-// lwa IIC_LdStLWA
-// lwarx IIC_LdStLWARX
-// lwaux IIC_LdStLHAUX
-// lwax IIC_LdStLHA
-// lwbrx IIC_LdStLoad
-// lwz IIC_LdStLoad
-// lwzu IIC_LdStLoadUpd
-// lwzux IIC_LdStLoadUpdX
-// lwzx IIC_LdStLoad
-// mcrf IIC_BrMCR
-// mcrfs IIC_FPGeneral
-// mcrxr IIC_BrMCRX
-// mfcr IIC_SprMFCR
-// mffs IIC_IntMFFS
-// mfmsr IIC_SprMFMSR
-// mfspr IIC_SprMFSPR
-// mfsr IIC_SprMFSR
-// mfsrin IIC_SprMFSR
-// mftb IIC_SprMFTB
-// mfvscr IIC_IntMFVSCR
-// mtcrf IIC_BrMCRX
-// mtfsb0 IIC_IntMTFSB0
-// mtfsb1 IIC_IntMTFSB0
-// mtfsf IIC_IntMTFSB0
-// mtfsfi IIC_IntMTFSB0
-// mtmsr IIC_SprMTMSR
-// mtmsrd IIC_LdStLD
-// mtspr IIC_SprMTSPR
-// mtsr IIC_SprMTSR
-// mtsrd IIC_IntMTSRD
-// mtsrdin IIC_IntMTSRD
-// mtsrin IIC_SprMTSRIN
-// mtvscr IIC_IntMFVSCR
-// mulhd IIC_IntMulHD
-// mulhdu IIC_IntMulHD
-// mulhw IIC_IntMulHW
-// mulhwu IIC_IntMulHWU
-// mulld IIC_IntMulHD
-// mulli IIC_IntMulLI
-// mullw IIC_IntMulHW
-// nand IIC_IntSimple
-// neg IIC_IntSimple
-// nor IIC_IntSimple
-// or IIC_IntSimple
-// orc IIC_IntSimple
-// ori IIC_IntSimple
-// oris IIC_IntSimple
-// rfi IIC_SprRFI
-// rfid IIC_IntRFID
-// rldcl IIC_IntRotateD
-// rldcr IIC_IntRotateD
-// rldic IIC_IntRotateDI
-// rldicl IIC_IntRotateDI
-// rldicr IIC_IntRotateDI
-// rldimi IIC_IntRotateDI
-// rlwimi IIC_IntRotate
-// rlwinm IIC_IntGeneral
-// rlwnm IIC_IntGeneral
-// sc IIC_SprSC
-// slbia IIC_LdStSLBIA
-// slbie IIC_LdStSLBIE
-// sld IIC_IntRotateD
-// slw IIC_IntGeneral
-// srad IIC_IntRotateD
-// sradi IIC_IntRotateDI
-// sraw IIC_IntShift
-// srawi IIC_IntShift
-// srd IIC_IntRotateD
-// srw IIC_IntGeneral
-// stb IIC_LdStStore
-// stbu IIC_LdStStoreUpd
-// stbux IIC_LdStStoreUpd
-// stbx IIC_LdStStore
-// std IIC_LdStSTD
-// stdcx. IIC_LdStSTDCX
-// stdu IIC_LdStSTDU
-// stdux IIC_LdStSTDUX
-// stdx IIC_LdStSTD
-// stfd IIC_LdStSTFD
-// stfdu IIC_LdStSTFDU
-// stfdux IIC_LdStSTFDU
-// stfdx IIC_LdStSTFD
-// stfiwx IIC_LdStSTFD
-// stfs IIC_LdStSTFD
-// stfsu IIC_LdStSTFDU
-// stfsux IIC_LdStSTFDU
-// stfsx IIC_LdStSTFD
-// sth IIC_LdStStore
-// sthbrx IIC_LdStStore
-// sthu IIC_LdStStoreUpd
-// sthux IIC_LdStStoreUpd
-// sthx IIC_LdStStore
-// stmw IIC_LdStLMW
-// stswi IIC_LdStLMW
-// stswx IIC_LdStLMW
-// stvebx IIC_LdStSTVEBX
-// stvehx IIC_LdStSTVEBX
-// stvewx IIC_LdStSTVEBX
-// stvx IIC_LdStSTVEBX
-// stvxl IIC_LdStSTVEBX
-// stw IIC_LdStStore
-// stwbrx IIC_LdStStore
-// stwcx. IIC_LdStSTWCX
-// stwu IIC_LdStStoreUpd
-// stwux IIC_LdStStoreUpd
-// stwx IIC_LdStStore
-// subf IIC_IntGeneral
-// subfc IIC_IntGeneral
-// subfe IIC_IntGeneral
-// subfic IIC_IntGeneral
-// subfme IIC_IntGeneral
-// subfze IIC_IntGeneral
-// sync IIC_LdStSync
-// td IIC_IntTrapD
-// tdi IIC_IntTrapD
-// tlbia IIC_LdStSLBIA
-// tlbie IIC_LdStDCBF
-// tlbsync IIC_SprTLBSYNC
-// tw IIC_IntTrapW
-// twi IIC_IntTrapW
-// vaddcuw IIC_VecGeneral
-// vaddfp IIC_VecFP
-// vaddsbs IIC_VecGeneral
-// vaddshs IIC_VecGeneral
-// vaddsws IIC_VecGeneral
-// vaddubm IIC_VecGeneral
-// vaddubs IIC_VecGeneral
-// vadduhm IIC_VecGeneral
-// vadduhs IIC_VecGeneral
-// vadduwm IIC_VecGeneral
-// vadduws IIC_VecGeneral
-// vand IIC_VecGeneral
-// vandc IIC_VecGeneral
-// vavgsb IIC_VecGeneral
-// vavgsh IIC_VecGeneral
-// vavgsw IIC_VecGeneral
-// vavgub IIC_VecGeneral
-// vavguh IIC_VecGeneral
-// vavguw IIC_VecGeneral
-// vcfsx IIC_VecFP
-// vcfux IIC_VecFP
-// vcmpbfp IIC_VecFPCompare
-// vcmpeqfp IIC_VecFPCompare
-// vcmpequb IIC_VecGeneral
-// vcmpequh IIC_VecGeneral
-// vcmpequw IIC_VecGeneral
-// vcmpgefp IIC_VecFPCompare
-// vcmpgtfp IIC_VecFPCompare
-// vcmpgtsb IIC_VecGeneral
-// vcmpgtsh IIC_VecGeneral
-// vcmpgtsw IIC_VecGeneral
-// vcmpgtub IIC_VecGeneral
-// vcmpgtuh IIC_VecGeneral
-// vcmpgtuw IIC_VecGeneral
-// vctsxs IIC_VecFP
-// vctuxs IIC_VecFP
-// vexptefp IIC_VecFP
-// vlogefp IIC_VecFP
-// vmaddfp IIC_VecFP
-// vmaxfp IIC_VecFPCompare
-// vmaxsb IIC_VecGeneral
-// vmaxsh IIC_VecGeneral
-// vmaxsw IIC_VecGeneral
-// vmaxub IIC_VecGeneral
-// vmaxuh IIC_VecGeneral
-// vmaxuw IIC_VecGeneral
-// vmhaddshs IIC_VecComplex
-// vmhraddshs IIC_VecComplex
-// vminfp IIC_VecFPCompare
-// vminsb IIC_VecGeneral
-// vminsh IIC_VecGeneral
-// vminsw IIC_VecGeneral
-// vminub IIC_VecGeneral
-// vminuh IIC_VecGeneral
-// vminuw IIC_VecGeneral
-// vmladduhm IIC_VecComplex
-// vmrghb IIC_VecPerm
-// vmrghh IIC_VecPerm
-// vmrghw IIC_VecPerm
-// vmrglb IIC_VecPerm
-// vmrglh IIC_VecPerm
-// vmrglw IIC_VecPerm
-// vmsubfp IIC_VecFP
-// vmsummbm IIC_VecComplex
-// vmsumshm IIC_VecComplex
-// vmsumshs IIC_VecComplex
-// vmsumubm IIC_VecComplex
-// vmsumuhm IIC_VecComplex
-// vmsumuhs IIC_VecComplex
-// vmulesb IIC_VecComplex
-// vmulesh IIC_VecComplex
-// vmuleub IIC_VecComplex
-// vmuleuh IIC_VecComplex
-// vmulosb IIC_VecComplex
-// vmulosh IIC_VecComplex
-// vmuloub IIC_VecComplex
-// vmulouh IIC_VecComplex
-// vnor IIC_VecGeneral
-// vor IIC_VecGeneral
-// vperm IIC_VecPerm
-// vpkpx IIC_VecPerm
-// vpkshss IIC_VecPerm
-// vpkshus IIC_VecPerm
-// vpkswss IIC_VecPerm
-// vpkswus IIC_VecPerm
-// vpkuhum IIC_VecPerm
-// vpkuhus IIC_VecPerm
-// vpkuwum IIC_VecPerm
-// vpkuwus IIC_VecPerm
-// vrefp IIC_VecFPRound
-// vrfim IIC_VecFPRound
-// vrfin IIC_VecFPRound
-// vrfip IIC_VecFPRound
-// vrfiz IIC_VecFPRound
-// vrlb IIC_VecGeneral
-// vrlh IIC_VecGeneral
-// vrlw IIC_VecGeneral
-// vrsqrtefp IIC_VecFP
-// vsel IIC_VecGeneral
-// vsl IIC_VecVSL
-// vslb IIC_VecGeneral
-// vsldoi IIC_VecPerm
-// vslh IIC_VecGeneral
-// vslo IIC_VecPerm
-// vslw IIC_VecGeneral
-// vspltb IIC_VecPerm
-// vsplth IIC_VecPerm
-// vspltisb IIC_VecPerm
-// vspltish IIC_VecPerm
-// vspltisw IIC_VecPerm
-// vspltw IIC_VecPerm
-// vsr IIC_VecVSR
-// vsrab IIC_VecGeneral
-// vsrah IIC_VecGeneral
-// vsraw IIC_VecGeneral
-// vsrb IIC_VecGeneral
-// vsrh IIC_VecGeneral
-// vsro IIC_VecPerm
-// vsrw IIC_VecGeneral
-// vsubcuw IIC_VecGeneral
-// vsubfp IIC_VecFP
-// vsubsbs IIC_VecGeneral
-// vsubshs IIC_VecGeneral
-// vsubsws IIC_VecGeneral
-// vsububm IIC_VecGeneral
-// vsububs IIC_VecGeneral
-// vsubuhm IIC_VecGeneral
-// vsubuhs IIC_VecGeneral
-// vsubuwm IIC_VecGeneral
-// vsubuws IIC_VecGeneral
-// vsum2sws IIC_VecComplex
-// vsum4sbs IIC_VecComplex
-// vsum4shs IIC_VecComplex
-// vsum4ubs IIC_VecComplex
-// vsumsws IIC_VecComplex
-// vupkhpx IIC_VecPerm
-// vupkhsb IIC_VecPerm
-// vupkhsh IIC_VecPerm
-// vupklpx IIC_VecPerm
-// vupklsb IIC_VecPerm
-// vupklsh IIC_VecPerm
-// vxor IIC_VecGeneral
-// xor IIC_IntSimple
-// xori IIC_IntSimple
-// xoris IIC_IntSimple
-//
diff --git a/lib/Target/PowerPC/PPCSubtarget.cpp b/lib/Target/PowerPC/PPCSubtarget.cpp
index c91428d..ed88803 100644
--- a/lib/Target/PowerPC/PPCSubtarget.cpp
+++ b/lib/Target/PowerPC/PPCSubtarget.cpp
@@ -69,6 +69,7 @@ void PPCSubtarget::initializeEnvironment() {
HasVSX = false;
HasP8Vector = false;
HasP8Altivec = false;
+ HasP8Crypto = false;
HasFCPSGN = false;
HasFSQRT = false;
HasFRE = false;
@@ -94,7 +95,9 @@ void PPCSubtarget::initializeEnvironment() {
HasLazyResolverStubs = false;
HasICBT = false;
HasInvariantFunctionDescriptors = false;
+ HasPartwordAtomics = false;
IsQPXStackUnaligned = false;
+ HasHTM = false;
}
void PPCSubtarget::initSubtargetFeatures(StringRef CPU, StringRef FS) {
diff --git a/lib/Target/PowerPC/PPCSubtarget.h b/lib/Target/PowerPC/PPCSubtarget.h
index 247a96d..b4c1bb1 100644
--- a/lib/Target/PowerPC/PPCSubtarget.h
+++ b/lib/Target/PowerPC/PPCSubtarget.h
@@ -90,6 +90,7 @@ protected:
bool HasVSX;
bool HasP8Vector;
bool HasP8Altivec;
+ bool HasP8Crypto;
bool HasFCPSGN;
bool HasFSQRT;
bool HasFRE, HasFRES, HasFRSQRTE, HasFRSQRTES;
@@ -113,6 +114,8 @@ protected:
bool IsLittleEndian;
bool HasICBT;
bool HasInvariantFunctionDescriptors;
+ bool HasPartwordAtomics;
+ bool HasHTM;
/// When targeting QPX running a stock PPC64 Linux kernel where the stack
/// alignment has not been changed, we need to keep the 16-byte alignment
@@ -218,6 +221,7 @@ public:
bool hasVSX() const { return HasVSX; }
bool hasP8Vector() const { return HasP8Vector; }
bool hasP8Altivec() const { return HasP8Altivec; }
+ bool hasP8Crypto() const { return HasP8Crypto; }
bool hasMFOCRF() const { return HasMFOCRF; }
bool hasISEL() const { return HasISEL; }
bool hasPOPCNTD() const { return HasPOPCNTD; }
@@ -234,6 +238,7 @@ public:
bool hasInvariantFunctionDescriptors() const {
return HasInvariantFunctionDescriptors;
}
+ bool hasPartwordAtomics() const { return HasPartwordAtomics; }
bool isQPXStackUnaligned() const { return IsQPXStackUnaligned; }
unsigned getPlatformStackAlignment() const {
@@ -242,6 +247,7 @@ public:
return 16;
}
+ bool hasHTM() const { return HasHTM; }
const Triple &getTargetTriple() const { return TargetTriple; }
diff --git a/lib/Target/PowerPC/PPCTargetMachine.cpp b/lib/Target/PowerPC/PPCTargetMachine.cpp
index b219e93..7267529 100644
--- a/lib/Target/PowerPC/PPCTargetMachine.cpp
+++ b/lib/Target/PowerPC/PPCTargetMachine.cpp
@@ -160,11 +160,10 @@ PPCTargetMachine::PPCTargetMachine(const Target &T, StringRef TT, StringRef CPU,
StringRef FS, const TargetOptions &Options,
Reloc::Model RM, CodeModel::Model CM,
CodeGenOpt::Level OL)
- : LLVMTargetMachine(T, TT, CPU, computeFSAdditions(FS, OL, TT), Options, RM,
- CM, OL),
+ : LLVMTargetMachine(T, getDataLayoutString(Triple(TT)), TT, CPU,
+ computeFSAdditions(FS, OL, TT), Options, RM, CM, OL),
TLOF(createTLOF(Triple(getTargetTriple()))),
- TargetABI(computeTargetABI(Triple(TT), Options)),
- DL(getDataLayoutString(Triple(TT))), Subtarget(TT, CPU, TargetFS, *this) {
+ TargetABI(computeTargetABI(Triple(TT), Options)) {
initAsmInfo();
}
@@ -208,7 +207,15 @@ PPCTargetMachine::getSubtargetImpl(const Function &F) const {
// creation will depend on the TM and the code generation flags on the
// function that reside in TargetOptions.
resetTargetOptions(F);
- I = llvm::make_unique<PPCSubtarget>(TargetTriple, CPU, FS, *this);
+ I = llvm::make_unique<PPCSubtarget>(
+ TargetTriple, CPU,
+ // FIXME: It would be good if the subtarget additions here were
+ // not necessary. Anything that turns them on/off (overrides) ends
+ // up being put at the end of the feature string, but the defaults
+ // shouldn't require adding them. Fixing this means pulling Feature64Bit
+ // out of most of the target cpus in the .td file and making it set only
+ // as part of initialization via the TargetTriple.
+ computeFSAdditions(FS, getOptLevel(), getTargetTriple()), *this);
}
return I.get();
}
diff --git a/lib/Target/PowerPC/PPCTargetMachine.h b/lib/Target/PowerPC/PPCTargetMachine.h
index 6508484..7a49058 100644
--- a/lib/Target/PowerPC/PPCTargetMachine.h
+++ b/lib/Target/PowerPC/PPCTargetMachine.h
@@ -29,10 +29,6 @@ public:
private:
std::unique_ptr<TargetLoweringObjectFile> TLOF;
PPCABI TargetABI;
- // Calculates type size & alignment
- const DataLayout DL;
- PPCSubtarget Subtarget;
-
mutable StringMap<std::unique_ptr<PPCSubtarget>> SubtargetMap;
public:
@@ -42,8 +38,6 @@ public:
~PPCTargetMachine() override;
- const DataLayout *getDataLayout() const override { return &DL; }
- const PPCSubtarget *getSubtargetImpl() const override { return &Subtarget; }
const PPCSubtarget *getSubtargetImpl(const Function &F) const override;
// Pass Pipeline Configuration
diff --git a/lib/Target/PowerPC/PPCTargetTransformInfo.cpp b/lib/Target/PowerPC/PPCTargetTransformInfo.cpp
index 073bbb0..b46acd4 100644
--- a/lib/Target/PowerPC/PPCTargetTransformInfo.cpp
+++ b/lib/Target/PowerPC/PPCTargetTransformInfo.cpp
@@ -192,6 +192,10 @@ void PPCTTIImpl::getUnrollingPreferences(Loop *L,
BaseT::getUnrollingPreferences(L, UP);
}
+bool PPCTTIImpl::enableAggressiveInterleaving(bool LoopHasReductions) {
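+ // Aggressive interleaving creates extra parallel dependence chains, which
+ // mainly pays off when a loop-carried reduction would otherwise serialize.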
+ return LoopHasReductions;
+}
+
unsigned PPCTTIImpl::getNumberOfRegisters(bool Vector) {
if (Vector && !ST->hasAltivec() && !ST->hasQPX())
return 0;
diff --git a/lib/Target/PowerPC/PPCTargetTransformInfo.h b/lib/Target/PowerPC/PPCTargetTransformInfo.h
index cef7079..21acea1 100644
--- a/lib/Target/PowerPC/PPCTargetTransformInfo.h
+++ b/lib/Target/PowerPC/PPCTargetTransformInfo.h
@@ -78,6 +78,7 @@ public:
/// \name Vector TTI Implementations
/// @{
+ bool enableAggressiveInterleaving(bool LoopHasReductions);
unsigned getNumberOfRegisters(bool Vector);
unsigned getRegisterBitWidth(bool Vector);
unsigned getMaxInterleaveFactor();
diff --git a/lib/Target/PowerPC/README.txt b/lib/Target/PowerPC/README.txt
index 4132b04..dfe988f 100644
--- a/lib/Target/PowerPC/README.txt
+++ b/lib/Target/PowerPC/README.txt
@@ -621,3 +621,10 @@ void foo() {
bar(x);
__asm__("" ::: "cr2");
}
+
+//===----------------------------------------------------------------------===//
+
+Instruction fusion was introduced in ISA 2.06, and more opportunities were
+added in ISA 2.07. LLVM needs infrastructure to recognize fusion opportunities
+and to force such instruction pairs to be scheduled together.
+
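+One candidate pair (a sketch; which pairs fuse is implementation-specific,
+e.g. POWER8 supports add/load fusion) is the TOC-relative load sequence:
+
+    addis 3, 2, .LC0@toc@ha
+    ld 3, .LC0@toc@l(3)
+
+Keeping such dependent pairs adjacent would let fusion-capable cores
+dispatch them as a single operation.
+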
diff --git a/lib/Target/PowerPC/README_ALTIVEC.txt b/lib/Target/PowerPC/README_ALTIVEC.txt
index 1e4c6fb..43d87d3 100644
--- a/lib/Target/PowerPC/README_ALTIVEC.txt
+++ b/lib/Target/PowerPC/README_ALTIVEC.txt
@@ -209,3 +209,107 @@ vector float f(vector float a, vector float b) {
return b;
}
+//===----------------------------------------------------------------------===//
+
+We should do a little better at eliminating dead stores.
+The stores to the stack are dead since %a and %b are not needed.
+
+; Function Attrs: nounwind
+define <16 x i8> @test_vpmsumb() #0 {
+ entry:
+ %a = alloca <16 x i8>, align 16
+ %b = alloca <16 x i8>, align 16
+ store <16 x i8> <i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 16>, <16 x i8>* %a, align 16
+ store <16 x i8> <i8 113, i8 114, i8 115, i8 116, i8 117, i8 118, i8 119, i8 120, i8 121, i8 122, i8 123, i8 124, i8 125, i8 126, i8 127, i8 112>, <16 x i8>* %b, align 16
+ %0 = load <16 x i8>* %a, align 16
+ %1 = load <16 x i8>* %b, align 16
+ %2 = call <16 x i8> @llvm.ppc.altivec.crypto.vpmsumb(<16 x i8> %0, <16 x i8> %1)
+ ret <16 x i8> %2
+}
+
+
+; Function Attrs: nounwind readnone
+declare <16 x i8> @llvm.ppc.altivec.crypto.vpmsumb(<16 x i8>, <16 x i8>) #1
+
+
+Produces the following code with -mtriple=powerpc64-unknown-linux-gnu:
+# BB#0: # %entry
+ addis 3, 2, .LCPI0_0@toc@ha
+ addis 4, 2, .LCPI0_1@toc@ha
+ addi 3, 3, .LCPI0_0@toc@l
+ addi 4, 4, .LCPI0_1@toc@l
+ lxvw4x 0, 0, 3
+ addi 3, 1, -16
+ lxvw4x 35, 0, 4
+ stxvw4x 0, 0, 3
+ ori 2, 2, 0
+ lxvw4x 34, 0, 3
+ addi 3, 1, -32
+ stxvw4x 35, 0, 3
+ vpmsumb 2, 2, 3
+ blr
+ .long 0
+ .quad 0
+
+The two stxvw4x instructions are not needed.
+With -mtriple=powerpc64le-unknown-linux-gnu, the associated permutes
+are present too.
+
+//===----------------------------------------------------------------------===//
+
+The following example is found in test/CodeGen/PowerPC/vec_add_sub_doubleword.ll:
+
+define <2 x i64> @increment_by_val(<2 x i64> %x, i64 %val) nounwind {
+ %tmpvec = insertelement <2 x i64> <i64 0, i64 0>, i64 %val, i32 0
+ %tmpvec2 = insertelement <2 x i64> %tmpvec, i64 %val, i32 1
+ %result = add <2 x i64> %x, %tmpvec2
+ ret <2 x i64> %result
+}
+
+This will generate the following instruction sequence:
+ std 5, -8(1)
+ std 5, -16(1)
+ addi 3, 1, -16
+ ori 2, 2, 0
+ lxvd2x 35, 0, 3
+ vaddudm 2, 2, 3
+ blr
+
+This will almost certainly cause a load-hit-store hazard.
+Since val is a value parameter, it should not need to be saved onto
+the stack, unless it's being done to set up the vector register. Instead,
+it would be better to splat the value into a vector register, and then
+remove the (dead) stores to the stack.
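+
+A minimal sketch of the preferred sequence (assuming POWER8 direct moves;
+register numbers are illustrative):
+    mtvsrd 35, 5        # move val from GPR 5 into VSR 35 (= VR 3)
+    xxspltd 35, 35, 0   # splat doubleword 0 into both elements
+    vaddudm 2, 2, 3
+    blr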
+
+//===----------------------------------------------------------------------===//
+
+At the moment we always generate a lxsdx in preference to lfd, or stxsdx in
+preference to stfd. When we have a reg-immediate addressing mode, this is a
+poor choice, since we have to load the address into an index register. This
+should be fixed for P7/P8.
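+
+For example (a sketch; register numbers are illustrative):
+    li 4, 8          # materialize the index
+    lxsdx 1, 3, 4    # vs. the single instruction: lfd 1, 8(3)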
+
+//===----------------------------------------------------------------------===//
+
+Right now, ShuffleKind 0 is supported only on BE, and ShuffleKind 2 only on LE.
+However, we could actually support both kinds on either endianness, if we check
+for the appropriate shufflevector pattern for each case ... this would cause
+some additional shufflevectors to be recognized and implemented via the
+"swapped" form.
+
+//===----------------------------------------------------------------------===//
+
+There is a utility program called PerfectShuffle that generates a table of the
+shortest instruction sequence for implementing a shufflevector operation on
+PowerPC. However, this was designed for big-endian code generation. We could
+modify this program to create a little-endian version of the table. The table
+is used in PPCISelLowering.cpp, in PPCTargetLowering::LowerVECTOR_SHUFFLE().
+
+//===----------------------------------------------------------------------===//
+
+Opportunities to use instructions from PPCInstrVSX.td during code gen:
+ - Conversion instructions (Sections 7.6.1.5 and 7.6.1.6 of ISA 2.07)
+ - Scalar comparisons (xscmpodp and xscmpudp)
+ - Min and max (xsmaxdp, xsmindp, xvmaxdp, xvmindp, xvmaxsp, xvminsp)
+
+Related to this: we currently do not generate the lxvw4x instruction for either
+v4f32 or v4i32, probably because adding a dag pattern to the recognizer requires
+a single target type. This should probably be addressed in the PPCISelDAGToDAG logic.
diff --git a/lib/Target/R600/AMDGPU.td b/lib/Target/R600/AMDGPU.td
index a7d48b3..e5d5ce2 100644
--- a/lib/Target/R600/AMDGPU.td
+++ b/lib/Target/R600/AMDGPU.td
@@ -103,6 +103,11 @@ def FeatureVGPRSpilling : SubtargetFeature<"vgpr-spilling",
"true",
"Enable spilling of VGPRs to scratch memory">;
+def FeatureSGPRInitBug : SubtargetFeature<"sgpr-init-bug",
+ "SGPRInitBug",
+ "true",
+ "VI SGPR initilization bug requiring a fixed SGPR allocation size">;
+
class SubtargetFeatureFetchLimit <string Value> :
SubtargetFeature <"fetch"#Value,
"TexVTXClauseSize",
diff --git a/lib/Target/R600/AMDGPUAsmPrinter.cpp b/lib/Target/R600/AMDGPUAsmPrinter.cpp
index 92bc314..d911014 100644
--- a/lib/Target/R600/AMDGPUAsmPrinter.cpp
+++ b/lib/Target/R600/AMDGPUAsmPrinter.cpp
@@ -105,8 +105,6 @@ bool AMDGPUAsmPrinter::runOnMachineFunction(MachineFunction &MF) {
SetupMachineFunction(MF);
- EmitFunctionHeader();
-
MCContext &Context = getObjFileLowering().getContext();
const MCSectionELF *ConfigSection =
Context.getELFSection(".AMDGPU.config", ELF::SHT_PROGBITS, 0);
@@ -129,7 +127,6 @@ bool AMDGPUAsmPrinter::runOnMachineFunction(MachineFunction &MF) {
HexLines.clear();
DisasmLineMaxLen = 0;
- OutStreamer.SwitchSection(getObjFileLowering().getTextSection());
EmitFunctionBody();
if (isVerbose()) {
@@ -339,6 +336,13 @@ void AMDGPUAsmPrinter::getSIProgramInfo(SIProgramInfo &ProgInfo,
ProgInfo.NumVGPR = MaxVGPR + 1;
ProgInfo.NumSGPR = MaxSGPR + 1;
+ if (STM.hasSGPRInitBug()) {
+ if (ProgInfo.NumSGPR > AMDGPUSubtarget::FIXED_SGPR_COUNT_FOR_INIT_BUG)
+ llvm_unreachable("Too many SGPRs used with the SGPR init bug");
+
+ ProgInfo.NumSGPR = AMDGPUSubtarget::FIXED_SGPR_COUNT_FOR_INIT_BUG;
+ }
+
ProgInfo.VGPRBlocks = (ProgInfo.NumVGPR - 1) / 4;
ProgInfo.SGPRBlocks = (ProgInfo.NumSGPR - 1) / 8;
// Set the value to initialize FP_ROUND and FP_DENORM parts of the mode
diff --git a/lib/Target/R600/AMDGPUISelDAGToDAG.cpp b/lib/Target/R600/AMDGPUISelDAGToDAG.cpp
index b5ab703..7341cd9 100644
--- a/lib/Target/R600/AMDGPUISelDAGToDAG.cpp
+++ b/lib/Target/R600/AMDGPUISelDAGToDAG.cpp
@@ -95,7 +95,8 @@ private:
SDValue &Idxen, SDValue &Addr64, SDValue &GLC, SDValue &SLC,
SDValue &TFE) const;
bool SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc, SDValue &VAddr,
- SDValue &SOffset, SDValue &Offset) const;
+ SDValue &SOffset, SDValue &Offset, SDValue &GLC,
+ SDValue &SLC, SDValue &TFE) const;
bool SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc,
SDValue &VAddr, SDValue &SOffset, SDValue &Offset,
SDValue &SLC) const;
@@ -120,6 +121,11 @@ private:
SDNode *SelectADD_SUB_I64(SDNode *N);
SDNode *SelectDIV_SCALE(SDNode *N);
+ SDNode *getS_BFE(unsigned Opcode, SDLoc DL, SDValue Val,
+ uint32_t Offset, uint32_t Width);
+ SDNode *SelectS_BFEFromShifts(SDNode *N);
+ SDNode *SelectS_BFE(SDNode *N);
+
// Include the pieces autogenerated from the target description.
#include "AMDGPUGenDAGISel.inc"
};
@@ -519,21 +525,11 @@ SDNode *AMDGPUDAGToDAGISel::Select(SDNode *N) {
bool Signed = Opc == AMDGPUISD::BFE_I32;
- // Transformation function, pack the offset and width of a BFE into
- // the format expected by the S_BFE_I32 / S_BFE_U32. In the second
- // source, bits [5:0] contain the offset and bits [22:16] the width.
-
uint32_t OffsetVal = Offset->getZExtValue();
uint32_t WidthVal = Width->getZExtValue();
- uint32_t PackedVal = OffsetVal | WidthVal << 16;
-
- SDValue PackedOffsetWidth = CurDAG->getTargetConstant(PackedVal, MVT::i32);
- return CurDAG->getMachineNode(Signed ? AMDGPU::S_BFE_I32 : AMDGPU::S_BFE_U32,
- SDLoc(N),
- MVT::i32,
- N->getOperand(0),
- PackedOffsetWidth);
+ return getS_BFE(Signed ? AMDGPU::S_BFE_I32 : AMDGPU::S_BFE_U32, SDLoc(N),
+ N->getOperand(0), OffsetVal, WidthVal);
}
case AMDGPUISD::DIV_SCALE: {
@@ -547,6 +543,14 @@ SDNode *AMDGPUDAGToDAGISel::Select(SDNode *N) {
}
case ISD::ADDRSPACECAST:
return SelectAddrSpaceCast(N);
+ case ISD::AND:
+ case ISD::SRL:
+ case ISD::SRA:
+ if (N->getValueType(0) != MVT::i32 ||
+ Subtarget->getGeneration() < AMDGPUSubtarget::SOUTHERN_ISLANDS)
+ break;
+
+ return SelectS_BFE(N);
}
return SelectCode(N);
@@ -966,8 +970,9 @@ void AMDGPUDAGToDAGISel::SelectMUBUF(SDValue Addr, SDValue &Ptr,
bool AMDGPUDAGToDAGISel::SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc,
SDValue &VAddr, SDValue &SOffset,
- SDValue &Offset) const {
- SDValue Ptr, Offen, Idxen, Addr64, GLC, SLC, TFE;
+ SDValue &Offset, SDValue &GLC,
+ SDValue &SLC, SDValue &TFE) const {
+ SDValue Ptr, Offen, Idxen, Addr64;
SelectMUBUF(Addr, Ptr, VAddr, SOffset, Offset, Offen, Idxen, Addr64,
GLC, SLC, TFE);
@@ -991,8 +996,9 @@ bool AMDGPUDAGToDAGISel::SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc,
SDValue &Offset,
SDValue &SLC) const {
SLC = CurDAG->getTargetConstant(0, MVT::i1);
+ SDValue GLC, TFE;
- return SelectMUBUFAddr64(Addr, SRsrc, VAddr, SOffset, Offset);
+ return SelectMUBUFAddr64(Addr, SRsrc, VAddr, SOffset, Offset, GLC, SLC, TFE);
}
bool AMDGPUDAGToDAGISel::SelectMUBUFScratch(SDValue Addr, SDValue &Rsrc,
@@ -1147,6 +1153,95 @@ SDNode *AMDGPUDAGToDAGISel::SelectAddrSpaceCast(SDNode *N) {
return CurDAG->getNode(ISD::BITCAST, DL, DestVT, Src).getNode();
}
+SDNode *AMDGPUDAGToDAGISel::getS_BFE(unsigned Opcode, SDLoc DL, SDValue Val,
+ uint32_t Offset, uint32_t Width) {
+ // Transformation function, pack the offset and width of a BFE into
+ // the format expected by the S_BFE_I32 / S_BFE_U32. In the second
+ // source, bits [5:0] contain the offset and bits [22:16] the width.
+ uint32_t PackedVal = Offset | (Width << 16);
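+ // For example, Offset = 8 and Width = 4 pack to 0x00040008.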
+ SDValue PackedConst = CurDAG->getTargetConstant(PackedVal, MVT::i32);
+
+ return CurDAG->getMachineNode(Opcode, DL, MVT::i32, Val, PackedConst);
+}
+
+SDNode *AMDGPUDAGToDAGISel::SelectS_BFEFromShifts(SDNode *N) {
+ // "(a << b) srl c)" ---> "BFE_U32 a, (c-b), (32-c)
+ // "(a << b) sra c)" ---> "BFE_I32 a, (c-b), (32-c)
+ // Predicate: 0 < b <= c < 32
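+ // For example, b = 8, c = 24: ((a << 8) >> 24) yields bits [23:16] of a,
+ // i.e. BFE_U32 a, 16, 8 (offset = c-b = 16, width = 32-c = 8).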
+
+ const SDValue &Shl = N->getOperand(0);
+ ConstantSDNode *B = dyn_cast<ConstantSDNode>(Shl->getOperand(1));
+ ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(1));
+
+ if (B && C) {
+ uint32_t BVal = B->getZExtValue();
+ uint32_t CVal = C->getZExtValue();
+
+ if (0 < BVal && BVal <= CVal && CVal < 32) {
+ bool Signed = N->getOpcode() == ISD::SRA;
+ unsigned Opcode = Signed ? AMDGPU::S_BFE_I32 : AMDGPU::S_BFE_U32;
+
+ return getS_BFE(Opcode, SDLoc(N), Shl.getOperand(0),
+ CVal - BVal, 32 - CVal);
+ }
+ }
+ return SelectCode(N);
+}
+
+SDNode *AMDGPUDAGToDAGISel::SelectS_BFE(SDNode *N) {
+ switch (N->getOpcode()) {
+ case ISD::AND:
+ if (N->getOperand(0).getOpcode() == ISD::SRL) {
+ // "(a srl b) & mask" ---> "BFE_U32 a, b, popcount(mask)"
+ // Predicate: isMask(mask)
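+ // For example, b = 4 and mask = 0xff extract the 8 bits starting at bit 4.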
+ const SDValue &Srl = N->getOperand(0);
+ ConstantSDNode *Shift = dyn_cast<ConstantSDNode>(Srl.getOperand(1));
+ ConstantSDNode *Mask = dyn_cast<ConstantSDNode>(N->getOperand(1));
+
+ if (Shift && Mask) {
+ uint32_t ShiftVal = Shift->getZExtValue();
+ uint32_t MaskVal = Mask->getZExtValue();
+
+ if (isMask_32(MaskVal)) {
+ uint32_t WidthVal = countPopulation(MaskVal);
+
+ return getS_BFE(AMDGPU::S_BFE_U32, SDLoc(N), Srl.getOperand(0),
+ ShiftVal, WidthVal);
+ }
+ }
+ }
+ break;
+ case ISD::SRL:
+ if (N->getOperand(0).getOpcode() == ISD::AND) {
+ // "(a & mask) srl b)" ---> "BFE_U32 a, b, popcount(mask >> b)"
+ // Predicate: isMask(mask >> b)
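+ // For example, mask = 0xff0 and b = 4: (a & 0xff0) >> 4 == BFE_U32 a, 4, 8.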
+ const SDValue &And = N->getOperand(0);
+ ConstantSDNode *Shift = dyn_cast<ConstantSDNode>(N->getOperand(1));
+ ConstantSDNode *Mask = dyn_cast<ConstantSDNode>(And->getOperand(1));
+
+ if (Shift && Mask) {
+ uint32_t ShiftVal = Shift->getZExtValue();
+ uint32_t MaskVal = Mask->getZExtValue() >> ShiftVal;
+
+ if (isMask_32(MaskVal)) {
+ uint32_t WidthVal = countPopulation(MaskVal);
+
+ return getS_BFE(AMDGPU::S_BFE_U32, SDLoc(N), And.getOperand(0),
+ ShiftVal, WidthVal);
+ }
+ }
+ } else if (N->getOperand(0).getOpcode() == ISD::SHL)
+ return SelectS_BFEFromShifts(N);
+ break;
+ case ISD::SRA:
+ if (N->getOperand(0).getOpcode() == ISD::SHL)
+ return SelectS_BFEFromShifts(N);
+ break;
+ }
+
+ return SelectCode(N);
+}
+
bool AMDGPUDAGToDAGISel::SelectVOP3Mods(SDValue In, SDValue &Src,
SDValue &SrcMods) const {
diff --git a/lib/Target/R600/AMDGPUISelLowering.cpp b/lib/Target/R600/AMDGPUISelLowering.cpp
index 4707279..62a33fa 100644
--- a/lib/Target/R600/AMDGPUISelLowering.cpp
+++ b/lib/Target/R600/AMDGPUISelLowering.cpp
@@ -885,9 +885,6 @@ SDValue AMDGPUTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
return LowerIntrinsicIABS(Op, DAG);
case AMDGPUIntrinsic::AMDGPU_lrp:
return LowerIntrinsicLRP(Op, DAG);
- case AMDGPUIntrinsic::AMDGPU_fract:
- case AMDGPUIntrinsic::AMDIL_fraction: // Legacy name.
- return DAG.getNode(AMDGPUISD::FRACT, DL, VT, Op.getOperand(1));
case AMDGPUIntrinsic::AMDGPU_clamp:
case AMDGPUIntrinsic::AMDIL_clamp: // Legacy name.
diff --git a/lib/Target/R600/AMDGPUInstrInfo.cpp b/lib/Target/R600/AMDGPUInstrInfo.cpp
index f4de2d6..f0f10ca 100644
--- a/lib/Target/R600/AMDGPUInstrInfo.cpp
+++ b/lib/Target/R600/AMDGPUInstrInfo.cpp
@@ -31,7 +31,7 @@ using namespace llvm;
void AMDGPUInstrInfo::anchor() {}
AMDGPUInstrInfo::AMDGPUInstrInfo(const AMDGPUSubtarget &st)
- : AMDGPUGenInstrInfo(-1,-1), RI(st), ST(st) { }
+ : AMDGPUGenInstrInfo(-1, -1), ST(st) {}
const AMDGPURegisterInfo &AMDGPUInstrInfo::getRegisterInfo() const {
return RI;
@@ -152,26 +152,22 @@ bool AMDGPUInstrInfo::expandPostRAPseudo (MachineBasicBlock::iterator MI) const
return true;
}
-
-MachineInstr *
-AMDGPUInstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
- MachineInstr *MI,
- const SmallVectorImpl<unsigned> &Ops,
- int FrameIndex) const {
+MachineInstr *AMDGPUInstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
+ MachineInstr *MI,
+ ArrayRef<unsigned> Ops,
+ int FrameIndex) const {
// TODO: Implement this function
return nullptr;
}
-MachineInstr*
-AMDGPUInstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
- MachineInstr *MI,
- const SmallVectorImpl<unsigned> &Ops,
- MachineInstr *LoadMI) const {
+MachineInstr *
+AMDGPUInstrInfo::foldMemoryOperandImpl(MachineFunction &MF, MachineInstr *MI,
+ ArrayRef<unsigned> Ops,
+ MachineInstr *LoadMI) const {
// TODO: Implement this function
return nullptr;
}
-bool
-AMDGPUInstrInfo::canFoldMemoryOperand(const MachineInstr *MI,
- const SmallVectorImpl<unsigned> &Ops) const {
+bool AMDGPUInstrInfo::canFoldMemoryOperand(const MachineInstr *MI,
+ ArrayRef<unsigned> Ops) const {
// TODO: Implement this function
return false;
}
@@ -360,8 +356,8 @@ static enum SISubtarget AMDGPUSubtargetToSISubtarget(unsigned Gen) {
}
int AMDGPUInstrInfo::pseudoToMCOpcode(int Opcode) const {
- int MCOp = AMDGPU::getMCOpcode(Opcode,
- AMDGPUSubtargetToSISubtarget(RI.ST.getGeneration()));
+ int MCOp = AMDGPU::getMCOpcode(
+ Opcode, AMDGPUSubtargetToSISubtarget(ST.getGeneration()));
// -1 means that Opcode is already a native instruction.
if (MCOp == -1)
diff --git a/lib/Target/R600/AMDGPUInstrInfo.h b/lib/Target/R600/AMDGPUInstrInfo.h
index 202183c..07042b5 100644
--- a/lib/Target/R600/AMDGPUInstrInfo.h
+++ b/lib/Target/R600/AMDGPUInstrInfo.h
@@ -85,14 +85,13 @@ public:
const TargetRegisterInfo *TRI) const override;
protected:
- MachineInstr *foldMemoryOperandImpl(MachineFunction &MF,
- MachineInstr *MI,
- const SmallVectorImpl<unsigned> &Ops,
+ MachineInstr *foldMemoryOperandImpl(MachineFunction &MF, MachineInstr *MI,
+ ArrayRef<unsigned> Ops,
int FrameIndex) const override;
- MachineInstr *foldMemoryOperandImpl(MachineFunction &MF,
- MachineInstr *MI,
- const SmallVectorImpl<unsigned> &Ops,
+ MachineInstr *foldMemoryOperandImpl(MachineFunction &MF, MachineInstr *MI,
+ ArrayRef<unsigned> Ops,
MachineInstr *LoadMI) const override;
+
public:
/// \returns the smallest register index that will be accessed by an indirect
/// read or write or -1 if indirect addressing is not used by this program.
@@ -103,7 +102,7 @@ public:
int getIndirectIndexEnd(const MachineFunction &MF) const;
bool canFoldMemoryOperand(const MachineInstr *MI,
- const SmallVectorImpl<unsigned> &Ops) const override;
+ ArrayRef<unsigned> Ops) const override;
bool unfoldMemoryOperand(MachineFunction &MF, MachineInstr *MI,
unsigned Reg, bool UnfoldLoad, bool UnfoldStore,
SmallVectorImpl<MachineInstr *> &NewMIs) const override;
diff --git a/lib/Target/R600/AMDGPUInstructions.td b/lib/Target/R600/AMDGPUInstructions.td
index 849b241..4d08201 100644
--- a/lib/Target/R600/AMDGPUInstructions.td
+++ b/lib/Target/R600/AMDGPUInstructions.td
@@ -578,22 +578,20 @@ class SHA256MaPattern <Instruction BFI_INT, Instruction XOR> : Pat <
// Bitfield extract patterns
-/*
-
-XXX: The BFE pattern is not working correctly because the XForm is not being
-applied.
+def IMMZeroBasedBitfieldMask : PatLeaf <(imm), [{
+ return isMask_32(N->getZExtValue());
+}]>;
-def legalshift32 : ImmLeaf <i32, [{return Imm >=0 && Imm < 32;}]>;
-def bfemask : PatLeaf <(imm), [{return isMask_32(N->getZExtValue());}],
- SDNodeXForm<imm, [{ return CurDAG->getTargetConstant(countTrailingOnes(N->getZExtValue()), MVT::i32);}]>>;
+def IMMPopCount : SDNodeXForm<imm, [{
+ return CurDAG->getTargetConstant(countPopulation(N->getZExtValue()),
+ MVT::i32);
+}]>;
-class BFEPattern <Instruction BFE> : Pat <
- (and (srl i32:$x, legalshift32:$y), bfemask:$z),
- (BFE $x, $y, $z)
+class BFEPattern <Instruction BFE, Instruction MOV> : Pat <
+ (i32 (and (i32 (srl i32:$src, i32:$rshift)), IMMZeroBasedBitfieldMask:$mask)),
+ (BFE $src, $rshift, (MOV (i32 (IMMPopCount $mask))))
>;
-*/
-
// rotr pattern
class ROTRPattern <Instruction BIT_ALIGN> : Pat <
(rotr i32:$src0, i32:$src1),
diff --git a/lib/Target/R600/AMDGPUIntrinsics.td b/lib/Target/R600/AMDGPUIntrinsics.td
index eee9c29..ab489cd 100644
--- a/lib/Target/R600/AMDGPUIntrinsics.td
+++ b/lib/Target/R600/AMDGPUIntrinsics.td
@@ -68,6 +68,7 @@ let TargetPrefix = "AMDGPU", isTarget = 1 in {
def int_AMDGPU_bfe_u32 : Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
def int_AMDGPU_bfm : Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
def int_AMDGPU_brev : Intrinsic<[llvm_i32_ty], [llvm_i32_ty], [IntrNoMem]>;
+ def int_AMDGPU_flbit_i32 : Intrinsic<[llvm_i32_ty], [llvm_i32_ty], [IntrNoMem]>;
def int_AMDGPU_barrier_local : Intrinsic<[], [], []>;
def int_AMDGPU_barrier_global : Intrinsic<[], [], []>;
}
diff --git a/lib/Target/R600/AMDGPUPromoteAlloca.cpp b/lib/Target/R600/AMDGPUPromoteAlloca.cpp
index b81fef4..175dcd8 100644
--- a/lib/Target/R600/AMDGPUPromoteAlloca.cpp
+++ b/lib/Target/R600/AMDGPUPromoteAlloca.cpp
@@ -18,6 +18,7 @@
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InstVisitor.h"
#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
#define DEBUG_TYPE "amdgpu-promote-alloca"
@@ -87,7 +88,7 @@ bool AMDGPUPromoteAlloca::runOnFunction(Function &F) {
continue;
if (Use->getParent()->getParent() == &F)
LocalMemAvailable -=
- Mod->getDataLayout()->getTypeAllocSize(GVTy->getElementType());
+ Mod->getDataLayout().getTypeAllocSize(GVTy->getElementType());
}
}
}
@@ -276,8 +277,8 @@ void AMDGPUPromoteAlloca::visitAlloca(AllocaInst &I) {
// value from the reqd_work_group_size function attribute if it is
// available.
unsigned WorkGroupSize = 256;
- int AllocaSize = WorkGroupSize *
- Mod->getDataLayout()->getTypeAllocSize(AllocaTy);
+ int AllocaSize =
+ WorkGroupSize * Mod->getDataLayout().getTypeAllocSize(AllocaTy);
if (AllocaSize > LocalMemAvailable) {
DEBUG(dbgs() << " Not enough local memory to promote alloca.\n");
@@ -294,9 +295,9 @@ void AMDGPUPromoteAlloca::visitAlloca(AllocaInst &I) {
DEBUG(dbgs() << "Promoting alloca to local memory\n");
LocalMemAvailable -= AllocaSize;
+ Type *GVTy = ArrayType::get(I.getAllocatedType(), 256);
GlobalVariable *GV = new GlobalVariable(
- *Mod, ArrayType::get(I.getAllocatedType(), 256), false,
- GlobalValue::ExternalLinkage, 0, I.getName(), 0,
+ *Mod, GVTy, false, GlobalValue::ExternalLinkage, 0, I.getName(), 0,
GlobalVariable::NotThreadLocal, AMDGPUAS::LOCAL_ADDRESS);
FunctionType *FTy = FunctionType::get(
@@ -332,7 +333,7 @@ void AMDGPUPromoteAlloca::visitAlloca(AllocaInst &I) {
Indices.push_back(Constant::getNullValue(Type::getInt32Ty(Mod->getContext())));
Indices.push_back(TID);
- Value *Offset = Builder.CreateGEP(GV, Indices);
+ Value *Offset = Builder.CreateGEP(GVTy, GV, Indices);
I.mutateType(Offset->getType());
I.replaceAllUsesWith(Offset);
I.eraseFromParent();
diff --git a/lib/Target/R600/AMDGPURegisterInfo.cpp b/lib/Target/R600/AMDGPURegisterInfo.cpp
index 57b054b..3ca0eca 100644
--- a/lib/Target/R600/AMDGPURegisterInfo.cpp
+++ b/lib/Target/R600/AMDGPURegisterInfo.cpp
@@ -17,10 +17,7 @@
using namespace llvm;
-AMDGPURegisterInfo::AMDGPURegisterInfo(const AMDGPUSubtarget &st)
-: AMDGPUGenRegisterInfo(0),
- ST(st)
- { }
+AMDGPURegisterInfo::AMDGPURegisterInfo() : AMDGPUGenRegisterInfo(0) {}
//===----------------------------------------------------------------------===//
// Function handling callbacks - Functions are a seldom used feature of GPUS, so
diff --git a/lib/Target/R600/AMDGPURegisterInfo.h b/lib/Target/R600/AMDGPURegisterInfo.h
index f27576a..cfd800b 100644
--- a/lib/Target/R600/AMDGPURegisterInfo.h
+++ b/lib/Target/R600/AMDGPURegisterInfo.h
@@ -30,9 +30,8 @@ class TargetInstrInfo;
struct AMDGPURegisterInfo : public AMDGPUGenRegisterInfo {
static const MCPhysReg CalleeSavedReg;
- const AMDGPUSubtarget &ST;
- AMDGPURegisterInfo(const AMDGPUSubtarget &st);
+ AMDGPURegisterInfo();
BitVector getReservedRegs(const MachineFunction &MF) const override {
assert(!"Unimplemented"); return BitVector();
diff --git a/lib/Target/R600/AMDGPUSubtarget.cpp b/lib/Target/R600/AMDGPUSubtarget.cpp
index 70c8525..0ead652 100644
--- a/lib/Target/R600/AMDGPUSubtarget.cpp
+++ b/lib/Target/R600/AMDGPUSubtarget.cpp
@@ -70,7 +70,7 @@ AMDGPUSubtarget::AMDGPUSubtarget(StringRef TT, StringRef GPU, StringRef FS,
CaymanISA(false), FlatAddressSpace(false), EnableIRStructurizer(true),
EnablePromoteAlloca(false), EnableIfCvt(true), EnableLoadStoreOpt(false),
WavefrontSize(0), CFALUBug(false), LocalMemorySize(0),
- EnableVGPRSpilling(false),
+ EnableVGPRSpilling(false), SGPRInitBug(false),
FrameLowering(TargetFrameLowering::StackGrowsUp,
64 * 16, // Maximum stack alignment (long16)
0),
diff --git a/lib/Target/R600/AMDGPUSubtarget.h b/lib/Target/R600/AMDGPUSubtarget.h
index 1b0122c..403a3e4 100644
--- a/lib/Target/R600/AMDGPUSubtarget.h
+++ b/lib/Target/R600/AMDGPUSubtarget.h
@@ -44,6 +44,10 @@ public:
VOLCANIC_ISLANDS,
};
+ enum {
+ FIXED_SGPR_COUNT_FOR_INIT_BUG = 80
+ };
+
private:
std::string DevName;
bool Is64bit;
@@ -66,6 +70,7 @@ private:
bool CFALUBug;
int LocalMemorySize;
bool EnableVGPRSpilling;
+ bool SGPRInitBug;
AMDGPUFrameLowering FrameLowering;
std::unique_ptr<AMDGPUTargetLowering> TLInfo;
@@ -206,6 +211,10 @@ public:
return LocalMemorySize;
}
+ bool hasSGPRInitBug() const {
+ return SGPRInitBug;
+ }
+
unsigned getAmdKernelCodeChipID() const;
bool enableMachineScheduler() const override {
diff --git a/lib/Target/R600/AMDGPUTargetMachine.cpp b/lib/Target/R600/AMDGPUTargetMachine.cpp
index a862f3c..cb95835 100644
--- a/lib/Target/R600/AMDGPUTargetMachine.cpp
+++ b/lib/Target/R600/AMDGPUTargetMachine.cpp
@@ -71,10 +71,10 @@ AMDGPUTargetMachine::AMDGPUTargetMachine(const Target &T, StringRef TT,
TargetOptions Options, Reloc::Model RM,
CodeModel::Model CM,
CodeGenOpt::Level OptLevel)
- : LLVMTargetMachine(T, TT, CPU, FS, Options, RM, CM, OptLevel),
- DL(computeDataLayout(TT)),
- TLOF(new TargetLoweringObjectFileELF()),
- Subtarget(TT, CPU, FS, *this), IntrinsicInfo() {
+ : LLVMTargetMachine(T, computeDataLayout(TT), TT, CPU, FS, Options, RM, CM,
+ OptLevel),
+ TLOF(new TargetLoweringObjectFileELF()), Subtarget(TT, CPU, FS, *this),
+ IntrinsicInfo() {
setRequiresStructuredCFG(true);
initAsmInfo();
}
@@ -118,7 +118,7 @@ public:
ScheduleDAGInstrs *
createMachineScheduler(MachineSchedContext *C) const override {
- const AMDGPUSubtarget &ST = TM->getSubtarget<AMDGPUSubtarget>();
+ const AMDGPUSubtarget &ST = *getAMDGPUTargetMachine().getSubtargetImpl();
if (ST.getGeneration() <= AMDGPUSubtarget::NORTHERN_ISLANDS)
return createR600MachineScheduler(C);
return nullptr;
@@ -174,7 +174,7 @@ void AMDGPUPassConfig::addIRPasses() {
}
void AMDGPUPassConfig::addCodeGenPrepare() {
- const AMDGPUSubtarget &ST = TM->getSubtarget<AMDGPUSubtarget>();
+ const AMDGPUSubtarget &ST = *getAMDGPUTargetMachine().getSubtargetImpl();
if (ST.isPromoteAllocaEnabled()) {
addPass(createAMDGPUPromoteAlloca(ST));
addPass(createSROAPass());
@@ -184,7 +184,7 @@ void AMDGPUPassConfig::addCodeGenPrepare() {
bool
AMDGPUPassConfig::addPreISel() {
- const AMDGPUSubtarget &ST = TM->getSubtarget<AMDGPUSubtarget>();
+ const AMDGPUSubtarget &ST = *getAMDGPUTargetMachine().getSubtargetImpl();
addPass(createFlattenCFGPass());
if (ST.IsIRStructurizerEnabled())
addPass(createStructurizeCFGPass());
@@ -211,7 +211,7 @@ void R600PassConfig::addPreRegAlloc() {
}
void R600PassConfig::addPreSched2() {
- const AMDGPUSubtarget &ST = TM->getSubtarget<AMDGPUSubtarget>();
+ const AMDGPUSubtarget &ST = *getAMDGPUTargetMachine().getSubtargetImpl();
addPass(createR600EmitClauseMarkers(), false);
if (ST.isIfCvtEnabled())
addPass(&IfConverterID, false);
@@ -251,15 +251,15 @@ bool GCNPassConfig::addInstSelector() {
}
void GCNPassConfig::addPreRegAlloc() {
- const AMDGPUSubtarget &ST = TM->getSubtarget<AMDGPUSubtarget>();
+ const AMDGPUSubtarget &ST = *getAMDGPUTargetMachine().getSubtargetImpl();
if (getOptLevel() > CodeGenOpt::None && ST.loadStoreOptEnabled()) {
- // Don't do this with no optimizations since it throws away debug info by
- // merging nonadjacent loads.
+ // Don't do this with no optimizations since it throws away debug info by
+ // merging nonadjacent loads.
- // This should be run after scheduling, but before register allocation. It
- // also need extra copies to the address operand to be eliminated.
- initializeSILoadStoreOptimizerPass(*PassRegistry::getPassRegistry());
- insertPass(&MachineSchedulerID, &SILoadStoreOptimizerID);
+ // This should be run after scheduling, but before register allocation. It
+ // also need extra copies to the address operand to be eliminated.
+ initializeSILoadStoreOptimizerPass(*PassRegistry::getPassRegistry());
+ insertPass(&MachineSchedulerID, &SILoadStoreOptimizerID);
}
addPass(createSIShrinkInstructionsPass(), false);
addPass(createSIFixSGPRLiveRangesPass(), false);
diff --git a/lib/Target/R600/AMDGPUTargetMachine.h b/lib/Target/R600/AMDGPUTargetMachine.h
index a691536..785c119 100644
--- a/lib/Target/R600/AMDGPUTargetMachine.h
+++ b/lib/Target/R600/AMDGPUTargetMachine.h
@@ -30,7 +30,6 @@ namespace llvm {
class AMDGPUTargetMachine : public LLVMTargetMachine {
private:
- const DataLayout DL;
protected:
TargetLoweringObjectFile *TLOF;
@@ -42,12 +41,9 @@ public:
StringRef CPU, TargetOptions Options, Reloc::Model RM,
CodeModel::Model CM, CodeGenOpt::Level OL);
~AMDGPUTargetMachine();
- // FIXME: This is currently broken, the DataLayout needs to move to
- // the target machine.
- const DataLayout *getDataLayout() const override {
- return &DL;
- }
- const AMDGPUSubtarget *getSubtargetImpl() const override {
+
+ const AMDGPUSubtarget *getSubtargetImpl() const { return &Subtarget; }
+ const AMDGPUSubtarget *getSubtargetImpl(const Function &) const override {
return &Subtarget;
}
const AMDGPUIntrinsicInfo *getIntrinsicInfo() const override {
diff --git a/lib/Target/R600/AMDGPUTargetTransformInfo.cpp b/lib/Target/R600/AMDGPUTargetTransformInfo.cpp
index 68f4600..96edc41 100644
--- a/lib/Target/R600/AMDGPUTargetTransformInfo.cpp
+++ b/lib/Target/R600/AMDGPUTargetTransformInfo.cpp
@@ -20,6 +20,7 @@
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/CodeGen/BasicTTIImpl.h"
+#include "llvm/IR/Module.h"
#include "llvm/Support/Debug.h"
#include "llvm/Target/CostTable.h"
#include "llvm/Target/TargetLowering.h"
@@ -36,13 +37,15 @@ void AMDGPUTTIImpl::getUnrollingPreferences(Loop *L,
// TODO: Do we want runtime unrolling?
for (const BasicBlock *BB : L->getBlocks()) {
+ const DataLayout &DL = BB->getModule()->getDataLayout();
for (const Instruction &I : *BB) {
const GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(&I);
if (!GEP || GEP->getAddressSpace() != AMDGPUAS::PRIVATE_ADDRESS)
continue;
const Value *Ptr = GEP->getPointerOperand();
- const AllocaInst *Alloca = dyn_cast<AllocaInst>(GetUnderlyingObject(Ptr));
+ const AllocaInst *Alloca =
+ dyn_cast<AllocaInst>(GetUnderlyingObject(Ptr, DL));
if (Alloca) {
// We want to do whatever we can to limit the number of alloca
// instructions that make it through to the code generator. allocas
diff --git a/lib/Target/R600/AMDILCFGStructurizer.cpp b/lib/Target/R600/AMDILCFGStructurizer.cpp
index ee6e8ec..ee6551b 100644
--- a/lib/Target/R600/AMDILCFGStructurizer.cpp
+++ b/lib/Target/R600/AMDILCFGStructurizer.cpp
@@ -10,8 +10,8 @@
#include "AMDGPU.h"
#include "AMDGPUInstrInfo.h"
-#include "R600InstrInfo.h"
#include "AMDGPUSubtarget.h"
+#include "R600InstrInfo.h"
#include "llvm/ADT/DepthFirstIterator.h"
#include "llvm/ADT/SCCIterator.h"
#include "llvm/ADT/SmallVector.h"
@@ -30,6 +30,7 @@
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetMachine.h"
+#include <deque>
using namespace llvm;
@@ -165,6 +166,7 @@ public:
TRI = &TII->getRegisterInfo();
DEBUG(MF.dump(););
OrderedBlks.clear();
+ Visited.clear();
FuncRep = &MF;
MLI = &getAnalysis<MachineLoopInfo>();
DEBUG(dbgs() << "LoopInfo:\n"; PrintLoopinfo(*MLI););
@@ -621,7 +623,7 @@ DebugLoc AMDGPUCFGStructurizer::getLastDebugLocInBB(MachineBasicBlock *MBB) {
for (MachineBasicBlock::iterator It = MBB->begin(); It != MBB->end();
++It) {
MachineInstr *instr = &(*It);
- if (instr->getDebugLoc().isUnknown() == false)
+ if (!instr->getDebugLoc().isUnknown())
DL = instr->getDebugLoc();
}
return DL;
@@ -1075,21 +1077,19 @@ int AMDGPUCFGStructurizer::ifPatternMatch(MachineBasicBlock *MBB) {
}
int AMDGPUCFGStructurizer::loopendPatternMatch() {
- std::vector<MachineLoop *> NestedLoops;
- for (MachineLoopInfo::iterator It = MLI->begin(), E = MLI->end(); It != E;
- ++It)
- for (MachineLoop *ML : depth_first(*It))
- NestedLoops.push_back(ML);
+ std::deque<MachineLoop *> NestedLoops;
+ for (auto &It: *MLI)
+ for (MachineLoop *ML : depth_first(It))
+ NestedLoops.push_front(ML);
if (NestedLoops.size() == 0)
return 0;
- // Process nested loop outside->inside, so "continue" to a outside loop won't
- // be mistaken as "break" of the current loop.
+ // Process nested loops outside->inside (we used push_front),
+ // so a "continue" to an outside loop won't be mistaken for a
+ // "break" out of the current loop.
int Num = 0;
- for (std::vector<MachineLoop *>::reverse_iterator It = NestedLoops.rbegin(),
- E = NestedLoops.rend(); It != E; ++It) {
- MachineLoop *ExaminedLoop = *It;
+ for (MachineLoop *ExaminedLoop : NestedLoops) {
if (ExaminedLoop->getNumBlocks() == 0 || Visited[ExaminedLoop])
continue;
DEBUG(dbgs() << "Processing:\n"; ExaminedLoop->dump(););
@@ -1611,7 +1611,7 @@ void AMDGPUCFGStructurizer::settleLoopcontBlock(MachineBasicBlock *ContingMBB,
bool UseContinueLogical = ((&*ContingMBB->rbegin()) == MI);
- if (UseContinueLogical == false) {
+ if (!UseContinueLogical) {
int BranchOpcode =
TrueBranch == ContMBB ? getBranchNzeroOpcode(OldOpcode) :
getBranchZeroOpcode(OldOpcode);
diff --git a/lib/Target/R600/AsmParser/AMDGPUAsmParser.cpp b/lib/Target/R600/AsmParser/AMDGPUAsmParser.cpp
index 3b4ba1a..49f0f23 100644
--- a/lib/Target/R600/AsmParser/AMDGPUAsmParser.cpp
+++ b/lib/Target/R600/AsmParser/AMDGPUAsmParser.cpp
@@ -46,10 +46,9 @@ class AMDGPUAsmParser : public MCTargetAsmParser {
/// }
public:
- AMDGPUAsmParser(MCSubtargetInfo &_STI, MCAsmParser &_Parser,
- const MCInstrInfo &_MII,
- const MCTargetOptions &Options)
- : MCTargetAsmParser(), STI(_STI), Parser(_Parser) {
+ AMDGPUAsmParser(MCSubtargetInfo &STI, MCAsmParser &Parser,
+ const MCInstrInfo &MII, const MCTargetOptions &Options)
+ : MCTargetAsmParser(), STI(STI), Parser(Parser) {
setAvailableFeatures(ComputeAvailableFeatures(STI.getFeatureBits()));
}
bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc) override;
diff --git a/lib/Target/R600/EvergreenInstructions.td b/lib/Target/R600/EvergreenInstructions.td
index 9f9472c..5560146 100644
--- a/lib/Target/R600/EvergreenInstructions.td
+++ b/lib/Target/R600/EvergreenInstructions.td
@@ -287,9 +287,8 @@ def BFE_INT_eg : R600_3OP <0x5, "BFE_INT",
VecALU
>;
-// XXX: This pattern is broken, disabling for now. See comment in
-// AMDGPUInstructions.td for more info.
-// def : BFEPattern <BFE_UINT_eg>;
+def : BFEPattern <BFE_UINT_eg, MOV_IMM_I32>;
+
def BFI_INT_eg : R600_3OP <0x06, "BFI_INT",
[(set i32:$dst, (AMDGPUbfi i32:$src0, i32:$src1, i32:$src2))],
VecALU
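For reference, BFE_UINT is an unsigned bit-field extract. A scalar model of the operation the re-enabled pattern selects; the 5-bit masking of offset and width is an assumption carried over from the VALU BFE variants, not something this hunk states:

    #include <cstdint>

    // Scalar model (illustrative): extract 'Width' bits of 'Src' starting
    // at bit 'Offset', zero-extended into the low bits of the result.
    static uint32_t BfeU32(uint32_t Src, uint32_t Offset, uint32_t Width) {
      Offset &= 31; // assumed: only the low 5 bits participate
      Width &= 31;
      if (Width == 0)
        return 0;
      return (Src >> Offset) & (0xffffffffu >> (32 - Width));
    }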
diff --git a/lib/Target/R600/InstPrinter/AMDGPUInstPrinter.cpp b/lib/Target/R600/InstPrinter/AMDGPUInstPrinter.cpp
index b66ed10..d62fd3f 100644
--- a/lib/Target/R600/InstPrinter/AMDGPUInstPrinter.cpp
+++ b/lib/Target/R600/InstPrinter/AMDGPUInstPrinter.cpp
@@ -99,6 +99,12 @@ void AMDGPUInstPrinter::printDSOffset1(const MCInst *MI, unsigned OpNo,
printU8ImmDecOperand(MI, OpNo, O);
}
+void AMDGPUInstPrinter::printGDS(const MCInst *MI, unsigned OpNo,
+ raw_ostream &O) {
+ if (MI->getOperand(OpNo).getImm())
+ O << " gds";
+}
+
void AMDGPUInstPrinter::printGLC(const MCInst *MI, unsigned OpNo,
raw_ostream &O) {
if (MI->getOperand(OpNo).getImm())
@@ -208,6 +214,16 @@ void AMDGPUInstPrinter::printRegOperand(unsigned reg, raw_ostream &O) {
O << Type << '[' << RegIdx << ':' << (RegIdx + NumRegs - 1) << ']';
}
+void AMDGPUInstPrinter::printVOPDst(const MCInst *MI, unsigned OpNo,
+ raw_ostream &O) {
+ if (MII.get(MI->getOpcode()).TSFlags & SIInstrFlags::VOP3)
+ O << "_e64 ";
+ else
+ O << "_e32 ";
+
+ printOperand(MI, OpNo, O);
+}
+
void AMDGPUInstPrinter::printImmediate32(uint32_t Imm, raw_ostream &O) {
int32_t SImm = static_cast<int32_t>(Imm);
if (SImm >= -16 && SImm <= 64) {
diff --git a/lib/Target/R600/InstPrinter/AMDGPUInstPrinter.h b/lib/Target/R600/InstPrinter/AMDGPUInstPrinter.h
index 1d43c7a..5289718 100644
--- a/lib/Target/R600/InstPrinter/AMDGPUInstPrinter.h
+++ b/lib/Target/R600/InstPrinter/AMDGPUInstPrinter.h
@@ -44,10 +44,12 @@ private:
void printDSOffset(const MCInst *MI, unsigned OpNo, raw_ostream &O);
void printDSOffset0(const MCInst *MI, unsigned OpNo, raw_ostream &O);
void printDSOffset1(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+ void printGDS(const MCInst *MI, unsigned OpNo, raw_ostream &O);
void printGLC(const MCInst *MI, unsigned OpNo, raw_ostream &O);
void printSLC(const MCInst *MI, unsigned OpNo, raw_ostream &O);
void printTFE(const MCInst *MI, unsigned OpNo, raw_ostream &O);
void printRegOperand(unsigned RegNo, raw_ostream &O);
+ void printVOPDst(const MCInst *MI, unsigned OpNo, raw_ostream &O);
void printImmediate32(uint32_t I, raw_ostream &O);
void printImmediate64(uint64_t I, raw_ostream &O);
void printOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
diff --git a/lib/Target/R600/MCTargetDesc/AMDGPUMCTargetDesc.cpp b/lib/Target/R600/MCTargetDesc/AMDGPUMCTargetDesc.cpp
index 83403ba..fb2deef 100644
--- a/lib/Target/R600/MCTargetDesc/AMDGPUMCTargetDesc.cpp
+++ b/lib/Target/R600/MCTargetDesc/AMDGPUMCTargetDesc.cpp
@@ -17,6 +17,7 @@
#include "InstPrinter/AMDGPUInstPrinter.h"
#include "SIDefines.h"
#include "llvm/MC/MCCodeGenInfo.h"
+#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCInstrInfo.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCStreamer.h"
@@ -72,50 +73,19 @@ static MCInstPrinter *createAMDGPUMCInstPrinter(const Target &T,
return new AMDGPUInstPrinter(MAI, MII, MRI);
}
-static MCCodeEmitter *createAMDGPUMCCodeEmitter(const MCInstrInfo &MCII,
- const MCRegisterInfo &MRI,
- const MCSubtargetInfo &STI,
- MCContext &Ctx) {
- if (STI.getFeatureBits() & AMDGPU::Feature64BitPtr) {
- return createSIMCCodeEmitter(MCII, MRI, STI, Ctx);
- } else {
- return createR600MCCodeEmitter(MCII, MRI, STI);
- }
-}
-
-static MCStreamer *createMCStreamer(const Target &T, StringRef TT,
- MCContext &Ctx, MCAsmBackend &MAB,
- raw_ostream &_OS, MCCodeEmitter *_Emitter,
- const MCSubtargetInfo &STI, bool RelaxAll) {
- return createELFStreamer(Ctx, MAB, _OS, _Emitter, false);
-}
-
extern "C" void LLVMInitializeR600TargetMC() {
+ for (Target *T : {&TheAMDGPUTarget, &TheGCNTarget}) {
+ RegisterMCAsmInfo<AMDGPUMCAsmInfo> X(*T);
+
+ TargetRegistry::RegisterMCCodeGenInfo(*T, createAMDGPUMCCodeGenInfo);
+ TargetRegistry::RegisterMCInstrInfo(*T, createAMDGPUMCInstrInfo);
+ TargetRegistry::RegisterMCRegInfo(*T, createAMDGPUMCRegisterInfo);
+ TargetRegistry::RegisterMCSubtargetInfo(*T, createAMDGPUMCSubtargetInfo);
+ TargetRegistry::RegisterMCInstPrinter(*T, createAMDGPUMCInstPrinter);
+ TargetRegistry::RegisterMCAsmBackend(*T, createAMDGPUAsmBackend);
+ }
- RegisterMCAsmInfo<AMDGPUMCAsmInfo> Y(TheAMDGPUTarget);
- RegisterMCAsmInfo<AMDGPUMCAsmInfo> Z(TheGCNTarget);
-
- TargetRegistry::RegisterMCCodeGenInfo(TheAMDGPUTarget, createAMDGPUMCCodeGenInfo);
- TargetRegistry::RegisterMCCodeGenInfo(TheGCNTarget, createAMDGPUMCCodeGenInfo);
-
- TargetRegistry::RegisterMCInstrInfo(TheAMDGPUTarget, createAMDGPUMCInstrInfo);
- TargetRegistry::RegisterMCInstrInfo(TheGCNTarget, createAMDGPUMCInstrInfo);
-
- TargetRegistry::RegisterMCRegInfo(TheAMDGPUTarget, createAMDGPUMCRegisterInfo);
- TargetRegistry::RegisterMCRegInfo(TheGCNTarget, createAMDGPUMCRegisterInfo);
-
- TargetRegistry::RegisterMCSubtargetInfo(TheAMDGPUTarget, createAMDGPUMCSubtargetInfo);
- TargetRegistry::RegisterMCSubtargetInfo(TheGCNTarget, createAMDGPUMCSubtargetInfo);
-
- TargetRegistry::RegisterMCInstPrinter(TheAMDGPUTarget, createAMDGPUMCInstPrinter);
- TargetRegistry::RegisterMCInstPrinter(TheGCNTarget, createAMDGPUMCInstPrinter);
-
- TargetRegistry::RegisterMCCodeEmitter(TheAMDGPUTarget, createAMDGPUMCCodeEmitter);
- TargetRegistry::RegisterMCCodeEmitter(TheGCNTarget, createAMDGPUMCCodeEmitter);
-
- TargetRegistry::RegisterMCAsmBackend(TheAMDGPUTarget, createAMDGPUAsmBackend);
- TargetRegistry::RegisterMCAsmBackend(TheGCNTarget, createAMDGPUAsmBackend);
-
- TargetRegistry::RegisterMCObjectStreamer(TheAMDGPUTarget, createMCStreamer);
- TargetRegistry::RegisterMCObjectStreamer(TheGCNTarget, createMCStreamer);
+ TargetRegistry::RegisterMCCodeEmitter(TheAMDGPUTarget,
+ createR600MCCodeEmitter);
+ TargetRegistry::RegisterMCCodeEmitter(TheGCNTarget, createSIMCCodeEmitter);
}
diff --git a/lib/Target/R600/MCTargetDesc/AMDGPUMCTargetDesc.h b/lib/Target/R600/MCTargetDesc/AMDGPUMCTargetDesc.h
index bc8cd53..23f0196 100644
--- a/lib/Target/R600/MCTargetDesc/AMDGPUMCTargetDesc.h
+++ b/lib/Target/R600/MCTargetDesc/AMDGPUMCTargetDesc.h
@@ -16,6 +16,7 @@
#ifndef LLVM_LIB_TARGET_R600_MCTARGETDESC_AMDGPUMCTARGETDESC_H
#define LLVM_LIB_TARGET_R600_MCTARGETDESC_AMDGPUMCTARGETDESC_H
+#include "llvm/Support/DataTypes.h"
#include "llvm/ADT/StringRef.h"
namespace llvm {
@@ -34,11 +35,10 @@ extern Target TheGCNTarget;
MCCodeEmitter *createR600MCCodeEmitter(const MCInstrInfo &MCII,
const MCRegisterInfo &MRI,
- const MCSubtargetInfo &STI);
+ MCContext &Ctx);
MCCodeEmitter *createSIMCCodeEmitter(const MCInstrInfo &MCII,
const MCRegisterInfo &MRI,
- const MCSubtargetInfo &STI,
MCContext &Ctx);
MCAsmBackend *createAMDGPUAsmBackend(const Target &T, const MCRegisterInfo &MRI,
diff --git a/lib/Target/R600/MCTargetDesc/R600MCCodeEmitter.cpp b/lib/Target/R600/MCTargetDesc/R600MCCodeEmitter.cpp
index 8a555ff..fa25f59 100644
--- a/lib/Target/R600/MCTargetDesc/R600MCCodeEmitter.cpp
+++ b/lib/Target/R600/MCTargetDesc/R600MCCodeEmitter.cpp
@@ -81,8 +81,8 @@ enum FCInstr {
};
MCCodeEmitter *llvm::createR600MCCodeEmitter(const MCInstrInfo &MCII,
- const MCRegisterInfo &MRI,
- const MCSubtargetInfo &STI) {
+ const MCRegisterInfo &MRI,
+ MCContext &Ctx) {
return new R600MCCodeEmitter(MCII, MRI);
}
diff --git a/lib/Target/R600/MCTargetDesc/SIMCCodeEmitter.cpp b/lib/Target/R600/MCTargetDesc/SIMCCodeEmitter.cpp
index 7e23772..760aa37 100644
--- a/lib/Target/R600/MCTargetDesc/SIMCCodeEmitter.cpp
+++ b/lib/Target/R600/MCTargetDesc/SIMCCodeEmitter.cpp
@@ -72,7 +72,6 @@ public:
MCCodeEmitter *llvm::createSIMCCodeEmitter(const MCInstrInfo &MCII,
const MCRegisterInfo &MRI,
- const MCSubtargetInfo &STI,
MCContext &Ctx) {
return new SIMCCodeEmitter(MCII, MRI, Ctx);
}
diff --git a/lib/Target/R600/Processors.td b/lib/Target/R600/Processors.td
index fb5aa61..82c6d13 100644
--- a/lib/Target/R600/Processors.td
+++ b/lib/Target/R600/Processors.td
@@ -119,8 +119,12 @@ def : ProcessorModel<"mullins", SIQuarterSpeedModel, [FeatureSeaIslands]>;
// Volcanic Islands
//===----------------------------------------------------------------------===//
-def : ProcessorModel<"tonga", SIQuarterSpeedModel, [FeatureVolcanicIslands]>;
+def : ProcessorModel<"tonga", SIQuarterSpeedModel,
+ [FeatureVolcanicIslands, FeatureSGPRInitBug]
+>;
-def : ProcessorModel<"iceland", SIQuarterSpeedModel, [FeatureVolcanicIslands]>;
+def : ProcessorModel<"iceland", SIQuarterSpeedModel,
+ [FeatureVolcanicIslands, FeatureSGPRInitBug]
+>;
def : ProcessorModel<"carrizo", SIQuarterSpeedModel, [FeatureVolcanicIslands]>;
diff --git a/lib/Target/R600/R600ClauseMergePass.cpp b/lib/Target/R600/R600ClauseMergePass.cpp
index f07be00..3cb9021 100644
--- a/lib/Target/R600/R600ClauseMergePass.cpp
+++ b/lib/Target/R600/R600ClauseMergePass.cpp
@@ -14,11 +14,11 @@
//===----------------------------------------------------------------------===//
#include "AMDGPU.h"
+#include "AMDGPUSubtarget.h"
#include "R600Defines.h"
#include "R600InstrInfo.h"
#include "R600MachineFunctionInfo.h"
#include "R600RegisterInfo.h"
-#include "AMDGPUSubtarget.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
diff --git a/lib/Target/R600/R600ISelLowering.cpp b/lib/Target/R600/R600ISelLowering.cpp
index c738611..a34e2dc 100644
--- a/lib/Target/R600/R600ISelLowering.cpp
+++ b/lib/Target/R600/R600ISelLowering.cpp
@@ -837,6 +837,10 @@ SDValue R600TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const
case Intrinsic::AMDGPU_rsq:
// XXX - I'm assuming SI's RSQ_LEGACY matches R600's behavior.
return DAG.getNode(AMDGPUISD::RSQ_LEGACY, DL, VT, Op.getOperand(1));
+
+ case AMDGPUIntrinsic::AMDGPU_fract:
+ case AMDGPUIntrinsic::AMDIL_fraction: // Legacy name.
+ return DAG.getNode(AMDGPUISD::FRACT, DL, VT, Op.getOperand(1));
}
// break out of case ISD::INTRINSIC_WO_CHAIN in switch(Op.getOpcode())
break;
@@ -1479,8 +1483,8 @@ SDValue R600TargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const
// Lower loads constant address space global variable loads
if (LoadNode->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS &&
- isa<GlobalVariable>(
- GetUnderlyingObject(LoadNode->getMemOperand()->getValue()))) {
+ isa<GlobalVariable>(GetUnderlyingObject(
+ LoadNode->getMemOperand()->getValue(), *getDataLayout()))) {
SDValue Ptr = DAG.getZExtOrTrunc(LoadNode->getBasePtr(), DL,
getPointerTy(AMDGPUAS::PRIVATE_ADDRESS));
@@ -1867,7 +1871,7 @@ SDValue R600TargetLowering::PerformDAGCombine(SDNode *N,
SelectCC.getOperand(0), // LHS
SelectCC.getOperand(1), // RHS
DAG.getConstant(-1, MVT::i32), // True
- DAG.getConstant(0, MVT::i32), // Flase
+ DAG.getConstant(0, MVT::i32), // False
SelectCC.getOperand(4)); // CC
break;
diff --git a/lib/Target/R600/R600InstrInfo.cpp b/lib/Target/R600/R600InstrInfo.cpp
index 653fd0d..5f0bdf3 100644
--- a/lib/Target/R600/R600InstrInfo.cpp
+++ b/lib/Target/R600/R600InstrInfo.cpp
@@ -29,9 +29,7 @@ using namespace llvm;
#include "AMDGPUGenDFAPacketizer.inc"
R600InstrInfo::R600InstrInfo(const AMDGPUSubtarget &st)
- : AMDGPUInstrInfo(st),
- RI(st)
- { }
+ : AMDGPUInstrInfo(st), RI() {}
const R600RegisterInfo &R600InstrInfo::getRegisterInfo() const {
return RI;
@@ -268,9 +266,8 @@ int R600InstrInfo::getSrcIdx(unsigned Opcode, unsigned SrcNum) const {
return getOperandIdx(Opcode, OpTable[SrcNum]);
}
-#define SRC_SEL_ROWS 11
int R600InstrInfo::getSelIdx(unsigned Opcode, unsigned SrcIdx) const {
- static const unsigned SrcSelTable[SRC_SEL_ROWS][2] = {
+ static const unsigned SrcSelTable[][2] = {
{AMDGPU::OpName::src0, AMDGPU::OpName::src0_sel},
{AMDGPU::OpName::src1, AMDGPU::OpName::src1_sel},
{AMDGPU::OpName::src2, AMDGPU::OpName::src2_sel},
@@ -284,14 +281,13 @@ int R600InstrInfo::getSelIdx(unsigned Opcode, unsigned SrcIdx) const {
{AMDGPU::OpName::src1_W, AMDGPU::OpName::src1_sel_W}
};
- for (unsigned i = 0; i < SRC_SEL_ROWS; ++i) {
- if (getOperandIdx(Opcode, SrcSelTable[i][0]) == (int)SrcIdx) {
- return getOperandIdx(Opcode, SrcSelTable[i][1]);
+ for (const auto &Row : SrcSelTable) {
+ if (getOperandIdx(Opcode, Row[0]) == (int)SrcIdx) {
+ return getOperandIdx(Opcode, Row[1]);
}
}
return -1;
}
-#undef SRC_SEL_ROWS
SmallVector<std::pair<MachineOperand *, int64_t>, 3>
R600InstrInfo::getSrcs(MachineInstr *MI) const {
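Dropping SRC_SEL_ROWS works because the extent of a static array is part of its type, so a range-based for deduces the row count at compile time. A minimal standalone illustration:

    #include <cstdio>

    int main() {
      // No explicit row count needed: the loop sees all three rows.
      static const unsigned Table[][2] = {{10, 11}, {20, 21}, {30, 31}};
      for (const auto &Row : Table)
        std::printf("%u -> %u\n", Row[0], Row[1]);
    }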
diff --git a/lib/Target/R600/R600OptimizeVectorRegisters.cpp b/lib/Target/R600/R600OptimizeVectorRegisters.cpp
index 742c0e0..0c06ccc 100644
--- a/lib/Target/R600/R600OptimizeVectorRegisters.cpp
+++ b/lib/Target/R600/R600OptimizeVectorRegisters.cpp
@@ -27,10 +27,9 @@
/// to reduce MOV count.
//===----------------------------------------------------------------------===//
-#include "llvm/Support/Debug.h"
#include "AMDGPU.h"
-#include "R600InstrInfo.h"
#include "AMDGPUSubtarget.h"
+#include "R600InstrInfo.h"
#include "llvm/CodeGen/DFAPacketizer.h"
#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
@@ -38,6 +37,7 @@
#include "llvm/CodeGen/MachineLoopInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/Passes.h"
+#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
diff --git a/lib/Target/R600/R600RegisterInfo.cpp b/lib/Target/R600/R600RegisterInfo.cpp
index dc95675..fb0359c 100644
--- a/lib/Target/R600/R600RegisterInfo.cpp
+++ b/lib/Target/R600/R600RegisterInfo.cpp
@@ -20,14 +20,16 @@
using namespace llvm;
-R600RegisterInfo::R600RegisterInfo(const AMDGPUSubtarget &st)
-: AMDGPURegisterInfo(st)
- { RCW.RegWeight = 0; RCW.WeightLimit = 0;}
+R600RegisterInfo::R600RegisterInfo() : AMDGPURegisterInfo() {
+ RCW.RegWeight = 0;
+ RCW.WeightLimit = 0;
+}
BitVector R600RegisterInfo::getReservedRegs(const MachineFunction &MF) const {
BitVector Reserved(getNumRegs());
- const R600InstrInfo *TII = static_cast<const R600InstrInfo*>(ST.getInstrInfo());
+ const R600InstrInfo *TII =
+ static_cast<const R600InstrInfo *>(MF.getSubtarget().getInstrInfo());
Reserved.set(AMDGPU::ZERO);
Reserved.set(AMDGPU::HALF);
diff --git a/lib/Target/R600/R600RegisterInfo.h b/lib/Target/R600/R600RegisterInfo.h
index f1a8a41..9713e60 100644
--- a/lib/Target/R600/R600RegisterInfo.h
+++ b/lib/Target/R600/R600RegisterInfo.h
@@ -24,7 +24,7 @@ class AMDGPUSubtarget;
struct R600RegisterInfo : public AMDGPURegisterInfo {
RegClassWeight RCW;
- R600RegisterInfo(const AMDGPUSubtarget &st);
+ R600RegisterInfo();
BitVector getReservedRegs(const MachineFunction &MF) const override;
diff --git a/lib/Target/R600/SIFixSGPRLiveRanges.cpp b/lib/Target/R600/SIFixSGPRLiveRanges.cpp
index f34c375..0c54446 100644
--- a/lib/Target/R600/SIFixSGPRLiveRanges.cpp
+++ b/lib/Target/R600/SIFixSGPRLiveRanges.cpp
@@ -54,6 +54,7 @@
#include "llvm/CodeGen/MachinePostDominators.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetMachine.h"
using namespace llvm;
diff --git a/lib/Target/R600/SIFoldOperands.cpp b/lib/Target/R600/SIFoldOperands.cpp
index ae4b05d..7ba5a6d 100644
--- a/lib/Target/R600/SIFoldOperands.cpp
+++ b/lib/Target/R600/SIFoldOperands.cpp
@@ -17,9 +17,10 @@
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Function.h"
+#include "llvm/IR/LLVMContext.h"
#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetMachine.h"
#define DEBUG_TYPE "si-fold-operands"
diff --git a/lib/Target/R600/SIISelLowering.cpp b/lib/Target/R600/SIISelLowering.cpp
index 7d794b8..bd0c3c2 100644
--- a/lib/Target/R600/SIISelLowering.cpp
+++ b/lib/Target/R600/SIISelLowering.cpp
@@ -172,16 +172,12 @@ SITargetLowering::SITargetLowering(TargetMachine &TM,
setOperationAction(ISD::UDIV, MVT::i64, Expand);
setOperationAction(ISD::UREM, MVT::i64, Expand);
- // We only support LOAD/STORE and vector manipulation ops for vectors
- // with > 4 elements.
- MVT VecTypes[] = {
- MVT::v8i32, MVT::v8f32, MVT::v16i32, MVT::v16f32
- };
-
setOperationAction(ISD::SELECT_CC, MVT::i1, Expand);
setOperationAction(ISD::SELECT, MVT::i1, Promote);
- for (MVT VT : VecTypes) {
+ // We only support LOAD/STORE and vector manipulation ops for vectors
+ // with > 4 elements.
+ for (MVT VT : {MVT::v8i32, MVT::v8f32, MVT::v16i32, MVT::v16f32}) {
for (unsigned Op = 0; Op < ISD::BUILTIN_OP_END; ++Op) {
switch(Op) {
case ISD::LOAD:
@@ -206,10 +202,10 @@ SITargetLowering::SITargetLowering(TargetMachine &TM,
if (Subtarget->getGeneration() >= AMDGPUSubtarget::SEA_ISLANDS) {
setOperationAction(ISD::FTRUNC, MVT::f64, Legal);
setOperationAction(ISD::FCEIL, MVT::f64, Legal);
- setOperationAction(ISD::FFLOOR, MVT::f64, Legal);
setOperationAction(ISD::FRINT, MVT::f64, Legal);
}
+ setOperationAction(ISD::FFLOOR, MVT::f64, Legal);
setOperationAction(ISD::FDIV, MVT::f32, Custom);
setOperationAction(ISD::FDIV, MVT::f64, Custom);
@@ -932,6 +928,12 @@ SDValue SITargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
Op.getOperand(1),
Op.getOperand(2),
Op.getOperand(3));
+
+ case AMDGPUIntrinsic::AMDGPU_fract:
+ case AMDGPUIntrinsic::AMDIL_fraction: // Legacy name.
+ return DAG.getNode(ISD::FSUB, DL, VT, Op.getOperand(1),
+ DAG.getNode(ISD::FFLOOR, DL, VT, Op.getOperand(1)));
+
default:
return AMDGPUTargetLowering::LowerOperation(Op, DAG);
}
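The SI lowering above expands fract as x - floor(x). A scalar model for reference, e.g. fract(1.25) = 0.25 and fract(-0.25) = 0.75:

    #include <cassert>
    #include <cmath>

    // Scalar model of the FSUB/FFLOOR expansion: fract(x) = x - floor(x).
    static double FractModel(double X) { return X - std::floor(X); }

    int main() {
      assert(FractModel(1.25) == 0.25);
      assert(FractModel(-0.25) == 0.75);
    }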
@@ -1346,6 +1348,35 @@ SDValue SITargetLowering::performUCharToFloatCombine(SDNode *N,
return SDValue();
}
+/// \brief Return true if the given offset Size in bytes can be folded into
+/// the immediate offsets of a memory instruction for the given address space.
+static bool canFoldOffset(unsigned OffsetSize, unsigned AS,
+ const AMDGPUSubtarget &STI) {
+ switch (AS) {
+ case AMDGPUAS::GLOBAL_ADDRESS: {
+ // MUBUF instructions have a 12-bit offset in bytes.
+ return isUInt<12>(OffsetSize);
+ }
+ case AMDGPUAS::CONSTANT_ADDRESS: {
+ // SMRD instructions have an 8-bit offset in dwords on SI and
+ // a 20-bit offset in bytes on VI.
+ if (STI.getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS)
+ return isUInt<20>(OffsetSize);
+ else
+ return (OffsetSize % 4 == 0) && isUInt<8>(OffsetSize / 4);
+ }
+ case AMDGPUAS::LOCAL_ADDRESS:
+ case AMDGPUAS::REGION_ADDRESS: {
+ // The single offset versions have a 16-bit offset in bytes.
+ return isUInt<16>(OffsetSize);
+ }
+ case AMDGPUAS::PRIVATE_ADDRESS:
+ // Indirect register addressing does not use any offsets.
+ default:
+ return false;
+ }
+}
+
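A quick sanity check of the SI branch for the constant address space: the offset must be dword-aligned and fit in 8 bits once scaled to dwords (isUInt<8> is spelled out as < 256 below):

    #include <cassert>

    // Mirrors the SI case of canFoldOffset for CONSTANT_ADDRESS.
    static bool FoldsOnSI(unsigned OffsetSize) {
      return (OffsetSize % 4 == 0) && (OffsetSize / 4) < 256;
    }

    int main() {
      assert(FoldsOnSI(1020));  // 255 dwords: fits
      assert(!FoldsOnSI(1021)); // not dword-aligned
      assert(!FoldsOnSI(1024)); // 256 dwords: overflows the 8-bit field
    }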
// (shl (add x, c1), c2) -> add (shl x, c2), (shl c1, c2)
// This is a variant of
@@ -1377,13 +1408,10 @@ SDValue SITargetLowering::performSHLPtrCombine(SDNode *N,
if (!CAdd)
return SDValue();
- const SIInstrInfo *TII =
- static_cast<const SIInstrInfo *>(Subtarget->getInstrInfo());
-
// If the resulting offset is too large, we can't fold it into the addressing
// mode offset.
APInt Offset = CAdd->getAPIntValue() << CN1->getAPIntValue();
- if (!TII->canFoldOffset(Offset.getZExtValue(), AddrSpace))
+ if (!canFoldOffset(Offset.getZExtValue(), AddrSpace, *Subtarget))
return SDValue();
SelectionDAG &DAG = DCI.DAG;
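The combine relies on shifts distributing over addition; for unsigned types the identity holds even when the intermediate sum wraps. A one-line check:

    #include <cassert>
    #include <cstdint>

    int main() {
      // (x + c1) << c2 == (x << c2) + (c1 << c2), so the shifted constant
      // can be folded into the addressing-mode offset when small enough.
      uint32_t X = 8, C1 = 4, C2 = 2;
      assert(((X + C1) << C2) == ((X << C2) + (C1 << C2)));
    }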
@@ -1595,6 +1623,7 @@ SDValue SITargetLowering::PerformDAGCombine(SDNode *N,
case AMDGPUISD::UMAX:
case AMDGPUISD::UMIN: {
if (DCI.getDAGCombineLevel() >= AfterLegalizeDAG &&
+ N->getValueType(0) != MVT::f64 &&
getTargetMachine().getOptLevel() > CodeGenOpt::None)
return performMin3Max3Combine(N, DCI);
break;
diff --git a/lib/Target/R600/SIInsertWaits.cpp b/lib/Target/R600/SIInsertWaits.cpp
index 50f20ac..90a37f1 100644
--- a/lib/Target/R600/SIInsertWaits.cpp
+++ b/lib/Target/R600/SIInsertWaits.cpp
@@ -259,7 +259,8 @@ void SIInsertWaits::pushInstruction(MachineBasicBlock &MBB,
return;
}
- if (TRI->ST.getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS) {
+ if (MBB.getParent()->getSubtarget<AMDGPUSubtarget>().getGeneration() >=
+ AMDGPUSubtarget::VOLCANIC_ISLANDS) {
// Any occurrence of consecutive VMEM or SMEM instructions forms a VMEM
// or SMEM clause, respectively.
//
@@ -412,7 +413,8 @@ Counters SIInsertWaits::handleOperands(MachineInstr &MI) {
void SIInsertWaits::handleSendMsg(MachineBasicBlock &MBB,
MachineBasicBlock::iterator I) {
- if (TRI->ST.getGeneration() < AMDGPUSubtarget::VOLCANIC_ISLANDS)
+ if (MBB.getParent()->getSubtarget<AMDGPUSubtarget>().getGeneration() <
+ AMDGPUSubtarget::VOLCANIC_ISLANDS)
return;
// There must be "S_NOP 0" between an instruction writing M0 and S_SENDMSG.
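Both hunks in this file (and several later ones) replace the subtarget cached in the register info (TRI->ST) with a per-function query. The pattern, as a small illustrative sketch (the helper is not part of the patch):

    #include "llvm/CodeGen/MachineFunction.h"

    // Sketch of the query pattern used above: derive the subtarget from the
    // function being compiled rather than from state cached at construction.
    template <typename SubtargetT>
    static const SubtargetT &subtargetOf(const llvm::MachineFunction &MF) {
      return MF.getSubtarget<SubtargetT>();
    }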
diff --git a/lib/Target/R600/SIInstrFormats.td b/lib/Target/R600/SIInstrFormats.td
index c90c741..4167590 100644
--- a/lib/Target/R600/SIInstrFormats.td
+++ b/lib/Target/R600/SIInstrFormats.td
@@ -83,6 +83,9 @@ class Enc64 {
int Size = 8;
}
+class VOPDstOperand <RegisterClass rc> : RegisterOperand <rc, "printVOPDst">;
+def VOPDstVCC : VOPDstOperand <VCCReg>;
+
let Uses = [EXEC] in {
class VOPAnyCommon <dag outs, dag ins, string asm, list<dag> pattern> :
@@ -96,7 +99,7 @@ class VOPAnyCommon <dag outs, dag ins, string asm, list<dag> pattern> :
}
class VOPCCommon <dag ins, string asm, list<dag> pattern> :
- VOPAnyCommon <(outs VCCReg:$dst), ins, asm, pattern> {
+ VOPAnyCommon <(outs VOPDstVCC:$dst), ins, asm, pattern> {
let DisableEncoding = "$dst";
let VOPC = 1;
@@ -577,6 +580,12 @@ class DS <dag outs, dag ins, string asm, list<dag> pattern> :
let DS = 1;
let UseNamedOperandTable = 1;
let DisableEncoding = "$m0";
+
+ // Most instructions load and store data, so set this as the default.
+ let mayLoad = 1;
+ let mayStore = 1;
+
+ let hasSideEffects = 0;
let SchedRW = [WriteLDS];
}
diff --git a/lib/Target/R600/SIInstrInfo.cpp b/lib/Target/R600/SIInstrInfo.cpp
index 4f1e5ad..ba98ad7 100644
--- a/lib/Target/R600/SIInstrInfo.cpp
+++ b/lib/Target/R600/SIInstrInfo.cpp
@@ -28,7 +28,7 @@
using namespace llvm;
SIInstrInfo::SIInstrInfo(const AMDGPUSubtarget &st)
- : AMDGPUInstrInfo(st), RI(st) {}
+ : AMDGPUInstrInfo(st), RI() {}
//===----------------------------------------------------------------------===//
// TargetInstrInfo callbacks
@@ -120,12 +120,20 @@ bool SIInstrInfo::areLoadsFromSameBasePtr(SDNode *Load0, SDNode *Load1,
if (Load0->getOperand(0) != Load1->getOperand(0))
return false;
+ const ConstantSDNode *Load0Offset =
+ dyn_cast<ConstantSDNode>(Load0->getOperand(1));
+ const ConstantSDNode *Load1Offset =
+ dyn_cast<ConstantSDNode>(Load1->getOperand(1));
+
+ if (!Load0Offset || !Load1Offset)
+ return false;
+
// Check chain.
if (findChainOperand(Load0) != findChainOperand(Load1))
return false;
- Offset0 = cast<ConstantSDNode>(Load0->getOperand(1))->getZExtValue();
- Offset1 = cast<ConstantSDNode>(Load1->getOperand(1))->getZExtValue();
+ Offset0 = Load0Offset->getZExtValue();
+ Offset1 = Load1Offset->getZExtValue();
return true;
}
@@ -418,7 +426,9 @@ SIInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
}
}
-unsigned SIInstrInfo::commuteOpcode(unsigned Opcode) const {
+unsigned SIInstrInfo::commuteOpcode(const MachineInstr &MI) const {
+ const unsigned Opcode = MI.getOpcode();
+
int NewOpc;
// Try to map original to commuted opcode
@@ -583,10 +593,7 @@ unsigned SIInstrInfo::calculateLDSSpillAddress(MachineBasicBlock &MBB,
unsigned TIDIGZReg = TRI->getPreloadedValue(*MF, SIRegisterInfo::TIDIG_Z);
unsigned InputPtrReg =
TRI->getPreloadedValue(*MF, SIRegisterInfo::INPUT_PTR);
- static const unsigned TIDIGRegs[3] = {
- TIDIGXReg, TIDIGYReg, TIDIGZReg
- };
- for (unsigned Reg : TIDIGRegs) {
+ for (unsigned Reg : {TIDIGXReg, TIDIGYReg, TIDIGZReg}) {
if (!Entry.isLiveIn(Reg))
Entry.addLiveIn(Reg);
}
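As in the SITargetLowering hunk earlier, a braced list feeds the range-based for directly, making the named TIDIGRegs array unnecessary. A standalone illustration:

    #include <cstdio>
    #include <initializer_list>

    int main() {
      // A braced list in a range-for binds to a std::initializer_list.
      for (unsigned Reg : {1u, 2u, 3u})
        std::printf("reg %u\n", Reg);
    }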
@@ -720,6 +727,26 @@ bool SIInstrInfo::expandPostRAPseudo(MachineBasicBlock::iterator MI) const {
MI->eraseFromParent();
break;
}
+
+ case AMDGPU::V_CNDMASK_B64_PSEUDO: {
+ unsigned Dst = MI->getOperand(0).getReg();
+ unsigned DstLo = RI.getSubReg(Dst, AMDGPU::sub0);
+ unsigned DstHi = RI.getSubReg(Dst, AMDGPU::sub1);
+ unsigned Src0 = MI->getOperand(1).getReg();
+ unsigned Src1 = MI->getOperand(2).getReg();
+ const MachineOperand &SrcCond = MI->getOperand(3);
+
+ BuildMI(MBB, MI, DL, get(AMDGPU::V_CNDMASK_B32_e64), DstLo)
+ .addReg(RI.getSubReg(Src0, AMDGPU::sub0))
+ .addReg(RI.getSubReg(Src1, AMDGPU::sub0))
+ .addOperand(SrcCond);
+ BuildMI(MBB, MI, DL, get(AMDGPU::V_CNDMASK_B32_e64), DstHi)
+ .addReg(RI.getSubReg(Src0, AMDGPU::sub1))
+ .addReg(RI.getSubReg(Src1, AMDGPU::sub1))
+ .addOperand(SrcCond);
+ MI->eraseFromParent();
+ break;
+ }
}
return true;
}
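The new pseudo expands a 64-bit conditional move into two 32-bit ones over the sub0/sub1 halves. A scalar model of why that suffices; it deliberately abstracts away which source a set VCC bit selects:

    #include <cassert>
    #include <cstdint>
    #include <initializer_list>

    int main() {
      // Selecting each 32-bit half independently with the same condition
      // reconstructs the 64-bit select.
      auto Cnd32 = [](bool C, uint32_t A, uint32_t B) { return C ? A : B; };
      uint64_t S0 = 0x1111222233334444ULL, S1 = 0x5555666677778888ULL;
      for (bool Cond : {false, true}) {
        uint64_t Lo = Cnd32(Cond, (uint32_t)S0, (uint32_t)S1);
        uint64_t Hi = Cnd32(Cond, (uint32_t)(S0 >> 32), (uint32_t)(S1 >> 32));
        assert(((Hi << 32) | Lo) == (Cond ? S0 : S1));
      }
    }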
@@ -792,7 +819,7 @@ MachineInstr *SIInstrInfo::commuteInstruction(MachineInstr *MI,
}
if (MI)
- MI->setDesc(get(commuteOpcode(MI->getOpcode())));
+ MI->setDesc(get(commuteOpcode(*MI)));
return MI;
}
@@ -1172,32 +1199,6 @@ bool SIInstrInfo::isImmOperandLegal(const MachineInstr *MI, unsigned OpNo,
return RI.opCanUseInlineConstant(OpInfo.OperandType);
}
-bool SIInstrInfo::canFoldOffset(unsigned OffsetSize, unsigned AS) const {
- switch (AS) {
- case AMDGPUAS::GLOBAL_ADDRESS: {
- // MUBUF instructions a 12-bit offset in bytes.
- return isUInt<12>(OffsetSize);
- }
- case AMDGPUAS::CONSTANT_ADDRESS: {
- // SMRD instructions have an 8-bit offset in dwords on SI and
- // a 20-bit offset in bytes on VI.
- if (RI.ST.getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS)
- return isUInt<20>(OffsetSize);
- else
- return (OffsetSize % 4 == 0) && isUInt<8>(OffsetSize / 4);
- }
- case AMDGPUAS::LOCAL_ADDRESS:
- case AMDGPUAS::REGION_ADDRESS: {
- // The single offset versions have a 16-bit offset in bytes.
- return isUInt<16>(OffsetSize);
- }
- case AMDGPUAS::PRIVATE_ADDRESS:
- // Indirect register addressing does not use any offsets.
- default:
- return 0;
- }
-}
-
bool SIInstrInfo::hasVALU32BitEncoding(unsigned Opcode) const {
int Op32 = AMDGPU::getVOPe32(Opcode);
if (Op32 == -1)
@@ -1405,6 +1406,7 @@ unsigned SIInstrInfo::getVALUOp(const MachineInstr &MI) {
case AMDGPU::S_SEXT_I32_I16: return AMDGPU::V_BFE_I32;
case AMDGPU::S_BFE_U32: return AMDGPU::V_BFE_U32;
case AMDGPU::S_BFE_I32: return AMDGPU::V_BFE_I32;
+ case AMDGPU::S_BFM_B32: return AMDGPU::V_BFM_B32_e64;
case AMDGPU::S_BREV_B32: return AMDGPU::V_BFREV_B32_e32;
case AMDGPU::S_NOT_B32: return AMDGPU::V_NOT_B32_e32;
case AMDGPU::S_NOT_B64: return AMDGPU::V_NOT_B32_e32;
@@ -1423,6 +1425,7 @@ unsigned SIInstrInfo::getVALUOp(const MachineInstr &MI) {
case AMDGPU::S_BCNT1_I32_B32: return AMDGPU::V_BCNT_U32_B32_e64;
case AMDGPU::S_FF1_I32_B32: return AMDGPU::V_FFBL_B32_e32;
case AMDGPU::S_FLBIT_I32_B32: return AMDGPU::V_FFBH_U32_e32;
+ case AMDGPU::S_FLBIT_I32: return AMDGPU::V_FFBH_I32_e64;
}
}
@@ -1865,12 +1868,15 @@ void SIInstrInfo::legalizeOperands(MachineInstr *MI) const {
MachineInstr *Addr64 =
BuildMI(MBB, MI, MI->getDebugLoc(), get(Addr64Opcode))
.addOperand(*VData)
- .addOperand(*SRsrc)
.addReg(AMDGPU::NoRegister) // Dummy value for vaddr.
// This will be replaced later
// with the new value of vaddr.
+ .addOperand(*SRsrc)
.addOperand(*SOffset)
- .addOperand(*Offset);
+ .addOperand(*Offset)
+ .addImm(0) // glc
+ .addImm(0) // slc
+ .addImm(0); // tfe
MI->removeFromParent();
MI = Addr64;
@@ -1914,14 +1920,20 @@ void SIInstrInfo::splitSMRD(MachineInstr *MI,
// The SMRD has an 8-bit offset in dwords on SI and a 20-bit offset in bytes
// on VI.
+
+ bool IsKill = SBase->isKill();
if (OffOp) {
- bool isVI = RI.ST.getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS;
+ bool isVI =
+ MBB->getParent()->getSubtarget<AMDGPUSubtarget>().getGeneration() >=
+ AMDGPUSubtarget::VOLCANIC_ISLANDS;
unsigned OffScale = isVI ? 1 : 4;
// Handle the _IMM variant
unsigned LoOffset = OffOp->getImm() * OffScale;
unsigned HiOffset = LoOffset + HalfSize;
Lo = BuildMI(*MBB, MI, DL, get(HalfImmOp), RegLo)
- .addOperand(*SBase)
+ // Use addReg instead of addOperand
+ // to make sure the kill flag is cleared.
+ .addReg(SBase->getReg(), 0, SBase->getSubReg())
.addImm(LoOffset / OffScale);
if (!isUInt<20>(HiOffset) || (!isVI && !isUInt<8>(HiOffset / OffScale))) {
@@ -1930,25 +1942,28 @@ void SIInstrInfo::splitSMRD(MachineInstr *MI,
BuildMI(*MBB, MI, DL, get(AMDGPU::S_MOV_B32), OffsetSGPR)
.addImm(HiOffset); // The offset in register is in bytes.
Hi = BuildMI(*MBB, MI, DL, get(HalfSGPROp), RegHi)
- .addOperand(*SBase)
+ .addReg(SBase->getReg(), getKillRegState(IsKill),
+ SBase->getSubReg())
.addReg(OffsetSGPR);
} else {
Hi = BuildMI(*MBB, MI, DL, get(HalfImmOp), RegHi)
- .addOperand(*SBase)
+ .addReg(SBase->getReg(), getKillRegState(IsKill),
+ SBase->getSubReg())
.addImm(HiOffset / OffScale);
}
} else {
// Handle the _SGPR variant
MachineOperand *SOff = getNamedOperand(*MI, AMDGPU::OpName::soff);
Lo = BuildMI(*MBB, MI, DL, get(HalfSGPROp), RegLo)
- .addOperand(*SBase)
+ .addReg(SBase->getReg(), 0, SBase->getSubReg())
.addOperand(*SOff);
unsigned OffsetSGPR = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
BuildMI(*MBB, MI, DL, get(AMDGPU::S_ADD_I32), OffsetSGPR)
.addOperand(*SOff)
.addImm(HalfSize);
Hi = BuildMI(*MBB, MI, DL, get(HalfSGPROp))
- .addOperand(*SBase)
+ .addReg(SBase->getReg(), getKillRegState(IsKill),
+ SBase->getSubReg())
.addReg(OffsetSGPR);
}
@@ -2003,7 +2018,8 @@ void SIInstrInfo::moveSMRDToVALU(MachineInstr *MI, MachineRegisterInfo &MRI) const
// SMRD instructions take a dword offsets on SI and byte offset on VI
// and MUBUF instructions always take a byte offset.
ImmOffset = MI->getOperand(2).getImm();
- if (RI.ST.getGeneration() <= AMDGPUSubtarget::SEA_ISLANDS)
+ if (MBB->getParent()->getSubtarget<AMDGPUSubtarget>().getGeneration() <=
+ AMDGPUSubtarget::SEA_ISLANDS)
ImmOffset <<= 2;
RegOffset = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass);
@@ -2043,13 +2059,15 @@ void SIInstrInfo::moveSMRDToVALU(MachineInstr *MI, MachineRegisterInfo &MRI) const
.addImm(AMDGPU::sub3);
MI->setDesc(get(NewOpcode));
if (MI->getOperand(2).isReg()) {
- MI->getOperand(2).setReg(MI->getOperand(1).getReg());
+ MI->getOperand(2).setReg(SRsrc);
} else {
- MI->getOperand(2).ChangeToRegister(MI->getOperand(1).getReg(), false);
+ MI->getOperand(2).ChangeToRegister(SRsrc, false);
}
- MI->getOperand(1).setReg(SRsrc);
MI->addOperand(*MBB->getParent(), MachineOperand::CreateImm(0));
MI->addOperand(*MBB->getParent(), MachineOperand::CreateImm(ImmOffset));
+ MI->addOperand(*MBB->getParent(), MachineOperand::CreateImm(0)); // glc
+ MI->addOperand(*MBB->getParent(), MachineOperand::CreateImm(0)); // slc
+ MI->addOperand(*MBB->getParent(), MachineOperand::CreateImm(0)); // tfe
const TargetRegisterClass *NewDstRC =
RI.getRegClass(get(NewOpcode).OpInfo[0].RegClass);
diff --git a/lib/Target/R600/SIInstrInfo.h b/lib/Target/R600/SIInstrInfo.h
index 12dc3f3..a9aa99f 100644
--- a/lib/Target/R600/SIInstrInfo.h
+++ b/lib/Target/R600/SIInstrInfo.h
@@ -114,7 +114,7 @@ public:
// register. If there is no hardware instruction that can store to \p
// DstRC, then AMDGPU::COPY is returned.
unsigned getMovOpcode(const TargetRegisterClass *DstRC) const;
- unsigned commuteOpcode(unsigned Opcode) const;
+ unsigned commuteOpcode(const MachineInstr &MI) const;
MachineInstr *commuteInstruction(MachineInstr *MI,
bool NewMI = false) const override;
@@ -218,10 +218,6 @@ public:
bool isImmOperandLegal(const MachineInstr *MI, unsigned OpNo,
const MachineOperand &MO) const;
- /// \brief Return true if the given offset Size in bytes can be folded into
- /// the immediate offsets of a memory instruction for the given address space.
- bool canFoldOffset(unsigned OffsetSize, unsigned AS) const;
-
/// \brief Return true if this 64-bit VALU instruction has a 32-bit encoding.
/// This function will return false if you pass it a 32-bit instruction.
bool hasVALU32BitEncoding(unsigned Opcode) const;
diff --git a/lib/Target/R600/SIInstrInfo.td b/lib/Target/R600/SIInstrInfo.td
index e2747dc..d603ecb 100644
--- a/lib/Target/R600/SIInstrInfo.td
+++ b/lib/Target/R600/SIInstrInfo.td
@@ -264,6 +264,9 @@ def ds_offset0 : Operand<i8> {
def ds_offset1 : Operand<i8> {
let PrintMethod = "printDSOffset1";
}
+def gds : Operand <i1> {
+ let PrintMethod = "printGDS";
+}
def glc : Operand <i1> {
let PrintMethod = "printGLC";
}
@@ -284,6 +287,8 @@ def ClampMod : Operand <i1> {
} // End OperandType = "OPERAND_IMMEDIATE"
+def VOPDstS64 : VOPDstOperand <SReg_64>;
+
//===----------------------------------------------------------------------===//
// Complex patterns
//===----------------------------------------------------------------------===//
@@ -292,7 +297,7 @@ def DS1Addr1Offset : ComplexPattern<i32, 2, "SelectDS1Addr1Offset">;
def DS64Bit4ByteAligned : ComplexPattern<i32, 3, "SelectDS64Bit4ByteAligned">;
def MUBUFAddr32 : ComplexPattern<i64, 9, "SelectMUBUFAddr32">;
-def MUBUFAddr64 : ComplexPattern<i64, 4, "SelectMUBUFAddr64">;
+def MUBUFAddr64 : ComplexPattern<i64, 7, "SelectMUBUFAddr64">;
def MUBUFAddr64Atomic : ComplexPattern<i64, 5, "SelectMUBUFAddr64">;
def MUBUFScratch : ComplexPattern<i64, 4, "SelectMUBUFScratch">;
def MUBUFOffset : ComplexPattern<i64, 6, "SelectMUBUFOffset">;
@@ -315,6 +320,7 @@ def SIOperand {
def SRCMODS {
int NONE = 0;
+ int NEG = 1;
}
def DSTCLAMP {
@@ -516,7 +522,7 @@ multiclass SOP2_64_32 <sop2 op, string opName, list<dag> pattern> : SOP2_m <
class SOPC_Helper <bits<7> op, RegisterOperand rc, ValueType vt,
string opName, PatLeaf cond> : SOPC <
op, (outs SCCReg:$dst), (ins rc:$src0, rc:$src1),
- opName#" $dst, $src0, $src1", []>;
+ opName#" $src0, $src1", []>;
class SOPC_32<bits<7> op, string opName, PatLeaf cond = COND_NULL>
: SOPC_Helper<op, SSrc_32, i32, opName, cond>;
@@ -637,9 +643,9 @@ class getNumSrcArgs<ValueType Src1, ValueType Src2> {
// Returns the register class to use for the destination of VOP[123C]
// instructions for the given VT.
class getVALUDstForVT<ValueType VT> {
- RegisterClass ret = !if(!eq(VT.Size, 32), VGPR_32,
- !if(!eq(VT.Size, 64), VReg_64,
- SReg_64)); // else VT == i1
+ RegisterOperand ret = !if(!eq(VT.Size, 32), VOPDstOperand<VGPR_32>,
+ !if(!eq(VT.Size, 64), VOPDstOperand<VReg_64>,
+ VOPDstOperand<SReg_64>)); // else VT == i1
}
// Returns the register class to use for source 0 of VOP[12C]
@@ -717,7 +723,7 @@ class getIns64 <RegisterOperand Src0RC, RegisterOperand Src1RC,
class getAsm32 <int NumSrcArgs> {
string src1 = ", $src1";
string src2 = ", $src2";
- string ret = " $dst, $src0"#
+ string ret = "$dst, $src0"#
!if(!eq(NumSrcArgs, 1), "", src1)#
!if(!eq(NumSrcArgs, 3), src2, "");
}
@@ -733,7 +739,7 @@ class getAsm64 <int NumSrcArgs, bit HasModifiers> {
string ret =
!if(!eq(HasModifiers, 0),
getAsm32<NumSrcArgs>.ret,
- " $dst, "#src0#src1#src2#"$clamp"#"$omod");
+ "$dst, "#src0#src1#src2#"$clamp"#"$omod");
}
@@ -745,7 +751,7 @@ class VOPProfile <list<ValueType> _ArgVT> {
field ValueType Src0VT = ArgVT[1];
field ValueType Src1VT = ArgVT[2];
field ValueType Src2VT = ArgVT[3];
- field RegisterClass DstRC = getVALUDstForVT<DstVT>.ret;
+ field RegisterOperand DstRC = getVALUDstForVT<DstVT>.ret;
field RegisterOperand Src0RC32 = getVOPSrc0ForVT<Src0VT>.ret;
field RegisterClass Src1RC32 = getVOPSrc1ForVT<Src1VT>.ret;
field RegisterOperand Src0RC64 = getVOP3SrcForVT<Src0VT>.ret;
@@ -761,7 +767,7 @@ class VOPProfile <list<ValueType> _ArgVT> {
field dag Ins64 = getIns64<Src0RC64, Src1RC64, Src2RC64, NumSrcArgs,
HasModifiers>.ret;
- field string Asm32 = "_e32"#getAsm32<NumSrcArgs>.ret;
+ field string Asm32 = getAsm32<NumSrcArgs>.ret;
field string Asm64 = getAsm64<NumSrcArgs, HasModifiers>.ret;
}
@@ -788,22 +794,27 @@ def VOP_I32_I32_I32_VCC : VOPProfile <[i32, i32, i32, untyped]> {
def VOP_I1_F32_I32 : VOPProfile <[i1, f32, i32, untyped]> {
let Ins64 = (ins InputModsNoDefault:$src0_modifiers, Src0RC64:$src0, Src1RC64:$src1);
- let Asm64 = " $dst, $src0_modifiers, $src1";
+ let Asm64 = "$dst, $src0_modifiers, $src1";
}
def VOP_I1_F64_I32 : VOPProfile <[i1, f64, i32, untyped]> {
let Ins64 = (ins InputModsNoDefault:$src0_modifiers, Src0RC64:$src0, Src1RC64:$src1);
- let Asm64 = " $dst, $src0_modifiers, $src1";
+ let Asm64 = "$dst, $src0_modifiers, $src1";
}
def VOP_I64_I64_I32 : VOPProfile <[i64, i64, i32, untyped]>;
def VOP_I64_I32_I64 : VOPProfile <[i64, i32, i64, untyped]>;
def VOP_I64_I64_I64 : VOPProfile <[i64, i64, i64, untyped]>;
+def VOP_CNDMASK : VOPProfile <[i32, i32, i32, untyped]> {
+ let Ins32 = (ins Src0RC32:$src0, Src1RC32:$src1, VCCReg:$src2);
+ let Ins64 = (ins Src0RC64:$src0, Src1RC64:$src1, SSrc_64:$src2);
+ let Asm64 = "$dst, $src0, $src1, $src2";
+}
def VOP_F32_F32_F32_F32 : VOPProfile <[f32, f32, f32, f32]>;
def VOP_MADK : VOPProfile <[f32, f32, f32, f32]> {
field dag Ins = (ins VCSrc_32:$src0, VGPR_32:$vsrc1, u32imm:$src2);
- field string Asm = " $dst, $src0, $vsrc1, $src2";
+ field string Asm = "$dst, $src0, $vsrc1, $src2";
}
def VOP_F64_F64_F64_F64 : VOPProfile <[f64, f64, f64, f64]>;
def VOP_I32_I32_I32_I32 : VOPProfile <[i32, i32, i32, i32]>;
@@ -835,23 +846,28 @@ class VOP1_Pseudo <dag outs, dag ins, list<dag> pattern, string opName> :
field bits<9> src0;
}
+class VOP1_Real_si <string opName, vop1 op, dag outs, dag ins, string asm> :
+ VOP1<op.SI, outs, ins, asm, []>,
+ SIMCInstr <opName#"_e32", SISubtarget.SI>;
+
+class VOP1_Real_vi <string opName, vop1 op, dag outs, dag ins, string asm> :
+ VOP1<op.VI, outs, ins, asm, []>,
+ SIMCInstr <opName#"_e32", SISubtarget.VI>;
+
multiclass VOP1_m <vop1 op, dag outs, dag ins, string asm, list<dag> pattern,
string opName> {
def "" : VOP1_Pseudo <outs, ins, pattern, opName>;
- def _si : VOP1<op.SI, outs, ins, asm, []>,
- SIMCInstr <opName#"_e32", SISubtarget.SI>;
- def _vi : VOP1<op.VI, outs, ins, asm, []>,
- SIMCInstr <opName#"_e32", SISubtarget.VI>;
+ def _si : VOP1_Real_si <opName, op, outs, ins, asm>;
+
+ def _vi : VOP1_Real_vi <opName, op, outs, ins, asm>;
}
multiclass VOP1SI_m <vop1 op, dag outs, dag ins, string asm, list<dag> pattern,
string opName> {
def "" : VOP1_Pseudo <outs, ins, pattern, opName>;
- def _si : VOP1<op.SI, outs, ins, asm, []>,
- SIMCInstr <opName#"_e32", SISubtarget.SI>;
- // No VI instruction. This class is for SI only.
+ def _si : VOP1_Real_si <opName, op, outs, ins, asm>;
}
class VOP2_Pseudo <dag outs, dag ins, list<dag> pattern, string opName> :
@@ -862,13 +878,20 @@ class VOP2_Pseudo <dag outs, dag ins, list<dag> pattern, string opName> :
let isCodeGenOnly = 1;
}
+class VOP2_Real_si <string opName, vop2 op, dag outs, dag ins, string asm> :
+ VOP2 <op.SI, outs, ins, opName#asm, []>,
+ SIMCInstr <opName#"_e32", SISubtarget.SI>;
+
+class VOP2_Real_vi <string opName, vop2 op, dag outs, dag ins, string asm> :
+ VOP2 <op.VI, outs, ins, opName#asm, []>,
+ SIMCInstr <opName#"_e32", SISubtarget.VI>;
+
multiclass VOP2SI_m <vop2 op, dag outs, dag ins, string asm, list<dag> pattern,
string opName, string revOp> {
def "" : VOP2_Pseudo <outs, ins, pattern, opName>,
VOP2_REV<revOp#"_e32", !eq(revOp, opName)>;
- def _si : VOP2 <op.SI, outs, ins, opName#asm, []>,
- SIMCInstr <opName#"_e32", SISubtarget.SI>;
+ def _si : VOP2_Real_si <opName, op, outs, ins, asm>;
}
multiclass VOP2_m <vop2 op, dag outs, dag ins, string asm, list<dag> pattern,
@@ -876,10 +899,10 @@ multiclass VOP2_m <vop2 op, dag outs, dag ins, string asm, list<dag> pattern,
def "" : VOP2_Pseudo <outs, ins, pattern, opName>,
VOP2_REV<revOp#"_e32", !eq(revOp, opName)>;
- def _si : VOP2 <op.SI, outs, ins, opName#asm, []>,
- SIMCInstr <opName#"_e32", SISubtarget.SI>;
- def _vi : VOP2 <op.VI, outs, ins, opName#asm, []>,
- SIMCInstr <opName#"_e32", SISubtarget.VI>;
+ def _si : VOP2_Real_si <opName, op, outs, ins, asm>;
+
+ def _vi : VOP2_Real_vi <opName, op, outs, ins, asm>;
+
}
class VOP3DisableFields <bit HasSrc1, bit HasSrc2, bit HasModifiers> {
@@ -1047,9 +1070,10 @@ multiclass VOP3b_3_m <vop op, dag outs, dag ins, string asm,
multiclass VOP3_C_m <vop op, dag outs, dag ins, string asm,
list<dag> pattern, string opName,
- bit HasMods, bit defExec> {
+ bit HasMods, bit defExec, string revOp> {
- def "" : VOP3_Pseudo <outs, ins, pattern, opName>;
+ def "" : VOP3_Pseudo <outs, ins, pattern, opName>,
+ VOP2_REV<revOp#"_e64", !eq(revOp, opName)>;
def _si : VOP3_Real_si <op.SI3, outs, ins, asm, opName>,
VOP3DisableFields<1, 0, HasMods> {
@@ -1086,7 +1110,7 @@ multiclass VOP1_Helper <vop1 op, string opName, dag outs,
defm _e32 : VOP1_m <op, outs, ins32, opName#asm32, pat32, opName>;
- defm _e64 : VOP3_1_m <op, outs, ins64, opName#"_e64"#asm64, pat64, opName, HasMods>;
+ defm _e64 : VOP3_1_m <op, outs, ins64, opName#asm64, pat64, opName, HasMods>;
}
multiclass VOP1Inst <vop1 op, string opName, VOPProfile P,
@@ -1121,7 +1145,7 @@ multiclass VOP2_Helper <vop2 op, string opName, dag outs,
defm _e32 : VOP2_m <op, outs, ins32, asm32, pat32, opName, revOp>;
defm _e64 : VOP3_2_m <op,
- outs, ins64, opName#"_e64"#asm64, pat64, opName, revOp, HasMods
+ outs, ins64, opName#asm64, pat64, opName, revOp, HasMods
>;
}
@@ -1145,7 +1169,7 @@ multiclass VOP2InstSI <vop2 op, string opName, VOPProfile P,
string revOp = opName> {
defm _e32 : VOP2SI_m <op, P.Outs, P.Ins32, P.Asm32, [], opName, revOp>;
- defm _e64 : VOP3SI_2_m <op, P.Outs, P.Ins64, opName#"_e64"#P.Asm64,
+ defm _e64 : VOP3SI_2_m <op, P.Outs, P.Ins64, opName#P.Asm64,
!if(P.HasModifiers,
[(set P.DstVT:$dst,
(node (P.Src0VT (VOP3Mods0 P.Src0VT:$src0, i32:$src0_modifiers,
@@ -1163,7 +1187,7 @@ multiclass VOP2b_Helper <vop2 op, string opName, dag outs,
defm _e32 : VOP2_m <op, outs, ins32, asm32, pat32, opName, revOp>;
defm _e64 : VOP3b_2_m <op,
- outs, ins64, opName#"_e64"#asm64, pat64, opName, revOp, HasMods
+ outs, ins64, opName#asm64, pat64, opName, revOp, HasMods
>;
}
@@ -1189,7 +1213,7 @@ multiclass VOP2_VI3_Helper <vop23 op, string opName, dag outs,
string revOp, bit HasMods> {
defm _e32 : VOP2SI_m <op, outs, ins32, asm32, pat32, opName, revOp>;
- defm _e64 : VOP3_2_m <op, outs, ins64, opName#"_e64"#asm64, pat64, opName,
+ defm _e64 : VOP3_2_m <op, outs, ins64, opName#asm64, pat64, opName,
revOp, HasMods>;
}
@@ -1235,28 +1259,30 @@ class VOPC_Pseudo <dag outs, dag ins, list<dag> pattern, string opName> :
}
multiclass VOPC_m <vopc op, dag outs, dag ins, string asm, list<dag> pattern,
- string opName, bit DefExec> {
+ string opName, bit DefExec, string revOpName = ""> {
def "" : VOPC_Pseudo <outs, ins, pattern, opName>;
def _si : VOPC<op.SI, ins, asm, []>,
SIMCInstr <opName#"_e32", SISubtarget.SI> {
let Defs = !if(DefExec, [EXEC], []);
+ let hasSideEffects = DefExec;
}
def _vi : VOPC<op.VI, ins, asm, []>,
SIMCInstr <opName#"_e32", SISubtarget.VI> {
let Defs = !if(DefExec, [EXEC], []);
+ let hasSideEffects = DefExec;
}
}
multiclass VOPC_Helper <vopc op, string opName,
dag ins32, string asm32, list<dag> pat32,
dag out64, dag ins64, string asm64, list<dag> pat64,
- bit HasMods, bit DefExec> {
+ bit HasMods, bit DefExec, string revOp> {
defm _e32 : VOPC_m <op, (outs), ins32, opName#asm32, pat32, opName, DefExec>;
- defm _e64 : VOP3_C_m <op, out64, ins64, opName#"_e64"#asm64, pat64,
- opName, HasMods, DefExec>;
+ defm _e64 : VOP3_C_m <op, out64, ins64, opName#asm64, pat64,
+ opName, HasMods, DefExec, revOp>;
}
// Special case for class instructions which only have modifiers on
@@ -1264,20 +1290,21 @@ multiclass VOPC_Helper <vopc op, string opName,
multiclass VOPC_Class_Helper <vopc op, string opName,
dag ins32, string asm32, list<dag> pat32,
dag out64, dag ins64, string asm64, list<dag> pat64,
- bit HasMods, bit DefExec> {
+ bit HasMods, bit DefExec, string revOp> {
defm _e32 : VOPC_m <op, (outs), ins32, opName#asm32, pat32, opName, DefExec>;
- defm _e64 : VOP3_C_m <op, out64, ins64, opName#"_e64"#asm64, pat64,
- opName, HasMods, DefExec>,
+ defm _e64 : VOP3_C_m <op, out64, ins64, opName#asm64, pat64,
+ opName, HasMods, DefExec, revOp>,
VOP3DisableModFields<1, 0, 0>;
}
multiclass VOPCInst <vopc op, string opName,
VOPProfile P, PatLeaf cond = COND_NULL,
+ string revOp = opName,
bit DefExec = 0> : VOPC_Helper <
op, opName,
P.Ins32, P.Asm32, [],
- (outs SReg_64:$dst), P.Ins64, P.Asm64,
+ (outs VOPDstS64:$dst), P.Ins64, P.Asm64,
!if(P.HasModifiers,
[(set i1:$dst,
(setcc (P.Src0VT (VOP3Mods0 P.Src0VT:$src0, i32:$src0_modifiers,
@@ -1285,54 +1312,55 @@ multiclass VOPCInst <vopc op, string opName,
(P.Src1VT (VOP3Mods P.Src1VT:$src1, i32:$src1_modifiers)),
cond))],
[(set i1:$dst, (setcc P.Src0VT:$src0, P.Src1VT:$src1, cond))]),
- P.HasModifiers, DefExec
+ P.HasModifiers, DefExec, revOp
>;
multiclass VOPCClassInst <vopc op, string opName, VOPProfile P,
bit DefExec = 0> : VOPC_Class_Helper <
op, opName,
P.Ins32, P.Asm32, [],
- (outs SReg_64:$dst), P.Ins64, P.Asm64,
+ (outs VOPDstS64:$dst), P.Ins64, P.Asm64,
!if(P.HasModifiers,
[(set i1:$dst,
(AMDGPUfp_class (P.Src0VT (VOP3Mods0Clamp0OMod P.Src0VT:$src0, i32:$src0_modifiers)), P.Src1VT:$src1))],
[(set i1:$dst, (AMDGPUfp_class P.Src0VT:$src0, P.Src1VT:$src1))]),
- P.HasModifiers, DefExec
+ P.HasModifiers, DefExec, opName
>;
-multiclass VOPC_F32 <vopc op, string opName, PatLeaf cond = COND_NULL> :
- VOPCInst <op, opName, VOP_F32_F32_F32, cond>;
+multiclass VOPC_F32 <vopc op, string opName, PatLeaf cond = COND_NULL, string revOp = opName> :
+ VOPCInst <op, opName, VOP_F32_F32_F32, cond, revOp>;
-multiclass VOPC_F64 <vopc op, string opName, PatLeaf cond = COND_NULL> :
- VOPCInst <op, opName, VOP_F64_F64_F64, cond>;
+multiclass VOPC_F64 <vopc op, string opName, PatLeaf cond = COND_NULL, string revOp = opName> :
+ VOPCInst <op, opName, VOP_F64_F64_F64, cond, revOp>;
-multiclass VOPC_I32 <vopc op, string opName, PatLeaf cond = COND_NULL> :
- VOPCInst <op, opName, VOP_I32_I32_I32, cond>;
+multiclass VOPC_I32 <vopc op, string opName, PatLeaf cond = COND_NULL, string revOp = opName> :
+ VOPCInst <op, opName, VOP_I32_I32_I32, cond, revOp>;
-multiclass VOPC_I64 <vopc op, string opName, PatLeaf cond = COND_NULL> :
- VOPCInst <op, opName, VOP_I64_I64_I64, cond>;
+multiclass VOPC_I64 <vopc op, string opName, PatLeaf cond = COND_NULL, string revOp = opName> :
+ VOPCInst <op, opName, VOP_I64_I64_I64, cond, revOp>;
multiclass VOPCX <vopc op, string opName, VOPProfile P,
- PatLeaf cond = COND_NULL>
- : VOPCInst <op, opName, P, cond, 1>;
+ PatLeaf cond = COND_NULL,
+ string revOp = "">
+ : VOPCInst <op, opName, P, cond, revOp, 1>;
-multiclass VOPCX_F32 <vopc op, string opName, PatLeaf cond = COND_NULL> :
- VOPCX <op, opName, VOP_F32_F32_F32, cond>;
+multiclass VOPCX_F32 <vopc op, string opName, string revOp = opName> :
+ VOPCX <op, opName, VOP_F32_F32_F32, COND_NULL, revOp>;
-multiclass VOPCX_F64 <vopc op, string opName, PatLeaf cond = COND_NULL> :
- VOPCX <op, opName, VOP_F64_F64_F64, cond>;
+multiclass VOPCX_F64 <vopc op, string opName, string revOp = opName> :
+ VOPCX <op, opName, VOP_F64_F64_F64, COND_NULL, revOp>;
-multiclass VOPCX_I32 <vopc op, string opName, PatLeaf cond = COND_NULL> :
- VOPCX <op, opName, VOP_I32_I32_I32, cond>;
+multiclass VOPCX_I32 <vopc op, string opName, string revOp = opName> :
+ VOPCX <op, opName, VOP_I32_I32_I32, COND_NULL, revOp>;
-multiclass VOPCX_I64 <vopc op, string opName, PatLeaf cond = COND_NULL> :
- VOPCX <op, opName, VOP_I64_I64_I64, cond>;
+multiclass VOPCX_I64 <vopc op, string opName, string revOp = opName> :
+ VOPCX <op, opName, VOP_I64_I64_I64, COND_NULL, revOp>;
multiclass VOP3_Helper <vop3 op, string opName, dag outs, dag ins, string asm,
list<dag> pat, int NumSrcArgs, bit HasMods> : VOP3_m <
- op, outs, ins, opName#asm, pat, opName, NumSrcArgs, HasMods
+ op, outs, ins, opName#" "#asm, pat, opName, NumSrcArgs, HasMods
>;
multiclass VOPC_CLASS_F32 <vopc op, string opName> :
@@ -1349,7 +1377,7 @@ multiclass VOPCX_CLASS_F64 <vopc op, string opName> :
multiclass VOP3Inst <vop3 op, string opName, VOPProfile P,
SDPatternOperator node = null_frag> : VOP3_Helper <
- op, opName, P.Outs, P.Ins64, P.Asm64,
+ op, opName, (outs P.DstRC.RegClass:$dst), P.Ins64, P.Asm64,
!if(!eq(P.NumSrcArgs, 3),
!if(P.HasModifiers,
[(set P.DstVT:$dst,
@@ -1381,7 +1409,7 @@ multiclass VOP3_VCC_Inst <vop3 op, string opName,
VOPProfile P,
SDPatternOperator node = null_frag> : VOP3_Helper <
op, opName,
- P.Outs,
+ (outs P.DstRC.RegClass:$dst),
(ins InputModsNoDefault:$src0_modifiers, P.Src0RC64:$src0,
InputModsNoDefault:$src1_modifiers, P.Src1RC64:$src1,
InputModsNoDefault:$src2_modifiers, P.Src2RC64:$src2,
@@ -1483,10 +1511,8 @@ class DS_Real_vi <bits<8> op, string opName, dag outs, dag ins, string asm> :
DSe_vi <op>,
SIMCInstr <opName, SISubtarget.VI>;
-class DS_1A_Real_si <bits<8> op, string opName, dag outs, dag ins, string asm> :
- DS <outs, ins, asm, []>,
- DSe <op>,
- SIMCInstr <opName, SISubtarget.SI> {
+class DS_Off16_Real_si <bits<8> op, string opName, dag outs, dag ins, string asm> :
+ DS_Real_si <op, opName, outs, ins, asm> {
// Single loads interpret the 2 i8imm operands as a single i16 offset.
bits<16> offset;
@@ -1494,10 +1520,8 @@ class DS_1A_Real_si <bits<8> op, string opName, dag outs, dag ins, string asm> :
let offset1 = offset{15-8};
}
-class DS_1A_Real_vi <bits<8> op, string opName, dag outs, dag ins, string asm> :
- DS <outs, ins, asm, []>,
- DSe_vi <op>,
- SIMCInstr <opName, SISubtarget.VI> {
+class DS_Off16_Real_vi <bits<8> op, string opName, dag outs, dag ins, string asm> :
+ DS_Real_vi <op, opName, outs, ins, asm> {
// Single loads interpret the 2 i8imm operands as a single i16 offset.
bits<16> offset;
@@ -1505,180 +1529,168 @@ class DS_1A_Real_vi <bits<8> op, string opName, dag outs, dag ins, string asm> :
let offset1 = offset{15-8};
}
-multiclass DS_1A_Load_m <bits<8> op, string opName, dag outs, dag ins, string asm,
- list<dag> pat> {
- let hasSideEffects = 0, mayLoad = 1, mayStore = 0 in {
- def "" : DS_Pseudo <opName, outs, ins, pat>;
+multiclass DS_1A_RET <bits<8> op, string opName, RegisterClass rc,
+ dag outs = (outs rc:$vdst),
+ dag ins = (ins VGPR_32:$addr, ds_offset:$offset, gds:$gds, M0Reg:$m0),
+ string asm = opName#" $vdst, $addr"#"$offset$gds"> {
- let data0 = 0, data1 = 0 in {
- def _si : DS_1A_Real_si <op, opName, outs, ins, asm>;
- def _vi : DS_1A_Real_vi <op, opName, outs, ins, asm>;
- }
+ def "" : DS_Pseudo <opName, outs, ins, []>;
+
+ let data0 = 0, data1 = 0 in {
+ def _si : DS_Off16_Real_si <op, opName, outs, ins, asm>;
+ def _vi : DS_Off16_Real_vi <op, opName, outs, ins, asm>;
}
}
-multiclass DS_Load_Helper <bits<8> op, string asm, RegisterClass regClass>
- : DS_1A_Load_m <
- op,
- asm,
- (outs regClass:$vdst),
- (ins i1imm:$gds, VGPR_32:$addr, ds_offset:$offset, M0Reg:$m0),
- asm#" $vdst, $addr"#"$offset",
- []>;
-
-multiclass DS_Load2_m <bits<8> op, string opName, dag outs, dag ins, string asm,
- list<dag> pat> {
- let hasSideEffects = 0, mayLoad = 1, mayStore = 0 in {
- def "" : DS_Pseudo <opName, outs, ins, pat>;
-
- let data0 = 0, data1 = 0 in {
- def _si : DS_Real_si <op, opName, outs, ins, asm>;
- def _vi : DS_Real_vi <op, opName, outs, ins, asm>;
- }
+multiclass DS_1A_Off8_RET <bits<8> op, string opName, RegisterClass rc,
+ dag outs = (outs rc:$vdst),
+ dag ins = (ins VGPR_32:$addr, ds_offset0:$offset0, ds_offset1:$offset1,
+ gds:$gds, M0Reg:$m0),
+ string asm = opName#" $vdst, $addr"#"$offset0"#"$offset1$gds"> {
+
+ def "" : DS_Pseudo <opName, outs, ins, []>;
+
+ let data0 = 0, data1 = 0 in {
+ def _si : DS_Real_si <op, opName, outs, ins, asm>;
+ def _vi : DS_Real_vi <op, opName, outs, ins, asm>;
}
}
-multiclass DS_Load2_Helper <bits<8> op, string asm, RegisterClass regClass>
- : DS_Load2_m <
- op,
- asm,
- (outs regClass:$vdst),
- (ins i1imm:$gds, VGPR_32:$addr, ds_offset0:$offset0, ds_offset1:$offset1,
- M0Reg:$m0),
- asm#" $vdst, $addr"#"$offset0"#"$offset1",
- []>;
-
-multiclass DS_1A_Store_m <bits<8> op, string opName, dag outs, dag ins,
- string asm, list<dag> pat> {
- let hasSideEffects = 0, mayLoad = 0, mayStore = 1 in {
- def "" : DS_Pseudo <opName, outs, ins, pat>;
-
- let data1 = 0, vdst = 0 in {
- def _si : DS_1A_Real_si <op, opName, outs, ins, asm>;
- def _vi : DS_1A_Real_vi <op, opName, outs, ins, asm>;
- }
+multiclass DS_1A1D_NORET <bits<8> op, string opName, RegisterClass rc,
+ dag outs = (outs),
+ dag ins = (ins VGPR_32:$addr, rc:$data0, ds_offset:$offset, gds:$gds,
+ M0Reg:$m0),
+ string asm = opName#" $addr, $data0"#"$offset$gds"> {
+
+ def "" : DS_Pseudo <opName, outs, ins, []>,
+ AtomicNoRet<opName, 0>;
+
+ let data1 = 0, vdst = 0 in {
+ def _si : DS_Off16_Real_si <op, opName, outs, ins, asm>;
+ def _vi : DS_Off16_Real_vi <op, opName, outs, ins, asm>;
}
}
-multiclass DS_Store_Helper <bits<8> op, string asm, RegisterClass regClass>
- : DS_1A_Store_m <
- op,
- asm,
- (outs),
- (ins i1imm:$gds, VGPR_32:$addr, regClass:$data0, ds_offset:$offset, M0Reg:$m0),
- asm#" $addr, $data0"#"$offset",
- []>;
-
-multiclass DS_Store_m <bits<8> op, string opName, dag outs, dag ins,
- string asm, list<dag> pat> {
- let hasSideEffects = 0, mayLoad = 0, mayStore = 1 in {
- def "" : DS_Pseudo <opName, outs, ins, pat>;
-
- let vdst = 0 in {
- def _si : DS_Real_si <op, opName, outs, ins, asm>;
- def _vi : DS_Real_vi <op, opName, outs, ins, asm>;
- }
+multiclass DS_1A1D_Off8_NORET <bits<8> op, string opName, RegisterClass rc,
+ dag outs = (outs),
+ dag ins = (ins VGPR_32:$addr, rc:$data0, rc:$data1,
+ ds_offset0:$offset0, ds_offset1:$offset1, gds:$gds, M0Reg:$m0),
+ string asm = opName#" $addr, $data0, $data1"#"$offset0"#"$offset1"#"$gds"> {
+
+ def "" : DS_Pseudo <opName, outs, ins, []>;
+
+ let vdst = 0 in {
+ def _si : DS_Real_si <op, opName, outs, ins, asm>;
+ def _vi : DS_Real_vi <op, opName, outs, ins, asm>;
}
}
-multiclass DS_Store2_Helper <bits<8> op, string asm, RegisterClass regClass>
- : DS_Store_m <
- op,
- asm,
- (outs),
- (ins i1imm:$gds, VGPR_32:$addr, regClass:$data0, regClass:$data1,
- ds_offset0:$offset0, ds_offset1:$offset1, M0Reg:$m0),
- asm#" $addr, $data0, $data1"#"$offset0"#"$offset1",
- []>;
-
-// 1 address, 1 data.
-multiclass DS_1A1D_RET_m <bits<8> op, string opName, dag outs, dag ins,
- string asm, list<dag> pat, string noRetOp> {
- let mayLoad = 1, mayStore = 1,
- hasPostISelHook = 1 // Adjusted to no return version.
- in {
- def "" : DS_Pseudo <opName, outs, ins, pat>,
- AtomicNoRet<noRetOp, 1>;
-
- let data1 = 0 in {
- def _si : DS_1A_Real_si <op, opName, outs, ins, asm>;
- def _vi : DS_1A_Real_vi <op, opName, outs, ins, asm>;
- }
+multiclass DS_1A1D_RET <bits<8> op, string opName, RegisterClass rc,
+ string noRetOp = "",
+ dag outs = (outs rc:$vdst),
+ dag ins = (ins VGPR_32:$addr, rc:$data0, ds_offset:$offset, gds:$gds,
+ M0Reg:$m0),
+ string asm = opName#" $vdst, $addr, $data0"#"$offset$gds"> {
+
+ def "" : DS_Pseudo <opName, outs, ins, []>,
+ AtomicNoRet<noRetOp, 1>;
+
+ let data1 = 0 in {
+ def _si : DS_Off16_Real_si <op, opName, outs, ins, asm>;
+ def _vi : DS_Off16_Real_vi <op, opName, outs, ins, asm>;
}
}
-multiclass DS_1A1D_RET <bits<8> op, string asm, RegisterClass rc,
- string noRetOp = ""> : DS_1A1D_RET_m <
- op, asm,
- (outs rc:$vdst),
- (ins i1imm:$gds, VGPR_32:$addr, rc:$data0, ds_offset:$offset, M0Reg:$m0),
- asm#" $vdst, $addr, $data0"#"$offset", [], noRetOp>;
-
-// 1 address, 2 data.
-multiclass DS_1A2D_RET_m <bits<8> op, string opName, dag outs, dag ins,
- string asm, list<dag> pat, string noRetOp> {
- let mayLoad = 1, mayStore = 1,
- hasPostISelHook = 1 // Adjusted to no return version.
- in {
- def "" : DS_Pseudo <opName, outs, ins, pat>,
- AtomicNoRet<noRetOp, 1>;
-
- def _si : DS_1A_Real_si <op, opName, outs, ins, asm>;
- def _vi : DS_1A_Real_vi <op, opName, outs, ins, asm>;
- }
+multiclass DS_1A2D_RET_m <bits<8> op, string opName, RegisterClass rc,
+ string noRetOp = "", dag ins,
+ dag outs = (outs rc:$vdst),
+ string asm = opName#" $vdst, $addr, $data0, $data1"#"$offset"#"$gds"> {
+
+ def "" : DS_Pseudo <opName, outs, ins, []>,
+ AtomicNoRet<noRetOp, 1>;
+
+ def _si : DS_Off16_Real_si <op, opName, outs, ins, asm>;
+ def _vi : DS_Off16_Real_vi <op, opName, outs, ins, asm>;
}
multiclass DS_1A2D_RET <bits<8> op, string asm, RegisterClass rc,
- string noRetOp = ""> : DS_1A2D_RET_m <
- op, asm,
- (outs rc:$vdst),
- (ins i1imm:$gds, VGPR_32:$addr, rc:$data0, rc:$data1, ds_offset:$offset, M0Reg:$m0),
- asm#" $vdst, $addr, $data0, $data1"#"$offset",
- [], noRetOp>;
-
-// 1 address, 2 data.
-multiclass DS_1A2D_NORET_m <bits<8> op, string opName, dag outs, dag ins,
- string asm, list<dag> pat, string noRetOp> {
- let mayLoad = 1, mayStore = 1 in {
- def "" : DS_Pseudo <opName, outs, ins, pat>,
- AtomicNoRet<noRetOp, 0>;
+ string noRetOp = "", RegisterClass src = rc> :
+ DS_1A2D_RET_m <op, asm, rc, noRetOp,
+ (ins VGPR_32:$addr, src:$data0, src:$data1,
+ ds_offset:$offset, gds:$gds, M0Reg:$m0)
+>;
- let vdst = 0 in {
- def _si : DS_1A_Real_si <op, opName, outs, ins, asm>;
- def _vi : DS_1A_Real_vi <op, opName, outs, ins, asm>;
- }
+multiclass DS_1A2D_NORET <bits<8> op, string opName, RegisterClass rc,
+ string noRetOp = opName,
+ dag outs = (outs),
+ dag ins = (ins VGPR_32:$addr, rc:$data0, rc:$data1,
+ ds_offset:$offset, gds:$gds, M0Reg:$m0),
+ string asm = opName#" $addr, $data0, $data1"#"$offset"#"$gds"> {
+
+ def "" : DS_Pseudo <opName, outs, ins, []>,
+ AtomicNoRet<noRetOp, 0>;
+
+ let vdst = 0 in {
+ def _si : DS_Off16_Real_si <op, opName, outs, ins, asm>;
+ def _vi : DS_Off16_Real_vi <op, opName, outs, ins, asm>;
}
}
-multiclass DS_1A2D_NORET <bits<8> op, string asm, RegisterClass rc,
- string noRetOp = asm> : DS_1A2D_NORET_m <
- op, asm,
- (outs),
- (ins i1imm:$gds, VGPR_32:$addr, rc:$data0, rc:$data1, ds_offset:$offset, M0Reg:$m0),
- asm#" $addr, $data0, $data1"#"$offset",
- [], noRetOp>;
+multiclass DS_0A_RET <bits<8> op, string opName,
+ dag outs = (outs VGPR_32:$vdst),
+ dag ins = (ins ds_offset:$offset, gds:$gds, M0Reg:$m0),
+ string asm = opName#" $vdst"#"$offset"#"$gds"> {
-// 1 address, 1 data.
-multiclass DS_1A1D_NORET_m <bits<8> op, string opName, dag outs, dag ins,
- string asm, list<dag> pat, string noRetOp> {
let mayLoad = 1, mayStore = 1 in {
- def "" : DS_Pseudo <opName, outs, ins, pat>,
- AtomicNoRet<noRetOp, 0>;
+ def "" : DS_Pseudo <opName, outs, ins, []>;
- let data1 = 0, vdst = 0 in {
- def _si : DS_1A_Real_si <op, opName, outs, ins, asm>;
- def _vi : DS_1A_Real_vi <op, opName, outs, ins, asm>;
- }
- }
+ let addr = 0, data0 = 0, data1 = 0 in {
+ def _si : DS_Off16_Real_si <op, opName, outs, ins, asm>;
+ def _vi : DS_Off16_Real_vi <op, opName, outs, ins, asm>;
+ } // end addr = 0, data0 = 0, data1 = 0
+ } // end mayLoad = 1, mayStore = 1
}
-multiclass DS_1A1D_NORET <bits<8> op, string asm, RegisterClass rc,
- string noRetOp = asm> : DS_1A1D_NORET_m <
- op, asm,
- (outs),
- (ins i1imm:$gds, VGPR_32:$addr, rc:$data0, ds_offset:$offset, M0Reg:$m0),
- asm#" $addr, $data0"#"$offset",
- [], noRetOp>;
+multiclass DS_1A_RET_GDS <bits<8> op, string opName,
+ dag outs = (outs VGPR_32:$vdst),
+ dag ins = (ins VGPR_32:$addr, ds_offset:$offset, M0Reg:$m0),
+ string asm = opName#" $vdst, $addr"#"$offset gds"> {
+
+ def "" : DS_Pseudo <opName, outs, ins, []>;
+
+ let data0 = 0, data1 = 0, gds = 1 in {
+ def _si : DS_Off16_Real_si <op, opName, outs, ins, asm>;
+ def _vi : DS_Off16_Real_vi <op, opName, outs, ins, asm>;
+ } // end data0 = 0, data1 = 0, gds = 1
+}
+
+multiclass DS_1A_GDS <bits<8> op, string opName,
+ dag outs = (outs),
+ dag ins = (ins VGPR_32:$addr, M0Reg:$m0),
+ string asm = opName#" $addr gds"> {
+
+ def "" : DS_Pseudo <opName, outs, ins, []>;
+
+ let vdst = 0, data0 = 0, data1 = 0, offset0 = 0, offset1 = 0, gds = 1 in {
+ def _si : DS_Real_si <op, opName, outs, ins, asm>;
+ def _vi : DS_Real_vi <op, opName, outs, ins, asm>;
+  } // end vdst = 0, data0 = 0, data1 = 0, offset0 = 0, offset1 = 0, gds = 1
+}
+
+multiclass DS_1A <bits<8> op, string opName,
+ dag outs = (outs),
+ dag ins = (ins VGPR_32:$addr, ds_offset:$offset, M0Reg:$m0, gds:$gds),
+ string asm = opName#" $addr"#"$offset"#"$gds"> {
+
+ let mayLoad = 1, mayStore = 1 in {
+ def "" : DS_Pseudo <opName, outs, ins, []>;
+
+ let vdst = 0, data0 = 0, data1 = 0 in {
+ def _si : DS_Off16_Real_si <op, opName, outs, ins, asm>;
+ def _vi : DS_Off16_Real_vi <op, opName, outs, ins, asm>;
+    } // end vdst = 0, data0 = 0, data1 = 0
+ } // end mayLoad = 1, mayStore = 1
+}
//===----------------------------------------------------------------------===//
// MTBUF classes
@@ -1861,14 +1873,14 @@ multiclass MUBUF_Atomic <mubuf op, string name, RegisterClass rc,
defm _ADDR64 : MUBUFAtomicAddr64_m <
op, name#"_addr64", (outs),
(ins rc:$vdata, SReg_128:$srsrc, VReg_64:$vaddr,
- mbuf_offset:$offset, SCSrc_32:$soffset, slc:$slc),
+ SCSrc_32:$soffset, mbuf_offset:$offset, slc:$slc),
name#" $vdata, $vaddr, $srsrc, $soffset addr64"#"$offset"#"$slc", [], 0
>;
defm _OFFSET : MUBUFAtomicOffset_m <
op, name#"_offset", (outs),
- (ins rc:$vdata, SReg_128:$srsrc, mbuf_offset:$offset,
- SCSrc_32:$soffset, slc:$slc),
+ (ins rc:$vdata, SReg_128:$srsrc, SCSrc_32:$soffset, mbuf_offset:$offset,
+ slc:$slc),
name#" $vdata, $srsrc, $soffset"#"$offset"#"$slc", [], 0
>;
} // glc = 0
@@ -1880,7 +1892,7 @@ multiclass MUBUF_Atomic <mubuf op, string name, RegisterClass rc,
defm _RTN_ADDR64 : MUBUFAtomicAddr64_m <
op, name#"_rtn_addr64", (outs rc:$vdata),
(ins rc:$vdata_in, SReg_128:$srsrc, VReg_64:$vaddr,
- mbuf_offset:$offset, SSrc_32:$soffset, slc:$slc),
+ SCSrc_32:$soffset, mbuf_offset:$offset, slc:$slc),
name#" $vdata, $vaddr, $srsrc, $soffset addr64"#"$offset"#" glc"#"$slc",
[(set vt:$vdata,
(atomic (MUBUFAddr64Atomic v4i32:$srsrc, i64:$vaddr, i32:$soffset,
@@ -1889,8 +1901,8 @@ multiclass MUBUF_Atomic <mubuf op, string name, RegisterClass rc,
defm _RTN_OFFSET : MUBUFAtomicOffset_m <
op, name#"_rtn_offset", (outs rc:$vdata),
- (ins rc:$vdata_in, SReg_128:$srsrc, mbuf_offset:$offset,
- SCSrc_32:$soffset, slc:$slc),
+ (ins rc:$vdata_in, SReg_128:$srsrc, SCSrc_32:$soffset,
+ mbuf_offset:$offset, slc:$slc),
name#" $vdata, $srsrc, $soffset"#"$offset"#" glc $slc",
[(set vt:$vdata,
(atomic (MUBUFOffsetAtomic v4i32:$srsrc, i32:$soffset, i16:$offset,
@@ -1909,9 +1921,8 @@ multiclass MUBUF_Load_Helper <mubuf op, string name, RegisterClass regClass,
let mayLoad = 1, mayStore = 0 in {
let offen = 0, idxen = 0, vaddr = 0 in {
defm _OFFSET : MUBUF_m <op, name#"_offset", (outs regClass:$vdata),
- (ins SReg_128:$srsrc,
- mbuf_offset:$offset, SCSrc_32:$soffset, glc:$glc,
- slc:$slc, tfe:$tfe),
+ (ins SReg_128:$srsrc, SCSrc_32:$soffset,
+ mbuf_offset:$offset, glc:$glc, slc:$slc, tfe:$tfe),
name#" $vdata, $srsrc, $soffset"#"$offset"#"$glc"#"$slc"#"$tfe",
[(set load_vt:$vdata, (ld (MUBUFOffset v4i32:$srsrc,
i32:$soffset, i16:$offset,
@@ -1920,7 +1931,7 @@ multiclass MUBUF_Load_Helper <mubuf op, string name, RegisterClass regClass,
let offen = 1, idxen = 0 in {
defm _OFFEN : MUBUF_m <op, name#"_offen", (outs regClass:$vdata),
- (ins SReg_128:$srsrc, VGPR_32:$vaddr,
+ (ins VGPR_32:$vaddr, SReg_128:$srsrc,
SCSrc_32:$soffset, mbuf_offset:$offset, glc:$glc, slc:$slc,
tfe:$tfe),
name#" $vdata, $vaddr, $srsrc, $soffset offen"#"$offset"#"$glc"#"$slc"#"$tfe", []>;
@@ -1928,45 +1939,48 @@ multiclass MUBUF_Load_Helper <mubuf op, string name, RegisterClass regClass,
let offen = 0, idxen = 1 in {
defm _IDXEN : MUBUF_m <op, name#"_idxen", (outs regClass:$vdata),
- (ins SReg_128:$srsrc, VGPR_32:$vaddr,
- mbuf_offset:$offset, SCSrc_32:$soffset, glc:$glc,
+ (ins VGPR_32:$vaddr, SReg_128:$srsrc,
+ SCSrc_32:$soffset, mbuf_offset:$offset, glc:$glc,
slc:$slc, tfe:$tfe),
name#" $vdata, $vaddr, $srsrc, $soffset idxen"#"$offset"#"$glc"#"$slc"#"$tfe", []>;
}
let offen = 1, idxen = 1 in {
defm _BOTHEN : MUBUF_m <op, name#"_bothen", (outs regClass:$vdata),
- (ins SReg_128:$srsrc, VReg_64:$vaddr,
- SCSrc_32:$soffset, mbuf_offset:$offset, glc:$glc, slc:$slc, tfe:$tfe),
+ (ins VReg_64:$vaddr, SReg_128:$srsrc, SCSrc_32:$soffset,
+ mbuf_offset:$offset, glc:$glc, slc:$slc, tfe:$tfe),
name#" $vdata, $vaddr, $srsrc, $soffset idxen offen"#"$offset"#"$glc"#"$slc"#"$tfe", []>;
}
- let offen = 0, idxen = 0, glc = 0, slc = 0, tfe = 0 in {
+ let offen = 0, idxen = 0 in {
defm _ADDR64 : MUBUFAddr64_m <op, name#"_addr64", (outs regClass:$vdata),
- (ins SReg_128:$srsrc, VReg_64:$vaddr,
- SCSrc_32:$soffset, mbuf_offset:$offset),
- name#" $vdata, $vaddr, $srsrc, $soffset addr64"#"$offset",
+ (ins VReg_64:$vaddr, SReg_128:$srsrc,
+ SCSrc_32:$soffset, mbuf_offset:$offset,
+ glc:$glc, slc:$slc, tfe:$tfe),
+ name#" $vdata, $vaddr, $srsrc, $soffset addr64"#"$offset"#
+ "$glc"#"$slc"#"$tfe",
[(set load_vt:$vdata, (ld (MUBUFAddr64 v4i32:$srsrc,
i64:$vaddr, i32:$soffset,
- i16:$offset)))]>;
+ i16:$offset, i1:$glc, i1:$slc,
+ i1:$tfe)))]>;
}
}
}
multiclass MUBUF_Store_Helper <mubuf op, string name, RegisterClass vdataClass,
- ValueType store_vt, SDPatternOperator st> {
+ ValueType store_vt = i32, SDPatternOperator st = null_frag> {
let mayLoad = 0, mayStore = 1 in {
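+    // (The new defaults let instantiations that carry no selection pattern,
+    // e.g. the buffer_store_format_* definitions below, pass just the op,
+    // name and register class.)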
defm : MUBUF_m <op, name, (outs),
- (ins vdataClass:$vdata, SReg_128:$srsrc, VGPR_32:$vaddr, SCSrc_32:$soffset,
+ (ins vdataClass:$vdata, VGPR_32:$vaddr, SReg_128:$srsrc, SCSrc_32:$soffset,
mbuf_offset:$offset, offen:$offen, idxen:$idxen, glc:$glc, slc:$slc,
tfe:$tfe),
name#" $vdata, $vaddr, $srsrc, $soffset"#"$offen"#"$idxen"#"$offset"#
- "$glc"#"$slc"#"$tfe", []>;
+ "$glc"#"$slc"#"$tfe", []>;
let offen = 0, idxen = 0, vaddr = 0 in {
defm _OFFSET : MUBUF_m <op, name#"_offset",(outs),
- (ins vdataClass:$vdata, SReg_128:$srsrc, mbuf_offset:$offset,
- SCSrc_32:$soffset, glc:$glc, slc:$slc, tfe:$tfe),
+ (ins vdataClass:$vdata, SReg_128:$srsrc, SCSrc_32:$soffset,
+ mbuf_offset:$offset, glc:$glc, slc:$slc, tfe:$tfe),
name#" $vdata, $srsrc, $soffset"#"$offset"#"$glc"#"$slc"#"$tfe",
[(st store_vt:$vdata, (MUBUFOffset v4i32:$srsrc, i32:$soffset,
i16:$offset, i1:$glc, i1:$slc, i1:$tfe))]>;
@@ -1974,21 +1988,40 @@ multiclass MUBUF_Store_Helper <mubuf op, string name, RegisterClass vdataClass,
let offen = 1, idxen = 0 in {
defm _OFFEN : MUBUF_m <op, name#"_offen", (outs),
- (ins vdataClass:$vdata, SReg_128:$srsrc, VGPR_32:$vaddr, SCSrc_32:$soffset,
- mbuf_offset:$offset, glc:$glc, slc:$slc, tfe:$tfe),
+ (ins vdataClass:$vdata, VGPR_32:$vaddr, SReg_128:$srsrc,
+ SCSrc_32:$soffset, mbuf_offset:$offset, glc:$glc,
+ slc:$slc, tfe:$tfe),
name#" $vdata, $vaddr, $srsrc, $soffset offen"#"$offset"#
"$glc"#"$slc"#"$tfe", []>;
} // end offen = 1, idxen = 0
- let offen = 0, idxen = 0, glc = 0, slc = 0, tfe = 0 in {
+ let offen = 0, idxen = 1 in {
+ defm _IDXEN : MUBUF_m <op, name#"_idxen", (outs),
+ (ins vdataClass:$vdata, VGPR_32:$vaddr, SReg_128:$srsrc,
+ SCSrc_32:$soffset, mbuf_offset:$offset, glc:$glc,
+ slc:$slc, tfe:$tfe),
+ name#" $vdata, $vaddr, $srsrc, $soffset idxen"#"$offset"#"$glc"#"$slc"#"$tfe", []>;
+ }
+
+ let offen = 1, idxen = 1 in {
+ defm _BOTHEN : MUBUF_m <op, name#"_bothen", (outs),
+ (ins vdataClass:$vdata, VReg_64:$vaddr, SReg_128:$srsrc, SCSrc_32:$soffset,
+ mbuf_offset:$offset, glc:$glc, slc:$slc, tfe:$tfe),
+ name#" $vdata, $vaddr, $srsrc, $soffset idxen offen"#"$offset"#"$glc"#"$slc"#"$tfe", []>;
+ }
+
+ let offen = 0, idxen = 0 in {
defm _ADDR64 : MUBUFAddr64_m <op, name#"_addr64", (outs),
- (ins vdataClass:$vdata, SReg_128:$srsrc,
- VReg_64:$vaddr, SCSrc_32:$soffset,
- mbuf_offset:$offset),
- name#" $vdata, $vaddr, $srsrc, $soffset addr64"#"$offset",
+ (ins vdataClass:$vdata, VReg_64:$vaddr, SReg_128:$srsrc,
+ SCSrc_32:$soffset,
+ mbuf_offset:$offset, glc:$glc, slc:$slc,
+ tfe:$tfe),
+ name#" $vdata, $vaddr, $srsrc, $soffset addr64"#
+ "$offset"#"$glc"#"$slc"#"$tfe",
[(st store_vt:$vdata,
(MUBUFAddr64 v4i32:$srsrc, i64:$vaddr,
- i32:$soffset, i16:$offset))]>;
+ i32:$soffset, i16:$offset,
+ i1:$glc, i1:$slc, i1:$tfe))]>;
}
} // End mayLoad = 0, mayStore = 1
}
@@ -2182,15 +2215,6 @@ def getVOPe32 : InstrMapping {
let ValueCols = [["4"]];
}
-// Maps an original opcode to its commuted version
-def getCommuteRev : InstrMapping {
- let FilterClass = "VOP2_REV";
- let RowFields = ["RevOp"];
- let ColFields = ["IsOrig"];
- let KeyCol = ["1"];
- let ValueCols = [["0"]];
-}
-
def getMaskedMIMGOp : InstrMapping {
let FilterClass = "MIMG_Mask";
let RowFields = ["Op"];
@@ -2208,6 +2232,33 @@ def getCommuteOrig : InstrMapping {
let ValueCols = [["1"]];
}
+// Maps an original opcode to its commuted version
+def getCommuteRev : InstrMapping {
+ let FilterClass = "VOP2_REV";
+ let RowFields = ["RevOp"];
+ let ColFields = ["IsOrig"];
+ let KeyCol = ["1"];
+ let ValueCols = [["0"]];
+}
+
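+// Maps a commuted opcode to its original version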
+def getCommuteCmpOrig : InstrMapping {
+ let FilterClass = "VOP2_REV";
+ let RowFields = ["RevOp"];
+ let ColFields = ["IsOrig"];
+ let KeyCol = ["0"];
+ let ValueCols = [["1"]];
+}
+
+// Maps an original opcode to its commuted version
+def getCommuteCmpRev : InstrMapping {
+ let FilterClass = "VOP2_REV";
+ let RowFields = ["RevOp"];
+ let ColFields = ["IsOrig"];
+ let KeyCol = ["1"];
+ let ValueCols = [["0"]];
+}
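+// (TableGen emits each InstrMapping above as a same-named lookup helper,
+// e.g. AMDGPU::getCommuteCmpRev(Opcode), which returns -1 when no mapping
+// exists.)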
+
def getMCOpcodeGen : InstrMapping {
let FilterClass = "SIMCInstr";
let RowFields = ["PseudoInstr"];
diff --git a/lib/Target/R600/SIInstructions.td b/lib/Target/R600/SIInstructions.td
index 4f72e99..95b2470 100644
--- a/lib/Target/R600/SIInstructions.td
+++ b/lib/Target/R600/SIInstructions.td
@@ -28,6 +28,8 @@ def SendMsgImm : Operand<i32> {
def isGCN : Predicate<"Subtarget->getGeneration() "
">= AMDGPUSubtarget::SOUTHERN_ISLANDS">;
+def isSI : Predicate<"Subtarget->getGeneration() "
+ "== AMDGPUSubtarget::SOUTHERN_ISLANDS">;
def isSICI : Predicate<
"Subtarget->getGeneration() == AMDGPUSubtarget::SOUTHERN_ISLANDS ||"
"Subtarget->getGeneration() == AMDGPUSubtarget::SEA_ISLANDS"
@@ -153,7 +155,9 @@ defm S_FLBIT_I32_B32 : SOP1_32 <sop1<0x15, 0x12>, "s_flbit_i32_b32",
>;
defm S_FLBIT_I32_B64 : SOP1_32_64 <sop1<0x16, 0x13>, "s_flbit_i32_b64", []>;
-defm S_FLBIT_I32 : SOP1_32 <sop1<0x17, 0x14>, "s_flbit_i32", []>;
+defm S_FLBIT_I32 : SOP1_32 <sop1<0x17, 0x14>, "s_flbit_i32",
+ [(set i32:$dst, (int_AMDGPU_flbit_i32 i32:$src0))]
+>;
defm S_FLBIT_I32_I64 : SOP1_32_64 <sop1<0x18, 0x15>, "s_flbit_i32_i64", []>;
defm S_SEXT_I32_I8 : SOP1_32 <sop1<0x19, 0x16>, "s_sext_i32_i8",
[(set i32:$dst, (sext_inreg i32:$src0, i8))]
@@ -304,7 +308,8 @@ defm S_ASHR_I64 : SOP2_64_32 <sop2<0x23, 0x21>, "s_ashr_i64",
>;
} // End Defs = [SCC]
-defm S_BFM_B32 : SOP2_32 <sop2<0x24, 0x22>, "s_bfm_b32", []>;
+defm S_BFM_B32 : SOP2_32 <sop2<0x24, 0x22>, "s_bfm_b32",
+ [(set i32:$dst, (AMDGPUbfm i32:$src0, i32:$src1))]>;
defm S_BFM_B64 : SOP2_64 <sop2<0x25, 0x23>, "s_bfm_b64", []>;
defm S_MUL_I32 : SOP2_32 <sop2<0x26, 0x24>, "s_mul_i32",
[(set i32:$dst, (mul i32:$src0, i32:$src1))]
@@ -505,31 +510,30 @@ def S_TTRACEDATA : SOPP <0x00000016, (ins), "s_ttracedata"> {
// VOPC Instructions
//===----------------------------------------------------------------------===//
-let isCompare = 1 in {
+let isCompare = 1, isCommutable = 1 in {
defm V_CMP_F_F32 : VOPC_F32 <vopc<0x0, 0x40>, "v_cmp_f_f32">;
-defm V_CMP_LT_F32 : VOPC_F32 <vopc<0x1, 0x41>, "v_cmp_lt_f32", COND_OLT>;
+defm V_CMP_LT_F32 : VOPC_F32 <vopc<0x1, 0x41>, "v_cmp_lt_f32", COND_OLT, "v_cmp_gt_f32">;
defm V_CMP_EQ_F32 : VOPC_F32 <vopc<0x2, 0x42>, "v_cmp_eq_f32", COND_OEQ>;
-defm V_CMP_LE_F32 : VOPC_F32 <vopc<0x3, 0x43>, "v_cmp_le_f32", COND_OLE>;
+defm V_CMP_LE_F32 : VOPC_F32 <vopc<0x3, 0x43>, "v_cmp_le_f32", COND_OLE, "v_cmp_ge_f32">;
defm V_CMP_GT_F32 : VOPC_F32 <vopc<0x4, 0x44>, "v_cmp_gt_f32", COND_OGT>;
defm V_CMP_LG_F32 : VOPC_F32 <vopc<0x5, 0x45>, "v_cmp_lg_f32", COND_ONE>;
defm V_CMP_GE_F32 : VOPC_F32 <vopc<0x6, 0x46>, "v_cmp_ge_f32", COND_OGE>;
defm V_CMP_O_F32 : VOPC_F32 <vopc<0x7, 0x47>, "v_cmp_o_f32", COND_O>;
defm V_CMP_U_F32 : VOPC_F32 <vopc<0x8, 0x48>, "v_cmp_u_f32", COND_UO>;
-defm V_CMP_NGE_F32 : VOPC_F32 <vopc<0x9, 0x49>, "v_cmp_nge_f32", COND_ULT>;
+defm V_CMP_NGE_F32 : VOPC_F32 <vopc<0x9, 0x49>, "v_cmp_nge_f32", COND_ULT, "v_cmp_nle_f32">;
defm V_CMP_NLG_F32 : VOPC_F32 <vopc<0xa, 0x4a>, "v_cmp_nlg_f32", COND_UEQ>;
-defm V_CMP_NGT_F32 : VOPC_F32 <vopc<0xb, 0x4b>, "v_cmp_ngt_f32", COND_ULE>;
+defm V_CMP_NGT_F32 : VOPC_F32 <vopc<0xb, 0x4b>, "v_cmp_ngt_f32", COND_ULE, "v_cmp_nlt_f32">;
defm V_CMP_NLE_F32 : VOPC_F32 <vopc<0xc, 0x4c>, "v_cmp_nle_f32", COND_UGT>;
defm V_CMP_NEQ_F32 : VOPC_F32 <vopc<0xd, 0x4d>, "v_cmp_neq_f32", COND_UNE>;
defm V_CMP_NLT_F32 : VOPC_F32 <vopc<0xe, 0x4e>, "v_cmp_nlt_f32", COND_UGE>;
defm V_CMP_TRU_F32 : VOPC_F32 <vopc<0xf, 0x4f>, "v_cmp_tru_f32">;
-let hasSideEffects = 1 in {
defm V_CMPX_F_F32 : VOPCX_F32 <vopc<0x10, 0x50>, "v_cmpx_f_f32">;
-defm V_CMPX_LT_F32 : VOPCX_F32 <vopc<0x11, 0x51>, "v_cmpx_lt_f32">;
+defm V_CMPX_LT_F32 : VOPCX_F32 <vopc<0x11, 0x51>, "v_cmpx_lt_f32", "v_cmpx_gt_f32">;
defm V_CMPX_EQ_F32 : VOPCX_F32 <vopc<0x12, 0x52>, "v_cmpx_eq_f32">;
-defm V_CMPX_LE_F32 : VOPCX_F32 <vopc<0x13, 0x53>, "v_cmpx_le_f32">;
+defm V_CMPX_LE_F32 : VOPCX_F32 <vopc<0x13, 0x53>, "v_cmpx_le_f32", "v_cmpx_ge_f32">;
defm V_CMPX_GT_F32 : VOPCX_F32 <vopc<0x14, 0x54>, "v_cmpx_gt_f32">;
defm V_CMPX_LG_F32 : VOPCX_F32 <vopc<0x15, 0x55>, "v_cmpx_lg_f32">;
defm V_CMPX_GE_F32 : VOPCX_F32 <vopc<0x16, 0x56>, "v_cmpx_ge_f32">;
@@ -543,233 +547,207 @@ defm V_CMPX_NEQ_F32 : VOPCX_F32 <vopc<0x1d, 0x5d>, "v_cmpx_neq_f32">;
defm V_CMPX_NLT_F32 : VOPCX_F32 <vopc<0x1e, 0x5e>, "v_cmpx_nlt_f32">;
defm V_CMPX_TRU_F32 : VOPCX_F32 <vopc<0x1f, 0x5f>, "v_cmpx_tru_f32">;
-} // End hasSideEffects = 1
defm V_CMP_F_F64 : VOPC_F64 <vopc<0x20, 0x60>, "v_cmp_f_f64">;
-defm V_CMP_LT_F64 : VOPC_F64 <vopc<0x21, 0x61>, "v_cmp_lt_f64", COND_OLT>;
+defm V_CMP_LT_F64 : VOPC_F64 <vopc<0x21, 0x61>, "v_cmp_lt_f64", COND_OLT, "v_cmp_gt_f64">;
defm V_CMP_EQ_F64 : VOPC_F64 <vopc<0x22, 0x62>, "v_cmp_eq_f64", COND_OEQ>;
-defm V_CMP_LE_F64 : VOPC_F64 <vopc<0x23, 0x63>, "v_cmp_le_f64", COND_OLE>;
+defm V_CMP_LE_F64 : VOPC_F64 <vopc<0x23, 0x63>, "v_cmp_le_f64", COND_OLE, "v_cmp_ge_f64">;
defm V_CMP_GT_F64 : VOPC_F64 <vopc<0x24, 0x64>, "v_cmp_gt_f64", COND_OGT>;
defm V_CMP_LG_F64 : VOPC_F64 <vopc<0x25, 0x65>, "v_cmp_lg_f64", COND_ONE>;
defm V_CMP_GE_F64 : VOPC_F64 <vopc<0x26, 0x66>, "v_cmp_ge_f64", COND_OGE>;
defm V_CMP_O_F64 : VOPC_F64 <vopc<0x27, 0x67>, "v_cmp_o_f64", COND_O>;
defm V_CMP_U_F64 : VOPC_F64 <vopc<0x28, 0x68>, "v_cmp_u_f64", COND_UO>;
-defm V_CMP_NGE_F64 : VOPC_F64 <vopc<0x29, 0x69>, "v_cmp_nge_f64", COND_ULT>;
+defm V_CMP_NGE_F64 : VOPC_F64 <vopc<0x29, 0x69>, "v_cmp_nge_f64", COND_ULT, "v_cmp_nle_f64">;
defm V_CMP_NLG_F64 : VOPC_F64 <vopc<0x2a, 0x6a>, "v_cmp_nlg_f64", COND_UEQ>;
-defm V_CMP_NGT_F64 : VOPC_F64 <vopc<0x2b, 0x6b>, "v_cmp_ngt_f64", COND_ULE>;
+defm V_CMP_NGT_F64 : VOPC_F64 <vopc<0x2b, 0x6b>, "v_cmp_ngt_f64", COND_ULE, "v_cmp_nlt_f64">;
defm V_CMP_NLE_F64 : VOPC_F64 <vopc<0x2c, 0x6c>, "v_cmp_nle_f64", COND_UGT>;
defm V_CMP_NEQ_F64 : VOPC_F64 <vopc<0x2d, 0x6d>, "v_cmp_neq_f64", COND_UNE>;
defm V_CMP_NLT_F64 : VOPC_F64 <vopc<0x2e, 0x6e>, "v_cmp_nlt_f64", COND_UGE>;
defm V_CMP_TRU_F64 : VOPC_F64 <vopc<0x2f, 0x6f>, "v_cmp_tru_f64">;
-let hasSideEffects = 1 in {
defm V_CMPX_F_F64 : VOPCX_F64 <vopc<0x30, 0x70>, "v_cmpx_f_f64">;
-defm V_CMPX_LT_F64 : VOPCX_F64 <vopc<0x31, 0x71>, "v_cmpx_lt_f64">;
+defm V_CMPX_LT_F64 : VOPCX_F64 <vopc<0x31, 0x71>, "v_cmpx_lt_f64", "v_cmpx_gt_f64">;
defm V_CMPX_EQ_F64 : VOPCX_F64 <vopc<0x32, 0x72>, "v_cmpx_eq_f64">;
-defm V_CMPX_LE_F64 : VOPCX_F64 <vopc<0x33, 0x73>, "v_cmpx_le_f64">;
+defm V_CMPX_LE_F64 : VOPCX_F64 <vopc<0x33, 0x73>, "v_cmpx_le_f64", "v_cmpx_ge_f64">;
defm V_CMPX_GT_F64 : VOPCX_F64 <vopc<0x34, 0x74>, "v_cmpx_gt_f64">;
defm V_CMPX_LG_F64 : VOPCX_F64 <vopc<0x35, 0x75>, "v_cmpx_lg_f64">;
defm V_CMPX_GE_F64 : VOPCX_F64 <vopc<0x36, 0x76>, "v_cmpx_ge_f64">;
defm V_CMPX_O_F64 : VOPCX_F64 <vopc<0x37, 0x77>, "v_cmpx_o_f64">;
defm V_CMPX_U_F64 : VOPCX_F64 <vopc<0x38, 0x78>, "v_cmpx_u_f64">;
-defm V_CMPX_NGE_F64 : VOPCX_F64 <vopc<0x39, 0x79>, "v_cmpx_nge_f64">;
+defm V_CMPX_NGE_F64 : VOPCX_F64 <vopc<0x39, 0x79>, "v_cmpx_nge_f64", "v_cmpx_nle_f64">;
defm V_CMPX_NLG_F64 : VOPCX_F64 <vopc<0x3a, 0x7a>, "v_cmpx_nlg_f64">;
-defm V_CMPX_NGT_F64 : VOPCX_F64 <vopc<0x3b, 0x7b>, "v_cmpx_ngt_f64">;
+defm V_CMPX_NGT_F64 : VOPCX_F64 <vopc<0x3b, 0x7b>, "v_cmpx_ngt_f64", "v_cmpx_nlt_f64">;
defm V_CMPX_NLE_F64 : VOPCX_F64 <vopc<0x3c, 0x7c>, "v_cmpx_nle_f64">;
defm V_CMPX_NEQ_F64 : VOPCX_F64 <vopc<0x3d, 0x7d>, "v_cmpx_neq_f64">;
defm V_CMPX_NLT_F64 : VOPCX_F64 <vopc<0x3e, 0x7e>, "v_cmpx_nlt_f64">;
defm V_CMPX_TRU_F64 : VOPCX_F64 <vopc<0x3f, 0x7f>, "v_cmpx_tru_f64">;
-} // End hasSideEffects = 1
let SubtargetPredicate = isSICI in {
defm V_CMPS_F_F32 : VOPC_F32 <vopc<0x40>, "v_cmps_f_f32">;
-defm V_CMPS_LT_F32 : VOPC_F32 <vopc<0x41>, "v_cmps_lt_f32">;
+defm V_CMPS_LT_F32 : VOPC_F32 <vopc<0x41>, "v_cmps_lt_f32", COND_NULL, "v_cmps_gt_f32">;
defm V_CMPS_EQ_F32 : VOPC_F32 <vopc<0x42>, "v_cmps_eq_f32">;
-defm V_CMPS_LE_F32 : VOPC_F32 <vopc<0x43>, "v_cmps_le_f32">;
+defm V_CMPS_LE_F32 : VOPC_F32 <vopc<0x43>, "v_cmps_le_f32", COND_NULL, "v_cmps_ge_f32">;
defm V_CMPS_GT_F32 : VOPC_F32 <vopc<0x44>, "v_cmps_gt_f32">;
defm V_CMPS_LG_F32 : VOPC_F32 <vopc<0x45>, "v_cmps_lg_f32">;
defm V_CMPS_GE_F32 : VOPC_F32 <vopc<0x46>, "v_cmps_ge_f32">;
defm V_CMPS_O_F32 : VOPC_F32 <vopc<0x47>, "v_cmps_o_f32">;
defm V_CMPS_U_F32 : VOPC_F32 <vopc<0x48>, "v_cmps_u_f32">;
-defm V_CMPS_NGE_F32 : VOPC_F32 <vopc<0x49>, "v_cmps_nge_f32">;
+defm V_CMPS_NGE_F32 : VOPC_F32 <vopc<0x49>, "v_cmps_nge_f32", COND_NULL, "v_cmps_nle_f32">;
defm V_CMPS_NLG_F32 : VOPC_F32 <vopc<0x4a>, "v_cmps_nlg_f32">;
-defm V_CMPS_NGT_F32 : VOPC_F32 <vopc<0x4b>, "v_cmps_ngt_f32">;
+defm V_CMPS_NGT_F32 : VOPC_F32 <vopc<0x4b>, "v_cmps_ngt_f32", COND_NULL, "v_cmps_nlt_f32">;
defm V_CMPS_NLE_F32 : VOPC_F32 <vopc<0x4c>, "v_cmps_nle_f32">;
defm V_CMPS_NEQ_F32 : VOPC_F32 <vopc<0x4d>, "v_cmps_neq_f32">;
defm V_CMPS_NLT_F32 : VOPC_F32 <vopc<0x4e>, "v_cmps_nlt_f32">;
defm V_CMPS_TRU_F32 : VOPC_F32 <vopc<0x4f>, "v_cmps_tru_f32">;
-let hasSideEffects = 1 in {
defm V_CMPSX_F_F32 : VOPCX_F32 <vopc<0x50>, "v_cmpsx_f_f32">;
-defm V_CMPSX_LT_F32 : VOPCX_F32 <vopc<0x51>, "v_cmpsx_lt_f32">;
+defm V_CMPSX_LT_F32 : VOPCX_F32 <vopc<0x51>, "v_cmpsx_lt_f32", "v_cmpsx_gt_f32">;
defm V_CMPSX_EQ_F32 : VOPCX_F32 <vopc<0x52>, "v_cmpsx_eq_f32">;
-defm V_CMPSX_LE_F32 : VOPCX_F32 <vopc<0x53>, "v_cmpsx_le_f32">;
+defm V_CMPSX_LE_F32 : VOPCX_F32 <vopc<0x53>, "v_cmpsx_le_f32", "v_cmpsx_ge_f32">;
defm V_CMPSX_GT_F32 : VOPCX_F32 <vopc<0x54>, "v_cmpsx_gt_f32">;
defm V_CMPSX_LG_F32 : VOPCX_F32 <vopc<0x55>, "v_cmpsx_lg_f32">;
defm V_CMPSX_GE_F32 : VOPCX_F32 <vopc<0x56>, "v_cmpsx_ge_f32">;
defm V_CMPSX_O_F32 : VOPCX_F32 <vopc<0x57>, "v_cmpsx_o_f32">;
defm V_CMPSX_U_F32 : VOPCX_F32 <vopc<0x58>, "v_cmpsx_u_f32">;
-defm V_CMPSX_NGE_F32 : VOPCX_F32 <vopc<0x59>, "v_cmpsx_nge_f32">;
+defm V_CMPSX_NGE_F32 : VOPCX_F32 <vopc<0x59>, "v_cmpsx_nge_f32", "v_cmpsx_nle_f32">;
defm V_CMPSX_NLG_F32 : VOPCX_F32 <vopc<0x5a>, "v_cmpsx_nlg_f32">;
-defm V_CMPSX_NGT_F32 : VOPCX_F32 <vopc<0x5b>, "v_cmpsx_ngt_f32">;
+defm V_CMPSX_NGT_F32 : VOPCX_F32 <vopc<0x5b>, "v_cmpsx_ngt_f32", "v_cmpsx_nlt_f32">;
defm V_CMPSX_NLE_F32 : VOPCX_F32 <vopc<0x5c>, "v_cmpsx_nle_f32">;
defm V_CMPSX_NEQ_F32 : VOPCX_F32 <vopc<0x5d>, "v_cmpsx_neq_f32">;
defm V_CMPSX_NLT_F32 : VOPCX_F32 <vopc<0x5e>, "v_cmpsx_nlt_f32">;
defm V_CMPSX_TRU_F32 : VOPCX_F32 <vopc<0x5f>, "v_cmpsx_tru_f32">;
-} // End hasSideEffects = 1
defm V_CMPS_F_F64 : VOPC_F64 <vopc<0x60>, "v_cmps_f_f64">;
-defm V_CMPS_LT_F64 : VOPC_F64 <vopc<0x61>, "v_cmps_lt_f64">;
+defm V_CMPS_LT_F64 : VOPC_F64 <vopc<0x61>, "v_cmps_lt_f64", COND_NULL, "v_cmps_gt_f64">;
defm V_CMPS_EQ_F64 : VOPC_F64 <vopc<0x62>, "v_cmps_eq_f64">;
-defm V_CMPS_LE_F64 : VOPC_F64 <vopc<0x63>, "v_cmps_le_f64">;
+defm V_CMPS_LE_F64 : VOPC_F64 <vopc<0x63>, "v_cmps_le_f64", COND_NULL, "v_cmps_ge_f64">;
defm V_CMPS_GT_F64 : VOPC_F64 <vopc<0x64>, "v_cmps_gt_f64">;
defm V_CMPS_LG_F64 : VOPC_F64 <vopc<0x65>, "v_cmps_lg_f64">;
defm V_CMPS_GE_F64 : VOPC_F64 <vopc<0x66>, "v_cmps_ge_f64">;
defm V_CMPS_O_F64 : VOPC_F64 <vopc<0x67>, "v_cmps_o_f64">;
defm V_CMPS_U_F64 : VOPC_F64 <vopc<0x68>, "v_cmps_u_f64">;
-defm V_CMPS_NGE_F64 : VOPC_F64 <vopc<0x69>, "v_cmps_nge_f64">;
+defm V_CMPS_NGE_F64 : VOPC_F64 <vopc<0x69>, "v_cmps_nge_f64", COND_NULL, "v_cmps_nle_f64">;
defm V_CMPS_NLG_F64 : VOPC_F64 <vopc<0x6a>, "v_cmps_nlg_f64">;
-defm V_CMPS_NGT_F64 : VOPC_F64 <vopc<0x6b>, "v_cmps_ngt_f64">;
+defm V_CMPS_NGT_F64 : VOPC_F64 <vopc<0x6b>, "v_cmps_ngt_f64", COND_NULL, "v_cmps_nlt_f64">;
defm V_CMPS_NLE_F64 : VOPC_F64 <vopc<0x6c>, "v_cmps_nle_f64">;
defm V_CMPS_NEQ_F64 : VOPC_F64 <vopc<0x6d>, "v_cmps_neq_f64">;
defm V_CMPS_NLT_F64 : VOPC_F64 <vopc<0x6e>, "v_cmps_nlt_f64">;
defm V_CMPS_TRU_F64 : VOPC_F64 <vopc<0x6f>, "v_cmps_tru_f64">;
-let hasSideEffects = 1, Defs = [EXEC] in {
-
-defm V_CMPSX_F_F64 : VOPC_F64 <vopc<0x70>, "v_cmpsx_f_f64">;
-defm V_CMPSX_LT_F64 : VOPC_F64 <vopc<0x71>, "v_cmpsx_lt_f64">;
-defm V_CMPSX_EQ_F64 : VOPC_F64 <vopc<0x72>, "v_cmpsx_eq_f64">;
-defm V_CMPSX_LE_F64 : VOPC_F64 <vopc<0x73>, "v_cmpsx_le_f64">;
-defm V_CMPSX_GT_F64 : VOPC_F64 <vopc<0x74>, "v_cmpsx_gt_f64">;
-defm V_CMPSX_LG_F64 : VOPC_F64 <vopc<0x75>, "v_cmpsx_lg_f64">;
-defm V_CMPSX_GE_F64 : VOPC_F64 <vopc<0x76>, "v_cmpsx_ge_f64">;
-defm V_CMPSX_O_F64 : VOPC_F64 <vopc<0x77>, "v_cmpsx_o_f64">;
-defm V_CMPSX_U_F64 : VOPC_F64 <vopc<0x78>, "v_cmpsx_u_f64">;
-defm V_CMPSX_NGE_F64 : VOPC_F64 <vopc<0x79>, "v_cmpsx_nge_f64">;
-defm V_CMPSX_NLG_F64 : VOPC_F64 <vopc<0x7a>, "v_cmpsx_nlg_f64">;
-defm V_CMPSX_NGT_F64 : VOPC_F64 <vopc<0x7b>, "v_cmpsx_ngt_f64">;
-defm V_CMPSX_NLE_F64 : VOPC_F64 <vopc<0x7c>, "v_cmpsx_nle_f64">;
-defm V_CMPSX_NEQ_F64 : VOPC_F64 <vopc<0x7d>, "v_cmpsx_neq_f64">;
-defm V_CMPSX_NLT_F64 : VOPC_F64 <vopc<0x7e>, "v_cmpsx_nlt_f64">;
-defm V_CMPSX_TRU_F64 : VOPC_F64 <vopc<0x7f>, "v_cmpsx_tru_f64">;
-
-} // End hasSideEffects = 1, Defs = [EXEC]
+
+defm V_CMPSX_F_F64 : VOPCX_F64 <vopc<0x70>, "v_cmpsx_f_f64">;
+defm V_CMPSX_LT_F64 : VOPCX_F64 <vopc<0x71>, "v_cmpsx_lt_f64", "v_cmpsx_gt_f64">;
+defm V_CMPSX_EQ_F64 : VOPCX_F64 <vopc<0x72>, "v_cmpsx_eq_f64">;
+defm V_CMPSX_LE_F64 : VOPCX_F64 <vopc<0x73>, "v_cmpsx_le_f64", "v_cmpsx_ge_f64">;
+defm V_CMPSX_GT_F64 : VOPCX_F64 <vopc<0x74>, "v_cmpsx_gt_f64">;
+defm V_CMPSX_LG_F64 : VOPCX_F64 <vopc<0x75>, "v_cmpsx_lg_f64">;
+defm V_CMPSX_GE_F64 : VOPCX_F64 <vopc<0x76>, "v_cmpsx_ge_f64">;
+defm V_CMPSX_O_F64 : VOPCX_F64 <vopc<0x77>, "v_cmpsx_o_f64">;
+defm V_CMPSX_U_F64 : VOPCX_F64 <vopc<0x78>, "v_cmpsx_u_f64">;
+defm V_CMPSX_NGE_F64 : VOPCX_F64 <vopc<0x79>, "v_cmpsx_nge_f64", "v_cmpsx_nle_f64">;
+defm V_CMPSX_NLG_F64 : VOPCX_F64 <vopc<0x7a>, "v_cmpsx_nlg_f64">;
+defm V_CMPSX_NGT_F64 : VOPCX_F64 <vopc<0x7b>, "v_cmpsx_ngt_f64", "v_cmpsx_nlt_f64">;
+defm V_CMPSX_NLE_F64 : VOPCX_F64 <vopc<0x7c>, "v_cmpsx_nle_f64">;
+defm V_CMPSX_NEQ_F64 : VOPCX_F64 <vopc<0x7d>, "v_cmpsx_neq_f64">;
+defm V_CMPSX_NLT_F64 : VOPCX_F64 <vopc<0x7e>, "v_cmpsx_nlt_f64">;
+defm V_CMPSX_TRU_F64 : VOPCX_F64 <vopc<0x7f>, "v_cmpsx_tru_f64">;
} // End SubtargetPredicate = isSICI
defm V_CMP_F_I32 : VOPC_I32 <vopc<0x80, 0xc0>, "v_cmp_f_i32">;
-defm V_CMP_LT_I32 : VOPC_I32 <vopc<0x81, 0xc1>, "v_cmp_lt_i32", COND_SLT>;
+defm V_CMP_LT_I32 : VOPC_I32 <vopc<0x81, 0xc1>, "v_cmp_lt_i32", COND_SLT, "v_cmp_gt_i32">;
defm V_CMP_EQ_I32 : VOPC_I32 <vopc<0x82, 0xc2>, "v_cmp_eq_i32", COND_EQ>;
-defm V_CMP_LE_I32 : VOPC_I32 <vopc<0x83, 0xc3>, "v_cmp_le_i32", COND_SLE>;
+defm V_CMP_LE_I32 : VOPC_I32 <vopc<0x83, 0xc3>, "v_cmp_le_i32", COND_SLE, "v_cmp_ge_i32">;
defm V_CMP_GT_I32 : VOPC_I32 <vopc<0x84, 0xc4>, "v_cmp_gt_i32", COND_SGT>;
defm V_CMP_NE_I32 : VOPC_I32 <vopc<0x85, 0xc5>, "v_cmp_ne_i32", COND_NE>;
defm V_CMP_GE_I32 : VOPC_I32 <vopc<0x86, 0xc6>, "v_cmp_ge_i32", COND_SGE>;
defm V_CMP_T_I32 : VOPC_I32 <vopc<0x87, 0xc7>, "v_cmp_t_i32">;
-let hasSideEffects = 1 in {
defm V_CMPX_F_I32 : VOPCX_I32 <vopc<0x90, 0xd0>, "v_cmpx_f_i32">;
-defm V_CMPX_LT_I32 : VOPCX_I32 <vopc<0x91, 0xd1>, "v_cmpx_lt_i32">;
+defm V_CMPX_LT_I32 : VOPCX_I32 <vopc<0x91, 0xd1>, "v_cmpx_lt_i32", "v_cmpx_gt_i32">;
defm V_CMPX_EQ_I32 : VOPCX_I32 <vopc<0x92, 0xd2>, "v_cmpx_eq_i32">;
-defm V_CMPX_LE_I32 : VOPCX_I32 <vopc<0x93, 0xd3>, "v_cmpx_le_i32">;
+defm V_CMPX_LE_I32 : VOPCX_I32 <vopc<0x93, 0xd3>, "v_cmpx_le_i32", "v_cmpx_ge_i32">;
defm V_CMPX_GT_I32 : VOPCX_I32 <vopc<0x94, 0xd4>, "v_cmpx_gt_i32">;
defm V_CMPX_NE_I32 : VOPCX_I32 <vopc<0x95, 0xd5>, "v_cmpx_ne_i32">;
defm V_CMPX_GE_I32 : VOPCX_I32 <vopc<0x96, 0xd6>, "v_cmpx_ge_i32">;
defm V_CMPX_T_I32 : VOPCX_I32 <vopc<0x97, 0xd7>, "v_cmpx_t_i32">;
-} // End hasSideEffects = 1
defm V_CMP_F_I64 : VOPC_I64 <vopc<0xa0, 0xe0>, "v_cmp_f_i64">;
-defm V_CMP_LT_I64 : VOPC_I64 <vopc<0xa1, 0xe1>, "v_cmp_lt_i64", COND_SLT>;
+defm V_CMP_LT_I64 : VOPC_I64 <vopc<0xa1, 0xe1>, "v_cmp_lt_i64", COND_SLT, "v_cmp_gt_i64">;
defm V_CMP_EQ_I64 : VOPC_I64 <vopc<0xa2, 0xe2>, "v_cmp_eq_i64", COND_EQ>;
-defm V_CMP_LE_I64 : VOPC_I64 <vopc<0xa3, 0xe3>, "v_cmp_le_i64", COND_SLE>;
+defm V_CMP_LE_I64 : VOPC_I64 <vopc<0xa3, 0xe3>, "v_cmp_le_i64", COND_SLE, "v_cmp_ge_i64">;
defm V_CMP_GT_I64 : VOPC_I64 <vopc<0xa4, 0xe4>, "v_cmp_gt_i64", COND_SGT>;
defm V_CMP_NE_I64 : VOPC_I64 <vopc<0xa5, 0xe5>, "v_cmp_ne_i64", COND_NE>;
defm V_CMP_GE_I64 : VOPC_I64 <vopc<0xa6, 0xe6>, "v_cmp_ge_i64", COND_SGE>;
defm V_CMP_T_I64 : VOPC_I64 <vopc<0xa7, 0xe7>, "v_cmp_t_i64">;
-let hasSideEffects = 1 in {
defm V_CMPX_F_I64 : VOPCX_I64 <vopc<0xb0, 0xf0>, "v_cmpx_f_i64">;
-defm V_CMPX_LT_I64 : VOPCX_I64 <vopc<0xb1, 0xf1>, "v_cmpx_lt_i64">;
+defm V_CMPX_LT_I64 : VOPCX_I64 <vopc<0xb1, 0xf1>, "v_cmpx_lt_i64", "v_cmpx_gt_i64">;
defm V_CMPX_EQ_I64 : VOPCX_I64 <vopc<0xb2, 0xf2>, "v_cmpx_eq_i64">;
-defm V_CMPX_LE_I64 : VOPCX_I64 <vopc<0xb3, 0xf3>, "v_cmpx_le_i64">;
+defm V_CMPX_LE_I64 : VOPCX_I64 <vopc<0xb3, 0xf3>, "v_cmpx_le_i64", "v_cmpx_ge_i64">;
defm V_CMPX_GT_I64 : VOPCX_I64 <vopc<0xb4, 0xf4>, "v_cmpx_gt_i64">;
defm V_CMPX_NE_I64 : VOPCX_I64 <vopc<0xb5, 0xf5>, "v_cmpx_ne_i64">;
defm V_CMPX_GE_I64 : VOPCX_I64 <vopc<0xb6, 0xf6>, "v_cmpx_ge_i64">;
defm V_CMPX_T_I64 : VOPCX_I64 <vopc<0xb7, 0xf7>, "v_cmpx_t_i64">;
-} // End hasSideEffects = 1
defm V_CMP_F_U32 : VOPC_I32 <vopc<0xc0, 0xc8>, "v_cmp_f_u32">;
-defm V_CMP_LT_U32 : VOPC_I32 <vopc<0xc1, 0xc9>, "v_cmp_lt_u32", COND_ULT>;
+defm V_CMP_LT_U32 : VOPC_I32 <vopc<0xc1, 0xc9>, "v_cmp_lt_u32", COND_ULT, "v_cmp_gt_u32">;
defm V_CMP_EQ_U32 : VOPC_I32 <vopc<0xc2, 0xca>, "v_cmp_eq_u32", COND_EQ>;
-defm V_CMP_LE_U32 : VOPC_I32 <vopc<0xc3, 0xcb>, "v_cmp_le_u32", COND_ULE>;
+defm V_CMP_LE_U32 : VOPC_I32 <vopc<0xc3, 0xcb>, "v_cmp_le_u32", COND_ULE, "v_cmp_ge_u32">;
defm V_CMP_GT_U32 : VOPC_I32 <vopc<0xc4, 0xcc>, "v_cmp_gt_u32", COND_UGT>;
defm V_CMP_NE_U32 : VOPC_I32 <vopc<0xc5, 0xcd>, "v_cmp_ne_u32", COND_NE>;
defm V_CMP_GE_U32 : VOPC_I32 <vopc<0xc6, 0xce>, "v_cmp_ge_u32", COND_UGE>;
defm V_CMP_T_U32 : VOPC_I32 <vopc<0xc7, 0xcf>, "v_cmp_t_u32">;
-let hasSideEffects = 1 in {
defm V_CMPX_F_U32 : VOPCX_I32 <vopc<0xd0, 0xd8>, "v_cmpx_f_u32">;
-defm V_CMPX_LT_U32 : VOPCX_I32 <vopc<0xd1, 0xd9>, "v_cmpx_lt_u32">;
+defm V_CMPX_LT_U32 : VOPCX_I32 <vopc<0xd1, 0xd9>, "v_cmpx_lt_u32", "v_cmpx_gt_u32">;
defm V_CMPX_EQ_U32 : VOPCX_I32 <vopc<0xd2, 0xda>, "v_cmpx_eq_u32">;
-defm V_CMPX_LE_U32 : VOPCX_I32 <vopc<0xd3, 0xdb>, "v_cmpx_le_u32">;
+defm V_CMPX_LE_U32 : VOPCX_I32 <vopc<0xd3, 0xdb>, "v_cmpx_le_u32", "v_cmpx_ge_u32">;
defm V_CMPX_GT_U32 : VOPCX_I32 <vopc<0xd4, 0xdc>, "v_cmpx_gt_u32">;
defm V_CMPX_NE_U32 : VOPCX_I32 <vopc<0xd5, 0xdd>, "v_cmpx_ne_u32">;
defm V_CMPX_GE_U32 : VOPCX_I32 <vopc<0xd6, 0xde>, "v_cmpx_ge_u32">;
defm V_CMPX_T_U32 : VOPCX_I32 <vopc<0xd7, 0xdf>, "v_cmpx_t_u32">;
-} // End hasSideEffects = 1
defm V_CMP_F_U64 : VOPC_I64 <vopc<0xe0, 0xe8>, "v_cmp_f_u64">;
-defm V_CMP_LT_U64 : VOPC_I64 <vopc<0xe1, 0xe9>, "v_cmp_lt_u64", COND_ULT>;
+defm V_CMP_LT_U64 : VOPC_I64 <vopc<0xe1, 0xe9>, "v_cmp_lt_u64", COND_ULT, "v_cmp_gt_u64">;
defm V_CMP_EQ_U64 : VOPC_I64 <vopc<0xe2, 0xea>, "v_cmp_eq_u64", COND_EQ>;
-defm V_CMP_LE_U64 : VOPC_I64 <vopc<0xe3, 0xeb>, "v_cmp_le_u64", COND_ULE>;
+defm V_CMP_LE_U64 : VOPC_I64 <vopc<0xe3, 0xeb>, "v_cmp_le_u64", COND_ULE, "v_cmp_ge_u64">;
defm V_CMP_GT_U64 : VOPC_I64 <vopc<0xe4, 0xec>, "v_cmp_gt_u64", COND_UGT>;
defm V_CMP_NE_U64 : VOPC_I64 <vopc<0xe5, 0xed>, "v_cmp_ne_u64", COND_NE>;
defm V_CMP_GE_U64 : VOPC_I64 <vopc<0xe6, 0xee>, "v_cmp_ge_u64", COND_UGE>;
defm V_CMP_T_U64 : VOPC_I64 <vopc<0xe7, 0xef>, "v_cmp_t_u64">;
-let hasSideEffects = 1 in {
-
defm V_CMPX_F_U64 : VOPCX_I64 <vopc<0xf0, 0xf8>, "v_cmpx_f_u64">;
-defm V_CMPX_LT_U64 : VOPCX_I64 <vopc<0xf1, 0xf9>, "v_cmpx_lt_u64">;
+defm V_CMPX_LT_U64 : VOPCX_I64 <vopc<0xf1, 0xf9>, "v_cmpx_lt_u64", "v_cmpx_gt_u64">;
defm V_CMPX_EQ_U64 : VOPCX_I64 <vopc<0xf2, 0xfa>, "v_cmpx_eq_u64">;
-defm V_CMPX_LE_U64 : VOPCX_I64 <vopc<0xf3, 0xfb>, "v_cmpx_le_u64">;
+defm V_CMPX_LE_U64 : VOPCX_I64 <vopc<0xf3, 0xfb>, "v_cmpx_le_u64", "v_cmpx_ge_u64">;
defm V_CMPX_GT_U64 : VOPCX_I64 <vopc<0xf4, 0xfc>, "v_cmpx_gt_u64">;
defm V_CMPX_NE_U64 : VOPCX_I64 <vopc<0xf5, 0xfd>, "v_cmpx_ne_u64">;
defm V_CMPX_GE_U64 : VOPCX_I64 <vopc<0xf6, 0xfe>, "v_cmpx_ge_u64">;
defm V_CMPX_T_U64 : VOPCX_I64 <vopc<0xf7, 0xff>, "v_cmpx_t_u64">;
-} // End hasSideEffects = 1
+} // End isCompare = 1, isCommutable = 1
defm V_CMP_CLASS_F32 : VOPC_CLASS_F32 <vopc<0x88, 0x10>, "v_cmp_class_f32">;
-
-let hasSideEffects = 1 in {
defm V_CMPX_CLASS_F32 : VOPCX_CLASS_F32 <vopc<0x98, 0x11>, "v_cmpx_class_f32">;
-} // End hasSideEffects = 1
-
defm V_CMP_CLASS_F64 : VOPC_CLASS_F64 <vopc<0xa8, 0x12>, "v_cmp_class_f64">;
-
-let hasSideEffects = 1 in {
defm V_CMPX_CLASS_F64 : VOPCX_CLASS_F64 <vopc<0xb8, 0x13>, "v_cmpx_class_f64">;
-} // End hasSideEffects = 1
-
-} // End isCompare = 1
//===----------------------------------------------------------------------===//
// DS Instructions
//===----------------------------------------------------------------------===//
-
defm DS_ADD_U32 : DS_1A1D_NORET <0x0, "ds_add_u32", VGPR_32>;
defm DS_SUB_U32 : DS_1A1D_NORET <0x1, "ds_sub_u32", VGPR_32>;
defm DS_RSUB_U32 : DS_1A1D_NORET <0x2, "ds_rsub_u32", VGPR_32>;
@@ -782,12 +760,26 @@ defm DS_MAX_U32 : DS_1A1D_NORET <0x8, "ds_max_u32", VGPR_32>;
defm DS_AND_B32 : DS_1A1D_NORET <0x9, "ds_and_b32", VGPR_32>;
defm DS_OR_B32 : DS_1A1D_NORET <0xa, "ds_or_b32", VGPR_32>;
defm DS_XOR_B32 : DS_1A1D_NORET <0xb, "ds_xor_b32", VGPR_32>;
-defm DS_MSKOR_B32 : DS_1A1D_NORET <0xc, "ds_mskor_b32", VGPR_32>;
+defm DS_MSKOR_B32 : DS_1A2D_NORET <0xc, "ds_mskor_b32", VGPR_32>;
+let mayLoad = 0 in {
+defm DS_WRITE_B32 : DS_1A1D_NORET <0xd, "ds_write_b32", VGPR_32>;
+defm DS_WRITE2_B32 : DS_1A1D_Off8_NORET <0xe, "ds_write2_b32", VGPR_32>;
+defm DS_WRITE2ST64_B32 : DS_1A1D_Off8_NORET <0xf, "ds_write2st64_b32", VGPR_32>;
+}
defm DS_CMPST_B32 : DS_1A2D_NORET <0x10, "ds_cmpst_b32", VGPR_32>;
defm DS_CMPST_F32 : DS_1A2D_NORET <0x11, "ds_cmpst_f32", VGPR_32>;
-defm DS_MIN_F32 : DS_1A1D_NORET <0x12, "ds_min_f32", VGPR_32>;
-defm DS_MAX_F32 : DS_1A1D_NORET <0x13, "ds_max_f32", VGPR_32>;
-
+defm DS_MIN_F32 : DS_1A2D_NORET <0x12, "ds_min_f32", VGPR_32>;
+defm DS_MAX_F32 : DS_1A2D_NORET <0x13, "ds_max_f32", VGPR_32>;
+
+defm DS_GWS_INIT : DS_1A_GDS <0x19, "ds_gws_init">;
+defm DS_GWS_SEMA_V : DS_1A_GDS <0x1a, "ds_gws_sema_v">;
+defm DS_GWS_SEMA_BR : DS_1A_GDS <0x1b, "ds_gws_sema_br">;
+defm DS_GWS_SEMA_P : DS_1A_GDS <0x1c, "ds_gws_sema_p">;
+defm DS_GWS_BARRIER : DS_1A_GDS <0x1d, "ds_gws_barrier">;
+let mayLoad = 0 in {
+defm DS_WRITE_B8 : DS_1A1D_NORET <0x1e, "ds_write_b8", VGPR_32>;
+defm DS_WRITE_B16 : DS_1A1D_NORET <0x1f, "ds_write_b16", VGPR_32>;
+}
defm DS_ADD_RTN_U32 : DS_1A1D_RET <0x20, "ds_add_rtn_u32", VGPR_32, "ds_add_u32">;
defm DS_SUB_RTN_U32 : DS_1A1D_RET <0x21, "ds_sub_rtn_u32", VGPR_32, "ds_sub_u32">;
defm DS_RSUB_RTN_U32 : DS_1A1D_RET <0x22, "ds_rsub_rtn_u32", VGPR_32, "ds_rsub_u32">;
@@ -800,20 +792,34 @@ defm DS_MAX_RTN_U32 : DS_1A1D_RET <0x28, "ds_max_rtn_u32", VGPR_32, "ds_max_u32"
defm DS_AND_RTN_B32 : DS_1A1D_RET <0x29, "ds_and_rtn_b32", VGPR_32, "ds_and_b32">;
defm DS_OR_RTN_B32 : DS_1A1D_RET <0x2a, "ds_or_rtn_b32", VGPR_32, "ds_or_b32">;
defm DS_XOR_RTN_B32 : DS_1A1D_RET <0x2b, "ds_xor_rtn_b32", VGPR_32, "ds_xor_b32">;
-defm DS_MSKOR_RTN_B32 : DS_1A1D_RET <0x2c, "ds_mskor_rtn_b32", VGPR_32, "ds_mskor_b32">;
+defm DS_MSKOR_RTN_B32 : DS_1A2D_RET <0x2c, "ds_mskor_rtn_b32", VGPR_32, "ds_mskor_b32">;
defm DS_WRXCHG_RTN_B32 : DS_1A1D_RET <0x2d, "ds_wrxchg_rtn_b32", VGPR_32>;
-//def DS_WRXCHG2_RTN_B32 : DS_2A0D_RET <0x2e, "ds_wrxchg2_rtn_b32", VGPR_32, "ds_wrxchg2_b32">;
-//def DS_WRXCHG2ST64_RTN_B32 : DS_2A0D_RET <0x2f, "ds_wrxchg2_rtn_b32", VGPR_32, "ds_wrxchg2st64_b32">;
+defm DS_WRXCHG2_RTN_B32 : DS_1A2D_RET <
+ 0x2e, "ds_wrxchg2_rtn_b32", VReg_64, "", VGPR_32
+>;
+defm DS_WRXCHG2ST64_RTN_B32 : DS_1A2D_RET <
+ 0x2f, "ds_wrxchg2st64_rtn_b32", VReg_64, "", VGPR_32
+>;
defm DS_CMPST_RTN_B32 : DS_1A2D_RET <0x30, "ds_cmpst_rtn_b32", VGPR_32, "ds_cmpst_b32">;
defm DS_CMPST_RTN_F32 : DS_1A2D_RET <0x31, "ds_cmpst_rtn_f32", VGPR_32, "ds_cmpst_f32">;
-defm DS_MIN_RTN_F32 : DS_1A1D_RET <0x32, "ds_min_rtn_f32", VGPR_32, "ds_min_f32">;
-defm DS_MAX_RTN_F32 : DS_1A1D_RET <0x33, "ds_max_rtn_f32", VGPR_32, "ds_max_f32">;
-
+defm DS_MIN_RTN_F32 : DS_1A2D_RET <0x32, "ds_min_rtn_f32", VGPR_32, "ds_min_f32">;
+defm DS_MAX_RTN_F32 : DS_1A2D_RET <0x33, "ds_max_rtn_f32", VGPR_32, "ds_max_f32">;
let SubtargetPredicate = isCI in {
defm DS_WRAP_RTN_F32 : DS_1A1D_RET <0x34, "ds_wrap_rtn_f32", VGPR_32, "ds_wrap_f32">;
} // End isCI
-
-
+defm DS_SWIZZLE_B32 : DS_1A_RET <0x35, "ds_swizzle_b32", VGPR_32>;
+let mayStore = 0 in {
+defm DS_READ_B32 : DS_1A_RET <0x36, "ds_read_b32", VGPR_32>;
+defm DS_READ2_B32 : DS_1A_Off8_RET <0x37, "ds_read2_b32", VReg_64>;
+defm DS_READ2ST64_B32 : DS_1A_Off8_RET <0x38, "ds_read2st64_b32", VReg_64>;
+defm DS_READ_I8 : DS_1A_RET <0x39, "ds_read_i8", VGPR_32>;
+defm DS_READ_U8 : DS_1A_RET <0x3a, "ds_read_u8", VGPR_32>;
+defm DS_READ_I16 : DS_1A_RET <0x3b, "ds_read_i16", VGPR_32>;
+defm DS_READ_U16 : DS_1A_RET <0x3c, "ds_read_u16", VGPR_32>;
+}
+defm DS_CONSUME : DS_0A_RET <0x3d, "ds_consume">;
+defm DS_APPEND : DS_0A_RET <0x3e, "ds_append">;
+defm DS_ORDERED_COUNT : DS_1A_RET_GDS <0x3f, "ds_ordered_count">;
defm DS_ADD_U64 : DS_1A1D_NORET <0x40, "ds_add_u64", VReg_64>;
defm DS_SUB_U64 : DS_1A1D_NORET <0x41, "ds_sub_u64", VReg_64>;
defm DS_RSUB_U64 : DS_1A1D_NORET <0x42, "ds_rsub_u64", VReg_64>;
@@ -826,7 +832,12 @@ defm DS_MAX_U64 : DS_1A1D_NORET <0x48, "ds_max_u64", VReg_64>;
defm DS_AND_B64 : DS_1A1D_NORET <0x49, "ds_and_b64", VReg_64>;
defm DS_OR_B64 : DS_1A1D_NORET <0x4a, "ds_or_b64", VReg_64>;
defm DS_XOR_B64 : DS_1A1D_NORET <0x4b, "ds_xor_b64", VReg_64>;
-defm DS_MSKOR_B64 : DS_1A1D_NORET <0x4c, "ds_mskor_b64", VReg_64>;
+defm DS_MSKOR_B64 : DS_1A2D_NORET <0x4c, "ds_mskor_b64", VReg_64>;
+let mayLoad = 0 in {
+defm DS_WRITE_B64 : DS_1A1D_NORET <0x4d, "ds_write_b64", VReg_64>;
+defm DS_WRITE2_B64 : DS_1A1D_Off8_NORET <0x4e, "ds_write2_b64", VReg_64>;
+defm DS_WRITE2ST64_B64 : DS_1A1D_Off8_NORET <0x4f, "ds_write2st64_b64", VReg_64>;
+}
defm DS_CMPST_B64 : DS_1A2D_NORET <0x50, "ds_cmpst_b64", VReg_64>;
defm DS_CMPST_F64 : DS_1A2D_NORET <0x51, "ds_cmpst_f64", VReg_64>;
defm DS_MIN_F64 : DS_1A1D_NORET <0x52, "ds_min_f64", VReg_64>;
@@ -844,57 +855,88 @@ defm DS_MAX_RTN_U64 : DS_1A1D_RET <0x68, "ds_max_rtn_u64", VReg_64, "ds_max_u64"
defm DS_AND_RTN_B64 : DS_1A1D_RET <0x69, "ds_and_rtn_b64", VReg_64, "ds_and_b64">;
defm DS_OR_RTN_B64 : DS_1A1D_RET <0x6a, "ds_or_rtn_b64", VReg_64, "ds_or_b64">;
defm DS_XOR_RTN_B64 : DS_1A1D_RET <0x6b, "ds_xor_rtn_b64", VReg_64, "ds_xor_b64">;
-defm DS_MSKOR_RTN_B64 : DS_1A1D_RET <0x6c, "ds_mskor_rtn_b64", VReg_64, "ds_mskor_b64">;
+defm DS_MSKOR_RTN_B64 : DS_1A2D_RET <0x6c, "ds_mskor_rtn_b64", VReg_64, "ds_mskor_b64">;
defm DS_WRXCHG_RTN_B64 : DS_1A1D_RET <0x6d, "ds_wrxchg_rtn_b64", VReg_64, "ds_wrxchg_b64">;
-//def DS_WRXCHG2_RTN_B64 : DS_2A0D_RET <0x6e, "ds_wrxchg2_rtn_b64", VReg_64, "ds_wrxchg2_b64">;
-//def DS_WRXCHG2ST64_RTN_B64 : DS_2A0D_RET <0x6f, "ds_wrxchg2_rtn_b64", VReg_64, "ds_wrxchg2st64_b64">;
+defm DS_WRXCHG2_RTN_B64 : DS_1A2D_RET <0x6e, "ds_wrxchg2_rtn_b64", VReg_128, "ds_wrxchg2_b64", VReg_64>;
+defm DS_WRXCHG2ST64_RTN_B64 : DS_1A2D_RET <0x6f, "ds_wrxchg2st64_rtn_b64", VReg_128, "ds_wrxchg2st64_b64", VReg_64>;
defm DS_CMPST_RTN_B64 : DS_1A2D_RET <0x70, "ds_cmpst_rtn_b64", VReg_64, "ds_cmpst_b64">;
defm DS_CMPST_RTN_F64 : DS_1A2D_RET <0x71, "ds_cmpst_rtn_f64", VReg_64, "ds_cmpst_f64">;
defm DS_MIN_RTN_F64 : DS_1A1D_RET <0x72, "ds_min_rtn_f64", VReg_64, "ds_min_f64">;
defm DS_MAX_RTN_F64 : DS_1A1D_RET <0x73, "ds_max_rtn_f64", VReg_64, "ds_max_f64">;
+let mayStore = 0 in {
+defm DS_READ_B64 : DS_1A_RET <0x76, "ds_read_b64", VReg_64>;
+defm DS_READ2_B64 : DS_1A_Off8_RET <0x77, "ds_read2_b64", VReg_128>;
+defm DS_READ2ST64_B64 : DS_1A_Off8_RET <0x78, "ds_read2st64_b64", VReg_128>;
+}
+
+defm DS_ADD_SRC2_U32 : DS_1A <0x80, "ds_add_src2_u32">;
+defm DS_SUB_SRC2_U32 : DS_1A <0x81, "ds_sub_src2_u32">;
+defm DS_RSUB_SRC2_U32 : DS_1A <0x82, "ds_rsub_src2_u32">;
+defm DS_INC_SRC2_U32 : DS_1A <0x83, "ds_inc_src2_u32">;
+defm DS_DEC_SRC2_U32 : DS_1A <0x84, "ds_dec_src2_u32">;
+defm DS_MIN_SRC2_I32 : DS_1A <0x85, "ds_min_src2_i32">;
+defm DS_MAX_SRC2_I32 : DS_1A <0x86, "ds_max_src2_i32">;
+defm DS_MIN_SRC2_U32 : DS_1A <0x87, "ds_min_src2_u32">;
+defm DS_MAX_SRC2_U32 : DS_1A <0x88, "ds_max_src2_u32">;
+defm DS_AND_SRC2_B32 : DS_1A <0x89, "ds_and_src2_b32">;
+defm DS_OR_SRC2_B32 : DS_1A <0x8a, "ds_or_src2_b32">;
+defm DS_XOR_SRC2_B32 : DS_1A <0x8b, "ds_xor_src2_b32">;
+defm DS_WRITE_SRC2_B32 : DS_1A <0x8c, "ds_write_src2_b32">;
+
+defm DS_MIN_SRC2_F32 : DS_1A <0x92, "ds_min_src2_f32">;
+defm DS_MAX_SRC2_F32 : DS_1A <0x93, "ds_max_src2_f32">;
+
+defm DS_ADD_SRC2_U64 : DS_1A <0xc0, "ds_add_src2_u64">;
+defm DS_SUB_SRC2_U64 : DS_1A <0xc1, "ds_sub_src2_u64">;
+defm DS_RSUB_SRC2_U64 : DS_1A <0xc2, "ds_rsub_src2_u64">;
+defm DS_INC_SRC2_U64 : DS_1A <0xc3, "ds_inc_src2_u64">;
+defm DS_DEC_SRC2_U64 : DS_1A <0xc4, "ds_dec_src2_u64">;
+defm DS_MIN_SRC2_I64 : DS_1A <0xc5, "ds_min_src2_i64">;
+defm DS_MAX_SRC2_I64 : DS_1A <0xc6, "ds_max_src2_i64">;
+defm DS_MIN_SRC2_U64 : DS_1A <0xc7, "ds_min_src2_u64">;
+defm DS_MAX_SRC2_U64 : DS_1A <0xc8, "ds_max_src2_u64">;
+defm DS_AND_SRC2_B64 : DS_1A <0xc9, "ds_and_src2_b64">;
+defm DS_OR_SRC2_B64 : DS_1A <0xca, "ds_or_src2_b64">;
+defm DS_XOR_SRC2_B64 : DS_1A <0xcb, "ds_xor_src2_b64">;
+defm DS_WRITE_SRC2_B64 : DS_1A <0xcc, "ds_write_src2_b64">;
+
+defm DS_MIN_SRC2_F64 : DS_1A <0xd2, "ds_min_src2_f64">;
+defm DS_MAX_SRC2_F64 : DS_1A <0xd3, "ds_max_src2_f64">;
+
//let SubtargetPredicate = isCI in {
// DS_CONDXCHG32_RTN_B64
// DS_CONDXCHG32_RTN_B128
//} // End isCI
-// TODO: _SRC2_* forms
-
-defm DS_WRITE_B32 : DS_Store_Helper <0x0000000d, "ds_write_b32", VGPR_32>;
-defm DS_WRITE_B8 : DS_Store_Helper <0x00000001e, "ds_write_b8", VGPR_32>;
-defm DS_WRITE_B16 : DS_Store_Helper <0x00000001f, "ds_write_b16", VGPR_32>;
-defm DS_WRITE_B64 : DS_Store_Helper <0x00000004d, "ds_write_b64", VReg_64>;
-
-defm DS_READ_B32 : DS_Load_Helper <0x00000036, "ds_read_b32", VGPR_32>;
-defm DS_READ_I8 : DS_Load_Helper <0x00000039, "ds_read_i8", VGPR_32>;
-defm DS_READ_U8 : DS_Load_Helper <0x0000003a, "ds_read_u8", VGPR_32>;
-defm DS_READ_I16 : DS_Load_Helper <0x0000003b, "ds_read_i16", VGPR_32>;
-defm DS_READ_U16 : DS_Load_Helper <0x0000003c, "ds_read_u16", VGPR_32>;
-defm DS_READ_B64 : DS_Load_Helper <0x00000076, "ds_read_b64", VReg_64>;
-
-// 2 forms.
-defm DS_WRITE2_B32 : DS_Store2_Helper <0x0000000E, "ds_write2_b32", VGPR_32>;
-defm DS_WRITE2ST64_B32 : DS_Store2_Helper <0x0000000F, "ds_write2st64_b32", VGPR_32>;
-defm DS_WRITE2_B64 : DS_Store2_Helper <0x0000004E, "ds_write2_b64", VReg_64>;
-defm DS_WRITE2ST64_B64 : DS_Store2_Helper <0x0000004F, "ds_write2st64_b64", VReg_64>;
-
-defm DS_READ2_B32 : DS_Load2_Helper <0x00000037, "ds_read2_b32", VReg_64>;
-defm DS_READ2ST64_B32 : DS_Load2_Helper <0x00000038, "ds_read2st64_b32", VReg_64>;
-defm DS_READ2_B64 : DS_Load2_Helper <0x00000075, "ds_read2_b64", VReg_128>;
-defm DS_READ2ST64_B64 : DS_Load2_Helper <0x00000076, "ds_read2st64_b64", VReg_128>;
-
//===----------------------------------------------------------------------===//
// MUBUF Instructions
//===----------------------------------------------------------------------===//
-//def BUFFER_LOAD_FORMAT_X : MUBUF_ <mubuf<0x00>, "buffer_load_format_x", []>;
-//def BUFFER_LOAD_FORMAT_XY : MUBUF_ <mubuf<0x01>, "buffer_load_format_xy", []>;
-//def BUFFER_LOAD_FORMAT_XYZ : MUBUF_ <mubuf<0x02>, "buffer_load_format_xyz", []>;
-defm BUFFER_LOAD_FORMAT_XYZW : MUBUF_Load_Helper <mubuf<0x03>, "buffer_load_format_xyzw", VReg_128>;
-//def BUFFER_STORE_FORMAT_X : MUBUF_ <mubuf<0x04>, "buffer_store_format_x", []>;
-//def BUFFER_STORE_FORMAT_XY : MUBUF_ <mubuf<0x05>, "buffer_store_format_xy", []>;
-//def BUFFER_STORE_FORMAT_XYZ : MUBUF_ <mubuf<0x06>, "buffer_store_format_xyz", []>;
-//def BUFFER_STORE_FORMAT_XYZW : MUBUF_ <mubuf<0x07>, "buffer_store_format_xyzw", []>;
+defm BUFFER_LOAD_FORMAT_X : MUBUF_Load_Helper <
+ mubuf<0x00>, "buffer_load_format_x", VGPR_32
+>;
+defm BUFFER_LOAD_FORMAT_XY : MUBUF_Load_Helper <
+ mubuf<0x01>, "buffer_load_format_xy", VReg_64
+>;
+defm BUFFER_LOAD_FORMAT_XYZ : MUBUF_Load_Helper <
+ mubuf<0x02>, "buffer_load_format_xyz", VReg_96
+>;
+defm BUFFER_LOAD_FORMAT_XYZW : MUBUF_Load_Helper <
+ mubuf<0x03>, "buffer_load_format_xyzw", VReg_128
+>;
+defm BUFFER_STORE_FORMAT_X : MUBUF_Store_Helper <
+ mubuf<0x04>, "buffer_store_format_x", VGPR_32
+>;
+defm BUFFER_STORE_FORMAT_XY : MUBUF_Store_Helper <
+ mubuf<0x05>, "buffer_store_format_xy", VReg_64
+>;
+defm BUFFER_STORE_FORMAT_XYZ : MUBUF_Store_Helper <
+ mubuf<0x06>, "buffer_store_format_xyz", VReg_96
+>;
+defm BUFFER_STORE_FORMAT_XYZW : MUBUF_Store_Helper <
+ mubuf<0x07>, "buffer_store_format_xyzw", VReg_128
+>;
defm BUFFER_LOAD_UBYTE : MUBUF_Load_Helper <
mubuf<0x08, 0x10>, "buffer_load_ubyte", VGPR_32, i32, az_extloadi8_global
>;
@@ -1418,13 +1460,17 @@ defm V_INTERP_MOV_F32 : VINTRP_m <
// VOP2 Instructions
//===----------------------------------------------------------------------===//
-defm V_CNDMASK_B32_e64 : VOP3_m_nomods <vop3<0x100>, (outs VGPR_32:$dst),
- (ins VSrc_32:$src0, VSrc_32:$src1, SSrc_64:$src2),
- "v_cndmask_b32_e64 $dst, $src0, $src1, $src2",
- [(set i32:$dst, (select i1:$src2, i32:$src1, i32:$src0))],
- "v_cndmask_b32_e64", 3
->;
+multiclass V_CNDMASK <vop2 op, string name> {
+ defm _e32 : VOP2_m <
+ op, VOP_CNDMASK.Outs, VOP_CNDMASK.Ins32, VOP_CNDMASK.Asm32, [],
+ name, name>;
+
+ defm _e64 : VOP3_m <
+ op, VOP_CNDMASK.Outs, VOP_CNDMASK.Ins64,
+ name#!cast<string>(VOP_CNDMASK.Asm64), [], name, 3>;
+}
+defm V_CNDMASK_B32 : V_CNDMASK<vop2<0x0>, "v_cndmask_b32">;
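+// (This gives v_cndmask_b32 both encodings: the _e32 VOP2 form reads its
+// condition implicitly from VCC, while the _e64 VOP3 form carries an
+// explicit SGPR-pair condition operand.)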
let isCommutable = 1 in {
defm V_ADD_F32 : VOP2Inst <vop2<0x3, 0x1>, "v_add_f32",
@@ -1568,8 +1614,8 @@ defm V_MAC_LEGACY_F32 : VOP2_VI3_Inst <vop23<0x6, 0x28e>, "v_mac_legacy_f32",
>;
} // End isCommutable = 1
-defm V_BFM_B32 : VOP2_VI3_Inst <vop23<0x1e, 0x293>, "v_bfm_b32", VOP_I32_I32_I32,
- AMDGPUbfm
+defm V_BFM_B32 : VOP2_VI3_Inst <vop23<0x1e, 0x293>, "v_bfm_b32",
+ VOP_I32_I32_I32
>;
defm V_BCNT_U32_B32 : VOP2_VI3_Inst <vop23<0x22, 0x28b>, "v_bcnt_u32_b32",
VOP_I32_I32_I32
@@ -1638,14 +1684,12 @@ defm V_CUBEMA_F32 : VOP3Inst <vop3<0x147, 0x1c7>, "v_cubema_f32",
VOP_F32_F32_F32_F32
>;
-let hasSideEffects = 0, mayLoad = 0, mayStore = 0 in {
defm V_BFE_U32 : VOP3Inst <vop3<0x148, 0x1c8>, "v_bfe_u32",
VOP_I32_I32_I32_I32, AMDGPUbfe_u32
>;
defm V_BFE_I32 : VOP3Inst <vop3<0x149, 0x1c9>, "v_bfe_i32",
VOP_I32_I32_I32_I32, AMDGPUbfe_i32
>;
-}
defm V_BFI_B32 : VOP3Inst <vop3<0x14a, 0x1ca>, "v_bfi_b32",
VOP_I32_I32_I32_I32, AMDGPUbfi
@@ -1833,6 +1877,11 @@ defm V_ASHRREV_I64 : VOP3Inst <vop3<0, 0x291>, "v_ashrrev_i64",
//===----------------------------------------------------------------------===//
let isCodeGenOnly = 1, isPseudo = 1 in {
+// For use in patterns
+def V_CNDMASK_B64_PSEUDO : VOP3Common <(outs VReg_64:$dst),
+ (ins VSrc_64:$src0, VSrc_64:$src1, SSrc_64:$src2), "", []
+>;
+
let hasSideEffects = 0, mayLoad = 0, mayStore = 0 in {
// 64-bit vector move instruction. This is mainly used by the SIFoldOperands
// pass to enable folding of inline immediates.
@@ -2049,7 +2098,7 @@ def : Pat <
/* int_SI_vs_load_input */
def : Pat<
(SIload_input v4i32:$tlst, imm:$attr_offset, i32:$buf_idx_vgpr),
- (BUFFER_LOAD_FORMAT_XYZW_IDXEN $tlst, $buf_idx_vgpr, imm:$attr_offset, 0, 0, 0, 0)
+ (BUFFER_LOAD_FORMAT_XYZW_IDXEN $buf_idx_vgpr, $tlst, 0, imm:$attr_offset, 0, 0, 0)
>;
/* int_SI_export */
@@ -2196,6 +2245,11 @@ def : Pat <
(V_BCNT_U32_B32_e64 $popcnt, $val)
>;
+def : Pat <
+ (i32 (select i1:$src0, i32:$src1, i32:$src2)),
+ (V_CNDMASK_B32_e64 $src2, $src1, $src0)
+>;
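+// (v_cndmask_b32 computes dst = cond ? src1 : src0, so the select's false
+// value $src2 lands in src0 and its true value $src1 in src1.)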
+
/********** ======================= **********/
/********** Image sampling patterns **********/
/********** ======================= **********/
@@ -2738,7 +2792,7 @@ def : Ext32Pat <anyext>;
// Offset in an 32Bit VGPR
def : Pat <
(SIload_constant v4i32:$sbase, i32:$voff),
- (BUFFER_LOAD_DWORD_OFFEN $sbase, $voff, 0, 0, 0, 0, 0)
+ (BUFFER_LOAD_DWORD_OFFEN $voff, $sbase, 0, 0, 0, 0, 0)
>;
// The multiplication scales from [0,1] to the unsigned integer range
@@ -2781,7 +2835,7 @@ def : ROTRPattern <V_ALIGNBIT_B32>;
class DSReadPat <DS inst, ValueType vt, PatFrag frag> : Pat <
(vt (frag (DS1Addr1Offset i32:$ptr, i32:$offset))),
- (inst (i1 0), $ptr, (as_i16imm $offset), (S_MOV_B32 -1))
+ (inst $ptr, (as_i16imm $offset), (i1 0), (S_MOV_B32 -1))
>;
def : DSReadPat <DS_READ_I8, i32, sextloadi8_local>;
@@ -2799,12 +2853,12 @@ def : DSReadPat <DS_READ_B64, v2i32, local_load_aligned8bytes>;
def : Pat <
(v2i32 (local_load (DS64Bit4ByteAligned i32:$ptr, i8:$offset0,
i8:$offset1))),
- (DS_READ2_B32 (i1 0), $ptr, $offset0, $offset1, (S_MOV_B32 -1))
+ (DS_READ2_B32 $ptr, $offset0, $offset1, (i1 0), (S_MOV_B32 -1))
>;
class DSWritePat <DS inst, ValueType vt, PatFrag frag> : Pat <
(frag vt:$value, (DS1Addr1Offset i32:$ptr, i32:$offset)),
- (inst (i1 0), $ptr, $value, (as_i16imm $offset), (S_MOV_B32 -1))
+ (inst $ptr, $value, (as_i16imm $offset), (i1 0), (S_MOV_B32 -1))
>;
def : DSWritePat <DS_WRITE_B8, i32, truncstorei8_local>;
@@ -2819,14 +2873,14 @@ def : DSWritePat <DS_WRITE_B64, v2i32, local_store_aligned8bytes>;
def : Pat <
(local_store v2i32:$value, (DS64Bit4ByteAligned i32:$ptr, i8:$offset0,
i8:$offset1)),
- (DS_WRITE2_B32 (i1 0), $ptr, (EXTRACT_SUBREG $value, sub0),
- (EXTRACT_SUBREG $value, sub1), $offset0, $offset1,
- (S_MOV_B32 -1))
+ (DS_WRITE2_B32 $ptr, (EXTRACT_SUBREG $value, sub0),
+ (EXTRACT_SUBREG $value, sub1), $offset0, $offset1,
+ (i1 0), (S_MOV_B32 -1))
>;
class DSAtomicRetPat<DS inst, ValueType vt, PatFrag frag> : Pat <
(frag (DS1Addr1Offset i32:$ptr, i32:$offset), vt:$value),
- (inst (i1 0), $ptr, $value, (as_i16imm $offset), (S_MOV_B32 -1))
+ (inst $ptr, $value, (as_i16imm $offset), (i1 0), (S_MOV_B32 -1))
>;
// Special case of DSAtomicRetPat for add / sub 1 -> inc / dec
@@ -2842,13 +2896,13 @@ class DSAtomicRetPat<DS inst, ValueType vt, PatFrag frag> : Pat <
class DSAtomicIncRetPat<DS inst, ValueType vt,
Instruction LoadImm, PatFrag frag> : Pat <
(frag (DS1Addr1Offset i32:$ptr, i32:$offset), (vt 1)),
- (inst (i1 0), $ptr, (LoadImm (vt -1)), (as_i16imm $offset), (S_MOV_B32 -1))
+ (inst $ptr, (LoadImm (vt -1)), (as_i16imm $offset), (i1 0), (S_MOV_B32 -1))
>;
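+// (With the data operand loaded as -1, ds_inc wraps to 0 only at the type's
+// maximum and ds_dec wraps to -1 only at 0, i.e. they act as a plain
+// modular atomic +1 / -1.)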
class DSAtomicCmpXChg <DS inst, ValueType vt, PatFrag frag> : Pat <
(frag (DS1Addr1Offset i32:$ptr, i32:$offset), vt:$cmp, vt:$swap),
- (inst (i1 0), $ptr, $cmp, $swap, (as_i16imm $offset), (S_MOV_B32 -1))
+ (inst $ptr, $cmp, $swap, (as_i16imm $offset), (i1 0), (S_MOV_B32 -1))
>;
@@ -2898,8 +2952,9 @@ def : DSAtomicCmpXChg<DS_CMPST_RTN_B64, i64, atomic_cmp_swap_64_local>;
multiclass MUBUFLoad_Pattern <MUBUF Instr_ADDR64, ValueType vt,
PatFrag constant_ld> {
def : Pat <
- (vt (constant_ld (MUBUFAddr64 v4i32:$srsrc, i64:$vaddr, i32:$soffset, i16:$offset))),
- (Instr_ADDR64 $srsrc, $vaddr, $soffset, $offset)
+ (vt (constant_ld (MUBUFAddr64 v4i32:$srsrc, i64:$vaddr, i32:$soffset,
+ i16:$offset, i1:$glc, i1:$slc, i1:$tfe))),
+ (Instr_ADDR64 $vaddr, $srsrc, $soffset, $offset, $glc, $slc, $tfe)
>;
}
@@ -2916,7 +2971,7 @@ defm : MUBUFLoad_Pattern <BUFFER_LOAD_DWORDX4_ADDR64, v4i32, constant_load>;
class MUBUFScratchLoadPat <MUBUF Instr, ValueType vt, PatFrag ld> : Pat <
(vt (ld (MUBUFScratch v4i32:$srsrc, i32:$vaddr,
i32:$soffset, u16imm:$offset))),
- (Instr $srsrc, $vaddr, $soffset, $offset, 0, 0, 0)
+ (Instr $vaddr, $srsrc, $soffset, $offset, 0, 0, 0)
>;
def : MUBUFScratchLoadPat <BUFFER_LOAD_SBYTE_OFFEN, i32, sextloadi8_private>;
@@ -2935,7 +2990,7 @@ multiclass MUBUF_Load_Dword <ValueType vt, MUBUF offset, MUBUF offen, MUBUF idxe
(vt (int_SI_buffer_load_dword v4i32:$rsrc, (i32 imm), i32:$soffset,
imm:$offset, 0, 0, imm:$glc, imm:$slc,
imm:$tfe)),
- (offset $rsrc, (as_i16imm $offset), $soffset, (as_i1imm $glc),
+ (offset $rsrc, $soffset, (as_i16imm $offset), (as_i1imm $glc),
(as_i1imm $slc), (as_i1imm $tfe))
>;
@@ -2943,7 +2998,7 @@ multiclass MUBUF_Load_Dword <ValueType vt, MUBUF offset, MUBUF offen, MUBUF idxe
(vt (int_SI_buffer_load_dword v4i32:$rsrc, i32:$vaddr, i32:$soffset,
imm:$offset, 1, 0, imm:$glc, imm:$slc,
imm:$tfe)),
- (offen $rsrc, $vaddr, $soffset, (as_i16imm $offset), (as_i1imm $glc), (as_i1imm $slc),
+ (offen $vaddr, $rsrc, $soffset, (as_i16imm $offset), (as_i1imm $glc), (as_i1imm $slc),
(as_i1imm $tfe))
>;
@@ -2951,7 +3006,7 @@ multiclass MUBUF_Load_Dword <ValueType vt, MUBUF offset, MUBUF offen, MUBUF idxe
(vt (int_SI_buffer_load_dword v4i32:$rsrc, i32:$vaddr, i32:$soffset,
imm:$offset, 0, 1, imm:$glc, imm:$slc,
imm:$tfe)),
- (idxen $rsrc, $vaddr, (as_i16imm $offset), $soffset, (as_i1imm $glc),
+ (idxen $vaddr, $rsrc, $soffset, (as_i16imm $offset), (as_i1imm $glc),
(as_i1imm $slc), (as_i1imm $tfe))
>;
@@ -2959,7 +3014,7 @@ multiclass MUBUF_Load_Dword <ValueType vt, MUBUF offset, MUBUF offen, MUBUF idxe
(vt (int_SI_buffer_load_dword v4i32:$rsrc, v2i32:$vaddr, i32:$soffset,
imm:$offset, 1, 1, imm:$glc, imm:$slc,
imm:$tfe)),
- (bothen $rsrc, $vaddr, $soffset, (as_i16imm $offset), (as_i1imm $glc), (as_i1imm $slc),
+ (bothen $vaddr, $rsrc, $soffset, (as_i16imm $offset), (as_i1imm $glc), (as_i1imm $slc),
(as_i1imm $tfe))
>;
}
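+// (The two literal immediates after $offset in the intrinsic select the
+// addressing mode: 0,0 = offset, 1,0 = offen, 0,1 = idxen, 1,1 = bothen.)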
@@ -2974,7 +3029,7 @@ defm : MUBUF_Load_Dword <v4i32, BUFFER_LOAD_DWORDX4_OFFSET, BUFFER_LOAD_DWORDX4_
class MUBUFScratchStorePat <MUBUF Instr, ValueType vt, PatFrag st> : Pat <
(st vt:$value, (MUBUFScratch v4i32:$srsrc, i32:$vaddr, i32:$soffset,
u16imm:$offset)),
- (Instr $value, $srsrc, $vaddr, $soffset, $offset, 0, 0, 0)
+ (Instr $value, $vaddr, $srsrc, $soffset, $offset, 0, 0, 0)
>;
def : MUBUFScratchStorePat <BUFFER_STORE_BYTE_OFFEN, i32, truncstorei8_private>;
@@ -3104,26 +3159,26 @@ multiclass SI_INDIRECT_Pattern <ValueType vt, ValueType eltvt, SI_INDIRECT_DST I
// 1. Extract with offset
def : Pat<
- (vector_extract vt:$vec, (add i32:$idx, imm:$off)),
- (eltvt (SI_INDIRECT_SRC (IMPLICIT_DEF), $vec, $idx, imm:$off))
+ (eltvt (vector_extract vt:$vec, (add i32:$idx, imm:$off))),
+ (SI_INDIRECT_SRC $vec, $idx, imm:$off)
>;
// 2. Extract without offset
def : Pat<
- (vector_extract vt:$vec, i32:$idx),
- (eltvt (SI_INDIRECT_SRC (IMPLICIT_DEF), $vec, $idx, 0))
+ (eltvt (vector_extract vt:$vec, i32:$idx)),
+ (SI_INDIRECT_SRC $vec, $idx, 0)
>;
// 3. Insert with offset
def : Pat<
(vector_insert vt:$vec, eltvt:$val, (add i32:$idx, imm:$off)),
- (IndDst (IMPLICIT_DEF), $vec, $idx, imm:$off, $val)
+ (IndDst $vec, $idx, imm:$off, $val)
>;
// 4. Insert without offset
def : Pat<
(vector_insert vt:$vec, eltvt:$val, i32:$idx),
- (IndDst (IMPLICIT_DEF), $vec, $idx, 0, $val)
+ (IndDst $vec, $idx, 0, $val)
>;
}
@@ -3269,6 +3324,89 @@ def : Pat <
(V_CNDMASK_B32_e64 $src0, $src1, $src2)
>;
+multiclass BFMPatterns <ValueType vt, InstSI BFM, InstSI MOV> {
+ def : Pat <
+ (vt (shl (vt (add (vt (shl 1, vt:$a)), -1)), vt:$b)),
+ (BFM $a, $b)
+ >;
+
+ def : Pat <
+ (vt (add (vt (shl 1, vt:$a)), -1)),
+ (BFM $a, (MOV 0))
+ >;
+}
+
+defm : BFMPatterns <i32, S_BFM_B32, S_MOV_B32>;
+// FIXME: defm : BFMPatterns <i64, S_BFM_B64, S_MOV_B64>;
+
+def : BFEPattern <V_BFE_U32, S_MOV_B32>;
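For reference, S_BFM_B32 materializes a bit-field mask: $a ones shifted left by $b, which is exactly the (shl (add (shl 1, a), -1), b) DAG the first pattern above matches. A minimal scalar model (bfm32 is a hypothetical helper; the masking mirrors the hardware's use of only the low 5 bits of each operand):

#include <cstdint>

// Scalar model of what BFMPatterns selects S_BFM_B32 for:
// a field of 'width' one-bits starting at bit 'offset'.
static uint32_t bfm32(uint32_t width, uint32_t offset) {
  width &= 0x1f;                          // hardware uses only S0[4:0]
  offset &= 0x1f;                         // ... and S1[4:0]
  return ((1u << width) - 1u) << offset;  // (shl (add (shl 1, w), -1), off)
}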
+
+//===----------------------------------------------------------------------===//
+// Fract Patterns
+//===----------------------------------------------------------------------===//
+
+let Predicates = [isSI] in {
+
+// V_FRACT is buggy on SI, so the F32 version is never used and (x-floor(x)) is
+// used instead. However, SI doesn't have V_FLOOR_F64, so the most efficient
+// way to implement f64 floor is using V_FRACT_F64.
+// The workaround for the V_FRACT bug is:
+// fract(x) = isnan(x) ? x : min(V_FRACT(x), 0.99999999999999999)
+
+// Convert (x + (-floor(x))) to fract(x)
+def : Pat <
+ (f64 (fadd (f64 (VOP3Mods f64:$x, i32:$mods)),
+ (f64 (fneg (f64 (ffloor (f64 (VOP3Mods f64:$x, i32:$mods)))))))),
+ (V_CNDMASK_B64_PSEUDO
+ $x,
+ (V_MIN_F64
+ SRCMODS.NONE,
+ (V_FRACT_F64_e64 $mods, $x, DSTCLAMP.NONE, DSTOMOD.NONE),
+ SRCMODS.NONE,
+ (V_MOV_B64_PSEUDO 0x3fefffffffffffff),
+ DSTCLAMP.NONE, DSTOMOD.NONE),
+ (V_CMP_CLASS_F64_e64 SRCMODS.NONE, $x, 3/*NaN*/))
+>;
+
+// Convert floor(x) to (x - fract(x))
+def : Pat <
+ (f64 (ffloor (f64 (VOP3Mods f64:$x, i32:$mods)))),
+ (V_ADD_F64
+ $mods,
+ $x,
+ SRCMODS.NEG,
+ (V_CNDMASK_B64_PSEUDO
+ $x,
+ (V_MIN_F64
+ SRCMODS.NONE,
+ (V_FRACT_F64_e64 $mods, $x, DSTCLAMP.NONE, DSTOMOD.NONE),
+ SRCMODS.NONE,
+ (V_MOV_B64_PSEUDO 0x3fefffffffffffff),
+ DSTCLAMP.NONE, DSTOMOD.NONE),
+ (V_CMP_CLASS_F64_e64 SRCMODS.NONE, $x, 3/*NaN*/)),
+ DSTCLAMP.NONE, DSTOMOD.NONE)
+>;
+
+} // End Predicates = [isSI]
+
+let Predicates = [isCI] in {
+
+// Convert (x - floor(x)) to fract(x)
+def : Pat <
+ (f32 (fsub (f32 (VOP3Mods f32:$x, i32:$mods)),
+ (f32 (ffloor (f32 (VOP3Mods f32:$x, i32:$mods)))))),
+ (V_FRACT_F32_e64 $mods, $x, DSTCLAMP.NONE, DSTOMOD.NONE)
+>;
+
+// Convert (x + (-floor(x))) to fract(x)
+def : Pat <
+ (f64 (fadd (f64 (VOP3Mods f64:$x, i32:$mods)),
+ (f64 (fneg (f64 (ffloor (f64 (VOP3Mods f64:$x, i32:$mods)))))))),
+ (V_FRACT_F64_e64 $mods, $x, DSTCLAMP.NONE, DSTOMOD.NONE)
+>;
+
+} // End Predicates = [isCI]
+
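As a scalar model of the SI-only lowering above (a sketch: the std:: calls stand in for V_FRACT_F64, V_MIN_F64 and the NaN class compare, and safe_fract_f64 is a hypothetical name):

#include <algorithm>
#include <cmath>

// fract(x) = isnan(x) ? x : min(V_FRACT(x), 0x3fefffffffffffff)
static double safe_fract_f64(double x) {
  if (std::isnan(x))                      // V_CMP_CLASS_F64, class mask 3 (NaNs)
    return x;
  return std::min(x - std::floor(x),      // what V_FRACT_F64 computes
                  0x1.fffffffffffffp-1);  // largest double below 1.0
}

The second SI pattern then recovers floor(x) as x - safe_fract_f64(x), which is why the same select tree reappears under V_ADD_F64 with SRCMODS.NEG.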
//============================================================================//
// Miscellaneous Optimization Patterns
//============================================================================//
diff --git a/lib/Target/R600/SILoadStoreOptimizer.cpp b/lib/Target/R600/SILoadStoreOptimizer.cpp
index 46630d0..a927ad8 100644
--- a/lib/Target/R600/SILoadStoreOptimizer.cpp
+++ b/lib/Target/R600/SILoadStoreOptimizer.cpp
@@ -45,6 +45,7 @@
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetMachine.h"
using namespace llvm;
@@ -249,10 +250,10 @@ MachineBasicBlock::iterator SILoadStoreOptimizer::mergeRead2Pair(
DebugLoc DL = I->getDebugLoc();
MachineInstrBuilder Read2
= BuildMI(*MBB, I, DL, Read2Desc, DestReg)
- .addImm(0) // gds
.addOperand(*AddrReg) // addr
.addImm(NewOffset0) // offset0
.addImm(NewOffset1) // offset1
+ .addImm(0) // gds
.addOperand(*M0Reg) // M0
.addMemOperand(*I->memoperands_begin())
.addMemOperand(*Paired->memoperands_begin());
@@ -332,12 +333,12 @@ MachineBasicBlock::iterator SILoadStoreOptimizer::mergeWrite2Pair(
MachineInstrBuilder Write2
= BuildMI(*MBB, I, DL, Write2Desc)
- .addImm(0) // gds
.addOperand(*Addr) // addr
.addOperand(*Data0) // data0
.addOperand(*Data1) // data1
.addImm(NewOffset0) // offset0
.addImm(NewOffset1) // offset1
+ .addImm(0) // gds
.addOperand(*M0Reg) // m0
.addMemOperand(*I->memoperands_begin())
.addMemOperand(*Paired->memoperands_begin());
diff --git a/lib/Target/R600/SIRegisterInfo.cpp b/lib/Target/R600/SIRegisterInfo.cpp
index 9224e14..13a8974 100644
--- a/lib/Target/R600/SIRegisterInfo.cpp
+++ b/lib/Target/R600/SIRegisterInfo.cpp
@@ -14,7 +14,6 @@
#include "SIRegisterInfo.h"
-#include "AMDGPUSubtarget.h"
#include "SIInstrInfo.h"
#include "SIMachineFunctionInfo.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
@@ -25,9 +24,7 @@
using namespace llvm;
-SIRegisterInfo::SIRegisterInfo(const AMDGPUSubtarget &st)
-: AMDGPURegisterInfo(st)
- { }
+SIRegisterInfo::SIRegisterInfo() : AMDGPURegisterInfo() {}
BitVector SIRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
BitVector Reserved(getNumRegs());
@@ -47,14 +44,34 @@ BitVector SIRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
Reserved.set(AMDGPU::VGPR255);
Reserved.set(AMDGPU::VGPR254);
+ // Tonga and Iceland can only allocate a fixed number of SGPRs due
+ // to a hw bug.
+ if (MF.getSubtarget<AMDGPUSubtarget>().hasSGPRInitBug()) {
+ unsigned NumSGPRs = AMDGPU::SGPR_32RegClass.getNumRegs();
+ // Reserve some SGPRs for FLAT_SCRATCH and VCC (4 SGPRs).
+ // Assume XNACK_MASK is unused.
+ unsigned Limit = AMDGPUSubtarget::FIXED_SGPR_COUNT_FOR_INIT_BUG - 4;
+
+ for (unsigned i = Limit; i < NumSGPRs; ++i) {
+ unsigned Reg = AMDGPU::SGPR_32RegClass.getRegister(i);
+ MCRegAliasIterator R = MCRegAliasIterator(Reg, this, true);
+
+ for (; R.isValid(); ++R)
+ Reserved.set(*R);
+ }
+ }
+
return Reserved;
}
-unsigned SIRegisterInfo::getRegPressureSetLimit(unsigned Idx) const {
+unsigned SIRegisterInfo::getRegPressureSetLimit(const MachineFunction &MF,
+ unsigned Idx) const {
+ const AMDGPUSubtarget &STI = MF.getSubtarget<AMDGPUSubtarget>();
// FIXME: We should adjust the max number of waves based on LDS size.
- unsigned SGPRLimit = getNumSGPRsAllowed(ST.getMaxWavesPerCU());
- unsigned VGPRLimit = getNumVGPRsAllowed(ST.getMaxWavesPerCU());
+ unsigned SGPRLimit = getNumSGPRsAllowed(STI.getGeneration(),
+ STI.getMaxWavesPerCU());
+ unsigned VGPRLimit = getNumVGPRsAllowed(STI.getMaxWavesPerCU());
for (regclass_iterator I = regclass_begin(), E = regclass_end();
I != E; ++I) {
@@ -125,9 +142,10 @@ void SIRegisterInfo::buildScratchLoadStore(MachineBasicBlock::iterator MI,
int64_t Offset,
RegScavenger *RS) const {
- const SIInstrInfo *TII = static_cast<const SIInstrInfo*>(ST.getInstrInfo());
MachineBasicBlock *MBB = MI->getParent();
const MachineFunction *MF = MI->getParent()->getParent();
+ const SIInstrInfo *TII =
+ static_cast<const SIInstrInfo *>(MF->getSubtarget().getInstrInfo());
LLVMContext &Ctx = MF->getFunction()->getContext();
DebugLoc DL = MI->getDebugLoc();
bool IsLoad = TII->get(LoadStoreOp).mayLoad();
@@ -162,8 +180,8 @@ void SIRegisterInfo::buildScratchLoadStore(MachineBasicBlock::iterator MI,
BuildMI(*MBB, MI, DL, TII->get(LoadStoreOp))
.addReg(SubReg, getDefRegState(IsLoad))
.addReg(ScratchRsrcReg, getKillRegState(IsKill))
- .addImm(Offset)
.addReg(SOffset)
+ .addImm(Offset)
.addImm(0) // glc
.addImm(0) // slc
.addImm(0) // tfe
@@ -178,7 +196,8 @@ void SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI,
MachineBasicBlock *MBB = MI->getParent();
SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>();
MachineFrameInfo *FrameInfo = MF->getFrameInfo();
- const SIInstrInfo *TII = static_cast<const SIInstrInfo*>(ST.getInstrInfo());
+ const SIInstrInfo *TII =
+ static_cast<const SIInstrInfo *>(MF->getSubtarget().getInstrInfo());
DebugLoc DL = MI->getDebugLoc();
MachineOperand &FIOp = MI->getOperand(FIOperandNum);
@@ -249,7 +268,22 @@ void SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI,
.addReg(SubReg);
}
}
- TII->insertNOPs(MI, 3);
+
+ // TODO: only do this when it is needed
+ switch (MF->getSubtarget<AMDGPUSubtarget>().getGeneration()) {
+ case AMDGPUSubtarget::SOUTHERN_ISLANDS:
+ // "VALU writes SGPR" -> "SMRD reads that SGPR" needs "S_NOP 3" on SI
+ TII->insertNOPs(MI, 3);
+ break;
+ case AMDGPUSubtarget::SEA_ISLANDS:
+ break;
+ default: // VOLCANIC_ISLANDS and later
+ // "VALU writes SGPR -> VMEM reads that SGPR" needs "S_NOP 4" on VI
+ // and later. This also applies to VALUs which write VCC, but we're
+ // unlikely to see VMEM use VCC.
+ TII->insertNOPs(MI, 4);
+ }
+
MI->eraseFromParent();
break;
}
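The NOP counts encode wait states: S_NOP N idles for N+1 cycles, so S_NOP 3 gives the 4 wait states the SI hazard needs and S_NOP 4 the 5 needed on VI and later. A hedged helper (hypothetical name; a single S_NOP only covers a bounded count, so larger gaps would take several):

// Map a required number of wait states to the S_NOP immediate used above.
static unsigned sNopImmForWaitStates(unsigned WaitStates) {
  return WaitStates - 1; // insertNOPs(MI, 3) above thus yields 4 wait states
}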
@@ -494,14 +528,24 @@ unsigned SIRegisterInfo::getNumVGPRsAllowed(unsigned WaveCount) const {
}
}
-unsigned SIRegisterInfo::getNumSGPRsAllowed(unsigned WaveCount) const {
- switch(WaveCount) {
- case 10: return 48;
- case 9: return 56;
- case 8: return 64;
- case 7: return 72;
- case 6: return 80;
- case 5: return 96;
- default: return 103;
+unsigned SIRegisterInfo::getNumSGPRsAllowed(AMDGPUSubtarget::Generation gen,
+ unsigned WaveCount) const {
+ if (gen >= AMDGPUSubtarget::VOLCANIC_ISLANDS) {
+ switch (WaveCount) {
+ case 10: return 80;
+ case 9: return 80;
+ case 8: return 96;
+ default: return 102;
+ }
+ } else {
+ switch (WaveCount) {
+ case 10: return 48;
+ case 9: return 56;
+ case 8: return 64;
+ case 7: return 72;
+ case 6: return 80;
+ case 5: return 96;
+ default: return 103;
+ }
}
}
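For example, at 8 waves per CU the new tables allow 96 SGPRs on VI versus 64 on SI/CI. A usage sketch (assuming a constructed SIRegisterInfo named TRI):

unsigned WavesPerCU = 8;
unsigned MaxSGPRsVI =
    TRI.getNumSGPRsAllowed(AMDGPUSubtarget::VOLCANIC_ISLANDS, WavesPerCU); // 96
unsigned MaxSGPRsSI =
    TRI.getNumSGPRsAllowed(AMDGPUSubtarget::SOUTHERN_ISLANDS, WavesPerCU); // 64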
diff --git a/lib/Target/R600/SIRegisterInfo.h b/lib/Target/R600/SIRegisterInfo.h
index d908ffd..bfdb67c 100644
--- a/lib/Target/R600/SIRegisterInfo.h
+++ b/lib/Target/R600/SIRegisterInfo.h
@@ -17,17 +17,19 @@
#define LLVM_LIB_TARGET_R600_SIREGISTERINFO_H
#include "AMDGPURegisterInfo.h"
+#include "AMDGPUSubtarget.h"
#include "llvm/Support/Debug.h"
namespace llvm {
struct SIRegisterInfo : public AMDGPURegisterInfo {
- SIRegisterInfo(const AMDGPUSubtarget &st);
+ SIRegisterInfo();
BitVector getReservedRegs(const MachineFunction &MF) const override;
- unsigned getRegPressureSetLimit(unsigned Idx) const override;
+ unsigned getRegPressureSetLimit(const MachineFunction &MF,
+ unsigned Idx) const override;
bool requiresRegisterScavenging(const MachineFunction &Fn) const override;
@@ -111,7 +113,8 @@ struct SIRegisterInfo : public AMDGPURegisterInfo {
/// \brief Give the maximum number of SGPRs that can be used by \p WaveCount
/// concurrent waves.
- unsigned getNumSGPRsAllowed(unsigned WaveCount) const;
+ unsigned getNumSGPRsAllowed(AMDGPUSubtarget::Generation gen,
+ unsigned WaveCount) const;
unsigned findUnusedRegister(const MachineRegisterInfo &MRI,
const TargetRegisterClass *RC) const;
diff --git a/lib/Target/R600/SIRegisterInfo.td b/lib/Target/R600/SIRegisterInfo.td
index 8b25e95..7bb5dc2 100644
--- a/lib/Target/R600/SIRegisterInfo.td
+++ b/lib/Target/R600/SIRegisterInfo.td
@@ -256,10 +256,3 @@ def VSrc_64 : RegImmOperand<VS_64>;
def VCSrc_32 : RegInlineOperand<VS_32>;
def VCSrc_64 : RegInlineOperand<VS_64>;
-
-//===----------------------------------------------------------------------===//
-// SGPR and VGPR register classes
-//===----------------------------------------------------------------------===//
-
-def VSrc_128 : RegisterClass<"AMDGPU", [v4i32, v4f32], 128,
- (add VReg_128, SReg_128)>;
diff --git a/lib/Target/R600/SIShrinkInstructions.cpp b/lib/Target/R600/SIShrinkInstructions.cpp
index 97bbd78..51e72cd 100644
--- a/lib/Target/R600/SIShrinkInstructions.cpp
+++ b/lib/Target/R600/SIShrinkInstructions.cpp
@@ -18,9 +18,10 @@
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/IR/Constants.h"
-#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Function.h"
+#include "llvm/IR/LLVMContext.h"
#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetMachine.h"
#define DEBUG_TYPE "si-shrink-instructions"
@@ -88,6 +89,11 @@ static bool canShrink(MachineInstr &MI, const SIInstrInfo *TII,
const MachineOperand *Src2 = TII->getNamedOperand(MI, AMDGPU::OpName::src2);
// Can't shrink instruction with three operands.
+ // FIXME: v_cndmask_b32 has 3 operands and is shrinkable, but we need to add
+ // a special case for it. It can only be shrunk if the third operand
+ // is vcc. We should handle this the same way we handle vopc, by adding
+ // a register allocation hint pre-regalloc and then doing the shrinking
+ // post-regalloc.
if (Src2)
return false;
diff --git a/lib/Target/README.txt b/lib/Target/README.txt
index 0fa56e6..282d923 100644
--- a/lib/Target/README.txt
+++ b/lib/Target/README.txt
@@ -1268,7 +1268,8 @@ int foo (void) {
..
else if (strchr ("<>", *intel_parser.op_string)
-Those should be turned into a switch.
+Those should be turned into a switch. SimplifyLibCalls only gets the second
+case.
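Concretely, for a two-character set the call reduces to two compares (assuming the character is known non-NUL, since strchr also matches the terminator):

// Equivalent of strchr("<>", c) != NULL for c != '\0':
bool hit = (c == '<' || c == '>');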
//===---------------------------------------------------------------------===//
@@ -1843,44 +1844,6 @@ we remove checking in code like
//===---------------------------------------------------------------------===//
-This code (from Benchmarks/Dhrystone/dry.c):
-
-define i32 @Func1(i32, i32) nounwind readnone optsize ssp {
-entry:
- %sext = shl i32 %0, 24
- %conv = ashr i32 %sext, 24
- %sext6 = shl i32 %1, 24
- %conv4 = ashr i32 %sext6, 24
- %cmp = icmp eq i32 %conv, %conv4
- %. = select i1 %cmp, i32 10000, i32 0
- ret i32 %.
-}
-
-Should be simplified into something like:
-
-define i32 @Func1(i32, i32) nounwind readnone optsize ssp {
-entry:
- %sext = shl i32 %0, 24
- %conv = and i32 %sext, 0xFF000000
- %sext6 = shl i32 %1, 24
- %conv4 = and i32 %sext6, 0xFF000000
- %cmp = icmp eq i32 %conv, %conv4
- %. = select i1 %cmp, i32 10000, i32 0
- ret i32 %.
-}
-
-and then to:
-
-define i32 @Func1(i32, i32) nounwind readnone optsize ssp {
-entry:
- %conv = and i32 %0, 0xFF
- %conv4 = and i32 %1, 0xFF
- %cmp = icmp eq i32 %conv, %conv4
- %. = select i1 %cmp, i32 10000, i32 0
- ret i32 %.
-}
-//===---------------------------------------------------------------------===//
-
clang -O3 currently compiles this code
int g(unsigned int a) {
diff --git a/lib/Target/Sparc/MCTargetDesc/SparcMCCodeEmitter.cpp b/lib/Target/Sparc/MCTargetDesc/SparcMCCodeEmitter.cpp
index 5128843..598856f 100644
--- a/lib/Target/Sparc/MCTargetDesc/SparcMCCodeEmitter.cpp
+++ b/lib/Target/Sparc/MCTargetDesc/SparcMCCodeEmitter.cpp
@@ -74,7 +74,6 @@ public:
MCCodeEmitter *llvm::createSparcMCCodeEmitter(const MCInstrInfo &MCII,
const MCRegisterInfo &MRI,
- const MCSubtargetInfo &STI,
MCContext &Ctx) {
return new SparcMCCodeEmitter(Ctx);
}
diff --git a/lib/Target/Sparc/MCTargetDesc/SparcMCExpr.h b/lib/Target/Sparc/MCTargetDesc/SparcMCExpr.h
index f72c6c4..3a6f508 100644
--- a/lib/Target/Sparc/MCTargetDesc/SparcMCExpr.h
+++ b/lib/Target/Sparc/MCTargetDesc/SparcMCExpr.h
@@ -62,8 +62,8 @@ private:
const VariantKind Kind;
const MCExpr *Expr;
- explicit SparcMCExpr(VariantKind _Kind, const MCExpr *_Expr)
- : Kind(_Kind), Expr(_Expr) {}
+ explicit SparcMCExpr(VariantKind Kind, const MCExpr *Expr)
+ : Kind(Kind), Expr(Expr) {}
public:
/// @name Construction
diff --git a/lib/Target/Sparc/MCTargetDesc/SparcMCTargetDesc.cpp b/lib/Target/Sparc/MCTargetDesc/SparcMCTargetDesc.cpp
index 3cc4314..630ed1b 100644
--- a/lib/Target/Sparc/MCTargetDesc/SparcMCTargetDesc.cpp
+++ b/lib/Target/Sparc/MCTargetDesc/SparcMCTargetDesc.cpp
@@ -122,25 +122,16 @@ static MCCodeGenInfo *createSparcV9MCCodeGenInfo(StringRef TT, Reloc::Model RM,
return X;
}
-static MCStreamer *createMCStreamer(const Target &T, StringRef TT,
- MCContext &Context, MCAsmBackend &MAB,
- raw_ostream &OS, MCCodeEmitter *Emitter,
- const MCSubtargetInfo &STI, bool RelaxAll) {
- MCStreamer *S = createELFStreamer(Context, MAB, OS, Emitter, RelaxAll);
- new SparcTargetELFStreamer(*S);
- return S;
+static MCTargetStreamer *
+createObjectTargetStreamer(MCStreamer &S, const MCSubtargetInfo &STI) {
+ return new SparcTargetELFStreamer(S);
}
-static MCStreamer *
-createMCAsmStreamer(MCContext &Ctx, formatted_raw_ostream &OS,
- bool isVerboseAsm, bool useDwarfDirectory,
- MCInstPrinter *InstPrint, MCCodeEmitter *CE,
- MCAsmBackend *TAB, bool ShowInst) {
-
- MCStreamer *S = llvm::createAsmStreamer(
- Ctx, OS, isVerboseAsm, useDwarfDirectory, InstPrint, CE, TAB, ShowInst);
- new SparcTargetAsmStreamer(*S, OS);
- return S;
+static MCTargetStreamer *createTargetAsmStreamer(MCStreamer &S,
+ formatted_raw_ostream &OS,
+ MCInstPrinter *InstPrint,
+ bool isVerboseAsm) {
+ return new SparcTargetAsmStreamer(S, OS);
}
static MCInstPrinter *createSparcMCInstPrinter(const Target &T,
@@ -157,54 +148,37 @@ extern "C" void LLVMInitializeSparcTargetMC() {
RegisterMCAsmInfoFn X(TheSparcTarget, createSparcMCAsmInfo);
RegisterMCAsmInfoFn Y(TheSparcV9Target, createSparcV9MCAsmInfo);
+ for (Target *T : {&TheSparcTarget, &TheSparcV9Target}) {
+ // Register the MC instruction info.
+ TargetRegistry::RegisterMCInstrInfo(*T, createSparcMCInstrInfo);
+
+ // Register the MC register info.
+ TargetRegistry::RegisterMCRegInfo(*T, createSparcMCRegisterInfo);
+
+ // Register the MC subtarget info.
+ TargetRegistry::RegisterMCSubtargetInfo(*T, createSparcMCSubtargetInfo);
+
+ // Register the MC Code Emitter.
+ TargetRegistry::RegisterMCCodeEmitter(*T, createSparcMCCodeEmitter);
+
+ // Register the asm backend.
+ TargetRegistry::RegisterMCAsmBackend(*T, createSparcAsmBackend);
+
+ // Register the object target streamer.
+ TargetRegistry::RegisterObjectTargetStreamer(*T,
+ createObjectTargetStreamer);
+
+ // Register the asm streamer.
+ TargetRegistry::RegisterAsmTargetStreamer(*T, createTargetAsmStreamer);
+
+ // Register the MCInstPrinter
+ TargetRegistry::RegisterMCInstPrinter(*T, createSparcMCInstPrinter);
+ }
+
// Register the MC codegen info.
TargetRegistry::RegisterMCCodeGenInfo(TheSparcTarget,
createSparcMCCodeGenInfo);
TargetRegistry::RegisterMCCodeGenInfo(TheSparcV9Target,
createSparcV9MCCodeGenInfo);
- // Register the MC instruction info.
- TargetRegistry::RegisterMCInstrInfo(TheSparcTarget, createSparcMCInstrInfo);
- TargetRegistry::RegisterMCInstrInfo(TheSparcV9Target, createSparcMCInstrInfo);
-
- // Register the MC register info.
- TargetRegistry::RegisterMCRegInfo(TheSparcTarget, createSparcMCRegisterInfo);
- TargetRegistry::RegisterMCRegInfo(TheSparcV9Target,
- createSparcMCRegisterInfo);
-
- // Register the MC subtarget info.
- TargetRegistry::RegisterMCSubtargetInfo(TheSparcTarget,
- createSparcMCSubtargetInfo);
- TargetRegistry::RegisterMCSubtargetInfo(TheSparcV9Target,
- createSparcMCSubtargetInfo);
-
- // Register the MC Code Emitter.
- TargetRegistry::RegisterMCCodeEmitter(TheSparcTarget,
- createSparcMCCodeEmitter);
- TargetRegistry::RegisterMCCodeEmitter(TheSparcV9Target,
- createSparcMCCodeEmitter);
-
- //Register the asm backend.
- TargetRegistry::RegisterMCAsmBackend(TheSparcTarget,
- createSparcAsmBackend);
- TargetRegistry::RegisterMCAsmBackend(TheSparcV9Target,
- createSparcAsmBackend);
-
- // Register the object streamer.
- TargetRegistry::RegisterMCObjectStreamer(TheSparcTarget,
- createMCStreamer);
- TargetRegistry::RegisterMCObjectStreamer(TheSparcV9Target,
- createMCStreamer);
-
- // Register the asm streamer.
- TargetRegistry::RegisterAsmStreamer(TheSparcTarget,
- createMCAsmStreamer);
- TargetRegistry::RegisterAsmStreamer(TheSparcV9Target,
- createMCAsmStreamer);
-
- // Register the MCInstPrinter
- TargetRegistry::RegisterMCInstPrinter(TheSparcTarget,
- createSparcMCInstPrinter);
- TargetRegistry::RegisterMCInstPrinter(TheSparcV9Target,
- createSparcMCInstPrinter);
}
diff --git a/lib/Target/Sparc/MCTargetDesc/SparcMCTargetDesc.h b/lib/Target/Sparc/MCTargetDesc/SparcMCTargetDesc.h
index c31943d..d2ec991 100644
--- a/lib/Target/Sparc/MCTargetDesc/SparcMCTargetDesc.h
+++ b/lib/Target/Sparc/MCTargetDesc/SparcMCTargetDesc.h
@@ -33,7 +33,6 @@ extern Target TheSparcV9Target;
MCCodeEmitter *createSparcMCCodeEmitter(const MCInstrInfo &MCII,
const MCRegisterInfo &MRI,
- const MCSubtargetInfo &STI,
MCContext &Ctx);
MCAsmBackend *createSparcAsmBackend(const Target &T,
const MCRegisterInfo &MRI,
diff --git a/lib/Target/Sparc/SparcISelDAGToDAG.cpp b/lib/Target/Sparc/SparcISelDAGToDAG.cpp
index 9f03b04..1cf5ccf 100644
--- a/lib/Target/Sparc/SparcISelDAGToDAG.cpp
+++ b/lib/Target/Sparc/SparcISelDAGToDAG.cpp
@@ -50,7 +50,7 @@ public:
/// SelectInlineAsmMemoryOperand - Implement addressing mode selection for
/// inline asm expressions.
bool SelectInlineAsmMemoryOperand(const SDValue &Op,
- char ConstraintCode,
+ unsigned ConstraintID,
std::vector<SDValue> &OutOps) override;
const char *getPassName() const override {
@@ -195,12 +195,13 @@ SDNode *SparcDAGToDAGISel::Select(SDNode *N) {
/// inline asm expressions.
bool
SparcDAGToDAGISel::SelectInlineAsmMemoryOperand(const SDValue &Op,
- char ConstraintCode,
+ unsigned ConstraintID,
std::vector<SDValue> &OutOps) {
SDValue Op0, Op1;
- switch (ConstraintCode) {
+ switch (ConstraintID) {
default: return true;
- case 'm': // memory
+ case InlineAsm::Constraint_i:
+ case InlineAsm::Constraint_m: // memory
if (!SelectADDRrr(Op, Op0, Op1))
SelectADDRri(Op, Op0, Op1);
break;
diff --git a/lib/Target/Sparc/SparcISelLowering.cpp b/lib/Target/Sparc/SparcISelLowering.cpp
index 6774977..c8b0570 100644
--- a/lib/Target/Sparc/SparcISelLowering.cpp
+++ b/lib/Target/Sparc/SparcISelLowering.cpp
@@ -915,9 +915,10 @@ SparcTargetLowering::LowerCall_32(TargetLowering::CallLoweringInfo &CLI,
// Add a register mask operand representing the call-preserved registers.
const SparcRegisterInfo *TRI = Subtarget->getRegisterInfo();
- const uint32_t *Mask = ((hasReturnsTwice)
- ? TRI->getRTCallPreservedMask(CallConv)
- : TRI->getCallPreservedMask(CallConv));
+ const uint32_t *Mask =
+ ((hasReturnsTwice)
+ ? TRI->getRTCallPreservedMask(CallConv)
+ : TRI->getCallPreservedMask(DAG.getMachineFunction(), CallConv));
assert(Mask && "Missing call preserved mask for calling convention");
Ops.push_back(DAG.getRegisterMask(Mask));
@@ -1229,7 +1230,8 @@ SparcTargetLowering::LowerCall_64(TargetLowering::CallLoweringInfo &CLI,
const SparcRegisterInfo *TRI = Subtarget->getRegisterInfo();
const uint32_t *Mask =
((hasReturnsTwice) ? TRI->getRTCallPreservedMask(CLI.CallConv)
- : TRI->getCallPreservedMask(CLI.CallConv));
+ : TRI->getCallPreservedMask(DAG.getMachineFunction(),
+ CLI.CallConv));
assert(Mask && "Missing call preserved mask for calling convention");
Ops.push_back(DAG.getRegisterMask(Mask));
@@ -1904,8 +1906,8 @@ SDValue SparcTargetLowering::LowerGlobalTLSAddress(SDValue Op,
Ops.push_back(Callee);
Ops.push_back(Symbol);
Ops.push_back(DAG.getRegister(SP::O0, PtrVT));
- const uint32_t *Mask =
- Subtarget->getRegisterInfo()->getCallPreservedMask(CallingConv::C);
+ const uint32_t *Mask = Subtarget->getRegisterInfo()->getCallPreservedMask(
+ DAG.getMachineFunction(), CallingConv::C);
assert(Mask && "Missing call preserved mask for calling convention");
Ops.push_back(DAG.getRegisterMask(Mask));
Ops.push_back(InFlag);
diff --git a/lib/Target/Sparc/SparcInstrInfo.cpp b/lib/Target/Sparc/SparcInstrInfo.cpp
index 8b2e6bc..4b70f16 100644
--- a/lib/Target/Sparc/SparcInstrInfo.cpp
+++ b/lib/Target/Sparc/SparcInstrInfo.cpp
@@ -33,9 +33,8 @@ using namespace llvm;
void SparcInstrInfo::anchor() {}
SparcInstrInfo::SparcInstrInfo(SparcSubtarget &ST)
- : SparcGenInstrInfo(SP::ADJCALLSTACKDOWN, SP::ADJCALLSTACKUP),
- RI(ST), Subtarget(ST) {
-}
+ : SparcGenInstrInfo(SP::ADJCALLSTACKDOWN, SP::ADJCALLSTACKUP), RI(),
+ Subtarget(ST) {}
/// isLoadFromStackSlot - If the specified machine instruction is a direct
/// load from a stack slot, return the virtual or physical register number of
diff --git a/lib/Target/Sparc/SparcInstrInfo.h b/lib/Target/Sparc/SparcInstrInfo.h
index fe93ed7..6e08418 100644
--- a/lib/Target/Sparc/SparcInstrInfo.h
+++ b/lib/Target/Sparc/SparcInstrInfo.h
@@ -22,6 +22,8 @@
namespace llvm {
+class SparcSubtarget;
+
/// SPII - This namespace holds all of the target specific flags that
/// instruction info tracks.
///
diff --git a/lib/Target/Sparc/SparcRegisterInfo.cpp b/lib/Target/Sparc/SparcRegisterInfo.cpp
index 3cca98f..9667bc0 100644
--- a/lib/Target/Sparc/SparcRegisterInfo.cpp
+++ b/lib/Target/Sparc/SparcRegisterInfo.cpp
@@ -34,17 +34,16 @@ static cl::opt<bool>
ReserveAppRegisters("sparc-reserve-app-registers", cl::Hidden, cl::init(false),
cl::desc("Reserve application registers (%g2-%g4)"));
-SparcRegisterInfo::SparcRegisterInfo(SparcSubtarget &st)
- : SparcGenRegisterInfo(SP::O7), Subtarget(st) {
-}
+SparcRegisterInfo::SparcRegisterInfo() : SparcGenRegisterInfo(SP::O7) {}
const MCPhysReg*
SparcRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
return CSR_SaveList;
}
-const uint32_t*
-SparcRegisterInfo::getCallPreservedMask(CallingConv::ID CC) const {
+const uint32_t *
+SparcRegisterInfo::getCallPreservedMask(const MachineFunction &MF,
+ CallingConv::ID CC) const {
return CSR_RegMask;
}
@@ -55,6 +54,7 @@ SparcRegisterInfo::getRTCallPreservedMask(CallingConv::ID CC) const {
BitVector SparcRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
BitVector Reserved(getNumRegs());
+ const SparcSubtarget &Subtarget = MF.getSubtarget<SparcSubtarget>();
// FIXME: G1 reserved for now for large imm generation by frame code.
Reserved.set(SP::G1);
@@ -89,6 +89,7 @@ BitVector SparcRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
const TargetRegisterClass*
SparcRegisterInfo::getPointerRegClass(const MachineFunction &MF,
unsigned Kind) const {
+ const SparcSubtarget &Subtarget = MF.getSubtarget<SparcSubtarget>();
return Subtarget.is64Bit() ? &SP::I64RegsRegClass : &SP::IntRegsRegClass;
}
@@ -160,6 +161,7 @@ SparcRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
// Addressable stack objects are accessed using neg. offsets from %fp
MachineFunction &MF = *MI.getParent()->getParent();
+ const SparcSubtarget &Subtarget = MF.getSubtarget<SparcSubtarget>();
int64_t Offset = MF.getFrameInfo()->getObjectOffset(FrameIndex) +
MI.getOperand(FIOperandNum + 1).getImm() +
Subtarget.getStackPointerBias();
@@ -174,7 +176,7 @@ SparcRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
if (!Subtarget.isV9() || !Subtarget.hasHardQuad()) {
if (MI.getOpcode() == SP::STQFri) {
- const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
+ const TargetInstrInfo &TII = *Subtarget.getInstrInfo();
unsigned SrcReg = MI.getOperand(2).getReg();
unsigned SrcEvenReg = getSubReg(SrcReg, SP::sub_even64);
unsigned SrcOddReg = getSubReg(SrcReg, SP::sub_odd64);
@@ -186,7 +188,7 @@ SparcRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
MI.getOperand(2).setReg(SrcOddReg);
Offset += 8;
} else if (MI.getOpcode() == SP::LDQFri) {
- const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
+ const TargetInstrInfo &TII = *Subtarget.getInstrInfo();
unsigned DestReg = MI.getOperand(0).getReg();
unsigned DestEvenReg = getSubReg(DestReg, SP::sub_even64);
unsigned DestOddReg = getSubReg(DestReg, SP::sub_odd64);
diff --git a/lib/Target/Sparc/SparcRegisterInfo.h b/lib/Target/Sparc/SparcRegisterInfo.h
index 63567b0..764a894 100644
--- a/lib/Target/Sparc/SparcRegisterInfo.h
+++ b/lib/Target/Sparc/SparcRegisterInfo.h
@@ -20,20 +20,13 @@
#include "SparcGenRegisterInfo.inc"
namespace llvm {
-
-class SparcSubtarget;
-class TargetInstrInfo;
-class Type;
-
struct SparcRegisterInfo : public SparcGenRegisterInfo {
- SparcSubtarget &Subtarget;
-
- SparcRegisterInfo(SparcSubtarget &st);
+ SparcRegisterInfo();
/// Code Generation virtual methods...
- const MCPhysReg *
- getCalleeSavedRegs(const MachineFunction *MF =nullptr) const override;
- const uint32_t* getCallPreservedMask(CallingConv::ID CC) const override;
+ const MCPhysReg *getCalleeSavedRegs(const MachineFunction *MF) const override;
+ const uint32_t *getCallPreservedMask(const MachineFunction &MF,
+ CallingConv::ID CC) const override;
const uint32_t* getRTCallPreservedMask(CallingConv::ID CC) const;
diff --git a/lib/Target/Sparc/SparcTargetMachine.cpp b/lib/Target/Sparc/SparcTargetMachine.cpp
index 1c423dc..6979a17 100644
--- a/lib/Target/Sparc/SparcTargetMachine.cpp
+++ b/lib/Target/Sparc/SparcTargetMachine.cpp
@@ -56,12 +56,11 @@ SparcTargetMachine::SparcTargetMachine(const Target &T, StringRef TT,
StringRef CPU, StringRef FS,
const TargetOptions &Options,
Reloc::Model RM, CodeModel::Model CM,
- CodeGenOpt::Level OL,
- bool is64bit)
- : LLVMTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL),
- TLOF(make_unique<SparcELFTargetObjectFile>()),
- DL(computeDataLayout(is64bit)),
- Subtarget(TT, CPU, FS, *this, is64bit) {
+ CodeGenOpt::Level OL, bool is64bit)
+ : LLVMTargetMachine(T, computeDataLayout(is64bit), TT, CPU, FS, Options, RM,
+ CM, OL),
+ TLOF(make_unique<SparcELFTargetObjectFile>()),
+ Subtarget(TT, CPU, FS, *this, is64bit) {
initAsmInfo();
}
diff --git a/lib/Target/Sparc/SparcTargetMachine.h b/lib/Target/Sparc/SparcTargetMachine.h
index 4f93980..30a8ebf 100644
--- a/lib/Target/Sparc/SparcTargetMachine.h
+++ b/lib/Target/Sparc/SparcTargetMachine.h
@@ -22,7 +22,6 @@ namespace llvm {
class SparcTargetMachine : public LLVMTargetMachine {
std::unique_ptr<TargetLoweringObjectFile> TLOF;
- const DataLayout DL;
SparcSubtarget Subtarget;
public:
SparcTargetMachine(const Target &T, StringRef TT,
@@ -31,8 +30,9 @@ public:
CodeGenOpt::Level OL, bool is64bit);
~SparcTargetMachine() override;
- const DataLayout *getDataLayout() const override { return &DL; }
- const SparcSubtarget *getSubtargetImpl() const override { return &Subtarget; }
+ const SparcSubtarget *getSubtargetImpl(const Function &) const override {
+ return &Subtarget;
+ }
// Pass Pipeline Configuration
TargetPassConfig *createPassConfig(PassManagerBase &PM) override;
diff --git a/lib/Target/SystemZ/MCTargetDesc/SystemZMCCodeEmitter.cpp b/lib/Target/SystemZ/MCTargetDesc/SystemZMCCodeEmitter.cpp
index d9bb916..40dc48e 100644
--- a/lib/Target/SystemZ/MCTargetDesc/SystemZMCCodeEmitter.cpp
+++ b/lib/Target/SystemZ/MCTargetDesc/SystemZMCCodeEmitter.cpp
@@ -110,7 +110,6 @@ private:
MCCodeEmitter *llvm::createSystemZMCCodeEmitter(const MCInstrInfo &MCII,
const MCRegisterInfo &MRI,
- const MCSubtargetInfo &MCSTI,
MCContext &Ctx) {
return new SystemZMCCodeEmitter(MCII, Ctx);
}
diff --git a/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.cpp b/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.cpp
index 6e82b6d..ffd05a9 100644
--- a/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.cpp
+++ b/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.cpp
@@ -181,14 +181,6 @@ static MCInstPrinter *createSystemZMCInstPrinter(const Target &T,
return new SystemZInstPrinter(MAI, MII, MRI);
}
-static MCStreamer *
-createSystemZMCObjectStreamer(const Target &T, StringRef TT, MCContext &Ctx,
- MCAsmBackend &MAB, raw_ostream &OS,
- MCCodeEmitter *Emitter,
- const MCSubtargetInfo &STI, bool RelaxAll) {
- return createELFStreamer(Ctx, MAB, OS, Emitter, RelaxAll);
-}
-
extern "C" void LLVMInitializeSystemZTargetMC() {
// Register the MCAsmInfo.
TargetRegistry::RegisterMCAsmInfo(TheSystemZTarget,
@@ -221,8 +213,4 @@ extern "C" void LLVMInitializeSystemZTargetMC() {
// Register the MCInstPrinter.
TargetRegistry::RegisterMCInstPrinter(TheSystemZTarget,
createSystemZMCInstPrinter);
-
- // Register the MCObjectStreamer;
- TargetRegistry::RegisterMCObjectStreamer(TheSystemZTarget,
- createSystemZMCObjectStreamer);
}
diff --git a/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.h b/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.h
index 5eb6526..962c950 100644
--- a/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.h
+++ b/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.h
@@ -71,7 +71,6 @@ inline unsigned getRegAsGRH32(unsigned Reg) {
MCCodeEmitter *createSystemZMCCodeEmitter(const MCInstrInfo &MCII,
const MCRegisterInfo &MRI,
- const MCSubtargetInfo &STI,
MCContext &Ctx);
MCAsmBackend *createSystemZMCAsmBackend(const Target &T,
diff --git a/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp b/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp
index b8b0db9..a52aa25 100644
--- a/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp
+++ b/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp
@@ -328,7 +328,7 @@ public:
// Override SelectionDAGISel.
SDNode *Select(SDNode *Node) override;
- bool SelectInlineAsmMemoryOperand(const SDValue &Op, char ConstraintCode,
+ bool SelectInlineAsmMemoryOperand(const SDValue &Op, unsigned ConstraintID,
std::vector<SDValue> &OutOps) override;
// Include the pieces autogenerated from the target description.
@@ -1129,18 +1129,29 @@ SDNode *SystemZDAGToDAGISel::Select(SDNode *Node) {
bool SystemZDAGToDAGISel::
SelectInlineAsmMemoryOperand(const SDValue &Op,
- char ConstraintCode,
+ unsigned ConstraintID,
std::vector<SDValue> &OutOps) {
- assert(ConstraintCode == 'm' && "Unexpected constraint code");
- // Accept addresses with short displacements, which are compatible
- // with Q, R, S and T. But keep the index operand for future expansion.
- SDValue Base, Disp, Index;
- if (!selectBDXAddr(SystemZAddressingMode::FormBD,
- SystemZAddressingMode::Disp12Only,
- Op, Base, Disp, Index))
- return true;
- OutOps.push_back(Base);
- OutOps.push_back(Disp);
- OutOps.push_back(Index);
- return false;
+ switch (ConstraintID) {
+ default:
+ llvm_unreachable("Unexpected asm memory constraint");
+ case InlineAsm::Constraint_i:
+ case InlineAsm::Constraint_m:
+ case InlineAsm::Constraint_Q:
+ case InlineAsm::Constraint_R:
+ case InlineAsm::Constraint_S:
+ case InlineAsm::Constraint_T:
+ // Accept addresses with short displacements, which are compatible
+ // with Q, R, S and T. But keep the index operand for future expansion.
+ SDValue Base, Disp, Index;
+ if (selectBDXAddr(SystemZAddressingMode::FormBD,
+ SystemZAddressingMode::Disp12Only,
+ Op, Base, Disp, Index)) {
+ OutOps.push_back(Base);
+ OutOps.push_back(Disp);
+ OutOps.push_back(Index);
+ return false;
+ }
+ break;
+ }
+ return true;
}
diff --git a/lib/Target/SystemZ/SystemZISelLowering.cpp b/lib/Target/SystemZ/SystemZISelLowering.cpp
index e96398d..0ca8bcd 100644
--- a/lib/Target/SystemZ/SystemZISelLowering.cpp
+++ b/lib/Target/SystemZ/SystemZISelLowering.cpp
@@ -920,7 +920,7 @@ SystemZTargetLowering::LowerCall(CallLoweringInfo &CLI,
// Add a register mask operand representing the call-preserved registers.
const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
- const uint32_t *Mask = TRI->getCallPreservedMask(CallConv);
+ const uint32_t *Mask = TRI->getCallPreservedMask(MF, CallConv);
assert(Mask && "Missing call preserved mask for calling convention");
Ops.push_back(DAG.getRegisterMask(Mask));
@@ -1858,7 +1858,8 @@ SDValue SystemZTargetLowering::lowerTLSGetOffset(GlobalAddressSDNode *Node,
// Add a register mask operand representing the call-preserved registers.
const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
- const uint32_t *Mask = TRI->getCallPreservedMask(CallingConv::C);
+ const uint32_t *Mask =
+ TRI->getCallPreservedMask(DAG.getMachineFunction(), CallingConv::C);
assert(Mask && "Missing call preserved mask for calling convention");
Ops.push_back(DAG.getRegisterMask(Mask));
diff --git a/lib/Target/SystemZ/SystemZISelLowering.h b/lib/Target/SystemZ/SystemZISelLowering.h
index a2b10b0..23c62c9 100644
--- a/lib/Target/SystemZ/SystemZISelLowering.h
+++ b/lib/Target/SystemZ/SystemZISelLowering.h
@@ -233,6 +233,26 @@ public:
std::string &Constraint,
std::vector<SDValue> &Ops,
SelectionDAG &DAG) const override;
+
+ unsigned getInlineAsmMemConstraint(
+ const std::string &ConstraintCode) const override {
+ if (ConstraintCode.size() == 1) {
+ switch (ConstraintCode[0]) {
+ default:
+ break;
+ case 'Q':
+ return InlineAsm::Constraint_Q;
+ case 'R':
+ return InlineAsm::Constraint_R;
+ case 'S':
+ return InlineAsm::Constraint_S;
+ case 'T':
+ return InlineAsm::Constraint_T;
+ }
+ }
+ return TargetLowering::getInlineAsmMemConstraint(ConstraintCode);
+ }
+
MachineBasicBlock *EmitInstrWithCustomInserter(MachineInstr *MI,
MachineBasicBlock *BB) const
override;
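These IDs come from memory-constraint letters in user inline asm; e.g. 'Q' (base register plus 12-bit unsigned displacement, no index) is mapped to Constraint_Q by the hook above. An illustrative C++ use, not from the patch:

// 'Q' requests a short-displacement memory operand, which the selector
// change above now expands via selectBDXAddr.
int load32(int *P) {
  int V;
  asm("l %0, %1" : "=r"(V) : "Q"(*P)); // 32-bit load, base+disp12 address
  return V;
}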
diff --git a/lib/Target/SystemZ/SystemZInstrInfo.cpp b/lib/Target/SystemZ/SystemZInstrInfo.cpp
index 8488ec8..5128993 100644
--- a/lib/Target/SystemZ/SystemZInstrInfo.cpp
+++ b/lib/Target/SystemZ/SystemZInstrInfo.cpp
@@ -743,11 +743,10 @@ SystemZInstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI,
return nullptr;
}
-MachineInstr *
-SystemZInstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
- MachineInstr *MI,
- const SmallVectorImpl<unsigned> &Ops,
- int FrameIndex) const {
+MachineInstr *SystemZInstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
+ MachineInstr *MI,
+ ArrayRef<unsigned> Ops,
+ int FrameIndex) const {
const MachineFrameInfo *MFI = MF.getFrameInfo();
unsigned Size = MFI->getObjectSize(FrameIndex);
unsigned Opcode = MI->getOpcode();
@@ -862,9 +861,9 @@ SystemZInstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
}
MachineInstr *
-SystemZInstrInfo::foldMemoryOperandImpl(MachineFunction &MF, MachineInstr* MI,
- const SmallVectorImpl<unsigned> &Ops,
- MachineInstr* LoadMI) const {
+SystemZInstrInfo::foldMemoryOperandImpl(MachineFunction &MF, MachineInstr *MI,
+ ArrayRef<unsigned> Ops,
+ MachineInstr *LoadMI) const {
return nullptr;
}
diff --git a/lib/Target/SystemZ/SystemZInstrInfo.h b/lib/Target/SystemZ/SystemZInstrInfo.h
index e711f89..b55810b 100644
--- a/lib/Target/SystemZ/SystemZInstrInfo.h
+++ b/lib/Target/SystemZ/SystemZInstrInfo.h
@@ -186,11 +186,11 @@ public:
MachineBasicBlock::iterator &MBBI,
LiveVariables *LV) const override;
MachineInstr *foldMemoryOperandImpl(MachineFunction &MF, MachineInstr *MI,
- const SmallVectorImpl<unsigned> &Ops,
+ ArrayRef<unsigned> Ops,
int FrameIndex) const override;
- MachineInstr *foldMemoryOperandImpl(MachineFunction &MF, MachineInstr* MI,
- const SmallVectorImpl<unsigned> &Ops,
- MachineInstr* LoadMI) const override;
+ MachineInstr *foldMemoryOperandImpl(MachineFunction &MF, MachineInstr *MI,
+ ArrayRef<unsigned> Ops,
+ MachineInstr *LoadMI) const override;
bool expandPostRAPseudo(MachineBasicBlock::iterator MBBI) const override;
bool ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const
override;
diff --git a/lib/Target/SystemZ/SystemZRegisterInfo.cpp b/lib/Target/SystemZ/SystemZRegisterInfo.cpp
index 64f5eeb..7cabea9 100644
--- a/lib/Target/SystemZ/SystemZRegisterInfo.cpp
+++ b/lib/Target/SystemZ/SystemZRegisterInfo.cpp
@@ -28,7 +28,8 @@ SystemZRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
}
const uint32_t *
-SystemZRegisterInfo::getCallPreservedMask(CallingConv::ID CC) const {
+SystemZRegisterInfo::getCallPreservedMask(const MachineFunction &MF,
+ CallingConv::ID CC) const {
return CSR_SystemZ_RegMask;
}
diff --git a/lib/Target/SystemZ/SystemZRegisterInfo.h b/lib/Target/SystemZ/SystemZRegisterInfo.h
index 212fe91..a0db5a9 100644
--- a/lib/Target/SystemZ/SystemZRegisterInfo.h
+++ b/lib/Target/SystemZ/SystemZRegisterInfo.h
@@ -43,9 +43,9 @@ public:
bool trackLivenessAfterRegAlloc(const MachineFunction &MF) const override {
return true;
}
- const MCPhysReg *getCalleeSavedRegs(const MachineFunction *MF = nullptr) const
- override;
- const uint32_t *getCallPreservedMask(CallingConv::ID CC) const override;
+ const MCPhysReg *getCalleeSavedRegs(const MachineFunction *MF) const override;
+ const uint32_t *getCallPreservedMask(const MachineFunction &MF,
+ CallingConv::ID CC) const override;
BitVector getReservedRegs(const MachineFunction &MF) const override;
void eliminateFrameIndex(MachineBasicBlock::iterator MI,
int SPAdj, unsigned FIOperandNum,
diff --git a/lib/Target/SystemZ/SystemZTargetMachine.cpp b/lib/Target/SystemZ/SystemZTargetMachine.cpp
index 73198b1..86baccb 100644
--- a/lib/Target/SystemZ/SystemZTargetMachine.cpp
+++ b/lib/Target/SystemZ/SystemZTargetMachine.cpp
@@ -25,12 +25,12 @@ SystemZTargetMachine::SystemZTargetMachine(const Target &T, StringRef TT,
const TargetOptions &Options,
Reloc::Model RM, CodeModel::Model CM,
CodeGenOpt::Level OL)
- : LLVMTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL),
+ // Make sure that global data has at least 16 bits of alignment by
+ // default, so that we can refer to it using LARL. We don't have any
+ // special requirements for stack variables though.
+ : LLVMTargetMachine(T, "E-m:e-i1:8:16-i8:8:16-i64:64-f128:64-a:8:16-n32:64",
+ TT, CPU, FS, Options, RM, CM, OL),
TLOF(make_unique<TargetLoweringObjectFileELF>()),
- // Make sure that global data has at least 16 bits of alignment by
- // default, so that we can refer to it using LARL. We don't have any
- // special requirements for stack variables though.
- DL("E-m:e-i1:8:16-i8:8:16-i64:64-f128:64-a:8:16-n32:64"),
Subtarget(TT, CPU, FS, *this) {
initAsmInfo();
}
diff --git a/lib/Target/SystemZ/SystemZTargetMachine.h b/lib/Target/SystemZ/SystemZTargetMachine.h
index 52ccc5a..181b926 100644
--- a/lib/Target/SystemZ/SystemZTargetMachine.h
+++ b/lib/Target/SystemZ/SystemZTargetMachine.h
@@ -24,7 +24,6 @@ class TargetFrameLowering;
class SystemZTargetMachine : public LLVMTargetMachine {
std::unique_ptr<TargetLoweringObjectFile> TLOF;
- const DataLayout DL;
SystemZSubtarget Subtarget;
public:
@@ -34,9 +33,8 @@ public:
CodeGenOpt::Level OL);
~SystemZTargetMachine() override;
- // Override TargetMachine.
- const DataLayout *getDataLayout() const override { return &DL; }
- const SystemZSubtarget *getSubtargetImpl() const override {
+ const SystemZSubtarget *getSubtargetImpl() const { return &Subtarget; }
+ const SystemZSubtarget *getSubtargetImpl(const Function &) const override {
return &Subtarget;
}
// Override LLVMTargetMachine
diff --git a/lib/Target/Target.cpp b/lib/Target/Target.cpp
index 5b7953d..1b74e8c 100644
--- a/lib/Target/Target.cpp
+++ b/lib/Target/Target.cpp
@@ -34,7 +34,6 @@ inline LLVMTargetLibraryInfoRef wrap(const TargetLibraryInfoImpl *P) {
}
void llvm::initializeTarget(PassRegistry &Registry) {
- initializeDataLayoutPassPass(Registry);
initializeTargetLibraryInfoWrapperPassPass(Registry);
initializeTargetTransformInfoWrapperPassPass(Registry);
}
@@ -48,9 +47,6 @@ LLVMTargetDataRef LLVMCreateTargetData(const char *StringRep) {
}
void LLVMAddTargetData(LLVMTargetDataRef TD, LLVMPassManagerRef PM) {
- // The DataLayoutPass must now be in sync with the module. Unfortunatelly we
- // cannot enforce that from the C api.
- unwrap(PM)->add(new DataLayoutPass());
}
void LLVMAddTargetLibraryInfo(LLVMTargetLibraryInfoRef TLI,
diff --git a/lib/Target/TargetLoweringObjectFile.cpp b/lib/Target/TargetLoweringObjectFile.cpp
index faa6fbe..75100fb 100644
--- a/lib/Target/TargetLoweringObjectFile.cpp
+++ b/lib/Target/TargetLoweringObjectFile.cpp
@@ -343,3 +343,9 @@ const MCExpr *TargetLoweringObjectFile::getDebugThreadLocalSymbol(const MCSymbol
// null return could mean 'no location' & we should just do that here.
return MCSymbolRefExpr::Create(Sym, *Ctx);
}
+
+void TargetLoweringObjectFile::getNameWithPrefix(
+ SmallVectorImpl<char> &OutName, const GlobalValue *GV,
+ bool CannotUsePrivateLabel, Mangler &Mang, const TargetMachine &TM) const {
+ Mang.getNameWithPrefix(OutName, GV, CannotUsePrivateLabel);
+}
diff --git a/lib/Target/TargetMachine.cpp b/lib/Target/TargetMachine.cpp
index 307e93c..dd07f81 100644
--- a/lib/Target/TargetMachine.cpp
+++ b/lib/Target/TargetMachine.cpp
@@ -22,6 +22,7 @@
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCCodeGenInfo.h"
#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCInstrInfo.h"
#include "llvm/MC/MCSectionMachO.h"
#include "llvm/MC/MCTargetOptions.h"
#include "llvm/MC/SectionKind.h"
@@ -36,18 +37,20 @@ using namespace llvm;
// TargetMachine Class
//
-TargetMachine::TargetMachine(const Target &T,
+TargetMachine::TargetMachine(const Target &T, StringRef DataLayoutString,
StringRef TT, StringRef CPU, StringRef FS,
const TargetOptions &Options)
- : TheTarget(T), TargetTriple(TT), TargetCPU(CPU), TargetFS(FS),
- CodeGenInfo(nullptr), AsmInfo(nullptr),
- RequireStructuredCFG(false),
- Options(Options) {
-}
+ : TheTarget(T), DL(DataLayoutString), TargetTriple(TT), TargetCPU(CPU),
+ TargetFS(FS), CodeGenInfo(nullptr), AsmInfo(nullptr), MRI(nullptr),
+ MII(nullptr), STI(nullptr), RequireStructuredCFG(false),
+ Options(Options) {}
TargetMachine::~TargetMachine() {
delete CodeGenInfo;
delete AsmInfo;
+ delete MRI;
+ delete MII;
+ delete STI;
}
/// \brief Reset the target options based on the function's attributes.
@@ -177,7 +180,7 @@ void TargetMachine::getNameWithPrefix(SmallVectorImpl<char> &Name,
const TargetLoweringObjectFile *TLOF = getObjFileLowering();
const MCSection *TheSection = TLOF->SectionForGlobal(GV, GVKind, Mang, *this);
bool CannotUsePrivateLabel = !canUsePrivateLabel(*AsmInfo, *TheSection);
- Mang.getNameWithPrefix(Name, GV, CannotUsePrivateLabel);
+ TLOF->getNameWithPrefix(Name, GV, CannotUsePrivateLabel, Mang, *this);
}
MCSymbol *TargetMachine::getSymbol(const GlobalValue *GV, Mangler &Mang) const {
diff --git a/lib/Target/TargetMachineC.cpp b/lib/Target/TargetMachineC.cpp
index c7838a9..236cb1b 100644
--- a/lib/Target/TargetMachineC.cpp
+++ b/lib/Target/TargetMachineC.cpp
@@ -198,8 +198,7 @@ static LLVMBool LLVMTargetMachineEmit(LLVMTargetMachineRef T, LLVMModuleRef M,
*ErrorMessage = strdup(error.c_str());
return true;
}
- Mod->setDataLayout(td);
- pass.add(new DataLayoutPass());
+ Mod->setDataLayout(*td);
TargetMachine::CodeGenFileType ft;
switch (codegen) {
diff --git a/lib/Target/TargetSubtargetInfo.cpp b/lib/Target/TargetSubtargetInfo.cpp
index 10597a8..b2bb59e 100644
--- a/lib/Target/TargetSubtargetInfo.cpp
+++ b/lib/Target/TargetSubtargetInfo.cpp
@@ -23,22 +23,6 @@ TargetSubtargetInfo::TargetSubtargetInfo() {}
TargetSubtargetInfo::~TargetSubtargetInfo() {}
-// Temporary option to compare overall performance change when moving from the
-// SD scheduler to the MachineScheduler pass pipeline. This is convenient for
-// benchmarking during the transition from SD to MI scheduling. Once armv7 makes
-// the switch, it should go away. The normal way to enable/disable the
-// MachineScheduling pass itself is by using -enable-misched. For targets that
-// already use MI sched (via MySubTarget::enableMachineScheduler())
-// -misched-bench=false negates the subtarget hook.
-static cl::opt<bool> BenchMachineSched("misched-bench", cl::Hidden,
- cl::desc("Migrate from the target's default SD scheduler to MI scheduler"));
-
-bool TargetSubtargetInfo::useMachineScheduler() const {
- if (BenchMachineSched.getNumOccurrences())
- return BenchMachineSched;
- return enableMachineScheduler();
-}
-
bool TargetSubtargetInfo::enableAtomicExpand() const {
return true;
}
@@ -47,6 +31,10 @@ bool TargetSubtargetInfo::enableMachineScheduler() const {
return false;
}
+bool TargetSubtargetInfo::enableJoinGlobalCopies() const {
+ return enableMachineScheduler();
+}
+
bool TargetSubtargetInfo::enableRALocalReassignment(
CodeGenOpt::Level OptLevel) const {
return true;
diff --git a/lib/Target/X86/Android.mk b/lib/Target/X86/Android.mk
index 08646d0..7194dd3 100644
--- a/lib/Target/X86/Android.mk
+++ b/lib/Target/X86/Android.mk
@@ -1,8 +1,10 @@
LOCAL_PATH := $(call my-dir)
x86_codegen_TBLGEN_TABLES := \
+ X86GenAsmMatcher.inc \
X86GenAsmWriter.inc \
X86GenAsmWriter1.inc \
+ X86GenDisassemblerTables.inc \
X86GenRegisterInfo.inc \
X86GenInstrInfo.inc \
X86GenDAGISel.inc \
diff --git a/lib/Target/X86/AsmParser/X86AsmParser.cpp b/lib/Target/X86/AsmParser/X86AsmParser.cpp
index 0b6fb52..c24805a 100644
--- a/lib/Target/X86/AsmParser/X86AsmParser.cpp
+++ b/lib/Target/X86/AsmParser/X86AsmParser.cpp
@@ -11,6 +11,7 @@
#include "X86AsmInstrumentation.h"
#include "X86AsmParserCommon.h"
#include "X86Operand.h"
+#include "X86ISelLowering.h"
#include "llvm/ADT/APFloat.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallString.h"
@@ -664,6 +665,7 @@ private:
ParseIntelSegmentOverride(unsigned SegReg, SMLoc Start, unsigned Size);
std::unique_ptr<X86Operand>
ParseIntelMemOperand(int64_t ImmDisp, SMLoc StartLoc, unsigned Size);
+ std::unique_ptr<X86Operand> ParseRoundingModeOp(SMLoc Start, SMLoc End);
bool ParseIntelExpression(IntelExprStateMachine &SM, SMLoc &End);
std::unique_ptr<X86Operand> ParseIntelBracExpression(unsigned SegReg,
SMLoc Start,
@@ -1407,6 +1409,35 @@ X86AsmParser::ParseIntelSegmentOverride(unsigned SegReg, SMLoc Start,
/*Scale=*/1, Start, End, Size, Identifier, Info);
}
+// ParseRoundingModeOp - Parse AVX-512 rounding mode operand
+std::unique_ptr<X86Operand>
+X86AsmParser::ParseRoundingModeOp(SMLoc Start, SMLoc End) {
+ MCAsmParser &Parser = getParser();
+ const AsmToken &Tok = Parser.getTok();
+ consumeToken(); // Eat "{"
+ if (Tok.getIdentifier().startswith("r")) {
+ int rndMode = StringSwitch<int>(Tok.getIdentifier())
+ .Case("rn", X86::STATIC_ROUNDING::TO_NEAREST_INT)
+ .Case("rd", X86::STATIC_ROUNDING::TO_NEG_INF)
+ .Case("ru", X86::STATIC_ROUNDING::TO_POS_INF)
+ .Case("rz", X86::STATIC_ROUNDING::TO_ZERO)
+ .Default(-1);
+ if (rndMode == -1)
+ return ErrorOperand(Tok.getLoc(), "Invalid rounding mode.");
+ Parser.Lex(); // Eat "r*" of r*-sae
+ if (!getLexer().is(AsmToken::Minus))
+ return ErrorOperand(Tok.getLoc(), "Expected - at this point");
+ Parser.Lex(); // Eat "-"
+ Parser.Lex(); // Eat the sae
+ if (!getLexer().is(AsmToken::RCurly))
+ return ErrorOperand(Tok.getLoc(), "Expected } at this point");
+ Parser.Lex(); // Eat "}"
+ const MCExpr *RndModeOp =
+ MCConstantExpr::Create(rndMode, Parser.getContext());
+ return X86Operand::CreateImm(RndModeOp, Start, End);
+ }
+ return ErrorOperand(Tok.getLoc(), "unknown token in expression");
+}
/// ParseIntelMemOperand - Parse intel style memory operand.
std::unique_ptr<X86Operand> X86AsmParser::ParseIntelMemOperand(int64_t ImmDisp,
SMLoc Start,
@@ -1656,6 +1687,11 @@ std::unique_ptr<X86Operand> X86AsmParser::ParseIntelOperand() {
return ParseIntelMemOperand(Imm, Start, Size);
}
+ // rounding mode token
+ if (STI.getFeatureBits() & X86::FeatureAVX512 &&
+ getLexer().is(AsmToken::LCurly))
+ return ParseRoundingModeOp(Start, End);
+
// Register.
unsigned RegNo = 0;
if (!ParseRegister(RegNo, Start, End)) {
@@ -1708,6 +1744,12 @@ std::unique_ptr<X86Operand> X86AsmParser::ParseATTOperand() {
return nullptr;
return X86Operand::CreateImm(Val, Start, End);
}
+ case AsmToken::LCurly: {
+ SMLoc Start = Parser.getTok().getLoc(), End;
+ if (STI.getFeatureBits() & X86::FeatureAVX512)
+ return ParseRoundingModeOp(Start, End);
+ return ErrorOperand(Start, "unknown token in expression");
+ }
}
}
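With this, both syntaxes accept the AVX-512 static-rounding operand; illustrative inputs the parser now handles (assuming AVX-512F is enabled, and with the operand placement each dialect uses):

  vaddpd zmm0, zmm1, zmm2, {rd-sae}     ; Intel syntax: round toward -inf, SAE
  vaddpd {ru-sae}, %zmm2, %zmm1, %zmm0  # AT&T syntax: round toward +inf, SAE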
diff --git a/lib/Target/X86/AsmParser/X86Operand.h b/lib/Target/X86/AsmParser/X86Operand.h
index d67e119..94dbedb 100644
--- a/lib/Target/X86/AsmParser/X86Operand.h
+++ b/lib/Target/X86/AsmParser/X86Operand.h
@@ -260,6 +260,9 @@ struct X86Operand : public MCParsedAsmOperand {
return Kind == Memory && !getMemSegReg() && !getMemBaseReg() &&
!getMemIndexReg() && getMemScale() == 1;
}
+ bool isAVX512RC() const {
+ return isImm();
+ }
bool isAbsMem16() const {
return isAbsMem() && Mem.ModeSize == 16;
@@ -394,7 +397,10 @@ struct X86Operand : public MCParsedAsmOperand {
RegNo = getGR32FromGR64(RegNo);
Inst.addOperand(MCOperand::CreateReg(RegNo));
}
-
+ void addAVX512RCOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ addExpr(Inst, getImm());
+ }
void addImmOperands(MCInst &Inst, unsigned N) const {
assert(N == 1 && "Invalid number of operands!");
addExpr(Inst, getImm());
diff --git a/lib/Target/X86/Disassembler/X86Disassembler.cpp b/lib/Target/X86/Disassembler/X86Disassembler.cpp
index 99fb1ab..e8c5475 100644
--- a/lib/Target/X86/Disassembler/X86Disassembler.cpp
+++ b/lib/Target/X86/Disassembler/X86Disassembler.cpp
@@ -378,26 +378,28 @@ static void translateImmediate(MCInst &mcInst, uint64_t immediate,
unsigned NewOpc;
switch (mcInst.getOpcode()) {
default: llvm_unreachable("unexpected opcode");
- case X86::VCMPPDrmi: NewOpc = X86::VCMPPDrmi_alt; break;
- case X86::VCMPPDrri: NewOpc = X86::VCMPPDrri_alt; break;
- case X86::VCMPPSrmi: NewOpc = X86::VCMPPSrmi_alt; break;
- case X86::VCMPPSrri: NewOpc = X86::VCMPPSrri_alt; break;
- case X86::VCMPSDrm: NewOpc = X86::VCMPSDrm_alt; break;
- case X86::VCMPSDrr: NewOpc = X86::VCMPSDrr_alt; break;
- case X86::VCMPSSrm: NewOpc = X86::VCMPSSrm_alt; break;
- case X86::VCMPSSrr: NewOpc = X86::VCMPSSrr_alt; break;
- case X86::VCMPPDYrmi: NewOpc = X86::VCMPPDYrmi_alt; break;
- case X86::VCMPPDYrri: NewOpc = X86::VCMPPDYrri_alt; break;
- case X86::VCMPPSYrmi: NewOpc = X86::VCMPPSYrmi_alt; break;
- case X86::VCMPPSYrri: NewOpc = X86::VCMPPSYrri_alt; break;
- case X86::VCMPPDZrmi: NewOpc = X86::VCMPPDZrmi_alt; break;
- case X86::VCMPPDZrri: NewOpc = X86::VCMPPDZrri_alt; break;
- case X86::VCMPPSZrmi: NewOpc = X86::VCMPPSZrmi_alt; break;
- case X86::VCMPPSZrri: NewOpc = X86::VCMPPSZrri_alt; break;
- case X86::VCMPSDZrm: NewOpc = X86::VCMPSDZrmi_alt; break;
- case X86::VCMPSDZrr: NewOpc = X86::VCMPSDZrri_alt; break;
- case X86::VCMPSSZrm: NewOpc = X86::VCMPSSZrmi_alt; break;
- case X86::VCMPSSZrr: NewOpc = X86::VCMPSSZrri_alt; break;
+ case X86::VCMPPDrmi: NewOpc = X86::VCMPPDrmi_alt; break;
+ case X86::VCMPPDrri: NewOpc = X86::VCMPPDrri_alt; break;
+ case X86::VCMPPSrmi: NewOpc = X86::VCMPPSrmi_alt; break;
+ case X86::VCMPPSrri: NewOpc = X86::VCMPPSrri_alt; break;
+ case X86::VCMPSDrm: NewOpc = X86::VCMPSDrm_alt; break;
+ case X86::VCMPSDrr: NewOpc = X86::VCMPSDrr_alt; break;
+ case X86::VCMPSSrm: NewOpc = X86::VCMPSSrm_alt; break;
+ case X86::VCMPSSrr: NewOpc = X86::VCMPSSrr_alt; break;
+ case X86::VCMPPDYrmi: NewOpc = X86::VCMPPDYrmi_alt; break;
+ case X86::VCMPPDYrri: NewOpc = X86::VCMPPDYrri_alt; break;
+ case X86::VCMPPSYrmi: NewOpc = X86::VCMPPSYrmi_alt; break;
+ case X86::VCMPPSYrri: NewOpc = X86::VCMPPSYrri_alt; break;
+ case X86::VCMPPDZrmi: NewOpc = X86::VCMPPDZrmi_alt; break;
+ case X86::VCMPPDZrri: NewOpc = X86::VCMPPDZrri_alt; break;
+ case X86::VCMPPDZrrib: NewOpc = X86::VCMPPDZrrib_alt; break;
+ case X86::VCMPPSZrmi: NewOpc = X86::VCMPPSZrmi_alt; break;
+ case X86::VCMPPSZrri: NewOpc = X86::VCMPPSZrri_alt; break;
+ case X86::VCMPPSZrrib: NewOpc = X86::VCMPPSZrrib_alt; break;
+ case X86::VCMPSDZrm: NewOpc = X86::VCMPSDZrmi_alt; break;
+ case X86::VCMPSDZrr: NewOpc = X86::VCMPSDZrri_alt; break;
+ case X86::VCMPSSZrm: NewOpc = X86::VCMPSSZrmi_alt; break;
+ case X86::VCMPSSZrr: NewOpc = X86::VCMPSSZrri_alt; break;
}
// Switch opcode to the one that doesn't get special printing.
mcInst.setOpcode(NewOpc);
diff --git a/lib/Target/X86/Disassembler/X86DisassemblerDecoder.cpp b/lib/Target/X86/Disassembler/X86DisassemblerDecoder.cpp
index 619a0d4..7c9e012 100644
--- a/lib/Target/X86/Disassembler/X86DisassemblerDecoder.cpp
+++ b/lib/Target/X86/Disassembler/X86DisassemblerDecoder.cpp
@@ -310,11 +310,8 @@ static bool isPrefixAtLocation(struct InternalInstruction* insn,
uint8_t prefix,
uint64_t location)
{
- if (insn->prefixPresent[prefix] == 1 &&
- insn->prefixLocations[prefix] == location)
- return true;
- else
- return false;
+ return insn->prefixPresent[prefix] == 1 &&
+ insn->prefixLocations[prefix] == location;
}
/*
@@ -1458,6 +1455,8 @@ static int readModRM(struct InternalInstruction* insn) {
case TYPE_VK1: \
case TYPE_VK8: \
case TYPE_VK16: \
+ if (index > 7) \
+ *valid = 0; \
return prefix##_K0 + index; \
case TYPE_MM64: \
return prefix##_MM0 + (index & 0x7); \
diff --git a/lib/Target/X86/Disassembler/X86DisassemblerDecoderCommon.h b/lib/Target/X86/Disassembler/X86DisassemblerDecoderCommon.h
index 70c6042..9e65050 100644
--- a/lib/Target/X86/Disassembler/X86DisassemblerDecoderCommon.h
+++ b/lib/Target/X86/Disassembler/X86DisassemblerDecoderCommon.h
@@ -485,18 +485,6 @@ struct OperandSpecifier {
uint8_t type;
};
-// Indicates where the opcode modifier (if any) is to be found. Extended
-// opcodes with AddRegFrm have the opcode modifier in the ModR/M byte.
-#define MODIFIER_TYPES \
- ENUM_ENTRY(MODIFIER_NONE)
-
-#define ENUM_ENTRY(n) n,
-enum ModifierType {
- MODIFIER_TYPES
- MODIFIER_max
-};
-#undef ENUM_ENTRY
-
static const unsigned X86_MAX_OPERANDS = 6;
/// Decoding mode for the Intel disassembler. 16-bit, 32-bit, and 64-bit mode
diff --git a/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp b/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp
index 719b761..a400d46 100644
--- a/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp
+++ b/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp
@@ -76,8 +76,8 @@ class X86AsmBackend : public MCAsmBackend {
bool HasNopl;
const uint64_t MaxNopLength;
public:
- X86AsmBackend(const Target &T, StringRef _CPU)
- : MCAsmBackend(), CPU(_CPU), MaxNopLength(_CPU == "slm" ? 7 : 15) {
+ X86AsmBackend(const Target &T, StringRef CPU)
+ : MCAsmBackend(), CPU(CPU), MaxNopLength(CPU == "slm" ? 7 : 15) {
HasNopl = CPU != "generic" && CPU != "i386" && CPU != "i486" &&
CPU != "i586" && CPU != "pentium" && CPU != "pentium-mmx" &&
CPU != "i686" && CPU != "k6" && CPU != "k6-2" && CPU != "k6-3" &&
@@ -351,8 +351,8 @@ namespace {
class ELFX86AsmBackend : public X86AsmBackend {
public:
uint8_t OSABI;
- ELFX86AsmBackend(const Target &T, uint8_t _OSABI, StringRef CPU)
- : X86AsmBackend(T, CPU), OSABI(_OSABI) {}
+ ELFX86AsmBackend(const Target &T, uint8_t OSABI, StringRef CPU)
+ : X86AsmBackend(T, CPU), OSABI(OSABI) {}
};
class ELFX86_32AsmBackend : public ELFX86AsmBackend {
diff --git a/lib/Target/X86/MCTargetDesc/X86ELFObjectWriter.cpp b/lib/Target/X86/MCTargetDesc/X86ELFObjectWriter.cpp
index e8b0b4c..76a9d2b 100644
--- a/lib/Target/X86/MCTargetDesc/X86ELFObjectWriter.cpp
+++ b/lib/Target/X86/MCTargetDesc/X86ELFObjectWriter.cpp
@@ -38,231 +38,214 @@ X86ELFObjectWriter::X86ELFObjectWriter(bool IsELF64, uint8_t OSABI,
X86ELFObjectWriter::~X86ELFObjectWriter()
{}
-unsigned X86ELFObjectWriter::GetRelocType(const MCValue &Target,
- const MCFixup &Fixup,
- bool IsPCRel) const {
- // determine the type of the relocation
+enum X86_64RelType { RT64_64, RT64_32, RT64_32S, RT64_16, RT64_8 };
- MCSymbolRefExpr::VariantKind Modifier = Target.getAccessVariant();
- unsigned Type;
- if (getEMachine() == ELF::EM_X86_64) {
- if (IsPCRel) {
- switch ((unsigned)Fixup.getKind()) {
- default: llvm_unreachable("invalid fixup kind!");
-
- case FK_Data_8: Type = ELF::R_X86_64_PC64; break;
- case FK_Data_4: Type = ELF::R_X86_64_PC32; break;
- case FK_Data_2: Type = ELF::R_X86_64_PC16; break;
- case FK_Data_1: Type = ELF::R_X86_64_PC8; break;
+static X86_64RelType getType64(unsigned Kind,
+ MCSymbolRefExpr::VariantKind &Modifier,
+ bool &IsPCRel) {
+ switch (Kind) {
+ default:
+ llvm_unreachable("Unimplemented");
+ case X86::reloc_global_offset_table8:
+ Modifier = MCSymbolRefExpr::VK_GOT;
+ IsPCRel = true;
+ return RT64_64;
+ case FK_Data_8:
+ return RT64_64;
+ case X86::reloc_signed_4byte:
+ if (Modifier == MCSymbolRefExpr::VK_None && !IsPCRel)
+ return RT64_32S;
+ return RT64_32;
+ case X86::reloc_global_offset_table:
+ Modifier = MCSymbolRefExpr::VK_GOT;
+ IsPCRel = true;
+ return RT64_32;
+ case FK_Data_4:
+ case FK_PCRel_4:
+ case X86::reloc_riprel_4byte:
+ case X86::reloc_riprel_4byte_movq_load:
+ return RT64_32;
+ case FK_Data_2:
+ return RT64_16;
+ case FK_PCRel_1:
+ case FK_Data_1:
+ return RT64_8;
+ }
+}
- case FK_PCRel_8:
- assert(Modifier == MCSymbolRefExpr::VK_None);
- Type = ELF::R_X86_64_PC64;
- break;
- case X86::reloc_signed_4byte:
- case X86::reloc_riprel_4byte_movq_load:
- case X86::reloc_riprel_4byte:
- case FK_PCRel_4:
- switch (Modifier) {
- default:
- llvm_unreachable("Unimplemented");
- case MCSymbolRefExpr::VK_None:
- Type = ELF::R_X86_64_PC32;
- break;
- case MCSymbolRefExpr::VK_PLT:
- Type = ELF::R_X86_64_PLT32;
- break;
- case MCSymbolRefExpr::VK_GOTPCREL:
- Type = ELF::R_X86_64_GOTPCREL;
- break;
- case MCSymbolRefExpr::VK_GOTTPOFF:
- Type = ELF::R_X86_64_GOTTPOFF;
- break;
- case MCSymbolRefExpr::VK_TLSGD:
- Type = ELF::R_X86_64_TLSGD;
- break;
- case MCSymbolRefExpr::VK_TLSLD:
- Type = ELF::R_X86_64_TLSLD;
- break;
- }
- break;
- case FK_PCRel_2:
- assert(Modifier == MCSymbolRefExpr::VK_None);
- Type = ELF::R_X86_64_PC16;
- break;
- case FK_PCRel_1:
- assert(Modifier == MCSymbolRefExpr::VK_None);
- Type = ELF::R_X86_64_PC8;
- break;
- }
- } else {
- switch ((unsigned)Fixup.getKind()) {
- default: llvm_unreachable("invalid fixup kind!");
- case X86::reloc_global_offset_table8:
- Type = ELF::R_X86_64_GOTPC64;
- break;
- case X86::reloc_global_offset_table:
- Type = ELF::R_X86_64_GOTPC32;
- break;
- case FK_Data_8:
- switch (Modifier) {
- default:
- llvm_unreachable("Unimplemented");
- case MCSymbolRefExpr::VK_None:
- Type = ELF::R_X86_64_64;
- break;
- case MCSymbolRefExpr::VK_GOT:
- Type = ELF::R_X86_64_GOT64;
- break;
- case MCSymbolRefExpr::VK_GOTOFF:
- Type = ELF::R_X86_64_GOTOFF64;
- break;
- case MCSymbolRefExpr::VK_TPOFF:
- Type = ELF::R_X86_64_TPOFF64;
- break;
- case MCSymbolRefExpr::VK_DTPOFF:
- Type = ELF::R_X86_64_DTPOFF64;
- break;
- }
- break;
- case X86::reloc_signed_4byte:
- switch (Modifier) {
- default:
- llvm_unreachable("Unimplemented");
- case MCSymbolRefExpr::VK_None:
- Type = ELF::R_X86_64_32S;
- break;
- case MCSymbolRefExpr::VK_GOT:
- Type = ELF::R_X86_64_GOT32;
- break;
- case MCSymbolRefExpr::VK_GOTPCREL:
- Type = ELF::R_X86_64_GOTPCREL;
- break;
- case MCSymbolRefExpr::VK_TPOFF:
- Type = ELF::R_X86_64_TPOFF32;
- break;
- case MCSymbolRefExpr::VK_DTPOFF:
- Type = ELF::R_X86_64_DTPOFF32;
- break;
- }
- break;
- case FK_Data_4:
- Type = ELF::R_X86_64_32;
- break;
- case FK_Data_2: Type = ELF::R_X86_64_16; break;
- case FK_PCRel_1:
- case FK_Data_1: Type = ELF::R_X86_64_8; break;
- }
+static unsigned getRelocType64(MCSymbolRefExpr::VariantKind Modifier,
+ X86_64RelType Type, bool IsPCRel) {
+ switch (Modifier) {
+ default:
+ llvm_unreachable("Unimplemented");
+ case MCSymbolRefExpr::VK_None:
+ switch (Type) {
+ case RT64_64:
+ return IsPCRel ? ELF::R_X86_64_PC64 : ELF::R_X86_64_64;
+ case RT64_32:
+ return IsPCRel ? ELF::R_X86_64_PC32 : ELF::R_X86_64_32;
+ case RT64_32S:
+ return ELF::R_X86_64_32S;
+ case RT64_16:
+ return IsPCRel ? ELF::R_X86_64_PC16 : ELF::R_X86_64_16;
+ case RT64_8:
+ return IsPCRel ? ELF::R_X86_64_PC8 : ELF::R_X86_64_8;
}
- } else if (getEMachine() == ELF::EM_386) {
- if (IsPCRel) {
- switch ((unsigned)Fixup.getKind()) {
- default: llvm_unreachable("invalid fixup kind!");
-
- case X86::reloc_global_offset_table:
- Type = ELF::R_386_GOTPC;
- break;
-
- case FK_PCRel_1:
- case FK_Data_1:
- switch (Modifier) {
- default:
- llvm_unreachable("Unimplemented");
- case MCSymbolRefExpr::VK_None:
- Type = ELF::R_386_PC8;
- break;
- }
- break;
-
- case FK_PCRel_2:
- case FK_Data_2:
- switch (Modifier) {
- default:
- llvm_unreachable("Unimplemented");
- case MCSymbolRefExpr::VK_None:
- Type = ELF::R_386_PC16;
- break;
- }
- break;
+ case MCSymbolRefExpr::VK_GOT:
+ switch (Type) {
+ case RT64_64:
+ return IsPCRel ? ELF::R_X86_64_GOTPC64 : ELF::R_X86_64_GOT64;
+ case RT64_32:
+ return IsPCRel ? ELF::R_X86_64_GOTPC32 : ELF::R_X86_64_GOT32;
+ case RT64_32S:
+ case RT64_16:
+ case RT64_8:
+ llvm_unreachable("Unimplemented");
+ }
+ case MCSymbolRefExpr::VK_GOTOFF:
+ assert(Type == RT64_64);
+ assert(!IsPCRel);
+ return ELF::R_X86_64_GOTOFF64;
+ case MCSymbolRefExpr::VK_TPOFF:
+ assert(!IsPCRel);
+ switch (Type) {
+ case RT64_64:
+ return ELF::R_X86_64_TPOFF64;
+ case RT64_32:
+ return ELF::R_X86_64_TPOFF32;
+ case RT64_32S:
+ case RT64_16:
+ case RT64_8:
+ llvm_unreachable("Unimplemented");
+ }
+ case MCSymbolRefExpr::VK_DTPOFF:
+ assert(!IsPCRel);
+ switch (Type) {
+ case RT64_64:
+ return ELF::R_X86_64_DTPOFF64;
+ case RT64_32:
+ return ELF::R_X86_64_DTPOFF32;
+ case RT64_32S:
+ case RT64_16:
+ case RT64_8:
+ llvm_unreachable("Unimplemented");
+ }
+ case MCSymbolRefExpr::VK_SIZE:
+ assert(!IsPCRel);
+ switch (Type) {
+ case RT64_64:
+ return ELF::R_X86_64_SIZE64;
+ case RT64_32:
+ return ELF::R_X86_64_SIZE32;
+ case RT64_32S:
+ case RT64_16:
+ case RT64_8:
+ llvm_unreachable("Unimplemented");
+ }
+ case MCSymbolRefExpr::VK_TLSGD:
+ assert(Type == RT64_32);
+ return ELF::R_X86_64_TLSGD;
+ case MCSymbolRefExpr::VK_GOTTPOFF:
+ assert(Type == RT64_32);
+ return ELF::R_X86_64_GOTTPOFF;
+ case MCSymbolRefExpr::VK_TLSLD:
+ assert(Type == RT64_32);
+ return ELF::R_X86_64_TLSLD;
+ case MCSymbolRefExpr::VK_PLT:
+ assert(Type == RT64_32);
+ return ELF::R_X86_64_PLT32;
+ case MCSymbolRefExpr::VK_GOTPCREL:
+ assert(Type == RT64_32);
+ return ELF::R_X86_64_GOTPCREL;
+ }
+}
- case X86::reloc_signed_4byte:
- case FK_PCRel_4:
- case FK_Data_4:
- switch (Modifier) {
- default:
- llvm_unreachable("Unimplemented");
- case MCSymbolRefExpr::VK_None:
- Type = ELF::R_386_PC32;
- break;
- case MCSymbolRefExpr::VK_PLT:
- Type = ELF::R_386_PLT32;
- break;
- }
- break;
- }
- } else {
- switch ((unsigned)Fixup.getKind()) {
- default: llvm_unreachable("invalid fixup kind!");
+enum X86_32RelType { RT32_32, RT32_16, RT32_8 };
- case X86::reloc_global_offset_table:
- Type = ELF::R_386_GOTPC;
- break;
+static X86_32RelType getType32(X86_64RelType T) {
+ switch (T) {
+ case RT64_64:
+ llvm_unreachable("Unimplemented");
+ case RT64_32:
+ case RT64_32S:
+ return RT32_32;
+ case RT64_16:
+ return RT32_16;
+ case RT64_8:
+ return RT32_8;
+ }
+ llvm_unreachable("unexpected relocation type!");
+}
- // FIXME: Should we avoid selecting reloc_signed_4byte in 32 bit mode
- // instead?
- case X86::reloc_signed_4byte:
- case FK_PCRel_4:
- case FK_Data_4:
- switch (Modifier) {
- default:
- llvm_unreachable("Unimplemented");
- case MCSymbolRefExpr::VK_None:
- Type = ELF::R_386_32;
- break;
- case MCSymbolRefExpr::VK_GOT:
- Type = ELF::R_386_GOT32;
- break;
- case MCSymbolRefExpr::VK_PLT:
- Type = ELF::R_386_PLT32;
- break;
- case MCSymbolRefExpr::VK_GOTOFF:
- Type = ELF::R_386_GOTOFF;
- break;
- case MCSymbolRefExpr::VK_TLSGD:
- Type = ELF::R_386_TLS_GD;
- break;
- case MCSymbolRefExpr::VK_TPOFF:
- Type = ELF::R_386_TLS_LE_32;
- break;
- case MCSymbolRefExpr::VK_INDNTPOFF:
- Type = ELF::R_386_TLS_IE;
- break;
- case MCSymbolRefExpr::VK_NTPOFF:
- Type = ELF::R_386_TLS_LE;
- break;
- case MCSymbolRefExpr::VK_GOTNTPOFF:
- Type = ELF::R_386_TLS_GOTIE;
- break;
- case MCSymbolRefExpr::VK_TLSLDM:
- Type = ELF::R_386_TLS_LDM;
- break;
- case MCSymbolRefExpr::VK_DTPOFF:
- Type = ELF::R_386_TLS_LDO_32;
- break;
- case MCSymbolRefExpr::VK_GOTTPOFF:
- Type = ELF::R_386_TLS_IE_32;
- break;
- }
- break;
- case FK_Data_2: Type = ELF::R_386_16; break;
- case FK_PCRel_1:
- case FK_Data_1: Type = ELF::R_386_8; break;
- }
+static unsigned getRelocType32(MCSymbolRefExpr::VariantKind Modifier,
+ X86_32RelType Type, bool IsPCRel) {
+ switch (Modifier) {
+ default:
+ llvm_unreachable("Unimplemented");
+ case MCSymbolRefExpr::VK_None:
+ switch (Type) {
+ case RT32_32:
+ return IsPCRel ? ELF::R_386_PC32 : ELF::R_386_32;
+ case RT32_16:
+ return IsPCRel ? ELF::R_386_PC16 : ELF::R_386_16;
+ case RT32_8:
+ return IsPCRel ? ELF::R_386_PC8 : ELF::R_386_8;
}
- } else
- llvm_unreachable("Unsupported ELF machine type.");
+ case MCSymbolRefExpr::VK_GOT:
+ assert(Type == RT32_32);
+ return IsPCRel ? ELF::R_386_GOTPC : ELF::R_386_GOT32;
+ case MCSymbolRefExpr::VK_GOTOFF:
+ assert(Type == RT32_32);
+ assert(!IsPCRel);
+ return ELF::R_386_GOTOFF;
+ case MCSymbolRefExpr::VK_TPOFF:
+ assert(Type == RT32_32);
+ assert(!IsPCRel);
+ return ELF::R_386_TLS_LE_32;
+ case MCSymbolRefExpr::VK_DTPOFF:
+ assert(Type == RT32_32);
+ assert(!IsPCRel);
+ return ELF::R_386_TLS_LDO_32;
+ case MCSymbolRefExpr::VK_TLSGD:
+ assert(Type == RT32_32);
+ assert(!IsPCRel);
+ return ELF::R_386_TLS_GD;
+ case MCSymbolRefExpr::VK_GOTTPOFF:
+ assert(Type == RT32_32);
+ assert(!IsPCRel);
+ return ELF::R_386_TLS_IE_32;
+ case MCSymbolRefExpr::VK_PLT:
+ assert(Type == RT32_32);
+ return ELF::R_386_PLT32;
+ case MCSymbolRefExpr::VK_INDNTPOFF:
+ assert(Type == RT32_32);
+ assert(!IsPCRel);
+ return ELF::R_386_TLS_IE;
+ case MCSymbolRefExpr::VK_NTPOFF:
+ assert(Type == RT32_32);
+ assert(!IsPCRel);
+ return ELF::R_386_TLS_LE;
+ case MCSymbolRefExpr::VK_GOTNTPOFF:
+ assert(Type == RT32_32);
+ assert(!IsPCRel);
+ return ELF::R_386_TLS_GOTIE;
+ case MCSymbolRefExpr::VK_TLSLDM:
+ assert(Type == RT32_32);
+ assert(!IsPCRel);
+ return ELF::R_386_TLS_LDM;
+ }
+}
+
+unsigned X86ELFObjectWriter::GetRelocType(const MCValue &Target,
+ const MCFixup &Fixup,
+ bool IsPCRel) const {
+ MCSymbolRefExpr::VariantKind Modifier = Target.getAccessVariant();
+ X86_64RelType Type = getType64(Fixup.getKind(), Modifier, IsPCRel);
+ if (getEMachine() == ELF::EM_X86_64)
+ return getRelocType64(Modifier, Type, IsPCRel);
- return Type;
+ assert(getEMachine() == ELF::EM_386 && "Unsupported ELF machine type.");
+ return getRelocType32(Modifier, getType32(Type), IsPCRel);
}
MCObjectWriter *llvm::createX86ELFObjectWriter(raw_ostream &OS,
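Restructured this way, relocation selection becomes two small total functions instead of one deeply nested switch: getType64 classifies the fixup into a width class (rewriting Modifier/IsPCRel for the GOT-table fixups along the way), and getRelocType64/getRelocType32 map (modifier, class, pcrel) onto a concrete ELF relocation. A sketch of how the pieces compose (selectX86Reloc is a hypothetical wrapper, not in the patch):

    static unsigned selectX86Reloc(unsigned FixupKind,
                                   MCSymbolRefExpr::VariantKind Modifier,
                                   bool IsPCRel, bool Is64Bit) {
      // Stage 1: normalize the fixup to a width class; GOT fixups may
      // rewrite Modifier/IsPCRel as a side effect, as getType64 does above.
      X86_64RelType Type = getType64(FixupKind, Modifier, IsPCRel);
      // Stage 2: dispatch on the symbol modifier within that width class.
      return Is64Bit ? getRelocType64(Modifier, Type, IsPCRel)
                     : getRelocType32(Modifier, getType32(Type), IsPCRel);
    }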
diff --git a/lib/Target/X86/MCTargetDesc/X86ELFRelocationInfo.cpp b/lib/Target/X86/MCTargetDesc/X86ELFRelocationInfo.cpp
index b679316..10b83f4 100644
--- a/lib/Target/X86/MCTargetDesc/X86ELFRelocationInfo.cpp
+++ b/lib/Target/X86/MCTargetDesc/X86ELFRelocationInfo.cpp
@@ -36,7 +36,7 @@ public:
MCSymbol *Sym = Ctx.GetOrCreateSymbol(SymName);
// FIXME: check that the value is actually the same.
- if (Sym->isVariable() == false)
+ if (!Sym->isVariable())
Sym->setVariableValue(MCConstantExpr::Create(SymAddr, Ctx));
const MCExpr *Expr = nullptr;
diff --git a/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp b/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp
index 3ad8ab1..9b98a3e 100644
--- a/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp
+++ b/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp
@@ -168,10 +168,8 @@ public:
} // end anonymous namespace
-
MCCodeEmitter *llvm::createX86MCCodeEmitter(const MCInstrInfo &MCII,
const MCRegisterInfo &MRI,
- const MCSubtargetInfo &STI,
MCContext &Ctx) {
return new X86MCCodeEmitter(MCII, Ctx);
}
diff --git a/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.cpp b/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.cpp
index 0e7b4e5..0946326 100644
--- a/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.cpp
+++ b/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.cpp
@@ -55,143 +55,6 @@ std::string X86_MC::ParseX86Triple(StringRef TT) {
return FS;
}
-/// GetCpuIDAndInfo - Execute the specified cpuid and return the 4 values in the
-/// specified arguments. If we can't run cpuid on the host, return true.
-bool X86_MC::GetCpuIDAndInfo(unsigned value, unsigned *rEAX,
- unsigned *rEBX, unsigned *rECX, unsigned *rEDX) {
-#if defined(__x86_64__) || defined(_M_AMD64) || defined (_M_X64)
- #if defined(__GNUC__)
- // gcc doesn't know cpuid would clobber ebx/rbx. Preserve it manually.
- asm ("movq\t%%rbx, %%rsi\n\t"
- "cpuid\n\t"
- "xchgq\t%%rbx, %%rsi\n\t"
- : "=a" (*rEAX),
- "=S" (*rEBX),
- "=c" (*rECX),
- "=d" (*rEDX)
- : "a" (value));
- return false;
- #elif defined(_MSC_VER)
- int registers[4];
- __cpuid(registers, value);
- *rEAX = registers[0];
- *rEBX = registers[1];
- *rECX = registers[2];
- *rEDX = registers[3];
- return false;
- #else
- return true;
- #endif
-#elif defined(i386) || defined(__i386__) || defined(__x86__) || defined(_M_IX86)
- #if defined(__GNUC__)
- asm ("movl\t%%ebx, %%esi\n\t"
- "cpuid\n\t"
- "xchgl\t%%ebx, %%esi\n\t"
- : "=a" (*rEAX),
- "=S" (*rEBX),
- "=c" (*rECX),
- "=d" (*rEDX)
- : "a" (value));
- return false;
- #elif defined(_MSC_VER)
- __asm {
- mov eax,value
- cpuid
- mov esi,rEAX
- mov dword ptr [esi],eax
- mov esi,rEBX
- mov dword ptr [esi],ebx
- mov esi,rECX
- mov dword ptr [esi],ecx
- mov esi,rEDX
- mov dword ptr [esi],edx
- }
- return false;
- #else
- return true;
- #endif
-#else
- return true;
-#endif
-}
-
-/// GetCpuIDAndInfoEx - Execute the specified cpuid with subleaf and return the
-/// 4 values in the specified arguments. If we can't run cpuid on the host,
-/// return true.
-bool X86_MC::GetCpuIDAndInfoEx(unsigned value, unsigned subleaf, unsigned *rEAX,
- unsigned *rEBX, unsigned *rECX, unsigned *rEDX) {
-#if defined(__x86_64__) || defined(_M_AMD64) || defined (_M_X64)
- #if defined(__GNUC__)
- // gcc doesn't know cpuid would clobber ebx/rbx. Preserve it manually.
- asm ("movq\t%%rbx, %%rsi\n\t"
- "cpuid\n\t"
- "xchgq\t%%rbx, %%rsi\n\t"
- : "=a" (*rEAX),
- "=S" (*rEBX),
- "=c" (*rECX),
- "=d" (*rEDX)
- : "a" (value),
- "c" (subleaf));
- return false;
- #elif defined(_MSC_VER)
- int registers[4];
- __cpuidex(registers, value, subleaf);
- *rEAX = registers[0];
- *rEBX = registers[1];
- *rECX = registers[2];
- *rEDX = registers[3];
- return false;
- #else
- return true;
- #endif
-#elif defined(i386) || defined(__i386__) || defined(__x86__) || defined(_M_IX86)
- #if defined(__GNUC__)
- asm ("movl\t%%ebx, %%esi\n\t"
- "cpuid\n\t"
- "xchgl\t%%ebx, %%esi\n\t"
- : "=a" (*rEAX),
- "=S" (*rEBX),
- "=c" (*rECX),
- "=d" (*rEDX)
- : "a" (value),
- "c" (subleaf));
- return false;
- #elif defined(_MSC_VER)
- __asm {
- mov eax,value
- mov ecx,subleaf
- cpuid
- mov esi,rEAX
- mov dword ptr [esi],eax
- mov esi,rEBX
- mov dword ptr [esi],ebx
- mov esi,rECX
- mov dword ptr [esi],ecx
- mov esi,rEDX
- mov dword ptr [esi],edx
- }
- return false;
- #else
- return true;
- #endif
-#else
- return true;
-#endif
-}
-
-void X86_MC::DetectFamilyModel(unsigned EAX, unsigned &Family,
- unsigned &Model) {
- Family = (EAX >> 8) & 0xf; // Bits 8 - 11
- Model = (EAX >> 4) & 0xf; // Bits 4 - 7
- if (Family == 6 || Family == 0xf) {
- if (Family == 0xf)
- // Examine extended family ID if family ID is F.
- Family += (EAX >> 20) & 0xff; // Bits 20 - 27
- // Examine extended model ID if family ID is 6 or F.
- Model += ((EAX >> 16) & 0xf) << 4; // Bits 16 - 19
- }
-}
-
unsigned X86_MC::getDwarfRegFlavour(Triple TT, bool isEH) {
if (TT.getArch() == Triple::x86_64)
return DWARFFlavour::X86_64;
@@ -344,24 +207,6 @@ static MCCodeGenInfo *createX86MCCodeGenInfo(StringRef TT, Reloc::Model RM,
return X;
}
-static MCStreamer *createMCStreamer(const Target &T, StringRef TT,
- MCContext &Ctx, MCAsmBackend &MAB,
- raw_ostream &_OS, MCCodeEmitter *_Emitter,
- const MCSubtargetInfo &STI, bool RelaxAll) {
- Triple TheTriple(TT);
-
- switch (TheTriple.getObjectFormat()) {
- default: llvm_unreachable("unsupported object format");
- case Triple::MachO:
- return createMachOStreamer(Ctx, MAB, _OS, _Emitter, RelaxAll);
- case Triple::COFF:
- assert(TheTriple.isOSWindows() && "only Windows COFF is supported");
- return createX86WinCOFFStreamer(Ctx, MAB, _Emitter, _OS, RelaxAll);
- case Triple::ELF:
- return createELFStreamer(Ctx, MAB, _OS, _Emitter, RelaxAll);
- }
-}
-
static MCInstPrinter *createX86MCInstPrinter(const Target &T,
unsigned SyntaxVariant,
const MCAsmInfo &MAI,
@@ -392,61 +237,42 @@ static MCInstrAnalysis *createX86MCInstrAnalysis(const MCInstrInfo *Info) {
// Force static initialization.
extern "C" void LLVMInitializeX86TargetMC() {
- // Register the MC asm info.
- RegisterMCAsmInfoFn A(TheX86_32Target, createX86MCAsmInfo);
- RegisterMCAsmInfoFn B(TheX86_64Target, createX86MCAsmInfo);
-
- // Register the MC codegen info.
- RegisterMCCodeGenInfoFn C(TheX86_32Target, createX86MCCodeGenInfo);
- RegisterMCCodeGenInfoFn D(TheX86_64Target, createX86MCCodeGenInfo);
-
- // Register the MC instruction info.
- TargetRegistry::RegisterMCInstrInfo(TheX86_32Target, createX86MCInstrInfo);
- TargetRegistry::RegisterMCInstrInfo(TheX86_64Target, createX86MCInstrInfo);
-
- // Register the MC register info.
- TargetRegistry::RegisterMCRegInfo(TheX86_32Target, createX86MCRegisterInfo);
- TargetRegistry::RegisterMCRegInfo(TheX86_64Target, createX86MCRegisterInfo);
-
- // Register the MC subtarget info.
- TargetRegistry::RegisterMCSubtargetInfo(TheX86_32Target,
- X86_MC::createX86MCSubtargetInfo);
- TargetRegistry::RegisterMCSubtargetInfo(TheX86_64Target,
- X86_MC::createX86MCSubtargetInfo);
-
- // Register the MC instruction analyzer.
- TargetRegistry::RegisterMCInstrAnalysis(TheX86_32Target,
- createX86MCInstrAnalysis);
- TargetRegistry::RegisterMCInstrAnalysis(TheX86_64Target,
- createX86MCInstrAnalysis);
-
- // Register the code emitter.
- TargetRegistry::RegisterMCCodeEmitter(TheX86_32Target,
- createX86MCCodeEmitter);
- TargetRegistry::RegisterMCCodeEmitter(TheX86_64Target,
- createX86MCCodeEmitter);
+ for (Target *T : {&TheX86_32Target, &TheX86_64Target}) {
+ // Register the MC asm info.
+ RegisterMCAsmInfoFn X(*T, createX86MCAsmInfo);
+
+ // Register the MC codegen info.
+ RegisterMCCodeGenInfoFn Y(*T, createX86MCCodeGenInfo);
+
+ // Register the MC instruction info.
+ TargetRegistry::RegisterMCInstrInfo(*T, createX86MCInstrInfo);
+
+ // Register the MC register info.
+ TargetRegistry::RegisterMCRegInfo(*T, createX86MCRegisterInfo);
+
+ // Register the MC subtarget info.
+ TargetRegistry::RegisterMCSubtargetInfo(*T,
+ X86_MC::createX86MCSubtargetInfo);
+
+ // Register the MC instruction analyzer.
+ TargetRegistry::RegisterMCInstrAnalysis(*T, createX86MCInstrAnalysis);
+
+ // Register the code emitter.
+ TargetRegistry::RegisterMCCodeEmitter(*T, createX86MCCodeEmitter);
+
+ // Register the object streamer.
+ TargetRegistry::RegisterCOFFStreamer(*T, createX86WinCOFFStreamer);
+
+ // Register the MCInstPrinter.
+ TargetRegistry::RegisterMCInstPrinter(*T, createX86MCInstPrinter);
+
+ // Register the MC relocation info.
+ TargetRegistry::RegisterMCRelocationInfo(*T, createX86MCRelocationInfo);
+ }
// Register the asm backend.
TargetRegistry::RegisterMCAsmBackend(TheX86_32Target,
createX86_32AsmBackend);
TargetRegistry::RegisterMCAsmBackend(TheX86_64Target,
createX86_64AsmBackend);
-
- // Register the object streamer.
- TargetRegistry::RegisterMCObjectStreamer(TheX86_32Target,
- createMCStreamer);
- TargetRegistry::RegisterMCObjectStreamer(TheX86_64Target,
- createMCStreamer);
-
- // Register the MCInstPrinter.
- TargetRegistry::RegisterMCInstPrinter(TheX86_32Target,
- createX86MCInstPrinter);
- TargetRegistry::RegisterMCInstPrinter(TheX86_64Target,
- createX86MCInstPrinter);
-
- // Register the MC relocation info.
- TargetRegistry::RegisterMCRelocationInfo(TheX86_32Target,
- createX86MCRelocationInfo);
- TargetRegistry::RegisterMCRelocationInfo(TheX86_64Target,
- createX86MCRelocationInfo);
}
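The deleted GetCpuIDAndInfo/GetCpuIDAndInfoEx/DetectFamilyModel helpers duplicated host-CPU probing that the MC layer no longer needs (equivalent probing exists in LLVM's Support-library host detection). For reference, the family/model decoding they performed follows the standard x86 convention for CPUID leaf 1; a standalone sketch mirroring the removed code:

    #include <cstdint>
    void decodeFamilyModel(uint32_t EAX, unsigned &Family, unsigned &Model) {
      Family = (EAX >> 8) & 0xf;            // base family, bits 8-11
      Model  = (EAX >> 4) & 0xf;            // base model, bits 4-7
      if (Family == 6 || Family == 0xf) {
        if (Family == 0xf)
          Family += (EAX >> 20) & 0xff;     // extended family, bits 20-27
        Model += ((EAX >> 16) & 0xf) << 4;  // extended model, bits 16-19
      }
    }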
diff --git a/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.h b/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.h
index d8320b9..6f50f11 100644
--- a/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.h
+++ b/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.h
@@ -53,18 +53,6 @@ namespace N86 {
namespace X86_MC {
std::string ParseX86Triple(StringRef TT);
- /// GetCpuIDAndInfo - Execute the specified cpuid and return the 4 values in
- /// the specified arguments. If we can't run cpuid on the host, return true.
- bool GetCpuIDAndInfo(unsigned value, unsigned *rEAX,
- unsigned *rEBX, unsigned *rECX, unsigned *rEDX);
- /// GetCpuIDAndInfoEx - Execute the specified cpuid with subleaf and return
- /// the 4 values in the specified arguments. If we can't run cpuid on the
- /// host, return true.
- bool GetCpuIDAndInfoEx(unsigned value, unsigned subleaf, unsigned *rEAX,
- unsigned *rEBX, unsigned *rECX, unsigned *rEDX);
-
- void DetectFamilyModel(unsigned EAX, unsigned &Family, unsigned &Model);
-
unsigned getDwarfRegFlavour(Triple TT, bool isEH);
void InitLLVM2SEHRegisterMapping(MCRegisterInfo *MRI);
@@ -78,7 +66,6 @@ namespace X86_MC {
MCCodeEmitter *createX86MCCodeEmitter(const MCInstrInfo &MCII,
const MCRegisterInfo &MRI,
- const MCSubtargetInfo &STI,
MCContext &Ctx);
MCAsmBackend *createX86_32AsmBackend(const Target &T, const MCRegisterInfo &MRI,
@@ -86,12 +73,12 @@ MCAsmBackend *createX86_32AsmBackend(const Target &T, const MCRegisterInfo &MRI,
MCAsmBackend *createX86_64AsmBackend(const Target &T, const MCRegisterInfo &MRI,
StringRef TT, StringRef CPU);
-/// createX86WinCOFFStreamer - Construct an X86 Windows COFF machine code
-/// streamer which will generate PE/COFF format object files.
+/// Construct an X86 Windows COFF machine code streamer which will generate
+/// PE/COFF format object files.
///
/// Takes ownership of \p AB and \p CE.
MCStreamer *createX86WinCOFFStreamer(MCContext &C, MCAsmBackend &AB,
- MCCodeEmitter *CE, raw_ostream &OS,
+ raw_ostream &OS, MCCodeEmitter *CE,
bool RelaxAll);
/// createX86MachObjectWriter - Construct an X86 Mach-O object writer.
diff --git a/lib/Target/X86/MCTargetDesc/X86MachORelocationInfo.cpp b/lib/Target/X86/MCTargetDesc/X86MachORelocationInfo.cpp
index 3b81d53..81749fc 100644
--- a/lib/Target/X86/MCTargetDesc/X86MachORelocationInfo.cpp
+++ b/lib/Target/X86/MCTargetDesc/X86MachORelocationInfo.cpp
@@ -38,7 +38,7 @@ public:
MCSymbol *Sym = Ctx.GetOrCreateSymbol(SymName);
// FIXME: check that the value is actually the same.
- if (Sym->isVariable() == false)
+ if (!Sym->isVariable())
Sym->setVariableValue(MCConstantExpr::Create(SymAddr, Ctx));
const MCExpr *Expr = nullptr;
@@ -93,7 +93,7 @@ public:
RSymI->getName(RSymName);
MCSymbol *RSym = Ctx.GetOrCreateSymbol(RSymName);
- if (RSym->isVariable() == false)
+ if (!RSym->isVariable())
RSym->setVariableValue(MCConstantExpr::Create(RSymAddr, Ctx));
const MCExpr *RHS = MCSymbolRefExpr::Create(RSym, Ctx);
diff --git a/lib/Target/X86/MCTargetDesc/X86WinCOFFStreamer.cpp b/lib/Target/X86/MCTargetDesc/X86WinCOFFStreamer.cpp
index 5f1596c..5690efe 100644
--- a/lib/Target/X86/MCTargetDesc/X86WinCOFFStreamer.cpp
+++ b/lib/Target/X86/MCTargetDesc/X86WinCOFFStreamer.cpp
@@ -48,13 +48,11 @@ void X86WinCOFFStreamer::FinishImpl() {
}
}
-namespace llvm {
-MCStreamer *createX86WinCOFFStreamer(MCContext &C, MCAsmBackend &AB,
- MCCodeEmitter *CE, raw_ostream &OS,
- bool RelaxAll) {
+MCStreamer *llvm::createX86WinCOFFStreamer(MCContext &C, MCAsmBackend &AB,
+ raw_ostream &OS, MCCodeEmitter *CE,
+ bool RelaxAll) {
X86WinCOFFStreamer *S = new X86WinCOFFStreamer(C, AB, CE, OS);
S->getAssembler().setRelaxAll(RelaxAll);
return S;
}
-}
diff --git a/lib/Target/X86/README-SSE.txt b/lib/Target/X86/README-SSE.txt
index 71329b0..e6896e8 100644
--- a/lib/Target/X86/README-SSE.txt
+++ b/lib/Target/X86/README-SSE.txt
@@ -93,36 +93,6 @@ The pattern isel got this one right.
//===---------------------------------------------------------------------===//
-SSE should implement 'select_cc' using 'emulated conditional moves' that use
-pcmp/pand/pandn/por to do a selection instead of a conditional branch:
-
-double %X(double %Y, double %Z, double %A, double %B) {
- %C = setlt double %A, %B
- %z = fadd double %Z, 0.0 ;; select operand is not a load
- %D = select bool %C, double %Y, double %z
- ret double %D
-}
-
-We currently emit:
-
-_X:
- subl $12, %esp
- xorpd %xmm0, %xmm0
- addsd 24(%esp), %xmm0
- movsd 32(%esp), %xmm1
- movsd 16(%esp), %xmm2
- ucomisd 40(%esp), %xmm1
- jb LBB_X_2
-LBB_X_1:
- movsd %xmm0, %xmm2
-LBB_X_2:
- movsd %xmm2, (%esp)
- fldl (%esp)
- addl $12, %esp
- ret
-
-//===---------------------------------------------------------------------===//
-
Lower memcpy / memset to a series of SSE 128 bit move instructions when it's
feasible.
@@ -787,25 +757,6 @@ cheaper to do fld1 than load from a constant pool for example, so
//===---------------------------------------------------------------------===//
-The X86 backend should be able to if-convert SSE comparisons like "ucomisd" to
-"cmpsd". For example, this code:
-
-double d1(double x) { return x == x ? x : x + x; }
-
-Compiles into:
-
-_d1:
- ucomisd %xmm0, %xmm0
- jnp LBB1_2
- addsd %xmm0, %xmm0
- ret
-LBB1_2:
- ret
-
-Also, the 'ret's should be shared. This is PR6032.
-
-//===---------------------------------------------------------------------===//
-
These should compile into the same code (PR6214): Perhaps instcombine should
canonicalize the former into the later?
@@ -858,35 +809,6 @@ doing a shuffle from v[1] to v[0] then a float store.
//===---------------------------------------------------------------------===//
-On SSE4 machines, we compile this code:
-
-define <2 x float> @test2(<2 x float> %Q, <2 x float> %R,
- <2 x float> *%P) nounwind {
- %Z = fadd <2 x float> %Q, %R
-
- store <2 x float> %Z, <2 x float> *%P
- ret <2 x float> %Z
-}
-
-into:
-
-_test2: ## @test2
-## BB#0:
- insertps $0, %xmm2, %xmm2
- insertps $16, %xmm3, %xmm2
- insertps $0, %xmm0, %xmm3
- insertps $16, %xmm1, %xmm3
- addps %xmm2, %xmm3
- movq %xmm3, (%rdi)
- movaps %xmm3, %xmm0
- pshufd $1, %xmm3, %xmm1
- ## kill: XMM1<def> XMM1<kill>
- ret
-
-The insertps's of $0 are pointless complex copies.
-
-//===---------------------------------------------------------------------===//
-
[UNSAFE FP]
void foo(double, double, double);
diff --git a/lib/Target/X86/X86AsmPrinter.cpp b/lib/Target/X86/X86AsmPrinter.cpp
index bb0b9ce..f6033a7 100644
--- a/lib/Target/X86/X86AsmPrinter.cpp
+++ b/lib/Target/X86/X86AsmPrinter.cpp
@@ -63,9 +63,6 @@ bool X86AsmPrinter::runOnMachineFunction(MachineFunction &MF) {
OutStreamer.EndCOFFSymbolDef();
}
- // Have common code print out the function header with linkage info etc.
- EmitFunctionHeader();
-
// Emit the rest of the function body.
EmitFunctionBody();
diff --git a/lib/Target/X86/X86FastISel.cpp b/lib/Target/X86/X86FastISel.cpp
index a17f052..cba140f 100644
--- a/lib/Target/X86/X86FastISel.cpp
+++ b/lib/Target/X86/X86FastISel.cpp
@@ -84,7 +84,7 @@ private:
bool X86FastEmitCompare(const Value *LHS, const Value *RHS, EVT VT, DebugLoc DL);
bool X86FastEmitLoad(EVT VT, const X86AddressMode &AM, MachineMemOperand *MMO,
- unsigned &ResultReg);
+ unsigned &ResultReg, unsigned Alignment = 1);
bool X86FastEmitStore(EVT VT, const Value *Val, const X86AddressMode &AM,
MachineMemOperand *MMO = nullptr, bool Aligned = false);
@@ -327,7 +327,8 @@ bool X86FastISel::isTypeLegal(Type *Ty, MVT &VT, bool AllowI1) {
/// The address is either pre-computed, i.e. Ptr, or a GlobalAddress, i.e. GV.
/// Return true and the result register by reference if it is possible.
bool X86FastISel::X86FastEmitLoad(EVT VT, const X86AddressMode &AM,
- MachineMemOperand *MMO, unsigned &ResultReg) {
+ MachineMemOperand *MMO, unsigned &ResultReg,
+ unsigned Alignment) {
// Get opcode and regclass of the output for the given load instruction.
unsigned Opc = 0;
const TargetRegisterClass *RC = nullptr;
@@ -372,6 +373,30 @@ bool X86FastISel::X86FastEmitLoad(EVT VT, const X86AddressMode &AM,
case MVT::f80:
// No f80 support yet.
return false;
+ case MVT::v4f32:
+ if (Alignment >= 16)
+ Opc = Subtarget->hasAVX() ? X86::VMOVAPSrm : X86::MOVAPSrm;
+ else
+ Opc = Subtarget->hasAVX() ? X86::VMOVUPSrm : X86::MOVUPSrm;
+ RC = &X86::VR128RegClass;
+ break;
+ case MVT::v2f64:
+ if (Alignment >= 16)
+ Opc = Subtarget->hasAVX() ? X86::VMOVAPDrm : X86::MOVAPDrm;
+ else
+ Opc = Subtarget->hasAVX() ? X86::VMOVUPDrm : X86::MOVUPDrm;
+ RC = &X86::VR128RegClass;
+ break;
+ case MVT::v4i32:
+ case MVT::v2i64:
+ case MVT::v8i16:
+ case MVT::v16i8:
+ if (Alignment >= 16)
+ Opc = Subtarget->hasAVX() ? X86::VMOVDQArm : X86::MOVDQArm;
+ else
+ Opc = Subtarget->hasAVX() ? X86::VMOVDQUrm : X86::MOVDQUrm;
+ RC = &X86::VR128RegClass;
+ break;
}
ResultReg = createResultReg(RC);
@@ -1068,8 +1093,14 @@ bool X86FastISel::X86SelectLoad(const Instruction *I) {
if (!X86SelectAddress(Ptr, AM))
return false;
+ unsigned Alignment = LI->getAlignment();
+ unsigned ABIAlignment = DL.getABITypeAlignment(LI->getType());
+ if (Alignment == 0) // Ensure that codegen never sees alignment 0
+ Alignment = ABIAlignment;
+
unsigned ResultReg = 0;
- if (!X86FastEmitLoad(VT, AM, createMachineMemOperandFor(LI), ResultReg))
+ if (!X86FastEmitLoad(VT, AM, createMachineMemOperandFor(LI), ResultReg,
+ Alignment))
return false;
updateValueMap(I, ResultReg);
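The vector cases added to X86FastEmitLoad pick the aligned move forms only when the proven alignment reaches 16 bytes, since MOVAPS/MOVAPD/MOVDQA fault on misaligned addresses while their U-suffixed counterparts do not; X86SelectLoad now feeds in the IR alignment, falling back to the ABI type alignment when the load carries none. The selection pattern, reduced to one element type (chooseF32x4LoadOpc is an illustrative helper, not in the patch):

    static unsigned chooseF32x4LoadOpc(unsigned Alignment, bool HasAVX) {
      // Aligned form only when 16-byte alignment is known; the unaligned
      // form is the safe default.
      if (Alignment >= 16)
        return HasAVX ? X86::VMOVAPSrm : X86::MOVAPSrm;
      return HasAVX ? X86::VMOVUPSrm : X86::MOVUPSrm;
    }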
@@ -1094,20 +1125,30 @@ static unsigned X86ChooseCmpOpcode(EVT VT, const X86Subtarget *Subtarget) {
}
}
-/// X86ChooseCmpImmediateOpcode - If we have a comparison with RHS as the RHS
-/// of the comparison, return an opcode that works for the compare (e.g.
-/// CMP32ri) otherwise return 0.
+/// If the right-hand side of a comparison is the constant RHSC, return an
+/// opcode that folds it into the compare (e.g. CMP32ri); otherwise return 0.
static unsigned X86ChooseCmpImmediateOpcode(EVT VT, const ConstantInt *RHSC) {
+ int64_t Val = RHSC->getSExtValue();
switch (VT.getSimpleVT().SimpleTy) {
// Otherwise, we can't fold the immediate into this comparison.
- default: return 0;
- case MVT::i8: return X86::CMP8ri;
- case MVT::i16: return X86::CMP16ri;
- case MVT::i32: return X86::CMP32ri;
+ default:
+ return 0;
+ case MVT::i8:
+ return X86::CMP8ri;
+ case MVT::i16:
+ if (isInt<8>(Val))
+ return X86::CMP16ri8;
+ return X86::CMP16ri;
+ case MVT::i32:
+ if (isInt<8>(Val))
+ return X86::CMP32ri8;
+ return X86::CMP32ri;
case MVT::i64:
+ if (isInt<8>(Val))
+ return X86::CMP64ri8;
// 64-bit comparisons are only valid if the immediate fits in a 32-bit sext
// field.
- if ((int)RHSC->getSExtValue() == RHSC->getSExtValue())
+ if (isInt<32>(Val))
return X86::CMP64ri32;
return 0;
}
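The rewrite above folds small constants into the sign-extended 8-bit immediate compare forms (CMP16ri8/CMP32ri8/CMP64ri8), which encode the immediate in a single byte instead of two, four, or eight, and it replaces the hand-rolled i64 truncation test with isInt<32>. isInt<N> is LLVM's signed-range predicate; its effect is equivalent to a round-trip through the narrower type (sketch, not the patch):

    #include <cstdint>
    // -128..127
    static bool fitsInt8(int64_t V)  { return V == static_cast<int8_t>(V); }
    // INT32_MIN..INT32_MAX
    static bool fitsInt32(int64_t V) { return V == static_cast<int32_t>(V); }
    // e.g. comparing against 5 can use CMP32ri8: one immediate byte, not four.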
@@ -1810,11 +1851,11 @@ bool X86FastISel::X86FastEmitCMoveSelect(MVT RetVT, const Instruction *I) {
return true;
}
-/// \brief Emit SSE instructions to lower the select.
+/// \brief Emit SSE or AVX instructions to lower the select.
///
/// Try to use SSE1/SSE2 instructions to simulate a select without branches.
/// This lowers fp selects into a CMP/AND/ANDN/OR sequence when the necessary
-/// SSE instructions are available.
+/// SSE instructions are available. If AVX is available, try to use a VBLENDV.
bool X86FastISel::X86FastEmitSSESelect(MVT RetVT, const Instruction *I) {
// Optimize conditions coming from a compare if both instructions are in the
// same basic block (values defined in other basic blocks may not have
@@ -1850,19 +1891,17 @@ bool X86FastISel::X86FastEmitSSESelect(MVT RetVT, const Instruction *I) {
if (NeedSwap)
std::swap(CmpLHS, CmpRHS);
- static unsigned OpcTable[2][2][4] = {
- { { X86::CMPSSrr, X86::FsANDPSrr, X86::FsANDNPSrr, X86::FsORPSrr },
- { X86::VCMPSSrr, X86::VFsANDPSrr, X86::VFsANDNPSrr, X86::VFsORPSrr } },
- { { X86::CMPSDrr, X86::FsANDPDrr, X86::FsANDNPDrr, X86::FsORPDrr },
- { X86::VCMPSDrr, X86::VFsANDPDrr, X86::VFsANDNPDrr, X86::VFsORPDrr } }
+ // Choose the SSE instruction sequence based on data type (float or double).
+ static unsigned OpcTable[2][4] = {
+ { X86::CMPSSrr, X86::FsANDPSrr, X86::FsANDNPSrr, X86::FsORPSrr },
+ { X86::CMPSDrr, X86::FsANDPDrr, X86::FsANDNPDrr, X86::FsORPDrr }
};
- bool HasAVX = Subtarget->hasAVX();
unsigned *Opc = nullptr;
switch (RetVT.SimpleTy) {
default: return false;
- case MVT::f32: Opc = &OpcTable[0][HasAVX][0]; break;
- case MVT::f64: Opc = &OpcTable[1][HasAVX][0]; break;
+ case MVT::f32: Opc = &OpcTable[0][0]; break;
+ case MVT::f64: Opc = &OpcTable[1][0]; break;
}
const Value *LHS = I->getOperand(1);
@@ -1884,14 +1923,33 @@ bool X86FastISel::X86FastEmitSSESelect(MVT RetVT, const Instruction *I) {
return false;
const TargetRegisterClass *RC = TLI.getRegClassFor(RetVT);
- unsigned CmpReg = fastEmitInst_rri(Opc[0], RC, CmpLHSReg, CmpLHSIsKill,
- CmpRHSReg, CmpRHSIsKill, CC);
- unsigned AndReg = fastEmitInst_rr(Opc[1], RC, CmpReg, /*IsKill=*/false,
- LHSReg, LHSIsKill);
- unsigned AndNReg = fastEmitInst_rr(Opc[2], RC, CmpReg, /*IsKill=*/true,
- RHSReg, RHSIsKill);
- unsigned ResultReg = fastEmitInst_rr(Opc[3], RC, AndNReg, /*IsKill=*/true,
- AndReg, /*IsKill=*/true);
+ unsigned ResultReg;
+
+ if (Subtarget->hasAVX()) {
+ // If we have AVX, create 1 blendv instead of 3 logic instructions.
+ // Blendv was introduced with SSE 4.1, but the 2 register form implicitly
+ // uses XMM0 as the selection register. That may need just as many
+ // instructions as the AND/ANDN/OR sequence due to register moves, so
+ // don't bother.
+ unsigned CmpOpcode =
+ (RetVT.SimpleTy == MVT::f32) ? X86::VCMPSSrr : X86::VCMPSDrr;
+ unsigned BlendOpcode =
+ (RetVT.SimpleTy == MVT::f32) ? X86::VBLENDVPSrr : X86::VBLENDVPDrr;
+
+ unsigned CmpReg = fastEmitInst_rri(CmpOpcode, RC, CmpLHSReg, CmpLHSIsKill,
+ CmpRHSReg, CmpRHSIsKill, CC);
+ ResultReg = fastEmitInst_rrr(BlendOpcode, RC, RHSReg, RHSIsKill,
+ LHSReg, LHSIsKill, CmpReg, true);
+ } else {
+ unsigned CmpReg = fastEmitInst_rri(Opc[0], RC, CmpLHSReg, CmpLHSIsKill,
+ CmpRHSReg, CmpRHSIsKill, CC);
+ unsigned AndReg = fastEmitInst_rr(Opc[1], RC, CmpReg, /*IsKill=*/false,
+ LHSReg, LHSIsKill);
+ unsigned AndNReg = fastEmitInst_rr(Opc[2], RC, CmpReg, /*IsKill=*/true,
+ RHSReg, RHSIsKill);
+ ResultReg = fastEmitInst_rr(Opc[3], RC, AndNReg, /*IsKill=*/true,
+ AndReg, /*IsKill=*/true);
+ }
updateValueMap(I, ResultReg);
return true;
}
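When AVX is unavailable, the fallback sequence above realizes the classic branchless select: the compare materializes an all-ones or all-zeros mask, and AND/ANDN/OR route one operand through. In scalar terms (a sketch of the idea, not the emitted machine code):

    #include <cstdint>
    static uint64_t branchlessSelect(uint64_t Mask, uint64_t A, uint64_t B) {
      // Mask is ~0 when the condition holds, 0 otherwise:
      // (Mask & A) keeps A where selected, (~Mask & B) keeps B elsewhere.
      return (Mask & A) | (~Mask & B);
    }

With AVX the whole dance collapses into a single VBLENDVPS/VBLENDVPD whose selector register plays the role of Mask, which is why the patch prefers one blendv over three logic instructions.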
@@ -2015,38 +2073,30 @@ bool X86FastISel::X86SelectSIToFP(const Instruction *I) {
if (OpReg == 0)
return false;
- bool HasAVX = Subtarget->hasAVX();
const TargetRegisterClass *RC = nullptr;
unsigned Opcode;
- if (I->getType()->isDoubleTy() && X86ScalarSSEf64) {
+ if (I->getType()->isDoubleTy()) {
// sitofp int -> double
- Opcode = HasAVX ? X86::VCVTSI2SDrr : X86::CVTSI2SDrr;
+ Opcode = X86::VCVTSI2SDrr;
RC = &X86::FR64RegClass;
- } else if (I->getType()->isFloatTy() && X86ScalarSSEf32) {
+ } else if (I->getType()->isFloatTy()) {
// sitofp int -> float
- Opcode = HasAVX ? X86::VCVTSI2SSrr : X86::CVTSI2SSrr;
+ Opcode = X86::VCVTSI2SSrr;
RC = &X86::FR32RegClass;
} else
return false;
+ // The target-independent selection algorithm in FastISel already knows how
+ // to select a SINT_TO_FP if the target is SSE but not AVX. This code is only
+ // reachable if the subtarget has AVX.
+ assert(Subtarget->hasAVX() && "Expected a subtarget with AVX!");
- unsigned ImplicitDefReg = 0;
- if (HasAVX) {
- ImplicitDefReg = createResultReg(RC);
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
- TII.get(TargetOpcode::IMPLICIT_DEF), ImplicitDefReg);
- }
-
- const MCInstrDesc &II = TII.get(Opcode);
- OpReg = constrainOperandRegClass(II, OpReg, (HasAVX ? 2 : 1));
-
- unsigned ResultReg = createResultReg(RC);
- MachineInstrBuilder MIB;
- MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg);
- if (ImplicitDefReg)
- MIB.addReg(ImplicitDefReg, RegState::Kill);
- MIB.addReg(OpReg);
+ unsigned ImplicitDefReg = createResultReg(RC);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
+ TII.get(TargetOpcode::IMPLICIT_DEF), ImplicitDefReg);
+ unsigned ResultReg =
+ fastEmitInst_rr(Opcode, RC, ImplicitDefReg, true, OpReg, false);
updateValueMap(I, ResultReg);
return true;
}
@@ -3053,7 +3103,7 @@ bool X86FastISel::fastLowerCall(CallLoweringInfo &CLI) {
// Add a register mask operand representing the call-preserved registers.
// Proper defs for return values will be added by setPhysRegsDeadExcept().
- MIB.addRegMask(TRI.getCallPreservedMask(CC));
+ MIB.addRegMask(TRI.getCallPreservedMask(*FuncInfo.MF, CC));
// Add an implicit use GOT pointer in EBX.
if (Subtarget->isPICStyleGOT())
diff --git a/lib/Target/X86/X86FloatingPoint.cpp b/lib/Target/X86/X86FloatingPoint.cpp
index c8e5f64..3b0bd03 100644
--- a/lib/Target/X86/X86FloatingPoint.cpp
+++ b/lib/Target/X86/X86FloatingPoint.cpp
@@ -32,10 +32,10 @@
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/EdgeBundles.h"
+#include "llvm/CodeGen/LivePhysRegs.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/CodeGen/LivePhysRegs.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/IR/InlineAsm.h"
#include "llvm/Support/Debug.h"
@@ -300,7 +300,7 @@ bool FPS::runOnMachineFunction(MachineFunction &MF) {
// function. If it is all integer, there is nothing for us to do!
bool FPIsUsed = false;
- assert(X86::FP6 == X86::FP0+6 && "Register enums aren't sorted right!");
+ static_assert(X86::FP6 == X86::FP0+6, "Register enums aren't sorted right!");
for (unsigned i = 0; i <= 6; ++i)
if (MF.getRegInfo().isPhysRegUsed(X86::FP0+i)) {
FPIsUsed = true;
@@ -438,7 +438,7 @@ bool FPS::processBasicBlock(MachineFunction &MF, MachineBasicBlock &BB) {
// Rewind to first instruction newly inserted.
while (Start != BB.begin() && std::prev(Start) != PrevI) --Start;
dbgs() << "Inserted instructions:\n\t";
- Start->print(dbgs(), &MF.getTarget());
+ Start->print(dbgs());
while (++Start != std::next(I)) {}
}
dumpStack();
diff --git a/lib/Target/X86/X86FrameLowering.cpp b/lib/Target/X86/X86FrameLowering.cpp
index cead099..1d2c73c 100644
--- a/lib/Target/X86/X86FrameLowering.cpp
+++ b/lib/Target/X86/X86FrameLowering.cpp
@@ -581,7 +581,7 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF) const {
bool Is64Bit = STI.is64Bit();
// standard x86_64 and NaCl use 64-bit frame/stack pointers, x32 - 32-bit.
const bool Uses64BitFramePtr = STI.isTarget64BitLP64() || STI.isTargetNaCl64();
- bool IsWin64 = STI.isTargetWin64();
+ bool IsWin64 = STI.isCallingConvWin64(Fn->getCallingConv());
// Not necessarily synonymous with IsWin64.
bool IsWinEH = MF.getTarget().getMCAsmInfo()->usesWindowsCFI();
bool NeedsWinEH = IsWinEH && Fn->needsUnwindTableEntry();
diff --git a/lib/Target/X86/X86ISelDAGToDAG.cpp b/lib/Target/X86/X86ISelDAGToDAG.cpp
index 8d50ae1..fb12ce5 100644
--- a/lib/Target/X86/X86ISelDAGToDAG.cpp
+++ b/lib/Target/X86/X86ISelDAGToDAG.cpp
@@ -228,7 +228,7 @@ namespace {
/// SelectInlineAsmMemoryOperand - Implement addressing mode selection for
/// inline asm expressions.
bool SelectInlineAsmMemoryOperand(const SDValue &Op,
- char ConstraintCode,
+ unsigned ConstraintID,
std::vector<SDValue> &OutOps) override;
void EmitSpecialCodeForMain();
@@ -1004,6 +1004,15 @@ bool X86DAGToDAGISel::MatchAddressRecursively(SDValue N, X86ISelAddressMode &AM,
switch (N.getOpcode()) {
default: break;
+ case ISD::FRAME_ALLOC_RECOVER: {
+ if (!AM.hasSymbolicDisplacement())
+ if (const auto *ESNode = dyn_cast<ExternalSymbolSDNode>(N.getOperand(0)))
+ if (ESNode->getOpcode() == ISD::TargetExternalSymbol) {
+ AM.ES = ESNode->getSymbol();
+ return false;
+ }
+ break;
+ }
case ISD::Constant: {
uint64_t Val = cast<ConstantSDNode>(N)->getSExtValue();
if (!FoldOffsetIntoAddress(Val, AM))
@@ -2805,14 +2814,14 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) {
}
bool X86DAGToDAGISel::
-SelectInlineAsmMemoryOperand(const SDValue &Op, char ConstraintCode,
+SelectInlineAsmMemoryOperand(const SDValue &Op, unsigned ConstraintID,
std::vector<SDValue> &OutOps) {
SDValue Op0, Op1, Op2, Op3, Op4;
- switch (ConstraintCode) {
- case 'o': // offsetable ??
- case 'v': // not offsetable ??
+ switch (ConstraintID) {
+ case InlineAsm::Constraint_o: // offsetable ??
+ case InlineAsm::Constraint_v: // not offsetable ??
default: return true;
- case 'm': // memory
+ case InlineAsm::Constraint_m: // memory
if (!SelectAddr(nullptr, Op, Op0, Op1, Op2, Op3, Op4))
return true;
break;
diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp
index 6866be7..8b92e70 100644
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -25,7 +25,6 @@
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/StringSwitch.h"
-#include "llvm/ADT/VariadicFunction.h"
#include "llvm/CodeGen/IntrinsicLowering.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
@@ -77,119 +76,6 @@ static cl::opt<int> ReciprocalEstimateRefinementSteps(
static SDValue getMOVL(SelectionDAG &DAG, SDLoc dl, EVT VT, SDValue V1,
SDValue V2);
-static SDValue ExtractSubVector(SDValue Vec, unsigned IdxVal,
- SelectionDAG &DAG, SDLoc dl,
- unsigned vectorWidth) {
- assert((vectorWidth == 128 || vectorWidth == 256) &&
- "Unsupported vector width");
- EVT VT = Vec.getValueType();
- EVT ElVT = VT.getVectorElementType();
- unsigned Factor = VT.getSizeInBits()/vectorWidth;
- EVT ResultVT = EVT::getVectorVT(*DAG.getContext(), ElVT,
- VT.getVectorNumElements()/Factor);
-
- // Extract from UNDEF is UNDEF.
- if (Vec.getOpcode() == ISD::UNDEF)
- return DAG.getUNDEF(ResultVT);
-
- // Extract the relevant vectorWidth bits. Generate an EXTRACT_SUBVECTOR
- unsigned ElemsPerChunk = vectorWidth / ElVT.getSizeInBits();
-
- // This is the index of the first element of the vectorWidth-bit chunk
- // we want.
- unsigned NormalizedIdxVal = (((IdxVal * ElVT.getSizeInBits()) / vectorWidth)
- * ElemsPerChunk);
-
- // If the input is a buildvector just emit a smaller one.
- if (Vec.getOpcode() == ISD::BUILD_VECTOR)
- return DAG.getNode(ISD::BUILD_VECTOR, dl, ResultVT,
- makeArrayRef(Vec->op_begin() + NormalizedIdxVal,
- ElemsPerChunk));
-
- SDValue VecIdx = DAG.getIntPtrConstant(NormalizedIdxVal);
- return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, ResultVT, Vec, VecIdx);
-}
-
-/// Generate a DAG to grab 128-bits from a vector > 128 bits. This
-/// sets things up to match to an AVX VEXTRACTF128 / VEXTRACTI128
-/// or AVX-512 VEXTRACTF32x4 / VEXTRACTI32x4
-/// instructions or a simple subregister reference. Idx is an index in the
-/// 128 bits we want. It need not be aligned to a 128-bit boundary. That makes
-/// lowering EXTRACT_VECTOR_ELT operations easier.
-static SDValue Extract128BitVector(SDValue Vec, unsigned IdxVal,
- SelectionDAG &DAG, SDLoc dl) {
- assert((Vec.getValueType().is256BitVector() ||
- Vec.getValueType().is512BitVector()) && "Unexpected vector size!");
- return ExtractSubVector(Vec, IdxVal, DAG, dl, 128);
-}
-
-/// Generate a DAG to grab 256-bits from a 512-bit vector.
-static SDValue Extract256BitVector(SDValue Vec, unsigned IdxVal,
- SelectionDAG &DAG, SDLoc dl) {
- assert(Vec.getValueType().is512BitVector() && "Unexpected vector size!");
- return ExtractSubVector(Vec, IdxVal, DAG, dl, 256);
-}
-
-static SDValue InsertSubVector(SDValue Result, SDValue Vec,
- unsigned IdxVal, SelectionDAG &DAG,
- SDLoc dl, unsigned vectorWidth) {
- assert((vectorWidth == 128 || vectorWidth == 256) &&
- "Unsupported vector width");
- // Inserting UNDEF is Result
- if (Vec.getOpcode() == ISD::UNDEF)
- return Result;
- EVT VT = Vec.getValueType();
- EVT ElVT = VT.getVectorElementType();
- EVT ResultVT = Result.getValueType();
-
- // Insert the relevant vectorWidth bits.
- unsigned ElemsPerChunk = vectorWidth/ElVT.getSizeInBits();
-
- // This is the index of the first element of the vectorWidth-bit chunk
- // we want.
- unsigned NormalizedIdxVal = (((IdxVal * ElVT.getSizeInBits())/vectorWidth)
- * ElemsPerChunk);
-
- SDValue VecIdx = DAG.getIntPtrConstant(NormalizedIdxVal);
- return DAG.getNode(ISD::INSERT_SUBVECTOR, dl, ResultVT, Result, Vec, VecIdx);
-}
-
-/// Generate a DAG to put 128-bits into a vector > 128 bits. This
-/// sets things up to match to an AVX VINSERTF128/VINSERTI128 or
-/// AVX-512 VINSERTF32x4/VINSERTI32x4 instructions or a
-/// simple superregister reference. Idx is an index in the 128 bits
-/// we want. It need not be aligned to a 128-bit boundary. That makes
-/// lowering INSERT_VECTOR_ELT operations easier.
-static SDValue Insert128BitVector(SDValue Result, SDValue Vec, unsigned IdxVal,
- SelectionDAG &DAG,SDLoc dl) {
- assert(Vec.getValueType().is128BitVector() && "Unexpected vector size!");
- return InsertSubVector(Result, Vec, IdxVal, DAG, dl, 128);
-}
-
-static SDValue Insert256BitVector(SDValue Result, SDValue Vec, unsigned IdxVal,
- SelectionDAG &DAG, SDLoc dl) {
- assert(Vec.getValueType().is256BitVector() && "Unexpected vector size!");
- return InsertSubVector(Result, Vec, IdxVal, DAG, dl, 256);
-}
-
-/// Concat two 128-bit vectors into a 256 bit vector using VINSERTF128
-/// instructions. This is used because creating CONCAT_VECTOR nodes of
-/// BUILD_VECTORS returns a larger BUILD_VECTOR while we're trying to lower
-/// large BUILD_VECTORS.
-static SDValue Concat128BitVectors(SDValue V1, SDValue V2, EVT VT,
- unsigned NumElems, SelectionDAG &DAG,
- SDLoc dl) {
- SDValue V = Insert128BitVector(DAG.getUNDEF(VT), V1, 0, DAG, dl);
- return Insert128BitVector(V, V2, NumElems/2, DAG, dl);
-}
-
-static SDValue Concat256BitVectors(SDValue V1, SDValue V2, EVT VT,
- unsigned NumElems, SelectionDAG &DAG,
- SDLoc dl) {
- SDValue V = Insert256BitVector(DAG.getUNDEF(VT), V1, 0, DAG, dl);
- return Insert256BitVector(V, V2, NumElems/2, DAG, dl);
-}
-
X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
const X86Subtarget &STI)
: TargetLowering(TM), Subtarget(&STI) {
@@ -871,35 +757,16 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
// MMX-sized vectors (other than x86mmx) are expected to be expanded
// into smaller operations.
- setOperationAction(ISD::MULHS, MVT::v8i8, Expand);
- setOperationAction(ISD::MULHS, MVT::v4i16, Expand);
- setOperationAction(ISD::MULHS, MVT::v2i32, Expand);
- setOperationAction(ISD::MULHS, MVT::v1i64, Expand);
- setOperationAction(ISD::AND, MVT::v8i8, Expand);
- setOperationAction(ISD::AND, MVT::v4i16, Expand);
- setOperationAction(ISD::AND, MVT::v2i32, Expand);
- setOperationAction(ISD::AND, MVT::v1i64, Expand);
- setOperationAction(ISD::OR, MVT::v8i8, Expand);
- setOperationAction(ISD::OR, MVT::v4i16, Expand);
- setOperationAction(ISD::OR, MVT::v2i32, Expand);
- setOperationAction(ISD::OR, MVT::v1i64, Expand);
- setOperationAction(ISD::XOR, MVT::v8i8, Expand);
- setOperationAction(ISD::XOR, MVT::v4i16, Expand);
- setOperationAction(ISD::XOR, MVT::v2i32, Expand);
- setOperationAction(ISD::XOR, MVT::v1i64, Expand);
- setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v8i8, Expand);
- setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4i16, Expand);
- setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v2i32, Expand);
- setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v1i64, Expand);
+ for (MVT MMXTy : {MVT::v8i8, MVT::v4i16, MVT::v2i32, MVT::v1i64}) {
+ setOperationAction(ISD::MULHS, MMXTy, Expand);
+ setOperationAction(ISD::AND, MMXTy, Expand);
+ setOperationAction(ISD::OR, MMXTy, Expand);
+ setOperationAction(ISD::XOR, MMXTy, Expand);
+ setOperationAction(ISD::SCALAR_TO_VECTOR, MMXTy, Expand);
+ setOperationAction(ISD::SELECT, MMXTy, Expand);
+ setOperationAction(ISD::BITCAST, MMXTy, Expand);
+ }
setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v1i64, Expand);
- setOperationAction(ISD::SELECT, MVT::v8i8, Expand);
- setOperationAction(ISD::SELECT, MVT::v4i16, Expand);
- setOperationAction(ISD::SELECT, MVT::v2i32, Expand);
- setOperationAction(ISD::SELECT, MVT::v1i64, Expand);
- setOperationAction(ISD::BITCAST, MVT::v8i8, Expand);
- setOperationAction(ISD::BITCAST, MVT::v4i16, Expand);
- setOperationAction(ISD::BITCAST, MVT::v2i32, Expand);
- setOperationAction(ISD::BITCAST, MVT::v1i64, Expand);
if (!TM.Options.UseSoftFloat && Subtarget->hasSSE1()) {
addRegisterClass(MVT::v4f32, &X86::VR128RegClass);
@@ -1065,27 +932,13 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
}
if (!TM.Options.UseSoftFloat && Subtarget->hasSSE41()) {
- setOperationAction(ISD::FFLOOR, MVT::f32, Legal);
- setOperationAction(ISD::FCEIL, MVT::f32, Legal);
- setOperationAction(ISD::FTRUNC, MVT::f32, Legal);
- setOperationAction(ISD::FRINT, MVT::f32, Legal);
- setOperationAction(ISD::FNEARBYINT, MVT::f32, Legal);
- setOperationAction(ISD::FFLOOR, MVT::f64, Legal);
- setOperationAction(ISD::FCEIL, MVT::f64, Legal);
- setOperationAction(ISD::FTRUNC, MVT::f64, Legal);
- setOperationAction(ISD::FRINT, MVT::f64, Legal);
- setOperationAction(ISD::FNEARBYINT, MVT::f64, Legal);
-
- setOperationAction(ISD::FFLOOR, MVT::v4f32, Legal);
- setOperationAction(ISD::FCEIL, MVT::v4f32, Legal);
- setOperationAction(ISD::FTRUNC, MVT::v4f32, Legal);
- setOperationAction(ISD::FRINT, MVT::v4f32, Legal);
- setOperationAction(ISD::FNEARBYINT, MVT::v4f32, Legal);
- setOperationAction(ISD::FFLOOR, MVT::v2f64, Legal);
- setOperationAction(ISD::FCEIL, MVT::v2f64, Legal);
- setOperationAction(ISD::FTRUNC, MVT::v2f64, Legal);
- setOperationAction(ISD::FRINT, MVT::v2f64, Legal);
- setOperationAction(ISD::FNEARBYINT, MVT::v2f64, Legal);
+ for (MVT RoundedTy : {MVT::f32, MVT::f64, MVT::v4f32, MVT::v2f64}) {
+ setOperationAction(ISD::FFLOOR, RoundedTy, Legal);
+ setOperationAction(ISD::FCEIL, RoundedTy, Legal);
+ setOperationAction(ISD::FTRUNC, RoundedTy, Legal);
+ setOperationAction(ISD::FRINT, RoundedTy, Legal);
+ setOperationAction(ISD::FNEARBYINT, RoundedTy, Legal);
+ }
// FIXME: Do we need to handle scalar-to-vector here?
setOperationAction(ISD::MUL, MVT::v4i32, Legal);
@@ -1474,7 +1327,6 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::CONCAT_VECTORS, MVT::v8i64, Custom);
setOperationAction(ISD::CONCAT_VECTORS, MVT::v16f32, Custom);
setOperationAction(ISD::CONCAT_VECTORS, MVT::v16i32, Custom);
- setOperationAction(ISD::CONCAT_VECTORS, MVT::v8i1, Custom);
setOperationAction(ISD::CONCAT_VECTORS, MVT::v16i1, Legal);
setOperationAction(ISD::SETCC, MVT::v16i1, Custom);
@@ -1576,6 +1428,10 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::SUB, MVT::v32i16, Legal);
setOperationAction(ISD::SUB, MVT::v64i8, Legal);
setOperationAction(ISD::MUL, MVT::v32i16, Legal);
+ setOperationAction(ISD::CONCAT_VECTORS, MVT::v32i1, Custom);
+ setOperationAction(ISD::CONCAT_VECTORS, MVT::v64i1, Custom);
+ setOperationAction(ISD::INSERT_SUBVECTOR, MVT::v32i1, Custom);
+ setOperationAction(ISD::INSERT_SUBVECTOR, MVT::v64i1, Custom);
for (int i = MVT::v32i8; i != MVT::v8i64; ++i) {
const MVT VT = (MVT::SimpleValueType)i;
@@ -1599,7 +1455,10 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::SETCC, MVT::v4i1, Custom);
setOperationAction(ISD::SETCC, MVT::v2i1, Custom);
- setOperationAction(ISD::INSERT_SUBVECTOR, MVT::v8i1, Legal);
+ setOperationAction(ISD::CONCAT_VECTORS, MVT::v4i1, Custom);
+ setOperationAction(ISD::CONCAT_VECTORS, MVT::v8i1, Custom);
+ setOperationAction(ISD::INSERT_SUBVECTOR, MVT::v8i1, Custom);
+ setOperationAction(ISD::INSERT_SUBVECTOR, MVT::v4i1, Custom);
setOperationAction(ISD::AND, MVT::v8i32, Legal);
setOperationAction(ISD::OR, MVT::v8i32, Legal);
@@ -3189,7 +3048,7 @@ X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
// Add a register mask operand representing the call-preserved registers.
const TargetRegisterInfo *TRI = Subtarget->getRegisterInfo();
- const uint32_t *Mask = TRI->getCallPreservedMask(CallConv);
+ const uint32_t *Mask = TRI->getCallPreservedMask(MF, CallConv);
assert(Mask && "Missing call preserved mask for calling convention");
Ops.push_back(DAG.getRegisterMask(Mask));
@@ -3906,21 +3765,6 @@ static bool isSequentialOrUndefInRange(ArrayRef<int> Mask,
return true;
}
-/// CommuteVectorShuffleMask - Change values in a shuffle permute mask assuming
-/// the two vector operands have swapped position.
-static void CommuteVectorShuffleMask(SmallVectorImpl<int> &Mask,
- unsigned NumElems) {
- for (unsigned i = 0; i != NumElems; ++i) {
- int idx = Mask[i];
- if (idx < 0)
- continue;
- else if (idx < (int)NumElems)
- Mask[i] = idx + NumElems;
- else
- Mask[i] = idx - NumElems;
- }
-}
-
/// isVEXTRACTIndex - Return true if the specified
/// EXTRACT_SUBVECTOR operand specifies a vector extract that is
/// suitable for instruction that extract 128 or 256 bit vectors
@@ -4083,9 +3927,13 @@ static SDValue getZeroVector(EVT VT, const X86Subtarget *Subtarget,
Cst, Cst, Cst, Cst, Cst, Cst, Cst, Cst };
Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v16i32, Ops);
} else if (VT.getScalarType() == MVT::i1) {
- assert(VT.getVectorNumElements() <= 16 && "Unexpected vector type");
+
+ assert((Subtarget->hasBWI() || VT.getVectorNumElements() <= 16)
+ && "Unexpected vector type");
+ assert((Subtarget->hasVLX() || VT.getVectorNumElements() >= 8)
+ && "Unexpected vector type");
SDValue Cst = DAG.getConstant(0, MVT::i1);
- SmallVector<SDValue, 16> Ops(VT.getVectorNumElements(), Cst);
+ SmallVector<SDValue, 64> Ops(VT.getVectorNumElements(), Cst);
return DAG.getNode(ISD::BUILD_VECTOR, dl, VT, Ops);
} else
llvm_unreachable("Unexpected vector type");
@@ -4093,6 +3941,162 @@ static SDValue getZeroVector(EVT VT, const X86Subtarget *Subtarget,
return DAG.getNode(ISD::BITCAST, dl, VT, Vec);
}
+static SDValue ExtractSubVector(SDValue Vec, unsigned IdxVal,
+ SelectionDAG &DAG, SDLoc dl,
+ unsigned vectorWidth) {
+ assert((vectorWidth == 128 || vectorWidth == 256) &&
+ "Unsupported vector width");
+ EVT VT = Vec.getValueType();
+ EVT ElVT = VT.getVectorElementType();
+ unsigned Factor = VT.getSizeInBits()/vectorWidth;
+ EVT ResultVT = EVT::getVectorVT(*DAG.getContext(), ElVT,
+ VT.getVectorNumElements()/Factor);
+
+ // Extract from UNDEF is UNDEF.
+ if (Vec.getOpcode() == ISD::UNDEF)
+ return DAG.getUNDEF(ResultVT);
+
+ // Extract the relevant vectorWidth bits. Generate an EXTRACT_SUBVECTOR
+ unsigned ElemsPerChunk = vectorWidth / ElVT.getSizeInBits();
+
+ // This is the index of the first element of the vectorWidth-bit chunk
+ // we want.
+ unsigned NormalizedIdxVal = (((IdxVal * ElVT.getSizeInBits()) / vectorWidth)
+ * ElemsPerChunk);
+
+ // If the input is a buildvector just emit a smaller one.
+ if (Vec.getOpcode() == ISD::BUILD_VECTOR)
+ return DAG.getNode(ISD::BUILD_VECTOR, dl, ResultVT,
+ makeArrayRef(Vec->op_begin() + NormalizedIdxVal,
+ ElemsPerChunk));
+
+ SDValue VecIdx = DAG.getIntPtrConstant(NormalizedIdxVal);
+ return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, ResultVT, Vec, VecIdx);
+}
+
+/// Generate a DAG to grab 128-bits from a vector > 128 bits. This
+/// sets things up to match to an AVX VEXTRACTF128 / VEXTRACTI128
+/// or AVX-512 VEXTRACTF32x4 / VEXTRACTI32x4
+/// instructions or a simple subregister reference. Idx is an index in the
+/// 128 bits we want. It need not be aligned to a 128-bit boundary. That makes
+/// lowering EXTRACT_VECTOR_ELT operations easier.
+static SDValue Extract128BitVector(SDValue Vec, unsigned IdxVal,
+ SelectionDAG &DAG, SDLoc dl) {
+ assert((Vec.getValueType().is256BitVector() ||
+ Vec.getValueType().is512BitVector()) && "Unexpected vector size!");
+ return ExtractSubVector(Vec, IdxVal, DAG, dl, 128);
+}
+
+/// Generate a DAG to grab 256-bits from a 512-bit vector.
+static SDValue Extract256BitVector(SDValue Vec, unsigned IdxVal,
+ SelectionDAG &DAG, SDLoc dl) {
+ assert(Vec.getValueType().is512BitVector() && "Unexpected vector size!");
+ return ExtractSubVector(Vec, IdxVal, DAG, dl, 256);
+}
+
+static SDValue InsertSubVector(SDValue Result, SDValue Vec,
+ unsigned IdxVal, SelectionDAG &DAG,
+ SDLoc dl, unsigned vectorWidth) {
+ assert((vectorWidth == 128 || vectorWidth == 256) &&
+ "Unsupported vector width");
+ // Inserting UNDEF is Result
+ if (Vec.getOpcode() == ISD::UNDEF)
+ return Result;
+ EVT VT = Vec.getValueType();
+ EVT ElVT = VT.getVectorElementType();
+ EVT ResultVT = Result.getValueType();
+
+ // Insert the relevant vectorWidth bits.
+ unsigned ElemsPerChunk = vectorWidth/ElVT.getSizeInBits();
+
+ // This is the index of the first element of the vectorWidth-bit chunk
+ // we want.
+ unsigned NormalizedIdxVal = (((IdxVal * ElVT.getSizeInBits())/vectorWidth)
+ * ElemsPerChunk);
+
+ SDValue VecIdx = DAG.getIntPtrConstant(NormalizedIdxVal);
+ return DAG.getNode(ISD::INSERT_SUBVECTOR, dl, ResultVT, Result, Vec, VecIdx);
+}
+
+/// Generate a DAG to put 128-bits into a vector > 128 bits. This
+/// sets things up to match to an AVX VINSERTF128/VINSERTI128 or
+/// AVX-512 VINSERTF32x4/VINSERTI32x4 instructions or a
+/// simple superregister reference. Idx is an index in the 128 bits
+/// we want. It need not be aligned to a 128-bit boundary. That makes
+/// lowering INSERT_VECTOR_ELT operations easier.
+static SDValue Insert128BitVector(SDValue Result, SDValue Vec, unsigned IdxVal,
+ SelectionDAG &DAG, SDLoc dl) {
+ assert(Vec.getValueType().is128BitVector() && "Unexpected vector size!");
+
+ // For insertion into the zero index (low half) of a 256-bit vector, it is
+ // more efficient to generate a blend with immediate instead of an insert*128.
+ // We are still creating an INSERT_SUBVECTOR below with an undef node to
+ // extend the subvector to the size of the result vector. Make sure that
+ // we are not recursing on that node by checking for undef here.
+ if (IdxVal == 0 && Result.getValueType().is256BitVector() &&
+ Result.getOpcode() != ISD::UNDEF) {
+ EVT ResultVT = Result.getValueType();
+ SDValue ZeroIndex = DAG.getIntPtrConstant(0);
+ SDValue Undef = DAG.getUNDEF(ResultVT);
+ SDValue Vec256 = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, ResultVT, Undef,
+ Vec, ZeroIndex);
+
+    // The blend instruction, and therefore its mask, depends on the data type.
+ MVT ScalarType = ResultVT.getScalarType().getSimpleVT();
+ if (ScalarType.isFloatingPoint()) {
+ // Choose either vblendps (float) or vblendpd (double).
+ unsigned ScalarSize = ScalarType.getSizeInBits();
+ assert((ScalarSize == 64 || ScalarSize == 32) && "Unknown float type");
+ unsigned MaskVal = (ScalarSize == 64) ? 0x03 : 0x0f;
+ SDValue Mask = DAG.getConstant(MaskVal, MVT::i8);
+ return DAG.getNode(X86ISD::BLENDI, dl, ResultVT, Result, Vec256, Mask);
+ }
+
+ const X86Subtarget &Subtarget =
+ static_cast<const X86Subtarget &>(DAG.getSubtarget());
+
+ // AVX2 is needed for 256-bit integer blend support.
+    // Integers must be cast to 32-bit because there is only vpblendd;
+    // vpblendw can't be used for this because its 8-bit mask is replicated
+    // across both 128-bit lanes.
+
+ // If we don't have AVX2, then cast to float. Using a wrong domain blend
+ // is still more efficient than using the wrong domain vinsertf128 that
+ // will be created by InsertSubVector().
+ MVT CastVT = Subtarget.hasAVX2() ? MVT::v8i32 : MVT::v8f32;
+
+ SDValue Mask = DAG.getConstant(0x0f, MVT::i8);
+ Vec256 = DAG.getNode(ISD::BITCAST, dl, CastVT, Vec256);
+ Vec256 = DAG.getNode(X86ISD::BLENDI, dl, CastVT, Result, Vec256, Mask);
+ return DAG.getNode(ISD::BITCAST, dl, ResultVT, Vec256);
+ }
+
+ return InsertSubVector(Result, Vec, IdxVal, DAG, dl, 128);
+}
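
A hedged sketch of the blend-immediate choice above (not from the patch): the
mask has one bit per element, so taking the low 128 bits from the inserted
subvector means the low two bits for 64-bit elements and the low four bits for
32-bit elements:

#include <cassert>

// Illustrative: the immediate that selects the low 128 bits of a 256-bit
// blend from the second source, as in Insert128BitVector above.
unsigned lowHalfBlendMask(unsigned ScalarSizeInBits) {
  assert((ScalarSizeInBits == 32 || ScalarSizeInBits == 64) &&
         "only the f32/f64 (or i32-cast) cases are formed here");
  // 4 x 64-bit elements -> 0b0011 (0x03); 8 x 32-bit -> 0b00001111 (0x0f).
  return ScalarSizeInBits == 64 ? 0x03 : 0x0f;
}

int main() {
  assert(lowHalfBlendMask(64) == 0x03); // vblendpd $0x03
  assert(lowHalfBlendMask(32) == 0x0f); // vblendps / vpblendd $0x0f
}
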
+
+static SDValue Insert256BitVector(SDValue Result, SDValue Vec, unsigned IdxVal,
+ SelectionDAG &DAG, SDLoc dl) {
+ assert(Vec.getValueType().is256BitVector() && "Unexpected vector size!");
+ return InsertSubVector(Result, Vec, IdxVal, DAG, dl, 256);
+}
+
+/// Concat two 128-bit vectors into a 256-bit vector using VINSERTF128
+/// instructions. This is used because creating CONCAT_VECTORS nodes of
+/// BUILD_VECTORs returns a larger BUILD_VECTOR while we're trying to lower
+/// large BUILD_VECTORs.
+static SDValue Concat128BitVectors(SDValue V1, SDValue V2, EVT VT,
+ unsigned NumElems, SelectionDAG &DAG,
+ SDLoc dl) {
+ SDValue V = Insert128BitVector(DAG.getUNDEF(VT), V1, 0, DAG, dl);
+ return Insert128BitVector(V, V2, NumElems/2, DAG, dl);
+}
+
+static SDValue Concat256BitVectors(SDValue V1, SDValue V2, EVT VT,
+ unsigned NumElems, SelectionDAG &DAG,
+ SDLoc dl) {
+ SDValue V = Insert256BitVector(DAG.getUNDEF(VT), V1, 0, DAG, dl);
+ return Insert256BitVector(V, V2, NumElems/2, DAG, dl);
+}
+
/// getOnesVector - Returns a vector of specified type with all bits set.
/// Always build ones vectors as <4 x i32> or <8 x i32>. For 256-bit types with
/// no AVX2 support, use two <4 x i32> inserted in a <8 x i32> appropriately.
@@ -5567,8 +5571,7 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const {
return getOnesVector(VT, Subtarget->hasInt256(), DAG, dl);
}
- SDValue Broadcast = LowerVectorBroadcast(Op, Subtarget, DAG);
- if (Broadcast.getNode())
+ if (SDValue Broadcast = LowerVectorBroadcast(Op, Subtarget, DAG))
return Broadcast;
unsigned EVTBits = ExtVT.getSizeInBits();
@@ -5635,12 +5638,13 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const {
if (ExtVT == MVT::i32 || ExtVT == MVT::f32 || ExtVT == MVT::f64 ||
(ExtVT == MVT::i64 && Subtarget->is64Bit())) {
- if (VT.is256BitVector() || VT.is512BitVector()) {
+ if (VT.is512BitVector()) {
SDValue ZeroVec = getZeroVector(VT, Subtarget, DAG, dl);
return DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VT, ZeroVec,
Item, DAG.getIntPtrConstant(0));
}
- assert(VT.is128BitVector() && "Expected an SSE value type!");
+ assert((VT.is128BitVector() || VT.is256BitVector()) &&
+ "Expected an SSE value type!");
Item = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Item);
// Turn it into a MOVL (i.e. movss, movsd, or movd) to a zero vector.
return getShuffleVectorZeroOrUndef(Item, 0, true, Subtarget, DAG);
@@ -5742,24 +5746,20 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const {
}
// If element VT is < 32 bits, convert it to inserts into a zero vector.
- if (EVTBits == 8 && NumElems == 16) {
- SDValue V = LowerBuildVectorv16i8(Op, NonZeros,NumNonZero,NumZero, DAG,
- Subtarget, *this);
- if (V.getNode()) return V;
- }
+ if (EVTBits == 8 && NumElems == 16)
+ if (SDValue V = LowerBuildVectorv16i8(Op, NonZeros,NumNonZero,NumZero, DAG,
+ Subtarget, *this))
+ return V;
- if (EVTBits == 16 && NumElems == 8) {
- SDValue V = LowerBuildVectorv8i16(Op, NonZeros,NumNonZero,NumZero, DAG,
- Subtarget, *this);
- if (V.getNode()) return V;
- }
+ if (EVTBits == 16 && NumElems == 8)
+ if (SDValue V = LowerBuildVectorv8i16(Op, NonZeros,NumNonZero,NumZero, DAG,
+ Subtarget, *this))
+ return V;
// If element VT is == 32 bits and has 4 elems, try to generate an INSERTPS
- if (EVTBits == 32 && NumElems == 4) {
- SDValue V = LowerBuildVectorv4x32(Op, DAG, Subtarget, *this);
- if (V.getNode())
+ if (EVTBits == 32 && NumElems == 4)
+ if (SDValue V = LowerBuildVectorv4x32(Op, DAG, Subtarget, *this))
return V;
- }
// If element VT is == 32 bits, turn it into a number of shuffles.
SmallVector<SDValue, 8> V(NumElems);
@@ -5807,13 +5807,11 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const {
V[i] = Op.getOperand(i);
// Check for elements which are consecutive loads.
- SDValue LD = EltsFromConsecutiveLoads(VT, V, dl, DAG, false);
- if (LD.getNode())
+ if (SDValue LD = EltsFromConsecutiveLoads(VT, V, dl, DAG, false))
return LD;
// Check for a build vector from mostly shuffle plus few inserting.
- SDValue Sh = buildFromShuffleMostly(Op, DAG);
- if (Sh.getNode())
+ if (SDValue Sh = buildFromShuffleMostly(Op, DAG))
return Sh;
// For SSE 4.1, use insertps to put the high elements into the low element.
@@ -5893,8 +5891,64 @@ static SDValue LowerAVXCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) {
return Concat256BitVectors(V1, V2, ResVT, NumElems, DAG, dl);
}
-static SDValue LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) {
- MVT LLVM_ATTRIBUTE_UNUSED VT = Op.getSimpleValueType();
+static SDValue LowerCONCAT_VECTORSvXi1(SDValue Op,
+ const X86Subtarget *Subtarget,
+                                        SelectionDAG &DAG) {
+ SDLoc dl(Op);
+ MVT ResVT = Op.getSimpleValueType();
+ unsigned NumOfOperands = Op.getNumOperands();
+
+ assert(isPowerOf2_32(NumOfOperands) &&
+ "Unexpected number of operands in CONCAT_VECTORS");
+
+ if (NumOfOperands > 2) {
+ MVT HalfVT = MVT::getVectorVT(ResVT.getScalarType(),
+ ResVT.getVectorNumElements()/2);
+ SmallVector<SDValue, 2> Ops;
+ for (unsigned i = 0; i < NumOfOperands/2; i++)
+ Ops.push_back(Op.getOperand(i));
+ SDValue Lo = DAG.getNode(ISD::CONCAT_VECTORS, dl, HalfVT, Ops);
+ Ops.clear();
+ for (unsigned i = NumOfOperands/2; i < NumOfOperands; i++)
+ Ops.push_back(Op.getOperand(i));
+ SDValue Hi = DAG.getNode(ISD::CONCAT_VECTORS, dl, HalfVT, Ops);
+ return DAG.getNode(ISD::CONCAT_VECTORS, dl, ResVT, Lo, Hi);
+ }
+
+ SDValue V1 = Op.getOperand(0);
+ SDValue V2 = Op.getOperand(1);
+ bool IsZeroV1 = ISD::isBuildVectorAllZeros(V1.getNode());
+ bool IsZeroV2 = ISD::isBuildVectorAllZeros(V2.getNode());
+
+ if (IsZeroV1 && IsZeroV2)
+ return getZeroVector(ResVT, Subtarget, DAG, dl);
+
+ SDValue ZeroIdx = DAG.getIntPtrConstant(0);
+ SDValue Undef = DAG.getUNDEF(ResVT);
+ unsigned NumElems = ResVT.getVectorNumElements();
+ SDValue ShiftBits = DAG.getConstant(NumElems/2, MVT::i8);
+
+ V2 = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, ResVT, Undef, V2, ZeroIdx);
+ V2 = DAG.getNode(X86ISD::VSHLI, dl, ResVT, V2, ShiftBits);
+ if (IsZeroV1)
+ return V2;
+
+ V1 = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, ResVT, Undef, V1, ZeroIdx);
+ // Zero the upper bits of V1
+ V1 = DAG.getNode(X86ISD::VSHLI, dl, ResVT, V1, ShiftBits);
+ V1 = DAG.getNode(X86ISD::VSRLI, dl, ResVT, V1, ShiftBits);
+ if (IsZeroV2)
+ return V1;
+ return DAG.getNode(ISD::OR, dl, ResVT, V1, V2);
+}
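
Since an AVX-512 mask register is one bit per element, the shift/or sequence
above has a direct scalar analogue. A minimal model (illustrative, not part of
the patch) concatenating two 8-element masks into a 16-element one:

#include <cassert>
#include <cstdint>

// KSHIFTL moves the high half into place; a KSHIFTL/KSHIFTR pair clears the
// upper bits of the low half (undefined in a real k-register); KOR merges.
uint16_t concatMask8(uint8_t LoHalf, uint8_t HiHalf) {
  const unsigned Shift = 16 / 2; // NumElems / 2
  uint16_t V2 = (uint16_t)(HiHalf << Shift);
  uint16_t V1 = (uint16_t)((uint16_t)(LoHalf << Shift) >> Shift);
  return (uint16_t)(V1 | V2);
}

int main() {
  assert(concatMask8(0xA5, 0x3C) == 0x3CA5);
}
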
+
+static SDValue LowerCONCAT_VECTORS(SDValue Op,
+ const X86Subtarget *Subtarget,
+ SelectionDAG &DAG) {
+ MVT VT = Op.getSimpleValueType();
+ if (VT.getVectorElementType() == MVT::i1)
+ return LowerCONCAT_VECTORSvXi1(Op, Subtarget, DAG);
+
assert((VT.is256BitVector() && Op.getNumOperands() == 2) ||
(VT.is512BitVector() && (Op.getNumOperands() == 2 ||
Op.getNumOperands() == 4)));
@@ -6935,8 +6989,8 @@ static SDValue lowerVectorShuffleAsBroadcast(SDLoc DL, MVT VT, SDValue V,
"a sorted mask where the broadcast "
"comes from V1.");
- // Go up the chain of (vector) values to try and find a scalar load that
- // we can combine with the broadcast.
+ // Go up the chain of (vector) values to find a scalar load that we can
+ // combine with the broadcast.
for (;;) {
switch (V.getOpcode()) {
case ISD::CONCAT_VECTORS: {
@@ -6973,12 +7027,12 @@ static SDValue lowerVectorShuffleAsBroadcast(SDLoc DL, MVT VT, SDValue V,
(V.getOpcode() == ISD::SCALAR_TO_VECTOR && BroadcastIdx == 0)) {
V = V.getOperand(BroadcastIdx);
- // If the scalar isn't a load we can't broadcast from it in AVX1, only with
- // AVX2.
+ // If the scalar isn't a load, we can't broadcast from it in AVX1.
+ // Only AVX2 has register broadcasts.
if (!Subtarget->hasAVX2() && !isShuffleFoldableLoad(V))
return SDValue();
} else if (BroadcastIdx != 0 || !Subtarget->hasAVX2()) {
- // We can't broadcast from a vector register w/o AVX2, and we can only
+ // We can't broadcast from a vector register without AVX2, and we can only
// broadcast from the zero-element of a vector register.
return SDValue();
}
@@ -7689,10 +7743,18 @@ static SDValue lowerV4I32VectorShuffle(SDValue Op, SDValue V1, SDValue V2,
/// The exact breakdown of how to form these dword pairs and align them on the
/// correct sides is really tricky. See the comments within the function for
/// more of the details.
+///
+/// This code also handles repeated 128-bit lanes of v8i16 shuffles, but each
+/// lane must shuffle the *exact* same way. In fact, you must pass a v8 Mask to
+/// this routine for it to work correctly. To shuffle a 256-bit or 512-bit i16
+/// vector, form the analogous 128-bit 8-element Mask.
static SDValue lowerV8I16GeneralSingleInputVectorShuffle(
- SDLoc DL, SDValue V, MutableArrayRef<int> Mask,
+ SDLoc DL, MVT VT, SDValue V, MutableArrayRef<int> Mask,
const X86Subtarget *Subtarget, SelectionDAG &DAG) {
- assert(V.getSimpleValueType() == MVT::v8i16 && "Bad input type!");
+ assert(VT.getScalarType() == MVT::i16 && "Bad input type!");
+ MVT PSHUFDVT = MVT::getVectorVT(MVT::i32, VT.getVectorNumElements() / 2);
+
+  assert(Mask.size() == 8 && "Shuffle mask length doesn't match!");
MutableArrayRef<int> LoMask = Mask.slice(0, 4);
MutableArrayRef<int> HiMask = Mask.slice(4, 4);
@@ -7845,9 +7907,9 @@ static SDValue lowerV8I16GeneralSingleInputVectorShuffle(
int PSHUFDMask[] = {0, 1, 2, 3};
PSHUFDMask[ADWord] = BDWord;
PSHUFDMask[BDWord] = ADWord;
- V = DAG.getNode(ISD::BITCAST, DL, MVT::v8i16,
- DAG.getNode(X86ISD::PSHUFD, DL, MVT::v4i32,
- DAG.getNode(ISD::BITCAST, DL, MVT::v4i32, V),
+ V = DAG.getNode(ISD::BITCAST, DL, VT,
+ DAG.getNode(X86ISD::PSHUFD, DL, PSHUFDVT,
+ DAG.getNode(ISD::BITCAST, DL, PSHUFDVT, V),
getV4X86ShuffleImm8ForMask(PSHUFDMask, DAG)));
// Adjust the mask to match the new locations of A and B.
@@ -7859,8 +7921,8 @@ static SDValue lowerV8I16GeneralSingleInputVectorShuffle(
// Recurse back into this routine to re-compute state now that this isn't
// a 3 and 1 problem.
- return DAG.getVectorShuffle(MVT::v8i16, DL, V, DAG.getUNDEF(MVT::v8i16),
- Mask);
+ return lowerV8I16GeneralSingleInputVectorShuffle(DL, VT, V, Mask, Subtarget,
+ DAG);
};
if ((NumLToL == 3 && NumHToL == 1) || (NumLToL == 1 && NumHToL == 3))
return balanceSides(LToLInputs, HToLInputs, HToHInputs, LToHInputs, 0, 4);
@@ -8083,15 +8145,15 @@ static SDValue lowerV8I16GeneralSingleInputVectorShuffle(
// Now enact all the shuffles we've computed to move the inputs into their
// target half.
if (!isNoopShuffleMask(PSHUFLMask))
- V = DAG.getNode(X86ISD::PSHUFLW, DL, MVT::v8i16, V,
+ V = DAG.getNode(X86ISD::PSHUFLW, DL, VT, V,
getV4X86ShuffleImm8ForMask(PSHUFLMask, DAG));
if (!isNoopShuffleMask(PSHUFHMask))
- V = DAG.getNode(X86ISD::PSHUFHW, DL, MVT::v8i16, V,
+ V = DAG.getNode(X86ISD::PSHUFHW, DL, VT, V,
getV4X86ShuffleImm8ForMask(PSHUFHMask, DAG));
if (!isNoopShuffleMask(PSHUFDMask))
- V = DAG.getNode(ISD::BITCAST, DL, MVT::v8i16,
- DAG.getNode(X86ISD::PSHUFD, DL, MVT::v4i32,
- DAG.getNode(ISD::BITCAST, DL, MVT::v4i32, V),
+ V = DAG.getNode(ISD::BITCAST, DL, VT,
+ DAG.getNode(X86ISD::PSHUFD, DL, PSHUFDVT,
+ DAG.getNode(ISD::BITCAST, DL, PSHUFDVT, V),
getV4X86ShuffleImm8ForMask(PSHUFDMask, DAG)));
// At this point, each half should contain all its inputs, and we can then
@@ -8105,7 +8167,7 @@ static SDValue lowerV8I16GeneralSingleInputVectorShuffle(
// Do a half shuffle for the low mask.
if (!isNoopShuffleMask(LoMask))
- V = DAG.getNode(X86ISD::PSHUFLW, DL, MVT::v8i16, V,
+ V = DAG.getNode(X86ISD::PSHUFLW, DL, VT, V,
getV4X86ShuffleImm8ForMask(LoMask, DAG));
// Do a half shuffle with the high mask after shifting its values down.
@@ -8113,7 +8175,7 @@ static SDValue lowerV8I16GeneralSingleInputVectorShuffle(
if (M >= 0)
M -= 4;
if (!isNoopShuffleMask(HiMask))
- V = DAG.getNode(X86ISD::PSHUFHW, DL, MVT::v8i16, V,
+ V = DAG.getNode(X86ISD::PSHUFHW, DL, VT, V,
getV4X86ShuffleImm8ForMask(HiMask, DAG));
return V;
@@ -8232,8 +8294,8 @@ static SDValue lowerV8I16VectorShuffle(SDValue Op, SDValue V1, SDValue V2,
Mask, Subtarget, DAG))
return Rotate;
- return lowerV8I16GeneralSingleInputVectorShuffle(DL, V1, Mask, Subtarget,
- DAG);
+ return lowerV8I16GeneralSingleInputVectorShuffle(DL, MVT::v8i16, V1, Mask,
+ Subtarget, DAG);
}
assert(std::any_of(Mask.begin(), Mask.end(), isV1) &&
@@ -8946,7 +9008,7 @@ static SDValue lowerVectorShuffleAsLanePermuteAndBlend(SDLoc DL, MVT VT,
int LaneSize = Mask.size() / 2;
// If there are only inputs from one 128-bit lane, splitting will in fact be
- // less expensive. The flags track wether the given lane contains an element
+ // less expensive. The flags track whether the given lane contains an element
// that crosses to another lane.
bool LaneCrossing[2] = {false, false};
for (int i = 0, Size = Mask.size(); i < Size; ++i)
@@ -8986,34 +9048,78 @@ static SDValue lowerV2X128VectorShuffle(SDLoc DL, MVT VT, SDValue V1,
SDValue V2, ArrayRef<int> Mask,
const X86Subtarget *Subtarget,
SelectionDAG &DAG) {
+  // TODO: If minimizing size and one of the inputs is a zero vector and the
+  // zero vector has only one use, we could use a VPERM2X128 to save the
+  // instruction bytes needed to explicitly generate the zero vector.
+
// Blends are faster and handle all the non-lane-crossing cases.
if (SDValue Blend = lowerVectorShuffleAsBlend(DL, VT, V1, V2, Mask,
Subtarget, DAG))
return Blend;
- MVT SubVT = MVT::getVectorVT(VT.getVectorElementType(),
- VT.getVectorNumElements() / 2);
- // Check for patterns which can be matched with a single insert of a 128-bit
- // subvector.
- if (isShuffleEquivalent(V1, V2, Mask, {0, 1, 0, 1}) ||
- isShuffleEquivalent(V1, V2, Mask, {0, 1, 4, 5})) {
- SDValue LoV = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SubVT, V1,
- DAG.getIntPtrConstant(0));
- SDValue HiV = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SubVT,
- Mask[2] < 4 ? V1 : V2, DAG.getIntPtrConstant(0));
- return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, LoV, HiV);
- }
- if (isShuffleEquivalent(V1, V2, Mask, {0, 1, 6, 7})) {
- SDValue LoV = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SubVT, V1,
- DAG.getIntPtrConstant(0));
- SDValue HiV = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SubVT, V2,
- DAG.getIntPtrConstant(2));
- return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, LoV, HiV);
+ bool IsV1Zero = ISD::isBuildVectorAllZeros(V1.getNode());
+ bool IsV2Zero = ISD::isBuildVectorAllZeros(V2.getNode());
+
+ // If either input operand is a zero vector, use VPERM2X128 because its mask
+ // allows us to replace the zero input with an implicit zero.
+ if (!IsV1Zero && !IsV2Zero) {
+ // Check for patterns which can be matched with a single insert of a 128-bit
+ // subvector.
+ bool OnlyUsesV1 = isShuffleEquivalent(V1, V2, Mask, {0, 1, 0, 1});
+ if (OnlyUsesV1 || isShuffleEquivalent(V1, V2, Mask, {0, 1, 4, 5})) {
+ MVT SubVT = MVT::getVectorVT(VT.getVectorElementType(),
+ VT.getVectorNumElements() / 2);
+ SDValue LoV = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SubVT, V1,
+ DAG.getIntPtrConstant(0));
+ SDValue HiV = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SubVT,
+ OnlyUsesV1 ? V1 : V2, DAG.getIntPtrConstant(0));
+ return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, LoV, HiV);
+ }
+ }
+
+ // Otherwise form a 128-bit permutation. After accounting for undefs,
+ // convert the 64-bit shuffle mask selection values into 128-bit
+ // selection bits by dividing the indexes by 2 and shifting into positions
+ // defined by a vperm2*128 instruction's immediate control byte.
+
+ // The immediate permute control byte looks like this:
+ // [1:0] - select 128 bits from sources for low half of destination
+ // [2] - ignore
+ // [3] - zero low half of destination
+ // [5:4] - select 128 bits from sources for high half of destination
+ // [6] - ignore
+ // [7] - zero high half of destination
+
+ int MaskLO = Mask[0];
+ if (MaskLO == SM_SentinelUndef)
+ MaskLO = Mask[1] == SM_SentinelUndef ? 0 : Mask[1];
+
+ int MaskHI = Mask[2];
+ if (MaskHI == SM_SentinelUndef)
+ MaskHI = Mask[3] == SM_SentinelUndef ? 0 : Mask[3];
+
+ unsigned PermMask = MaskLO / 2 | (MaskHI / 2) << 4;
+
+ // If either input is a zero vector, replace it with an undef input.
+ // Shuffle mask values < 4 are selecting elements of V1.
+ // Shuffle mask values >= 4 are selecting elements of V2.
+ // Adjust each half of the permute mask by clearing the half that was
+ // selecting the zero vector and setting the zero mask bit.
+ if (IsV1Zero) {
+ V1 = DAG.getUNDEF(VT);
+ if (MaskLO < 4)
+ PermMask = (PermMask & 0xf0) | 0x08;
+ if (MaskHI < 4)
+ PermMask = (PermMask & 0x0f) | 0x80;
+ }
+ if (IsV2Zero) {
+ V2 = DAG.getUNDEF(VT);
+ if (MaskLO >= 4)
+ PermMask = (PermMask & 0xf0) | 0x08;
+ if (MaskHI >= 4)
+ PermMask = (PermMask & 0x0f) | 0x80;
}
- // Otherwise form a 128-bit permutation.
- // FIXME: Detect zero-vector inputs and use the VPERM2X128 to zero that half.
- unsigned PermMask = Mask[0] / 2 | (Mask[2] / 2) << 4;
return DAG.getNode(X86ISD::VPERM2X128, DL, VT, V1, V2,
DAG.getConstant(PermMask, MVT::i8));
}
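
A self-contained recomputation of the immediate logic above (illustrative
only; the function name is invented). Mask entries are v4i64-style selectors,
0-3 from V1 and 4-7 from V2, with -1 meaning undef:

#include <cassert>

unsigned vperm2x128Imm(int M0, int M1, int M2, int M3,
                       bool IsV1Zero, bool IsV2Zero) {
  // Undef entries fall back to their neighbor, then to 0.
  int MaskLO = M0 >= 0 ? M0 : (M1 >= 0 ? M1 : 0);
  int MaskHI = M2 >= 0 ? M2 : (M3 >= 0 ? M3 : 0);
  unsigned PermMask = (unsigned)(MaskLO / 2) | ((unsigned)(MaskHI / 2) << 4);
  // A half that selected an all-zero input uses the zero bits instead.
  if (IsV1Zero) {
    if (MaskLO < 4) PermMask = (PermMask & 0xf0) | 0x08;
    if (MaskHI < 4) PermMask = (PermMask & 0x0f) | 0x80;
  }
  if (IsV2Zero) {
    if (MaskLO >= 4) PermMask = (PermMask & 0xf0) | 0x08;
    if (MaskHI >= 4) PermMask = (PermMask & 0x0f) | 0x80;
  }
  return PermMask;
}

int main() {
  // <0,1,6,7>: low half of V1, high half of V2 -> 0x30.
  assert(vperm2x128Imm(0, 1, 6, 7, false, false) == 0x30);
  // Same mask with an all-zero V2: zero the high destination half -> 0x80.
  assert(vperm2x128Imm(0, 1, 6, 7, false, true) == 0x80);
}
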
@@ -9326,6 +9432,15 @@ static SDValue lowerV8F32VectorShuffle(SDValue Op, SDValue V1, SDValue V2,
ArrayRef<int> Mask = SVOp->getMask();
assert(Mask.size() == 8 && "Unexpected mask size for v8 shuffle!");
+ // If we have a single input to the zero element, insert that into V1 if we
+ // can do so cheaply.
+ int NumV2Elements =
+ std::count_if(Mask.begin(), Mask.end(), [](int M) { return M >= 8; });
+ if (NumV2Elements == 1 && Mask[0] >= 8)
+ if (SDValue Insertion = lowerVectorShuffleAsElementInsertion(
+ DL, MVT::v8f32, V1, V2, Mask, Subtarget, DAG))
+ return Insertion;
+
if (SDValue Blend = lowerVectorShuffleAsBlend(DL, MVT::v8f32, V1, V2, Mask,
Subtarget, DAG))
return Blend;
@@ -9557,6 +9672,15 @@ static SDValue lowerV16I16VectorShuffle(SDValue Op, SDValue V1, SDValue V2,
return lowerVectorShuffleAsLanePermuteAndBlend(DL, MVT::v16i16, V1, V2,
Mask, DAG);
+ SmallVector<int, 8> RepeatedMask;
+ if (is128BitLaneRepeatedShuffleMask(MVT::v16i16, Mask, RepeatedMask)) {
+ // As this is a single-input shuffle, the repeated mask should be
+ // a strictly valid v8i16 mask that we can pass through to the v8i16
+ // lowering to handle even the v16 case.
+ return lowerV8I16GeneralSingleInputVectorShuffle(
+ DL, MVT::v16i16, V1, RepeatedMask, Subtarget, DAG);
+ }
+
SDValue PSHUFBMask[32];
for (int i = 0; i < 16; ++i) {
if (Mask[i] == -1) {
@@ -10118,8 +10242,7 @@ SDValue X86TargetLowering::LowerVSELECT(SDValue Op, SelectionDAG &DAG) const {
// Try to lower this to a blend-style vector shuffle. This can handle all
// constant condition cases.
- SDValue BlendOp = lowerVSELECTtoVectorShuffle(Op, Subtarget, DAG);
- if (BlendOp.getNode())
+ if (SDValue BlendOp = lowerVSELECTtoVectorShuffle(Op, Subtarget, DAG))
return BlendOp;
// Variable blends are only legal from SSE4.1 onward.
@@ -10421,17 +10544,31 @@ SDValue X86TargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op,
// If the vector is wider than 128 bits, extract the 128-bit subvector, insert
// into that, and then insert the subvector back into the result.
if (VT.is256BitVector() || VT.is512BitVector()) {
- // Get the desired 128-bit vector half.
+ // With a 256-bit vector, we can insert into the zero element efficiently
+ // using a blend if we have AVX or AVX2 and the right data type.
+ if (VT.is256BitVector() && IdxVal == 0) {
+ // TODO: It is worthwhile to cast integer to floating point and back
+ // and incur a domain crossing penalty if that's what we'll end up
+ // doing anyway after extracting to a 128-bit vector.
+ if ((Subtarget->hasAVX() && (EltVT == MVT::f64 || EltVT == MVT::f32)) ||
+ (Subtarget->hasAVX2() && EltVT == MVT::i32)) {
+ SDValue N1Vec = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, N1);
+ N2 = DAG.getIntPtrConstant(1);
+ return DAG.getNode(X86ISD::BLENDI, dl, VT, N0, N1Vec, N2);
+ }
+ }
+
+ // Get the desired 128-bit vector chunk.
SDValue V = Extract128BitVector(N0, IdxVal, DAG, dl);
- // Insert the element into the desired half.
+ // Insert the element into the desired chunk.
unsigned NumEltsIn128 = 128 / EltVT.getSizeInBits();
unsigned IdxIn128 = IdxVal - (IdxVal / NumEltsIn128) * NumEltsIn128;
V = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, V.getValueType(), V, N1,
DAG.getConstant(IdxIn128, MVT::i32));
- // Insert the changed part back to the 256-bit vector
+ // Insert the changed part back into the bigger vector
return Insert128BitVector(N0, V, IdxVal, DAG, dl);
}
assert(VT.is128BitVector() && "Only 128-bit vector types should be left!");
@@ -10456,16 +10593,29 @@ SDValue X86TargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op,
}
if (EltVT == MVT::f32) {
- // Bits [7:6] of the constant are the source select. This will always be
- // zero here. The DAG Combiner may combine an extract_elt index into
- // these
- // bits. For example (insert (extract, 3), 2) could be matched by
- // putting
- // the '3' into bits [7:6] of X86ISD::INSERTPS.
- // Bits [5:4] of the constant are the destination select. This is the
- // value of the incoming immediate.
- // Bits [3:0] of the constant are the zero mask. The DAG Combiner may
+ // Bits [7:6] of the constant are the source select. This will always be
+ // zero here. The DAG Combiner may combine an extract_elt index into
+ // these bits. For example (insert (extract, 3), 2) could be matched by
+ // putting the '3' into bits [7:6] of X86ISD::INSERTPS.
+ // Bits [5:4] of the constant are the destination select. This is the
+ // value of the incoming immediate.
+ // Bits [3:0] of the constant are the zero mask. The DAG Combiner may
// combine either bitwise AND or insert of float 0.0 to set these bits.
+
+ const Function *F = DAG.getMachineFunction().getFunction();
+ bool MinSize = F->hasFnAttribute(Attribute::MinSize);
+ if (IdxVal == 0 && (!MinSize || !MayFoldLoad(N1))) {
+ // If this is an insertion of 32-bits into the low 32-bits of
+ // a vector, we prefer to generate a blend with immediate rather
+ // than an insertps. Blends are simpler operations in hardware and so
+ // will always have equal or better performance than insertps.
+ // But if optimizing for size and there's a load folding opportunity,
+ // generate insertps because blendps does not have a 32-bit memory
+ // operand form.
+ N2 = DAG.getIntPtrConstant(1);
+ N1 = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v4f32, N1);
+ return DAG.getNode(X86ISD::BLENDI, dl, VT, N0, N1, N2);
+ }
N2 = DAG.getIntPtrConstant(IdxVal << 4);
// Create this as a scalar to vector..
N1 = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v4f32, N1);
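
A sketch of the INSERTPS immediate layout restated by the comment above
(illustrative, not part of the patch):

#include <cassert>

// [7:6] source element, [5:4] destination element, [3:0] zero mask.
unsigned insertpsImm(unsigned SrcIdx, unsigned DstIdx, unsigned ZeroMask) {
  assert(SrcIdx < 4 && DstIdx < 4 && ZeroMask < 16);
  return (SrcIdx << 6) | (DstIdx << 4) | ZeroMask;
}

int main() {
  // The lowering above only sets the destination select: IdxVal << 4.
  assert(insertpsImm(0, 2, 0) == (2u << 4));
  // (insert (extract V, 3), 2) could later fold the '3' in as well: 0xE0.
  assert(insertpsImm(3, 2, 0) == 0xE0);
}
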
@@ -10593,6 +10743,37 @@ static SDValue LowerINSERT_SUBVECTOR(SDValue Op, const X86Subtarget *Subtarget,
if (OpVT.is512BitVector() && SubVecVT.is256BitVector())
return Insert256BitVector(Vec, SubVec, IdxVal, DAG, dl);
+ if (OpVT.getVectorElementType() == MVT::i1) {
+ if (IdxVal == 0 && Vec.getOpcode() == ISD::UNDEF) // the operation is legal
+ return Op;
+ SDValue ZeroIdx = DAG.getIntPtrConstant(0);
+ SDValue Undef = DAG.getUNDEF(OpVT);
+ unsigned NumElems = OpVT.getVectorNumElements();
+ SDValue ShiftBits = DAG.getConstant(NumElems/2, MVT::i8);
+
+ if (IdxVal == OpVT.getVectorNumElements() / 2) {
+ // Zero upper bits of the Vec
+ Vec = DAG.getNode(X86ISD::VSHLI, dl, OpVT, Vec, ShiftBits);
+ Vec = DAG.getNode(X86ISD::VSRLI, dl, OpVT, Vec, ShiftBits);
+
+ SDValue Vec2 = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, OpVT, Undef,
+ SubVec, ZeroIdx);
+ Vec2 = DAG.getNode(X86ISD::VSHLI, dl, OpVT, Vec2, ShiftBits);
+ return DAG.getNode(ISD::OR, dl, OpVT, Vec, Vec2);
+ }
+ if (IdxVal == 0) {
+ SDValue Vec2 = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, OpVT, Undef,
+ SubVec, ZeroIdx);
+ // Zero upper bits of the Vec2
+ Vec2 = DAG.getNode(X86ISD::VSHLI, dl, OpVT, Vec2, ShiftBits);
+ Vec2 = DAG.getNode(X86ISD::VSRLI, dl, OpVT, Vec2, ShiftBits);
+ // Zero lower bits of the Vec
+ Vec = DAG.getNode(X86ISD::VSRLI, dl, OpVT, Vec, ShiftBits);
+ Vec = DAG.getNode(X86ISD::VSHLI, dl, OpVT, Vec, ShiftBits);
+ // Merge them together
+ return DAG.getNode(ISD::OR, dl, OpVT, Vec, Vec2);
+ }
+ }
return SDValue();
}
@@ -13149,9 +13330,9 @@ SDValue X86TargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const {
EVT VT = Op1.getValueType();
SDValue CC;
- // Lower fp selects into a CMP/AND/ANDN/OR sequence when the necessary SSE ops
- // are available. Otherwise fp cmovs get lowered into a less efficient branch
- // sequence later on.
+ // Lower FP selects into a CMP/AND/ANDN/OR sequence when the necessary SSE ops
+ // are available or VBLENDV if AVX is available.
+ // Otherwise FP cmovs get lowered into a less efficient branch sequence later.
if (Cond.getOpcode() == ISD::SETCC &&
((Subtarget->hasSSE2() && (VT == MVT::f32 || VT == MVT::f64)) ||
(Subtarget->hasSSE1() && VT == MVT::f32)) &&
@@ -13166,8 +13347,42 @@ SDValue X86TargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const {
DAG.getConstant(SSECC, MVT::i8));
return DAG.getNode(X86ISD::SELECT, DL, VT, Cmp, Op1, Op2);
}
+
SDValue Cmp = DAG.getNode(X86ISD::FSETCC, DL, VT, CondOp0, CondOp1,
DAG.getConstant(SSECC, MVT::i8));
+
+ // If we have AVX, we can use a variable vector select (VBLENDV) instead
+ // of 3 logic instructions for size savings and potentially speed.
+ // Unfortunately, there is no scalar form of VBLENDV.
+
+ // If either operand is a constant, don't try this. We can expect to
+ // optimize away at least one of the logic instructions later in that
+ // case, so that sequence would be faster than a variable blend.
+
+ // BLENDV was introduced with SSE 4.1, but the 2 register form implicitly
+ // uses XMM0 as the selection register. That may need just as many
+ // instructions as the AND/ANDN/OR sequence due to register moves, so
+ // don't bother.
+
+ if (Subtarget->hasAVX() &&
+ !isa<ConstantFPSDNode>(Op1) && !isa<ConstantFPSDNode>(Op2)) {
+
+ // Convert to vectors, do a VSELECT, and convert back to scalar.
+ // All of the conversions should be optimized away.
+
+ EVT VecVT = VT == MVT::f32 ? MVT::v4f32 : MVT::v2f64;
+ SDValue VOp1 = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, VecVT, Op1);
+ SDValue VOp2 = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, VecVT, Op2);
+ SDValue VCmp = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, VecVT, Cmp);
+
+ EVT VCmpVT = VT == MVT::f32 ? MVT::v4i32 : MVT::v2i64;
+ VCmp = DAG.getNode(ISD::BITCAST, DL, VCmpVT, VCmp);
+
+ SDValue VSel = DAG.getNode(ISD::VSELECT, DL, VecVT, VCmp, VOp1, VOp2);
+
+ return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT,
+ VSel, DAG.getIntPtrConstant(0));
+ }
SDValue AndN = DAG.getNode(X86ISD::FANDN, DL, VT, Cmp, Op2);
SDValue And = DAG.getNode(X86ISD::FAND, DL, VT, Cmp, Op1);
return DAG.getNode(X86ISD::FOR, DL, VT, AndN, And);
@@ -14595,6 +14810,13 @@ static SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, const X86Subtarget *Subtarget
switch (IntNo) {
default: return SDValue(); // Don't custom lower most intrinsics.
+ case Intrinsic::x86_avx2_permd:
+ case Intrinsic::x86_avx2_permps:
+ // Operands intentionally swapped. Mask is last operand to intrinsic,
+ // but second operand for node/instruction.
+ return DAG.getNode(X86ISD::VPERMV, dl, Op.getValueType(),
+ Op.getOperand(2), Op.getOperand(1));
+
case Intrinsic::x86_avx512_mask_valign_q_512:
case Intrinsic::x86_avx512_mask_valign_d_512:
// Vector source operands are swapped.
@@ -16039,21 +16261,19 @@ static SDValue LowerShift(SDValue Op, const X86Subtarget* Subtarget,
SDLoc dl(Op);
SDValue R = Op.getOperand(0);
SDValue Amt = Op.getOperand(1);
- SDValue V;
assert(VT.isVector() && "Custom lowering only for vector shifts!");
assert(Subtarget->hasSSE2() && "Only custom lower when we have SSE2!");
- V = LowerScalarImmediateShift(Op, DAG, Subtarget);
- if (V.getNode())
+ if (SDValue V = LowerScalarImmediateShift(Op, DAG, Subtarget))
return V;
- V = LowerScalarVariableShift(Op, DAG, Subtarget);
- if (V.getNode())
+ if (SDValue V = LowerScalarVariableShift(Op, DAG, Subtarget))
return V;
if (Subtarget->hasAVX512() && (VT == MVT::v16i32 || VT == MVT::v8i64))
return Op;
+
// AVX2 has VPSLLV/VPSRAV/VPSRLV.
if (Subtarget->hasInt256()) {
if (Op.getOpcode() == ISD::SRL &&
@@ -16068,6 +16288,17 @@ static SDValue LowerShift(SDValue Op, const X86Subtarget* Subtarget,
return Op;
}
+  // v2i64 vector logical shifts can efficiently avoid scalarization - do the
+ // shifts per-lane and then shuffle the partial results back together.
+ if (VT == MVT::v2i64 && Op.getOpcode() != ISD::SRA) {
+ // Splat the shift amounts so the scalar shifts above will catch it.
+ SDValue Amt0 = DAG.getVectorShuffle(VT, dl, Amt, Amt, {0, 0});
+ SDValue Amt1 = DAG.getVectorShuffle(VT, dl, Amt, Amt, {1, 1});
+ SDValue R0 = DAG.getNode(Op->getOpcode(), dl, VT, R, Amt0);
+ SDValue R1 = DAG.getNode(Op->getOpcode(), dl, VT, R, Amt1);
+ return DAG.getVectorShuffle(VT, dl, R0, R1, {0, 3});
+ }
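
A scalar model of the per-lane trick above (illustrative only): splat each
shift amount, shift the whole vector twice, then keep lane 0 of the first
result and lane 1 of the second, which is exactly the {0, 3} shuffle:

#include <cassert>
#include <cstdint>

void shlV2i64(const uint64_t R[2], const uint64_t Amt[2], uint64_t Out[2]) {
  uint64_t R0[2] = {R[0] << Amt[0], R[1] << Amt[0]}; // shift by splat(Amt[0])
  uint64_t R1[2] = {R[0] << Amt[1], R[1] << Amt[1]}; // shift by splat(Amt[1])
  Out[0] = R0[0]; // shuffle index 0: element 0 of R0
  Out[1] = R1[1]; // shuffle index 3: element 1 of R1
}

int main() {
  uint64_t R[2] = {1, 1}, Amt[2] = {3, 5}, Out[2];
  shlV2i64(R, Amt, Out);
  assert(Out[0] == 8 && Out[1] == 32);
}
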
+
// If possible, lower this packed shift into a vector multiply instead of
// expanding it into a sequence of scalar shifts.
// Do this only if the vector shift count is a constant build_vector.
@@ -16238,7 +16469,7 @@ static SDValue LowerShift(SDValue Op, const X86Subtarget* Subtarget,
Amt = DAG.getNode(ISD::ANY_EXTEND, dl, NewVT, Amt);
return DAG.getNode(ISD::TRUNCATE, dl, VT,
DAG.getNode(Op.getOpcode(), dl, NewVT, R, Amt));
- }
+ }
// Decompose 256-bit shifts into smaller 128-bit shifts.
if (VT.is256BitVector()) {
@@ -16254,12 +16485,9 @@ static SDValue LowerShift(SDValue Op, const X86Subtarget* Subtarget,
SDValue Amt1, Amt2;
if (Amt.getOpcode() == ISD::BUILD_VECTOR) {
// Constant shift amount
- SmallVector<SDValue, 4> Amt1Csts;
- SmallVector<SDValue, 4> Amt2Csts;
- for (unsigned i = 0; i != NumElems/2; ++i)
- Amt1Csts.push_back(Amt->getOperand(i));
- for (unsigned i = NumElems/2; i != NumElems; ++i)
- Amt2Csts.push_back(Amt->getOperand(i));
+ SmallVector<SDValue, 8> Ops(Amt->op_begin(), Amt->op_begin() + NumElems);
+ ArrayRef<SDValue> Amt1Csts = makeArrayRef(Ops).slice(0, NumElems / 2);
+ ArrayRef<SDValue> Amt2Csts = makeArrayRef(Ops).slice(NumElems / 2);
Amt1 = DAG.getNode(ISD::BUILD_VECTOR, dl, NewVT, Amt1Csts);
Amt2 = DAG.getNode(ISD::BUILD_VECTOR, dl, NewVT, Amt2Csts);
@@ -16386,14 +16614,17 @@ bool X86TargetLowering::shouldExpandAtomicLoadInIR(LoadInst *LI) const {
return needsCmpXchgNb(PTy->getElementType());
}
-bool X86TargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const {
+TargetLoweringBase::AtomicRMWExpansionKind
+X86TargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const {
unsigned NativeWidth = Subtarget->is64Bit() ? 64 : 32;
const Type *MemType = AI->getType();
// If the operand is too big, we must see if cmpxchg8/16b is available
// and default to library calls otherwise.
- if (MemType->getPrimitiveSizeInBits() > NativeWidth)
- return needsCmpXchgNb(MemType);
+ if (MemType->getPrimitiveSizeInBits() > NativeWidth) {
+ return needsCmpXchgNb(MemType) ? AtomicRMWExpansionKind::CmpXChg
+ : AtomicRMWExpansionKind::None;
+ }
AtomicRMWInst::BinOp Op = AI->getOperation();
switch (Op) {
@@ -16403,13 +16634,14 @@ bool X86TargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const {
case AtomicRMWInst::Add:
case AtomicRMWInst::Sub:
// It's better to use xadd, xsub or xchg for these in all cases.
- return false;
+ return AtomicRMWExpansionKind::None;
case AtomicRMWInst::Or:
case AtomicRMWInst::And:
case AtomicRMWInst::Xor:
// If the atomicrmw's result isn't actually used, we can just add a "lock"
// prefix to a normal instruction for these operations.
- return !AI->use_empty();
+ return !AI->use_empty() ? AtomicRMWExpansionKind::CmpXChg
+ : AtomicRMWExpansionKind::None;
case AtomicRMWInst::Nand:
case AtomicRMWInst::Max:
case AtomicRMWInst::Min:
@@ -16417,7 +16649,7 @@ bool X86TargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const {
case AtomicRMWInst::UMin:
// These always require a non-trivial set of data operations on x86. We must
// use a cmpxchg loop.
- return true;
+ return AtomicRMWExpansionKind::CmpXChg;
}
}
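
The decision above reduces to a small table. A hedged standalone restatement
(the enums are local stand-ins, not the real LLVM types):

#include <cassert>

enum class Kind { None, CmpXChg }; // analogue of AtomicRMWExpansionKind
enum class Op { Xchg, Add, Sub, Or, And, Xor, Nand, Max, Min, UMax, UMin };

// Ops with a direct x86 form (xchg/xadd) never expand; bitwise ops expand
// only when their old value is used (otherwise a single "lock op" works);
// nand/min/max always need a cmpxchg loop.
Kind shouldExpand(Op O, bool ResultUsed) {
  switch (O) {
  case Op::Xchg: case Op::Add: case Op::Sub:
    return Kind::None;
  case Op::Or: case Op::And: case Op::Xor:
    return ResultUsed ? Kind::CmpXChg : Kind::None;
  default:
    return Kind::CmpXChg;
  }
}

int main() {
  assert(shouldExpand(Op::Add, true) == Kind::None);
  assert(shouldExpand(Op::Or, false) == Kind::None);
  assert(shouldExpand(Op::Or, true) == Kind::CmpXChg);
  assert(shouldExpand(Op::Min, false) == Kind::CmpXChg);
}
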
@@ -16874,7 +17106,7 @@ SDValue X86TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
case ISD::ATOMIC_LOAD_SUB: return LowerLOAD_SUB(Op,DAG);
case ISD::ATOMIC_STORE: return LowerATOMIC_STORE(Op,DAG);
case ISD::BUILD_VECTOR: return LowerBUILD_VECTOR(Op, DAG);
- case ISD::CONCAT_VECTORS: return LowerCONCAT_VECTORS(Op, DAG);
+ case ISD::CONCAT_VECTORS: return LowerCONCAT_VECTORS(Op, Subtarget, DAG);
case ISD::VECTOR_SHUFFLE: return lowerVectorShuffle(Op, Subtarget, DAG);
case ISD::VSELECT: return LowerVSELECT(Op, DAG);
case ISD::EXTRACT_VECTOR_ELT: return LowerEXTRACT_VECTOR_ELT(Op, DAG);
@@ -17719,7 +17951,8 @@ X86TargetLowering::EmitVAARG64WithCustomInserter(MachineInstr *MI,
// 9 ) EFLAGS (implicit-def)
assert(MI->getNumOperands() == 10 && "VAARG_64 should have 10 operands!");
- assert(X86::AddrNumOperands == 5 && "VAARG_64 assumes 5 address operands");
+ static_assert(X86::AddrNumOperands == 5,
+ "VAARG_64 assumes 5 address operands");
unsigned DestReg = MI->getOperand(0).getReg();
MachineOperand &Base = MI->getOperand(1);
@@ -18095,6 +18328,92 @@ X86TargetLowering::EmitLoweredSelect(MachineInstr *MI,
// fallthrough --> copy0MBB
MachineBasicBlock *thisMBB = BB;
MachineFunction *F = BB->getParent();
+
+ // We also lower double CMOVs:
+ // (CMOV (CMOV F, T, cc1), T, cc2)
+ // to two successives branches. For that, we look for another CMOV as the
+ // following instruction.
+ //
+ // Without this, we would add a PHI between the two jumps, which ends up
+ // creating a few copies all around. For instance, for
+ //
+ // (sitofp (zext (fcmp une)))
+ //
+ // we would generate:
+ //
+ // ucomiss %xmm1, %xmm0
+ // movss <1.0f>, %xmm0
+ // movaps %xmm0, %xmm1
+ // jne .LBB5_2
+ // xorps %xmm1, %xmm1
+ // .LBB5_2:
+ // jp .LBB5_4
+ // movaps %xmm1, %xmm0
+ // .LBB5_4:
+ // retq
+ //
+ // because this custom-inserter would have generated:
+ //
+ // A
+ // | \
+ // | B
+ // | /
+ // C
+ // | \
+ // | D
+ // | /
+ // E
+ //
+ // A: X = ...; Y = ...
+ // B: empty
+ // C: Z = PHI [X, A], [Y, B]
+ // D: empty
+ // E: PHI [X, C], [Z, D]
+ //
+ // If we lower both CMOVs in a single step, we can instead generate:
+ //
+ // A
+ // | \
+ // | C
+ // | /|
+ // |/ |
+ // | |
+ // | D
+ // | /
+ // E
+ //
+ // A: X = ...; Y = ...
+ // D: empty
+ // E: PHI [X, A], [X, C], [Y, D]
+ //
+ // Which, in our sitofp/fcmp example, gives us something like:
+ //
+ // ucomiss %xmm1, %xmm0
+ // movss <1.0f>, %xmm0
+ // jne .LBB5_4
+ // jp .LBB5_4
+ // xorps %xmm0, %xmm0
+ // .LBB5_4:
+ // retq
+ //
+ MachineInstr *NextCMOV = nullptr;
+ MachineBasicBlock::iterator NextMIIt =
+ std::next(MachineBasicBlock::iterator(MI));
+ if (NextMIIt != BB->end() && NextMIIt->getOpcode() == MI->getOpcode() &&
+ NextMIIt->getOperand(2).getReg() == MI->getOperand(2).getReg() &&
+ NextMIIt->getOperand(1).getReg() == MI->getOperand(0).getReg())
+ NextCMOV = &*NextMIIt;
+
+ MachineBasicBlock *jcc1MBB = nullptr;
+
+ // If we have a double CMOV, we lower it to two successive branches to
+ // the same block. EFLAGS is used by both, so mark it as live in the second.
+ if (NextCMOV) {
+ jcc1MBB = F->CreateMachineBasicBlock(LLVM_BB);
+ F->insert(It, jcc1MBB);
+ jcc1MBB->addLiveIn(X86::EFLAGS);
+ }
+
MachineBasicBlock *copy0MBB = F->CreateMachineBasicBlock(LLVM_BB);
MachineBasicBlock *sinkMBB = F->CreateMachineBasicBlock(LLVM_BB);
F->insert(It, copy0MBB);
@@ -18103,8 +18422,10 @@ X86TargetLowering::EmitLoweredSelect(MachineInstr *MI,
// If the EFLAGS register isn't dead in the terminator, then claim that it's
// live into the sink and copy blocks.
const TargetRegisterInfo *TRI = Subtarget->getRegisterInfo();
- if (!MI->killsRegister(X86::EFLAGS) &&
- !checkAndUpdateEFLAGSKill(MI, BB, TRI)) {
+
+ MachineInstr *LastEFLAGSUser = NextCMOV ? NextCMOV : MI;
+ if (!LastEFLAGSUser->killsRegister(X86::EFLAGS) &&
+ !checkAndUpdateEFLAGSKill(LastEFLAGSUser, BB, TRI)) {
copy0MBB->addLiveIn(X86::EFLAGS);
sinkMBB->addLiveIn(X86::EFLAGS);
}
@@ -18115,7 +18436,19 @@ X86TargetLowering::EmitLoweredSelect(MachineInstr *MI,
sinkMBB->transferSuccessorsAndUpdatePHIs(BB);
// Add the true and fallthrough blocks as its successors.
- BB->addSuccessor(copy0MBB);
+ if (NextCMOV) {
+ // The fallthrough block may be jcc1MBB, if we have a double CMOV.
+ BB->addSuccessor(jcc1MBB);
+
+    // In that case, jcc1MBB will itself fall through to copy0MBB, and then
+    // jump to the sinkMBB.
+ jcc1MBB->addSuccessor(copy0MBB);
+ jcc1MBB->addSuccessor(sinkMBB);
+ } else {
+ BB->addSuccessor(copy0MBB);
+ }
+
+ // The true block target of the first (or only) branch is always sinkMBB.
BB->addSuccessor(sinkMBB);
// Create the conditional branch instruction.
@@ -18123,6 +18456,12 @@ X86TargetLowering::EmitLoweredSelect(MachineInstr *MI,
X86::GetCondBranchFromCond((X86::CondCode)MI->getOperand(3).getImm());
BuildMI(BB, DL, TII->get(Opc)).addMBB(sinkMBB);
+ if (NextCMOV) {
+ unsigned Opc2 = X86::GetCondBranchFromCond(
+ (X86::CondCode)NextCMOV->getOperand(3).getImm());
+ BuildMI(jcc1MBB, DL, TII->get(Opc2)).addMBB(sinkMBB);
+ }
+
// copy0MBB:
// %FalseValue = ...
// # fallthrough to sinkMBB
@@ -18131,10 +18470,22 @@ X86TargetLowering::EmitLoweredSelect(MachineInstr *MI,
// sinkMBB:
// %Result = phi [ %FalseValue, copy0MBB ], [ %TrueValue, thisMBB ]
// ...
- BuildMI(*sinkMBB, sinkMBB->begin(), DL,
- TII->get(X86::PHI), MI->getOperand(0).getReg())
- .addReg(MI->getOperand(1).getReg()).addMBB(copy0MBB)
- .addReg(MI->getOperand(2).getReg()).addMBB(thisMBB);
+ MachineInstrBuilder MIB =
+ BuildMI(*sinkMBB, sinkMBB->begin(), DL, TII->get(X86::PHI),
+ MI->getOperand(0).getReg())
+ .addReg(MI->getOperand(1).getReg()).addMBB(copy0MBB)
+ .addReg(MI->getOperand(2).getReg()).addMBB(thisMBB);
+
+ // If we have a double CMOV, the second Jcc provides the same incoming
+ // value as the first Jcc (the True operand of the SELECT_CC/CMOV nodes).
+ if (NextCMOV) {
+ MIB.addReg(MI->getOperand(2).getReg()).addMBB(jcc1MBB);
+ // Copy the PHI result to the register defined by the second CMOV.
+ BuildMI(*sinkMBB, std::next(MachineBasicBlock::iterator(MIB.getInstr())),
+ DL, TII->get(TargetOpcode::COPY), NextCMOV->getOperand(0).getReg())
+ .addReg(MI->getOperand(0).getReg());
+ NextCMOV->eraseFromParent();
+ }
MI->eraseFromParent(); // The pseudo instruction is gone now.
return sinkMBB;
@@ -18218,7 +18569,7 @@ X86TargetLowering::EmitLoweredSegAlloca(MachineInstr *MI,
// Calls into a routine in libgcc to allocate more space from the heap.
const uint32_t *RegMask =
- Subtarget->getRegisterInfo()->getCallPreservedMask(CallingConv::C);
+ Subtarget->getRegisterInfo()->getCallPreservedMask(*MF, CallingConv::C);
if (IsLP64) {
BuildMI(mallocMBB, DL, TII->get(X86::MOV64rr), X86::RDI)
.addReg(sizeVReg);
@@ -18303,7 +18654,7 @@ X86TargetLowering::EmitLoweredTLSCall(MachineInstr *MI,
// FIXME: The 32-bit calls have non-standard calling conventions. Use a
// proper register mask.
const uint32_t *RegMask =
- Subtarget->getRegisterInfo()->getCallPreservedMask(CallingConv::C);
+ Subtarget->getRegisterInfo()->getCallPreservedMask(*F, CallingConv::C);
if (Subtarget->is64Bit()) {
MachineInstrBuilder MIB = BuildMI(*BB, MI, DL,
TII->get(X86::MOV64rm), X86::RDI)
@@ -19132,9 +19483,11 @@ static bool combineX86ShuffleChain(SDValue Op, SDValue Root, ArrayRef<int> Mask,
// Note that even with AVX we prefer the PSHUFD form of shuffle for integer
// vectors because it can have a load folded into it that UNPCK cannot. This
// doesn't preclude something switching to the shorter encoding post-RA.
- if (FloatDomain) {
- if (Mask.equals(0, 0) || Mask.equals(1, 1)) {
- bool Lo = Mask.equals(0, 0);
+ //
+ // FIXME: Should teach these routines about AVX vector widths.
+ if (FloatDomain && VT.getSizeInBits() == 128) {
+ if (Mask.equals({0, 0}) || Mask.equals({1, 1})) {
+ bool Lo = Mask.equals({0, 0});
unsigned Shuffle;
MVT ShuffleVT;
// Check if we have SSE3 which will let us use MOVDDUP. That instruction
@@ -19163,8 +19516,8 @@ static bool combineX86ShuffleChain(SDValue Op, SDValue Root, ArrayRef<int> Mask,
return true;
}
if (Subtarget->hasSSE3() &&
- (Mask.equals(0, 0, 2, 2) || Mask.equals(1, 1, 3, 3))) {
- bool Lo = Mask.equals(0, 0, 2, 2);
+ (Mask.equals({0, 0, 2, 2}) || Mask.equals({1, 1, 3, 3}))) {
+ bool Lo = Mask.equals({0, 0, 2, 2});
unsigned Shuffle = Lo ? X86ISD::MOVSLDUP : X86ISD::MOVSHDUP;
MVT ShuffleVT = MVT::v4f32;
if (Depth == 1 && Root->getOpcode() == Shuffle)
@@ -19177,8 +19530,8 @@ static bool combineX86ShuffleChain(SDValue Op, SDValue Root, ArrayRef<int> Mask,
/*AddTo*/ true);
return true;
}
- if (Mask.equals(0, 0, 1, 1) || Mask.equals(2, 2, 3, 3)) {
- bool Lo = Mask.equals(0, 0, 1, 1);
+ if (Mask.equals({0, 0, 1, 1}) || Mask.equals({2, 2, 3, 3})) {
+ bool Lo = Mask.equals({0, 0, 1, 1});
unsigned Shuffle = Lo ? X86ISD::UNPCKL : X86ISD::UNPCKH;
MVT ShuffleVT = MVT::v4f32;
if (Depth == 1 && Root->getOpcode() == Shuffle)
@@ -19196,12 +19549,12 @@ static bool combineX86ShuffleChain(SDValue Op, SDValue Root, ArrayRef<int> Mask,
// We always canonicalize the 8 x i16 and 16 x i8 shuffles into their UNPCK
// variants as none of these have single-instruction variants that are
// superior to the UNPCK formulation.
- if (!FloatDomain &&
- (Mask.equals(0, 0, 1, 1, 2, 2, 3, 3) ||
- Mask.equals(4, 4, 5, 5, 6, 6, 7, 7) ||
- Mask.equals(0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7) ||
- Mask.equals(8, 8, 9, 9, 10, 10, 11, 11, 12, 12, 13, 13, 14, 14, 15,
- 15))) {
+ if (!FloatDomain && VT.getSizeInBits() == 128 &&
+ (Mask.equals({0, 0, 1, 1, 2, 2, 3, 3}) ||
+ Mask.equals({4, 4, 5, 5, 6, 6, 7, 7}) ||
+ Mask.equals({0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7}) ||
+ Mask.equals(
+ {8, 8, 9, 9, 10, 10, 11, 11, 12, 12, 13, 13, 14, 14, 15, 15}))) {
bool Lo = Mask[0] == 0;
unsigned Shuffle = Lo ? X86ISD::UNPCKL : X86ISD::UNPCKH;
if (Depth == 1 && Root->getOpcode() == Shuffle)
@@ -19237,9 +19590,9 @@ static bool combineX86ShuffleChain(SDValue Op, SDValue Root, ArrayRef<int> Mask,
// in practice PSHUFB tends to be *very* fast so we're more aggressive.
if ((Depth >= 3 || HasPSHUFB) && Subtarget->hasSSSE3()) {
SmallVector<SDValue, 16> PSHUFBMask;
- assert(Mask.size() <= 16 && "Can't shuffle elements smaller than bytes!");
- int Ratio = 16 / Mask.size();
- for (unsigned i = 0; i < 16; ++i) {
+ int NumBytes = VT.getSizeInBits() / 8;
+ int Ratio = NumBytes / Mask.size();
+ for (int i = 0; i < NumBytes; ++i) {
if (Mask[i / Ratio] == SM_SentinelUndef) {
PSHUFBMask.push_back(DAG.getUNDEF(MVT::i8));
continue;
@@ -19249,12 +19602,13 @@ static bool combineX86ShuffleChain(SDValue Op, SDValue Root, ArrayRef<int> Mask,
: 255;
PSHUFBMask.push_back(DAG.getConstant(M, MVT::i8));
}
- Op = DAG.getNode(ISD::BITCAST, DL, MVT::v16i8, Input);
+ MVT ByteVT = MVT::getVectorVT(MVT::i8, NumBytes);
+ Op = DAG.getNode(ISD::BITCAST, DL, ByteVT, Input);
DCI.AddToWorklist(Op.getNode());
SDValue PSHUFBMaskOp =
- DAG.getNode(ISD::BUILD_VECTOR, DL, MVT::v16i8, PSHUFBMask);
+ DAG.getNode(ISD::BUILD_VECTOR, DL, ByteVT, PSHUFBMask);
DCI.AddToWorklist(PSHUFBMaskOp.getNode());
- Op = DAG.getNode(X86ISD::PSHUFB, DL, MVT::v16i8, Op, PSHUFBMaskOp);
+ Op = DAG.getNode(X86ISD::PSHUFB, DL, ByteVT, Op, PSHUFBMaskOp);
DCI.AddToWorklist(Op.getNode());
DCI.CombineTo(Root.getNode(), DAG.getNode(ISD::BITCAST, DL, RootVT, Op),
/*AddTo*/ true);
@@ -19312,10 +19666,6 @@ static bool combineX86ShufflesRecursively(SDValue Op, SDValue Root,
MVT VT = Op.getSimpleValueType();
if (!VT.isVector())
return false; // Bail if we hit a non-vector.
- // FIXME: This routine should be taught about 256-bit shuffles, or a 256-bit
- // version should be added.
- if (VT.getSizeInBits() != 128)
- return false;
assert(Root.getSimpleValueType().isVector() &&
"Shuffles operate on vector types!");
@@ -19418,12 +19768,26 @@ static bool combineX86ShufflesRecursively(SDValue Op, SDValue Root,
/// This is a very minor wrapper around getTargetShuffleMask to ease forming v4
/// PSHUF-style masks that can be reused with such instructions.
static SmallVector<int, 4> getPSHUFShuffleMask(SDValue N) {
+ MVT VT = N.getSimpleValueType();
SmallVector<int, 4> Mask;
bool IsUnary;
- bool HaveMask = getTargetShuffleMask(N.getNode(), N.getSimpleValueType(), Mask, IsUnary);
+ bool HaveMask = getTargetShuffleMask(N.getNode(), VT, Mask, IsUnary);
(void)HaveMask;
assert(HaveMask);
+ // If we have more than 128-bits, only the low 128-bits of shuffle mask
+ // matter. Check that the upper masks are repeats and remove them.
+ if (VT.getSizeInBits() > 128) {
+ int LaneElts = 128 / VT.getScalarSizeInBits();
+#ifndef NDEBUG
+ for (int i = 1, NumLanes = VT.getSizeInBits() / 128; i < NumLanes; ++i)
+ for (int j = 0; j < LaneElts; ++j)
+ assert(Mask[j] == Mask[i * LaneElts + j] - LaneElts &&
+ "Mask doesn't repeat in high 128-bit lanes!");
+#endif
+ Mask.resize(LaneElts);
+ }
+
switch (N.getOpcode()) {
case X86ISD::PSHUFD:
return Mask;
@@ -19496,7 +19860,8 @@ combineRedundantDWordShuffle(SDValue N, MutableArrayRef<int> Mask,
case X86ISD::UNPCKH:
// For either i8 -> i16 or i16 -> i32 unpacks, we can combine a dword
// shuffle into a preceding word shuffle.
- if (V.getValueType() != MVT::v16i8 && V.getValueType() != MVT::v8i16)
+ if (V.getSimpleValueType().getScalarType() != MVT::i8 &&
+ V.getSimpleValueType().getScalarType() != MVT::i16)
return SDValue();
// Search for a half-shuffle which we can combine with.
@@ -19670,8 +20035,7 @@ static SDValue PerformTargetShuffleCombine(SDValue N, SelectionDAG &DAG,
break;
case X86ISD::PSHUFLW:
case X86ISD::PSHUFHW:
- assert(VT == MVT::v8i16);
- (void)VT;
+ assert(VT.getScalarType() == MVT::i16 && "Bad word shuffle type!");
if (combineRedundantHalfShuffle(N, Mask, DAG, DCI))
return SDValue(); // We combined away this shuffle, so we're done.
@@ -19679,17 +20043,18 @@ static SDValue PerformTargetShuffleCombine(SDValue N, SelectionDAG &DAG,
// See if this reduces to a PSHUFD which is no more expensive and can
// combine with more operations. Note that it has to at least flip the
// dwords as otherwise it would have been removed as a no-op.
- if (Mask[0] == 2 && Mask[1] == 3 && Mask[2] == 0 && Mask[3] == 1) {
+ if (makeArrayRef(Mask).equals({2, 3, 0, 1})) {
int DMask[] = {0, 1, 2, 3};
int DOffset = N.getOpcode() == X86ISD::PSHUFLW ? 0 : 2;
DMask[DOffset + 0] = DOffset + 1;
DMask[DOffset + 1] = DOffset + 0;
- V = DAG.getNode(ISD::BITCAST, DL, MVT::v4i32, V);
+ MVT DVT = MVT::getVectorVT(MVT::i32, VT.getVectorNumElements() / 2);
+ V = DAG.getNode(ISD::BITCAST, DL, DVT, V);
DCI.AddToWorklist(V.getNode());
- V = DAG.getNode(X86ISD::PSHUFD, DL, MVT::v4i32, V,
+ V = DAG.getNode(X86ISD::PSHUFD, DL, DVT, V,
getV4X86ShuffleImm8ForMask(DMask, DAG));
DCI.AddToWorklist(V.getNode());
- return DAG.getNode(ISD::BITCAST, DL, MVT::v8i16, V);
+ return DAG.getNode(ISD::BITCAST, DL, VT, V);
}
// Look for shuffle patterns which can be implemented as a single unpack.
@@ -19717,18 +20082,14 @@ static SDValue PerformTargetShuffleCombine(SDValue N, SelectionDAG &DAG,
int MappedMask[8];
for (int i = 0; i < 8; ++i)
MappedMask[i] = 2 * DMask[WordMask[i] / 2] + WordMask[i] % 2;
- const int UnpackLoMask[] = {0, 0, 1, 1, 2, 2, 3, 3};
- const int UnpackHiMask[] = {4, 4, 5, 5, 6, 6, 7, 7};
- if (std::equal(std::begin(MappedMask), std::end(MappedMask),
- std::begin(UnpackLoMask)) ||
- std::equal(std::begin(MappedMask), std::end(MappedMask),
- std::begin(UnpackHiMask))) {
+ if (makeArrayRef(MappedMask).equals({0, 0, 1, 1, 2, 2, 3, 3}) ||
+ makeArrayRef(MappedMask).equals({4, 4, 5, 5, 6, 6, 7, 7})) {
// We can replace all three shuffles with an unpack.
- V = DAG.getNode(ISD::BITCAST, DL, MVT::v8i16, D.getOperand(0));
+ V = DAG.getNode(ISD::BITCAST, DL, VT, D.getOperand(0));
DCI.AddToWorklist(V.getNode());
return DAG.getNode(MappedMask[0] == 0 ? X86ISD::UNPCKL
: X86ISD::UNPCKH,
- DL, MVT::v8i16, V, V);
+ DL, VT, V, V);
}
}
}
@@ -19876,10 +20237,6 @@ static SDValue PerformShuffleCombine(SDNode *N, SelectionDAG &DAG,
}
}
- // Only handle 128 wide vector from here on.
- if (!VT.is128BitVector())
- return SDValue();
-
// Combine a vector_shuffle that is equal to build_vector load1, load2, load3,
// load4, <0, 1, 2, 3> into a 128-bit load if the load addresses are
// consecutive, non-overlapping, and in the right order.
@@ -20987,6 +21344,49 @@ static SDValue checkBoolTestSetCCCombine(SDValue Cmp, X86::CondCode &CC) {
return SDValue();
}
+/// Check whether Cond is an AND/OR of SETCCs off of the same EFLAGS.
+/// Match:
+/// (X86or (X86setcc) (X86setcc))
+/// (X86cmp (and (X86setcc) (X86setcc)), 0)
+static bool checkBoolTestAndOrSetCCCombine(SDValue Cond, X86::CondCode &CC0,
+ X86::CondCode &CC1, SDValue &Flags,
+ bool &isAnd) {
+ if (Cond->getOpcode() == X86ISD::CMP) {
+ ConstantSDNode *CondOp1C = dyn_cast<ConstantSDNode>(Cond->getOperand(1));
+ if (!CondOp1C || !CondOp1C->isNullValue())
+ return false;
+
+ Cond = Cond->getOperand(0);
+ }
+
+ isAnd = false;
+
+ SDValue SetCC0, SetCC1;
+ switch (Cond->getOpcode()) {
+ default: return false;
+ case ISD::AND:
+ case X86ISD::AND:
+ isAnd = true;
+ // fallthru
+ case ISD::OR:
+ case X86ISD::OR:
+ SetCC0 = Cond->getOperand(0);
+ SetCC1 = Cond->getOperand(1);
+ break;
+  }
+
+ // Make sure we have SETCC nodes, using the same flags value.
+ if (SetCC0.getOpcode() != X86ISD::SETCC ||
+ SetCC1.getOpcode() != X86ISD::SETCC ||
+ SetCC0->getOperand(1) != SetCC1->getOperand(1))
+ return false;
+
+ CC0 = (X86::CondCode)SetCC0->getConstantOperandVal(0);
+ CC1 = (X86::CondCode)SetCC1->getConstantOperandVal(0);
+ Flags = SetCC0->getOperand(1);
+ return true;
+}
+
/// Optimize X86ISD::CMOV [LHS, RHS, CONDCODE (e.g. X86::COND_NE), CONDVAL]
static SDValue PerformCMOVCombine(SDNode *N, SelectionDAG &DAG,
TargetLowering::DAGCombinerInfo &DCI,
@@ -21156,6 +21556,44 @@ static SDValue PerformCMOVCombine(SDNode *N, SelectionDAG &DAG,
}
}
+ // Fold and/or of setcc's to double CMOV:
+ // (CMOV F, T, ((cc1 | cc2) != 0)) -> (CMOV (CMOV F, T, cc1), T, cc2)
+ // (CMOV F, T, ((cc1 & cc2) != 0)) -> (CMOV (CMOV T, F, !cc1), F, !cc2)
+ //
+ // This combine lets us generate:
+ // cmovcc1 (jcc1 if we don't have CMOV)
+ // cmovcc2 (same)
+ // instead of:
+ // setcc1
+ // setcc2
+ // and/or
+ // cmovne (jne if we don't have CMOV)
+ // When we can't use the CMOV instruction, it might increase branch
+ // mispredicts.
+ // When we can use CMOV, or when there is no mispredict, this improves
+ // throughput and reduces register pressure.
+ //
+ if (CC == X86::COND_NE) {
+ SDValue Flags;
+ X86::CondCode CC0, CC1;
+ bool isAndSetCC;
+ if (checkBoolTestAndOrSetCCCombine(Cond, CC0, CC1, Flags, isAndSetCC)) {
+ if (isAndSetCC) {
+ std::swap(FalseOp, TrueOp);
+ CC0 = X86::GetOppositeBranchCondition(CC0);
+ CC1 = X86::GetOppositeBranchCondition(CC1);
+ }
+
+ SDValue LOps[] = {FalseOp, TrueOp, DAG.getConstant(CC0, MVT::i8),
+ Flags};
+ SDValue LCMOV = DAG.getNode(X86ISD::CMOV, DL, N->getVTList(), LOps);
+ SDValue Ops[] = {LCMOV, TrueOp, DAG.getConstant(CC1, MVT::i8), Flags};
+ SDValue CMOV = DAG.getNode(X86ISD::CMOV, DL, N->getVTList(), Ops);
+ DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), SDValue(CMOV.getNode(), 1));
+ return CMOV;
+ }
+ }
+
return SDValue();
}
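
A scalar check of the select identities behind the fold above (illustrative,
not part of the patch), reading X86ISD::CMOV(F, T, cc) as cc ? T : F:

#include <cassert>

// (CMOV F, T, cc1|cc2) -> (CMOV (CMOV F, T, cc1), T, cc2)
int cmovOrFold(int F, int T, bool CC0, bool CC1) {
  int L = CC0 ? T : F;
  return CC1 ? T : L;
}

// (CMOV F, T, cc1&cc2) -> (CMOV (CMOV T, F, !cc1), F, !cc2)
int cmovAndFold(int F, int T, bool CC0, bool CC1) {
  int L = !CC0 ? F : T;
  return !CC1 ? F : L;
}

int main() {
  for (int c0 = 0; c0 < 2; ++c0)
    for (int c1 = 0; c1 < 2; ++c1) {
      assert(cmovOrFold(10, 20, c0, c1) == ((c0 || c1) ? 20 : 10));
      assert(cmovAndFold(10, 20, c0, c1) == ((c0 && c1) ? 20 : 10));
    }
}
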
@@ -21166,24 +21604,16 @@ static SDValue PerformINTRINSIC_WO_CHAINCombine(SDNode *N, SelectionDAG &DAG,
default: return SDValue();
// SSE/AVX/AVX2 blend intrinsics.
case Intrinsic::x86_avx2_pblendvb:
- case Intrinsic::x86_avx2_pblendw:
- case Intrinsic::x86_avx2_pblendd_128:
- case Intrinsic::x86_avx2_pblendd_256:
// Don't try to simplify this intrinsic if we don't have AVX2.
if (!Subtarget->hasAVX2())
return SDValue();
// FALL-THROUGH
- case Intrinsic::x86_avx_blend_pd_256:
- case Intrinsic::x86_avx_blend_ps_256:
case Intrinsic::x86_avx_blendv_pd_256:
case Intrinsic::x86_avx_blendv_ps_256:
// Don't try to simplify this intrinsic if we don't have AVX.
if (!Subtarget->hasAVX())
return SDValue();
// FALL-THROUGH
- case Intrinsic::x86_sse41_pblendw:
- case Intrinsic::x86_sse41_blendpd:
- case Intrinsic::x86_sse41_blendps:
case Intrinsic::x86_sse41_blendvps:
case Intrinsic::x86_sse41_blendvpd:
case Intrinsic::x86_sse41_pblendvb: {
@@ -21640,7 +22070,7 @@ static SDValue VectorZextCombine(SDNode *N, SelectionDAG &DAG,
// an and with a mask.
// We'd like to try to combine that into a shuffle with zero
// plus a bitcast, removing the and.
- if (N0.getOpcode() != ISD::BITCAST ||
+ if (N0.getOpcode() != ISD::BITCAST ||
N0.getOperand(0).getOpcode() != ISD::VECTOR_SHUFFLE)
return SDValue();
@@ -21670,7 +22100,7 @@ static SDValue VectorZextCombine(SDNode *N, SelectionDAG &DAG,
unsigned ResSize = N1.getValueType().getScalarSizeInBits();
// Make sure the splat matches the mask we expect
- if (SplatBitSize > ResSize ||
+ if (SplatBitSize > ResSize ||
(SplatValue + 1).exactLogBase2() != (int)SrcSize)
return SDValue();
@@ -21724,12 +22154,10 @@ static SDValue PerformAndCombine(SDNode *N, SelectionDAG &DAG,
if (DCI.isBeforeLegalizeOps())
return SDValue();
- SDValue Zext = VectorZextCombine(N, DAG, DCI, Subtarget);
- if (Zext.getNode())
+ if (SDValue Zext = VectorZextCombine(N, DAG, DCI, Subtarget))
return Zext;
- SDValue R = CMPEQCombine(N, DAG, DCI, Subtarget);
- if (R.getNode())
+ if (SDValue R = CMPEQCombine(N, DAG, DCI, Subtarget))
return R;
EVT VT = N->getValueType(0);
@@ -22521,7 +22949,7 @@ static bool isHorizontalBinOp(SDValue &LHS, SDValue &RHS, bool IsCommutative) {
// If A and B occur in reverse order in RHS, then "swap" them (which means
// rewriting the mask).
if (A != C)
- CommuteVectorShuffleMask(RMask, NumElts);
+ ShuffleVectorSDNode::commuteMask(RMask);
// At this point LHS and RHS are equivalent to
// LHS = VECTOR_SHUFFLE A, B, LMask
@@ -22630,7 +23058,7 @@ static SDValue PerformFANDCombine(SDNode *N, SelectionDAG &DAG) {
if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(N->getOperand(1)))
if (C->getValueAPF().isPosZero())
return N->getOperand(1);
-
+
return SDValue();
}
@@ -22864,45 +23292,51 @@ static SDValue PerformISDSETCCCombine(SDNode *N, SelectionDAG &DAG,
if ((CC == ISD::SETNE || CC == ISD::SETEQ) && LHS.getOpcode() == ISD::SUB)
if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(LHS.getOperand(0)))
if (C->getAPIntValue() == 0 && LHS.hasOneUse()) {
- SDValue addV = DAG.getNode(ISD::ADD, SDLoc(N),
- LHS.getValueType(), RHS, LHS.getOperand(1));
- return DAG.getSetCC(SDLoc(N), N->getValueType(0),
- addV, DAG.getConstant(0, addV.getValueType()), CC);
+ SDValue addV = DAG.getNode(ISD::ADD, SDLoc(N), LHS.getValueType(), RHS,
+ LHS.getOperand(1));
+ return DAG.getSetCC(SDLoc(N), N->getValueType(0), addV,
+ DAG.getConstant(0, addV.getValueType()), CC);
}
if ((CC == ISD::SETNE || CC == ISD::SETEQ) && RHS.getOpcode() == ISD::SUB)
if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(RHS.getOperand(0)))
if (C->getAPIntValue() == 0 && RHS.hasOneUse()) {
- SDValue addV = DAG.getNode(ISD::ADD, SDLoc(N),
- RHS.getValueType(), LHS, RHS.getOperand(1));
- return DAG.getSetCC(SDLoc(N), N->getValueType(0),
- addV, DAG.getConstant(0, addV.getValueType()), CC);
+ SDValue addV = DAG.getNode(ISD::ADD, SDLoc(N), RHS.getValueType(), LHS,
+ RHS.getOperand(1));
+ return DAG.getSetCC(SDLoc(N), N->getValueType(0), addV,
+ DAG.getConstant(0, addV.getValueType()), CC);
}
- if (VT.getScalarType() == MVT::i1) {
- bool IsSEXT0 = (LHS.getOpcode() == ISD::SIGN_EXTEND) &&
- (LHS.getOperand(0).getValueType().getScalarType() == MVT::i1);
- bool IsVZero0 = ISD::isBuildVectorAllZeros(LHS.getNode());
- if (!IsSEXT0 && !IsVZero0)
- return SDValue();
- bool IsSEXT1 = (RHS.getOpcode() == ISD::SIGN_EXTEND) &&
- (RHS.getOperand(0).getValueType().getScalarType() == MVT::i1);
+ if (VT.getScalarType() == MVT::i1 &&
+ (CC == ISD::SETNE || CC == ISD::SETEQ || ISD::isSignedIntSetCC(CC))) {
+ bool IsSEXT0 =
+ (LHS.getOpcode() == ISD::SIGN_EXTEND) &&
+ (LHS.getOperand(0).getValueType().getScalarType() == MVT::i1);
bool IsVZero1 = ISD::isBuildVectorAllZeros(RHS.getNode());
- if (!IsSEXT1 && !IsVZero1)
- return SDValue();
+ if (!IsSEXT0 || !IsVZero1) {
+ // Swap the operands and update the condition code.
+ std::swap(LHS, RHS);
+ CC = ISD::getSetCCSwappedOperands(CC);
+
+ IsSEXT0 = (LHS.getOpcode() == ISD::SIGN_EXTEND) &&
+ (LHS.getOperand(0).getValueType().getScalarType() == MVT::i1);
+ IsVZero1 = ISD::isBuildVectorAllZeros(RHS.getNode());
+ }
if (IsSEXT0 && IsVZero1) {
- assert(VT == LHS.getOperand(0).getValueType() && "Uexpected operand type");
- if (CC == ISD::SETEQ)
+ assert(VT == LHS.getOperand(0).getValueType() &&
+ "Uexpected operand type");
+ if (CC == ISD::SETGT)
+ return DAG.getConstant(0, VT);
+ if (CC == ISD::SETLE)
+ return DAG.getConstant(1, VT);
+ if (CC == ISD::SETEQ || CC == ISD::SETGE)
return DAG.getNOT(DL, LHS.getOperand(0), VT);
+
+ assert((CC == ISD::SETNE || CC == ISD::SETLT) &&
+ "Unexpected condition code!");
return LHS.getOperand(0);
}
- if (IsSEXT1 && IsVZero0) {
- assert(VT == RHS.getOperand(0).getValueType() && "Uexpected operand type");
- if (CC == ISD::SETEQ)
- return DAG.getNOT(DL, RHS.getOperand(0), VT);
- return RHS.getOperand(0);
- }
}
return SDValue();
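
The reworked i1 path rests on fixed identities: the sign extension of an i1 is either 0 or all-ones, so each supported condition code against an all-zeros vector folds to a constant, to the operand itself, or to its NOT. The assertions below check every case the code returns, using -1 as the sign-extended true value:

#include <cassert>
#include <initializer_list>

// Identity check for the i1 combine above: with x in {0,1} and
// s = sext(x) in {0,-1}, each comparison against zero reduces exactly to
// what the new code returns.
int main() {
  for (int x : {0, 1}) {
    int s = x ? -1 : 0;       // sext i1 -> iN
    assert((s >  0) == 0);    // SETGT -> constant 0
    assert((s <= 0) == 1);    // SETLE -> constant 1
    assert((s == 0) == !x);   // SETEQ -> NOT x
    assert((s >= 0) == !x);   // SETGE -> NOT x
    assert((s != 0) ==  x);   // SETNE -> x
    assert((s <  0) ==  x);   // SETLT -> x
  }
}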
@@ -22940,7 +23374,7 @@ static SDValue PerformINSERTPSCombine(SDNode *N, SelectionDAG &DAG,
// countS and just gets an f32 from that address.
unsigned DestIndex =
cast<ConstantSDNode>(N->getOperand(2))->getZExtValue() >> 6;
-
+
Ld = NarrowVectorLoadToElement(cast<LoadSDNode>(Ld), DestIndex, DAG);
// Create this as a scalar to vector to match the instruction pattern.
@@ -22964,7 +23398,7 @@ static SDValue PerformBLENDICombine(SDNode *N, SelectionDAG &DAG) {
// pattern-matching possibilities related to scalar math ops in SSE/AVX.
// x86InstrInfo knows how to commute this back after instruction selection
// if it would help register allocation.
-
+
// TODO: If optimizing for size or a processor that doesn't suffer from
// partial register update stalls, this should be transformed into a MOVSD
// instruction because a MOVSD is 1-2 bytes smaller than a BLENDPD.
@@ -23503,27 +23937,23 @@ bool X86TargetLowering::IsDesirableToPromoteOp(SDValue Op, EVT &PVT) const {
// X86 Inline Assembly Support
//===----------------------------------------------------------------------===//
-namespace {
- // Helper to match a string separated by whitespace.
- bool matchAsmImpl(StringRef s, ArrayRef<const StringRef *> args) {
- s = s.substr(s.find_first_not_of(" \t")); // Skip leading whitespace.
-
- for (unsigned i = 0, e = args.size(); i != e; ++i) {
- StringRef piece(*args[i]);
- if (!s.startswith(piece)) // Check if the piece matches.
- return false;
+// Helper to match a string separated by whitespace.
+static bool matchAsm(StringRef S, ArrayRef<const char *> Pieces) {
+ S = S.substr(S.find_first_not_of(" \t")); // Skip leading whitespace.
- s = s.substr(piece.size());
- StringRef::size_type pos = s.find_first_not_of(" \t");
- if (pos == 0) // We matched a prefix.
- return false;
+ for (StringRef Piece : Pieces) {
+ if (!S.startswith(Piece)) // Check if the piece matches.
+ return false;
- s = s.substr(pos);
- }
+ S = S.substr(Piece.size());
+ StringRef::size_type Pos = S.find_first_not_of(" \t");
+ if (Pos == 0) // We matched a prefix.
+ return false;
- return s.empty();
+ S = S.substr(Pos);
}
- const VariadicFunction1<bool, StringRef, StringRef, matchAsmImpl> matchAsm={};
+
+ return S.empty();
}
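
A portable model of the matching rules in the rewritten matchAsm, using std::string_view in place of StringRef: each piece must begin a whitespace-separated token (prefix-only matches fail), and the whole input must be consumed.

#include <algorithm>
#include <cstddef>
#include <cstdio>
#include <initializer_list>
#include <string_view>

// Restatement of the matchAsm logic above in standard C++.
static bool matchAsmModel(std::string_view S,
                          std::initializer_list<std::string_view> Pieces) {
  S.remove_prefix(std::min(S.find_first_not_of(" \t"), S.size()));
  for (std::string_view Piece : Pieces) {
    if (S.substr(0, Piece.size()) != Piece)
      return false;
    S.remove_prefix(Piece.size());
    std::size_t Pos = S.find_first_not_of(" \t");
    if (Pos == 0)                 // matched only a prefix of a token
      return false;
    S.remove_prefix(std::min(Pos, S.size()));
  }
  return S.empty();
}

int main() {
  std::printf("%d\n", matchAsmModel("  bswap $0", {"bswap", "$0"})); // 1
  std::printf("%d\n", matchAsmModel("bswapl $0", {"bswap", "$0"}));  // 0
}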
static bool clobbersFlagRegisters(const SmallVector<StringRef, 4> &AsmPieces) {
@@ -23563,12 +23993,12 @@ bool X86TargetLowering::ExpandInlineAsm(CallInst *CI) const {
// ops instead of emitting the bswap asm. For now, we don't support 486 or
// lower so don't worry about this.
// bswap $0
- if (matchAsm(AsmPieces[0], "bswap", "$0") ||
- matchAsm(AsmPieces[0], "bswapl", "$0") ||
- matchAsm(AsmPieces[0], "bswapq", "$0") ||
- matchAsm(AsmPieces[0], "bswap", "${0:q}") ||
- matchAsm(AsmPieces[0], "bswapl", "${0:q}") ||
- matchAsm(AsmPieces[0], "bswapq", "${0:q}")) {
+ if (matchAsm(AsmPieces[0], {"bswap", "$0"}) ||
+ matchAsm(AsmPieces[0], {"bswapl", "$0"}) ||
+ matchAsm(AsmPieces[0], {"bswapq", "$0"}) ||
+ matchAsm(AsmPieces[0], {"bswap", "${0:q}"}) ||
+ matchAsm(AsmPieces[0], {"bswapl", "${0:q}"}) ||
+ matchAsm(AsmPieces[0], {"bswapq", "${0:q}"})) {
// No need to check constraints, nothing other than the equivalent of
// "=r,0" would be valid here.
return IntrinsicLowering::LowerToByteSwap(CI);
@@ -23577,8 +24007,8 @@ bool X86TargetLowering::ExpandInlineAsm(CallInst *CI) const {
// rorw $$8, ${0:w} --> llvm.bswap.i16
if (CI->getType()->isIntegerTy(16) &&
IA->getConstraintString().compare(0, 5, "=r,0,") == 0 &&
- (matchAsm(AsmPieces[0], "rorw", "$$8,", "${0:w}") ||
- matchAsm(AsmPieces[0], "rolw", "$$8,", "${0:w}"))) {
+ (matchAsm(AsmPieces[0], {"rorw", "$$8,", "${0:w}"}) ||
+ matchAsm(AsmPieces[0], {"rolw", "$$8,", "${0:w}"}))) {
AsmPieces.clear();
const std::string &ConstraintsStr = IA->getConstraintString();
SplitString(StringRef(ConstraintsStr).substr(5), AsmPieces, ",");
@@ -23590,9 +24020,9 @@ bool X86TargetLowering::ExpandInlineAsm(CallInst *CI) const {
case 3:
if (CI->getType()->isIntegerTy(32) &&
IA->getConstraintString().compare(0, 5, "=r,0,") == 0 &&
- matchAsm(AsmPieces[0], "rorw", "$$8,", "${0:w}") &&
- matchAsm(AsmPieces[1], "rorl", "$$16,", "$0") &&
- matchAsm(AsmPieces[2], "rorw", "$$8,", "${0:w}")) {
+ matchAsm(AsmPieces[0], {"rorw", "$$8,", "${0:w}"}) &&
+ matchAsm(AsmPieces[1], {"rorl", "$$16,", "$0"}) &&
+ matchAsm(AsmPieces[2], {"rorw", "$$8,", "${0:w}"})) {
AsmPieces.clear();
const std::string &ConstraintsStr = IA->getConstraintString();
SplitString(StringRef(ConstraintsStr).substr(5), AsmPieces, ",");
@@ -23607,9 +24037,9 @@ bool X86TargetLowering::ExpandInlineAsm(CallInst *CI) const {
Constraints[0].Codes.size() == 1 && Constraints[0].Codes[0] == "A" &&
Constraints[1].Codes.size() == 1 && Constraints[1].Codes[0] == "0") {
// bswap %eax / bswap %edx / xchgl %eax, %edx -> llvm.bswap.i64
- if (matchAsm(AsmPieces[0], "bswap", "%eax") &&
- matchAsm(AsmPieces[1], "bswap", "%edx") &&
- matchAsm(AsmPieces[2], "xchgl", "%eax,", "%edx"))
+ if (matchAsm(AsmPieces[0], {"bswap", "%eax"}) &&
+ matchAsm(AsmPieces[1], {"bswap", "%edx"}) &&
+ matchAsm(AsmPieces[2], {"xchgl", "%eax,", "%edx"}))
return IntrinsicLowering::LowerToByteSwap(CI);
}
}
diff --git a/lib/Target/X86/X86ISelLowering.h b/lib/Target/X86/X86ISelLowering.h
index 4423015..dd20ec2 100644
--- a/lib/Target/X86/X86ISelLowering.h
+++ b/lib/Target/X86/X86ISelLowering.h
@@ -30,37 +30,37 @@ namespace llvm {
// Start the numbering where the builtin ops leave off.
FIRST_NUMBER = ISD::BUILTIN_OP_END,
- /// BSF - Bit scan forward.
- /// BSR - Bit scan reverse.
+ /// Bit scan forward.
BSF,
+ /// Bit scan reverse.
BSR,
- /// SHLD, SHRD - Double shift instructions. These correspond to
+ /// Double shift instructions. These correspond to
/// X86::SHLDxx and X86::SHRDxx instructions.
SHLD,
SHRD,
- /// FAND - Bitwise logical AND of floating point values. This corresponds
+ /// Bitwise logical AND of floating point values. This corresponds
/// to X86::ANDPS or X86::ANDPD.
FAND,
- /// FOR - Bitwise logical OR of floating point values. This corresponds
+ /// Bitwise logical OR of floating point values. This corresponds
/// to X86::ORPS or X86::ORPD.
FOR,
- /// FXOR - Bitwise logical XOR of floating point values. This corresponds
+ /// Bitwise logical XOR of floating point values. This corresponds
/// to X86::XORPS or X86::XORPD.
FXOR,
- /// FANDN - Bitwise logical ANDNOT of floating point values. This
+ /// Bitwise logical ANDNOT of floating point values. This
/// corresponds to X86::ANDNPS or X86::ANDNPD.
FANDN,
- /// FSRL - Bitwise logical right shift of floating point values. These
+ /// Bitwise logical right shift of floating point values. This
/// corresponds to X86::PSRLDQ.
FSRL,
- /// CALL - These operations represent an abstract X86 call
+ /// These operations represent an abstract X86 call
/// instruction, which includes a bunch of information. In particular the
/// operands of this node are:
///
@@ -79,8 +79,7 @@ namespace llvm {
///
CALL,
- /// RDTSC_DAG - This operation implements the lowering for
- /// readcyclecounter
+ /// This operation implements the lowering for readcyclecounter
RDTSC_DAG,
/// X86 Read Time-Stamp Counter and Processor ID.
@@ -131,187 +130,186 @@ namespace llvm {
/// 1 is the number of bytes of stack to pop.
RET_FLAG,
- /// REP_STOS - Repeat fill, corresponds to X86::REP_STOSx.
+ /// Repeat fill, corresponds to X86::REP_STOSx.
REP_STOS,
- /// REP_MOVS - Repeat move, corresponds to X86::REP_MOVSx.
+ /// Repeat move, corresponds to X86::REP_MOVSx.
REP_MOVS,
- /// GlobalBaseReg - On Darwin, this node represents the result of the popl
+ /// On Darwin, this node represents the result of the popl
/// at function entry, used for PIC code.
GlobalBaseReg,
- /// Wrapper - A wrapper node for TargetConstantPool,
+ /// A wrapper node for TargetConstantPool,
/// TargetExternalSymbol, and TargetGlobalAddress.
Wrapper,
- /// WrapperRIP - Special wrapper used under X86-64 PIC mode for RIP
+ /// Special wrapper used under X86-64 PIC mode for RIP
/// relative displacements.
WrapperRIP,
- /// MOVDQ2Q - Copies a 64-bit value from the low word of an XMM vector
+ /// Copies a 64-bit value from the low word of an XMM vector
/// to an MMX vector. If you think this is too close to the previous
/// mnemonic, so do I; blame Intel.
MOVDQ2Q,
- /// MMX_MOVD2W - Copies a 32-bit value from the low word of a MMX
+    /// Copies a 32-bit value from the low word of an MMX
/// vector to a GPR.
MMX_MOVD2W,
- /// MMX_MOVW2D - Copies a GPR into the low 32-bit word of a MMX vector
+    /// Copies a GPR into the low 32-bit word of an MMX vector
/// and zeroes out the high word.
MMX_MOVW2D,
- /// PEXTRB - Extract an 8-bit value from a vector and zero extend it to
+ /// Extract an 8-bit value from a vector and zero extend it to
/// i32, corresponds to X86::PEXTRB.
PEXTRB,
- /// PEXTRW - Extract a 16-bit value from a vector and zero extend it to
+ /// Extract a 16-bit value from a vector and zero extend it to
/// i32, corresponds to X86::PEXTRW.
PEXTRW,
- /// INSERTPS - Insert any element of a 4 x float vector into any element
+ /// Insert any element of a 4 x float vector into any element
/// of a destination 4 x float vector.
INSERTPS,
- /// PINSRB - Insert the lower 8-bits of a 32-bit value to a vector,
+ /// Insert the lower 8-bits of a 32-bit value to a vector,
/// corresponds to X86::PINSRB.
PINSRB,
- /// PINSRW - Insert the lower 16-bits of a 32-bit value to a vector,
+ /// Insert the lower 16-bits of a 32-bit value to a vector,
/// corresponds to X86::PINSRW.
PINSRW, MMX_PINSRW,
- /// PSHUFB - Shuffle 16 8-bit values within a vector.
+ /// Shuffle 16 8-bit values within a vector.
PSHUFB,
- /// ANDNP - Bitwise Logical AND NOT of Packed FP values.
+ /// Bitwise Logical AND NOT of Packed FP values.
ANDNP,
- /// PSIGN - Copy integer sign.
+ /// Copy integer sign.
PSIGN,
- /// BLENDI - Blend where the selector is an immediate.
+ /// Blend where the selector is an immediate.
BLENDI,
- /// SHRUNKBLEND - Blend where the condition has been shrunk.
+ /// Blend where the condition has been shrunk.
/// This is used to emphasize that the condition mask is
/// no more valid for generic VSELECT optimizations.
SHRUNKBLEND,
- /// ADDSUB - Combined add and sub on an FP vector.
+ /// Combined add and sub on an FP vector.
ADDSUB,
- // FADD, FSUB, FMUL, FDIV, FMIN, FMAX - FP vector ops with rounding mode.
+ // FP vector ops with rounding mode.
FADD_RND,
FSUB_RND,
FMUL_RND,
FDIV_RND,
- // SUBUS - Integer sub with unsigned saturation.
+ // Integer sub with unsigned saturation.
SUBUS,
- /// HADD - Integer horizontal add.
+ /// Integer horizontal add.
HADD,
- /// HSUB - Integer horizontal sub.
+ /// Integer horizontal sub.
HSUB,
- /// FHADD - Floating point horizontal add.
+ /// Floating point horizontal add.
FHADD,
- /// FHSUB - Floating point horizontal sub.
+ /// Floating point horizontal sub.
FHSUB,
- /// UMAX, UMIN - Unsigned integer max and min.
+ /// Unsigned integer max and min.
UMAX, UMIN,
- /// SMAX, SMIN - Signed integer max and min.
+ /// Signed integer max and min.
SMAX, SMIN,
- /// FMAX, FMIN - Floating point max and min.
- ///
+ /// Floating point max and min.
FMAX, FMIN,
- /// FMAXC, FMINC - Commutative FMIN and FMAX.
+ /// Commutative FMIN and FMAX.
FMAXC, FMINC,
- /// FRSQRT, FRCP - Floating point reciprocal-sqrt and reciprocal
- /// approximation. Note that these typically require refinement
+ /// Floating point reciprocal-sqrt and reciprocal approximation.
+ /// Note that these typically require refinement
/// in order to obtain suitable precision.
FRSQRT, FRCP,
- // TLSADDR - Thread Local Storage.
+ // Thread Local Storage.
TLSADDR,
- // TLSBASEADDR - Thread Local Storage. A call to get the start address
+ // Thread Local Storage. A call to get the start address
// of the TLS block for the current module.
TLSBASEADDR,
- // TLSCALL - Thread Local Storage. When calling to an OS provided
+ // Thread Local Storage. When calling to an OS provided
// thunk at the address from an earlier relocation.
TLSCALL,
- // EH_RETURN - Exception Handling helpers.
+ // Exception Handling helpers.
EH_RETURN,
- // EH_SJLJ_SETJMP - SjLj exception handling setjmp.
+ // SjLj exception handling setjmp.
EH_SJLJ_SETJMP,
- // EH_SJLJ_LONGJMP - SjLj exception handling longjmp.
+ // SjLj exception handling longjmp.
EH_SJLJ_LONGJMP,
- /// TC_RETURN - Tail call return. See X86TargetLowering::LowerCall for
+ /// Tail call return. See X86TargetLowering::LowerCall for
/// the list of operands.
TC_RETURN,
- // VZEXT_MOVL - Vector move to low scalar and zero higher vector elements.
+ // Vector move to low scalar and zero higher vector elements.
VZEXT_MOVL,
- // VZEXT - Vector integer zero-extend.
+ // Vector integer zero-extend.
VZEXT,
- // VSEXT - Vector integer signed-extend.
+ // Vector integer signed-extend.
VSEXT,
- // VTRUNC - Vector integer truncate.
+ // Vector integer truncate.
VTRUNC,
- // VTRUNC - Vector integer truncate with mask.
+ // Vector integer truncate with mask.
VTRUNCM,
- // VFPEXT - Vector FP extend.
+ // Vector FP extend.
VFPEXT,
- // VFPROUND - Vector FP round.
+ // Vector FP round.
VFPROUND,
- // VSHL, VSRL - 128-bit vector logical left / right shift
+ // 128-bit vector logical left / right shift
VSHLDQ, VSRLDQ,
- // VSHL, VSRL, VSRA - Vector shift elements
+ // Vector shift elements
VSHL, VSRL, VSRA,
- // VSHLI, VSRLI, VSRAI - Vector shift elements by immediate
+ // Vector shift elements by immediate
VSHLI, VSRLI, VSRAI,
- // CMPP - Vector packed double/float comparison.
+ // Vector packed double/float comparison.
CMPP,
- // PCMP* - Vector integer comparisons.
+ // Vector integer comparisons.
PCMPEQ, PCMPGT,
- // PCMP*M - Vector integer comparisons, the result is in a mask vector.
+ // Vector integer comparisons, the result is in a mask vector.
PCMPEQM, PCMPGTM,
- /// CMPM, CMPMU - Vector comparison generating mask bits for fp and
+ /// Vector comparison generating mask bits for fp and
/// integer signed and unsigned data types.
CMPM,
CMPMU,
- // ADD, SUB, SMUL, etc. - Arithmetic operations with FLAGS results.
+ // Arithmetic operations with FLAGS results.
ADD, SUB, ADC, SBB, SMUL,
INC, DEC, OR, XOR, AND,
- BEXTR, // BEXTR - Bit field extract
+ BEXTR, // Bit field extract
UMUL, // LOW, HI, FLAGS = umul LHS, RHS
@@ -322,16 +320,16 @@ namespace llvm {
UDIVREM8_ZEXT_HREG,
SDIVREM8_SEXT_HREG,
- // MUL_IMM - X86 specific multiply by immediate.
+ // X86-specific multiply by immediate.
MUL_IMM,
- // PTEST - Vector bitwise comparisons.
+ // Vector bitwise comparisons.
PTEST,
- // TESTP - Vector packed fp sign bitwise comparisons.
+ // Vector packed fp sign bitwise comparisons.
TESTP,
- // TESTM, TESTNM - Vector "test" in AVX-512, the result is in a mask vector.
+ // Vector "test" in AVX-512, the result is in a mask vector.
TESTM,
TESTNM,
@@ -697,6 +695,12 @@ namespace llvm {
std::vector<SDValue> &Ops,
SelectionDAG &DAG) const override;
+ unsigned getInlineAsmMemConstraint(
+ const std::string &ConstraintCode) const override {
+ // FIXME: Map different constraints differently.
+ return InlineAsm::Constraint_m;
+ }
+
/// Given a physical register constraint
/// (e.g. {edx}), return the register number and the register class for the
/// register. This should only be used for C_Register constraints. On
@@ -993,7 +997,8 @@ namespace llvm {
bool shouldExpandAtomicLoadInIR(LoadInst *SI) const override;
bool shouldExpandAtomicStoreInIR(StoreInst *SI) const override;
- bool shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const override;
+ TargetLoweringBase::AtomicRMWExpansionKind
+ shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const override;
LoadInst *
lowerIdempotentRMWIntoFencedLoad(AtomicRMWInst *AI) const override;
diff --git a/lib/Target/X86/X86InstrAVX512.td b/lib/Target/X86/X86InstrAVX512.td
index 4923bc5..509602f 100644
--- a/lib/Target/X86/X86InstrAVX512.td
+++ b/lib/Target/X86/X86InstrAVX512.td
@@ -74,6 +74,15 @@ class X86VectorVTInfo<int numelts, ValueType eltvt, RegisterClass rc,
!if (!eq (Size, 128), "v2i64",
!if (!eq (Size, 256), "v4i64",
VTName)), VTName));
+
+ PatFrag AlignedLdFrag = !cast<PatFrag>("alignedload" #
+ !if (!eq (TypeVariantName, "i"),
+ !if (!eq (Size, 128), "v2i64",
+ !if (!eq (Size, 256), "v4i64",
+ !if (!eq (Size, 512),
+ !if (!eq (EltSize, 64), "v8i64", "v16i32"),
+ VTName))), VTName));
+
PatFrag ScalarLdFrag = !cast<PatFrag>("load" # EltVT);
// The corresponding float type, e.g. v16f32 for v16i32
@@ -107,6 +116,9 @@ class X86VectorVTInfo<int numelts, ValueType eltvt, RegisterClass rc,
// create the canonical constant zero node ImmAllZerosV.
ValueType i32VT = !cast<ValueType>("v" # !srl(Size, 5) # "i32");
dag ImmAllZerosV = (VT (bitconvert (i32VT immAllZerosV)));
+
+ string ZSuffix = !if (!eq (Size, 128), "Z128",
+ !if (!eq (Size, 256), "Z256", "Z"));
}
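
The ZSuffix field added above is a plain width-to-string selection; the TableGen !if chain corresponds to this C++ restatement:

#include <cstdio>
#include <initializer_list>

// 128-bit info gets "Z128", 256-bit gets "Z256", 512-bit keeps the bare
// "Z", matching how the Z/Z256/Z128 instruction variants are named below.
static const char *zsuffix(int SizeInBits) {
  return SizeInBits == 128 ? "Z128" : SizeInBits == 256 ? "Z256" : "Z";
}

int main() {
  for (int Sz : {128, 256, 512})
    std::printf("%4d -> %s\n", Sz, zsuffix(Sz));
}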
def v64i8_info : X86VectorVTInfo<64, i8, VR512, "b">;
@@ -1559,6 +1571,11 @@ multiclass avx512_cmp_packed<RegisterClass KRC, RegisterClass RC,
(outs KRC:$dst), (ins RC:$src1, RC:$src2, u8imm:$cc),
!strconcat("vcmp", suffix,
"\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}"), [], d>;
+ def rrib_alt: AVX512PIi8<0xC2, MRMSrcReg,
+ (outs KRC:$dst), (ins RC:$src1, RC:$src2, u8imm:$cc),
+ !strconcat("vcmp", suffix,
+ "\t{{sae}, $cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc, {sae}}"),
+ [], d>, EVEX_B;
let mayLoad = 1 in
def rmi_alt : AVX512PIi8<0xC2, MRMSrcMem,
(outs KRC:$dst), (ins RC:$src1, x86memop:$src2, u8imm:$cc),
@@ -2047,6 +2064,8 @@ let Predicates = [HasVLX] in {
(v8i1 (COPY_TO_REGCLASS VK4:$src, VK8))>;
def : Pat<(v8i1 (insert_subvector undef, (v2i1 VK2:$src), (iPTR 0))),
(v8i1 (COPY_TO_REGCLASS VK2:$src, VK8))>;
+ def : Pat<(v4i1 (insert_subvector undef, (v2i1 VK2:$src), (iPTR 0))),
+ (v4i1 (COPY_TO_REGCLASS VK2:$src, VK4))>;
def : Pat<(v4i1 (extract_subvector (v8i1 VK8:$src), (iPTR 0))),
(v4i1 (COPY_TO_REGCLASS VK8:$src, VK4))>;
def : Pat<(v2i1 (extract_subvector (v8i1 VK8:$src), (iPTR 0))),
@@ -2062,177 +2081,193 @@ def : Pat<(v8i1 (X86vsrli VK8:$src, (i8 imm:$imm))),
(v8i1 (COPY_TO_REGCLASS
(KSHIFTRWri (COPY_TO_REGCLASS VK8:$src, VK16),
(I8Imm $imm)), VK8))>, Requires<[HasAVX512, NoDQI]>;
+
+def : Pat<(v4i1 (X86vshli VK4:$src, (i8 imm:$imm))),
+ (v4i1 (COPY_TO_REGCLASS
+ (KSHIFTLWri (COPY_TO_REGCLASS VK4:$src, VK16),
+ (I8Imm $imm)), VK4))>, Requires<[HasAVX512]>;
+
+def : Pat<(v4i1 (X86vsrli VK4:$src, (i8 imm:$imm))),
+ (v4i1 (COPY_TO_REGCLASS
+ (KSHIFTRWri (COPY_TO_REGCLASS VK4:$src, VK16),
+ (I8Imm $imm)), VK4))>, Requires<[HasAVX512]>;
+
//===----------------------------------------------------------------------===//
// AVX-512 - Aligned and unaligned load and store
//
-multiclass avx512_load<bits<8> opc, string OpcodeStr, PatFrag ld_frag,
- RegisterClass KRC, RegisterClass RC,
- ValueType vt, ValueType zvt, X86MemOperand memop,
- Domain d, bit IsReMaterializable = 1> {
-let hasSideEffects = 0 in {
- def rr : AVX512PI<opc, MRMSrcReg, (outs RC:$dst), (ins RC:$src),
+
+multiclass avx512_load<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
+ PatFrag ld_frag, PatFrag mload,
+ bit IsReMaterializable = 1> {
+ let hasSideEffects = 0 in {
+ def rr : AVX512PI<opc, MRMSrcReg, (outs _.RC:$dst), (ins _.RC:$src),
!strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), [],
- d>, EVEX;
- def rrkz : AVX512PI<opc, MRMSrcReg, (outs RC:$dst), (ins KRC:$mask, RC:$src),
+ _.ExeDomain>, EVEX;
+ def rrkz : AVX512PI<opc, MRMSrcReg, (outs _.RC:$dst),
+ (ins _.KRCWM:$mask, _.RC:$src),
!strconcat(OpcodeStr, "\t{$src, ${dst} {${mask}} {z}|",
- "${dst} {${mask}} {z}, $src}"), [], d>, EVEX, EVEX_KZ;
- }
+ "${dst} {${mask}} {z}, $src}"), [], _.ExeDomain>,
+ EVEX, EVEX_KZ;
+
let canFoldAsLoad = 1, isReMaterializable = IsReMaterializable,
SchedRW = [WriteLoad] in
- def rm : AVX512PI<opc, MRMSrcMem, (outs RC:$dst), (ins memop:$src),
+ def rm : AVX512PI<opc, MRMSrcMem, (outs _.RC:$dst), (ins _.MemOp:$src),
!strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
- [(set RC:$dst, (vt (bitconvert (ld_frag addr:$src))))],
- d>, EVEX;
-
- let AddedComplexity = 20 in {
- let Constraints = "$src0 = $dst", hasSideEffects = 0 in {
- let hasSideEffects = 0 in
- def rrk : AVX512PI<opc, MRMSrcReg, (outs RC:$dst),
- (ins RC:$src0, KRC:$mask, RC:$src1),
- !strconcat(OpcodeStr, "\t{$src1, ${dst} {${mask}}|",
- "${dst} {${mask}}, $src1}"),
- [(set RC:$dst, (vt (vselect KRC:$mask,
- (vt RC:$src1),
- (vt RC:$src0))))],
- d>, EVEX, EVEX_K;
+ [(set _.RC:$dst, (_.VT (bitconvert (ld_frag addr:$src))))],
+ _.ExeDomain>, EVEX;
+
+ let Constraints = "$src0 = $dst" in {
+ def rrk : AVX512PI<opc, MRMSrcReg, (outs _.RC:$dst),
+ (ins _.RC:$src0, _.KRCWM:$mask, _.RC:$src1),
+ !strconcat(OpcodeStr, "\t{$src1, ${dst} {${mask}}|",
+ "${dst} {${mask}}, $src1}"),
+ [(set _.RC:$dst, (_.VT (vselect _.KRCWM:$mask,
+ (_.VT _.RC:$src1),
+ (_.VT _.RC:$src0))))], _.ExeDomain>,
+ EVEX, EVEX_K;
let mayLoad = 1, SchedRW = [WriteLoad] in
- def rmk : AVX512PI<opc, MRMSrcMem, (outs RC:$dst),
- (ins RC:$src0, KRC:$mask, memop:$src1),
+ def rmk : AVX512PI<opc, MRMSrcMem, (outs _.RC:$dst),
+ (ins _.RC:$src0, _.KRCWM:$mask, _.MemOp:$src1),
!strconcat(OpcodeStr, "\t{$src1, ${dst} {${mask}}|",
"${dst} {${mask}}, $src1}"),
- [(set RC:$dst, (vt
- (vselect KRC:$mask,
- (vt (bitconvert (ld_frag addr:$src1))),
- (vt RC:$src0))))],
- d>, EVEX, EVEX_K;
+ [(set _.RC:$dst, (_.VT
+ (vselect _.KRCWM:$mask,
+ (_.VT (bitconvert (ld_frag addr:$src1))),
+ (_.VT _.RC:$src0))))], _.ExeDomain>, EVEX, EVEX_K;
}
let mayLoad = 1, SchedRW = [WriteLoad] in
- def rmkz : AVX512PI<opc, MRMSrcMem, (outs RC:$dst),
- (ins KRC:$mask, memop:$src),
- !strconcat(OpcodeStr, "\t{$src, ${dst} {${mask}} {z}|",
- "${dst} {${mask}} {z}, $src}"),
- [(set RC:$dst, (vt
- (vselect KRC:$mask,
- (vt (bitconvert (ld_frag addr:$src))),
- (vt (bitconvert (zvt immAllZerosV))))))],
- d>, EVEX, EVEX_KZ;
+ def rmkz : AVX512PI<opc, MRMSrcMem, (outs _.RC:$dst),
+ (ins _.KRCWM:$mask, _.MemOp:$src),
+ OpcodeStr #"\t{$src, ${dst} {${mask}} {z}|"#
+ "${dst} {${mask}} {z}, $src}",
+ [(set _.RC:$dst, (_.VT (vselect _.KRCWM:$mask,
+ (_.VT (bitconvert (ld_frag addr:$src))), _.ImmAllZerosV)))],
+ _.ExeDomain>, EVEX, EVEX_KZ;
}
+ def : Pat<(_.VT (mload addr:$ptr, _.KRCWM:$mask, undef)),
+ (!cast<Instruction>(NAME#_.ZSuffix##rmkz) _.KRCWM:$mask, addr:$ptr)>;
+
+ def : Pat<(_.VT (mload addr:$ptr, _.KRCWM:$mask, _.ImmAllZerosV)),
+ (!cast<Instruction>(NAME#_.ZSuffix##rmkz) _.KRCWM:$mask, addr:$ptr)>;
+
+ def : Pat<(_.VT (mload addr:$ptr, _.KRCWM:$mask, (_.VT _.RC:$src0))),
+ (!cast<Instruction>(NAME#_.ZSuffix##rmk) _.RC:$src0,
+ _.KRCWM:$mask, addr:$ptr)>;
}
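
The three patterns appended above route masked loads either to the zero-masking rmkz form (undef or all-zeros passthrough) or to the merging rmk form (register passthrough). A scalar model of that per-lane semantics:

#include <array>
#include <cstddef>
#include <cstdio>

// Scalar model of a masked vector load: active lanes read memory,
// inactive lanes take the passthrough. Zeros as Src0 model the rmkz form;
// a live register value models the rmk form.
template <std::size_t N>
std::array<int, N> maskedLoad(const std::array<int, N> &Mem, unsigned Mask,
                              const std::array<int, N> &Src0) {
  std::array<int, N> R{};
  for (std::size_t I = 0; I != N; ++I)
    R[I] = ((Mask >> I) & 1u) ? Mem[I] : Src0[I];
  return R;
}

int main() {
  std::array<int, 4> Mem{1, 2, 3, 4}, Zero{};
  auto R = maskedLoad(Mem, 0b0101u, Zero);              // lanes 0 and 2
  std::printf("%d %d %d %d\n", R[0], R[1], R[2], R[3]); // 1 0 3 0
}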
-multiclass avx512_load_vl<bits<8> opc, string OpcodeStr, string ld_pat,
- string elty, string elsz, string vsz512,
- string vsz256, string vsz128, Domain d,
- Predicate prd, bit IsReMaterializable = 1> {
+multiclass avx512_alignedload_vl<bits<8> opc, string OpcodeStr,
+ AVX512VLVectorVTInfo _,
+ Predicate prd,
+ bit IsReMaterializable = 1> {
let Predicates = [prd] in
- defm Z : avx512_load<opc, OpcodeStr,
- !cast<PatFrag>(ld_pat##"v"##vsz512##elty##elsz),
- !cast<RegisterClass>("VK"##vsz512##"WM"), VR512,
- !cast<ValueType>("v"##vsz512##elty##elsz), v16i32,
- !cast<X86MemOperand>(elty##"512mem"), d,
- IsReMaterializable>, EVEX_V512;
+ defm Z : avx512_load<opc, OpcodeStr, _.info512, _.info512.AlignedLdFrag,
+ masked_load_aligned512, IsReMaterializable>, EVEX_V512;
let Predicates = [prd, HasVLX] in {
- defm Z256 : avx512_load<opc, OpcodeStr,
- !cast<PatFrag>(ld_pat##!if(!eq(elty,"f"),
- "v"##vsz256##elty##elsz, "v4i64")),
- !cast<RegisterClass>("VK"##vsz256##"WM"), VR256X,
- !cast<ValueType>("v"##vsz256##elty##elsz), v8i32,
- !cast<X86MemOperand>(elty##"256mem"), d,
- IsReMaterializable>, EVEX_V256;
-
- defm Z128 : avx512_load<opc, OpcodeStr,
- !cast<PatFrag>(ld_pat##!if(!eq(elty,"f"),
- "v"##vsz128##elty##elsz, "v2i64")),
- !cast<RegisterClass>("VK"##vsz128##"WM"), VR128X,
- !cast<ValueType>("v"##vsz128##elty##elsz), v4i32,
- !cast<X86MemOperand>(elty##"128mem"), d,
- IsReMaterializable>, EVEX_V128;
+ defm Z256 : avx512_load<opc, OpcodeStr, _.info256, _.info256.AlignedLdFrag,
+ masked_load_aligned256, IsReMaterializable>, EVEX_V256;
+ defm Z128 : avx512_load<opc, OpcodeStr, _.info128, _.info128.AlignedLdFrag,
+ masked_load_aligned128, IsReMaterializable>, EVEX_V128;
}
}
+multiclass avx512_load_vl<bits<8> opc, string OpcodeStr,
+ AVX512VLVectorVTInfo _,
+ Predicate prd,
+ bit IsReMaterializable = 1> {
+ let Predicates = [prd] in
+ defm Z : avx512_load<opc, OpcodeStr, _.info512, _.info512.LdFrag,
+ masked_load_unaligned, IsReMaterializable>, EVEX_V512;
-multiclass avx512_store<bits<8> opc, string OpcodeStr, PatFrag st_frag,
- ValueType OpVT, RegisterClass KRC, RegisterClass RC,
- X86MemOperand memop, Domain d> {
+ let Predicates = [prd, HasVLX] in {
+ defm Z256 : avx512_load<opc, OpcodeStr, _.info256, _.info256.LdFrag,
+ masked_load_unaligned, IsReMaterializable>, EVEX_V256;
+ defm Z128 : avx512_load<opc, OpcodeStr, _.info128, _.info128.LdFrag,
+ masked_load_unaligned, IsReMaterializable>, EVEX_V128;
+ }
+}
+
+multiclass avx512_store<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
+ PatFrag st_frag, PatFrag mstore> {
let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0 in {
- def rr_alt : AVX512PI<opc, MRMDestReg, (outs RC:$dst), (ins RC:$src),
- !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), [], d>,
- EVEX;
+ def rr_alt : AVX512PI<opc, MRMDestReg, (outs _.RC:$dst), (ins _.RC:$src),
+ OpcodeStr # "\t{$src, $dst|$dst, $src}", [],
+ _.ExeDomain>, EVEX;
let Constraints = "$src1 = $dst" in
- def rrk_alt : AVX512PI<opc, MRMDestReg, (outs RC:$dst),
- (ins RC:$src1, KRC:$mask, RC:$src2),
- !strconcat(OpcodeStr,
- "\t{$src2, ${dst} {${mask}}|${dst} {${mask}}, $src2}"), [], d>,
- EVEX, EVEX_K;
- def rrkz_alt : AVX512PI<opc, MRMDestReg, (outs RC:$dst),
- (ins KRC:$mask, RC:$src),
- !strconcat(OpcodeStr,
- "\t{$src, ${dst} {${mask}} {z}|${dst} {${mask}} {z}, $src}"),
- [], d>, EVEX, EVEX_KZ;
+ def rrk_alt : AVX512PI<opc, MRMDestReg, (outs _.RC:$dst),
+ (ins _.RC:$src1, _.KRCWM:$mask, _.RC:$src2),
+ OpcodeStr #
+ "\t{$src2, ${dst} {${mask}}|${dst} {${mask}}, $src2}",
+ [], _.ExeDomain>, EVEX, EVEX_K;
+ def rrkz_alt : AVX512PI<opc, MRMDestReg, (outs _.RC:$dst),
+ (ins _.KRCWM:$mask, _.RC:$src),
+ OpcodeStr #
+ "\t{$src, ${dst} {${mask}} {z}|" #
+ "${dst} {${mask}} {z}, $src}",
+ [], _.ExeDomain>, EVEX, EVEX_KZ;
}
let mayStore = 1 in {
- def mr : AVX512PI<opc, MRMDestMem, (outs), (ins memop:$dst, RC:$src),
+ def mr : AVX512PI<opc, MRMDestMem, (outs), (ins _.MemOp:$dst, _.RC:$src),
!strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
- [(st_frag (OpVT RC:$src), addr:$dst)], d>, EVEX;
+ [(st_frag (_.VT _.RC:$src), addr:$dst)], _.ExeDomain>, EVEX;
def mrk : AVX512PI<opc, MRMDestMem, (outs),
- (ins memop:$dst, KRC:$mask, RC:$src),
- !strconcat(OpcodeStr,
- "\t{$src, ${dst} {${mask}}|${dst} {${mask}}, $src}"),
- [], d>, EVEX, EVEX_K;
+ (ins _.MemOp:$dst, _.KRCWM:$mask, _.RC:$src),
+ OpcodeStr # "\t{$src, ${dst} {${mask}}|${dst} {${mask}}, $src}",
+ [], _.ExeDomain>, EVEX, EVEX_K;
}
+
+ def: Pat<(mstore addr:$ptr, _.KRCWM:$mask, (_.VT _.RC:$src)),
+ (!cast<Instruction>(NAME#_.ZSuffix##mrk) addr:$ptr,
+ _.KRCWM:$mask, _.RC:$src)>;
}
-multiclass avx512_store_vl<bits<8> opc, string OpcodeStr, string st_pat,
- string st_suff_512, string st_suff_256,
- string st_suff_128, string elty, string elsz,
- string vsz512, string vsz256, string vsz128,
- Domain d, Predicate prd> {
+multiclass avx512_store_vl< bits<8> opc, string OpcodeStr,
+ AVX512VLVectorVTInfo _, Predicate prd> {
let Predicates = [prd] in
- defm Z : avx512_store<opc, OpcodeStr, !cast<PatFrag>(st_pat##st_suff_512),
- !cast<ValueType>("v"##vsz512##elty##elsz),
- !cast<RegisterClass>("VK"##vsz512##"WM"), VR512,
- !cast<X86MemOperand>(elty##"512mem"), d>, EVEX_V512;
+ defm Z : avx512_store<opc, OpcodeStr, _.info512, store,
+ masked_store_unaligned>, EVEX_V512;
let Predicates = [prd, HasVLX] in {
- defm Z256 : avx512_store<opc, OpcodeStr, !cast<PatFrag>(st_pat##st_suff_256),
- !cast<ValueType>("v"##vsz256##elty##elsz),
- !cast<RegisterClass>("VK"##vsz256##"WM"), VR256X,
- !cast<X86MemOperand>(elty##"256mem"), d>, EVEX_V256;
-
- defm Z128 : avx512_store<opc, OpcodeStr, !cast<PatFrag>(st_pat##st_suff_128),
- !cast<ValueType>("v"##vsz128##elty##elsz),
- !cast<RegisterClass>("VK"##vsz128##"WM"), VR128X,
- !cast<X86MemOperand>(elty##"128mem"), d>, EVEX_V128;
+ defm Z256 : avx512_store<opc, OpcodeStr, _.info256, store,
+ masked_store_unaligned>, EVEX_V256;
+ defm Z128 : avx512_store<opc, OpcodeStr, _.info128, store,
+ masked_store_unaligned>, EVEX_V128;
}
}
-defm VMOVAPS : avx512_load_vl<0x28, "vmovaps", "alignedload", "f", "32",
- "16", "8", "4", SSEPackedSingle, HasAVX512>,
- avx512_store_vl<0x29, "vmovaps", "alignedstore",
- "512", "256", "", "f", "32", "16", "8", "4",
- SSEPackedSingle, HasAVX512>,
- PS, EVEX_CD8<32, CD8VF>;
+multiclass avx512_alignedstore_vl<bits<8> opc, string OpcodeStr,
+ AVX512VLVectorVTInfo _, Predicate prd> {
+ let Predicates = [prd] in
+ defm Z : avx512_store<opc, OpcodeStr, _.info512, alignedstore512,
+ masked_store_aligned512>, EVEX_V512;
+
+ let Predicates = [prd, HasVLX] in {
+ defm Z256 : avx512_store<opc, OpcodeStr, _.info256, alignedstore256,
+ masked_store_aligned256>, EVEX_V256;
+ defm Z128 : avx512_store<opc, OpcodeStr, _.info128, alignedstore,
+ masked_store_aligned128>, EVEX_V128;
+ }
+}
-defm VMOVAPD : avx512_load_vl<0x28, "vmovapd", "alignedload", "f", "64",
- "8", "4", "2", SSEPackedDouble, HasAVX512>,
- avx512_store_vl<0x29, "vmovapd", "alignedstore",
- "512", "256", "", "f", "64", "8", "4", "2",
- SSEPackedDouble, HasAVX512>,
- PD, VEX_W, EVEX_CD8<64, CD8VF>;
-
-defm VMOVUPS : avx512_load_vl<0x10, "vmovups", "load", "f", "32",
- "16", "8", "4", SSEPackedSingle, HasAVX512>,
- avx512_store_vl<0x11, "vmovups", "store", "", "", "", "f", "32",
- "16", "8", "4", SSEPackedSingle, HasAVX512>,
+defm VMOVAPS : avx512_alignedload_vl<0x28, "vmovaps", avx512vl_f32_info,
+ HasAVX512>,
+ avx512_alignedstore_vl<0x29, "vmovaps", avx512vl_f32_info,
+ HasAVX512>, PS, EVEX_CD8<32, CD8VF>;
+
+defm VMOVAPD : avx512_alignedload_vl<0x28, "vmovapd", avx512vl_f64_info,
+ HasAVX512>,
+ avx512_alignedstore_vl<0x29, "vmovapd", avx512vl_f64_info,
+ HasAVX512>, PD, VEX_W, EVEX_CD8<64, CD8VF>;
+
+defm VMOVUPS : avx512_load_vl<0x10, "vmovups", avx512vl_f32_info, HasAVX512>,
+ avx512_store_vl<0x11, "vmovups", avx512vl_f32_info, HasAVX512>,
PS, EVEX_CD8<32, CD8VF>;
-defm VMOVUPD : avx512_load_vl<0x10, "vmovupd", "load", "f", "64",
- "8", "4", "2", SSEPackedDouble, HasAVX512, 0>,
- avx512_store_vl<0x11, "vmovupd", "store", "", "", "", "f", "64",
- "8", "4", "2", SSEPackedDouble, HasAVX512>,
- PD, VEX_W, EVEX_CD8<64, CD8VF>;
+defm VMOVUPD : avx512_load_vl<0x10, "vmovupd", avx512vl_f64_info, HasAVX512, 0>,
+ avx512_store_vl<0x11, "vmovupd", avx512vl_f64_info, HasAVX512>,
+ PD, VEX_W, EVEX_CD8<64, CD8VF>;
def: Pat<(v8f64 (int_x86_avx512_mask_loadu_pd_512 addr:$ptr,
(bc_v8f64 (v16i32 immAllZerosV)), GR8:$mask)),
@@ -2276,6 +2311,7 @@ def: Pat<(int_x86_avx512_mask_store_pd_512 addr:$ptr, (v8f64 VR512:$src),
(VMOVAPDZmrk addr:$ptr, (v8i1 (COPY_TO_REGCLASS GR8:$mask, VK8WM)),
VR512:$src)>;
+let Predicates = [HasAVX512, NoVLX] in {
def: Pat<(masked_store addr:$ptr, VK8WM:$mask, (v8f32 VR256:$src)),
(VMOVUPSZmrk addr:$ptr,
(v16i1 (COPY_TO_REGCLASS VK8WM:$mask, VK16WM)),
@@ -2285,73 +2321,36 @@ def: Pat<(v8f32 (masked_load addr:$ptr, VK8WM:$mask, undef)),
(v8f32 (EXTRACT_SUBREG (v16f32 (VMOVUPSZrmkz
(v16i1 (COPY_TO_REGCLASS VK8WM:$mask, VK16WM)), addr:$ptr)), sub_ymm))>;
-def: Pat<(masked_store addr:$ptr, VK16WM:$mask, (v16f32 VR512:$src)),
- (VMOVUPSZmrk addr:$ptr, VK16WM:$mask, VR512:$src)>;
-
-def: Pat<(masked_store addr:$ptr, VK8WM:$mask, (v8f64 VR512:$src)),
- (VMOVUPDZmrk addr:$ptr, VK8WM:$mask, VR512:$src)>;
-
-def: Pat<(v16f32 (masked_load addr:$ptr, VK16WM:$mask, undef)),
- (VMOVUPSZrmkz VK16WM:$mask, addr:$ptr)>;
-
-def: Pat<(v16f32 (masked_load addr:$ptr, VK16WM:$mask,
- (bc_v16f32 (v16i32 immAllZerosV)))),
- (VMOVUPSZrmkz VK16WM:$mask, addr:$ptr)>;
-
-def: Pat<(v16f32 (masked_load addr:$ptr, VK16WM:$mask, (v16f32 VR512:$src0))),
- (VMOVUPSZrmk VR512:$src0, VK16WM:$mask, addr:$ptr)>;
-
-def: Pat<(v8f64 (masked_load addr:$ptr, VK8WM:$mask, undef)),
- (VMOVUPDZrmkz VK8WM:$mask, addr:$ptr)>;
-
-def: Pat<(v8f64 (masked_load addr:$ptr, VK8WM:$mask,
- (bc_v8f64 (v16i32 immAllZerosV)))),
- (VMOVUPDZrmkz VK8WM:$mask, addr:$ptr)>;
-
-def: Pat<(v8f64 (masked_load addr:$ptr, VK8WM:$mask, (v8f64 VR512:$src0))),
- (VMOVUPDZrmk VR512:$src0, VK8WM:$mask, addr:$ptr)>;
-
def: Pat<(v8f32 (masked_load addr:$ptr, VK8WM:$mask, (v8f32 VR256:$src0))),
(v8f32 (EXTRACT_SUBREG (v16f32 (VMOVUPSZrmk
(INSERT_SUBREG (v16f32 (IMPLICIT_DEF)), VR256:$src0, sub_ymm),
(v16i1 (COPY_TO_REGCLASS VK8WM:$mask, VK16WM)), addr:$ptr)), sub_ymm))>;
+}
-defm VMOVDQA32 : avx512_load_vl<0x6F, "vmovdqa32", "alignedload", "i", "32",
- "16", "8", "4", SSEPackedInt, HasAVX512>,
- avx512_store_vl<0x7F, "vmovdqa32", "alignedstore",
- "512", "256", "", "i", "32", "16", "8", "4",
- SSEPackedInt, HasAVX512>,
- PD, EVEX_CD8<32, CD8VF>;
-
-defm VMOVDQA64 : avx512_load_vl<0x6F, "vmovdqa64", "alignedload", "i", "64",
- "8", "4", "2", SSEPackedInt, HasAVX512>,
- avx512_store_vl<0x7F, "vmovdqa64", "alignedstore",
- "512", "256", "", "i", "64", "8", "4", "2",
- SSEPackedInt, HasAVX512>,
- PD, VEX_W, EVEX_CD8<64, CD8VF>;
-
-defm VMOVDQU8 : avx512_load_vl<0x6F, "vmovdqu8", "load", "i", "8",
- "64", "32", "16", SSEPackedInt, HasBWI>,
- avx512_store_vl<0x7F, "vmovdqu8", "store", "", "", "",
- "i", "8", "64", "32", "16", SSEPackedInt,
+defm VMOVDQA32 : avx512_alignedload_vl<0x6F, "vmovdqa32", avx512vl_i32_info,
+ HasAVX512>,
+ avx512_alignedstore_vl<0x7F, "vmovdqa32", avx512vl_i32_info,
+ HasAVX512>, PD, EVEX_CD8<32, CD8VF>;
+
+defm VMOVDQA64 : avx512_alignedload_vl<0x6F, "vmovdqa64", avx512vl_i64_info,
+ HasAVX512>,
+ avx512_alignedstore_vl<0x7F, "vmovdqa64", avx512vl_i64_info,
+ HasAVX512>, PD, VEX_W, EVEX_CD8<64, CD8VF>;
+
+defm VMOVDQU8 : avx512_load_vl<0x6F, "vmovdqu8", avx512vl_i8_info, HasBWI>,
+ avx512_store_vl<0x7F, "vmovdqu8", avx512vl_i8_info,
HasBWI>, XD, EVEX_CD8<8, CD8VF>;
-defm VMOVDQU16 : avx512_load_vl<0x6F, "vmovdqu16", "load", "i", "16",
- "32", "16", "8", SSEPackedInt, HasBWI>,
- avx512_store_vl<0x7F, "vmovdqu16", "store", "", "", "",
- "i", "16", "32", "16", "8", SSEPackedInt,
+defm VMOVDQU16 : avx512_load_vl<0x6F, "vmovdqu16", avx512vl_i16_info, HasBWI>,
+ avx512_store_vl<0x7F, "vmovdqu16", avx512vl_i16_info,
HasBWI>, XD, VEX_W, EVEX_CD8<16, CD8VF>;
-defm VMOVDQU32 : avx512_load_vl<0x6F, "vmovdqu32", "load", "i", "32",
- "16", "8", "4", SSEPackedInt, HasAVX512>,
- avx512_store_vl<0x7F, "vmovdqu32", "store", "", "", "",
- "i", "32", "16", "8", "4", SSEPackedInt,
+defm VMOVDQU32 : avx512_load_vl<0x6F, "vmovdqu32", avx512vl_i32_info, HasAVX512>,
+ avx512_store_vl<0x7F, "vmovdqu32", avx512vl_i32_info,
HasAVX512>, XS, EVEX_CD8<32, CD8VF>;
-defm VMOVDQU64 : avx512_load_vl<0x6F, "vmovdqu64", "load", "i", "64",
- "8", "4", "2", SSEPackedInt, HasAVX512>,
- avx512_store_vl<0x7F, "vmovdqu64", "store", "", "", "",
- "i", "64", "8", "4", "2", SSEPackedInt,
+defm VMOVDQU64 : avx512_load_vl<0x6F, "vmovdqu64", avx512vl_i64_info, HasAVX512>,
+ avx512_store_vl<0x7F, "vmovdqu64", avx512vl_i64_info,
HasAVX512>, XS, VEX_W, EVEX_CD8<64, CD8VF>;
def: Pat<(v16i32 (int_x86_avx512_mask_loadu_d_512 addr:$ptr,
@@ -2389,37 +2388,8 @@ def : Pat<(v16i32 (vselect VK16WM:$mask, (v16i32 immAllZerosV),
(v16i32 VR512:$src))),
(VMOVDQU32Zrrkz (KNOTWrr VK16WM:$mask), VR512:$src)>;
}
-
-def: Pat<(v16i32 (masked_load addr:$ptr, VK16WM:$mask, (v16i32 immAllZerosV))),
- (VMOVDQU32Zrmkz VK16WM:$mask, addr:$ptr)>;
-
-def: Pat<(v16i32 (masked_load addr:$ptr, VK16WM:$mask, undef)),
- (VMOVDQU32Zrmkz VK16WM:$mask, addr:$ptr)>;
-
-def: Pat<(v16i32 (masked_load addr:$ptr, VK16WM:$mask, (v16i32 VR512:$src0))),
- (VMOVDQU32Zrmk VR512:$src0, VK16WM:$mask, addr:$ptr)>;
-
-def: Pat<(v8i64 (masked_load addr:$ptr, VK8WM:$mask,
- (bc_v8i64 (v16i32 immAllZerosV)))),
- (VMOVDQU64Zrmkz VK8WM:$mask, addr:$ptr)>;
-
-def: Pat<(v8i64 (masked_load addr:$ptr, VK8WM:$mask, undef)),
- (VMOVDQU64Zrmkz VK8WM:$mask, addr:$ptr)>;
-
-def: Pat<(v8i64 (masked_load addr:$ptr, VK8WM:$mask, (v8i64 VR512:$src0))),
- (VMOVDQU64Zrmk VR512:$src0, VK8WM:$mask, addr:$ptr)>;
-
-def: Pat<(masked_store addr:$ptr, VK16WM:$mask, (v16i32 VR512:$src)),
- (VMOVDQU32Zmrk addr:$ptr, VK16WM:$mask, VR512:$src)>;
-
-def: Pat<(masked_store addr:$ptr, VK8WM:$mask, (v8i64 VR512:$src)),
- (VMOVDQU64Zmrk addr:$ptr, VK8WM:$mask, VR512:$src)>;
-
-// SKX replacement
-def: Pat<(masked_store addr:$ptr, VK8WM:$mask, (v8i32 VR256:$src)),
- (VMOVDQU32Z256mrk addr:$ptr, VK8WM:$mask, VR256:$src)>;
-
-// KNL replacement
+// NoVLX patterns
+let Predicates = [HasAVX512, NoVLX] in {
def: Pat<(masked_store addr:$ptr, VK8WM:$mask, (v8i32 VR256:$src)),
(VMOVDQU32Zmrk addr:$ptr,
(v16i1 (COPY_TO_REGCLASS VK8WM:$mask, VK16WM)),
@@ -2428,7 +2398,7 @@ def: Pat<(masked_store addr:$ptr, VK8WM:$mask, (v8i32 VR256:$src)),
def: Pat<(v8i32 (masked_load addr:$ptr, VK8WM:$mask, undef)),
(v8i32 (EXTRACT_SUBREG (v16i32 (VMOVDQU32Zrmkz
(v16i1 (COPY_TO_REGCLASS VK8WM:$mask, VK16WM)), addr:$ptr)), sub_ymm))>;
-
+}
// Move Int Doubleword to Packed Double Int
//
@@ -3243,28 +3213,95 @@ defm VPANDN : avx512_binop_rm_vl_dq<0xDF, 0xDF, "vpandn", X86andnp,
//===----------------------------------------------------------------------===//
// AVX-512 FP arithmetic
//===----------------------------------------------------------------------===//
+multiclass avx512_fp_scalar<bits<8> opc, string OpcodeStr,X86VectorVTInfo _,
+ SDNode OpNode, SDNode VecNode, OpndItins itins,
+ bit IsCommutable> {
-multiclass avx512_binop_s<bits<8> opc, string OpcodeStr, SDNode OpNode,
- SizeItins itins> {
- defm SSZ : sse12_fp_scalar<opc, !strconcat(OpcodeStr, "ss"), OpNode, FR32X,
- f32mem, itins.s, 0>, XS, EVEX_4V, VEX_LIG,
- EVEX_CD8<32, CD8VT1>;
- defm SDZ : sse12_fp_scalar<opc, !strconcat(OpcodeStr, "sd"), OpNode, FR64X,
- f64mem, itins.d, 0>, XD, VEX_W, EVEX_4V, VEX_LIG,
- EVEX_CD8<64, CD8VT1>;
+ defm rr_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
+ (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
+ "$src2, $src1", "$src1, $src2",
+ (VecNode (_.VT _.RC:$src1), (_.VT _.RC:$src2),
+ (i32 FROUND_CURRENT)),
+ "", itins.rr, IsCommutable>;
+
+ defm rm_Int : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
+ (ins _.RC:$src1, _.MemOp:$src2), OpcodeStr,
+ "$src2, $src1", "$src1, $src2",
+ (VecNode (_.VT _.RC:$src1),
+ (_.VT (scalar_to_vector (_.ScalarLdFrag addr:$src2))),
+ (i32 FROUND_CURRENT)),
+ "", itins.rm, IsCommutable>;
+ let isCodeGenOnly = 1, isCommutable = IsCommutable,
+ Predicates = [HasAVX512] in {
+ def rr : I< opc, MRMSrcReg, (outs _.FRC:$dst),
+ (ins _.FRC:$src1, _.FRC:$src2),
+ OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+ [(set _.FRC:$dst, (OpNode _.FRC:$src1, _.FRC:$src2))],
+ itins.rr>;
+ def rm : I< opc, MRMSrcMem, (outs _.FRC:$dst),
+ (ins _.FRC:$src1, _.ScalarMemOp:$src2),
+ OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+ [(set _.FRC:$dst, (OpNode _.FRC:$src1,
+ (_.ScalarLdFrag addr:$src2)))], itins.rr>;
+ }
}
-let isCommutable = 1 in {
-defm VADD : avx512_binop_s<0x58, "add", fadd, SSE_ALU_ITINS_S>;
-defm VMUL : avx512_binop_s<0x59, "mul", fmul, SSE_ALU_ITINS_S>;
-defm VMIN : avx512_binop_s<0x5D, "min", X86fmin, SSE_ALU_ITINS_S>;
-defm VMAX : avx512_binop_s<0x5F, "max", X86fmax, SSE_ALU_ITINS_S>;
+multiclass avx512_fp_scalar_round<bits<8> opc, string OpcodeStr,X86VectorVTInfo _,
+ SDNode VecNode, OpndItins itins, bit IsCommutable> {
+
+ defm rrb : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
+ (ins _.RC:$src1, _.RC:$src2, AVX512RC:$rc), OpcodeStr,
+ "$rc, $src2, $src1", "$src1, $src2, $rc",
+ (VecNode (_.VT _.RC:$src1), (_.VT _.RC:$src2),
+ (i32 imm:$rc)), "", itins.rr, IsCommutable>,
+ EVEX_B, EVEX_RC;
}
-let isCommutable = 0 in {
-defm VSUB : avx512_binop_s<0x5C, "sub", fsub, SSE_ALU_ITINS_S>;
-defm VDIV : avx512_binop_s<0x5E, "div", fdiv, SSE_ALU_ITINS_S>;
+multiclass avx512_fp_scalar_sae<bits<8> opc, string OpcodeStr,X86VectorVTInfo _,
+ SDNode VecNode, OpndItins itins, bit IsCommutable> {
+
+ defm rrb : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
+ (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
+ "$src2, $src1", "$src1, $src2",
+ (VecNode (_.VT _.RC:$src1), (_.VT _.RC:$src2),
+ (i32 FROUND_NO_EXC)), "{sae}">, EVEX_B;
}
+multiclass avx512_binop_s_round<bits<8> opc, string OpcodeStr, SDNode OpNode,
+ SDNode VecNode,
+ SizeItins itins, bit IsCommutable> {
+ defm SSZ : avx512_fp_scalar<opc, OpcodeStr#"ss", f32x_info, OpNode, VecNode,
+ itins.s, IsCommutable>,
+ avx512_fp_scalar_round<opc, OpcodeStr#"ss", f32x_info, VecNode,
+ itins.s, IsCommutable>,
+ XS, EVEX_4V, VEX_LIG, EVEX_CD8<32, CD8VT1>;
+ defm SDZ : avx512_fp_scalar<opc, OpcodeStr#"sd", f64x_info, OpNode, VecNode,
+ itins.d, IsCommutable>,
+ avx512_fp_scalar_round<opc, OpcodeStr#"sd", f64x_info, VecNode,
+ itins.d, IsCommutable>,
+ XD, VEX_W, EVEX_4V, VEX_LIG, EVEX_CD8<64, CD8VT1>;
+}
+
+multiclass avx512_binop_s_sae<bits<8> opc, string OpcodeStr, SDNode OpNode,
+ SDNode VecNode,
+ SizeItins itins, bit IsCommutable> {
+ defm SSZ : avx512_fp_scalar<opc, OpcodeStr#"ss", f32x_info, OpNode, VecNode,
+ itins.s, IsCommutable>,
+ avx512_fp_scalar_sae<opc, OpcodeStr#"ss", f32x_info, VecNode,
+ itins.s, IsCommutable>,
+ XS, EVEX_4V, VEX_LIG, EVEX_CD8<32, CD8VT1>;
+ defm SDZ : avx512_fp_scalar<opc, OpcodeStr#"sd", f64x_info, OpNode, VecNode,
+ itins.d, IsCommutable>,
+ avx512_fp_scalar_sae<opc, OpcodeStr#"sd", f64x_info, VecNode,
+ itins.d, IsCommutable>,
+ XD, VEX_W, EVEX_4V, VEX_LIG, EVEX_CD8<64, CD8VT1>;
+}
+defm VADD : avx512_binop_s_round<0x58, "vadd", fadd, X86faddRnd, SSE_ALU_ITINS_S, 1>;
+defm VMUL : avx512_binop_s_round<0x59, "vmul", fmul, X86fmulRnd, SSE_ALU_ITINS_S, 1>;
+defm VSUB : avx512_binop_s_round<0x5C, "vsub", fsub, X86fsubRnd, SSE_ALU_ITINS_S, 0>;
+defm VDIV : avx512_binop_s_round<0x5E, "vdiv", fdiv, X86fdivRnd, SSE_ALU_ITINS_S, 0>;
+defm VMIN : avx512_binop_s_sae <0x5D, "vmin", X86fmin, X86fminRnd, SSE_ALU_ITINS_S, 1>;
+defm VMAX : avx512_binop_s_sae <0x5F, "vmax", X86fmax, X86fmaxRnd, SSE_ALU_ITINS_S, 1>;
+
multiclass avx512_fp_packed<bits<8> opc, string OpcodeStr, SDNode OpNode,
X86VectorVTInfo _, bit IsCommutable> {
defm rr: AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
@@ -3411,15 +3448,27 @@ multiclass avx512_shift_rmi<bits<8> opc, Format ImmFormR, Format ImmFormM,
"$src2, $src1", "$src1, $src2",
(_.VT (OpNode _.RC:$src1, (i8 imm:$src2))),
" ", SSE_INTSHIFT_ITINS_P.rr>, AVX512BIi8Base, EVEX_4V;
+ let mayLoad = 1 in
defm mi : AVX512_maskable<opc, ImmFormM, _, (outs _.RC:$dst),
(ins _.MemOp:$src1, u8imm:$src2), OpcodeStr,
"$src2, $src1", "$src1, $src2",
- (_.VT (OpNode (_.LdFrag addr:$src1), (i8 imm:$src2))),
+ (_.VT (OpNode (_.VT (bitconvert (_.LdFrag addr:$src1))),
+ (i8 imm:$src2))),
" ", SSE_INTSHIFT_ITINS_P.rm>, AVX512BIi8Base, EVEX_4V;
}
+multiclass avx512_shift_rmbi<bits<8> opc, Format ImmFormM,
+ string OpcodeStr, SDNode OpNode, X86VectorVTInfo _> {
+ let mayLoad = 1 in
+ defm mbi : AVX512_maskable<opc, ImmFormM, _, (outs _.RC:$dst),
+ (ins _.ScalarMemOp:$src1, u8imm:$src2), OpcodeStr,
+ "$src2, ${src1}"##_.BroadcastStr, "${src1}"##_.BroadcastStr##", $src2",
+ (_.VT (OpNode (X86VBroadcast (_.ScalarLdFrag addr:$src1)), (i8 imm:$src2))),
+ " ", SSE_INTSHIFT_ITINS_P.rm>, AVX512BIi8Base, EVEX_4V, EVEX_B;
+}
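
The new mbi form folds a broadcast load into the immediate shift: one scalar is splatted across the vector and every lane is then shifted by the same immediate, as in this scalar model:

#include <array>
#include <cstdio>

// Broadcast-then-shift, per the (OpNode (X86VBroadcast ...), imm) pattern.
int main() {
  int Scalar = 3;                 // the single element loaded from memory
  std::array<int, 4> V;
  V.fill(Scalar);                 // X86VBroadcast
  for (int &Lane : V)
    Lane <<= 2;                   // the shift with i8 immediate 2
  std::printf("%d %d %d %d\n", V[0], V[1], V[2], V[3]); // 12 12 12 12
}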
+
multiclass avx512_shift_rrm<bits<8> opc, string OpcodeStr, SDNode OpNode,
- ValueType SrcVT, PatFrag bc_frag, X86VectorVTInfo _> {
+ ValueType SrcVT, PatFrag bc_frag, X86VectorVTInfo _> {
// src2 is always 128-bit
defm rr : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
(ins _.RC:$src1, VR128X:$src2), OpcodeStr,
@@ -3430,46 +3479,95 @@ multiclass avx512_shift_rrm<bits<8> opc, string OpcodeStr, SDNode OpNode,
(ins _.RC:$src1, i128mem:$src2), OpcodeStr,
"$src2, $src1", "$src1, $src2",
(_.VT (OpNode _.RC:$src1, (bc_frag (loadv2i64 addr:$src2)))),
- " ", SSE_INTSHIFT_ITINS_P.rm>, AVX512BIBase, EVEX_4V;
+ " ", SSE_INTSHIFT_ITINS_P.rm>, AVX512BIBase,
+ EVEX_4V;
}
multiclass avx512_shift_sizes<bits<8> opc, string OpcodeStr, SDNode OpNode,
- ValueType SrcVT, PatFrag bc_frag, X86VectorVTInfo _> {
- defm Z : avx512_shift_rrm<opc, OpcodeStr, OpNode, SrcVT, bc_frag, _>, EVEX_V512;
+ ValueType SrcVT, PatFrag bc_frag,
+ AVX512VLVectorVTInfo VTInfo, Predicate prd> {
+ let Predicates = [prd] in
+ defm Z : avx512_shift_rrm<opc, OpcodeStr, OpNode, SrcVT, bc_frag,
+ VTInfo.info512>, EVEX_V512,
+ EVEX_CD8<VTInfo.info512.EltSize, CD8VQ> ;
+ let Predicates = [prd, HasVLX] in {
+ defm Z256 : avx512_shift_rrm<opc, OpcodeStr, OpNode, SrcVT, bc_frag,
+ VTInfo.info256>, EVEX_V256,
+ EVEX_CD8<VTInfo.info256.EltSize, CD8VH>;
+ defm Z128 : avx512_shift_rrm<opc, OpcodeStr, OpNode, SrcVT, bc_frag,
+ VTInfo.info128>, EVEX_V128,
+ EVEX_CD8<VTInfo.info128.EltSize, CD8VF>;
+ }
}
-multiclass avx512_shift_types<bits<8> opcd, bits<8> opcq, string OpcodeStr,
- SDNode OpNode> {
+multiclass avx512_shift_types<bits<8> opcd, bits<8> opcq, bits<8> opcw,
+ string OpcodeStr, SDNode OpNode> {
defm D : avx512_shift_sizes<opcd, OpcodeStr#"d", OpNode, v4i32, bc_v4i32,
- v16i32_info>, EVEX_CD8<32, CD8VQ>;
+ avx512vl_i32_info, HasAVX512>;
defm Q : avx512_shift_sizes<opcq, OpcodeStr#"q", OpNode, v2i64, bc_v2i64,
- v8i64_info>, EVEX_CD8<64, CD8VQ>, VEX_W;
+ avx512vl_i64_info, HasAVX512>, VEX_W;
+ defm W : avx512_shift_sizes<opcw, OpcodeStr#"w", OpNode, v8i16, bc_v8i16,
+ avx512vl_i16_info, HasBWI>;
+}
+
+multiclass avx512_shift_rmi_sizes<bits<8> opc, Format ImmFormR, Format ImmFormM,
+ string OpcodeStr, SDNode OpNode,
+ AVX512VLVectorVTInfo VTInfo> {
+ let Predicates = [HasAVX512] in
+ defm Z: avx512_shift_rmi<opc, ImmFormR, ImmFormM, OpcodeStr, OpNode,
+ VTInfo.info512>,
+ avx512_shift_rmbi<opc, ImmFormM, OpcodeStr, OpNode,
+ VTInfo.info512>, EVEX_V512;
+ let Predicates = [HasAVX512, HasVLX] in {
+ defm Z256: avx512_shift_rmi<opc, ImmFormR, ImmFormM, OpcodeStr, OpNode,
+ VTInfo.info256>,
+ avx512_shift_rmbi<opc, ImmFormM, OpcodeStr, OpNode,
+ VTInfo.info256>, EVEX_V256;
+ defm Z128: avx512_shift_rmi<opc, ImmFormR, ImmFormM, OpcodeStr, OpNode,
+ VTInfo.info128>,
+ avx512_shift_rmbi<opc, ImmFormM, OpcodeStr, OpNode,
+ VTInfo.info128>, EVEX_V128;
+ }
}
-defm VPSRLDZ : avx512_shift_rmi<0x72, MRM2r, MRM2m, "vpsrld", X86vsrli,
- v16i32_info>,
- EVEX_V512, EVEX_CD8<32, CD8VF>;
-defm VPSRLQZ : avx512_shift_rmi<0x73, MRM2r, MRM2m, "vpsrlq", X86vsrli,
- v8i64_info>, EVEX_V512,
- EVEX_CD8<64, CD8VF>, VEX_W;
+multiclass avx512_shift_rmi_w<bits<8> opcw,
+ Format ImmFormR, Format ImmFormM,
+ string OpcodeStr, SDNode OpNode> {
+ let Predicates = [HasBWI] in
+ defm WZ: avx512_shift_rmi<opcw, ImmFormR, ImmFormM, OpcodeStr, OpNode,
+ v32i16_info>, EVEX_V512;
+ let Predicates = [HasVLX, HasBWI] in {
+ defm WZ256: avx512_shift_rmi<opcw, ImmFormR, ImmFormM, OpcodeStr, OpNode,
+ v16i16x_info>, EVEX_V256;
+ defm WZ128: avx512_shift_rmi<opcw, ImmFormR, ImmFormM, OpcodeStr, OpNode,
+ v8i16x_info>, EVEX_V128;
+ }
+}
-defm VPSLLDZ : avx512_shift_rmi<0x72, MRM6r, MRM6m, "vpslld", X86vshli,
- v16i32_info>, EVEX_V512,
- EVEX_CD8<32, CD8VF>;
-defm VPSLLQZ : avx512_shift_rmi<0x73, MRM6r, MRM6m, "vpsllq", X86vshli,
- v8i64_info>, EVEX_V512,
- EVEX_CD8<64, CD8VF>, VEX_W;
+multiclass avx512_shift_rmi_dq<bits<8> opcd, bits<8> opcq,
+ Format ImmFormR, Format ImmFormM,
+ string OpcodeStr, SDNode OpNode> {
+ defm D: avx512_shift_rmi_sizes<opcd, ImmFormR, ImmFormM, OpcodeStr#"d", OpNode,
+ avx512vl_i32_info>, EVEX_CD8<32, CD8VF>;
+ defm Q: avx512_shift_rmi_sizes<opcq, ImmFormR, ImmFormM, OpcodeStr#"q", OpNode,
+ avx512vl_i64_info>, EVEX_CD8<64, CD8VF>, VEX_W;
+}
-defm VPSRADZ : avx512_shift_rmi<0x72, MRM4r, MRM4m, "vpsrad", X86vsrai,
- v16i32_info>,
- EVEX_V512, EVEX_CD8<32, CD8VF>;
-defm VPSRAQZ : avx512_shift_rmi<0x72, MRM4r, MRM4m, "vpsraq", X86vsrai,
- v8i64_info>, EVEX_V512,
- EVEX_CD8<64, CD8VF>, VEX_W;
+defm VPSRL : avx512_shift_rmi_dq<0x72, 0x73, MRM2r, MRM2m, "vpsrl", X86vsrli>,
+ avx512_shift_rmi_w<0x71, MRM2r, MRM2m, "vpsrlw", X86vsrli>;
-defm VPSLL : avx512_shift_types<0xF2, 0xF3, "vpsll", X86vshl>;
-defm VPSRA : avx512_shift_types<0xE2, 0xE2, "vpsra", X86vsra>;
-defm VPSRL : avx512_shift_types<0xD2, 0xD3, "vpsrl", X86vsrl>;
+defm VPSLL : avx512_shift_rmi_dq<0x72, 0x73, MRM6r, MRM6m, "vpsll", X86vshli>,
+ avx512_shift_rmi_w<0x71, MRM6r, MRM6m, "vpsllw", X86vshli>;
+
+defm VPSRA : avx512_shift_rmi_dq<0x72, 0x73, MRM4r, MRM4m, "vpsra", X86vsrai>,
+ avx512_shift_rmi_w<0x71, MRM4r, MRM4m, "vpsraw", X86vsrai>;
+
+defm VPROR : avx512_shift_rmi_dq<0x72, 0x72, MRM0r, MRM0m, "vpror", rotr>;
+defm VPROL : avx512_shift_rmi_dq<0x72, 0x72, MRM1r, MRM1m, "vprol", rotl>;
+
+defm VPSLL : avx512_shift_types<0xF2, 0xF3, 0xF1, "vpsll", X86vshl>;
+defm VPSRA : avx512_shift_types<0xE2, 0xE2, 0xE1, "vpsra", X86vsra>;
+defm VPSRL : avx512_shift_types<0xD2, 0xD3, 0xD1, "vpsrl", X86vsrl>;
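
VPROR and VPROL above hook straight into the generic rotr and rotl nodes; per element the operation is an ordinary bitwise rotation, e.g. for a 32-bit lane:

#include <cstdint>
#include <cstdio>

// Right rotation of one 32-bit element, as rotr denotes.
static uint32_t rotr32(uint32_t X, unsigned R) {
  R &= 31;
  return (X >> R) | (X << ((32 - R) & 31));
}

int main() {
  std::printf("%08x\n", rotr32(0x80000001u, 1)); // c0000000
}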
//===-------------------------------------------------------------------===//
// Variable Bit Shifts
@@ -3481,29 +3579,71 @@ multiclass avx512_var_shift<bits<8> opc, string OpcodeStr, SDNode OpNode,
"$src2, $src1", "$src1, $src2",
(_.VT (OpNode _.RC:$src1, (_.VT _.RC:$src2))),
" ", SSE_INTSHIFT_ITINS_P.rr>, AVX5128IBase, EVEX_4V;
+ let mayLoad = 1 in
defm rm : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
(ins _.RC:$src1, _.MemOp:$src2), OpcodeStr,
"$src2, $src1", "$src1, $src2",
(_.VT (OpNode _.RC:$src1, (_.LdFrag addr:$src2))),
- " ", SSE_INTSHIFT_ITINS_P.rm>, AVX5128IBase, EVEX_4V;
+ " ", SSE_INTSHIFT_ITINS_P.rm>, AVX5128IBase, EVEX_4V,
+ EVEX_CD8<_.EltSize, CD8VF>;
}
+multiclass avx512_var_shift_mb<bits<8> opc, string OpcodeStr, SDNode OpNode,
+ X86VectorVTInfo _> {
+ let mayLoad = 1 in
+ defm rmb : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
+ (ins _.RC:$src1, _.ScalarMemOp:$src2), OpcodeStr,
+ "${src2}"##_.BroadcastStr##", $src1",
+ "$src1, ${src2}"##_.BroadcastStr,
+ (_.VT (OpNode _.RC:$src1, (_.VT (X86VBroadcast
+ (_.ScalarLdFrag addr:$src2))))),
+ " ", SSE_INTSHIFT_ITINS_P.rm>, AVX5128IBase, EVEX_B,
+ EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>;
+}
multiclass avx512_var_shift_sizes<bits<8> opc, string OpcodeStr, SDNode OpNode,
AVX512VLVectorVTInfo _> {
- defm Z : avx512_var_shift<opc, OpcodeStr, OpNode, _.info512>, EVEX_V512;
+ let Predicates = [HasAVX512] in
+ defm Z : avx512_var_shift<opc, OpcodeStr, OpNode, _.info512>,
+ avx512_var_shift_mb<opc, OpcodeStr, OpNode, _.info512>, EVEX_V512;
+
+ let Predicates = [HasAVX512, HasVLX] in {
+ defm Z256 : avx512_var_shift<opc, OpcodeStr, OpNode, _.info256>,
+ avx512_var_shift_mb<opc, OpcodeStr, OpNode, _.info256>, EVEX_V256;
+ defm Z128 : avx512_var_shift<opc, OpcodeStr, OpNode, _.info128>,
+ avx512_var_shift_mb<opc, OpcodeStr, OpNode, _.info128>, EVEX_V128;
+ }
}
multiclass avx512_var_shift_types<bits<8> opc, string OpcodeStr,
SDNode OpNode> {
defm D : avx512_var_shift_sizes<opc, OpcodeStr#"d", OpNode,
- avx512vl_i32_info>, EVEX_CD8<32, CD8VQ>;
+ avx512vl_i32_info>;
defm Q : avx512_var_shift_sizes<opc, OpcodeStr#"q", OpNode,
- avx512vl_i64_info>, EVEX_CD8<64, CD8VQ>, VEX_W;
+ avx512vl_i64_info>, VEX_W;
}
-defm VPSLLV : avx512_var_shift_types<0x47, "vpsllv", shl>;
-defm VPSRAV : avx512_var_shift_types<0x46, "vpsrav", sra>;
-defm VPSRLV : avx512_var_shift_types<0x45, "vpsrlv", srl>;
+multiclass avx512_var_shift_w<bits<8> opc, string OpcodeStr,
+ SDNode OpNode> {
+ let Predicates = [HasBWI] in
+ defm WZ: avx512_var_shift<opc, OpcodeStr, OpNode, v32i16_info>,
+ EVEX_V512, VEX_W;
+ let Predicates = [HasVLX, HasBWI] in {
+
+ defm WZ256: avx512_var_shift<opc, OpcodeStr, OpNode, v16i16x_info>,
+ EVEX_V256, VEX_W;
+ defm WZ128: avx512_var_shift<opc, OpcodeStr, OpNode, v8i16x_info>,
+ EVEX_V128, VEX_W;
+ }
+}
+
+defm VPSLLV : avx512_var_shift_types<0x47, "vpsllv", shl>,
+ avx512_var_shift_w<0x12, "vpsllvw", shl>;
+defm VPSRAV : avx512_var_shift_types<0x46, "vpsrav", sra>,
+ avx512_var_shift_w<0x11, "vpsravw", sra>;
+defm VPSRLV : avx512_var_shift_types<0x45, "vpsrlv", srl>,
+ avx512_var_shift_w<0x10, "vpsrlvw", srl>;
+defm VPRORV : avx512_var_shift_types<0x14, "vprorv", rotr>;
+defm VPROLV : avx512_var_shift_types<0x15, "vprolv", rotl>;
//===----------------------------------------------------------------------===//
// AVX-512 - MOVDDUP
@@ -4919,81 +5059,74 @@ defm VPMOVSXDQZ: avx512_extend<0x25, "vpmovsxdq", VK8WM, VR512, VR256X, X86vsext
//===----------------------------------------------------------------------===//
// GATHER - SCATTER Operations
-multiclass avx512_gather<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
- X86MemOperand memop, PatFrag GatherNode> {
-let mayLoad = 1, hasTwoExplicitDefs = 1,
+multiclass avx512_gather<bits<8> opc, string OpcodeStr, RegisterClass KRC,
+ RegisterClass RC, X86MemOperand memop> {
+let mayLoad = 1,
Constraints = "@earlyclobber $dst, $src1 = $dst, $mask = $mask_wb" in
- def rm : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst, _.KRCWM:$mask_wb),
- (ins _.RC:$src1, _.KRCWM:$mask, memop:$src2),
+ def rm : AVX5128I<opc, MRMSrcMem, (outs RC:$dst, KRC:$mask_wb),
+ (ins RC:$src1, KRC:$mask, memop:$src2),
!strconcat(OpcodeStr,
"\t{$src2, ${dst} {${mask}}|${dst} {${mask}}, $src2}"),
- [(set _.RC:$dst, _.KRCWM:$mask_wb,
- (_.VT (GatherNode (_.VT _.RC:$src1), _.KRCWM:$mask,
- vectoraddr:$src2)))]>, EVEX, EVEX_K,
- EVEX_CD8<_.EltSize, CD8VT1>;
+ []>, EVEX, EVEX_K;
}
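
The simplified gather above drops the ISel patterns but keeps the @earlyclobber and mask-writeback constraints; architecturally, each active lane loads through its own index and its mask bit is cleared as the lane completes. A scalar sketch of that contract, with hypothetical container types:

#include <cstddef>
#include <cstdio>
#include <vector>

// Per-lane gather: lanes with a set mask bit load Mem[Index[I]]; each
// completed lane clears its mask bit, modelling the $mask_wb output.
static unsigned gather(std::vector<int> &Dst, const std::vector<int> &Mem,
                       const std::vector<int> &Index, unsigned Mask) {
  for (std::size_t I = 0; I != Dst.size(); ++I)
    if ((Mask >> I) & 1u) {
      Dst[I] = Mem[Index[I]];
      Mask &= ~(1u << I);       // writeback: lane done
    }
  return Mask;                  // zero once every active lane completed
}

int main() {
  std::vector<int> Mem{10, 20, 30, 40}, Dst(4, -1), Idx{3, 2, 1, 0};
  unsigned WB = gather(Dst, Mem, Idx, 0b1010u);
  std::printf("%d %d %d %d (mask=%u)\n", Dst[0], Dst[1], Dst[2], Dst[3], WB);
  // -1 30 -1 10 (mask=0)
}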
let ExeDomain = SSEPackedDouble in {
-defm VGATHERDPDZ : avx512_gather<0x92, "vgatherdpd", v8f64_info, vy64xmem,
- mgatherv8i32>, EVEX_V512, VEX_W;
-defm VGATHERQPDZ : avx512_gather<0x93, "vgatherqpd", v8f64_info, vz64mem,
- mgatherv8i64>, EVEX_V512, VEX_W;
+defm VGATHERDPDZ : avx512_gather<0x92, "vgatherdpd", VK8WM, VR512, vy64xmem>,
+ EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT1>;
+defm VGATHERQPDZ : avx512_gather<0x93, "vgatherqpd", VK8WM, VR512, vz64mem>,
+ EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT1>;
}
let ExeDomain = SSEPackedSingle in {
-defm VGATHERDPSZ : avx512_gather<0x92, "vgatherdps", v16f32_info, vz32mem,
- mgatherv16i32>, EVEX_V512;
-defm VGATHERQPSZ : avx512_gather<0x93, "vgatherqps", v8f32x_info, vz64mem,
- mgatherv8i64>, EVEX_V512;
+defm VGATHERDPSZ : avx512_gather<0x92, "vgatherdps", VK16WM, VR512, vz32mem>,
+ EVEX_V512, EVEX_CD8<32, CD8VT1>;
+defm VGATHERQPSZ : avx512_gather<0x93, "vgatherqps", VK8WM, VR256X, vz64mem>,
+ EVEX_V512, EVEX_CD8<32, CD8VT1>;
}
-defm VPGATHERDQZ : avx512_gather<0x90, "vpgatherdq", v8i64_info, vy64xmem,
- mgatherv8i32>, EVEX_V512, VEX_W;
-defm VPGATHERDDZ : avx512_gather<0x90, "vpgatherdd", v16i32_info, vz32mem,
- mgatherv16i32>, EVEX_V512;
-
-defm VPGATHERQQZ : avx512_gather<0x91, "vpgatherqq", v8i64_info, vz64mem,
- mgatherv8i64>, EVEX_V512, VEX_W;
-defm VPGATHERQDZ : avx512_gather<0x91, "vpgatherqd", v8i32x_info, vz64mem,
- mgatherv8i64>, EVEX_V512;
+defm VPGATHERDQZ : avx512_gather<0x90, "vpgatherdq", VK8WM, VR512, vy64xmem>,
+ EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT1>;
+defm VPGATHERDDZ : avx512_gather<0x90, "vpgatherdd", VK16WM, VR512, vz32mem>,
+ EVEX_V512, EVEX_CD8<32, CD8VT1>;
-multiclass avx512_scatter<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
- X86MemOperand memop, PatFrag ScatterNode> {
+defm VPGATHERQQZ : avx512_gather<0x91, "vpgatherqq", VK8WM, VR512, vz64mem>,
+ EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT1>;
+defm VPGATHERQDZ : avx512_gather<0x91, "vpgatherqd", VK8WM, VR256X, vz64mem>,
+ EVEX_V512, EVEX_CD8<32, CD8VT1>;
+multiclass avx512_scatter<bits<8> opc, string OpcodeStr, RegisterClass KRC,
+ RegisterClass RC, X86MemOperand memop> {
let mayStore = 1, Constraints = "$mask = $mask_wb" in
-
- def mr : AVX5128I<opc, MRMDestMem, (outs _.KRCWM:$mask_wb),
- (ins memop:$dst, _.KRCWM:$mask, _.RC:$src),
+ def mr : AVX5128I<opc, MRMDestMem, (outs KRC:$mask_wb),
+ (ins memop:$dst, KRC:$mask, RC:$src2),
!strconcat(OpcodeStr,
- "\t{$src, ${dst} {${mask}}|${dst} {${mask}}, $src}"),
- [(set _.KRCWM:$mask_wb, (ScatterNode (_.VT _.RC:$src),
- _.KRCWM:$mask, vectoraddr:$dst))]>,
- EVEX, EVEX_K, EVEX_CD8<_.EltSize, CD8VT1>;
+ "\t{$src2, ${dst} {${mask}}|${dst} {${mask}}, $src2}"),
+ []>, EVEX, EVEX_K;
}
let ExeDomain = SSEPackedDouble in {
-defm VSCATTERDPDZ : avx512_scatter<0xA2, "vscatterdpd", v8f64_info, vy64xmem,
- mscatterv8i32>, EVEX_V512, VEX_W;
-defm VSCATTERQPDZ : avx512_scatter<0xA3, "vscatterqpd", v8f64_info, vz64mem,
- mscatterv8i64>, EVEX_V512, VEX_W;
+defm VSCATTERDPDZ : avx512_scatter<0xA2, "vscatterdpd", VK8WM, VR512, vy64xmem>,
+ EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT1>;
+defm VSCATTERQPDZ : avx512_scatter<0xA3, "vscatterqpd", VK8WM, VR512, vz64mem>,
+ EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT1>;
}
let ExeDomain = SSEPackedSingle in {
-defm VSCATTERDPSZ : avx512_scatter<0xA2, "vscatterdps", v16f32_info, vz32mem,
- mscatterv16i32>, EVEX_V512;
-defm VSCATTERQPSZ : avx512_scatter<0xA3, "vscatterqps", v8f32x_info, vz64mem,
- mscatterv8i64>, EVEX_V512;
+defm VSCATTERDPSZ : avx512_scatter<0xA2, "vscatterdps", VK16WM, VR512, vz32mem>,
+ EVEX_V512, EVEX_CD8<32, CD8VT1>;
+defm VSCATTERQPSZ : avx512_scatter<0xA3, "vscatterqps", VK8WM, VR256X, vz64mem>,
+ EVEX_V512, EVEX_CD8<32, CD8VT1>;
}
-defm VPSCATTERDQZ : avx512_scatter<0xA0, "vpscatterdq", v8i64_info, vy64xmem,
- mscatterv8i32>, EVEX_V512, VEX_W;
-defm VPSCATTERDDZ : avx512_scatter<0xA0, "vpscatterdd", v16i32_info, vz32mem,
- mscatterv16i32>, EVEX_V512;
+defm VPSCATTERDQZ : avx512_scatter<0xA0, "vpscatterdq", VK8WM, VR512, vy64xmem>,
+ EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT1>;
+defm VPSCATTERDDZ : avx512_scatter<0xA0, "vpscatterdd", VK16WM, VR512, vz32mem>,
+ EVEX_V512, EVEX_CD8<32, CD8VT1>;
-defm VPSCATTERQQZ : avx512_scatter<0xA1, "vpscatterqq", v8i64_info, vz64mem,
- mscatterv8i64>, EVEX_V512, VEX_W;
-defm VPSCATTERQDZ : avx512_scatter<0xA1, "vpscatterqd", v8i32x_info, vz64mem,
- mscatterv8i64>, EVEX_V512;
+defm VPSCATTERQQZ : avx512_scatter<0xA1, "vpscatterqq", VK8WM, VR512, vz64mem>,
+ EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT1>;
+defm VPSCATTERQDZ : avx512_scatter<0xA1, "vpscatterqd", VK8WM, VR256X, vz64mem>,
+ EVEX_V512, EVEX_CD8<32, CD8VT1>;
// prefetch
multiclass avx512_gather_scatter_prefetch<bits<8> opc, Format F, string OpcodeStr,
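Note the direction of this gather/scatter hunk: the typed X86VectorVTInfo/PatFrag-based definitions are replaced by the older RegisterClass-based form whose selection pattern list is empty ([]), so instruction selection for these instructions no longer goes through mgatherv*/mscatterv*. The corresponding PatFrags and the vectoraddr ComplexPattern are deleted in the X86InstrFragmentsSIMD.td and X86InstrInfo.td hunks below.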
diff --git a/lib/Target/X86/X86InstrFragmentsSIMD.td b/lib/Target/X86/X86InstrFragmentsSIMD.td
index bf515a8..0bdabdf 100644
--- a/lib/Target/X86/X86InstrFragmentsSIMD.td
+++ b/lib/Target/X86/X86InstrFragmentsSIMD.td
@@ -282,6 +282,8 @@ def X86faddRnd : SDNode<"X86ISD::FADD_RND", SDTFPBinOpRound>;
def X86fsubRnd : SDNode<"X86ISD::FSUB_RND", SDTFPBinOpRound>;
def X86fmulRnd : SDNode<"X86ISD::FMUL_RND", SDTFPBinOpRound>;
def X86fdivRnd : SDNode<"X86ISD::FDIV_RND", SDTFPBinOpRound>;
+def X86fmaxRnd : SDNode<"X86ISD::FMAX", SDTFPBinOpRound>;
+def X86fminRnd : SDNode<"X86ISD::FMIN", SDTFPBinOpRound>;
def X86Fmadd : SDNode<"X86ISD::FMADD", SDTFma>;
def X86Fnmadd : SDNode<"X86ISD::FNMADD", SDTFma>;
@@ -304,8 +306,6 @@ def X86exp2 : SDNode<"X86ISD::EXP2", STDFp1SrcRm>;
def X86rsqrt28s : SDNode<"X86ISD::RSQRT28", STDFp2SrcRm>;
def X86rcp28s : SDNode<"X86ISD::RCP28", STDFp2SrcRm>;
def X86RndScale : SDNode<"X86ISD::RNDSCALE", STDFp3SrcRm>;
-def X86mgather : SDNode<"X86ISD::GATHER", SDTypeProfile<1, 3,
- [SDTCisVec<0>, SDTCisVec<1>, SDTCisVec<2>]>>;
def SDT_PCMPISTRI : SDTypeProfile<2, 3, [SDTCisVT<0, i32>, SDTCisVT<1, i32>,
SDTCisVT<2, v16i8>, SDTCisVT<3, v16i8>,
@@ -526,58 +526,6 @@ def unalignednontemporalstore : PatFrag<(ops node:$val, node:$ptr),
return false;
}]>;
-def mgatherv8i32 : PatFrag<(ops node:$src1, node:$src2, node:$src3),
- (masked_gather node:$src1, node:$src2, node:$src3) , [{
- //if (MaskedGatherSDNode *Mgt = dyn_cast<MaskedGatherSDNode>(N))
- // return (Mgt->getIndex().getValueType() == MVT::v8i32 ||
- // Mgt->getBasePtr().getValueType() == MVT::v8i32);
- //return false;
- return N != 0;
-}]>;
-
-def mgatherv8i64 : PatFrag<(ops node:$src1, node:$src2, node:$src3),
- (masked_gather node:$src1, node:$src2, node:$src3) , [{
- //if (MaskedGatherSDNode *Mgt = dyn_cast<MaskedGatherSDNode>(N))
- // return (Mgt->getIndex().getValueType() == MVT::v8i64 ||
- // Mgt->getBasePtr().getValueType() == MVT::v8i64);
- //return false;
- return N != 0;
-}]>;
-def mgatherv16i32 : PatFrag<(ops node:$src1, node:$src2, node:$src3),
- (masked_gather node:$src1, node:$src2, node:$src3) , [{
- //if (MaskedGatherSDNode *Mgt = dyn_cast<MaskedGatherSDNode>(N))
- // return (Mgt->getIndex().getValueType() == MVT::v16i32 ||
- // Mgt->getBasePtr().getValueType() == MVT::v16i32);
- //return false;
- return N != 0;
-}]>;
-
-def mscatterv8i32 : PatFrag<(ops node:$src1, node:$src2, node:$src3),
- (masked_scatter node:$src1, node:$src2, node:$src3) , [{
- //if (MaskedScatterSDNode *Sc = dyn_cast<MaskedScatterSDNode>(N))
- // return (Sc->getIndex().getValueType() == MVT::v8i32 ||
- // Sc->getBasePtr().getValueType() == MVT::v8i32);
- //return false;
- return N != 0;
-}]>;
-
-def mscatterv8i64 : PatFrag<(ops node:$src1, node:$src2, node:$src3),
- (masked_scatter node:$src1, node:$src2, node:$src3) , [{
- //if (MaskedScatterSDNode *Sc = dyn_cast<MaskedScatterSDNode>(N))
- // return (Sc->getIndex().getValueType() == MVT::v8i64 ||
- // Sc->getBasePtr().getValueType() == MVT::v8i64);
- //return false;
- return N != 0;
-}]>;
-def mscatterv16i32 : PatFrag<(ops node:$src1, node:$src2, node:$src3),
- (masked_scatter node:$src1, node:$src2, node:$src3) , [{
- //if (MaskedScatterSDNode *Sc = dyn_cast<MaskedScatterSDNode>(N))
- // return (Sc->getIndex().getValueType() == MVT::v16i32 ||
- // Sc->getBasePtr().getValueType() == MVT::v16i32);
- //return false;
- return N != 0;
-}]>;
-
// 128-bit bitconvert pattern fragments
def bc_v4f32 : PatFrag<(ops node:$in), (v4f32 (bitconvert node:$in))>;
def bc_v2f64 : PatFrag<(ops node:$in), (v2f64 (bitconvert node:$in))>;
@@ -681,3 +629,55 @@ def vinsert256_insert : PatFrag<(ops node:$bigvec, node:$smallvec,
return X86::isVINSERT256Index(N);
}], INSERT_get_vinsert256_imm>;
+def masked_load_aligned128 : PatFrag<(ops node:$src1, node:$src2, node:$src3),
+ (masked_load node:$src1, node:$src2, node:$src3), [{
+ if (dyn_cast<MaskedLoadSDNode>(N))
+ return cast<MaskedLoadSDNode>(N)->getAlignment() >= 16;
+ return false;
+}]>;
+
+def masked_load_aligned256 : PatFrag<(ops node:$src1, node:$src2, node:$src3),
+ (masked_load node:$src1, node:$src2, node:$src3), [{
+ if (dyn_cast<MaskedLoadSDNode>(N))
+ return cast<MaskedLoadSDNode>(N)->getAlignment() >= 32;
+ return false;
+}]>;
+
+def masked_load_aligned512 : PatFrag<(ops node:$src1, node:$src2, node:$src3),
+ (masked_load node:$src1, node:$src2, node:$src3), [{
+ if (dyn_cast<MaskedLoadSDNode>(N))
+ return cast<MaskedLoadSDNode>(N)->getAlignment() >= 64;
+ return false;
+}]>;
+
+def masked_load_unaligned : PatFrag<(ops node:$src1, node:$src2, node:$src3),
+ (masked_load node:$src1, node:$src2, node:$src3), [{
+ return (dyn_cast<MaskedLoadSDNode>(N) != 0);
+}]>;
+
+def masked_store_aligned128 : PatFrag<(ops node:$src1, node:$src2, node:$src3),
+ (masked_store node:$src1, node:$src2, node:$src3), [{
+ if (dyn_cast<MaskedStoreSDNode>(N))
+ return cast<MaskedStoreSDNode>(N)->getAlignment() >= 16;
+ return false;
+}]>;
+
+def masked_store_aligned256 : PatFrag<(ops node:$src1, node:$src2, node:$src3),
+ (masked_store node:$src1, node:$src2, node:$src3), [{
+ if (dyn_cast<MaskedStoreSDNode>(N))
+ return cast<MaskedStoreSDNode>(N)->getAlignment() >= 32;
+ return false;
+}]>;
+
+def masked_store_aligned512 : PatFrag<(ops node:$src1, node:$src2, node:$src3),
+ (masked_store node:$src1, node:$src2, node:$src3), [{
+ if (dyn_cast<MaskedStoreSDNode>(N))
+ return cast<MaskedStoreSDNode>(N)->getAlignment() >= 64;
+ return false;
+}]>;
+
+def masked_store_unaligned : PatFrag<(ops node:$src1, node:$src2, node:$src3),
+ (masked_store node:$src1, node:$src2, node:$src3), [{
+ return (dyn_cast<MaskedStoreSDNode>(N) != 0);
+}]>;
+
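On the predicate bodies in the masked load/store fragments above: each alignment check uses dyn_cast purely as a type test and then re-casts the same node. A minimal equivalent predicate body that folds the two casts into one (a sketch, assuming N is the SDNode bound by the PatFrag):

  // One dyn_cast serves as both the type test and the access.
  if (const auto *Ld = dyn_cast<MaskedLoadSDNode>(N))
    return Ld->getAlignment() >= 16;
  return false;

Behavior is identical; the fragment only ever runs the predicate on a node already matched as masked_load.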
diff --git a/lib/Target/X86/X86InstrInfo.cpp b/lib/Target/X86/X86InstrInfo.cpp
index f5b9680..538ec1c 100644
--- a/lib/Target/X86/X86InstrInfo.cpp
+++ b/lib/Target/X86/X86InstrInfo.cpp
@@ -104,7 +104,7 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI)
: X86GenInstrInfo(
(STI.isTarget64BitLP64() ? X86::ADJCALLSTACKDOWN64 : X86::ADJCALLSTACKDOWN32),
(STI.isTarget64BitLP64() ? X86::ADJCALLSTACKUP64 : X86::ADJCALLSTACKUP32)),
- Subtarget(STI), RI(STI) {
+ Subtarget(STI), RI(STI.getTargetTriple()) {
static const X86MemoryFoldTableEntry MemoryFoldTable2Addr[] = {
{ X86::ADC32ri, X86::ADC32mi, 0 },
@@ -4573,9 +4573,7 @@ MachineInstr *X86InstrInfo::optimizeLoadInstr(MachineInstr *MI,
return nullptr;
// Check whether we can fold the def into SrcOperandId.
- SmallVector<unsigned, 8> Ops;
- Ops.push_back(SrcOperandId);
- MachineInstr *FoldMI = foldMemoryOperand(MI, Ops, DefMI);
+ MachineInstr *FoldMI = foldMemoryOperand(MI, SrcOperandId, DefMI);
if (FoldMI) {
FoldAsLoadDefReg = 0;
return FoldMI;
@@ -4670,7 +4668,7 @@ bool X86InstrInfo::expandPostRAPseudo(MachineBasicBlock::iterator MI) const {
}
static MachineInstr *FuseTwoAddrInst(MachineFunction &MF, unsigned Opcode,
- const SmallVectorImpl<MachineOperand> &MOs,
+ ArrayRef<MachineOperand> MOs,
MachineInstr *MI,
const TargetInstrInfo &TII) {
// Create the base instruction with the memory operand as the first part.
@@ -4697,9 +4695,8 @@ static MachineInstr *FuseTwoAddrInst(MachineFunction &MF, unsigned Opcode,
return MIB;
}
-static MachineInstr *FuseInst(MachineFunction &MF,
- unsigned Opcode, unsigned OpNo,
- const SmallVectorImpl<MachineOperand> &MOs,
+static MachineInstr *FuseInst(MachineFunction &MF, unsigned Opcode,
+ unsigned OpNo, ArrayRef<MachineOperand> MOs,
MachineInstr *MI, const TargetInstrInfo &TII) {
// Omit the implicit operands, something BuildMI can't do.
MachineInstr *NewMI = MF.CreateMachineInstr(TII.get(Opcode),
@@ -4723,7 +4720,7 @@ static MachineInstr *FuseInst(MachineFunction &MF,
}
static MachineInstr *MakeM0Inst(const TargetInstrInfo &TII, unsigned Opcode,
- const SmallVectorImpl<MachineOperand> &MOs,
+ ArrayRef<MachineOperand> MOs,
MachineInstr *MI) {
MachineFunction &MF = *MI->getParent()->getParent();
MachineInstrBuilder MIB = BuildMI(MF, MI->getDebugLoc(), TII.get(Opcode));
@@ -4736,12 +4733,12 @@ static MachineInstr *MakeM0Inst(const TargetInstrInfo &TII, unsigned Opcode,
return MIB.addImm(0);
}
-MachineInstr*
-X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
- MachineInstr *MI, unsigned OpNum,
- const SmallVectorImpl<MachineOperand> &MOs,
- unsigned Size, unsigned Align,
- bool AllowCommute) const {
+MachineInstr *X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
+ MachineInstr *MI,
+ unsigned OpNum,
+ ArrayRef<MachineOperand> MOs,
+ unsigned Size, unsigned Align,
+ bool AllowCommute) const {
const DenseMap<unsigned,
std::pair<unsigned,unsigned> > *OpcodeTablePtr = nullptr;
bool isCallRegIndirect = Subtarget.callRegIndirect();
@@ -5104,10 +5101,10 @@ breakPartialRegDependency(MachineBasicBlock::iterator MI, unsigned OpNum,
MI->addRegisterKilled(Reg, TRI, true);
}
-MachineInstr*
-X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF, MachineInstr *MI,
- const SmallVectorImpl<unsigned> &Ops,
- int FrameIndex) const {
+MachineInstr *X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
+ MachineInstr *MI,
+ ArrayRef<unsigned> Ops,
+ int FrameIndex) const {
// Check switch flag
if (NoFusing) return nullptr;
@@ -5145,10 +5142,9 @@ X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF, MachineInstr *MI,
} else if (Ops.size() != 1)
return nullptr;
- SmallVector<MachineOperand,4> MOs;
- MOs.push_back(MachineOperand::CreateFI(FrameIndex));
- return foldMemoryOperandImpl(MF, MI, Ops[0], MOs,
- Size, Alignment, /*AllowCommute=*/true);
+ return foldMemoryOperandImpl(MF, MI, Ops[0],
+ MachineOperand::CreateFI(FrameIndex), Size,
+ Alignment, /*AllowCommute=*/true);
}
static bool isPartialRegisterLoad(const MachineInstr &LoadMI,
@@ -5170,9 +5166,9 @@ static bool isPartialRegisterLoad(const MachineInstr &LoadMI,
return false;
}
-MachineInstr* X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
+MachineInstr *X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
MachineInstr *MI,
- const SmallVectorImpl<unsigned> &Ops,
+ ArrayRef<unsigned> Ops,
MachineInstr *LoadMI) const {
// If loading from a FrameIndex, fold directly from the FrameIndex.
unsigned NumOps = LoadMI->getDesc().getNumOperands();
@@ -5295,8 +5291,8 @@ MachineInstr* X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
return nullptr;
// Folding a normal load. Just copy the load's address operands.
- for (unsigned i = NumOps - X86::AddrNumOperands; i != NumOps; ++i)
- MOs.push_back(LoadMI->getOperand(i));
+ MOs.append(LoadMI->operands_begin() + NumOps - X86::AddrNumOperands,
+ LoadMI->operands_begin() + NumOps);
break;
}
}
@@ -5304,9 +5300,8 @@ MachineInstr* X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
/*Size=*/0, Alignment, /*AllowCommute=*/true);
}
-
bool X86InstrInfo::canFoldMemoryOperand(const MachineInstr *MI,
- const SmallVectorImpl<unsigned> &Ops) const {
+ ArrayRef<unsigned> Ops) const {
// Check switch flag
if (NoFusing) return 0;
@@ -5559,7 +5554,7 @@ X86InstrInfo::unfoldMemoryOperand(SelectionDAG &DAG, SDNode *N,
}
if (Load)
BeforeOps.push_back(SDValue(Load, 0));
- std::copy(AfterOps.begin(), AfterOps.end(), std::back_inserter(BeforeOps));
+ BeforeOps.insert(BeforeOps.end(), AfterOps.begin(), AfterOps.end());
SDNode *NewNode= DAG.getMachineNode(Opc, dl, VTs, BeforeOps);
NewNodes.push_back(NewNode);
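A note on the SmallVectorImpl-to-ArrayRef migration running through this file: ArrayRef<T> also binds to a single value, which is what allows foldMemoryOperandImpl above to take MachineOperand::CreateFI(FrameIndex) directly where a one-element SmallVector used to be built. A self-contained sketch of that conversion behavior (illustrative, not part of the patch):

  #include "llvm/ADT/ArrayRef.h"
  #include "llvm/ADT/SmallVector.h"

  // Accepts a container view or a single element; both become an ArrayRef.
  static unsigned sum(llvm::ArrayRef<unsigned> Ops) {
    unsigned Total = 0;
    for (unsigned Op : Ops)
      Total += Op;
    return Total;
  }

  void demo() {
    llvm::SmallVector<unsigned, 4> V;
    V.push_back(1);
    V.push_back(2);
    (void)sum(V);  // view over the SmallVector's storage
    (void)sum(7);  // implicit one-element ArrayRef
  }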
diff --git a/lib/Target/X86/X86InstrInfo.h b/lib/Target/X86/X86InstrInfo.h
index 4d15467..0dd8101 100644
--- a/lib/Target/X86/X86InstrInfo.h
+++ b/lib/Target/X86/X86InstrInfo.h
@@ -305,23 +305,21 @@ public:
  /// folding and return true; otherwise it should return false. If it folds
/// the instruction, it is likely that the MachineInstruction the iterator
/// references has been changed.
- MachineInstr* foldMemoryOperandImpl(MachineFunction &MF,
- MachineInstr* MI,
- const SmallVectorImpl<unsigned> &Ops,
+ MachineInstr *foldMemoryOperandImpl(MachineFunction &MF, MachineInstr *MI,
+ ArrayRef<unsigned> Ops,
int FrameIndex) const override;
/// foldMemoryOperand - Same as the previous version except it allows folding
/// of any load and store from / to any address, not just from a specific
/// stack slot.
- MachineInstr* foldMemoryOperandImpl(MachineFunction &MF,
- MachineInstr* MI,
- const SmallVectorImpl<unsigned> &Ops,
- MachineInstr* LoadMI) const override;
+ MachineInstr *foldMemoryOperandImpl(MachineFunction &MF, MachineInstr *MI,
+ ArrayRef<unsigned> Ops,
+ MachineInstr *LoadMI) const override;
  /// canFoldMemoryOperand - Returns true if folding of the specified
  /// load / store is possible.
- bool canFoldMemoryOperand(const MachineInstr*,
- const SmallVectorImpl<unsigned> &) const override;
+ bool canFoldMemoryOperand(const MachineInstr *,
+ ArrayRef<unsigned>) const override;
/// unfoldMemoryOperand - Separate a single instruction which folded a load or
  /// a store or a load and a store into two or more instructions. If this is
@@ -406,10 +404,9 @@ public:
void breakPartialRegDependency(MachineBasicBlock::iterator MI, unsigned OpNum,
const TargetRegisterInfo *TRI) const override;
- MachineInstr* foldMemoryOperandImpl(MachineFunction &MF,
- MachineInstr* MI,
+ MachineInstr *foldMemoryOperandImpl(MachineFunction &MF, MachineInstr *MI,
unsigned OpNum,
- const SmallVectorImpl<MachineOperand> &MOs,
+ ArrayRef<MachineOperand> MOs,
unsigned Size, unsigned Alignment,
bool AllowCommute) const;
diff --git a/lib/Target/X86/X86InstrInfo.td b/lib/Target/X86/X86InstrInfo.td
index 9881caf..e9a0431 100644
--- a/lib/Target/X86/X86InstrInfo.td
+++ b/lib/Target/X86/X86InstrInfo.td
@@ -572,10 +572,13 @@ def X86GR32orGR64AsmOperand : AsmOperandClass {
def GR32orGR64 : RegisterOperand<GR32> {
let ParserMatchClass = X86GR32orGR64AsmOperand;
}
-
+def AVX512RCOperand : AsmOperandClass {
+ let Name = "AVX512RC";
+}
def AVX512RC : Operand<i32> {
let PrintMethod = "printRoundingControl";
let OperandType = "OPERAND_IMMEDIATE";
+ let ParserMatchClass = AVX512RCOperand;
}
// Sign-extended immediate classes. We don't need to define the full lattice
@@ -713,9 +716,6 @@ def tls64addr : ComplexPattern<i64, 5, "SelectTLSADDRAddr",
def tls64baseaddr : ComplexPattern<i64, 5, "SelectTLSADDRAddr",
[tglobaltlsaddr], []>;
-def vectoraddr : ComplexPattern<iPTR, 5, "SelectAddr", [],[SDNPWantParent]>;
-//def vectoraddr : ComplexPattern<iPTR, 5, "SelectVectorAddr", [],[SDNPWantParent]>;
-
//===----------------------------------------------------------------------===//
// X86 Instruction Predicate Definitions.
def HasCMov : Predicate<"Subtarget->hasCMov()">;
@@ -855,11 +855,11 @@ def X86_COND_E_OR_NE : ImmLeaf<i8, [{
return (Imm == X86::COND_E) || (Imm == X86::COND_NE);
}]>;
-let FastIselShouldIgnore = 1 in { // FastIsel should ignore all simm8 instrs.
- def i16immSExt8 : ImmLeaf<i16, [{ return Imm == (int8_t)Imm; }]>;
- def i32immSExt8 : ImmLeaf<i32, [{ return Imm == (int8_t)Imm; }]>;
- def i64immSExt8 : ImmLeaf<i64, [{ return Imm == (int8_t)Imm; }]>;
-}
+
+def i16immSExt8 : ImmLeaf<i16, [{ return Imm == (int8_t)Imm; }]>;
+def i32immSExt8 : ImmLeaf<i32, [{ return Imm == (int8_t)Imm; }]>;
+def i64immSExt8 : ImmLeaf<i64, [{ return Imm == (int8_t)Imm; }]>;
+
def i64immSExt32 : ImmLeaf<i64, [{ return Imm == (int32_t)Imm; }]>;
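The ImmLeaf predicates above accept exactly the immediates that survive a round trip through a sign-extended 8-bit (or 32-bit) value. A self-contained check of the 8-bit case (standard C++ semantics, not part of the patch):

  #include <cassert>
  #include <cstdint>

  // Mirrors the i*immSExt8 predicate body: truncate to 8 bits,
  // sign-extend back, and compare with the original value.
  static bool fitsInSExt8(int64_t Imm) { return Imm == (int8_t)Imm; }

  int main() {
    assert(fitsInSExt8(-128) && fitsInSExt8(127));   // in range
    assert(!fitsInSExt8(128) && !fitsInSExt8(-129)); // out of range
    return 0;
  }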
diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td
index d2929d2..ccdbf0e 100644
--- a/lib/Target/X86/X86InstrSSE.td
+++ b/lib/Target/X86/X86InstrSSE.td
@@ -3567,7 +3567,7 @@ multiclass sse1_fp_unop_s<bits<8> opc, string OpcodeStr, SDNode OpNode,
defm V#NAME#SS : avx_fp_unop_s<opc, "v"#OpcodeStr##ss, FR32, v4f32, f32,
f32mem, ssmem, sse_load_f32,
!cast<Intrinsic>("int_x86_sse_"##OpcodeStr##_ss), OpNode,
- itins, HasAVX, "SS">, XS, VEX_4V, VEX_LIG;
+ itins, UseAVX, "SS">, XS, VEX_4V, VEX_LIG;
}
multiclass sse2_fp_unop_s<bits<8> opc, string OpcodeStr, SDNode OpNode,
@@ -3579,7 +3579,7 @@ multiclass sse2_fp_unop_s<bits<8> opc, string OpcodeStr, SDNode OpNode,
defm V#NAME#SD : avx_fp_unop_s<opc, "v"#OpcodeStr##sd, FR64, v2f64, f64,
f64mem, sdmem, sse_load_f64,
!cast<Intrinsic>("int_x86_sse2_"##OpcodeStr##_sd),
- OpNode, itins, HasAVX, "SD">, XD, VEX_4V, VEX_LIG;
+ OpNode, itins, UseAVX, "SD">, XD, VEX_4V, VEX_LIG;
}
// Square root.
@@ -4077,7 +4077,7 @@ defm PMULUDQ : PDI_binop_rm2<0xF4, "pmuludq", X86pmuludq, v2i64, v4i32, VR128,
// SSE2 - Packed Integer Logical Instructions
//===---------------------------------------------------------------------===//
-let Predicates = [HasAVX] in {
+let Predicates = [HasAVX, NoVLX] in {
defm VPSLLW : PDI_binop_rmi<0xF1, 0x71, MRM6r, "vpsllw", X86vshl, X86vshli,
VR128, v8i16, v8i16, bc_v8i16, loadv2i64,
SSE_INTSHIFT_ITINS_P, 0>, VEX_4V;
@@ -4123,7 +4123,7 @@ let ExeDomain = SSEPackedInt, SchedRW = [WriteVecShift] in {
}
} // Predicates = [HasAVX]
-let Predicates = [HasAVX2] in {
+let Predicates = [HasAVX2, NoVLX] in {
defm VPSLLWY : PDI_binop_rmi<0xF1, 0x71, MRM6r, "vpsllw", X86vshl, X86vshli,
VR256, v16i16, v8i16, bc_v8i16, loadv2i64,
SSE_INTSHIFT_ITINS_P, 0>, VEX_4V, VEX_L;
@@ -5902,7 +5902,6 @@ multiclass SS41I_pmovx_avx2_patterns<string OpcPrefix, string ExtTy, SDNode ExtO
(!cast<I>(OpcPrefix#DQYrr) VR128:$src)>;
// On AVX2, we also support 256bit inputs.
- // FIXME: remove these patterns when the old shuffle lowering goes away.
def : Pat<(v16i16 (ExtOp (v32i8 VR256:$src))),
(!cast<I>(OpcPrefix#BWYrr) (EXTRACT_SUBREG VR256:$src, sub_xmm))>;
def : Pat<(v8i32 (ExtOp (v32i8 VR256:$src))),
@@ -6955,6 +6954,34 @@ multiclass SS41I_binop_rmi_int<bits<8> opc, string OpcodeStr,
Sched<[itins.Sched.Folded, ReadAfterLd]>;
}
+/// SS41I_binop_rmi - SSE 4.1 binary operator with 8-bit immediate
+multiclass SS41I_binop_rmi<bits<8> opc, string OpcodeStr, SDNode OpNode,
+ ValueType OpVT, RegisterClass RC, PatFrag memop_frag,
+ X86MemOperand x86memop, bit Is2Addr = 1,
+ OpndItins itins = DEFAULT_ITINS> {
+ let isCommutable = 1 in
+ def rri : SS4AIi8<opc, MRMSrcReg, (outs RC:$dst),
+ (ins RC:$src1, RC:$src2, u8imm:$src3),
+ !if(Is2Addr,
+ !strconcat(OpcodeStr,
+ "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
+ !strconcat(OpcodeStr,
+ "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
+ [(set RC:$dst, (OpVT (OpNode RC:$src1, RC:$src2, imm:$src3)))],
+ itins.rr>, Sched<[itins.Sched]>;
+ def rmi : SS4AIi8<opc, MRMSrcMem, (outs RC:$dst),
+ (ins RC:$src1, x86memop:$src2, u8imm:$src3),
+ !if(Is2Addr,
+ !strconcat(OpcodeStr,
+ "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
+ !strconcat(OpcodeStr,
+ "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
+ [(set RC:$dst,
+ (OpVT (OpNode RC:$src1,
+ (bitconvert (memop_frag addr:$src2)), imm:$src3)))], itins.rm>,
+ Sched<[itins.Sched.Folded, ReadAfterLd]>;
+}
+
let Predicates = [HasAVX] in {
let isCommutable = 0 in {
defm VMPSADBW : SS41I_binop_rmi_int<0x42, "vmpsadbw", int_x86_sse41_mpsadbw,
@@ -6963,26 +6990,24 @@ let Predicates = [HasAVX] in {
}
let ExeDomain = SSEPackedSingle in {
- defm VBLENDPS : SS41I_binop_rmi_int<0x0C, "vblendps", int_x86_sse41_blendps,
- VR128, loadv4f32, f128mem, 0,
- DEFAULT_ITINS_FBLENDSCHED>, VEX_4V;
- defm VBLENDPSY : SS41I_binop_rmi_int<0x0C, "vblendps",
- int_x86_avx_blend_ps_256, VR256, loadv8f32,
- f256mem, 0, DEFAULT_ITINS_FBLENDSCHED>,
- VEX_4V, VEX_L;
+ defm VBLENDPS : SS41I_binop_rmi<0x0C, "vblendps", X86Blendi, v4f32,
+ VR128, loadv4f32, f128mem, 0,
+ DEFAULT_ITINS_FBLENDSCHED>, VEX_4V;
+ defm VBLENDPSY : SS41I_binop_rmi<0x0C, "vblendps", X86Blendi, v8f32,
+ VR256, loadv8f32, f256mem, 0,
+ DEFAULT_ITINS_FBLENDSCHED>, VEX_4V, VEX_L;
}
let ExeDomain = SSEPackedDouble in {
- defm VBLENDPD : SS41I_binop_rmi_int<0x0D, "vblendpd", int_x86_sse41_blendpd,
- VR128, loadv2f64, f128mem, 0,
- DEFAULT_ITINS_FBLENDSCHED>, VEX_4V;
- defm VBLENDPDY : SS41I_binop_rmi_int<0x0D, "vblendpd",
- int_x86_avx_blend_pd_256,VR256, loadv4f64,
- f256mem, 0, DEFAULT_ITINS_FBLENDSCHED>,
- VEX_4V, VEX_L;
+ defm VBLENDPD : SS41I_binop_rmi<0x0D, "vblendpd", X86Blendi, v2f64,
+ VR128, loadv2f64, f128mem, 0,
+ DEFAULT_ITINS_FBLENDSCHED>, VEX_4V;
+ defm VBLENDPDY : SS41I_binop_rmi<0x0D, "vblendpd", X86Blendi, v4f64,
+ VR256, loadv4f64, f256mem, 0,
+ DEFAULT_ITINS_FBLENDSCHED>, VEX_4V, VEX_L;
}
- defm VPBLENDW : SS41I_binop_rmi_int<0x0E, "vpblendw", int_x86_sse41_pblendw,
- VR128, loadv2i64, i128mem, 0,
- DEFAULT_ITINS_BLENDSCHED>, VEX_4V;
+ defm VPBLENDW : SS41I_binop_rmi<0x0E, "vpblendw", X86Blendi, v8i16,
+ VR128, loadv2i64, i128mem, 0,
+ DEFAULT_ITINS_BLENDSCHED>, VEX_4V;
let ExeDomain = SSEPackedSingle in
defm VDPPS : SS41I_binop_rmi_int<0x40, "vdpps", int_x86_sse41_dpps,
@@ -7004,9 +7029,9 @@ let Predicates = [HasAVX2] in {
VR256, loadv4i64, i256mem, 0,
DEFAULT_ITINS_MPSADSCHED>, VEX_4V, VEX_L;
}
- defm VPBLENDWY : SS41I_binop_rmi_int<0x0E, "vpblendw", int_x86_avx2_pblendw,
- VR256, loadv4i64, i256mem, 0,
- DEFAULT_ITINS_BLENDSCHED>, VEX_4V, VEX_L;
+ defm VPBLENDWY : SS41I_binop_rmi<0x0E, "vpblendw", X86Blendi, v16i16,
+ VR256, loadv4i64, i256mem, 0,
+ DEFAULT_ITINS_BLENDSCHED>, VEX_4V, VEX_L;
}
let Constraints = "$src1 = $dst" in {
@@ -7016,16 +7041,16 @@ let Constraints = "$src1 = $dst" in {
1, SSE_MPSADBW_ITINS>;
}
let ExeDomain = SSEPackedSingle in
- defm BLENDPS : SS41I_binop_rmi_int<0x0C, "blendps", int_x86_sse41_blendps,
- VR128, memopv4f32, f128mem,
- 1, SSE_INTALU_ITINS_FBLEND_P>;
+ defm BLENDPS : SS41I_binop_rmi<0x0C, "blendps", X86Blendi, v4f32,
+ VR128, memopv4f32, f128mem,
+ 1, SSE_INTALU_ITINS_FBLEND_P>;
let ExeDomain = SSEPackedDouble in
- defm BLENDPD : SS41I_binop_rmi_int<0x0D, "blendpd", int_x86_sse41_blendpd,
- VR128, memopv2f64, f128mem,
- 1, SSE_INTALU_ITINS_FBLEND_P>;
- defm PBLENDW : SS41I_binop_rmi_int<0x0E, "pblendw", int_x86_sse41_pblendw,
- VR128, memopv2i64, i128mem,
- 1, SSE_INTALU_ITINS_BLEND_P>;
+ defm BLENDPD : SS41I_binop_rmi<0x0D, "blendpd", X86Blendi, v2f64,
+ VR128, memopv2f64, f128mem,
+ 1, SSE_INTALU_ITINS_FBLEND_P>;
+ defm PBLENDW : SS41I_binop_rmi<0x0E, "pblendw", X86Blendi, v8i16,
+ VR128, memopv2i64, i128mem,
+ 1, SSE_INTALU_ITINS_BLEND_P>;
let ExeDomain = SSEPackedSingle in
defm DPPS : SS41I_binop_rmi_int<0x40, "dpps", int_x86_sse41_dpps,
VR128, memopv4f32, f128mem, 1,
@@ -7116,32 +7141,12 @@ let Predicates = [HasAVX] in {
def : Pat<(v4f64 (vselect (v4i64 VR256:$mask), (v4f64 VR256:$src1),
(v4f64 VR256:$src2))),
(VBLENDVPDYrr VR256:$src2, VR256:$src1, VR256:$mask)>;
-
- def : Pat<(v8f32 (X86Blendi (v8f32 VR256:$src1), (v8f32 VR256:$src2),
- (imm:$mask))),
- (VBLENDPSYrri VR256:$src1, VR256:$src2, imm:$mask)>;
- def : Pat<(v4f64 (X86Blendi (v4f64 VR256:$src1), (v4f64 VR256:$src2),
- (imm:$mask))),
- (VBLENDPDYrri VR256:$src1, VR256:$src2, imm:$mask)>;
-
- def : Pat<(v8i16 (X86Blendi (v8i16 VR128:$src1), (v8i16 VR128:$src2),
- (imm:$mask))),
- (VPBLENDWrri VR128:$src1, VR128:$src2, imm:$mask)>;
- def : Pat<(v4f32 (X86Blendi (v4f32 VR128:$src1), (v4f32 VR128:$src2),
- (imm:$mask))),
- (VBLENDPSrri VR128:$src1, VR128:$src2, imm:$mask)>;
- def : Pat<(v2f64 (X86Blendi (v2f64 VR128:$src1), (v2f64 VR128:$src2),
- (imm:$mask))),
- (VBLENDPDrri VR128:$src1, VR128:$src2, imm:$mask)>;
}
let Predicates = [HasAVX2] in {
def : Pat<(v32i8 (vselect (v32i8 VR256:$mask), (v32i8 VR256:$src1),
(v32i8 VR256:$src2))),
(VPBLENDVBYrr VR256:$src2, VR256:$src1, VR256:$mask)>;
- def : Pat<(v16i16 (X86Blendi (v16i16 VR256:$src1), (v16i16 VR256:$src2),
- (imm:$mask))),
- (VPBLENDWYrri VR256:$src1, VR256:$src2, imm:$mask)>;
}
// Patterns
@@ -7260,17 +7265,6 @@ let Predicates = [UseSSE41] in {
def : Pat<(v2f64 (vselect (v2i64 XMM0), (v2f64 VR128:$src1),
(v2f64 VR128:$src2))),
(BLENDVPDrr0 VR128:$src2, VR128:$src1)>;
-
- def : Pat<(v8i16 (X86Blendi (v8i16 VR128:$src1), (v8i16 VR128:$src2),
- (imm:$mask))),
- (PBLENDWrri VR128:$src1, VR128:$src2, imm:$mask)>;
- def : Pat<(v4f32 (X86Blendi (v4f32 VR128:$src1), (v4f32 VR128:$src2),
- (imm:$mask))),
- (BLENDPSrri VR128:$src1, VR128:$src2, imm:$mask)>;
- def : Pat<(v2f64 (X86Blendi (v2f64 VR128:$src1), (v2f64 VR128:$src2),
- (imm:$mask))),
- (BLENDPDrri VR128:$src1, VR128:$src2, imm:$mask)>;
-
}
let SchedRW = [WriteLoad] in {
@@ -7840,9 +7834,9 @@ def VBROADCASTSDYrr : avx2_broadcast_reg<0x19, "vbroadcastsd", VR256,
WriteFShuffle256>, VEX_L;
let Predicates = [HasAVX2] in
-def VBROADCASTI128 : avx_broadcast<0x5A, "vbroadcasti128", VR256, i128mem,
- int_x86_avx2_vbroadcasti128, WriteLoad>,
- VEX_L;
+def VBROADCASTI128 : avx_broadcast_no_int<0x5A, "vbroadcasti128", VR256,
+ i128mem, v4i64, loadv2i64,
+ WriteLoad>, VEX_L;
let Predicates = [HasAVX] in
def : Pat<(int_x86_avx_vbroadcastf128_ps_256 addr:$src),
@@ -8238,38 +8232,31 @@ let Predicates = [HasF16C] in {
// AVX2 Instructions
//===----------------------------------------------------------------------===//
-/// AVX2_binop_rmi_int - AVX2 binary operator with 8-bit immediate
-multiclass AVX2_binop_rmi_int<bits<8> opc, string OpcodeStr,
- Intrinsic IntId, RegisterClass RC, PatFrag memop_frag,
- X86MemOperand x86memop> {
+/// AVX2_binop_rmi - AVX2 binary operator with 8-bit immediate
+multiclass AVX2_binop_rmi<bits<8> opc, string OpcodeStr, SDNode OpNode,
+ ValueType OpVT, RegisterClass RC, PatFrag memop_frag,
+ X86MemOperand x86memop> {
let isCommutable = 1 in
def rri : AVX2AIi8<opc, MRMSrcReg, (outs RC:$dst),
(ins RC:$src1, RC:$src2, u8imm:$src3),
!strconcat(OpcodeStr,
"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
- [(set RC:$dst, (IntId RC:$src1, RC:$src2, imm:$src3))]>,
+ [(set RC:$dst, (OpVT (OpNode RC:$src1, RC:$src2, imm:$src3)))]>,
Sched<[WriteBlend]>, VEX_4V;
def rmi : AVX2AIi8<opc, MRMSrcMem, (outs RC:$dst),
(ins RC:$src1, x86memop:$src2, u8imm:$src3),
!strconcat(OpcodeStr,
"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
[(set RC:$dst,
- (IntId RC:$src1,
- (bitconvert (memop_frag addr:$src2)), imm:$src3))]>,
+ (OpVT (OpNode RC:$src1,
+ (bitconvert (memop_frag addr:$src2)), imm:$src3)))]>,
Sched<[WriteBlendLd, ReadAfterLd]>, VEX_4V;
}
-defm VPBLENDD : AVX2_binop_rmi_int<0x02, "vpblendd", int_x86_avx2_pblendd_128,
- VR128, loadv2i64, i128mem>;
-defm VPBLENDDY : AVX2_binop_rmi_int<0x02, "vpblendd", int_x86_avx2_pblendd_256,
- VR256, loadv4i64, i256mem>, VEX_L;
-
-def : Pat<(v4i32 (X86Blendi (v4i32 VR128:$src1), (v4i32 VR128:$src2),
- imm:$mask)),
- (VPBLENDDrri VR128:$src1, VR128:$src2, imm:$mask)>;
-def : Pat<(v8i32 (X86Blendi (v8i32 VR256:$src1), (v8i32 VR256:$src2),
- imm:$mask)),
- (VPBLENDDYrri VR256:$src1, VR256:$src2, imm:$mask)>;
+defm VPBLENDD : AVX2_binop_rmi<0x02, "vpblendd", X86Blendi, v4i32,
+ VR128, loadv2i64, i128mem>;
+defm VPBLENDDY : AVX2_binop_rmi<0x02, "vpblendd", X86Blendi, v8i32,
+ VR256, loadv4i64, i256mem>, VEX_L;
//===----------------------------------------------------------------------===//
// VPBROADCAST - Load from memory and broadcast to all elements of the
@@ -8608,9 +8595,7 @@ def : Pat<(vinsert128_insert:$ins (v16i16 VR256:$src1),
//
def VEXTRACTI128rr : AVX2AIi8<0x39, MRMDestReg, (outs VR128:$dst),
(ins VR256:$src1, u8imm:$src2),
- "vextracti128\t{$src2, $src1, $dst|$dst, $src1, $src2}",
- [(set VR128:$dst,
- (int_x86_avx2_vextracti128 VR256:$src1, imm:$src2))]>,
+ "vextracti128\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
Sched<[WriteShuffle256]>, VEX, VEX_L;
let hasSideEffects = 0, mayStore = 1 in
def VEXTRACTI128mr : AVX2AIi8<0x39, MRMDestMem, (outs),
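The standalone X86Blendi Pat definitions removed above became redundant by construction: the rri variants of SS41I_binop_rmi and AVX2_binop_rmi already carry the pattern (set RC:$dst, (OpVT (OpNode RC:$src1, RC:$src2, imm:$src3))), so instantiating them with OpNode = X86Blendi generates the same per-type blend patterns the explicit Pat defs used to provide.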
diff --git a/lib/Target/X86/X86IntrinsicsInfo.h b/lib/Target/X86/X86IntrinsicsInfo.h
index e436811..42256b2 100644
--- a/lib/Target/X86/X86IntrinsicsInfo.h
+++ b/lib/Target/X86/X86IntrinsicsInfo.h
@@ -175,8 +175,6 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
X86_INTRINSIC_DATA(avx2_packsswb, INTR_TYPE_2OP, X86ISD::PACKSS, 0),
X86_INTRINSIC_DATA(avx2_packusdw, INTR_TYPE_2OP, X86ISD::PACKUS, 0),
X86_INTRINSIC_DATA(avx2_packuswb, INTR_TYPE_2OP, X86ISD::PACKUS, 0),
- X86_INTRINSIC_DATA(avx2_permd, INTR_TYPE_2OP, X86ISD::VPERMV, 0),
- X86_INTRINSIC_DATA(avx2_permps, INTR_TYPE_2OP, X86ISD::VPERMV, 0),
X86_INTRINSIC_DATA(avx2_phadd_d, INTR_TYPE_2OP, X86ISD::HADD, 0),
X86_INTRINSIC_DATA(avx2_phadd_w, INTR_TYPE_2OP, X86ISD::HADD, 0),
X86_INTRINSIC_DATA(avx2_phsub_d, INTR_TYPE_2OP, X86ISD::HSUB, 0),
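For context on the rows above: each X86_INTRINSIC_DATA entry ties an intrinsic to the target node it lowers to, turning intrinsic lowering into a table lookup rather than a switch. A simplified sketch of the entry shape (field names assumed for illustration; the real struct is defined earlier in this header):

  // Assumed, simplified shape of one table entry.
  struct IntrinsicData {
    unsigned Id;    // intrinsic ID, e.g. Intrinsic::x86_avx2_phadd_d
    unsigned Type;  // dispatch kind, e.g. INTR_TYPE_2OP (two operands)
    unsigned Opc0;  // target opcode to emit, e.g. X86ISD::HADD
    unsigned Opc1;  // secondary opcode, 0 when unused
  };

Deleting the avx2_permd and avx2_permps rows means those two intrinsics are no longer lowered through this generic table.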
diff --git a/lib/Target/X86/X86MCInstLower.cpp b/lib/Target/X86/X86MCInstLower.cpp
index 6af59d4..cd3076d 100644
--- a/lib/Target/X86/X86MCInstLower.cpp
+++ b/lib/Target/X86/X86MCInstLower.cpp
@@ -77,8 +77,8 @@ namespace llvm {
X86AsmPrinter::StackMapShadowTracker::startFunction(MachineFunction &F) {
MF = &F;
CodeEmitter.reset(TM.getTarget().createMCCodeEmitter(
- *MF->getSubtarget().getInstrInfo(), *MF->getSubtarget().getRegisterInfo(),
- MF->getSubtarget(), MF->getContext()));
+ *MF->getSubtarget().getInstrInfo(),
+ *MF->getSubtarget().getRegisterInfo(), MF->getContext()));
}
void X86AsmPrinter::StackMapShadowTracker::count(MCInst &Inst,
diff --git a/lib/Target/X86/X86RegisterInfo.cpp b/lib/Target/X86/X86RegisterInfo.cpp
index cab7ce8..06545bc 100644
--- a/lib/Target/X86/X86RegisterInfo.cpp
+++ b/lib/Target/X86/X86RegisterInfo.cpp
@@ -14,6 +14,7 @@
//===----------------------------------------------------------------------===//
#include "X86RegisterInfo.h"
+#include "X86FrameLowering.h"
#include "X86InstrBuilder.h"
#include "X86MachineFunctionInfo.h"
#include "X86Subtarget.h"
@@ -53,26 +54,26 @@ static cl::opt<bool>
EnableBasePointer("x86-use-base-pointer", cl::Hidden, cl::init(true),
cl::desc("Enable use of a base pointer for complex stack frames"));
-X86RegisterInfo::X86RegisterInfo(const X86Subtarget &STI)
- : X86GenRegisterInfo(
- (STI.is64Bit() ? X86::RIP : X86::EIP),
- X86_MC::getDwarfRegFlavour(STI.getTargetTriple(), false),
- X86_MC::getDwarfRegFlavour(STI.getTargetTriple(), true),
- (STI.is64Bit() ? X86::RIP : X86::EIP)),
- Subtarget(STI) {
+X86RegisterInfo::X86RegisterInfo(const Triple &TT)
+ : X86GenRegisterInfo((TT.isArch64Bit() ? X86::RIP : X86::EIP),
+ X86_MC::getDwarfRegFlavour(TT, false),
+ X86_MC::getDwarfRegFlavour(TT, true),
+ (TT.isArch64Bit() ? X86::RIP : X86::EIP)) {
X86_MC::InitLLVM2SEHRegisterMapping(this);
// Cache some information.
- Is64Bit = Subtarget.is64Bit();
- IsWin64 = Subtarget.isTargetWin64();
+ Is64Bit = TT.isArch64Bit();
+ IsWin64 = Is64Bit && TT.isOSWindows();
// Use a callee-saved register as the base pointer. These registers must
// not conflict with any ABI requirements. For example, in 32-bit mode PIC
// requires GOT in the EBX register before function calls via PLT GOT pointer.
if (Is64Bit) {
SlotSize = 8;
- bool Use64BitReg =
- Subtarget.isTarget64BitLP64() || Subtarget.isTargetNaCl64();
+ // This matches the simplified 32-bit pointer code in the data layout
+ // computation.
+ // FIXME: Should use the data layout?
+ bool Use64BitReg = TT.getEnvironment() != Triple::GNUX32;
StackPtr = Use64BitReg ? X86::RSP : X86::ESP;
FramePtr = Use64BitReg ? X86::RBP : X86::EBP;
BasePtr = Use64BitReg ? X86::RBX : X86::EBX;
@@ -120,8 +121,9 @@ X86RegisterInfo::getMatchingSuperRegClass(const TargetRegisterClass *A,
return X86GenRegisterInfo::getMatchingSuperRegClass(A, B, SubIdx);
}
-const TargetRegisterClass*
-X86RegisterInfo::getLargestLegalSuperClass(const TargetRegisterClass *RC) const{
+const TargetRegisterClass *
+X86RegisterInfo::getLargestLegalSuperClass(const TargetRegisterClass *RC,
+ const MachineFunction &MF) const {
// Don't allow super-classes of GR8_NOREX. This class is only used after
// extracting sub_8bit_hi sub-registers. The H sub-registers cannot be copied
// to the full GR8 register class in 64-bit mode, so we cannot allow the
@@ -161,6 +163,7 @@ X86RegisterInfo::getLargestLegalSuperClass(const TargetRegisterClass *RC) const{
const TargetRegisterClass *
X86RegisterInfo::getPointerRegClass(const MachineFunction &MF,
unsigned Kind) const {
+ const X86Subtarget &Subtarget = MF.getSubtarget<X86Subtarget>();
switch (Kind) {
default: llvm_unreachable("Unexpected Kind in getPointerRegClass!");
case 0: // Normal GPRs.
@@ -172,9 +175,9 @@ X86RegisterInfo::getPointerRegClass(const MachineFunction &MF,
return &X86::GR64_NOSPRegClass;
return &X86::GR32_NOSPRegClass;
case 2: // Available for tailcall (not callee-saved GPRs).
- if (Subtarget.isTargetWin64())
+ if (IsWin64)
return &X86::GR64_TCW64RegClass;
- else if (Subtarget.is64Bit())
+ else if (Is64Bit)
return &X86::GR64_TCRegClass;
const Function *F = MF.getFunction();
@@ -210,7 +213,7 @@ X86RegisterInfo::getRegPressureLimit(const TargetRegisterClass *RC,
case X86::GR64RegClassID:
return 12 - FPDiff;
case X86::VR128RegClassID:
- return Subtarget.is64Bit() ? 10 : 4;
+ return Is64Bit ? 10 : 4;
case X86::VR64RegClassID:
return 4;
}
@@ -218,8 +221,10 @@ X86RegisterInfo::getRegPressureLimit(const TargetRegisterClass *RC,
const MCPhysReg *
X86RegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
+ const X86Subtarget &Subtarget = MF->getSubtarget<X86Subtarget>();
bool HasAVX = Subtarget.hasAVX();
bool HasAVX512 = Subtarget.hasAVX512();
+ bool CallsEHReturn = MF->getMMI().callsEHReturn();
assert(MF && "MachineFunction required");
switch (MF->getFunction()->getCallingConv()) {
@@ -253,11 +258,16 @@ X86RegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
if (Is64Bit)
return CSR_64_MostRegs_SaveList;
break;
+ case CallingConv::X86_64_Win64:
+ return CSR_Win64_SaveList;
+ case CallingConv::X86_64_SysV:
+ if (CallsEHReturn)
+ return CSR_64EHRet_SaveList;
+ return CSR_64_SaveList;
default:
break;
}
- bool CallsEHReturn = MF->getMMI().callsEHReturn();
if (Is64Bit) {
if (IsWin64)
return CSR_Win64_SaveList;
@@ -270,8 +280,10 @@ X86RegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
return CSR_32_SaveList;
}
-const uint32_t*
-X86RegisterInfo::getCallPreservedMask(CallingConv::ID CC) const {
+const uint32_t *
+X86RegisterInfo::getCallPreservedMask(const MachineFunction &MF,
+ CallingConv::ID CC) const {
+ const X86Subtarget &Subtarget = MF.getSubtarget<X86Subtarget>();
bool HasAVX = Subtarget.hasAVX();
bool HasAVX512 = Subtarget.hasAVX512();
@@ -308,6 +320,10 @@ X86RegisterInfo::getCallPreservedMask(CallingConv::ID CC) const {
break;
default:
break;
+ case CallingConv::X86_64_Win64:
+ return CSR_Win64_RegMask;
+ case CallingConv::X86_64_SysV:
+ return CSR_64_RegMask;
}
// Unlike getCalleeSavedRegs(), we don't have MMI so we can't check
@@ -349,7 +365,7 @@ BitVector X86RegisterInfo::getReservedRegs(const MachineFunction &MF) const {
// Set the base-pointer register and its aliases as reserved if needed.
if (hasBasePointer(MF)) {
CallingConv::ID CC = MF.getFunction()->getCallingConv();
- const uint32_t* RegMask = getCallPreservedMask(CC);
+ const uint32_t *RegMask = getCallPreservedMask(MF, CC);
if (MachineOperand::clobbersPhysReg(RegMask, getBaseRegister()))
report_fatal_error(
"Stack realignment in presence of dynamic allocas is not supported with"
@@ -393,7 +409,7 @@ BitVector X86RegisterInfo::getReservedRegs(const MachineFunction &MF) const {
Reserved.set(*AI);
}
}
- if (!Is64Bit || !Subtarget.hasAVX512()) {
+ if (!Is64Bit || !MF.getSubtarget<X86Subtarget>().hasAVX512()) {
for (unsigned n = 16; n != 32; ++n) {
for (MCRegAliasIterator AI(X86::XMM0 + n, this, true); AI.isValid(); ++AI)
Reserved.set(*AI);
@@ -486,6 +502,24 @@ X86RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
else
BasePtr = (TFI->hasFP(MF) ? FramePtr : StackPtr);
+ // FRAME_ALLOC uses a single offset, with no register. It only works in the
+ // simple FP case, and doesn't work with stack realignment. On 32-bit, the
+ // offset is from the traditional base pointer location. On 64-bit, the
+ // offset is from the SP at the end of the prologue, not the FP location. This
+ // matches the behavior of llvm.frameaddress.
+ if (Opc == TargetOpcode::FRAME_ALLOC) {
+ MachineOperand &FI = MI.getOperand(FIOperandNum);
+ bool IsWinEH = MF.getTarget().getMCAsmInfo()->usesWindowsCFI();
+ int Offset;
+ if (IsWinEH)
+ Offset = static_cast<const X86FrameLowering *>(TFI)
+ ->getFrameIndexOffsetFromSP(MF, FrameIndex);
+ else
+ Offset = TFI->getFrameIndexOffset(MF, FrameIndex);
+ FI.ChangeToImmediate(Offset);
+ return;
+ }
+
// For LEA64_32r when BasePtr is 32-bits (X32) we can use full-size 64-bit
// register as source operand, semantic is the same and destination is
// 32-bits. It saves one byte per lea in code since 0x67 prefix is avoided.
@@ -537,8 +571,9 @@ unsigned X86RegisterInfo::getFrameRegister(const MachineFunction &MF) const {
return TFI->hasFP(MF) ? FramePtr : StackPtr;
}
-unsigned X86RegisterInfo::getPtrSizedFrameRegister(
- const MachineFunction &MF) const {
+unsigned
+X86RegisterInfo::getPtrSizedFrameRegister(const MachineFunction &MF) const {
+ const X86Subtarget &Subtarget = MF.getSubtarget<X86Subtarget>();
unsigned FrameReg = getFrameRegister(MF);
if (Subtarget.isTarget64BitILP32())
FrameReg = getX86SubSuperRegister(FrameReg, MVT::i32, false);
diff --git a/lib/Target/X86/X86RegisterInfo.h b/lib/Target/X86/X86RegisterInfo.h
index 406b1fc..74edab9 100644
--- a/lib/Target/X86/X86RegisterInfo.h
+++ b/lib/Target/X86/X86RegisterInfo.h
@@ -20,14 +20,7 @@
#include "X86GenRegisterInfo.inc"
namespace llvm {
- class Type;
- class TargetInstrInfo;
- class X86Subtarget;
-
class X86RegisterInfo final : public X86GenRegisterInfo {
-public:
- const X86Subtarget &Subtarget;
-
private:
/// Is64Bit - Is the target 64-bits.
///
@@ -55,7 +48,7 @@ private:
unsigned BasePtr;
public:
- X86RegisterInfo(const X86Subtarget &STI);
+ X86RegisterInfo(const Triple &TT);
// FIXME: This should be tablegen'd like getDwarfRegNum is
int getSEHRegNum(unsigned i) const;
@@ -76,8 +69,9 @@ public:
getSubClassWithSubReg(const TargetRegisterClass *RC,
unsigned Idx) const override;
- const TargetRegisterClass*
- getLargestLegalSuperClass(const TargetRegisterClass *RC) const override;
+ const TargetRegisterClass *
+ getLargestLegalSuperClass(const TargetRegisterClass *RC,
+ const MachineFunction &MF) const override;
/// getPointerRegClass - Returns a TargetRegisterClass used for pointer
/// values.
@@ -98,7 +92,8 @@ public:
/// callee-save registers on this target.
const MCPhysReg *
getCalleeSavedRegs(const MachineFunction* MF) const override;
- const uint32_t *getCallPreservedMask(CallingConv::ID) const override;
+ const uint32_t *getCallPreservedMask(const MachineFunction &MF,
+ CallingConv::ID) const override;
const uint32_t *getNoPreservedMask() const;
/// getReservedRegs - Returns a bitset indexed by physical register number
diff --git a/lib/Target/X86/X86SchedHaswell.td b/lib/Target/X86/X86SchedHaswell.td
index 61c0600..677e824 100644
--- a/lib/Target/X86/X86SchedHaswell.td
+++ b/lib/Target/X86/X86SchedHaswell.td
@@ -2014,7 +2014,7 @@ def : InstRW<[WriteFMADDr],
// 3p forms.
"VF(N?)M(ADD|SUB|ADDSUB|SUBADD)P(S|D)(r213|r132|r231)r(Y)?",
// 3s forms.
- "VF(N?)M(ADD|SUB)S(S|D)(r132|231|213)r",
+ "VF(N?)M(ADD|SUB)S(S|D)(r132|r231|r213)r",
// 4s/4s_int forms.
"VF(N?)M(ADD|SUB)S(S|D)4rr(_REV|_Int)?",
// 4p forms.
@@ -2031,7 +2031,7 @@ def : InstRW<[WriteFMADDm],
// 3p forms.
"VF(N?)M(ADD|SUB|ADDSUB|SUBADD)P(S|D)(r213|r132|r231)m(Y)?",
// 3s forms.
- "VF(N?)M(ADD|SUB)S(S|D)(r132|231|213)m",
+ "VF(N?)M(ADD|SUB)S(S|D)(r132|r231|r213)m",
// 4s/4s_int forms.
"VF(N?)M(ADD|SUB)S(S|D)4(rm|mr)(_Int)?",
// 4p forms.
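The two scheduling changes above are regex fixes: in the old alternation (r132|231|213), only the first branch carried the 'r' prefix, so the 231 and 213 scalar FMA forms (whose names contain r231/r213) could never match the InstRW pattern and never received this itinerary.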
diff --git a/lib/Target/X86/X86SelectionDAGInfo.cpp b/lib/Target/X86/X86SelectionDAGInfo.cpp
index 7feabf6..ca8fc9c 100644
--- a/lib/Target/X86/X86SelectionDAGInfo.cpp
+++ b/lib/Target/X86/X86SelectionDAGInfo.cpp
@@ -62,8 +62,8 @@ X86SelectionDAGInfo::EmitTargetCodeForMemset(SelectionDAG &DAG, SDLoc dl,
#ifndef NDEBUG
// If the base register might conflict with our physical registers, bail out.
- unsigned ClobberSet[] = {X86::RCX, X86::RAX, X86::RDI,
- X86::ECX, X86::EAX, X86::EDI};
+ const unsigned ClobberSet[] = {X86::RCX, X86::RAX, X86::RDI,
+ X86::ECX, X86::EAX, X86::EDI};
assert(!isBaseRegConflictPossible(DAG, ClobberSet));
#endif
@@ -228,8 +228,8 @@ SDValue X86SelectionDAGInfo::EmitTargetCodeForMemcpy(
return SDValue();
// If the base register might conflict with our physical registers, bail out.
- unsigned ClobberSet[] = {X86::RCX, X86::RSI, X86::RDI,
- X86::ECX, X86::ESI, X86::EDI};
+ const unsigned ClobberSet[] = {X86::RCX, X86::RSI, X86::RDI,
+ X86::ECX, X86::ESI, X86::EDI};
if (isBaseRegConflictPossible(DAG, ClobberSet))
return SDValue();
diff --git a/lib/Target/X86/X86TargetMachine.cpp b/lib/Target/X86/X86TargetMachine.cpp
index 4bde053..43d3895 100644
--- a/lib/Target/X86/X86TargetMachine.cpp
+++ b/lib/Target/X86/X86TargetMachine.cpp
@@ -37,10 +37,10 @@ static std::unique_ptr<TargetLoweringObjectFile> createTLOF(const Triple &TT) {
return make_unique<TargetLoweringObjectFileMachO>();
}
- if (TT.isOSLinux())
- return make_unique<X86LinuxTargetObjectFile>();
+ if (TT.isOSLinux() || TT.isOSNaCl())
+ return make_unique<X86LinuxNaClTargetObjectFile>();
if (TT.isOSBinFormatELF())
- return make_unique<TargetLoweringObjectFileELF>();
+ return make_unique<X86ELFTargetObjectFile>();
if (TT.isKnownWindowsMSVCEnvironment())
return make_unique<X86WindowsTargetObjectFile>();
if (TT.isOSBinFormatCOFF())
@@ -94,9 +94,9 @@ X86TargetMachine::X86TargetMachine(const Target &T, StringRef TT, StringRef CPU,
StringRef FS, const TargetOptions &Options,
Reloc::Model RM, CodeModel::Model CM,
CodeGenOpt::Level OL)
- : LLVMTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL),
+ : LLVMTargetMachine(T, computeDataLayout(Triple(TT)), TT, CPU, FS, Options,
+ RM, CM, OL),
TLOF(createTLOF(Triple(getTargetTriple()))),
- DL(computeDataLayout(Triple(TT))),
Subtarget(TT, CPU, FS, *this, Options.StackAlignmentOverride) {
// default to hard float ABI
if (Options.FloatABIType == FloatABI::Default)
diff --git a/lib/Target/X86/X86TargetMachine.h b/lib/Target/X86/X86TargetMachine.h
index 283858d..c9833ed 100644
--- a/lib/Target/X86/X86TargetMachine.h
+++ b/lib/Target/X86/X86TargetMachine.h
@@ -24,8 +24,6 @@ class StringRef;
class X86TargetMachine final : public LLVMTargetMachine {
std::unique_ptr<TargetLoweringObjectFile> TLOF;
- // Calculates type size & alignment
- const DataLayout DL;
X86Subtarget Subtarget;
mutable StringMap<std::unique_ptr<X86Subtarget>> SubtargetMap;
@@ -35,8 +33,6 @@ public:
const TargetOptions &Options, Reloc::Model RM,
CodeModel::Model CM, CodeGenOpt::Level OL);
~X86TargetMachine() override;
- const DataLayout *getDataLayout() const override { return &DL; }
- const X86Subtarget *getSubtargetImpl() const override { return &Subtarget; }
const X86Subtarget *getSubtargetImpl(const Function &F) const override;
TargetIRAnalysis getTargetIRAnalysis() override;
diff --git a/lib/Target/X86/X86TargetObjectFile.cpp b/lib/Target/X86/X86TargetObjectFile.cpp
index 1d1c32e..d65d3b0 100644
--- a/lib/Target/X86/X86TargetObjectFile.cpp
+++ b/lib/Target/X86/X86TargetObjectFile.cpp
@@ -15,17 +15,13 @@
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCSectionCOFF.h"
#include "llvm/MC/MCSectionELF.h"
+#include "llvm/MC/MCValue.h"
#include "llvm/Support/Dwarf.h"
#include "llvm/Target/TargetLowering.h"
using namespace llvm;
using namespace dwarf;
-X86_64MachoTargetObjectFile::X86_64MachoTargetObjectFile()
- : TargetLoweringObjectFileMachO() {
- SupportIndirectSymViaGOTPCRel = true;
-}
-
const MCExpr *X86_64MachoTargetObjectFile::getTTypeGlobalReference(
const GlobalValue *GV, unsigned Encoding, Mangler &Mang,
const TargetMachine &TM, MachineModuleInfo *MMI,
@@ -52,28 +48,30 @@ MCSymbol *X86_64MachoTargetObjectFile::getCFIPersonalitySymbol(
}
const MCExpr *X86_64MachoTargetObjectFile::getIndirectSymViaGOTPCRel(
- const MCSymbol *Sym, int64_t Offset) const {
+ const MCSymbol *Sym, const MCValue &MV, int64_t Offset,
+ MachineModuleInfo *MMI, MCStreamer &Streamer) const {
// On Darwin/X86-64, we need to use foo@GOTPCREL+4 to access the got entry
// from a data section. In case there's an additional offset, then use
// foo@GOTPCREL+4+<offset>.
+ unsigned FinalOff = Offset+MV.getConstant()+4;
const MCExpr *Res =
MCSymbolRefExpr::Create(Sym, MCSymbolRefExpr::VK_GOTPCREL, getContext());
- const MCExpr *Off = MCConstantExpr::Create(Offset+4, getContext());
+ const MCExpr *Off = MCConstantExpr::Create(FinalOff, getContext());
return MCBinaryExpr::CreateAdd(Res, Off, getContext());
}
+const MCExpr *X86ELFTargetObjectFile::getDebugThreadLocalSymbol(
+ const MCSymbol *Sym) const {
+ return MCSymbolRefExpr::Create(Sym, MCSymbolRefExpr::VK_DTPOFF, getContext());
+}
+
void
-X86LinuxTargetObjectFile::Initialize(MCContext &Ctx, const TargetMachine &TM) {
+X86LinuxNaClTargetObjectFile::Initialize(MCContext &Ctx,
+ const TargetMachine &TM) {
TargetLoweringObjectFileELF::Initialize(Ctx, TM);
InitializeELF(TM.Options.UseInitArray);
}
-const MCExpr *
-X86LinuxTargetObjectFile::getDebugThreadLocalSymbol(
- const MCSymbol *Sym) const {
- return MCSymbolRefExpr::Create(Sym, MCSymbolRefExpr::VK_DTPOFF, getContext());
-}
-
const MCExpr *X86WindowsTargetObjectFile::getExecutableRelativeSymbol(
const ConstantExpr *CE, Mangler &Mang, const TargetMachine &TM) const {
// We are looking for the difference of two symbols, need a subtraction
@@ -97,14 +95,12 @@ const MCExpr *X86WindowsTargetObjectFile::getExecutableRelativeSymbol(
SubRHS->getPointerAddressSpace() != 0)
return nullptr;
- // Both ptrtoint instructions must wrap global variables:
+ // Both ptrtoint instructions must wrap global objects:
// - Only global variables are eligible for image relative relocations.
- // - The subtrahend refers to the special symbol __ImageBase, a global.
- const GlobalVariable *GVLHS =
- dyn_cast<GlobalVariable>(SubLHS->getPointerOperand());
- const GlobalVariable *GVRHS =
- dyn_cast<GlobalVariable>(SubRHS->getPointerOperand());
- if (!GVLHS || !GVRHS)
+ // - The subtrahend refers to the special symbol __ImageBase, a GlobalVariable.
+ const auto *GOLHS = dyn_cast<GlobalObject>(SubLHS->getPointerOperand());
+ const auto *GVRHS = dyn_cast<GlobalVariable>(SubRHS->getPointerOperand());
+ if (!GOLHS || !GVRHS)
return nullptr;
// We expect __ImageBase to be a global variable without a section, externally
@@ -117,10 +113,10 @@ const MCExpr *X86WindowsTargetObjectFile::getExecutableRelativeSymbol(
return nullptr;
  // An image-relative, thread-local symbol makes no sense.
- if (GVLHS->isThreadLocal())
+ if (GOLHS->isThreadLocal())
return nullptr;
- return MCSymbolRefExpr::Create(TM.getSymbol(GVLHS, Mang),
+ return MCSymbolRefExpr::Create(TM.getSymbol(GOLHS, Mang),
MCSymbolRefExpr::VK_COFF_IMGREL32,
getContext());
}
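A worked instance of the expression construction above (illustrative values): for a data-section reference to symbol foo with Offset = 8 and a zero constant in the MCValue, FinalOff = 8 + 0 + 4 = 12, and the returned expression is foo@GOTPCREL+12. The change folds the MCValue's constant into the addend, where previously only Offset+4 was emitted.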
diff --git a/lib/Target/X86/X86TargetObjectFile.h b/lib/Target/X86/X86TargetObjectFile.h
index f745538..2e25fb2 100644
--- a/lib/Target/X86/X86TargetObjectFile.h
+++ b/lib/Target/X86/X86TargetObjectFile.h
@@ -19,8 +19,6 @@ namespace llvm {
/// x86-64.
class X86_64MachoTargetObjectFile : public TargetLoweringObjectFileMachO {
public:
- X86_64MachoTargetObjectFile();
-
const MCExpr *
getTTypeGlobalReference(const GlobalValue *GV, unsigned Encoding,
Mangler &Mang, const TargetMachine &TM,
@@ -33,20 +31,25 @@ namespace llvm {
const TargetMachine &TM,
MachineModuleInfo *MMI) const override;
- const MCExpr *
- getIndirectSymViaGOTPCRel(const MCSymbol *Sym,
- int64_t Offset) const override;
+ const MCExpr *getIndirectSymViaGOTPCRel(const MCSymbol *Sym,
+ const MCValue &MV, int64_t Offset,
+ MachineModuleInfo *MMI,
+ MCStreamer &Streamer) const override;
};
- /// X86LinuxTargetObjectFile - This implementation is used for linux x86
- /// and x86-64.
- class X86LinuxTargetObjectFile : public TargetLoweringObjectFileELF {
- void Initialize(MCContext &Ctx, const TargetMachine &TM) override;
-
+  /// \brief This implementation is used for X86 ELF targets that don't
+ /// have a further specialization.
+ class X86ELFTargetObjectFile : public TargetLoweringObjectFileELF {
/// \brief Describe a TLS variable address within debug info.
const MCExpr *getDebugThreadLocalSymbol(const MCSymbol *Sym) const override;
};
+ /// X86LinuxNaClTargetObjectFile - This implementation is used for linux and
+ /// Native Client on x86 and x86-64.
+ class X86LinuxNaClTargetObjectFile : public X86ELFTargetObjectFile {
+ void Initialize(MCContext &Ctx, const TargetMachine &TM) override;
+ };
+
/// \brief This implementation is used for Windows targets on x86 and x86-64.
class X86WindowsTargetObjectFile : public TargetLoweringObjectFileCOFF {
const MCExpr *
diff --git a/lib/Target/XCore/MCTargetDesc/XCoreMCTargetDesc.cpp b/lib/Target/XCore/MCTargetDesc/XCoreMCTargetDesc.cpp
index 4073549..d0a09b2 100644
--- a/lib/Target/XCore/MCTargetDesc/XCoreMCTargetDesc.cpp
+++ b/lib/Target/XCore/MCTargetDesc/XCoreMCTargetDesc.cpp
@@ -126,15 +126,11 @@ void XCoreTargetAsmStreamer::emitCCBottomFunction(StringRef Name) {
}
}
-static MCStreamer *
-createXCoreMCAsmStreamer(MCContext &Ctx, formatted_raw_ostream &OS,
- bool isVerboseAsm, bool useDwarfDirectory,
- MCInstPrinter *InstPrint, MCCodeEmitter *CE,
- MCAsmBackend *TAB, bool ShowInst) {
- MCStreamer *S = llvm::createAsmStreamer(
- Ctx, OS, isVerboseAsm, useDwarfDirectory, InstPrint, CE, TAB, ShowInst);
- new XCoreTargetAsmStreamer(*S, OS);
- return S;
+static MCTargetStreamer *createTargetAsmStreamer(MCStreamer &S,
+ formatted_raw_ostream &OS,
+ MCInstPrinter *InstPrint,
+ bool isVerboseAsm) {
+ return new XCoreTargetAsmStreamer(S, OS);
}
// Force static initialization.
@@ -160,5 +156,6 @@ extern "C" void LLVMInitializeXCoreTargetMC() {
TargetRegistry::RegisterMCInstPrinter(TheXCoreTarget,
createXCoreMCInstPrinter);
- TargetRegistry::RegisterAsmStreamer(TheXCoreTarget, createXCoreMCAsmStreamer);
+ TargetRegistry::RegisterAsmTargetStreamer(TheXCoreTarget,
+ createTargetAsmStreamer);
}
diff --git a/lib/Target/XCore/MCTargetDesc/XCoreMCTargetDesc.h b/lib/Target/XCore/MCTargetDesc/XCoreMCTargetDesc.h
index 0ff5961..28e0275 100644
--- a/lib/Target/XCore/MCTargetDesc/XCoreMCTargetDesc.h
+++ b/lib/Target/XCore/MCTargetDesc/XCoreMCTargetDesc.h
@@ -14,6 +14,8 @@
#ifndef LLVM_LIB_TARGET_XCORE_MCTARGETDESC_XCOREMCTARGETDESC_H
#define LLVM_LIB_TARGET_XCORE_MCTARGETDESC_XCOREMCTARGETDESC_H
+#include "llvm/Support/DataTypes.h"
+
namespace llvm {
class Target;
diff --git a/lib/Target/XCore/XCoreISelDAGToDAG.cpp b/lib/Target/XCore/XCoreISelDAGToDAG.cpp
index f79b78b..5c7ea5e 100644
--- a/lib/Target/XCore/XCoreISelDAGToDAG.cpp
+++ b/lib/Target/XCore/XCoreISelDAGToDAG.cpp
@@ -65,7 +65,7 @@ namespace {
// Complex Pattern Selectors.
bool SelectADDRspii(SDValue Addr, SDValue &Base, SDValue &Offset);
- bool SelectInlineAsmMemoryOperand(const SDValue &Op, char ConstraintCode,
+ bool SelectInlineAsmMemoryOperand(const SDValue &Op, unsigned ConstraintID,
std::vector<SDValue> &OutOps) override;
const char *getPassName() const override {
@@ -108,12 +108,12 @@ bool XCoreDAGToDAGISel::SelectADDRspii(SDValue Addr, SDValue &Base,
}
bool XCoreDAGToDAGISel::
-SelectInlineAsmMemoryOperand(const SDValue &Op, char ConstraintCode,
+SelectInlineAsmMemoryOperand(const SDValue &Op, unsigned ConstraintID,
std::vector<SDValue> &OutOps) {
SDValue Reg;
- switch (ConstraintCode) {
+ switch (ConstraintID) {
default: return true;
- case 'm': // Memory.
+ case InlineAsm::Constraint_m: // Memory.
switch (Op.getOpcode()) {
default: return true;
case XCoreISD::CPRelativeWrapper:
diff --git a/lib/Target/XCore/XCoreISelLowering.h b/lib/Target/XCore/XCoreISelLowering.h
index 213ae4a..b20fc01 100644
--- a/lib/Target/XCore/XCoreISelLowering.h
+++ b/lib/Target/XCore/XCoreISelLowering.h
@@ -177,6 +177,12 @@ namespace llvm {
const std::string &Constraint,
MVT VT) const override;
+ unsigned getInlineAsmMemConstraint(
+ const std::string &ConstraintCode) const override {
+ // FIXME: Map different constraints differently.
+ return InlineAsm::Constraint_m;
+ }
+
// Expand specifics
SDValue TryExpandADDWithMul(SDNode *Op, SelectionDAG &DAG) const;
SDValue ExpandADDSUB(SDNode *Op, SelectionDAG &DAG) const;
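The getInlineAsmMemConstraint hook added above can return Constraint_m unconditionally because XCore only supports the generic 'm' memory constraint. A target with several memory constraints would dispatch on the string instead; a hypothetical sketch (the 'o' mapping is illustrative only, not XCore code):

  unsigned getInlineAsmMemConstraint(
      const std::string &ConstraintCode) const override {
    if (ConstraintCode == "o")        // hypothetical: offsettable memory
      return InlineAsm::Constraint_o;
    return InlineAsm::Constraint_m;   // plain memory operand
  }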
diff --git a/lib/Target/XCore/XCoreRegisterInfo.cpp b/lib/Target/XCore/XCoreRegisterInfo.cpp
index 5c666ae..1d569e8 100644
--- a/lib/Target/XCore/XCoreRegisterInfo.cpp
+++ b/lib/Target/XCore/XCoreRegisterInfo.cpp
@@ -208,8 +208,8 @@ bool XCoreRegisterInfo::needsFrameMoves(const MachineFunction &MF) {
MF.getFunction()->needsUnwindTableEntry();
}
-const MCPhysReg* XCoreRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF)
- const {
+const MCPhysReg *
+XCoreRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
// The callee saved registers LR & FP are explicitly handled during
// emitPrologue & emitEpilogue and related functions.
static const MCPhysReg CalleeSavedRegs[] = {
diff --git a/lib/Target/XCore/XCoreRegisterInfo.h b/lib/Target/XCore/XCoreRegisterInfo.h
index 5d7721c..010fccd 100644
--- a/lib/Target/XCore/XCoreRegisterInfo.h
+++ b/lib/Target/XCore/XCoreRegisterInfo.h
@@ -29,8 +29,7 @@ public:
/// Code Generation virtual methods...
- const MCPhysReg *
- getCalleeSavedRegs(const MachineFunction *MF =nullptr) const override;
+ const MCPhysReg *getCalleeSavedRegs(const MachineFunction *MF) const override;
BitVector getReservedRegs(const MachineFunction &MF) const override;
diff --git a/lib/Target/XCore/XCoreTargetMachine.cpp b/lib/Target/XCore/XCoreTargetMachine.cpp
index 7998fc1..228dc1c 100644
--- a/lib/Target/XCore/XCoreTargetMachine.cpp
+++ b/lib/Target/XCore/XCoreTargetMachine.cpp
@@ -27,9 +27,10 @@ XCoreTargetMachine::XCoreTargetMachine(const Target &T, StringRef TT,
const TargetOptions &Options,
Reloc::Model RM, CodeModel::Model CM,
CodeGenOpt::Level OL)
- : LLVMTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL),
+ : LLVMTargetMachine(
+ T, "e-m:e-p:32:32-i1:8:32-i8:8:32-i16:16:32-i64:32-f64:32-a:0:32-n32",
+ TT, CPU, FS, Options, RM, CM, OL),
TLOF(make_unique<XCoreTargetObjectFile>()),
- DL("e-m:e-p:32:32-i1:8:32-i8:8:32-i16:16:32-i64:32-f64:32-a:0:32-n32"),
Subtarget(TT, CPU, FS, *this) {
initAsmInfo();
}
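The data layout string moves from a DataLayout member into the LLVMTargetMachine constructor, making it a fixed property of the target. Decoded, the XCore spec reads: little-endian ("e"), ELF-style mangling ("m:e"), 32-bit pointers with 32-bit alignment ("p:32:32"), i1/i8/i16 preferring 32-bit alignment, i64 and f64 ABI-aligned to only 32 bits, no required ABI alignment for aggregates ("a:0:32"), and 32-bit native integers ("n32"). A small free-standing sketch of parsing and querying it:

    #include "llvm/IR/DataLayout.h"

    void inspectXCoreLayout() {
      llvm::DataLayout DL(
          "e-m:e-p:32:32-i1:8:32-i8:8:32-i16:16:32-i64:32-f64:32-a:0:32-n32");
      unsigned PtrBits = DL.getPointerSizeInBits(0); // 32
      bool IsLittle = DL.isLittleEndian();           // true, from "e"
      (void)PtrBits;
      (void)IsLittle;
    }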
diff --git a/lib/Target/XCore/XCoreTargetMachine.h b/lib/Target/XCore/XCoreTargetMachine.h
index c5df07c..0d324ab 100644
--- a/lib/Target/XCore/XCoreTargetMachine.h
+++ b/lib/Target/XCore/XCoreTargetMachine.h
@@ -21,7 +21,6 @@ namespace llvm {
class XCoreTargetMachine : public LLVMTargetMachine {
std::unique_ptr<TargetLoweringObjectFile> TLOF;
- const DataLayout DL; // Calculates type size & alignment
XCoreSubtarget Subtarget;
public:
XCoreTargetMachine(const Target &T, StringRef TT,
@@ -30,8 +29,10 @@ public:
CodeGenOpt::Level OL);
~XCoreTargetMachine() override;
- const DataLayout *getDataLayout() const override { return &DL; }
- const XCoreSubtarget *getSubtargetImpl() const override { return &Subtarget; }
+ const XCoreSubtarget *getSubtargetImpl() const { return &Subtarget; }
+ const XCoreSubtarget *getSubtargetImpl(const Function &) const override {
+ return &Subtarget;
+ }
// Pass Pipeline Configuration
TargetPassConfig *createPassConfig(PassManagerBase &PM) override;
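getSubtargetImpl now receives the Function, letting targets return per-function subtargets; XCore has only one, so both overloads hand back the same object. A hypothetical shape for a target that does vary (MySubtarget and lookupOrCreateSubtarget are illustrative, not from this commit):

    const MySubtarget *getSubtargetImpl(const Function &F) const override {
      // Key a subtarget cache on the function's feature attributes
      // (hypothetical helper; targets with real variation work this way).
      std::string FS =
          F.getFnAttribute("target-features").getValueAsString().str();
      return lookupOrCreateSubtarget(FS);
    }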
diff --git a/lib/Transforms/IPO/ArgumentPromotion.cpp b/lib/Transforms/IPO/ArgumentPromotion.cpp
index 7e48ce3..46480bd 100644
--- a/lib/Transforms/IPO/ArgumentPromotion.cpp
+++ b/lib/Transforms/IPO/ArgumentPromotion.cpp
@@ -69,16 +69,15 @@ namespace {
bool runOnSCC(CallGraphSCC &SCC) override;
static char ID; // Pass identification, replacement for typeid
explicit ArgPromotion(unsigned maxElements = 3)
- : CallGraphSCCPass(ID), DL(nullptr), maxElements(maxElements) {
+ : CallGraphSCCPass(ID), maxElements(maxElements) {
initializeArgPromotionPass(*PassRegistry::getPassRegistry());
}
/// A vector used to hold the indices of a single GEP instruction
typedef std::vector<uint64_t> IndicesVector;
- const DataLayout *DL;
private:
- bool isDenselyPacked(Type *type);
+ bool isDenselyPacked(Type *type, const DataLayout &DL);
bool canPaddingBeAccessed(Argument *Arg);
CallGraphNode *PromoteArguments(CallGraphNode *CGN);
bool isSafeToPromoteArgument(Argument *Arg, bool isByVal) const;
@@ -109,9 +108,6 @@ Pass *llvm::createArgumentPromotionPass(unsigned maxElements) {
bool ArgPromotion::runOnSCC(CallGraphSCC &SCC) {
bool Changed = false, LocalChange;
- DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>();
- DL = DLP ? &DLP->getDataLayout() : nullptr;
-
do { // Iterate until we stop promoting from this SCC.
LocalChange = false;
// Attempt to promote arguments from all functions in this SCC.
@@ -128,7 +124,7 @@ bool ArgPromotion::runOnSCC(CallGraphSCC &SCC) {
}
/// \brief Checks if a type could have padding bytes.
-bool ArgPromotion::isDenselyPacked(Type *type) {
+bool ArgPromotion::isDenselyPacked(Type *type, const DataLayout &DL) {
// There is no size information, so be conservative.
if (!type->isSized())
@@ -136,7 +132,7 @@ bool ArgPromotion::isDenselyPacked(Type *type) {
// If the alloc size is not equal to the storage size, then there are padding
// bytes. For x86_fp80 on x86-64, size: 80 alloc size: 128.
- if (!DL || DL->getTypeSizeInBits(type) != DL->getTypeAllocSizeInBits(type))
+ if (DL.getTypeSizeInBits(type) != DL.getTypeAllocSizeInBits(type))
return false;
if (!isa<CompositeType>(type))
@@ -144,19 +140,20 @@ bool ArgPromotion::isDenselyPacked(Type *type) {
// For homogeneous sequential types, check for padding within members.
if (SequentialType *seqTy = dyn_cast<SequentialType>(type))
- return isa<PointerType>(seqTy) || isDenselyPacked(seqTy->getElementType());
+ return isa<PointerType>(seqTy) ||
+ isDenselyPacked(seqTy->getElementType(), DL);
// Check for padding within and between elements of a struct.
StructType *StructTy = cast<StructType>(type);
- const StructLayout *Layout = DL->getStructLayout(StructTy);
+ const StructLayout *Layout = DL.getStructLayout(StructTy);
uint64_t StartPos = 0;
for (unsigned i = 0, E = StructTy->getNumElements(); i < E; ++i) {
Type *ElTy = StructTy->getElementType(i);
- if (!isDenselyPacked(ElTy))
+ if (!isDenselyPacked(ElTy, DL))
return false;
if (StartPos != Layout->getElementOffsetInBits(i))
return false;
- StartPos += DL->getTypeAllocSizeInBits(ElTy);
+ StartPos += DL.getTypeAllocSizeInBits(ElTy);
}
return true;
@@ -236,6 +233,7 @@ CallGraphNode *ArgPromotion::PromoteArguments(CallGraphNode *CGN) {
// IR, while in the callee the classification is determined dynamically based
// on the number of registers consumed so far.
if (F->isVarArg()) return nullptr;
+ const DataLayout &DL = F->getParent()->getDataLayout();
// Check to see which arguments are promotable. If an argument is promotable,
// add it to ArgsToPromote.
@@ -250,8 +248,8 @@ CallGraphNode *ArgPromotion::PromoteArguments(CallGraphNode *CGN) {
// packed or if we can prove the padding bytes are never accessed. This does
// not apply to inalloca.
bool isSafeToPromote =
- PtrArg->hasByValAttr() &&
- (isDenselyPacked(AgTy) || !canPaddingBeAccessed(PtrArg));
+ PtrArg->hasByValAttr() &&
+ (isDenselyPacked(AgTy, DL) || !canPaddingBeAccessed(PtrArg));
if (isSafeToPromote) {
if (StructType *STy = dyn_cast<StructType>(AgTy)) {
if (maxElements > 0 && STy->getNumElements() > maxElements) {
@@ -310,9 +308,9 @@ CallGraphNode *ArgPromotion::PromoteArguments(CallGraphNode *CGN) {
/// AllCallersPassInValidPointerForArgument - Return true if we can prove that
/// all callers pass in a valid pointer for the specified function argument.
-static bool AllCallersPassInValidPointerForArgument(Argument *Arg,
- const DataLayout *DL) {
+static bool AllCallersPassInValidPointerForArgument(Argument *Arg) {
Function *Callee = Arg->getParent();
+ const DataLayout &DL = Callee->getParent()->getDataLayout();
unsigned ArgNo = Arg->getArgNo();
@@ -430,7 +428,7 @@ bool ArgPromotion::isSafeToPromoteArgument(Argument *Arg,
GEPIndicesSet ToPromote;
// If the pointer is always valid, any load with first index 0 is valid.
- if (isByValOrInAlloca || AllCallersPassInValidPointerForArgument(Arg, DL))
+ if (isByValOrInAlloca || AllCallersPassInValidPointerForArgument(Arg))
SafeToUnconditionallyLoad.insert(IndicesVector(1, 0));
// First, iterate the entry block and mark loads of (geps of) arguments as
@@ -586,7 +584,7 @@ CallGraphNode *ArgPromotion::DoPromotion(Function *F,
FunctionType *FTy = F->getFunctionType();
std::vector<Type*> Params;
- typedef std::set<IndicesVector> ScalarizeTable;
+ typedef std::set<std::pair<Type *, IndicesVector>> ScalarizeTable;
// ScalarizedElements - If we are promoting a pointer that has elements
// accessed out of it, keep track of which elements are accessed so that we
@@ -623,8 +621,7 @@ CallGraphNode *ArgPromotion::DoPromotion(Function *F,
// Simple byval argument? Just add all the struct element types.
Type *AgTy = cast<PointerType>(I->getType())->getElementType();
StructType *STy = cast<StructType>(AgTy);
- for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i)
- Params.push_back(STy->getElementType(i));
+ Params.insert(Params.end(), STy->element_begin(), STy->element_end());
++NumByValArgsPromoted;
} else if (!ArgsToPromote.count(I)) {
// Unchanged argument
@@ -647,7 +644,11 @@ CallGraphNode *ArgPromotion::DoPromotion(Function *F,
ScalarizeTable &ArgIndices = ScalarizedElements[I];
for (User *U : I->users()) {
Instruction *UI = cast<Instruction>(U);
- assert(isa<LoadInst>(UI) || isa<GetElementPtrInst>(UI));
+ Type *SrcTy;
+ if (LoadInst *L = dyn_cast<LoadInst>(UI))
+ SrcTy = L->getType();
+ else
+ SrcTy = cast<GetElementPtrInst>(UI)->getSourceElementType();
IndicesVector Indices;
Indices.reserve(UI->getNumOperands() - 1);
// Since loads will only have a single operand, and GEPs only a single
@@ -659,7 +660,7 @@ CallGraphNode *ArgPromotion::DoPromotion(Function *F,
// GEPs with a single 0 index can be merged with direct loads
if (Indices.size() == 1 && Indices.front() == 0)
Indices.clear();
- ArgIndices.insert(Indices);
+ ArgIndices.insert(std::make_pair(SrcTy, Indices));
LoadInst *OrigLoad;
if (LoadInst *L = dyn_cast<LoadInst>(UI))
OrigLoad = L;
@@ -673,11 +674,12 @@ CallGraphNode *ArgPromotion::DoPromotion(Function *F,
for (ScalarizeTable::iterator SI = ArgIndices.begin(),
E = ArgIndices.end(); SI != E; ++SI) {
// not allowed to dereference ->begin() if size() is 0
- Params.push_back(GetElementPtrInst::getIndexedType(I->getType(), *SI));
+ Params.push_back(
+ GetElementPtrInst::getIndexedType(I->getType(), SI->second));
assert(Params.back());
}
- if (ArgIndices.size() == 1 && ArgIndices.begin()->empty())
+ if (ArgIndices.size() == 1 && ArgIndices.begin()->second.empty())
++NumArgumentsPromoted;
else
++NumAggregatesPromoted;
@@ -768,9 +770,8 @@ CallGraphNode *ArgPromotion::DoPromotion(Function *F,
ConstantInt::get(Type::getInt32Ty(F->getContext()), 0), nullptr };
for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) {
Idxs[1] = ConstantInt::get(Type::getInt32Ty(F->getContext()), i);
- Value *Idx = GetElementPtrInst::Create(*AI, Idxs,
- (*AI)->getName()+"."+utostr(i),
- Call);
+ Value *Idx = GetElementPtrInst::Create(
+ STy, *AI, Idxs, (*AI)->getName() + "." + utostr(i), Call);
// TODO: Tell AA about the new values?
Args.push_back(new LoadInst(Idx, Idx->getName()+".val", Call));
}
@@ -783,12 +784,13 @@ CallGraphNode *ArgPromotion::DoPromotion(Function *F,
for (ScalarizeTable::iterator SI = ArgIndices.begin(),
E = ArgIndices.end(); SI != E; ++SI) {
Value *V = *AI;
- LoadInst *OrigLoad = OriginalLoads[std::make_pair(I, *SI)];
- if (!SI->empty()) {
- Ops.reserve(SI->size());
+ LoadInst *OrigLoad = OriginalLoads[std::make_pair(I, SI->second)];
+ if (!SI->second.empty()) {
+ Ops.reserve(SI->second.size());
Type *ElTy = V->getType();
- for (IndicesVector::const_iterator II = SI->begin(),
- IE = SI->end(); II != IE; ++II) {
+ for (IndicesVector::const_iterator II = SI->second.begin(),
+ IE = SI->second.end();
+ II != IE; ++II) {
// Use i32 to index structs, and i64 for others (pointers/arrays).
// This satisfies GEP constraints.
Type *IdxTy = (ElTy->isStructTy() ?
@@ -799,7 +801,8 @@ CallGraphNode *ArgPromotion::DoPromotion(Function *F,
ElTy = cast<CompositeType>(ElTy)->getTypeAtIndex(*II);
}
// And create a GEP to extract those indices.
- V = GetElementPtrInst::Create(V, Ops, V->getName()+".idx", Call);
+ V = GetElementPtrInst::Create(SI->first, V, Ops,
+ V->getName() + ".idx", Call);
Ops.clear();
AA.copyValue(OrigLoad->getOperand(0), V);
}
@@ -903,10 +906,9 @@ CallGraphNode *ArgPromotion::DoPromotion(Function *F,
for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) {
Idxs[1] = ConstantInt::get(Type::getInt32Ty(F->getContext()), i);
- Value *Idx =
- GetElementPtrInst::Create(TheAlloca, Idxs,
- TheAlloca->getName()+"."+Twine(i),
- InsertPt);
+ Value *Idx = GetElementPtrInst::Create(
+ AgTy, TheAlloca, Idxs, TheAlloca->getName() + "." + Twine(i),
+ InsertPt);
I2->setName(I->getName()+"."+Twine(i));
new StoreInst(I2++, Idx, InsertPt);
}
@@ -939,7 +941,7 @@ CallGraphNode *ArgPromotion::DoPromotion(Function *F,
while (!I->use_empty()) {
if (LoadInst *LI = dyn_cast<LoadInst>(I->user_back())) {
- assert(ArgIndices.begin()->empty() &&
+ assert(ArgIndices.begin()->second.empty() &&
"Load element should sort to front!");
I2->setName(I->getName()+".val");
LI->replaceAllUsesWith(I2);
@@ -961,7 +963,7 @@ CallGraphNode *ArgPromotion::DoPromotion(Function *F,
Function::arg_iterator TheArg = I2;
for (ScalarizeTable::iterator It = ArgIndices.begin();
- *It != Operands; ++It, ++TheArg) {
+ It->second != Operands; ++It, ++TheArg) {
assert(It != ArgIndices.end() && "GEP not handled??");
}
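Most of the churn in this file traces back to one API change: GetElementPtrInst::Create now takes the source element type explicitly, which is why ScalarizeTable grows a Type * alongside each index vector. A minimal sketch of the overload, assuming Ctx (an LLVMContext), STy (the pointee struct type), Ptr, and InsertPt are in scope:

    Value *Idxs[] = {ConstantInt::get(Type::getInt32Ty(Ctx), 0),
                     ConstantInt::get(Type::getInt32Ty(Ctx), 1)};
    // Old form:
    //   GetElementPtrInst::Create(Ptr, Idxs, "f1", InsertPt);
    // New form passes the pointee type up front:
    Value *GEP = GetElementPtrInst::Create(STy, Ptr, Idxs, "f1", InsertPt);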
diff --git a/lib/Transforms/IPO/ConstantMerge.cpp b/lib/Transforms/IPO/ConstantMerge.cpp
index 0b6ade9..8ce7646 100644
--- a/lib/Transforms/IPO/ConstantMerge.cpp
+++ b/lib/Transforms/IPO/ConstantMerge.cpp
@@ -52,7 +52,6 @@ namespace {
// alignment to a concrete value.
unsigned getAlignment(GlobalVariable *GV) const;
- const DataLayout *DL;
};
}
@@ -89,32 +88,22 @@ static bool IsBetterCanonical(const GlobalVariable &A,
return A.hasUnnamedAddr();
}
-bool ConstantMerge::hasKnownAlignment(GlobalVariable *GV) const {
- return DL || GV->getAlignment() != 0;
-}
-
unsigned ConstantMerge::getAlignment(GlobalVariable *GV) const {
unsigned Align = GV->getAlignment();
if (Align)
return Align;
- if (DL)
- return DL->getPreferredAlignment(GV);
- return 0;
+ return GV->getParent()->getDataLayout().getPreferredAlignment(GV);
}
bool ConstantMerge::runOnModule(Module &M) {
- DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>();
- DL = DLP ? &DLP->getDataLayout() : nullptr;
// Find all the globals that are marked "used". These cannot be merged.
SmallPtrSet<const GlobalValue*, 8> UsedGlobals;
FindUsedValues(M.getGlobalVariable("llvm.used"), UsedGlobals);
FindUsedValues(M.getGlobalVariable("llvm.compiler.used"), UsedGlobals);
-
- // Map unique <constants, has-unknown-alignment> pairs to globals. We don't
- // want to merge globals of unknown alignment with those of explicit
- // alignment. If we have DataLayout, we always know the alignment.
- DenseMap<PointerIntPair<Constant*, 1, bool>, GlobalVariable*> CMap;
+
+ // Map unique constants to globals.
+ DenseMap<Constant *, GlobalVariable *> CMap;
// Replacements - This vector contains a list of replacements to perform.
SmallVector<std::pair<GlobalVariable*, GlobalVariable*>, 32> Replacements;
@@ -156,8 +145,7 @@ bool ConstantMerge::runOnModule(Module &M) {
Constant *Init = GV->getInitializer();
// Check to see if the initializer is already known.
- PointerIntPair<Constant*, 1, bool> Pair(Init, hasKnownAlignment(GV));
- GlobalVariable *&Slot = CMap[Pair];
+ GlobalVariable *&Slot = CMap[Init];
// If this is the first constant we find or if the old one is local,
// replace with the current one. If the current is externally visible
@@ -188,8 +176,7 @@ bool ConstantMerge::runOnModule(Module &M) {
Constant *Init = GV->getInitializer();
// Check to see if the initializer is already known.
- PointerIntPair<Constant*, 1, bool> Pair(Init, hasKnownAlignment(GV));
- GlobalVariable *Slot = CMap[Pair];
+ GlobalVariable *Slot = CMap[Init];
if (!Slot || Slot == GV)
continue;
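With DataLayout now guaranteed by the Module, every global has a computable preferred alignment, so the has-known-alignment bit vanishes from the map key and the map collapses to DenseMap<Constant *, GlobalVariable *>. The surviving fallback, as a sketch:

    unsigned alignmentOf(llvm::GlobalVariable *GV) {
      if (unsigned A = GV->getAlignment())
        return A; // an explicit alignment always wins
      return GV->getParent()->getDataLayout().getPreferredAlignment(GV);
    }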
diff --git a/lib/Transforms/IPO/GlobalDCE.cpp b/lib/Transforms/IPO/GlobalDCE.cpp
index 0c844fe..ba04c80 100644
--- a/lib/Transforms/IPO/GlobalDCE.cpp
+++ b/lib/Transforms/IPO/GlobalDCE.cpp
@@ -24,6 +24,7 @@
#include "llvm/Transforms/Utils/CtorUtils.h"
#include "llvm/Transforms/Utils/GlobalStatus.h"
#include "llvm/Pass.h"
+#include <unordered_map>
using namespace llvm;
#define DEBUG_TYPE "globaldce"
@@ -47,6 +48,7 @@ namespace {
private:
SmallPtrSet<GlobalValue*, 32> AliveGlobals;
SmallPtrSet<Constant *, 8> SeenConstants;
+ std::unordered_multimap<Comdat *, GlobalValue *> ComdatMembers;
/// GlobalIsNeeded - mark the specific global value as needed, and
/// recursively mark anything that it uses as also needed.
@@ -78,6 +80,17 @@ bool GlobalDCE::runOnModule(Module &M) {
// Remove empty functions from the global ctors list.
Changed |= optimizeGlobalCtorsList(M, isEmptyFunction);
+ // Collect the set of members for each comdat.
+ for (Function &F : M)
+ if (Comdat *C = F.getComdat())
+ ComdatMembers.insert(std::make_pair(C, &F));
+ for (GlobalVariable &GV : M.globals())
+ if (Comdat *C = GV.getComdat())
+ ComdatMembers.insert(std::make_pair(C, &GV));
+ for (GlobalAlias &GA : M.aliases())
+ if (Comdat *C = GA.getComdat())
+ ComdatMembers.insert(std::make_pair(C, &GA));
+
// Loop over the module, adding globals which are obviously necessary.
for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I) {
Changed |= RemoveUnusedGlobalValue(*I);
@@ -177,6 +190,7 @@ bool GlobalDCE::runOnModule(Module &M) {
// Make sure that all memory is released
AliveGlobals.clear();
SeenConstants.clear();
+ ComdatMembers.clear();
return Changed;
}
@@ -188,17 +202,9 @@ void GlobalDCE::GlobalIsNeeded(GlobalValue *G) {
if (!AliveGlobals.insert(G).second)
return;
- Module *M = G->getParent();
if (Comdat *C = G->getComdat()) {
- for (Function &F : *M)
- if (F.getComdat() == C)
- GlobalIsNeeded(&F);
- for (GlobalVariable &GV : M->globals())
- if (GV.getComdat() == C)
- GlobalIsNeeded(&GV);
- for (GlobalAlias &GA : M->aliases())
- if (GA.getComdat() == C)
- GlobalIsNeeded(&GA);
+ for (auto &&CM : make_range(ComdatMembers.equal_range(C)))
+ GlobalIsNeeded(CM.second);
}
if (GlobalVariable *GV = dyn_cast<GlobalVariable>(G)) {
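The point of ComdatMembers is to replace three whole-module scans per needed comdat with one precomputed equal_range lookup. A self-contained sketch of the multimap pattern with toy key/value types:

    #include <iostream>
    #include <string>
    #include <unordered_map>

    int main() {
      std::unordered_multimap<int, std::string> Members;
      Members.insert({1, "f"});
      Members.insert({1, "g"});
      Members.insert({2, "h"});
      auto Range = Members.equal_range(1);
      for (auto It = Range.first; It != Range.second; ++It)
        std::cout << It->second << '\n'; // prints f and g, in some order
    }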
diff --git a/lib/Transforms/IPO/GlobalOpt.cpp b/lib/Transforms/IPO/GlobalOpt.cpp
index 45e04f1..20b41fb 100644
--- a/lib/Transforms/IPO/GlobalOpt.cpp
+++ b/lib/Transforms/IPO/GlobalOpt.cpp
@@ -22,6 +22,7 @@
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/ConstantFolding.h"
#include "llvm/Analysis/MemoryBuiltins.h"
+#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/IR/CallSite.h"
#include "llvm/IR/CallingConv.h"
#include "llvm/IR/Constants.h"
@@ -38,7 +39,6 @@
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Transforms/Utils/CtorUtils.h"
#include "llvm/Transforms/Utils/GlobalStatus.h"
#include "llvm/Transforms/Utils/ModuleUtils.h"
@@ -86,7 +86,6 @@ namespace {
const GlobalStatus &GS);
bool OptimizeEmptyGlobalCXXDtors(Function *CXAAtExitFn);
- const DataLayout *DL;
TargetLibraryInfo *TLI;
SmallSet<const Comdat *, 8> NotDiscardableComdats;
};
@@ -269,7 +268,7 @@ static bool CleanupPointerRootUsers(GlobalVariable *GV,
/// quick scan over the use list to clean up the easy and obvious cruft. This
/// returns true if it made a change.
static bool CleanupConstantGlobalUsers(Value *V, Constant *Init,
- const DataLayout *DL,
+ const DataLayout &DL,
TargetLibraryInfo *TLI) {
bool Changed = false;
// Note that we need to use a weak value handle for the worklist items. When
@@ -318,8 +317,8 @@ static bool CleanupConstantGlobalUsers(Value *V, Constant *Init,
// and will invalidate our notion of what Init is.
Constant *SubInit = nullptr;
if (!isa<ConstantExpr>(GEP->getOperand(0))) {
- ConstantExpr *CE =
- dyn_cast_or_null<ConstantExpr>(ConstantFoldInstruction(GEP, DL, TLI));
+ ConstantExpr *CE = dyn_cast_or_null<ConstantExpr>(
+ ConstantFoldInstruction(GEP, DL, TLI));
if (Init && CE && CE->getOpcode() == Instruction::GetElementPtr)
SubInit = ConstantFoldLoadThroughGEPConstantExpr(Init, CE);
@@ -580,8 +579,9 @@ static GlobalVariable *SRAGlobal(GlobalVariable *GV, const DataLayout &DL) {
Idxs.push_back(NullInt);
for (unsigned i = 3, e = GEPI->getNumOperands(); i != e; ++i)
Idxs.push_back(GEPI->getOperand(i));
- NewPtr = GetElementPtrInst::Create(NewPtr, Idxs,
- GEPI->getName()+"."+Twine(Val),GEPI);
+ NewPtr = GetElementPtrInst::Create(
+ NewPtr->getType()->getPointerElementType(), NewPtr, Idxs,
+ GEPI->getName() + "." + Twine(Val), GEPI);
}
}
GEP->replaceAllUsesWith(NewPtr);
@@ -739,7 +739,7 @@ static bool OptimizeAwayTrappingUsesOfValue(Value *V, Constant *NewV) {
/// if the loaded value is dynamically null, then we know that they cannot be
/// reachable with a null, so we can optimize away the load.
static bool OptimizeAwayTrappingUsesOfLoads(GlobalVariable *GV, Constant *LV,
- const DataLayout *DL,
+ const DataLayout &DL,
TargetLibraryInfo *TLI) {
bool Changed = false;
@@ -802,7 +802,7 @@ static bool OptimizeAwayTrappingUsesOfLoads(GlobalVariable *GV, Constant *LV,
/// ConstantPropUsersOf - Walk the use list of V, constant folding all of the
/// instructions that are foldable.
-static void ConstantPropUsersOf(Value *V, const DataLayout *DL,
+static void ConstantPropUsersOf(Value *V, const DataLayout &DL,
TargetLibraryInfo *TLI) {
for (Value::user_iterator UI = V->user_begin(), E = V->user_end(); UI != E; )
if (Instruction *I = dyn_cast<Instruction>(*UI++))
@@ -822,12 +822,10 @@ static void ConstantPropUsersOf(Value *V, const DataLayout *DL,
/// the specified malloc. Because it is always the result of the specified
/// malloc, there is no reason to actually DO the malloc. Instead, turn the
/// malloc into a global, and any loads of GV as uses of the new global.
-static GlobalVariable *OptimizeGlobalAddressOfMalloc(GlobalVariable *GV,
- CallInst *CI,
- Type *AllocTy,
- ConstantInt *NElements,
- const DataLayout *DL,
- TargetLibraryInfo *TLI) {
+static GlobalVariable *
+OptimizeGlobalAddressOfMalloc(GlobalVariable *GV, CallInst *CI, Type *AllocTy,
+ ConstantInt *NElements, const DataLayout &DL,
+ TargetLibraryInfo *TLI) {
DEBUG(errs() << "PROMOTING GLOBAL: " << *GV << " CALL = " << *CI << '\n');
Type *GlobalType;
@@ -1167,7 +1165,8 @@ static Value *GetHeapSROAValue(Value *V, unsigned FieldNo,
InsertedScalarizedValues,
PHIsToRewrite),
LI->getName()+".f"+Twine(FieldNo), LI);
- } else if (PHINode *PN = dyn_cast<PHINode>(V)) {
+ } else {
+ PHINode *PN = cast<PHINode>(V);
// PN's type is pointer to struct. Make a new PHI of pointer to struct
// field.
@@ -1181,8 +1180,6 @@ static Value *GetHeapSROAValue(Value *V, unsigned FieldNo,
PN->getName()+".f"+Twine(FieldNo), PN);
Result = NewPN;
PHIsToRewrite.push_back(std::make_pair(PN, FieldNo));
- } else {
- llvm_unreachable("Unknown usable value");
}
return FieldVals[FieldNo] = Result;
@@ -1224,7 +1221,7 @@ static void RewriteHeapSROALoadUser(Instruction *LoadUser,
GEPIdx.push_back(GEPI->getOperand(1));
GEPIdx.append(GEPI->op_begin()+3, GEPI->op_end());
- Value *NGEPI = GetElementPtrInst::Create(NewPtr, GEPIdx,
+ Value *NGEPI = GetElementPtrInst::Create(GEPI->getResultElementType(),
+ NewPtr, GEPIdx,
GEPI->getName(), GEPI);
GEPI->replaceAllUsesWith(NGEPI);
GEPI->eraseFromParent();
@@ -1271,7 +1268,7 @@ static void RewriteUsesOfLoadForHeapSRoA(LoadInst *Load,
/// PerformHeapAllocSRoA - CI is an allocation of an array of structures. Break
/// it up into multiple allocations of arrays of the fields.
static GlobalVariable *PerformHeapAllocSRoA(GlobalVariable *GV, CallInst *CI,
- Value *NElems, const DataLayout *DL,
+ Value *NElems, const DataLayout &DL,
const TargetLibraryInfo *TLI) {
DEBUG(dbgs() << "SROA HEAP ALLOC: " << *GV << " MALLOC = " << *CI << '\n');
Type *MAT = getMallocAllocatedType(CI, TLI);
@@ -1301,10 +1298,10 @@ static GlobalVariable *PerformHeapAllocSRoA(GlobalVariable *GV, CallInst *CI,
GV->getThreadLocalMode());
FieldGlobals.push_back(NGV);
- unsigned TypeSize = DL->getTypeAllocSize(FieldTy);
+ unsigned TypeSize = DL.getTypeAllocSize(FieldTy);
if (StructType *ST = dyn_cast<StructType>(FieldTy))
- TypeSize = DL->getStructLayout(ST)->getSizeInBytes();
- Type *IntPtrTy = DL->getIntPtrType(CI->getType());
+ TypeSize = DL.getStructLayout(ST)->getSizeInBytes();
+ Type *IntPtrTy = DL.getIntPtrType(CI->getType());
Value *NMI = CallInst::CreateMalloc(CI, IntPtrTy, FieldTy,
ConstantInt::get(IntPtrTy, TypeSize),
NElems, nullptr,
@@ -1459,16 +1456,12 @@ static GlobalVariable *PerformHeapAllocSRoA(GlobalVariable *GV, CallInst *CI,
/// TryToOptimizeStoreOfMallocToGlobal - This function is called when we see a
/// pointer global variable with a single value stored in it that is a malloc or
/// cast of malloc.
-static bool TryToOptimizeStoreOfMallocToGlobal(GlobalVariable *GV,
- CallInst *CI,
+static bool TryToOptimizeStoreOfMallocToGlobal(GlobalVariable *GV, CallInst *CI,
Type *AllocTy,
AtomicOrdering Ordering,
Module::global_iterator &GVI,
- const DataLayout *DL,
+ const DataLayout &DL,
TargetLibraryInfo *TLI) {
- if (!DL)
- return false;
-
// If this is a malloc of an abstract type, don't touch it.
if (!AllocTy->isSized())
return false;
@@ -1504,7 +1497,7 @@ static bool TryToOptimizeStoreOfMallocToGlobal(GlobalVariable *GV,
// Restrict this transformation to only working on small allocations
// (2048 bytes currently), as we don't want to introduce a 16M global or
// something.
- if (NElements->getZExtValue() * DL->getTypeAllocSize(AllocTy) < 2048) {
+ if (NElements->getZExtValue() * DL.getTypeAllocSize(AllocTy) < 2048) {
GVI = OptimizeGlobalAddressOfMalloc(GV, CI, AllocTy, NElements, DL, TLI);
return true;
}
@@ -1534,8 +1527,8 @@ static bool TryToOptimizeStoreOfMallocToGlobal(GlobalVariable *GV,
// If this is a fixed size array, transform the Malloc to be an alloc of
// structs. malloc [100 x struct],1 -> malloc struct, 100
if (ArrayType *AT = dyn_cast<ArrayType>(getMallocAllocatedType(CI, TLI))) {
- Type *IntPtrTy = DL->getIntPtrType(CI->getType());
- unsigned TypeSize = DL->getStructLayout(AllocSTy)->getSizeInBytes();
+ Type *IntPtrTy = DL.getIntPtrType(CI->getType());
+ unsigned TypeSize = DL.getStructLayout(AllocSTy)->getSizeInBytes();
Value *AllocSize = ConstantInt::get(IntPtrTy, TypeSize);
Value *NumElements = ConstantInt::get(IntPtrTy, AT->getNumElements());
Instruction *Malloc = CallInst::CreateMalloc(CI, IntPtrTy, AllocSTy,
@@ -1563,7 +1556,7 @@ static bool TryToOptimizeStoreOfMallocToGlobal(GlobalVariable *GV,
static bool OptimizeOnceStoredGlobal(GlobalVariable *GV, Value *StoredOnceVal,
AtomicOrdering Ordering,
Module::global_iterator &GVI,
- const DataLayout *DL,
+ const DataLayout &DL,
TargetLibraryInfo *TLI) {
// Ignore no-op GEPs and bitcasts.
StoredOnceVal = StoredOnceVal->stripPointerCasts();
@@ -1733,6 +1726,7 @@ bool GlobalOpt::ProcessGlobal(GlobalVariable *GV,
bool GlobalOpt::ProcessInternalGlobal(GlobalVariable *GV,
Module::global_iterator &GVI,
const GlobalStatus &GS) {
+ auto &DL = GV->getParent()->getDataLayout();
// If this is a first class global and has only one accessing function
// and this function is main (which we know is not recursive), we replace
// the global with a local alloca in this function.
@@ -1804,12 +1798,10 @@ bool GlobalOpt::ProcessInternalGlobal(GlobalVariable *GV,
++NumMarked;
return true;
} else if (!GV->getInitializer()->getType()->isSingleValueType()) {
- if (DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>()) {
- const DataLayout &DL = DLP->getDataLayout();
- if (GlobalVariable *FirstNewGV = SRAGlobal(GV, DL)) {
- GVI = FirstNewGV; // Don't skip the newly produced globals!
- return true;
- }
+ const DataLayout &DL = GV->getParent()->getDataLayout();
+ if (GlobalVariable *FirstNewGV = SRAGlobal(GV, DL)) {
+ GVI = FirstNewGV; // Don't skip the newly produced globals!
+ return true;
}
} else if (GS.StoredType == GlobalStatus::StoredOnce) {
// If the initial value for the global was an undef value, and if only
@@ -1954,6 +1946,7 @@ bool GlobalOpt::OptimizeGlobalVars(Module &M) {
// Simplify the initializer.
if (GV->hasInitializer())
if (ConstantExpr *CE = dyn_cast<ConstantExpr>(GV->getInitializer())) {
+ auto &DL = M.getDataLayout();
Constant *New = ConstantFoldConstantExpression(CE, DL, TLI);
if (New && New != CE)
GV->setInitializer(New);
@@ -1971,9 +1964,8 @@ bool GlobalOpt::OptimizeGlobalVars(Module &M) {
static inline bool
isSimpleEnoughValueToCommit(Constant *C,
- SmallPtrSetImpl<Constant*> &SimpleConstants,
- const DataLayout *DL);
-
+ SmallPtrSetImpl<Constant *> &SimpleConstants,
+ const DataLayout &DL);
/// isSimpleEnoughValueToCommit - Return true if the specified constant can be
/// handled by the code generator. We don't want to generate something like:
@@ -1983,9 +1975,10 @@ isSimpleEnoughValueToCommit(Constant *C,
/// This function should be called if C was not found (but just got inserted)
/// in SimpleConstants to avoid having to rescan the same constants all the
/// time.
-static bool isSimpleEnoughValueToCommitHelper(Constant *C,
- SmallPtrSetImpl<Constant*> &SimpleConstants,
- const DataLayout *DL) {
+static bool
+isSimpleEnoughValueToCommitHelper(Constant *C,
+ SmallPtrSetImpl<Constant *> &SimpleConstants,
+ const DataLayout &DL) {
// Simple global addresses are supported, do not allow dllimport or
// thread-local globals.
if (auto *GV = dyn_cast<GlobalValue>(C))
@@ -2019,8 +2012,8 @@ static bool isSimpleEnoughValueToCommitHelper(Constant *C,
case Instruction::PtrToInt:
// int <=> ptr is fine if the int type is the same size as the
// pointer type.
- if (!DL || DL->getTypeSizeInBits(CE->getType()) !=
- DL->getTypeSizeInBits(CE->getOperand(0)->getType()))
+ if (DL.getTypeSizeInBits(CE->getType()) !=
+ DL.getTypeSizeInBits(CE->getOperand(0)->getType()))
return false;
return isSimpleEnoughValueToCommit(CE->getOperand(0), SimpleConstants, DL);
@@ -2042,8 +2035,8 @@ static bool isSimpleEnoughValueToCommitHelper(Constant *C,
static inline bool
isSimpleEnoughValueToCommit(Constant *C,
- SmallPtrSetImpl<Constant*> &SimpleConstants,
- const DataLayout *DL) {
+ SmallPtrSetImpl<Constant *> &SimpleConstants,
+ const DataLayout &DL) {
// If we already checked this constant, we win.
if (!SimpleConstants.insert(C).second)
return true;
@@ -2174,8 +2167,8 @@ namespace {
/// Once an evaluation call fails, the evaluation object should not be reused.
class Evaluator {
public:
- Evaluator(const DataLayout *DL, const TargetLibraryInfo *TLI)
- : DL(DL), TLI(TLI) {
+ Evaluator(const DataLayout &DL, const TargetLibraryInfo *TLI)
+ : DL(DL), TLI(TLI) {
ValueStack.emplace_back();
}
@@ -2249,7 +2242,7 @@ private:
/// simple enough to live in a static initializer of a global.
SmallPtrSet<Constant*, 8> SimpleConstants;
- const DataLayout *DL;
+ const DataLayout &DL;
const TargetLibraryInfo *TLI;
};
@@ -2498,9 +2491,9 @@ bool Evaluator::EvaluateBlock(BasicBlock::iterator CurInst,
Value *Ptr = PtrArg->stripPointerCasts();
if (GlobalVariable *GV = dyn_cast<GlobalVariable>(Ptr)) {
Type *ElemTy = cast<PointerType>(GV->getType())->getElementType();
- if (DL && !Size->isAllOnesValue() &&
+ if (!Size->isAllOnesValue() &&
Size->getValue().getLimitedValue() >=
- DL->getTypeStoreSize(ElemTy)) {
+ DL.getTypeStoreSize(ElemTy)) {
Invariants.insert(GV);
DEBUG(dbgs() << "Found a global var that is an invariant: " << *GV
<< "\n");
@@ -2689,7 +2682,7 @@ bool Evaluator::EvaluateFunction(Function *F, Constant *&RetVal,
/// EvaluateStaticConstructor - Evaluate static constructors in the function, if
/// we can. Return true if we can, false otherwise.
-static bool EvaluateStaticConstructor(Function *F, const DataLayout *DL,
+static bool EvaluateStaticConstructor(Function *F, const DataLayout &DL,
const TargetLibraryInfo *TLI) {
// Call the function.
Evaluator Eval(DL, TLI);
@@ -3040,8 +3033,7 @@ bool GlobalOpt::OptimizeEmptyGlobalCXXDtors(Function *CXAAtExitFn) {
bool GlobalOpt::runOnModule(Module &M) {
bool Changed = false;
- DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>();
- DL = DLP ? &DLP->getDataLayout() : nullptr;
+ auto &DL = M.getDataLayout();
TLI = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();
bool LocalChange = true;
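The same migration runs through this whole file: DataLayout stops being an optional DataLayoutPass result and becomes M.getDataLayout(), so the `if (!DL)` bail-outs disappear and helpers take `const DataLayout &`. A sketch of the pattern, using the element-type accessor of this era:

    #include "llvm/IR/DataLayout.h"
    #include "llvm/IR/GlobalVariable.h"
    #include "llvm/IR/Module.h"

    void useLayout(llvm::GlobalVariable *GV) {
      const llvm::DataLayout &DL = GV->getParent()->getDataLayout();
      uint64_t Bytes = DL.getTypeAllocSize(GV->getType()->getElementType());
      (void)Bytes; // DL can no longer be null here
    }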
diff --git a/lib/Transforms/IPO/Inliner.cpp b/lib/Transforms/IPO/Inliner.cpp
index 305ad7a..3aa4ee5 100644
--- a/lib/Transforms/IPO/Inliner.cpp
+++ b/lib/Transforms/IPO/Inliner.cpp
@@ -20,6 +20,7 @@
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/CallGraph.h"
#include "llvm/Analysis/InlineCost.h"
+#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/IR/CallSite.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DiagnosticInfo.h"
@@ -29,7 +30,6 @@
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Transforms/Utils/Cloning.h"
#include "llvm/Transforms/Utils/Local.h"
using namespace llvm;
@@ -72,8 +72,8 @@ Inliner::Inliner(char &ID, int Threshold, bool InsertLifetime)
InlineLimit : Threshold),
InsertLifetime(InsertLifetime) {}
-/// getAnalysisUsage - For this class, we declare that we require and preserve
-/// the call graph. If the derived class implements this method, it should
+/// For this class, we declare that we require and preserve the call graph.
+/// If the derived class implements this method, it should
/// always explicitly call the implementation here.
void Inliner::getAnalysisUsage(AnalysisUsage &AU) const {
AU.addRequired<AliasAnalysis>();
@@ -111,18 +111,17 @@ static void AdjustCallerSSPLevel(Function *Caller, Function *Callee) {
Caller->addFnAttr(Attribute::StackProtect);
}
-/// InlineCallIfPossible - If it is possible to inline the specified call site,
+/// If it is possible to inline the specified call site,
/// do so and update the CallGraph for this operation.
///
/// This function also does some basic book-keeping to update the IR. The
/// InlinedArrayAllocas map keeps track of any allocas that are already
-/// available from other functions inlined into the caller. If we are able to
+/// available from other functions inlined into the caller. If we are able to
/// inline this call site we attempt to reuse already available allocas or add
/// any new allocas to the set if not possible.
static bool InlineCallIfPossible(CallSite CS, InlineFunctionInfo &IFI,
InlinedArrayAllocasTy &InlinedArrayAllocas,
- int InlineHistory, bool InsertLifetime,
- const DataLayout *DL) {
+ int InlineHistory, bool InsertLifetime) {
Function *Callee = CS.getCalledFunction();
Function *Caller = CS.getCaller();
@@ -198,11 +197,6 @@ static bool InlineCallIfPossible(CallSite CS, InlineFunctionInfo &IFI,
unsigned Align1 = AI->getAlignment(),
Align2 = AvailableAlloca->getAlignment();
- // If we don't have data layout information, and only one alloca is using
- // the target default, then we can't safely merge them because we can't
- // pick the greater alignment.
- if (!DL && (!Align1 || !Align2) && Align1 != Align2)
- continue;
// The available alloca has to be in the right function, not in some other
// function in this SCC.
@@ -223,8 +217,8 @@ static bool InlineCallIfPossible(CallSite CS, InlineFunctionInfo &IFI,
if (Align1 != Align2) {
if (!Align1 || !Align2) {
- assert(DL && "DataLayout required to compare default alignments");
- unsigned TypeAlign = DL->getABITypeAlignment(AI->getAllocatedType());
+ const DataLayout &DL = Caller->getParent()->getDataLayout();
+ unsigned TypeAlign = DL.getABITypeAlignment(AI->getAllocatedType());
Align1 = Align1 ? Align1 : TypeAlign;
Align2 = Align2 ? Align2 : TypeAlign;
@@ -300,8 +294,7 @@ static void emitAnalysis(CallSite CS, const Twine &Msg) {
emitOptimizationRemarkAnalysis(Ctx, DEBUG_TYPE, *Caller, DLoc, Msg);
}
-/// shouldInline - Return true if the inliner should attempt to inline
-/// at the given CallSite.
+/// Return true if the inliner should attempt to inline at the given CallSite.
bool Inliner::shouldInline(CallSite CS) {
InlineCost IC = getInlineCost(CS);
@@ -415,7 +408,7 @@ bool Inliner::shouldInline(CallSite CS) {
return true;
}
-/// InlineHistoryIncludes - Return true if the specified inline history ID
+/// Return true if the specified inline history ID
/// indicates an inline history that includes the specified function.
static bool InlineHistoryIncludes(Function *F, int InlineHistoryID,
const SmallVectorImpl<std::pair<Function*, int> > &InlineHistory) {
@@ -432,8 +425,6 @@ static bool InlineHistoryIncludes(Function *F, int InlineHistoryID,
bool Inliner::runOnSCC(CallGraphSCC &SCC) {
CallGraph &CG = getAnalysis<CallGraphWrapperPass>().getCallGraph();
AssumptionCacheTracker *ACT = &getAnalysis<AssumptionCacheTracker>();
- DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>();
- const DataLayout *DL = DLP ? &DLP->getDataLayout() : nullptr;
auto *TLIP = getAnalysisIfAvailable<TargetLibraryInfoWrapperPass>();
const TargetLibraryInfo *TLI = TLIP ? &TLIP->getTLI() : nullptr;
AliasAnalysis *AA = &getAnalysis<AliasAnalysis>();
@@ -495,7 +486,7 @@ bool Inliner::runOnSCC(CallGraphSCC &SCC) {
InlinedArrayAllocasTy InlinedArrayAllocas;
- InlineFunctionInfo InlineInfo(&CG, DL, AA, ACT);
+ InlineFunctionInfo InlineInfo(&CG, AA, ACT);
// Now that we have all of the call sites, loop over them and inline them if
// it looks profitable to do so.
@@ -553,7 +544,7 @@ bool Inliner::runOnSCC(CallGraphSCC &SCC) {
// Attempt to inline the function.
if (!InlineCallIfPossible(CS, InlineInfo, InlinedArrayAllocas,
- InlineHistoryID, InsertLifetime, DL)) {
+ InlineHistoryID, InsertLifetime)) {
emitOptimizationRemarkMissed(CallerCtx, DEBUG_TYPE, *Caller, DLoc,
Twine(Callee->getName() +
" will not be inlined into " +
@@ -625,14 +616,13 @@ bool Inliner::runOnSCC(CallGraphSCC &SCC) {
return Changed;
}
-// doFinalization - Remove now-dead linkonce functions at the end of
-// processing to avoid breaking the SCC traversal.
+/// Remove now-dead linkonce functions at the end of
+/// processing to avoid breaking the SCC traversal.
bool Inliner::doFinalization(CallGraph &CG) {
return removeDeadFunctions(CG);
}
-/// removeDeadFunctions - Remove dead functions that are not included in
-/// DNR (Do Not Remove) list.
+/// Remove dead functions that are not included in DNR (Do Not Remove) list.
bool Inliner::removeDeadFunctions(CallGraph &CG, bool AlwaysInlineOnly) {
SmallVector<CallGraphNode*, 16> FunctionsToRemove;
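The alloca-merging hunk above no longer needs a missing-DataLayout escape hatch: a zero (default) alignment can always be resolved to the ABI type alignment before the two allocas are compared. As a sketch:

    unsigned resolveAlign(unsigned Align, llvm::Type *Ty,
                          const llvm::DataLayout &DL) {
      return Align ? Align : DL.getABITypeAlignment(Ty);
    }
    // Compare resolveAlign of both allocas; the merged alloca keeps the
    // larger value.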
diff --git a/lib/Transforms/IPO/LowerBitSets.cpp b/lib/Transforms/IPO/LowerBitSets.cpp
index 0a22a80..fe00d92 100644
--- a/lib/Transforms/IPO/LowerBitSets.cpp
+++ b/lib/Transforms/IPO/LowerBitSets.cpp
@@ -16,6 +16,7 @@
#include "llvm/Transforms/IPO.h"
#include "llvm/ADT/EquivalenceClasses.h"
#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/Triple.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/GlobalVariable.h"
@@ -31,10 +32,17 @@ using namespace llvm;
#define DEBUG_TYPE "lowerbitsets"
-STATISTIC(NumBitSetsCreated, "Number of bitsets created");
+STATISTIC(ByteArraySizeBits, "Byte array size in bits");
+STATISTIC(ByteArraySizeBytes, "Byte array size in bytes");
+STATISTIC(NumByteArraysCreated, "Number of byte arrays created");
STATISTIC(NumBitSetCallsLowered, "Number of bitset calls lowered");
STATISTIC(NumBitSetDisjointSets, "Number of disjoint sets of bitsets");
+static cl::opt<bool> AvoidReuse(
+ "lowerbitsets-avoid-reuse",
+ cl::desc("Try to avoid reuse of byte array addresses using aliases"),
+ cl::Hidden, cl::init(true));
+
bool BitSetInfo::containsGlobalOffset(uint64_t Offset) const {
if (Offset < ByteOffset)
return false;
@@ -46,11 +54,11 @@ bool BitSetInfo::containsGlobalOffset(uint64_t Offset) const {
if (BitOffset >= BitSize)
return false;
- return (Bits[BitOffset / 8] >> (BitOffset % 8)) & 1;
+ return Bits.count(BitOffset);
}
bool BitSetInfo::containsValue(
- const DataLayout *DL,
+ const DataLayout &DL,
const DenseMap<GlobalVariable *, uint64_t> &GlobalLayout, Value *V,
uint64_t COffset) const {
if (auto GV = dyn_cast<GlobalVariable>(V)) {
@@ -61,8 +69,8 @@ bool BitSetInfo::containsValue(
}
if (auto GEP = dyn_cast<GEPOperator>(V)) {
- APInt APOffset(DL->getPointerSizeInBits(0), 0);
- bool Result = GEP->accumulateConstantOffset(*DL, APOffset);
+ APInt APOffset(DL.getPointerSizeInBits(0), 0);
+ bool Result = GEP->accumulateConstantOffset(DL, APOffset);
if (!Result)
return false;
COffset += APOffset.getZExtValue();
@@ -101,18 +109,15 @@ BitSetInfo BitSetBuilder::build() {
BSI.ByteOffset = Min;
BSI.AlignLog2 = 0;
- // FIXME: Can probably do something smarter if all offsets are 0.
if (Mask != 0)
BSI.AlignLog2 = countTrailingZeros(Mask, ZB_Undefined);
// Build the compressed bitset while normalizing the offsets against the
// computed alignment.
BSI.BitSize = ((Max - Min) >> BSI.AlignLog2) + 1;
- uint64_t ByteSize = (BSI.BitSize + 7) / 8;
- BSI.Bits.resize(ByteSize);
for (uint64_t Offset : Offsets) {
Offset >>= BSI.AlignLog2;
- BSI.Bits[Offset / 8] |= 1 << (Offset % 8);
+ BSI.Bits.insert(Offset);
}
return BSI;
@@ -147,15 +152,47 @@ void GlobalLayoutBuilder::addFragment(const std::set<uint64_t> &F) {
FragmentMap[ObjIndex] = FragmentIndex;
}
+void ByteArrayBuilder::allocate(const std::set<uint64_t> &Bits,
+ uint64_t BitSize, uint64_t &AllocByteOffset,
+ uint8_t &AllocMask) {
+ // Find the smallest current allocation.
+ unsigned Bit = 0;
+ for (unsigned I = 1; I != BitsPerByte; ++I)
+ if (BitAllocs[I] < BitAllocs[Bit])
+ Bit = I;
+
+ AllocByteOffset = BitAllocs[Bit];
+
+ // Add our size to it.
+ unsigned ReqSize = AllocByteOffset + BitSize;
+ BitAllocs[Bit] = ReqSize;
+ if (Bytes.size() < ReqSize)
+ Bytes.resize(ReqSize);
+
+ // Set our bits.
+ AllocMask = 1 << Bit;
+ for (uint64_t B : Bits)
+ Bytes[AllocByteOffset + B] |= AllocMask;
+}
+
namespace {
+struct ByteArrayInfo {
+ std::set<uint64_t> Bits;
+ uint64_t BitSize;
+ GlobalVariable *ByteArray;
+ Constant *Mask;
+};
+
struct LowerBitSets : public ModulePass {
static char ID;
LowerBitSets() : ModulePass(ID) {
initializeLowerBitSetsPass(*PassRegistry::getPassRegistry());
}
- const DataLayout *DL;
+ Module *M;
+
+ bool LinkerSubsectionsViaSymbols;
IntegerType *Int1Ty;
IntegerType *Int8Ty;
IntegerType *Int32Ty;
@@ -169,20 +206,23 @@ struct LowerBitSets : public ModulePass {
// Mapping from bitset mdstrings to the call sites that test them.
DenseMap<MDString *, std::vector<CallInst *>> BitSetTestCallSites;
+ std::vector<ByteArrayInfo> ByteArrayInfos;
+
BitSetInfo
buildBitSet(MDString *BitSet,
const DenseMap<GlobalVariable *, uint64_t> &GlobalLayout);
- Value *createBitSetTest(IRBuilder<> &B, const BitSetInfo &BSI,
- GlobalVariable *BitSetGlobal, Value *BitOffset);
+ ByteArrayInfo *createByteArray(BitSetInfo &BSI);
+ void allocateByteArrays();
+ Value *createBitSetTest(IRBuilder<> &B, BitSetInfo &BSI, ByteArrayInfo *&BAI,
+ Value *BitOffset);
Value *
- lowerBitSetCall(CallInst *CI, const BitSetInfo &BSI,
- GlobalVariable *BitSetGlobal, GlobalVariable *CombinedGlobal,
+ lowerBitSetCall(CallInst *CI, BitSetInfo &BSI, ByteArrayInfo *&BAI,
+ GlobalVariable *CombinedGlobal,
const DenseMap<GlobalVariable *, uint64_t> &GlobalLayout);
- void buildBitSetsFromGlobals(Module &M,
- const std::vector<MDString *> &BitSets,
+ void buildBitSetsFromGlobals(const std::vector<MDString *> &BitSets,
const std::vector<GlobalVariable *> &Globals);
- bool buildBitSets(Module &M);
- bool eraseBitSetMetadata(Module &M);
+ bool buildBitSets();
+ bool eraseBitSetMetadata();
bool doInitialization(Module &M) override;
bool runOnModule(Module &M) override;
@@ -198,19 +238,21 @@ char LowerBitSets::ID = 0;
ModulePass *llvm::createLowerBitSetsPass() { return new LowerBitSets; }
-bool LowerBitSets::doInitialization(Module &M) {
- DL = M.getDataLayout();
- if (!DL)
- report_fatal_error("Data layout required");
+bool LowerBitSets::doInitialization(Module &Mod) {
+ M = &Mod;
+ const DataLayout &DL = Mod.getDataLayout();
- Int1Ty = Type::getInt1Ty(M.getContext());
- Int8Ty = Type::getInt8Ty(M.getContext());
- Int32Ty = Type::getInt32Ty(M.getContext());
+ Triple TargetTriple(M->getTargetTriple());
+ LinkerSubsectionsViaSymbols = TargetTriple.isMacOSX();
+
+ Int1Ty = Type::getInt1Ty(M->getContext());
+ Int8Ty = Type::getInt8Ty(M->getContext());
+ Int32Ty = Type::getInt32Ty(M->getContext());
Int32PtrTy = PointerType::getUnqual(Int32Ty);
- Int64Ty = Type::getInt64Ty(M.getContext());
- IntPtrTy = DL->getIntPtrType(M.getContext(), 0);
+ Int64Ty = Type::getInt64Ty(M->getContext());
+ IntPtrTy = DL.getIntPtrType(M->getContext(), 0);
- BitSetNM = M.getNamedMetadata("llvm.bitsets");
+ BitSetNM = M->getNamedMetadata("llvm.bitsets");
BitSetTestCallSites.clear();
@@ -259,52 +301,128 @@ static Value *createMaskedBitTest(IRBuilder<> &B, Value *Bits,
return B.CreateICmpNE(MaskedBits, ConstantInt::get(BitsType, 0));
}
+ByteArrayInfo *LowerBitSets::createByteArray(BitSetInfo &BSI) {
+ // Create globals to stand in for byte arrays and masks. These never actually
+ // get initialized, we RAUW and erase them later in allocateByteArrays() once
+ // we know the offset and mask to use.
+ auto ByteArrayGlobal = new GlobalVariable(
+ *M, Int8Ty, /*isConstant=*/true, GlobalValue::PrivateLinkage, nullptr);
+ auto MaskGlobal = new GlobalVariable(
+ *M, Int8Ty, /*isConstant=*/true, GlobalValue::PrivateLinkage, nullptr);
+
+ ByteArrayInfos.emplace_back();
+ ByteArrayInfo *BAI = &ByteArrayInfos.back();
+
+ BAI->Bits = BSI.Bits;
+ BAI->BitSize = BSI.BitSize;
+ BAI->ByteArray = ByteArrayGlobal;
+ BAI->Mask = ConstantExpr::getPtrToInt(MaskGlobal, Int8Ty);
+ return BAI;
+}
+
+void LowerBitSets::allocateByteArrays() {
+ std::stable_sort(ByteArrayInfos.begin(), ByteArrayInfos.end(),
+ [](const ByteArrayInfo &BAI1, const ByteArrayInfo &BAI2) {
+ return BAI1.BitSize > BAI2.BitSize;
+ });
+
+ std::vector<uint64_t> ByteArrayOffsets(ByteArrayInfos.size());
+
+ ByteArrayBuilder BAB;
+ for (unsigned I = 0; I != ByteArrayInfos.size(); ++I) {
+ ByteArrayInfo *BAI = &ByteArrayInfos[I];
+
+ uint8_t Mask;
+ BAB.allocate(BAI->Bits, BAI->BitSize, ByteArrayOffsets[I], Mask);
+
+ BAI->Mask->replaceAllUsesWith(ConstantInt::get(Int8Ty, Mask));
+ cast<GlobalVariable>(BAI->Mask->getOperand(0))->eraseFromParent();
+ }
+
+ Constant *ByteArrayConst = ConstantDataArray::get(M->getContext(), BAB.Bytes);
+ auto ByteArray =
+ new GlobalVariable(*M, ByteArrayConst->getType(), /*isConstant=*/true,
+ GlobalValue::PrivateLinkage, ByteArrayConst);
+
+ for (unsigned I = 0; I != ByteArrayInfos.size(); ++I) {
+ ByteArrayInfo *BAI = &ByteArrayInfos[I];
+
+ Constant *Idxs[] = {ConstantInt::get(IntPtrTy, 0),
+ ConstantInt::get(IntPtrTy, ByteArrayOffsets[I])};
+ Constant *GEP = ConstantExpr::getInBoundsGetElementPtr(ByteArray, Idxs);
+
+ // Create an alias instead of RAUW'ing the gep directly. On x86 this ensures
+ // that the pc-relative displacement is folded into the lea instead of the
+ // test instruction getting another displacement.
+ if (LinkerSubsectionsViaSymbols) {
+ BAI->ByteArray->replaceAllUsesWith(GEP);
+ } else {
+ GlobalAlias *Alias = GlobalAlias::create(
+ Int8Ty, 0, GlobalValue::PrivateLinkage, "bits", GEP, M);
+ BAI->ByteArray->replaceAllUsesWith(Alias);
+ }
+ BAI->ByteArray->eraseFromParent();
+ }
+
+ ByteArraySizeBits = BAB.BitAllocs[0] + BAB.BitAllocs[1] + BAB.BitAllocs[2] +
+ BAB.BitAllocs[3] + BAB.BitAllocs[4] + BAB.BitAllocs[5] +
+ BAB.BitAllocs[6] + BAB.BitAllocs[7];
+ ByteArraySizeBytes = BAB.Bytes.size();
+}
+
/// Build a test that bit BitOffset is set in BSI, where
/// BitSetGlobal is a global containing the bits in BSI.
-Value *LowerBitSets::createBitSetTest(IRBuilder<> &B, const BitSetInfo &BSI,
- GlobalVariable *BitSetGlobal,
- Value *BitOffset) {
- if (BSI.Bits.size() <= 8) {
+Value *LowerBitSets::createBitSetTest(IRBuilder<> &B, BitSetInfo &BSI,
+ ByteArrayInfo *&BAI, Value *BitOffset) {
+ if (BSI.BitSize <= 64) {
// If the bit set is sufficiently small, we can avoid a load by bit testing
// a constant.
IntegerType *BitsTy;
- if (BSI.Bits.size() <= 4)
+ if (BSI.BitSize <= 32)
BitsTy = Int32Ty;
else
BitsTy = Int64Ty;
uint64_t Bits = 0;
- for (auto I = BSI.Bits.rbegin(), E = BSI.Bits.rend(); I != E; ++I) {
- Bits <<= 8;
- Bits |= *I;
- }
+ for (auto Bit : BSI.Bits)
+ Bits |= uint64_t(1) << Bit;
Constant *BitsConst = ConstantInt::get(BitsTy, Bits);
return createMaskedBitTest(B, BitsConst, BitOffset);
} else {
- // TODO: We might want to use the memory variant of the bt instruction
- // with the previously computed bit offset at -Os. This instruction does
- // exactly what we want but has been benchmarked as being slower than open
- // coding the load+bt.
- Value *BitSetGlobalOffset =
- B.CreateLShr(BitOffset, ConstantInt::get(IntPtrTy, 5));
- Value *BitSetEntryAddr = B.CreateGEP(
- ConstantExpr::getBitCast(BitSetGlobal, Int32PtrTy), BitSetGlobalOffset);
- Value *BitSetEntry = B.CreateLoad(BitSetEntryAddr);
-
- return createMaskedBitTest(B, BitSetEntry, BitOffset);
+ if (!BAI) {
+ ++NumByteArraysCreated;
+ BAI = createByteArray(BSI);
+ }
+
+ Constant *ByteArray = BAI->ByteArray;
+ if (!LinkerSubsectionsViaSymbols && AvoidReuse) {
+ // Each use of the byte array uses a different alias. This makes the
+ // backend less likely to reuse previously computed byte array addresses,
+ // improving the security of the CFI mechanism based on this pass.
+ ByteArray = GlobalAlias::create(
+ BAI->ByteArray->getType()->getElementType(), 0,
+ GlobalValue::PrivateLinkage, "bits_use", ByteArray, M);
+ }
+
+ Value *ByteAddr = B.CreateGEP(ByteArray, BitOffset);
+ Value *Byte = B.CreateLoad(ByteAddr);
+
+ Value *ByteAndMask = B.CreateAnd(Byte, BAI->Mask);
+ return B.CreateICmpNE(ByteAndMask, ConstantInt::get(Int8Ty, 0));
}
}
/// Lower a llvm.bitset.test call to its implementation. Returns the value to
/// replace the call with.
Value *LowerBitSets::lowerBitSetCall(
- CallInst *CI, const BitSetInfo &BSI, GlobalVariable *BitSetGlobal,
+ CallInst *CI, BitSetInfo &BSI, ByteArrayInfo *&BAI,
GlobalVariable *CombinedGlobal,
const DenseMap<GlobalVariable *, uint64_t> &GlobalLayout) {
Value *Ptr = CI->getArgOperand(0);
+ const DataLayout &DL = M->getDataLayout();
if (BSI.containsValue(DL, GlobalLayout, Ptr))
- return ConstantInt::getTrue(BitSetGlobal->getParent()->getContext());
+ return ConstantInt::getTrue(CombinedGlobal->getParent()->getContext());
Constant *GlobalAsInt = ConstantExpr::getPtrToInt(CombinedGlobal, IntPtrTy);
Constant *OffsetedGlobalAsInt = ConstantExpr::getAdd(
@@ -336,8 +454,8 @@ Value *LowerBitSets::lowerBitSetCall(
Value *OffsetSHR =
B.CreateLShr(PtrOffset, ConstantInt::get(IntPtrTy, BSI.AlignLog2));
Value *OffsetSHL = B.CreateShl(
- PtrOffset, ConstantInt::get(IntPtrTy, DL->getPointerSizeInBits(0) -
- BSI.AlignLog2));
+ PtrOffset,
+ ConstantInt::get(IntPtrTy, DL.getPointerSizeInBits(0) - BSI.AlignLog2));
BitOffset = B.CreateOr(OffsetSHR, OffsetSHL);
}
@@ -353,7 +471,7 @@ Value *LowerBitSets::lowerBitSetCall(
// Now that we know that the offset is in range and aligned, load the
// appropriate bit from the bitset.
- Value *Bit = createBitSetTest(ThenB, BSI, BitSetGlobal, BitOffset);
+ Value *Bit = createBitSetTest(ThenB, BSI, BAI, BitOffset);
// The value we want is 0 if we came directly from the initial block
// (having failed the range or alignment checks), or the loaded bit if
@@ -368,14 +486,14 @@ Value *LowerBitSets::lowerBitSetCall(
/// Given a disjoint set of bitsets and globals, lay out the globals, build the
/// bit sets and lower the llvm.bitset.test calls.
void LowerBitSets::buildBitSetsFromGlobals(
- Module &M,
const std::vector<MDString *> &BitSets,
const std::vector<GlobalVariable *> &Globals) {
// Build a new global with the combined contents of the referenced globals.
std::vector<Constant *> GlobalInits;
+ const DataLayout &DL = M->getDataLayout();
for (GlobalVariable *G : Globals) {
GlobalInits.push_back(G->getInitializer());
- uint64_t InitSize = DL->getTypeAllocSize(G->getInitializer()->getType());
+ uint64_t InitSize = DL.getTypeAllocSize(G->getInitializer()->getType());
// Compute the amount of padding required to align the next element to the
// next power of 2.
@@ -391,13 +509,13 @@ void LowerBitSets::buildBitSetsFromGlobals(
}
if (!GlobalInits.empty())
GlobalInits.pop_back();
- Constant *NewInit = ConstantStruct::getAnon(M.getContext(), GlobalInits);
+ Constant *NewInit = ConstantStruct::getAnon(M->getContext(), GlobalInits);
auto CombinedGlobal =
- new GlobalVariable(M, NewInit->getType(), /*isConstant=*/true,
+ new GlobalVariable(*M, NewInit->getType(), /*isConstant=*/true,
GlobalValue::PrivateLinkage, NewInit);
const StructLayout *CombinedGlobalLayout =
- DL->getStructLayout(cast<StructType>(NewInit->getType()));
+ DL.getStructLayout(cast<StructType>(NewInit->getType()));
// Compute the offsets of the original globals within the new global.
DenseMap<GlobalVariable *, uint64_t> GlobalLayout;
@@ -410,18 +528,12 @@ void LowerBitSets::buildBitSetsFromGlobals(
// Build the bitset.
BitSetInfo BSI = buildBitSet(BS, GlobalLayout);
- // Create a global in which to store it.
- ++NumBitSetsCreated;
- Constant *BitsConst = ConstantDataArray::get(M.getContext(), BSI.Bits);
- auto BitSetGlobal = new GlobalVariable(
- M, BitsConst->getType(), /*isConstant=*/true,
- GlobalValue::PrivateLinkage, BitsConst, BS->getString() + ".bits");
+ ByteArrayInfo *BAI = nullptr;
// Lower each call to llvm.bitset.test for this bitset.
for (CallInst *CI : BitSetTestCallSites[BS]) {
++NumBitSetCallsLowered;
- Value *Lowered =
- lowerBitSetCall(CI, BSI, BitSetGlobal, CombinedGlobal, GlobalLayout);
+ Value *Lowered =
+     lowerBitSetCall(CI, BSI, BAI, CombinedGlobal, GlobalLayout);
CI->replaceAllUsesWith(Lowered);
CI->eraseFromParent();
}
@@ -436,20 +548,24 @@ void LowerBitSets::buildBitSetsFromGlobals(
ConstantInt::get(Int32Ty, I * 2)};
Constant *CombinedGlobalElemPtr =
ConstantExpr::getGetElementPtr(CombinedGlobal, CombinedGlobalIdxs);
- GlobalAlias *GAlias = GlobalAlias::create(
- Globals[I]->getType()->getElementType(),
- Globals[I]->getType()->getAddressSpace(), Globals[I]->getLinkage(),
- "", CombinedGlobalElemPtr, &M);
- GAlias->takeName(Globals[I]);
- Globals[I]->replaceAllUsesWith(GAlias);
+ if (LinkerSubsectionsViaSymbols) {
+ Globals[I]->replaceAllUsesWith(CombinedGlobalElemPtr);
+ } else {
+ GlobalAlias *GAlias = GlobalAlias::create(
+ Globals[I]->getType()->getElementType(),
+ Globals[I]->getType()->getAddressSpace(), Globals[I]->getLinkage(),
+ "", CombinedGlobalElemPtr, M);
+ GAlias->takeName(Globals[I]);
+ Globals[I]->replaceAllUsesWith(GAlias);
+ }
Globals[I]->eraseFromParent();
}
}
/// Lower all bit sets in this module.
-bool LowerBitSets::buildBitSets(Module &M) {
+bool LowerBitSets::buildBitSets() {
Function *BitSetTestFunc =
- M.getFunction(Intrinsic::getName(Intrinsic::bitset_test));
+ M->getFunction(Intrinsic::getName(Intrinsic::bitset_test));
if (!BitSetTestFunc)
return false;
@@ -591,22 +707,24 @@ bool LowerBitSets::buildBitSets(Module &M) {
});
// Build the bitsets from this disjoint set.
- buildBitSetsFromGlobals(M, BitSets, OrderedGlobals);
+ buildBitSetsFromGlobals(BitSets, OrderedGlobals);
}
+ allocateByteArrays();
+
return true;
}
-bool LowerBitSets::eraseBitSetMetadata(Module &M) {
+bool LowerBitSets::eraseBitSetMetadata() {
if (!BitSetNM)
return false;
- M.eraseNamedMetadata(BitSetNM);
+ M->eraseNamedMetadata(BitSetNM);
return true;
}
bool LowerBitSets::runOnModule(Module &M) {
- bool Changed = buildBitSets(M);
- Changed |= eraseBitSetMetadata(M);
+ bool Changed = buildBitSets();
+ Changed |= eraseBitSetMetadata();
return Changed;
}
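The structural change in this file: instead of emitting one global per bitset, up to eight bitsets share a single byte array, each owning one bit lane, and allocate() greedily places each set in the lane with the smallest current end offset. A condensed, self-contained sketch of that allocator:

    #include <cstdint>
    #include <set>
    #include <vector>

    struct ByteArrayBuilderSketch {
      std::vector<uint8_t> Bytes;
      uint64_t BitAllocs[8] = {}; // current end offset of each bit lane

      void allocate(const std::set<uint64_t> &Bits, uint64_t BitSize,
                    uint64_t &ByteOffset, uint8_t &Mask) {
        unsigned Bit = 0; // pick the least-full lane
        for (unsigned I = 1; I != 8; ++I)
          if (BitAllocs[I] < BitAllocs[Bit])
            Bit = I;
        ByteOffset = BitAllocs[Bit];
        BitAllocs[Bit] = ByteOffset + BitSize;
        if (Bytes.size() < ByteOffset + BitSize)
          Bytes.resize(ByteOffset + BitSize);
        Mask = uint8_t(1) << Bit;
        for (uint64_t B : Bits)
          Bytes[ByteOffset + B] |= Mask; // set this set's bits in its lane
      }
    };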
diff --git a/lib/Transforms/IPO/MergeFunctions.cpp b/lib/Transforms/IPO/MergeFunctions.cpp
index b91ebf2..596674d 100644
--- a/lib/Transforms/IPO/MergeFunctions.cpp
+++ b/lib/Transforms/IPO/MergeFunctions.cpp
@@ -127,9 +127,8 @@ namespace {
/// side of claiming that two functions are different).
class FunctionComparator {
public:
- FunctionComparator(const DataLayout *DL, const Function *F1,
- const Function *F2)
- : FnL(F1), FnR(F2), DL(DL) {}
+ FunctionComparator(const Function *F1, const Function *F2)
+ : FnL(F1), FnR(F2) {}
/// Test whether the two functions have equivalent behaviour.
int compare();
@@ -292,8 +291,7 @@ private:
/// Parts to be compared for each comparison stage,
/// most significant stage first:
/// 1. Address space. As numbers.
- /// 2. Constant offset, (if "DataLayout *DL" field is not NULL,
- /// using GEPOperator::accumulateConstantOffset method).
+ /// 2. Constant offset, (using GEPOperator::accumulateConstantOffset method).
/// 3. Pointer operand type (using cmpType method).
/// 4. Number of operands.
/// 5. Compare operands, using cmpValues method.
@@ -354,8 +352,6 @@ private:
// The two functions undergoing comparison.
const Function *FnL, *FnR;
- const DataLayout *DL;
-
/// Assign serial numbers to values from left function, and values from
/// right function.
/// Explanation:
@@ -394,14 +390,13 @@ private:
class FunctionNode {
AssertingVH<Function> F;
- const DataLayout *DL;
public:
- FunctionNode(Function *F, const DataLayout *DL) : F(F), DL(DL) {}
+ FunctionNode(Function *F) : F(F) {}
Function *getFunc() const { return F; }
void release() { F = 0; }
bool operator<(const FunctionNode &RHS) const {
- return (FunctionComparator(DL, F, RHS.getFunc()).compare()) == -1;
+ return (FunctionComparator(F, RHS.getFunc()).compare()) == -1;
}
};
}
@@ -620,10 +615,11 @@ int FunctionComparator::cmpTypes(Type *TyL, Type *TyR) const {
PointerType *PTyL = dyn_cast<PointerType>(TyL);
PointerType *PTyR = dyn_cast<PointerType>(TyR);
- if (DL) {
- if (PTyL && PTyL->getAddressSpace() == 0) TyL = DL->getIntPtrType(TyL);
- if (PTyR && PTyR->getAddressSpace() == 0) TyR = DL->getIntPtrType(TyR);
- }
+ const DataLayout &DL = FnL->getParent()->getDataLayout();
+ if (PTyL && PTyL->getAddressSpace() == 0)
+ TyL = DL.getIntPtrType(TyL);
+ if (PTyR && PTyR->getAddressSpace() == 0)
+ TyR = DL.getIntPtrType(TyR);
if (TyL == TyR)
return 0;
@@ -855,13 +851,12 @@ int FunctionComparator::cmpGEPs(const GEPOperator *GEPL,
// When we have target data, we can reduce the GEP down to the value in bytes
// added to the address.
- if (DL) {
- unsigned BitWidth = DL->getPointerSizeInBits(ASL);
- APInt OffsetL(BitWidth, 0), OffsetR(BitWidth, 0);
- if (GEPL->accumulateConstantOffset(*DL, OffsetL) &&
- GEPR->accumulateConstantOffset(*DL, OffsetR))
- return cmpAPInts(OffsetL, OffsetR);
- }
+ const DataLayout &DL = FnL->getParent()->getDataLayout();
+ unsigned BitWidth = DL.getPointerSizeInBits(ASL);
+ APInt OffsetL(BitWidth, 0), OffsetR(BitWidth, 0);
+ if (GEPL->accumulateConstantOffset(DL, OffsetL) &&
+ GEPR->accumulateConstantOffset(DL, OffsetR))
+ return cmpAPInts(OffsetL, OffsetR);
if (int Res = cmpNumbers((uint64_t)GEPL->getPointerOperand()->getType(),
(uint64_t)GEPR->getPointerOperand()->getType()))
@@ -1122,9 +1117,6 @@ private:
/// to modify it.
FnTreeType FnTree;
- /// DataLayout for more accurate GEP comparisons. May be NULL.
- const DataLayout *DL;
-
/// Whether or not the target supports global aliases.
bool HasGlobalAliases;
};
@@ -1152,8 +1144,8 @@ bool MergeFunctions::doSanityCheck(std::vector<WeakVH> &Worklist) {
for (std::vector<WeakVH>::iterator J = I; J != E && j < Max; ++J, ++j) {
Function *F1 = cast<Function>(*I);
Function *F2 = cast<Function>(*J);
- int Res1 = FunctionComparator(DL, F1, F2).compare();
- int Res2 = FunctionComparator(DL, F2, F1).compare();
+ int Res1 = FunctionComparator(F1, F2).compare();
+ int Res2 = FunctionComparator(F2, F1).compare();
// If F1 <= F2, then F2 >= F1, otherwise report failure.
if (Res1 != -Res2) {
@@ -1174,8 +1166,8 @@ bool MergeFunctions::doSanityCheck(std::vector<WeakVH> &Worklist) {
continue;
Function *F3 = cast<Function>(*K);
- int Res3 = FunctionComparator(DL, F1, F3).compare();
- int Res4 = FunctionComparator(DL, F2, F3).compare();
+ int Res3 = FunctionComparator(F1, F3).compare();
+ int Res4 = FunctionComparator(F2, F3).compare();
bool Transitive = true;
@@ -1212,8 +1204,6 @@ bool MergeFunctions::doSanityCheck(std::vector<WeakVH> &Worklist) {
bool MergeFunctions::runOnModule(Module &M) {
bool Changed = false;
- DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>();
- DL = DLP ? &DLP->getDataLayout() : nullptr;
for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I) {
if (!I->isDeclaration() && !I->hasAvailableExternallyLinkage())
@@ -1420,7 +1410,7 @@ void MergeFunctions::mergeTwoFunctions(Function *F, Function *G) {
// that was already inserted.
bool MergeFunctions::insert(Function *NewFunction) {
std::pair<FnTreeType::iterator, bool> Result =
- FnTree.insert(FunctionNode(NewFunction, DL));
+ FnTree.insert(FunctionNode(NewFunction));
if (Result.second) {
DEBUG(dbgs() << "Inserting as unique: " << NewFunction->getName() << '\n');
@@ -1457,7 +1447,7 @@ bool MergeFunctions::insert(Function *NewFunction) {
void MergeFunctions::remove(Function *F) {
// We need to make sure we remove F, not a function "equal" to F per the
// function equality comparator.
- FnTreeType::iterator found = FnTree.find(FunctionNode(F, DL));
+ FnTreeType::iterator found = FnTree.find(FunctionNode(F));
size_t Erased = 0;
if (found != FnTree.end() && found->getFunc() == F) {
Erased = 1;
diff --git a/lib/Transforms/IPO/PassManagerBuilder.cpp b/lib/Transforms/IPO/PassManagerBuilder.cpp
index 9a75050..d28d563 100644
--- a/lib/Transforms/IPO/PassManagerBuilder.cpp
+++ b/lib/Transforms/IPO/PassManagerBuilder.cpp
@@ -77,6 +77,10 @@ static cl::opt<bool>
EnableMLSM("mlsm", cl::init(true), cl::Hidden,
cl::desc("Enable motion of merged load and store"));
+static cl::opt<bool> EnableLoopInterchange(
+ "enable-loopinterchange", cl::init(false), cl::Hidden,
+ cl::desc("Enable the new, experimental LoopInterchange Pass"));
+
PassManagerBuilder::PassManagerBuilder() {
OptLevel = 2;
SizeLevel = 0;
@@ -93,7 +97,6 @@ PassManagerBuilder::PassManagerBuilder() {
DisableGVNLoadPRE = false;
VerifyInput = false;
VerifyOutput = false;
- StripDebug = false;
MergeFunctions = false;
}
@@ -239,6 +242,8 @@ void PassManagerBuilder::populateModulePassManager(
MPM.add(createIndVarSimplifyPass()); // Canonicalize indvars
MPM.add(createLoopIdiomPass()); // Recognize idioms like memset.
MPM.add(createLoopDeletionPass()); // Delete dead loops
+ if (EnableLoopInterchange)
+ MPM.add(createLoopInterchangePass()); // Interchange loops
if (!DisableUnrollLoops)
MPM.add(createSimpleLoopUnrollPass()); // Unroll small loops
@@ -305,8 +310,7 @@ void PassManagerBuilder::populateModulePassManager(
// Re-rotate loops in all our loop nests. These may have fallen out of
// rotated form due to GVN or other transformations, and the vectorizer relies
// on the rotated form.
- if (ExtraVectorizerPasses)
- MPM.add(createLoopRotatePass());
+ MPM.add(createLoopRotatePass());
MPM.add(createLoopVectorizePass(DisableUnrollLoops, LoopVectorize));
// FIXME: Because of #pragma vectorize enable, the passes below are always
@@ -358,9 +362,20 @@ void PassManagerBuilder::populateModulePassManager(
MPM.add(createCFGSimplificationPass());
MPM.add(createInstructionCombiningPass());
- if (!DisableUnrollLoops)
+ if (!DisableUnrollLoops) {
MPM.add(createLoopUnrollPass()); // Unroll small loops
+ // This is a barrier pass to avoid combining the LICM pass and the loop
+ // unroll pass within the same loop pass manager.
+ MPM.add(createInstructionSimplifierPass());
+
+ // Runtime unrolling will introduce a runtime check in the loop prologue.
+ // If the unrolled loop is an inner loop, then the prologue will be inside
+ // the outer loop. The LICM pass can help promote the runtime check out if
+ // the checked value is loop invariant.
+ MPM.add(createLICMPass());
+ }
+
// After vectorization and unrolling, assume intrinsics may tell us more
// about pointer alignments.
MPM.add(createAlignmentFromAssumptionsPass());
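The intended ordering here is: unroll, then a simplifier acting as a barrier between loop pass managers, then LICM. A minimal sketch of wiring that tail by hand, assuming the 3.7-era legacy pass manager and the create* entry points shown in this diff:

#include "llvm/IR/LegacyPassManager.h"
#include "llvm/Transforms/Scalar.h"

// Unroll, then break up the loop pass manager, then let LICM hoist the
// runtime checks that runtime unrolling placed in inner-loop prologues.
static void addUnrollTail(llvm::legacy::PassManagerBase &MPM) {
  MPM.add(llvm::createLoopUnrollPass());            // unroll small loops
  MPM.add(llvm::createInstructionSimplifierPass()); // barrier between LPMs
  MPM.add(llvm::createLICMPass());                  // hoist prologue checks
}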
@@ -454,6 +469,9 @@ void PassManagerBuilder::addLTOOptimizationPasses(legacy::PassManagerBase &PM) {
// More loops are countable; try to optimize them.
PM.add(createIndVarSimplifyPass());
PM.add(createLoopDeletionPass());
+ if (EnableLoopInterchange)
+ PM.add(createLoopInterchangePass());
+
PM.add(createLoopVectorizePass(true, LoopVectorize));
// More scalar chains could be vectorized due to more alias information
@@ -473,10 +491,10 @@ void PassManagerBuilder::addLTOOptimizationPasses(legacy::PassManagerBase &PM) {
addExtensionsToPM(EP_Peephole, PM);
PM.add(createJumpThreadingPass());
+}
- // Lower bitset metadata to bitsets.
- PM.add(createLowerBitSetsPass());
-
+void PassManagerBuilder::addLateLTOOptimizationPasses(
+ legacy::PassManagerBase &PM) {
// Delete basic blocks, which optimization passes may have killed.
PM.add(createCFGSimplificationPass());
@@ -496,19 +514,19 @@ void PassManagerBuilder::populateLTOPassManager(legacy::PassManagerBase &PM) {
if (VerifyInput)
PM.add(createVerifierPass());
- if (StripDebug)
- PM.add(createStripSymbolsPass(true));
+ if (OptLevel > 1)
+ addLTOOptimizationPasses(PM);
- if (VerifyInput)
- PM.add(createDebugInfoVerifierPass());
+ // Lower bit sets to globals. This pass supports Clang's control flow
+ // integrity mechanisms (-fsanitize=cfi*) and needs to run at link time if CFI
+ // is enabled. The pass does nothing if CFI is disabled.
+ PM.add(createLowerBitSetsPass());
if (OptLevel != 0)
- addLTOOptimizationPasses(PM);
+ addLateLTOOptimizationPasses(PM);
- if (VerifyOutput) {
+ if (VerifyOutput)
PM.add(createVerifierPass());
- PM.add(createDebugInfoVerifierPass());
- }
}
inline PassManagerBuilder *unwrap(LLVMPassManagerBuilderRef P) {
diff --git a/lib/Transforms/InstCombine/InstCombineAddSub.cpp b/lib/Transforms/InstCombine/InstCombineAddSub.cpp
index 752f79d..c608f84 100644
--- a/lib/Transforms/InstCombine/InstCombineAddSub.cpp
+++ b/lib/Transforms/InstCombine/InstCombineAddSub.cpp
@@ -891,7 +891,7 @@ static bool checkRippleForAdd(const APInt &Op0KnownZero,
/// This basically requires proving that the add in the original type would not
/// overflow to change the sign bit or have a carry out.
bool InstCombiner::WillNotOverflowSignedAdd(Value *LHS, Value *RHS,
- Instruction *CxtI) {
+ Instruction &CxtI) {
// There are different heuristics we can use for this. Here are some simple
// ones.
@@ -909,18 +909,18 @@ bool InstCombiner::WillNotOverflowSignedAdd(Value *LHS, Value *RHS,
//
// Since the carry into the most significant position is always equal to
// the carry out of the addition, there is no signed overflow.
- if (ComputeNumSignBits(LHS, 0, CxtI) > 1 &&
- ComputeNumSignBits(RHS, 0, CxtI) > 1)
+ if (ComputeNumSignBits(LHS, 0, &CxtI) > 1 &&
+ ComputeNumSignBits(RHS, 0, &CxtI) > 1)
return true;
unsigned BitWidth = LHS->getType()->getScalarSizeInBits();
APInt LHSKnownZero(BitWidth, 0);
APInt LHSKnownOne(BitWidth, 0);
- computeKnownBits(LHS, LHSKnownZero, LHSKnownOne, 0, CxtI);
+ computeKnownBits(LHS, LHSKnownZero, LHSKnownOne, 0, &CxtI);
APInt RHSKnownZero(BitWidth, 0);
APInt RHSKnownOne(BitWidth, 0);
- computeKnownBits(RHS, RHSKnownZero, RHSKnownOne, 0, CxtI);
+ computeKnownBits(RHS, RHSKnownZero, RHSKnownOne, 0, &CxtI);
// Addition of two 2's complement numbers having opposite signs will never
// overflow.
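The two-sign-bits argument can be checked exhaustively at a small width. A self-contained C++ sketch (illustrative, not LLVM code) that brute-forces it for i8:

#include <cassert>
#include <cstdint>

// A value has at least two sign bits in i8 iff sign-extending from bit 6
// reproduces it, i.e. it lies in [-64, 63].
static bool hasTwoSignBits(int8_t V) {
  return (int8_t)((int8_t)(V << 1) >> 1) == V;
}

int main() {
  for (int A = -128; A < 128; ++A)
    for (int B = -128; B < 128; ++B)
      if (hasTwoSignBits((int8_t)A) && hasTwoSignBits((int8_t)B))
        assert(A + B >= -128 && A + B <= 127); // the sum never overflows i8
}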
@@ -943,21 +943,21 @@ bool InstCombiner::WillNotOverflowSignedAdd(Value *LHS, Value *RHS,
/// overflow to change the sign bit or have a carry out.
/// TODO: Handle this for Vectors.
bool InstCombiner::WillNotOverflowSignedSub(Value *LHS, Value *RHS,
- Instruction *CxtI) {
+ Instruction &CxtI) {
// If LHS and RHS each have at least two sign bits, the subtraction
// cannot overflow.
- if (ComputeNumSignBits(LHS, 0, CxtI) > 1 &&
- ComputeNumSignBits(RHS, 0, CxtI) > 1)
+ if (ComputeNumSignBits(LHS, 0, &CxtI) > 1 &&
+ ComputeNumSignBits(RHS, 0, &CxtI) > 1)
return true;
unsigned BitWidth = LHS->getType()->getScalarSizeInBits();
APInt LHSKnownZero(BitWidth, 0);
APInt LHSKnownOne(BitWidth, 0);
- computeKnownBits(LHS, LHSKnownZero, LHSKnownOne, 0, CxtI);
+ computeKnownBits(LHS, LHSKnownZero, LHSKnownOne, 0, &CxtI);
APInt RHSKnownZero(BitWidth, 0);
APInt RHSKnownOne(BitWidth, 0);
- computeKnownBits(RHS, RHSKnownZero, RHSKnownOne, 0, CxtI);
+ computeKnownBits(RHS, RHSKnownZero, RHSKnownOne, 0, &CxtI);
// Subtraction of two 2's complement numbers having identical signs will
// never overflow.
@@ -972,12 +972,14 @@ bool InstCombiner::WillNotOverflowSignedSub(Value *LHS, Value *RHS,
/// \brief Return true if we can prove that:
/// (sub LHS, RHS) === (sub nuw LHS, RHS)
bool InstCombiner::WillNotOverflowUnsignedSub(Value *LHS, Value *RHS,
- Instruction *CxtI) {
+ Instruction &CxtI) {
// If the LHS is negative and the RHS is non-negative, no unsigned wrap.
bool LHSKnownNonNegative, LHSKnownNegative;
bool RHSKnownNonNegative, RHSKnownNegative;
- ComputeSignBit(LHS, LHSKnownNonNegative, LHSKnownNegative, /*Depth=*/0, CxtI);
- ComputeSignBit(RHS, RHSKnownNonNegative, RHSKnownNegative, /*Depth=*/0, CxtI);
+ ComputeSignBit(LHS, LHSKnownNonNegative, LHSKnownNegative, /*Depth=*/0,
+ &CxtI);
+ ComputeSignBit(RHS, RHSKnownNonNegative, RHSKnownNegative, /*Depth=*/0,
+ &CxtI);
if (LHSKnownNegative && RHSKnownNonNegative)
return true;
@@ -1046,15 +1048,15 @@ static Value *checkForNegativeOperand(BinaryOperator &I,
}
Instruction *InstCombiner::visitAdd(BinaryOperator &I) {
- bool Changed = SimplifyAssociativeOrCommutative(I);
- Value *LHS = I.getOperand(0), *RHS = I.getOperand(1);
+ bool Changed = SimplifyAssociativeOrCommutative(I);
+ Value *LHS = I.getOperand(0), *RHS = I.getOperand(1);
- if (Value *V = SimplifyVectorOp(I))
- return ReplaceInstUsesWith(I, V);
+ if (Value *V = SimplifyVectorOp(I))
+ return ReplaceInstUsesWith(I, V);
- if (Value *V = SimplifyAddInst(LHS, RHS, I.hasNoSignedWrap(),
- I.hasNoUnsignedWrap(), DL, TLI, DT, AC))
- return ReplaceInstUsesWith(I, V);
+ if (Value *V = SimplifyAddInst(LHS, RHS, I.hasNoSignedWrap(),
+ I.hasNoUnsignedWrap(), DL, TLI, DT, AC))
+ return ReplaceInstUsesWith(I, V);
// (A*B)+(A*C) -> A*(B+C) etc
if (Value *V = SimplifyUsingDistributiveLaws(I))
@@ -1243,7 +1245,7 @@ Instruction *InstCombiner::visitAdd(BinaryOperator &I) {
ConstantExpr::getTrunc(RHSC, LHSConv->getOperand(0)->getType());
if (LHSConv->hasOneUse() &&
ConstantExpr::getSExt(CI, I.getType()) == RHSC &&
- WillNotOverflowSignedAdd(LHSConv->getOperand(0), CI, &I)) {
+ WillNotOverflowSignedAdd(LHSConv->getOperand(0), CI, I)) {
// Insert the new, smaller add.
Value *NewAdd = Builder->CreateNSWAdd(LHSConv->getOperand(0),
CI, "addconv");
@@ -1256,10 +1258,11 @@ Instruction *InstCombiner::visitAdd(BinaryOperator &I) {
// Only do this if x/y have the same type, if at least one of them has a
// single use (so we don't increase the number of sexts), and if the
// integer add will not overflow.
- if (LHSConv->getOperand(0)->getType()==RHSConv->getOperand(0)->getType()&&
+ if (LHSConv->getOperand(0)->getType() ==
+ RHSConv->getOperand(0)->getType() &&
(LHSConv->hasOneUse() || RHSConv->hasOneUse()) &&
WillNotOverflowSignedAdd(LHSConv->getOperand(0),
- RHSConv->getOperand(0), &I)) {
+ RHSConv->getOperand(0), I)) {
// Insert the new integer add.
Value *NewAdd = Builder->CreateNSWAdd(LHSConv->getOperand(0),
RHSConv->getOperand(0), "addconv");
@@ -1307,7 +1310,7 @@ Instruction *InstCombiner::visitAdd(BinaryOperator &I) {
// TODO(jingyue): Consider WillNotOverflowSignedAdd and
// WillNotOverflowUnsignedAdd to reduce the number of invocations of
// computeKnownBits.
- if (!I.hasNoSignedWrap() && WillNotOverflowSignedAdd(LHS, RHS, &I)) {
+ if (!I.hasNoSignedWrap() && WillNotOverflowSignedAdd(LHS, RHS, I)) {
Changed = true;
I.setHasNoSignedWrap(true);
}
@@ -1371,7 +1374,7 @@ Instruction *InstCombiner::visitFAdd(BinaryOperator &I) {
ConstantExpr::getFPToSI(CFP, LHSConv->getOperand(0)->getType());
if (LHSConv->hasOneUse() &&
ConstantExpr::getSIToFP(CI, I.getType()) == CFP &&
- WillNotOverflowSignedAdd(LHSConv->getOperand(0), CI, &I)) {
+ WillNotOverflowSignedAdd(LHSConv->getOperand(0), CI, I)) {
// Insert the new integer add.
Value *NewAdd = Builder->CreateNSWAdd(LHSConv->getOperand(0),
CI, "addconv");
@@ -1384,10 +1387,11 @@ Instruction *InstCombiner::visitFAdd(BinaryOperator &I) {
// Only do this if x/y have the same type, if at least one of them has a
// single use (so we don't increase the number of int->fp conversions),
// and if the integer add will not overflow.
- if (LHSConv->getOperand(0)->getType()==RHSConv->getOperand(0)->getType()&&
+ if (LHSConv->getOperand(0)->getType() ==
+ RHSConv->getOperand(0)->getType() &&
(LHSConv->hasOneUse() || RHSConv->hasOneUse()) &&
WillNotOverflowSignedAdd(LHSConv->getOperand(0),
- RHSConv->getOperand(0), &I)) {
+ RHSConv->getOperand(0), I)) {
// Insert the new integer add.
Value *NewAdd = Builder->CreateNSWAdd(LHSConv->getOperand(0),
RHSConv->getOperand(0),"addconv");
@@ -1436,8 +1440,6 @@ Instruction *InstCombiner::visitFAdd(BinaryOperator &I) {
///
Value *InstCombiner::OptimizePointerDifference(Value *LHS, Value *RHS,
Type *Ty) {
- assert(DL && "Must have target data info for this");
-
// If LHS is a gep based on RHS or RHS is a gep based on LHS, we can optimize
// this.
bool Swapped = false;
@@ -1662,26 +1664,24 @@ Instruction *InstCombiner::visitSub(BinaryOperator &I) {
// Optimize differences between pointers into the same array into a size. Consider:
// &A[10] - &A[0]: we should compile this to "10".
- if (DL) {
- Value *LHSOp, *RHSOp;
- if (match(Op0, m_PtrToInt(m_Value(LHSOp))) &&
- match(Op1, m_PtrToInt(m_Value(RHSOp))))
- if (Value *Res = OptimizePointerDifference(LHSOp, RHSOp, I.getType()))
- return ReplaceInstUsesWith(I, Res);
-
- // trunc(p)-trunc(q) -> trunc(p-q)
- if (match(Op0, m_Trunc(m_PtrToInt(m_Value(LHSOp)))) &&
- match(Op1, m_Trunc(m_PtrToInt(m_Value(RHSOp)))))
- if (Value *Res = OptimizePointerDifference(LHSOp, RHSOp, I.getType()))
- return ReplaceInstUsesWith(I, Res);
- }
+ Value *LHSOp, *RHSOp;
+ if (match(Op0, m_PtrToInt(m_Value(LHSOp))) &&
+ match(Op1, m_PtrToInt(m_Value(RHSOp))))
+ if (Value *Res = OptimizePointerDifference(LHSOp, RHSOp, I.getType()))
+ return ReplaceInstUsesWith(I, Res);
+
+ // trunc(p)-trunc(q) -> trunc(p-q)
+ if (match(Op0, m_Trunc(m_PtrToInt(m_Value(LHSOp)))) &&
+ match(Op1, m_Trunc(m_PtrToInt(m_Value(RHSOp)))))
+ if (Value *Res = OptimizePointerDifference(LHSOp, RHSOp, I.getType()))
+ return ReplaceInstUsesWith(I, Res);
bool Changed = false;
- if (!I.hasNoSignedWrap() && WillNotOverflowSignedSub(Op0, Op1, &I)) {
+ if (!I.hasNoSignedWrap() && WillNotOverflowSignedSub(Op0, Op1, I)) {
Changed = true;
I.setHasNoSignedWrap(true);
}
- if (!I.hasNoUnsignedWrap() && WillNotOverflowUnsignedSub(Op0, Op1, &I)) {
+ if (!I.hasNoUnsignedWrap() && WillNotOverflowUnsignedSub(Op0, Op1, I)) {
Changed = true;
I.setHasNoUnsignedWrap(true);
}
diff --git a/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp b/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
index 863eeaf..ee21c81 100644
--- a/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
+++ b/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
@@ -979,9 +979,9 @@ Value *InstCombiner::FoldAndOfICmps(ICmpInst *LHS, ICmpInst *RHS) {
// Make a constant range that's the intersection of the two icmp ranges.
// If the intersection is empty, we know that the result is false.
ConstantRange LHSRange =
- ConstantRange::makeICmpRegion(LHSCC, LHSCst->getValue());
+ ConstantRange::makeAllowedICmpRegion(LHSCC, LHSCst->getValue());
ConstantRange RHSRange =
- ConstantRange::makeICmpRegion(RHSCC, RHSCst->getValue());
+ ConstantRange::makeAllowedICmpRegion(RHSCC, RHSCst->getValue());
if (LHSRange.intersectWith(RHSRange).isEmptySet())
return ConstantInt::get(CmpInst::makeCmpResultType(LHS->getType()), 0);
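For example, over unsigned x the allowed region of (x ugt 5) is [6, UINT_MAX] and that of (x ult 3) is [0, 2]; the intersection is empty, so the AND folds to false. A minimal sketch of that test (plain C++; unlike ConstantRange it does not model wrapped ranges):

#include <algorithm>
#include <cstdio>

struct Range { unsigned Lo, Hi; }; // inclusive; no wrapped ranges here

static bool intersects(Range A, Range B) {
  return std::max(A.Lo, B.Lo) <= std::min(A.Hi, B.Hi);
}

int main() {
  Range GT5 = {6, ~0u}; // allowed region of (x ugt 5)
  Range LT3 = {0, 2};   // allowed region of (x ult 3)
  // Empty intersection: the AND of the two icmps folds to false.
  std::printf("folds to false: %d\n", !intersects(GT5, LT3)); // prints 1
}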
@@ -1709,15 +1709,17 @@ Value *InstCombiner::FoldOrOfICmps(ICmpInst *LHS, ICmpInst *RHS,
Value *Mask = nullptr;
Value *Masked = nullptr;
if (LAnd->getOperand(0) == RAnd->getOperand(0) &&
- isKnownToBeAPowerOfTwo(LAnd->getOperand(1), false, 0, AC, CxtI, DT) &&
- isKnownToBeAPowerOfTwo(RAnd->getOperand(1), false, 0, AC, CxtI, DT)) {
+ isKnownToBeAPowerOfTwo(LAnd->getOperand(1), DL, false, 0, AC, CxtI,
+ DT) &&
+ isKnownToBeAPowerOfTwo(RAnd->getOperand(1), DL, false, 0, AC, CxtI,
+ DT)) {
Mask = Builder->CreateOr(LAnd->getOperand(1), RAnd->getOperand(1));
Masked = Builder->CreateAnd(LAnd->getOperand(0), Mask);
} else if (LAnd->getOperand(1) == RAnd->getOperand(1) &&
- isKnownToBeAPowerOfTwo(LAnd->getOperand(0), false, 0, AC, CxtI,
- DT) &&
- isKnownToBeAPowerOfTwo(RAnd->getOperand(0), false, 0, AC, CxtI,
- DT)) {
+ isKnownToBeAPowerOfTwo(LAnd->getOperand(0), DL, false, 0, AC,
+ CxtI, DT) &&
+ isKnownToBeAPowerOfTwo(RAnd->getOperand(0), DL, false, 0, AC,
+ CxtI, DT)) {
Mask = Builder->CreateOr(LAnd->getOperand(0), RAnd->getOperand(0));
Masked = Builder->CreateAnd(LAnd->getOperand(1), Mask);
}
diff --git a/lib/Transforms/InstCombine/InstCombineCalls.cpp b/lib/Transforms/InstCombine/InstCombineCalls.cpp
index 05e7162..21243c2 100644
--- a/lib/Transforms/InstCombine/InstCombineCalls.cpp
+++ b/lib/Transforms/InstCombine/InstCombineCalls.cpp
@@ -15,7 +15,6 @@
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/MemoryBuiltins.h"
#include "llvm/IR/CallSite.h"
-#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/IR/Statepoint.h"
@@ -61,8 +60,8 @@ static Type *reduceToSingleValueType(Type *T) {
}
Instruction *InstCombiner::SimplifyMemTransfer(MemIntrinsic *MI) {
- unsigned DstAlign = getKnownAlignment(MI->getArgOperand(0), DL, AC, MI, DT);
- unsigned SrcAlign = getKnownAlignment(MI->getArgOperand(1), DL, AC, MI, DT);
+ unsigned DstAlign = getKnownAlignment(MI->getArgOperand(0), DL, MI, AC, DT);
+ unsigned SrcAlign = getKnownAlignment(MI->getArgOperand(1), DL, MI, AC, DT);
unsigned MinAlign = std::min(DstAlign, SrcAlign);
unsigned CopyAlign = MI->getAlignment();
@@ -108,7 +107,7 @@ Instruction *InstCombiner::SimplifyMemTransfer(MemIntrinsic *MI) {
if (StrippedDest != MI->getArgOperand(0)) {
Type *SrcETy = cast<PointerType>(StrippedDest->getType())
->getElementType();
- if (DL && SrcETy->isSized() && DL->getTypeStoreSize(SrcETy) == Size) {
+ if (SrcETy->isSized() && DL.getTypeStoreSize(SrcETy) == Size) {
// The SrcETy might be something like {{{double}}} or [1 x double]. Rip
// down through these levels if so.
SrcETy = reduceToSingleValueType(SrcETy);
@@ -156,7 +155,7 @@ Instruction *InstCombiner::SimplifyMemTransfer(MemIntrinsic *MI) {
}
Instruction *InstCombiner::SimplifyMemSet(MemSetInst *MI) {
- unsigned Alignment = getKnownAlignment(MI->getDest(), DL, AC, MI, DT);
+ unsigned Alignment = getKnownAlignment(MI->getDest(), DL, MI, AC, DT);
if (MI->getAlignment() < Alignment) {
MI->setAlignment(ConstantInt::get(MI->getAlignmentType(),
Alignment, false));
@@ -198,6 +197,71 @@ Instruction *InstCombiner::SimplifyMemSet(MemSetInst *MI) {
return nullptr;
}
+/// The shuffle mask for a perm2*128 selects any two halves of two 256-bit
+/// source vectors, unless a zero bit is set. If a zero bit is set,
+/// then ignore that half of the mask and clear that half of the vector.
+static Value *SimplifyX86vperm2(const IntrinsicInst &II,
+ InstCombiner::BuilderTy &Builder) {
+ if (auto CInt = dyn_cast<ConstantInt>(II.getArgOperand(2))) {
+ VectorType *VecTy = cast<VectorType>(II.getType());
+ ConstantAggregateZero *ZeroVector = ConstantAggregateZero::get(VecTy);
+
+ // The immediate permute control byte looks like this:
+ // [1:0] - select 128 bits from sources for low half of destination
+ // [2] - ignore
+ // [3] - zero low half of destination
+ // [5:4] - select 128 bits from sources for high half of destination
+ // [6] - ignore
+ // [7] - zero high half of destination
+
+ uint8_t Imm = CInt->getZExtValue();
+
+ bool LowHalfZero = Imm & 0x08;
+ bool HighHalfZero = Imm & 0x80;
+
+ // If both zero mask bits are set, this was just a weird way to
+ // generate a zero vector.
+ if (LowHalfZero && HighHalfZero)
+ return ZeroVector;
+
+ // If 0 or 1 zero mask bits are set, this is a simple shuffle.
+ unsigned NumElts = VecTy->getNumElements();
+ unsigned HalfSize = NumElts / 2;
+ SmallVector<int, 8> ShuffleMask(NumElts);
+
+ // The high bit of the selection field chooses the 1st or 2nd operand.
+ bool LowInputSelect = Imm & 0x02;
+ bool HighInputSelect = Imm & 0x20;
+
+ // The low bit of the selection field chooses the low or high half
+ // of the selected operand.
+ bool LowHalfSelect = Imm & 0x01;
+ bool HighHalfSelect = Imm & 0x10;
+
+ // Determine which operand(s) are actually in use for this instruction.
+ Value *V0 = LowInputSelect ? II.getArgOperand(1) : II.getArgOperand(0);
+ Value *V1 = HighInputSelect ? II.getArgOperand(1) : II.getArgOperand(0);
+
+ // If needed, replace operands based on zero mask.
+ V0 = LowHalfZero ? ZeroVector : V0;
+ V1 = HighHalfZero ? ZeroVector : V1;
+
+ // Permute low half of result.
+ unsigned StartIndex = LowHalfSelect ? HalfSize : 0;
+ for (unsigned i = 0; i < HalfSize; ++i)
+ ShuffleMask[i] = StartIndex + i;
+
+ // Permute high half of result.
+ StartIndex = HighHalfSelect ? HalfSize : 0;
+ StartIndex += NumElts;
+ for (unsigned i = 0; i < HalfSize; ++i)
+ ShuffleMask[i + HalfSize] = StartIndex + i;
+
+ return Builder.CreateShuffleVector(V0, V1, ShuffleMask);
+ }
+ return nullptr;
+}
+
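As a worked decode of the control byte: Imm = 0x31 on a <4 x double> vperm2f128 picks the high half of source 0 and the high half of source 1, giving shuffle mask <2,3,6,7> with no zeroing. A standalone C++ model of the lowering above (an illustrative sketch, not LLVM code):

#include <cstdint>
#include <cstdio>
#include <vector>

// Returns the shuffle mask; Ops[0]/Ops[1] report which source feeds the
// low/high half of the result (0, 1, or -1 for the zero vector).
static std::vector<int> decodePerm2(uint8_t Imm, unsigned NumElts, int Ops[2]) {
  unsigned Half = NumElts / 2;
  std::vector<int> Mask(NumElts);
  Ops[0] = (Imm & 0x08) ? -1 : ((Imm & 0x02) ? 1 : 0);
  Ops[1] = (Imm & 0x80) ? -1 : ((Imm & 0x20) ? 1 : 0);
  unsigned LoStart = (Imm & 0x01) ? Half : 0;             // low/high of src
  unsigned HiStart = ((Imm & 0x10) ? Half : 0) + NumElts;
  for (unsigned i = 0; i < Half; ++i) {
    Mask[i] = LoStart + i;        // indexes shuffle operand 0
    Mask[i + Half] = HiStart + i; // indexes shuffle operand 1
  }
  return Mask;
}

int main() {
  int Ops[2];
  std::vector<int> Mask = decodePerm2(0x31, 4, Ops); // <4 x double> case
  std::printf("ops=(%d,%d) mask=", Ops[0], Ops[1]);
  for (int M : Mask)
    std::printf("%d ", M); // prints: ops=(0,1) mask=2 3 6 7
  std::printf("\n");
}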
/// visitCallInst - CallInst simplification. This mostly only handles folding
/// of intrinsic instructions. For normal calls, it allows visitCallSite to do
/// the heavy lifting.
@@ -386,7 +450,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
// can prove that it will never overflow.
if (II->getIntrinsicID() == Intrinsic::sadd_with_overflow) {
Value *LHS = II->getArgOperand(0), *RHS = II->getArgOperand(1);
- if (WillNotOverflowSignedAdd(LHS, RHS, II)) {
+ if (WillNotOverflowSignedAdd(LHS, RHS, *II)) {
return CreateOverflowTuple(II, Builder->CreateNSWAdd(LHS, RHS), false);
}
}
@@ -407,11 +471,11 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
}
}
if (II->getIntrinsicID() == Intrinsic::ssub_with_overflow) {
- if (WillNotOverflowSignedSub(LHS, RHS, II)) {
+ if (WillNotOverflowSignedSub(LHS, RHS, *II)) {
return CreateOverflowTuple(II, Builder->CreateNSWSub(LHS, RHS), false);
}
} else {
- if (WillNotOverflowUnsignedSub(LHS, RHS, II)) {
+ if (WillNotOverflowUnsignedSub(LHS, RHS, *II)) {
return CreateOverflowTuple(II, Builder->CreateNUWSub(LHS, RHS), false);
}
}
@@ -452,7 +516,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
}
if (II->getIntrinsicID() == Intrinsic::smul_with_overflow) {
Value *LHS = II->getArgOperand(0), *RHS = II->getArgOperand(1);
- if (WillNotOverflowSignedMul(LHS, RHS, II)) {
+ if (WillNotOverflowSignedMul(LHS, RHS, *II)) {
return CreateOverflowTuple(II, Builder->CreateNSWMul(LHS, RHS), false);
}
}
@@ -544,7 +608,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
case Intrinsic::ppc_altivec_lvx:
case Intrinsic::ppc_altivec_lvxl:
// Turn PPC lvx -> load if the pointer is known aligned.
- if (getOrEnforceKnownAlignment(II->getArgOperand(0), 16, DL, AC, II, DT) >=
+ if (getOrEnforceKnownAlignment(II->getArgOperand(0), 16, DL, II, AC, DT) >=
16) {
Value *Ptr = Builder->CreateBitCast(II->getArgOperand(0),
PointerType::getUnqual(II->getType()));
@@ -561,7 +625,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
case Intrinsic::ppc_altivec_stvx:
case Intrinsic::ppc_altivec_stvxl:
// Turn stvx -> store if the pointer is known aligned.
- if (getOrEnforceKnownAlignment(II->getArgOperand(1), 16, DL, AC, II, DT) >=
+ if (getOrEnforceKnownAlignment(II->getArgOperand(1), 16, DL, II, AC, DT) >=
16) {
Type *OpPtrTy =
PointerType::getUnqual(II->getArgOperand(0)->getType());
@@ -578,7 +642,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
}
case Intrinsic::ppc_qpx_qvlfs:
// Turn PPC QPX qvlfs -> load if the pointer is known aligned.
- if (getOrEnforceKnownAlignment(II->getArgOperand(0), 16, DL, AC, II, DT) >=
+ if (getOrEnforceKnownAlignment(II->getArgOperand(0), 16, DL, II, AC, DT) >=
16) {
Value *Ptr = Builder->CreateBitCast(II->getArgOperand(0),
PointerType::getUnqual(II->getType()));
@@ -587,7 +651,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
break;
case Intrinsic::ppc_qpx_qvlfd:
// Turn PPC QPX qvlfd -> load if the pointer is known aligned.
- if (getOrEnforceKnownAlignment(II->getArgOperand(0), 32, DL, AC, II, DT) >=
+ if (getOrEnforceKnownAlignment(II->getArgOperand(0), 32, DL, II, AC, DT) >=
32) {
Value *Ptr = Builder->CreateBitCast(II->getArgOperand(0),
PointerType::getUnqual(II->getType()));
@@ -596,7 +660,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
break;
case Intrinsic::ppc_qpx_qvstfs:
// Turn PPC QPX qvstfs -> store if the pointer is known aligned.
- if (getOrEnforceKnownAlignment(II->getArgOperand(1), 16, DL, AC, II, DT) >=
+ if (getOrEnforceKnownAlignment(II->getArgOperand(1), 16, DL, II, AC, DT) >=
16) {
Type *OpPtrTy =
PointerType::getUnqual(II->getArgOperand(0)->getType());
@@ -606,7 +670,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
break;
case Intrinsic::ppc_qpx_qvstfd:
// Turn PPC QPX qvstfd -> store if the pointer is known aligned.
- if (getOrEnforceKnownAlignment(II->getArgOperand(1), 32, DL, AC, II, DT) >=
+ if (getOrEnforceKnownAlignment(II->getArgOperand(1), 32, DL, II, AC, DT) >=
32) {
Type *OpPtrTy =
PointerType::getUnqual(II->getArgOperand(0)->getType());
@@ -618,7 +682,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
case Intrinsic::x86_sse2_storeu_pd:
case Intrinsic::x86_sse2_storeu_dq:
// Turn X86 storeu -> store if the pointer is known aligned.
- if (getOrEnforceKnownAlignment(II->getArgOperand(0), 16, DL, AC, II, DT) >=
+ if (getOrEnforceKnownAlignment(II->getArgOperand(0), 16, DL, II, AC, DT) >=
16) {
Type *OpPtrTy =
PointerType::getUnqual(II->getArgOperand(1)->getType());
@@ -735,9 +799,8 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
unsigned LowHalfElts = VWidth / 2;
APInt InputDemandedElts(APInt::getBitsSet(VWidth, 0, LowHalfElts));
APInt UndefElts(VWidth, 0);
- if (Value *TmpV = SimplifyDemandedVectorElts(II->getArgOperand(0),
- InputDemandedElts,
- UndefElts)) {
+ if (Value *TmpV = SimplifyDemandedVectorElts(
+ II->getArgOperand(0), InputDemandedElts, UndefElts)) {
II->setArgOperand(0, TmpV);
return II;
}
@@ -906,6 +969,14 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
return ReplaceInstUsesWith(CI, Shuffle);
}
+ case Intrinsic::x86_avx_vperm2f128_pd_256:
+ case Intrinsic::x86_avx_vperm2f128_ps_256:
+ case Intrinsic::x86_avx_vperm2f128_si_256:
+ case Intrinsic::x86_avx2_vperm2i128:
+ if (Value *V = SimplifyX86vperm2(*II, *Builder))
+ return ReplaceInstUsesWith(*II, V);
+ break;
+
case Intrinsic::ppc_altivec_vperm:
// Turn vperm(V1,V2,mask) -> shuffle(V1,V2,mask) if mask is a constant.
// Note that ppc_altivec_vperm has a big-endian bias, so when creating
@@ -945,12 +1016,12 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
unsigned Idx =
cast<ConstantInt>(Mask->getAggregateElement(i))->getZExtValue();
Idx &= 31; // Match the hardware behavior.
- if (DL && DL->isLittleEndian())
+ if (DL.isLittleEndian())
Idx = 31 - Idx;
if (!ExtractedElts[Idx]) {
- Value *Op0ToUse = (DL && DL->isLittleEndian()) ? Op1 : Op0;
- Value *Op1ToUse = (DL && DL->isLittleEndian()) ? Op0 : Op1;
+ Value *Op0ToUse = (DL.isLittleEndian()) ? Op1 : Op0;
+ Value *Op1ToUse = (DL.isLittleEndian()) ? Op0 : Op1;
ExtractedElts[Idx] =
Builder->CreateExtractElement(Idx < 16 ? Op0ToUse : Op1ToUse,
Builder->getInt32(Idx&15));
@@ -979,7 +1050,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
case Intrinsic::arm_neon_vst2lane:
case Intrinsic::arm_neon_vst3lane:
case Intrinsic::arm_neon_vst4lane: {
- unsigned MemAlign = getKnownAlignment(II->getArgOperand(0), DL, AC, II, DT);
+ unsigned MemAlign = getKnownAlignment(II->getArgOperand(0), DL, II, AC, DT);
unsigned AlignArg = II->getNumArgOperands() - 1;
ConstantInt *IntrAlign = dyn_cast<ConstantInt>(II->getArgOperand(AlignArg));
if (IntrAlign && IntrAlign->getZExtValue() < MemAlign) {
@@ -1118,7 +1189,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
RHS->getType()->isPointerTy() &&
cast<Constant>(RHS)->isNullValue()) {
LoadInst* LI = cast<LoadInst>(LHS);
- if (isValidAssumeForContext(II, LI, DL, DT)) {
+ if (isValidAssumeForContext(II, LI, DT)) {
MDNode *MD = MDNode::get(II->getContext(), None);
LI->setMetadata(LLVMContext::MD_nonnull, MD);
return EraseInstFromFunction(*II);
@@ -1192,8 +1263,8 @@ Instruction *InstCombiner::visitInvokeInst(InvokeInst &II) {
/// isSafeToEliminateVarargsCast - If this cast does not affect the value
/// passed through the varargs area, we can eliminate the use of the cast.
static bool isSafeToEliminateVarargsCast(const CallSite CS,
- const CastInst * const CI,
- const DataLayout * const DL,
+ const DataLayout &DL,
+ const CastInst *const CI,
const int ix) {
if (!CI->isLosslessCast())
return false;
@@ -1217,7 +1288,7 @@ static bool isSafeToEliminateVarargsCast(const CallSite CS,
Type* DstTy = cast<PointerType>(CI->getType())->getElementType();
if (!SrcTy->isSized() || !DstTy->isSized())
return false;
- if (!DL || DL->getTypeAllocSize(SrcTy) != DL->getTypeAllocSize(DstTy))
+ if (DL.getTypeAllocSize(SrcTy) != DL.getTypeAllocSize(DstTy))
return false;
return true;
}
@@ -1226,7 +1297,7 @@ static bool isSafeToEliminateVarargsCast(const CallSite CS,
// Currently we're only working with the checking functions, memcpy_chk,
// mempcpy_chk, memmove_chk, memset_chk, strcpy_chk, stpcpy_chk, strncpy_chk,
// strcat_chk and strncat_chk.
-Instruction *InstCombiner::tryOptimizeCall(CallInst *CI, const DataLayout *DL) {
+Instruction *InstCombiner::tryOptimizeCall(CallInst *CI) {
if (!CI->getCalledFunction()) return nullptr;
auto InstCombineRAUW = [this](Instruction *From, Value *With) {
@@ -1391,7 +1462,7 @@ Instruction *InstCombiner::visitCallSite(CallSite CS) {
for (CallSite::arg_iterator I = CS.arg_begin() + FTy->getNumParams(),
E = CS.arg_end(); I != E; ++I, ++ix) {
CastInst *CI = dyn_cast<CastInst>(*I);
- if (CI && isSafeToEliminateVarargsCast(CS, CI, DL, ix)) {
+ if (CI && isSafeToEliminateVarargsCast(CS, DL, CI, ix)) {
*I = CI->getOperand(0);
Changed = true;
}
@@ -1408,7 +1479,7 @@ Instruction *InstCombiner::visitCallSite(CallSite CS) {
// this. None of these calls are seen as possibly dead so go ahead and
// delete the instruction now.
if (CallInst *CI = dyn_cast<CallInst>(CS.getInstruction())) {
- Instruction *I = tryOptimizeCall(CI, DL);
+ Instruction *I = tryOptimizeCall(CI);
// If we changed something return the result, etc. Otherwise let
// the fallthrough check.
if (I) return EraseInstFromFunction(*I);
@@ -1487,7 +1558,10 @@ bool InstCombiner::transformConstExprCastCall(CallSite CS) {
//
// into:
// call void @takes_i32_inalloca(i32* null)
- if (Callee->getAttributes().hasAttrSomewhere(Attribute::InAlloca))
+ //
+ // Similarly, avoid folding away bitcasts of byval calls.
+ if (Callee->getAttributes().hasAttrSomewhere(Attribute::InAlloca) ||
+ Callee->getAttributes().hasAttrSomewhere(Attribute::ByVal))
return false;
CallSite::arg_iterator AI = CS.arg_begin();
@@ -1512,12 +1586,12 @@ bool InstCombiner::transformConstExprCastCall(CallSite CS) {
CallerPAL.getParamAttributes(i + 1).hasAttribute(i + 1,
Attribute::ByVal)) {
PointerType *ParamPTy = dyn_cast<PointerType>(ParamTy);
- if (!ParamPTy || !ParamPTy->getElementType()->isSized() || !DL)
+ if (!ParamPTy || !ParamPTy->getElementType()->isSized())
return false;
Type *CurElTy = ActTy->getPointerElementType();
- if (DL->getTypeAllocSize(CurElTy) !=
- DL->getTypeAllocSize(ParamPTy->getElementType()))
+ if (DL.getTypeAllocSize(CurElTy) !=
+ DL.getTypeAllocSize(ParamPTy->getElementType()))
return false;
}
}
diff --git a/lib/Transforms/InstCombine/InstCombineCasts.cpp b/lib/Transforms/InstCombine/InstCombineCasts.cpp
index 3e2b719..fe544c2 100644
--- a/lib/Transforms/InstCombine/InstCombineCasts.cpp
+++ b/lib/Transforms/InstCombine/InstCombineCasts.cpp
@@ -80,9 +80,6 @@ static Value *DecomposeSimpleLinearExpr(Value *Val, unsigned &Scale,
/// try to eliminate the cast by moving the type information into the alloc.
Instruction *InstCombiner::PromoteCastOfAllocation(BitCastInst &CI,
AllocaInst &AI) {
- // This requires DataLayout to get the alloca alignment and size information.
- if (!DL) return nullptr;
-
PointerType *PTy = cast<PointerType>(CI.getType());
BuilderTy AllocaBuilder(*Builder);
@@ -93,8 +90,8 @@ Instruction *InstCombiner::PromoteCastOfAllocation(BitCastInst &CI,
Type *CastElTy = PTy->getElementType();
if (!AllocElTy->isSized() || !CastElTy->isSized()) return nullptr;
- unsigned AllocElTyAlign = DL->getABITypeAlignment(AllocElTy);
- unsigned CastElTyAlign = DL->getABITypeAlignment(CastElTy);
+ unsigned AllocElTyAlign = DL.getABITypeAlignment(AllocElTy);
+ unsigned CastElTyAlign = DL.getABITypeAlignment(CastElTy);
if (CastElTyAlign < AllocElTyAlign) return nullptr;
// If the allocation has multiple uses, only promote it if we are strictly
@@ -102,14 +99,14 @@ Instruction *InstCombiner::PromoteCastOfAllocation(BitCastInst &CI,
// same, we open the door to infinite loops of various kinds.
if (!AI.hasOneUse() && CastElTyAlign == AllocElTyAlign) return nullptr;
- uint64_t AllocElTySize = DL->getTypeAllocSize(AllocElTy);
- uint64_t CastElTySize = DL->getTypeAllocSize(CastElTy);
+ uint64_t AllocElTySize = DL.getTypeAllocSize(AllocElTy);
+ uint64_t CastElTySize = DL.getTypeAllocSize(CastElTy);
if (CastElTySize == 0 || AllocElTySize == 0) return nullptr;
// If the allocation has multiple uses, only promote it if we're not
// shrinking the amount of memory being allocated.
- uint64_t AllocElTyStoreSize = DL->getTypeStoreSize(AllocElTy);
- uint64_t CastElTyStoreSize = DL->getTypeStoreSize(CastElTy);
+ uint64_t AllocElTyStoreSize = DL.getTypeStoreSize(AllocElTy);
+ uint64_t CastElTyStoreSize = DL.getTypeStoreSize(CastElTy);
if (!AI.hasOneUse() && CastElTyStoreSize < AllocElTyStoreSize) return nullptr;
// See if we can satisfy the modulus by pulling a scale out of the array
@@ -215,7 +212,8 @@ Value *InstCombiner::EvaluateInDifferentType(Value *V, Type *Ty,
PHINode *OPN = cast<PHINode>(I);
PHINode *NPN = PHINode::Create(Ty, OPN->getNumIncomingValues());
for (unsigned i = 0, e = OPN->getNumIncomingValues(); i != e; ++i) {
- Value *V =EvaluateInDifferentType(OPN->getIncomingValue(i), Ty, isSigned);
+ Value *V =
+ EvaluateInDifferentType(OPN->getIncomingValue(i), Ty, isSigned);
NPN->addIncoming(V, OPN->getIncomingBlock(i));
}
Res = NPN;
@@ -234,25 +232,22 @@ Value *InstCombiner::EvaluateInDifferentType(Value *V, Type *Ty,
/// This function is a wrapper around CastInst::isEliminableCastPair. It
/// simply extracts arguments and returns what that function returns.
static Instruction::CastOps
-isEliminableCastPair(
- const CastInst *CI, ///< The first cast instruction
- unsigned opcode, ///< The opcode of the second cast instruction
- Type *DstTy, ///< The target type for the second cast instruction
- const DataLayout *DL ///< The target data for pointer size
-) {
-
+isEliminableCastPair(const CastInst *CI, ///< First cast instruction
+ unsigned opcode, ///< Opcode for the second cast
+ Type *DstTy, ///< Target type for the second cast
+ const DataLayout &DL) {
Type *SrcTy = CI->getOperand(0)->getType(); // A from above
Type *MidTy = CI->getType(); // B from above
// Get the opcodes of the two Cast instructions
Instruction::CastOps firstOp = Instruction::CastOps(CI->getOpcode());
Instruction::CastOps secondOp = Instruction::CastOps(opcode);
- Type *SrcIntPtrTy = DL && SrcTy->isPtrOrPtrVectorTy() ?
- DL->getIntPtrType(SrcTy) : nullptr;
- Type *MidIntPtrTy = DL && MidTy->isPtrOrPtrVectorTy() ?
- DL->getIntPtrType(MidTy) : nullptr;
- Type *DstIntPtrTy = DL && DstTy->isPtrOrPtrVectorTy() ?
- DL->getIntPtrType(DstTy) : nullptr;
+ Type *SrcIntPtrTy =
+ SrcTy->isPtrOrPtrVectorTy() ? DL.getIntPtrType(SrcTy) : nullptr;
+ Type *MidIntPtrTy =
+ MidTy->isPtrOrPtrVectorTy() ? DL.getIntPtrType(MidTy) : nullptr;
+ Type *DstIntPtrTy =
+ DstTy->isPtrOrPtrVectorTy() ? DL.getIntPtrType(DstTy) : nullptr;
unsigned Res = CastInst::isEliminableCastPair(firstOp, secondOp, SrcTy, MidTy,
DstTy, SrcIntPtrTy, MidIntPtrTy,
DstIntPtrTy);
@@ -298,7 +293,7 @@ Instruction *InstCombiner::commonCastTransforms(CastInst &CI) {
// eliminate it now.
if (CastInst *CSrc = dyn_cast<CastInst>(Src)) { // A->B->C cast
if (Instruction::CastOps opc =
- isEliminableCastPair(CSrc, CI.getOpcode(), CI.getType(), DL)) {
+ isEliminableCastPair(CSrc, CI.getOpcode(), CI.getType(), DL)) {
// The first cast (CSrc) is eliminable so we need to fix up or replace
// the second cast (CI). CSrc will then have a good chance of being dead.
return CastInst::Create(opc, CSrc->getOperand(0), CI.getType());
@@ -314,8 +309,7 @@ Instruction *InstCombiner::commonCastTransforms(CastInst &CI) {
if (isa<PHINode>(Src)) {
// We don't do this if this would create a PHI node with an illegal type if
// it is currently legal.
- if (!Src->getType()->isIntegerTy() ||
- !CI.getType()->isIntegerTy() ||
+ if (!Src->getType()->isIntegerTy() || !CI.getType()->isIntegerTy() ||
ShouldChangeType(CI.getType(), Src->getType()))
if (Instruction *NV = FoldOpIntoPhi(CI))
return NV;
@@ -1419,18 +1413,15 @@ Instruction *InstCombiner::visitIntToPtr(IntToPtrInst &CI) {
// If the source integer type is not the intptr_t type for this target, do a
// trunc or zext to the intptr_t type, then inttoptr of it. This allows the
// cast to be exposed to other transforms.
-
- if (DL) {
- unsigned AS = CI.getAddressSpace();
- if (CI.getOperand(0)->getType()->getScalarSizeInBits() !=
- DL->getPointerSizeInBits(AS)) {
- Type *Ty = DL->getIntPtrType(CI.getContext(), AS);
- if (CI.getType()->isVectorTy()) // Handle vectors of pointers.
- Ty = VectorType::get(Ty, CI.getType()->getVectorNumElements());
-
- Value *P = Builder->CreateZExtOrTrunc(CI.getOperand(0), Ty);
- return new IntToPtrInst(P, CI.getType());
- }
+ unsigned AS = CI.getAddressSpace();
+ if (CI.getOperand(0)->getType()->getScalarSizeInBits() !=
+ DL.getPointerSizeInBits(AS)) {
+ Type *Ty = DL.getIntPtrType(CI.getContext(), AS);
+ if (CI.getType()->isVectorTy()) // Handle vectors of pointers.
+ Ty = VectorType::get(Ty, CI.getType()->getVectorNumElements());
+
+ Value *P = Builder->CreateZExtOrTrunc(CI.getOperand(0), Ty);
+ return new IntToPtrInst(P, CI.getType());
}
if (Instruction *I = commonCastTransforms(CI))
@@ -1460,32 +1451,33 @@ Instruction *InstCombiner::commonPointerCastTransforms(CastInst &CI) {
return &CI;
}
- if (!DL)
- return commonCastTransforms(CI);
-
// If the GEP has a single use, and the base pointer is a bitcast, and the
// GEP computes a constant offset, see if we can convert these three
// instructions into fewer. This typically happens with unions and other
// non-type-safe code.
unsigned AS = GEP->getPointerAddressSpace();
- unsigned OffsetBits = DL->getPointerSizeInBits(AS);
+ unsigned OffsetBits = DL.getPointerSizeInBits(AS);
APInt Offset(OffsetBits, 0);
BitCastInst *BCI = dyn_cast<BitCastInst>(GEP->getOperand(0));
- if (GEP->hasOneUse() &&
- BCI &&
- GEP->accumulateConstantOffset(*DL, Offset)) {
+ if (GEP->hasOneUse() && BCI && GEP->accumulateConstantOffset(DL, Offset)) {
+ // FIXME: This is insufficiently tested - just a no-crash test
+ // (test/Transforms/InstCombine/2007-05-14-Crash.ll)
+ //
// Get the base pointer input of the bitcast, and the type it points to.
Value *OrigBase = BCI->getOperand(0);
SmallVector<Value*, 8> NewIndices;
- if (FindElementAtOffset(OrigBase->getType(),
- Offset.getSExtValue(),
+ if (FindElementAtOffset(OrigBase->getType(), Offset.getSExtValue(),
NewIndices)) {
+ // FIXME: This codepath is completely untested - could be unreachable
+ // for all I know.
// If we were able to index down into an element, create the GEP
// and bitcast the result. This eliminates one bitcast, potentially
// two.
- Value *NGEP = cast<GEPOperator>(GEP)->isInBounds() ?
- Builder->CreateInBoundsGEP(OrigBase, NewIndices) :
- Builder->CreateGEP(OrigBase, NewIndices);
+ Value *NGEP = cast<GEPOperator>(GEP)->isInBounds()
+ ? Builder->CreateInBoundsGEP(OrigBase, NewIndices)
+ : Builder->CreateGEP(
+ OrigBase->getType()->getPointerElementType(),
+ OrigBase, NewIndices);
NGEP->takeName(GEP);
if (isa<BitCastInst>(CI))
@@ -1504,16 +1496,13 @@ Instruction *InstCombiner::visitPtrToInt(PtrToIntInst &CI) {
// do a ptrtoint to intptr_t then do a trunc or zext. This allows the cast
// to be exposed to other transforms.
- if (!DL)
- return commonPointerCastTransforms(CI);
-
Type *Ty = CI.getType();
unsigned AS = CI.getPointerAddressSpace();
- if (Ty->getScalarSizeInBits() == DL->getPointerSizeInBits(AS))
+ if (Ty->getScalarSizeInBits() == DL.getPointerSizeInBits(AS))
return commonPointerCastTransforms(CI);
- Type *PtrTy = DL->getIntPtrType(CI.getContext(), AS);
+ Type *PtrTy = DL.getIntPtrType(CI.getContext(), AS);
if (Ty->isVectorTy()) // Handle vectors of pointers.
PtrTy = VectorType::get(PtrTy, Ty->getVectorNumElements());
@@ -1597,8 +1586,8 @@ static unsigned getTypeSizeIndex(unsigned Value, Type *Ty) {
/// This returns false if the pattern can't be matched or true if it can,
/// filling in Elements with the elements found here.
static bool CollectInsertionElements(Value *V, unsigned Shift,
- SmallVectorImpl<Value*> &Elements,
- Type *VecEltTy, InstCombiner &IC) {
+ SmallVectorImpl<Value *> &Elements,
+ Type *VecEltTy, bool isBigEndian) {
assert(isMultipleOfTypeSize(Shift, VecEltTy) &&
"Shift should be a multiple of the element type size");
@@ -1614,7 +1603,7 @@ static bool CollectInsertionElements(Value *V, unsigned Shift,
return true;
unsigned ElementIndex = getTypeSizeIndex(Shift, VecEltTy);
- if (IC.getDataLayout()->isBigEndian())
+ if (isBigEndian)
ElementIndex = Elements.size() - ElementIndex - 1;
// Fail if multiple elements are inserted into this slot.
@@ -1634,7 +1623,7 @@ static bool CollectInsertionElements(Value *V, unsigned Shift,
// it to the right type so it gets properly inserted.
if (NumElts == 1)
return CollectInsertionElements(ConstantExpr::getBitCast(C, VecEltTy),
- Shift, Elements, VecEltTy, IC);
+ Shift, Elements, VecEltTy, isBigEndian);
// Okay, this is a constant that covers multiple elements. Slice it up into
// pieces and insert each element-sized piece into the vector.
@@ -1649,7 +1638,8 @@ static bool CollectInsertionElements(Value *V, unsigned Shift,
Constant *Piece = ConstantExpr::getLShr(C, ConstantInt::get(C->getType(),
ShiftI));
Piece = ConstantExpr::getTrunc(Piece, ElementIntTy);
- if (!CollectInsertionElements(Piece, ShiftI, Elements, VecEltTy, IC))
+ if (!CollectInsertionElements(Piece, ShiftI, Elements, VecEltTy,
+ isBigEndian))
return false;
}
return true;
@@ -1662,28 +1652,28 @@ static bool CollectInsertionElements(Value *V, unsigned Shift,
switch (I->getOpcode()) {
default: return false; // Unhandled case.
case Instruction::BitCast:
- return CollectInsertionElements(I->getOperand(0), Shift,
- Elements, VecEltTy, IC);
+ return CollectInsertionElements(I->getOperand(0), Shift, Elements, VecEltTy,
+ isBigEndian);
case Instruction::ZExt:
if (!isMultipleOfTypeSize(
I->getOperand(0)->getType()->getPrimitiveSizeInBits(),
VecEltTy))
return false;
- return CollectInsertionElements(I->getOperand(0), Shift,
- Elements, VecEltTy, IC);
+ return CollectInsertionElements(I->getOperand(0), Shift, Elements, VecEltTy,
+ isBigEndian);
case Instruction::Or:
- return CollectInsertionElements(I->getOperand(0), Shift,
- Elements, VecEltTy, IC) &&
- CollectInsertionElements(I->getOperand(1), Shift,
- Elements, VecEltTy, IC);
+ return CollectInsertionElements(I->getOperand(0), Shift, Elements, VecEltTy,
+ isBigEndian) &&
+ CollectInsertionElements(I->getOperand(1), Shift, Elements, VecEltTy,
+ isBigEndian);
case Instruction::Shl: {
// Must be shifting by a constant that is a multiple of the element size.
ConstantInt *CI = dyn_cast<ConstantInt>(I->getOperand(1));
if (!CI) return false;
Shift += CI->getZExtValue();
if (!isMultipleOfTypeSize(Shift, VecEltTy)) return false;
- return CollectInsertionElements(I->getOperand(0), Shift,
- Elements, VecEltTy, IC);
+ return CollectInsertionElements(I->getOperand(0), Shift, Elements, VecEltTy,
+ isBigEndian);
}
}
@@ -1706,15 +1696,13 @@ static bool CollectInsertionElements(Value *V, unsigned Shift,
/// Into two insertelements that do "buildvector{%inc, %inc5}".
static Value *OptimizeIntegerToVectorInsertions(BitCastInst &CI,
InstCombiner &IC) {
- // We need to know the target byte order to perform this optimization.
- if (!IC.getDataLayout()) return nullptr;
-
VectorType *DestVecTy = cast<VectorType>(CI.getType());
Value *IntInput = CI.getOperand(0);
SmallVector<Value*, 8> Elements(DestVecTy->getNumElements());
if (!CollectInsertionElements(IntInput, 0, Elements,
- DestVecTy->getElementType(), IC))
+ DestVecTy->getElementType(),
+ IC.getDataLayout().isBigEndian()))
return nullptr;
// If we succeeded, we know that all of the element are specified by Elements
@@ -1734,10 +1722,8 @@ static Value *OptimizeIntegerToVectorInsertions(BitCastInst &CI,
/// OptimizeIntToFloatBitCast - See if we can optimize an integer->float/double
/// bitcast. The various long double bitcasts can't get in here.
-static Instruction *OptimizeIntToFloatBitCast(BitCastInst &CI,InstCombiner &IC){
- // We need to know the target byte order to perform this optimization.
- if (!IC.getDataLayout()) return nullptr;
-
+static Instruction *OptimizeIntToFloatBitCast(BitCastInst &CI, InstCombiner &IC,
+ const DataLayout &DL) {
Value *Src = CI.getOperand(0);
Type *DestTy = CI.getType();
@@ -1760,7 +1746,7 @@ static Instruction *OptimizeIntToFloatBitCast(BitCastInst &CI,InstCombiner &IC){
}
unsigned Elt = 0;
- if (IC.getDataLayout()->isBigEndian())
+ if (DL.isBigEndian())
Elt = VecTy->getPrimitiveSizeInBits() / DestWidth - 1;
return ExtractElementInst::Create(VecInput, IC.Builder->getInt32(Elt));
}
@@ -1784,7 +1770,7 @@ static Instruction *OptimizeIntToFloatBitCast(BitCastInst &CI,InstCombiner &IC){
}
unsigned Elt = ShAmt->getZExtValue() / DestWidth;
- if (IC.getDataLayout()->isBigEndian())
+ if (DL.isBigEndian())
Elt = VecTy->getPrimitiveSizeInBits() / DestWidth - 1 - Elt;
return ExtractElementInst::Create(VecInput, IC.Builder->getInt32(Elt));
}
@@ -1839,7 +1825,7 @@ Instruction *InstCombiner::visitBitCast(BitCastInst &CI) {
// Try to optimize int -> float bitcasts.
if ((DestTy->isFloatTy() || DestTy->isDoubleTy()) && isa<IntegerType>(SrcTy))
- if (Instruction *I = OptimizeIntToFloatBitCast(CI, *this))
+ if (Instruction *I = OptimizeIntToFloatBitCast(CI, *this, DL))
return I;
if (VectorType *DestVTy = dyn_cast<VectorType>(DestTy)) {
diff --git a/lib/Transforms/InstCombine/InstCombineCompares.cpp b/lib/Transforms/InstCombine/InstCombineCompares.cpp
index f48d89b..803b50a 100644
--- a/lib/Transforms/InstCombine/InstCombineCompares.cpp
+++ b/lib/Transforms/InstCombine/InstCombineCompares.cpp
@@ -229,10 +229,6 @@ static void ComputeUnsignedMinMaxValuesFromKnownBits(const APInt &KnownZero,
Instruction *InstCombiner::
FoldCmpLoadFromIndexedGlobal(GetElementPtrInst *GEP, GlobalVariable *GV,
CmpInst &ICI, ConstantInt *AndCst) {
- // We need TD information to know the pointer size unless this is inbounds.
- if (!GEP->isInBounds() && !DL)
- return nullptr;
-
Constant *Init = GV->getInitializer();
if (!isa<ConstantArray>(Init) && !isa<ConstantDataArray>(Init))
return nullptr;
@@ -303,7 +299,6 @@ FoldCmpLoadFromIndexedGlobal(GetElementPtrInst *GEP, GlobalVariable *GV,
// the array, this will fully represent all the comparison results.
uint64_t MagicBitvector = 0;
-
// Scan the array and see if one of our patterns matches.
Constant *CompareRHS = cast<Constant>(ICI.getOperand(1));
for (unsigned i = 0, e = ArrayElementCount; i != e; ++i) {
@@ -398,7 +393,7 @@ FoldCmpLoadFromIndexedGlobal(GetElementPtrInst *GEP, GlobalVariable *GV,
// index down like the GEP would do implicitly. We don't have to do this for
// an inbounds GEP because the index can't be out of range.
if (!GEP->isInBounds()) {
- Type *IntPtrTy = DL->getIntPtrType(GEP->getType());
+ Type *IntPtrTy = DL.getIntPtrType(GEP->getType());
unsigned PtrSize = IntPtrTy->getIntegerBitWidth();
if (Idx->getType()->getPrimitiveSizeInBits() > PtrSize)
Idx = Builder->CreateTrunc(Idx, IntPtrTy);
@@ -487,10 +482,8 @@ FoldCmpLoadFromIndexedGlobal(GetElementPtrInst *GEP, GlobalVariable *GV,
// - Default to i32
if (ArrayElementCount <= Idx->getType()->getIntegerBitWidth())
Ty = Idx->getType();
- else if (DL)
- Ty = DL->getSmallestLegalIntType(Init->getContext(), ArrayElementCount);
- else if (ArrayElementCount <= 32)
- Ty = Type::getInt32Ty(Init->getContext());
+ else
+ Ty = DL.getSmallestLegalIntType(Init->getContext(), ArrayElementCount);
if (Ty) {
Value *V = Builder->CreateIntCast(Idx, Ty, false);
@@ -514,8 +507,8 @@ FoldCmpLoadFromIndexedGlobal(GetElementPtrInst *GEP, GlobalVariable *GV,
///
/// If we can't emit an optimized form for this expression, this returns null.
///
-static Value *EvaluateGEPOffsetExpression(User *GEP, InstCombiner &IC) {
- const DataLayout &DL = *IC.getDataLayout();
+static Value *EvaluateGEPOffsetExpression(User *GEP, InstCombiner &IC,
+ const DataLayout &DL) {
gep_type_iterator GTI = gep_type_begin(GEP);
// Check to see if this gep only has a single variable index. If so, and if
@@ -628,12 +621,12 @@ Instruction *InstCombiner::FoldGEPICmp(GEPOperator *GEPLHS, Value *RHS,
RHS = RHS->stripPointerCasts();
Value *PtrBase = GEPLHS->getOperand(0);
- if (DL && PtrBase == RHS && GEPLHS->isInBounds()) {
+ if (PtrBase == RHS && GEPLHS->isInBounds()) {
// ((gep Ptr, OFFSET) cmp Ptr) ---> (OFFSET cmp 0).
// This transformation (ignoring the base and scales) is valid because we
// know pointers can't overflow since the gep is inbounds. See if we can
// output an optimized form.
- Value *Offset = EvaluateGEPOffsetExpression(GEPLHS, *this);
+ Value *Offset = EvaluateGEPOffsetExpression(GEPLHS, *this, DL);
// If not, synthesize the offset the hard way.
if (!Offset)
@@ -661,11 +654,11 @@ Instruction *InstCombiner::FoldGEPICmp(GEPOperator *GEPLHS, Value *RHS,
// If we're comparing GEPs with two base pointers that only differ in type
// and both GEPs have only constant indices or just one use, then fold
// the compare with the adjusted indices.
- if (DL && GEPLHS->isInBounds() && GEPRHS->isInBounds() &&
+ if (GEPLHS->isInBounds() && GEPRHS->isInBounds() &&
(GEPLHS->hasAllConstantIndices() || GEPLHS->hasOneUse()) &&
(GEPRHS->hasAllConstantIndices() || GEPRHS->hasOneUse()) &&
PtrBase->stripPointerCasts() ==
- GEPRHS->getOperand(0)->stripPointerCasts()) {
+ GEPRHS->getOperand(0)->stripPointerCasts()) {
Value *LOffset = EmitGEPOffset(GEPLHS);
Value *ROffset = EmitGEPOffset(GEPRHS);
@@ -733,9 +726,7 @@ Instruction *InstCombiner::FoldGEPICmp(GEPOperator *GEPLHS, Value *RHS,
// Only lower this if the icmp is the only user of the GEP or if we expect
// the result to fold to a constant!
- if (DL &&
- GEPsInBounds &&
- (isa<ConstantExpr>(GEPLHS) || GEPLHS->hasOneUse()) &&
+ if (GEPsInBounds && (isa<ConstantExpr>(GEPLHS) || GEPLHS->hasOneUse()) &&
(isa<ConstantExpr>(GEPRHS) || GEPRHS->hasOneUse())) {
// ((gep Ptr, OFFSET1) cmp (gep Ptr, OFFSET2) ---> (OFFSET1 cmp OFFSET2)
Value *L = EmitGEPOffset(GEPLHS);
@@ -1928,8 +1919,8 @@ Instruction *InstCombiner::visitICmpInstWithCastAndCast(ICmpInst &ICI) {
// Turn icmp (ptrtoint x), (ptrtoint/c) into a compare of the input if the
// integer type is the same size as the pointer type.
- if (DL && LHSCI->getOpcode() == Instruction::PtrToInt &&
- DL->getPointerTypeSizeInBits(SrcTy) == DestTy->getIntegerBitWidth()) {
+ if (LHSCI->getOpcode() == Instruction::PtrToInt &&
+ DL.getPointerTypeSizeInBits(SrcTy) == DestTy->getIntegerBitWidth()) {
Value *RHSOp = nullptr;
if (PtrToIntOperator *RHSC = dyn_cast<PtrToIntOperator>(ICI.getOperand(1))) {
Value *RHSCIOp = RHSC->getOperand(0);
@@ -2660,8 +2651,8 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) {
unsigned BitWidth = 0;
if (Ty->isIntOrIntVectorTy())
BitWidth = Ty->getScalarSizeInBits();
- else if (DL) // Pointers require DL info to get their size.
- BitWidth = DL->getTypeSizeInBits(Ty->getScalarType());
+ else // Get pointer size.
+ BitWidth = DL.getTypeSizeInBits(Ty->getScalarType());
bool isSignBit = false;
@@ -2774,8 +2765,8 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) {
Op0KnownZero, Op0KnownOne, 0))
return &I;
if (SimplifyDemandedBits(I.getOperandUse(1),
- APInt::getAllOnesValue(BitWidth),
- Op1KnownZero, Op1KnownOne, 0))
+ APInt::getAllOnesValue(BitWidth), Op1KnownZero,
+ Op1KnownOne, 0))
return &I;
// Given the known and unknown bits, compute a range that the LHS could be
@@ -3094,9 +3085,8 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) {
}
case Instruction::IntToPtr:
// icmp pred inttoptr(X), null -> icmp pred X, 0
- if (RHSC->isNullValue() && DL &&
- DL->getIntPtrType(RHSC->getType()) ==
- LHSI->getOperand(0)->getType())
+ if (RHSC->isNullValue() &&
+ DL.getIntPtrType(RHSC->getType()) == LHSI->getOperand(0)->getType())
return new ICmpInst(I.getPredicate(), LHSI->getOperand(0),
Constant::getNullValue(LHSI->getOperand(0)->getType()));
break;
@@ -3428,7 +3418,7 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) {
// if A is a power of 2.
if (match(Op0, m_And(m_Value(A), m_Not(m_Value(B)))) &&
match(Op1, m_Zero()) &&
- isKnownToBeAPowerOfTwo(A, false, 0, AC, &I, DT) && I.isEquality())
+ isKnownToBeAPowerOfTwo(A, DL, false, 0, AC, &I, DT) && I.isEquality())
return new ICmpInst(I.getInversePredicate(),
Builder->CreateAnd(A, B),
Op1);
@@ -3563,6 +3553,21 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) {
}
}
+ // (A << C) == (B << C) --> ((A^B) & (~0U >> C)) == 0
+ if (match(Op0, m_OneUse(m_Shl(m_Value(A), m_ConstantInt(Cst1)))) &&
+ match(Op1, m_OneUse(m_Shl(m_Value(B), m_Specific(Cst1))))) {
+ unsigned TypeBits = Cst1->getBitWidth();
+ unsigned ShAmt = (unsigned)Cst1->getLimitedValue(TypeBits);
+ if (ShAmt < TypeBits && ShAmt != 0) {
+ Value *Xor = Builder->CreateXor(A, B, I.getName() + ".unshifted");
+ APInt AndVal = APInt::getLowBitsSet(TypeBits, TypeBits - ShAmt);
+ Value *And = Builder->CreateAnd(Xor, Builder->getInt(AndVal),
+ I.getName() + ".mask");
+ return new ICmpInst(I.getPredicate(), And,
+ Constant::getNullValue(Cst1->getType()));
+ }
+ }
+
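The new fold can be verified exhaustively at i8. A brute-force check (standalone C++, illustrative):

#include <cassert>
#include <cstdint>

int main() {
  // For 0 < c < 8: shifting left by c discards the top c bits, so equality
  // after the shift is exactly equality of the low 8-c bits.
  for (unsigned C = 1; C < 8; ++C)
    for (unsigned A = 0; A < 256; ++A)
      for (unsigned B = 0; B < 256; ++B) {
        bool Shifted = (uint8_t)(A << C) == (uint8_t)(B << C);
        bool Masked = ((A ^ B) & (0xFFu >> C)) == 0;
        assert(Shifted == Masked);
      }
}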
// Transform "icmp eq (trunc (lshr(X, cst1))), cst" to
// "icmp (and X, mask), cst"
uint64_t ShAmt = 0;
diff --git a/lib/Transforms/InstCombine/InstCombineInternal.h b/lib/Transforms/InstCombine/InstCombineInternal.h
index 2fd5318..fb2321d 100644
--- a/lib/Transforms/InstCombine/InstCombineInternal.h
+++ b/lib/Transforms/InstCombine/InstCombineInternal.h
@@ -158,10 +158,10 @@ private:
AssumptionCache *AC;
TargetLibraryInfo *TLI;
DominatorTree *DT;
+ const DataLayout &DL;
// Optional analyses. When non-null, these can both be used to do better
// combining and will be updated to reflect any changes.
- const DataLayout *DL;
LoopInfo *LI;
bool MadeIRChange;
@@ -169,7 +169,7 @@ private:
public:
InstCombiner(InstCombineWorklist &Worklist, BuilderTy *Builder,
bool MinimizeSize, AssumptionCache *AC, TargetLibraryInfo *TLI,
- DominatorTree *DT, const DataLayout *DL, LoopInfo *LI)
+ DominatorTree *DT, const DataLayout &DL, LoopInfo *LI)
: Worklist(Worklist), Builder(Builder), MinimizeSize(MinimizeSize),
AC(AC), TLI(TLI), DT(DT), DL(DL), LI(LI), MadeIRChange(false) {}
@@ -180,7 +180,7 @@ public:
AssumptionCache *getAssumptionCache() const { return AC; }
- const DataLayout *getDataLayout() const { return DL; }
+ const DataLayout &getDataLayout() const { return DL; }
DominatorTree *getDominatorTree() const { return DT; }
@@ -330,17 +330,17 @@ private:
Type *Ty);
Instruction *visitCallSite(CallSite CS);
- Instruction *tryOptimizeCall(CallInst *CI, const DataLayout *DL);
+ Instruction *tryOptimizeCall(CallInst *CI);
bool transformConstExprCastCall(CallSite CS);
Instruction *transformCallThroughTrampoline(CallSite CS,
IntrinsicInst *Tramp);
Instruction *transformZExtICmp(ICmpInst *ICI, Instruction &CI,
bool DoXform = true);
Instruction *transformSExtICmp(ICmpInst *ICI, Instruction &CI);
- bool WillNotOverflowSignedAdd(Value *LHS, Value *RHS, Instruction *CxtI);
- bool WillNotOverflowSignedSub(Value *LHS, Value *RHS, Instruction *CxtI);
- bool WillNotOverflowUnsignedSub(Value *LHS, Value *RHS, Instruction *CxtI);
- bool WillNotOverflowSignedMul(Value *LHS, Value *RHS, Instruction *CxtI);
+ bool WillNotOverflowSignedAdd(Value *LHS, Value *RHS, Instruction &CxtI);
+ bool WillNotOverflowSignedSub(Value *LHS, Value *RHS, Instruction &CxtI);
+ bool WillNotOverflowUnsignedSub(Value *LHS, Value *RHS, Instruction &CxtI);
+ bool WillNotOverflowSignedMul(Value *LHS, Value *RHS, Instruction &CxtI);
Value *EmitGEPOffset(User *GEP);
Instruction *scalarizePHI(ExtractElementInst &EI, PHINode *PN);
Value *EvaluateInDifferentElementOrder(Value *V, ArrayRef<int> Mask);
@@ -372,6 +372,10 @@ public:
/// I to the worklist, replace all uses of I with the new value, then return
/// I, so that the inst combiner will know that I was modified.
Instruction *ReplaceInstUsesWith(Instruction &I, Value *V) {
+ // If there are no uses to replace, then we return nullptr to indicate that
+ // no changes were made to the program.
+ if (I.use_empty()) return nullptr;
+
Worklist.AddUsersToWorkList(I); // Add all modified instrs to worklist.
// If we are replacing the instruction with itself, this must be in a
@@ -423,7 +427,7 @@ public:
}
void computeKnownBits(Value *V, APInt &KnownZero, APInt &KnownOne,
- unsigned Depth = 0, Instruction *CxtI = nullptr) const {
+ unsigned Depth, Instruction *CxtI) const {
return llvm::computeKnownBits(V, KnownZero, KnownOne, DL, Depth, AC, CxtI,
DT);
}
@@ -468,7 +472,7 @@ private:
/// bits.
Value *SimplifyDemandedUseBits(Value *V, APInt DemandedMask, APInt &KnownZero,
APInt &KnownOne, unsigned Depth,
- Instruction *CxtI = nullptr);
+ Instruction *CxtI);
bool SimplifyDemandedBits(Use &U, APInt DemandedMask, APInt &KnownZero,
APInt &KnownOne, unsigned Depth = 0);
/// Helper routine of SimplifyDemandedUseBits. It tries to simplify demanded
diff --git a/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp b/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
index b9eb986..6b0f268 100644
--- a/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
+++ b/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
@@ -164,62 +164,75 @@ isOnlyCopiedFromConstantGlobal(AllocaInst *AI,
return nullptr;
}
-Instruction *InstCombiner::visitAllocaInst(AllocaInst &AI) {
- // Ensure that the alloca array size argument has type intptr_t, so that
- // any casting is exposed early.
- if (DL) {
- Type *IntPtrTy = DL->getIntPtrType(AI.getType());
- if (AI.getArraySize()->getType() != IntPtrTy) {
- Value *V = Builder->CreateIntCast(AI.getArraySize(),
- IntPtrTy, false);
- AI.setOperand(0, V);
- return &AI;
- }
+static Instruction *simplifyAllocaArraySize(InstCombiner &IC, AllocaInst &AI) {
+ // Check for array size of 1 (scalar allocation).
+ if (!AI.isArrayAllocation()) {
+ // i32 1 is the canonical array size for scalar allocations.
+ if (AI.getArraySize()->getType()->isIntegerTy(32))
+ return nullptr;
+
+ // Canonicalize it.
+ Value *V = IC.Builder->getInt32(1);
+ AI.setOperand(0, V);
+ return &AI;
}
// Convert: alloca Ty, C - where C is a constant != 1 into: alloca [C x Ty], 1
- if (AI.isArrayAllocation()) { // Check C != 1
- if (const ConstantInt *C = dyn_cast<ConstantInt>(AI.getArraySize())) {
- Type *NewTy =
- ArrayType::get(AI.getAllocatedType(), C->getZExtValue());
- AllocaInst *New = Builder->CreateAlloca(NewTy, nullptr, AI.getName());
- New->setAlignment(AI.getAlignment());
-
- // Scan to the end of the allocation instructions, to skip over a block of
- // allocas if possible...also skip interleaved debug info
- //
- BasicBlock::iterator It = New;
- while (isa<AllocaInst>(*It) || isa<DbgInfoIntrinsic>(*It)) ++It;
-
- // Now that I is pointing to the first non-allocation-inst in the block,
- // insert our getelementptr instruction...
- //
- Type *IdxTy = DL
- ? DL->getIntPtrType(AI.getType())
- : Type::getInt64Ty(AI.getContext());
- Value *NullIdx = Constant::getNullValue(IdxTy);
- Value *Idx[2] = { NullIdx, NullIdx };
- Instruction *GEP =
+ if (const ConstantInt *C = dyn_cast<ConstantInt>(AI.getArraySize())) {
+ Type *NewTy = ArrayType::get(AI.getAllocatedType(), C->getZExtValue());
+ AllocaInst *New = IC.Builder->CreateAlloca(NewTy, nullptr, AI.getName());
+ New->setAlignment(AI.getAlignment());
+
+ // Scan to the end of the allocation instructions, to skip over a block of
+ // allocas if possible; also skip interleaved debug info
+ //
+ BasicBlock::iterator It = New;
+ while (isa<AllocaInst>(*It) || isa<DbgInfoIntrinsic>(*It))
+ ++It;
+
+ // Now that It is pointing to the first non-allocation-inst in the block,
+ // insert our getelementptr instruction...
+ //
+ Type *IdxTy = IC.getDataLayout().getIntPtrType(AI.getType());
+ Value *NullIdx = Constant::getNullValue(IdxTy);
+ Value *Idx[2] = {NullIdx, NullIdx};
+ Instruction *GEP =
GetElementPtrInst::CreateInBounds(New, Idx, New->getName() + ".sub");
- InsertNewInstBefore(GEP, *It);
+ IC.InsertNewInstBefore(GEP, *It);
- // Now make everything use the getelementptr instead of the original
- // allocation.
- return ReplaceInstUsesWith(AI, GEP);
- } else if (isa<UndefValue>(AI.getArraySize())) {
- return ReplaceInstUsesWith(AI, Constant::getNullValue(AI.getType()));
- }
+ // Now make everything use the getelementptr instead of the original
+ // allocation.
+ return IC.ReplaceInstUsesWith(AI, GEP);
}
- if (DL && AI.getAllocatedType()->isSized()) {
+ if (isa<UndefValue>(AI.getArraySize()))
+ return IC.ReplaceInstUsesWith(AI, Constant::getNullValue(AI.getType()));
+
+ // Ensure that the alloca array size argument has type intptr_t, so that
+ // any casting is exposed early.
+ Type *IntPtrTy = IC.getDataLayout().getIntPtrType(AI.getType());
+ if (AI.getArraySize()->getType() != IntPtrTy) {
+ Value *V = IC.Builder->CreateIntCast(AI.getArraySize(), IntPtrTy, false);
+ AI.setOperand(0, V);
+ return &AI;
+ }
+
+ return nullptr;
+}
+
+Instruction *InstCombiner::visitAllocaInst(AllocaInst &AI) {
+ if (auto *I = simplifyAllocaArraySize(*this, AI))
+ return I;
+
+ if (AI.getAllocatedType()->isSized()) {
// If the alignment is 0 (unspecified), assign it the preferred alignment.
if (AI.getAlignment() == 0)
- AI.setAlignment(DL->getPrefTypeAlignment(AI.getAllocatedType()));
+ AI.setAlignment(DL.getPrefTypeAlignment(AI.getAllocatedType()));
// Move all allocas of zero byte objects to the entry block and merge them
// together. Note that we only do this for allocas, because malloc should
// allocate and return a unique pointer, even for a zero byte allocation.
- if (DL->getTypeAllocSize(AI.getAllocatedType()) == 0) {
+ if (DL.getTypeAllocSize(AI.getAllocatedType()) == 0) {
// For a zero sized alloca there is no point in doing an array allocation.
// This is helpful if the array size is a complicated expression not used
// elsewhere.
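To summarize the control flow the new helper untangles (a sketch with hypothetical names, not the LLVM API): scalar allocas get the canonical i32 1 array size, constant sizes become a single [C x Ty] allocation plus a GEP, an undef size folds to null, and only a genuinely dynamic size is cast to intptr_t.

#include <cassert>
#include <cstdint>
#include <optional>

enum class Action { CanonicalizeToI32One, RewriteToArrayType,
                    FoldUndefToNull, CastToIntPtr, None };

// Dispatch order of simplifyAllocaArraySize, modeled on plain flags.
Action classify(bool isArrayAlloc, bool sizeIsI32,
                std::optional<uint64_t> constSize, bool sizeIsUndef,
                bool sizeIsIntPtr) {
  if (!isArrayAlloc)
    return sizeIsI32 ? Action::None : Action::CanonicalizeToI32One;
  if (constSize)
    return Action::RewriteToArrayType;
  if (sizeIsUndef)
    return Action::FoldUndefToNull;
  return sizeIsIntPtr ? Action::None : Action::CastToIntPtr;
}

int main() {
  assert(classify(false, true, {}, false, false) == Action::None);
  assert(classify(true, false, 8, false, false) == Action::RewriteToArrayType);
  assert(classify(true, false, {}, true, false) == Action::FoldUndefToNull);
  assert(classify(true, false, {}, false, false) == Action::CastToIntPtr);
  return 0;
}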
@@ -237,7 +250,7 @@ Instruction *InstCombiner::visitAllocaInst(AllocaInst &AI) {
// dominance as the array size was forced to a constant earlier already.
AllocaInst *EntryAI = dyn_cast<AllocaInst>(FirstInst);
if (!EntryAI || !EntryAI->getAllocatedType()->isSized() ||
- DL->getTypeAllocSize(EntryAI->getAllocatedType()) != 0) {
+ DL.getTypeAllocSize(EntryAI->getAllocatedType()) != 0) {
AI.moveBefore(FirstInst);
return &AI;
}
@@ -246,7 +259,7 @@ Instruction *InstCombiner::visitAllocaInst(AllocaInst &AI) {
// assign it the preferred alignment.
if (EntryAI->getAlignment() == 0)
EntryAI->setAlignment(
- DL->getPrefTypeAlignment(EntryAI->getAllocatedType()));
+ DL.getPrefTypeAlignment(EntryAI->getAllocatedType()));
// Replace this zero-sized alloca with the one at the start of the entry
// block after ensuring that the address will be aligned enough for both
// types.
@@ -270,7 +283,7 @@ Instruction *InstCombiner::visitAllocaInst(AllocaInst &AI) {
SmallVector<Instruction *, 4> ToDelete;
if (MemTransferInst *Copy = isOnlyCopiedFromConstantGlobal(&AI, ToDelete)) {
unsigned SourceAlign = getOrEnforceKnownAlignment(
- Copy->getSource(), AI.getAlignment(), DL, AC, &AI, DT);
+ Copy->getSource(), AI.getAlignment(), DL, &AI, AC, DT);
if (AI.getAlignment() <= SourceAlign) {
DEBUG(dbgs() << "Found alloca equal to global: " << AI << '\n');
DEBUG(dbgs() << " memcpy = " << *Copy << '\n');
@@ -439,22 +452,22 @@ static Instruction *combineLoadToOperationType(InstCombiner &IC, LoadInst &LI) {
return nullptr;
Type *Ty = LI.getType();
+ const DataLayout &DL = IC.getDataLayout();
// Try to canonicalize loads which are only ever stored to operate over
// integers instead of any other type. We only do this when the loaded type
// is sized and has a size exactly the same as its store size and the store
// size is a legal integer width.
- const DataLayout *DL = IC.getDataLayout();
- if (!Ty->isIntegerTy() && Ty->isSized() && DL &&
- DL->isLegalInteger(DL->getTypeStoreSizeInBits(Ty)) &&
- DL->getTypeStoreSizeInBits(Ty) == DL->getTypeSizeInBits(Ty)) {
+ if (!Ty->isIntegerTy() && Ty->isSized() &&
+ DL.isLegalInteger(DL.getTypeStoreSizeInBits(Ty)) &&
+ DL.getTypeStoreSizeInBits(Ty) == DL.getTypeSizeInBits(Ty)) {
if (std::all_of(LI.user_begin(), LI.user_end(), [&LI](User *U) {
auto *SI = dyn_cast<StoreInst>(U);
return SI && SI->getPointerOperand() != &LI;
})) {
LoadInst *NewLoad = combineLoadToNewType(
IC, LI,
- Type::getIntNTy(LI.getContext(), DL->getTypeStoreSizeInBits(Ty)));
+ Type::getIntNTy(LI.getContext(), DL.getTypeStoreSizeInBits(Ty)));
// Replace all the stores with stores of the newly loaded value.
for (auto UI = LI.user_begin(), UE = LI.user_end(); UI != UE;) {
auto *SI = cast<StoreInst>(*UI++);
@@ -489,7 +502,7 @@ static Instruction *combineLoadToOperationType(InstCombiner &IC, LoadInst &LI) {
//
// FIXME: This should probably live in ValueTracking (or similar).
static bool isObjectSizeLessThanOrEq(Value *V, uint64_t MaxSize,
- const DataLayout *DL) {
+ const DataLayout &DL) {
SmallPtrSet<Value *, 4> Visited;
SmallVector<Value *, 4> Worklist(1, V);
@@ -529,7 +542,7 @@ static bool isObjectSizeLessThanOrEq(Value *V, uint64_t MaxSize,
if (!CS)
return false;
- uint64_t TypeSize = DL->getTypeAllocSize(AI->getAllocatedType());
+ uint64_t TypeSize = DL.getTypeAllocSize(AI->getAllocatedType());
// Make sure that, even if the multiplication below would wrap as an
// uint64_t, we still do the right thing.
if ((CS->getValue().zextOrSelf(128)*APInt(128, TypeSize)).ugt(MaxSize))
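The zext-to-128-bits above guards against the element count times element size wrapping in 64 bits and sneaking under MaxSize. A small demonstration of the failure mode it avoids, assuming a compiler that provides __uint128_t (GCC and Clang do):

#include <cassert>
#include <cstdint>

int main() {
  uint64_t count = 1ull << 33, size = 1ull << 33, maxSize = 1024;
  bool wrapped64 = (count * size) <= maxSize;              // 2^66 wraps to 0
  bool checked128 = ((__uint128_t)count * size) <= maxSize;
  assert(wrapped64 && !checked128); // the 64-bit compare gets it wrong
  return 0;
}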
@@ -541,7 +554,7 @@ static bool isObjectSizeLessThanOrEq(Value *V, uint64_t MaxSize,
if (!GV->hasDefinitiveInitializer() || !GV->isConstant())
return false;
- uint64_t InitSize = DL->getTypeAllocSize(GV->getType()->getElementType());
+ uint64_t InitSize = DL.getTypeAllocSize(GV->getType()->getElementType());
if (InitSize > MaxSize)
return false;
continue;
@@ -570,8 +583,7 @@ static bool isObjectSizeLessThanOrEq(Value *V, uint64_t MaxSize,
// offsets those indices implied.
static bool canReplaceGEPIdxWithZero(InstCombiner &IC, GetElementPtrInst *GEPI,
Instruction *MemI, unsigned &Idx) {
- const DataLayout *DL = IC.getDataLayout();
- if (GEPI->getNumOperands() < 2 || !DL)
+ if (GEPI->getNumOperands() < 2)
return false;
// Find the first non-zero index of a GEP. If all indices are zero, return
@@ -603,7 +615,8 @@ static bool canReplaceGEPIdxWithZero(InstCombiner &IC, GetElementPtrInst *GEPI,
GetElementPtrInst::getIndexedType(GEPI->getOperand(0)->getType(), Ops);
if (!AllocTy || !AllocTy->isSized())
return false;
- uint64_t TyAllocSize = DL->getTypeAllocSize(AllocTy);
+ const DataLayout &DL = IC.getDataLayout();
+ uint64_t TyAllocSize = DL.getTypeAllocSize(AllocTy);
// If there are more indices after the one we might replace with a zero, make
// sure they're all non-negative. If any of them are negative, the overall
@@ -665,18 +678,16 @@ Instruction *InstCombiner::visitLoadInst(LoadInst &LI) {
return Res;
// Attempt to improve the alignment.
- if (DL) {
- unsigned KnownAlign = getOrEnforceKnownAlignment(
- Op, DL->getPrefTypeAlignment(LI.getType()), DL, AC, &LI, DT);
- unsigned LoadAlign = LI.getAlignment();
- unsigned EffectiveLoadAlign = LoadAlign != 0 ? LoadAlign :
- DL->getABITypeAlignment(LI.getType());
-
- if (KnownAlign > EffectiveLoadAlign)
- LI.setAlignment(KnownAlign);
- else if (LoadAlign == 0)
- LI.setAlignment(EffectiveLoadAlign);
- }
+ unsigned KnownAlign = getOrEnforceKnownAlignment(
+ Op, DL.getPrefTypeAlignment(LI.getType()), DL, &LI, AC, DT);
+ unsigned LoadAlign = LI.getAlignment();
+ unsigned EffectiveLoadAlign =
+ LoadAlign != 0 ? LoadAlign : DL.getABITypeAlignment(LI.getType());
+
+ if (KnownAlign > EffectiveLoadAlign)
+ LI.setAlignment(KnownAlign);
+ else if (LoadAlign == 0)
+ LI.setAlignment(EffectiveLoadAlign);
// Replace GEP indices if possible.
if (Instruction *NewGEPI = replaceGEPIdxWithZero(*this, Op, LI)) {
@@ -738,8 +749,8 @@ Instruction *InstCombiner::visitLoadInst(LoadInst &LI) {
if (SelectInst *SI = dyn_cast<SelectInst>(Op)) {
// load (select (Cond, &V1, &V2)) --> select(Cond, load &V1, load &V2).
unsigned Align = LI.getAlignment();
- if (isSafeToLoadUnconditionally(SI->getOperand(1), SI, Align, DL) &&
- isSafeToLoadUnconditionally(SI->getOperand(2), SI, Align, DL)) {
+ if (isSafeToLoadUnconditionally(SI->getOperand(1), SI, Align) &&
+ isSafeToLoadUnconditionally(SI->getOperand(2), SI, Align)) {
LoadInst *V1 = Builder->CreateLoad(SI->getOperand(1),
SI->getOperand(1)->getName()+".val");
LoadInst *V2 = Builder->CreateLoad(SI->getOperand(2),
@@ -807,6 +818,30 @@ static bool combineStoreToValueType(InstCombiner &IC, StoreInst &SI) {
return false;
}
+static bool unpackStoreToAggregate(InstCombiner &IC, StoreInst &SI) {
+ // FIXME: We could probably with some care handle both volatile and atomic
+ // stores here but it isn't clear that this is important.
+ if (!SI.isSimple())
+ return false;
+
+ Value *V = SI.getValueOperand();
+ Type *T = V->getType();
+
+ if (!T->isAggregateType())
+ return false;
+
+ if (StructType *ST = dyn_cast<StructType>(T)) {
+ // If the struct only has one element, we unpack.
+ if (ST->getNumElements() == 1) {
+ V = IC.Builder->CreateExtractValue(V, 0);
+ combineStoreToNewValue(IC, SI, V);
+ return true;
+ }
+ }
+
+ return false;
+}
+
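The unpacking is sound because a one-element struct and its sole element occupy the same bytes, so storing either writes the same memory. A scalar model of that claim:

#include <cassert>
#include <cstring>

struct Wrapped { int X; }; // stand-in for a one-element struct type

int main() {
  int viaAggregate = 0, viaElement = 0;
  Wrapped w{42};
  std::memcpy(&viaAggregate, &w, sizeof(int)); // store the whole struct
  viaElement = w.X;                            // store the extracted element
  assert(viaAggregate == viaElement);
  return 0;
}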
/// equivalentAddressValues - Test if A and B will obviously have the same
/// value. This includes recognizing that %t0 and %t1 will have the same
/// value in code like this:
@@ -845,18 +880,20 @@ Instruction *InstCombiner::visitStoreInst(StoreInst &SI) {
return EraseInstFromFunction(SI);
// Attempt to improve the alignment.
- if (DL) {
- unsigned KnownAlign = getOrEnforceKnownAlignment(
- Ptr, DL->getPrefTypeAlignment(Val->getType()), DL, AC, &SI, DT);
- unsigned StoreAlign = SI.getAlignment();
- unsigned EffectiveStoreAlign = StoreAlign != 0 ? StoreAlign :
- DL->getABITypeAlignment(Val->getType());
-
- if (KnownAlign > EffectiveStoreAlign)
- SI.setAlignment(KnownAlign);
- else if (StoreAlign == 0)
- SI.setAlignment(EffectiveStoreAlign);
- }
+ unsigned KnownAlign = getOrEnforceKnownAlignment(
+ Ptr, DL.getPrefTypeAlignment(Val->getType()), DL, &SI, AC, DT);
+ unsigned StoreAlign = SI.getAlignment();
+ unsigned EffectiveStoreAlign =
+ StoreAlign != 0 ? StoreAlign : DL.getABITypeAlignment(Val->getType());
+
+ if (KnownAlign > EffectiveStoreAlign)
+ SI.setAlignment(KnownAlign);
+ else if (StoreAlign == 0)
+ SI.setAlignment(EffectiveStoreAlign);
+
+ // Try to canonicalize the stored type.
+ if (unpackStoreToAggregate(*this, SI))
+ return EraseInstFromFunction(SI);
// Replace GEP indices if possible.
if (Instruction *NewGEPI = replaceGEPIdxWithZero(*this, Ptr, SI)) {
diff --git a/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp b/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp
index c48e3c9..35513f1 100644
--- a/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp
+++ b/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp
@@ -26,7 +26,7 @@ using namespace PatternMatch;
/// where it is known to be non-zero. If this allows us to simplify the
/// computation, do so and return the new operand, otherwise return null.
static Value *simplifyValueKnownNonZero(Value *V, InstCombiner &IC,
- Instruction *CxtI) {
+ Instruction &CxtI) {
// If V has multiple uses, then we would have to do more analysis to determine
// if this is safe. For example, the use could be in dynamically unreached
// code.
@@ -47,8 +47,8 @@ static Value *simplifyValueKnownNonZero(Value *V, InstCombiner &IC,
// inexact. Similarly for <<.
if (BinaryOperator *I = dyn_cast<BinaryOperator>(V))
if (I->isLogicalShift() &&
- isKnownToBeAPowerOfTwo(I->getOperand(0), false, 0,
- IC.getAssumptionCache(), CxtI,
+ isKnownToBeAPowerOfTwo(I->getOperand(0), IC.getDataLayout(), false, 0,
+ IC.getAssumptionCache(), &CxtI,
IC.getDominatorTree())) {
// We know that this is an exact/nuw shift and that the input is a
// non-zero context as well.
@@ -126,7 +126,7 @@ static Constant *getLogBase2Vector(ConstantDataVector *CV) {
/// \brief Return true if we can prove that:
/// (mul LHS, RHS) === (mul nsw LHS, RHS)
bool InstCombiner::WillNotOverflowSignedMul(Value *LHS, Value *RHS,
- Instruction *CxtI) {
+ Instruction &CxtI) {
// Multiplying n * m significant bits yields a result of n + m significant
// bits. If the total number of significant bits does not exceed the
// result bit width (minus 1), there is no overflow.
@@ -137,8 +137,8 @@ bool InstCombiner::WillNotOverflowSignedMul(Value *LHS, Value *RHS,
// Note that underestimating the number of sign bits gives a more
// conservative answer.
- unsigned SignBits = ComputeNumSignBits(LHS, 0, CxtI) +
- ComputeNumSignBits(RHS, 0, CxtI);
+ unsigned SignBits =
+ ComputeNumSignBits(LHS, 0, &CxtI) + ComputeNumSignBits(RHS, 0, &CxtI);
// First handle the easy case: if we have enough sign bits there's
// definitely no overflow.
@@ -157,8 +157,8 @@ bool InstCombiner::WillNotOverflowSignedMul(Value *LHS, Value *RHS,
// For simplicity we just check if at least one side is not negative.
bool LHSNonNegative, LHSNegative;
bool RHSNonNegative, RHSNegative;
- ComputeSignBit(LHS, LHSNonNegative, LHSNegative, /*Depth=*/0, CxtI);
- ComputeSignBit(RHS, RHSNonNegative, RHSNegative, /*Depth=*/0, CxtI);
+ ComputeSignBit(LHS, LHSNonNegative, LHSNegative, /*Depth=*/0, &CxtI);
+ ComputeSignBit(RHS, RHSNonNegative, RHSNegative, /*Depth=*/0, &CxtI);
if (LHSNonNegative || RHSNonNegative)
return true;
}
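The significant-bits argument in the comment above can be exercised directly: if the operands' significant bits (counting the sign bit) sum to at most the bit width minus one, the exact product stays in range. A rough check for i32, with a hypothetical significantBits helper:

#include <cassert>
#include <cstdint>

// Significant bits of a signed value, counting the sign bit.
int significantBits(int32_t v) {
  int64_t mag = v < 0 ? ~int64_t(v) : int64_t(v);
  int bits = 0;
  while (mag) { ++bits; mag >>= 1; }
  return bits + 1;
}

int main() {
  int32_t a = 1000, b = -2000; // 11 and 12 significant bits
  assert(significantBits(a) + significantBits(b) <= 31);
  int64_t exact = int64_t(a) * int64_t(b);
  assert(exact >= INT32_MIN && exact <= INT32_MAX); // no signed overflow
  return 0;
}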
@@ -375,7 +375,7 @@ Instruction *InstCombiner::visitMul(BinaryOperator &I) {
}
}
- if (!I.hasNoSignedWrap() && WillNotOverflowSignedMul(Op0, Op1, &I)) {
+ if (!I.hasNoSignedWrap() && WillNotOverflowSignedMul(Op0, Op1, I)) {
Changed = true;
I.setHasNoSignedWrap(true);
}
@@ -422,7 +422,7 @@ static bool isFiniteNonZeroFp(Constant *C) {
if (C->getType()->isVectorTy()) {
for (unsigned I = 0, E = C->getType()->getVectorNumElements(); I != E;
++I) {
- ConstantFP *CFP = dyn_cast<ConstantFP>(C->getAggregateElement(I));
+ ConstantFP *CFP = dyn_cast_or_null<ConstantFP>(C->getAggregateElement(I));
if (!CFP || !CFP->getValueAPF().isFiniteNonZero())
return false;
}
@@ -437,7 +437,7 @@ static bool isNormalFp(Constant *C) {
if (C->getType()->isVectorTy()) {
for (unsigned I = 0, E = C->getType()->getVectorNumElements(); I != E;
++I) {
- ConstantFP *CFP = dyn_cast<ConstantFP>(C->getAggregateElement(I));
+ ConstantFP *CFP = dyn_cast_or_null<ConstantFP>(C->getAggregateElement(I));
if (!CFP || !CFP->getValueAPF().isNormal())
return false;
}
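The two dyn_cast -> dyn_cast_or_null switches matter because getAggregateElement can return null, and a plain dyn_cast requires a non-null input. A toy model of the distinction, using stand-in types rather than LLVM's casting machinery:

#include <cassert>

struct Constant { virtual ~Constant() = default; };
struct ConstantFP : Constant {};

// Stand-in for llvm::dyn_cast_or_null: tolerate a null input.
template <typename T> T *dyn_cast_or_null_model(Constant *C) {
  return C ? dynamic_cast<T *>(C) : nullptr;
}

int main() {
  Constant *missing = nullptr; // what getAggregateElement may hand back
  assert(dyn_cast_or_null_model<ConstantFP>(missing) == nullptr);
  return 0;
}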
@@ -780,7 +780,7 @@ Instruction *InstCombiner::commonIDivTransforms(BinaryOperator &I) {
Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
// The RHS is known non-zero.
- if (Value *V = simplifyValueKnownNonZero(I.getOperand(1), *this, &I)) {
+ if (Value *V = simplifyValueKnownNonZero(I.getOperand(1), *this, I)) {
I.setOperand(1, V);
return &I;
}
@@ -1155,7 +1155,7 @@ Instruction *InstCombiner::visitSDiv(BinaryOperator &I) {
return BO;
}
- if (isKnownToBeAPowerOfTwo(Op1, /*OrZero*/ true, 0, AC, &I, DT)) {
+ if (isKnownToBeAPowerOfTwo(Op1, DL, /*OrZero*/ true, 0, AC, &I, DT)) {
// X sdiv (1 << Y) -> X udiv (1 << Y) ( -> X u>> Y)
// Safe because the only negative value (1 << Y) can take on is
// INT_MIN, and X sdiv INT_MIN == X udiv INT_MIN == 0 if X doesn't have
@@ -1338,7 +1338,7 @@ Instruction *InstCombiner::commonIRemTransforms(BinaryOperator &I) {
Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
// The RHS is known non-zero.
- if (Value *V = simplifyValueKnownNonZero(I.getOperand(1), *this, &I)) {
+ if (Value *V = simplifyValueKnownNonZero(I.getOperand(1), *this, I)) {
I.setOperand(1, V);
return &I;
}
@@ -1385,7 +1385,7 @@ Instruction *InstCombiner::visitURem(BinaryOperator &I) {
I.getType());
// X urem Y -> X and Y-1, where Y is a power of 2,
- if (isKnownToBeAPowerOfTwo(Op1, /*OrZero*/ true, 0, AC, &I, DT)) {
+ if (isKnownToBeAPowerOfTwo(Op1, DL, /*OrZero*/ true, 0, AC, &I, DT)) {
Constant *N1 = Constant::getAllOnesValue(I.getType());
Value *Add = Builder->CreateAdd(Op1, N1);
return BinaryOperator::CreateAnd(Op0, Add);
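That fold is the classic modulo-by-power-of-two trick, easy to confirm in scalar code:

#include <cassert>
#include <cstdint>

// X urem Y == X & (Y - 1) whenever Y is a power of two.
int main() {
  const uint32_t ys[] = {1u, 2u, 8u, 1u << 20};
  const uint32_t xs[] = {0u, 7u, 123456u, 0xFFFFFFFFu};
  for (uint32_t y : ys)
    for (uint32_t x : xs)
      assert(x % y == (x & (y - 1)));
  return 0;
}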
diff --git a/lib/Transforms/InstCombine/InstCombinePHI.cpp b/lib/Transforms/InstCombine/InstCombinePHI.cpp
index 0e73db8..ca2caed 100644
--- a/lib/Transforms/InstCombine/InstCombinePHI.cpp
+++ b/lib/Transforms/InstCombine/InstCombinePHI.cpp
@@ -15,7 +15,6 @@
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/Analysis/InstructionSimplify.h"
-#include "llvm/IR/DataLayout.h"
using namespace llvm;
#define DEBUG_TYPE "instcombine"
@@ -231,7 +230,8 @@ Instruction *InstCombiner::FoldPHIArgGEPIntoPHI(PHINode &PN) {
Value *Base = FixedOperands[0];
GetElementPtrInst *NewGEP =
- GetElementPtrInst::Create(Base, makeArrayRef(FixedOperands).slice(1));
+ GetElementPtrInst::Create(FirstInst->getSourceElementType(), Base,
+ makeArrayRef(FixedOperands).slice(1));
if (AllInBounds) NewGEP->setIsInBounds();
NewGEP->setDebugLoc(FirstInst->getDebugLoc());
return NewGEP;
@@ -891,8 +891,8 @@ Instruction *InstCombiner::visitPHINode(PHINode &PN) {
// it is only used by trunc or trunc(lshr) operations. If so, we split the
// PHI into the various pieces being extracted. This sort of thing is
// introduced when SROA promotes an aggregate to a single large integer type.
- if (PN.getType()->isIntegerTy() && DL &&
- !DL->isLegalInteger(PN.getType()->getPrimitiveSizeInBits()))
+ if (PN.getType()->isIntegerTy() &&
+ !DL.isLegalInteger(PN.getType()->getPrimitiveSizeInBits()))
if (Instruction *Res = SliceUpIllegalIntegerPHI(PN))
return Res;
diff --git a/lib/Transforms/InstCombine/InstCombineSelect.cpp b/lib/Transforms/InstCombine/InstCombineSelect.cpp
index dd0e65f..b28611f 100644
--- a/lib/Transforms/InstCombine/InstCombineSelect.cpp
+++ b/lib/Transforms/InstCombine/InstCombineSelect.cpp
@@ -312,9 +312,9 @@ Instruction *InstCombiner::FoldSelectIntoOp(SelectInst &SI, Value *TrueVal,
/// SimplifyWithOpReplaced - See if V simplifies when its operand Op is
/// replaced with RepOp.
static Value *SimplifyWithOpReplaced(Value *V, Value *Op, Value *RepOp,
- const DataLayout *TD,
const TargetLibraryInfo *TLI,
- DominatorTree *DT, AssumptionCache *AC) {
+ const DataLayout &DL, DominatorTree *DT,
+ AssumptionCache *AC) {
// Trivial replacement.
if (V == Op)
return RepOp;
@@ -326,18 +326,18 @@ static Value *SimplifyWithOpReplaced(Value *V, Value *Op, Value *RepOp,
// If this is a binary operator, try to simplify it with the replaced op.
if (BinaryOperator *B = dyn_cast<BinaryOperator>(I)) {
if (B->getOperand(0) == Op)
- return SimplifyBinOp(B->getOpcode(), RepOp, B->getOperand(1), TD, TLI);
+ return SimplifyBinOp(B->getOpcode(), RepOp, B->getOperand(1), DL, TLI);
if (B->getOperand(1) == Op)
- return SimplifyBinOp(B->getOpcode(), B->getOperand(0), RepOp, TD, TLI);
+ return SimplifyBinOp(B->getOpcode(), B->getOperand(0), RepOp, DL, TLI);
}
// Same for CmpInsts.
if (CmpInst *C = dyn_cast<CmpInst>(I)) {
if (C->getOperand(0) == Op)
- return SimplifyCmpInst(C->getPredicate(), RepOp, C->getOperand(1), TD,
+ return SimplifyCmpInst(C->getPredicate(), RepOp, C->getOperand(1), DL,
TLI, DT, AC);
if (C->getOperand(1) == Op)
- return SimplifyCmpInst(C->getPredicate(), C->getOperand(0), RepOp, TD,
+ return SimplifyCmpInst(C->getPredicate(), C->getOperand(0), RepOp, DL,
TLI, DT, AC);
}
@@ -361,14 +361,14 @@ static Value *SimplifyWithOpReplaced(Value *V, Value *Op, Value *RepOp,
if (ConstOps.size() == I->getNumOperands()) {
if (CmpInst *C = dyn_cast<CmpInst>(I))
return ConstantFoldCompareInstOperands(C->getPredicate(), ConstOps[0],
- ConstOps[1], TD, TLI);
+ ConstOps[1], DL, TLI);
if (LoadInst *LI = dyn_cast<LoadInst>(I))
if (!LI->isVolatile())
- return ConstantFoldLoadFromConstPtr(ConstOps[0], TD);
+ return ConstantFoldLoadFromConstPtr(ConstOps[0], DL);
- return ConstantFoldInstOperands(I->getOpcode(), I->getType(),
- ConstOps, TD, TLI);
+ return ConstantFoldInstOperands(I->getOpcode(), I->getType(), ConstOps,
+ DL, TLI);
}
}
@@ -635,25 +635,25 @@ Instruction *InstCombiner::visitSelectInstWithICmp(SelectInst &SI,
// arms of the select. See if substituting this value into the arm and
// simplifying the result yields the same value as the other arm.
if (Pred == ICmpInst::ICMP_EQ) {
- if (SimplifyWithOpReplaced(FalseVal, CmpLHS, CmpRHS, DL, TLI, DT, AC) ==
+ if (SimplifyWithOpReplaced(FalseVal, CmpLHS, CmpRHS, TLI, DL, DT, AC) ==
TrueVal ||
- SimplifyWithOpReplaced(FalseVal, CmpRHS, CmpLHS, DL, TLI, DT, AC) ==
+ SimplifyWithOpReplaced(FalseVal, CmpRHS, CmpLHS, TLI, DL, DT, AC) ==
TrueVal)
return ReplaceInstUsesWith(SI, FalseVal);
- if (SimplifyWithOpReplaced(TrueVal, CmpLHS, CmpRHS, DL, TLI, DT, AC) ==
+ if (SimplifyWithOpReplaced(TrueVal, CmpLHS, CmpRHS, TLI, DL, DT, AC) ==
FalseVal ||
- SimplifyWithOpReplaced(TrueVal, CmpRHS, CmpLHS, DL, TLI, DT, AC) ==
+ SimplifyWithOpReplaced(TrueVal, CmpRHS, CmpLHS, TLI, DL, DT, AC) ==
FalseVal)
return ReplaceInstUsesWith(SI, FalseVal);
} else if (Pred == ICmpInst::ICMP_NE) {
- if (SimplifyWithOpReplaced(TrueVal, CmpLHS, CmpRHS, DL, TLI, DT, AC) ==
+ if (SimplifyWithOpReplaced(TrueVal, CmpLHS, CmpRHS, TLI, DL, DT, AC) ==
FalseVal ||
- SimplifyWithOpReplaced(TrueVal, CmpRHS, CmpLHS, DL, TLI, DT, AC) ==
+ SimplifyWithOpReplaced(TrueVal, CmpRHS, CmpLHS, TLI, DL, DT, AC) ==
FalseVal)
return ReplaceInstUsesWith(SI, TrueVal);
- if (SimplifyWithOpReplaced(FalseVal, CmpLHS, CmpRHS, DL, TLI, DT, AC) ==
+ if (SimplifyWithOpReplaced(FalseVal, CmpLHS, CmpRHS, TLI, DL, DT, AC) ==
TrueVal ||
- SimplifyWithOpReplaced(FalseVal, CmpRHS, CmpLHS, DL, TLI, DT, AC) ==
+ SimplifyWithOpReplaced(FalseVal, CmpRHS, CmpLHS, TLI, DL, DT, AC) ==
TrueVal)
return ReplaceInstUsesWith(SI, TrueVal);
}
@@ -927,7 +927,7 @@ Instruction *InstCombiner::visitSelectInst(SelectInst &SI) {
return BinaryOperator::CreateAnd(NotCond, FalseVal);
}
if (ConstantInt *C = dyn_cast<ConstantInt>(FalseVal)) {
- if (C->getZExtValue() == false) {
+ if (!C->getZExtValue()) {
// Change: A = select B, C, false --> A = and B, C
return BinaryOperator::CreateAnd(CondVal, TrueVal);
}
@@ -1203,37 +1203,41 @@ Instruction *InstCombiner::visitSelectInst(SelectInst &SI) {
return NV;
if (SelectInst *TrueSI = dyn_cast<SelectInst>(TrueVal)) {
- // select(C, select(C, a, b), c) -> select(C, a, c)
- if (TrueSI->getCondition() == CondVal) {
- if (SI.getTrueValue() == TrueSI->getTrueValue())
- return nullptr;
- SI.setOperand(1, TrueSI->getTrueValue());
- return &SI;
- }
- // select(C0, select(C1, a, b), b) -> select(C0&C1, a, b)
- // We choose this as normal form to enable folding on the And and shortening
- // paths for the values (this helps GetUnderlyingObjects() for example).
- if (TrueSI->getFalseValue() == FalseVal && TrueSI->hasOneUse()) {
- Value *And = Builder->CreateAnd(CondVal, TrueSI->getCondition());
- SI.setOperand(0, And);
- SI.setOperand(1, TrueSI->getTrueValue());
- return &SI;
+ if (TrueSI->getCondition()->getType() == CondVal->getType()) {
+ // select(C, select(C, a, b), c) -> select(C, a, c)
+ if (TrueSI->getCondition() == CondVal) {
+ if (SI.getTrueValue() == TrueSI->getTrueValue())
+ return nullptr;
+ SI.setOperand(1, TrueSI->getTrueValue());
+ return &SI;
+ }
+ // select(C0, select(C1, a, b), b) -> select(C0&C1, a, b)
+ // We choose this as normal form to enable folding on the And and shortening
+ // paths for the values (this helps GetUnderlyingObjects() for example).
+ if (TrueSI->getFalseValue() == FalseVal && TrueSI->hasOneUse()) {
+ Value *And = Builder->CreateAnd(CondVal, TrueSI->getCondition());
+ SI.setOperand(0, And);
+ SI.setOperand(1, TrueSI->getTrueValue());
+ return &SI;
+ }
}
}
if (SelectInst *FalseSI = dyn_cast<SelectInst>(FalseVal)) {
- // select(C, a, select(C, b, c)) -> select(C, a, c)
- if (FalseSI->getCondition() == CondVal) {
- if (SI.getFalseValue() == FalseSI->getFalseValue())
- return nullptr;
- SI.setOperand(2, FalseSI->getFalseValue());
- return &SI;
- }
- // select(C0, a, select(C1, a, b)) -> select(C0|C1, a, b)
- if (FalseSI->getTrueValue() == TrueVal && FalseSI->hasOneUse()) {
- Value *Or = Builder->CreateOr(CondVal, FalseSI->getCondition());
- SI.setOperand(0, Or);
- SI.setOperand(2, FalseSI->getFalseValue());
- return &SI;
+ if (FalseSI->getCondition()->getType() == CondVal->getType()) {
+ // select(C, a, select(C, b, c)) -> select(C, a, c)
+ if (FalseSI->getCondition() == CondVal) {
+ if (SI.getFalseValue() == FalseSI->getFalseValue())
+ return nullptr;
+ SI.setOperand(2, FalseSI->getFalseValue());
+ return &SI;
+ }
+ // select(C0, a, select(C1, a, b)) -> select(C0|C1, a, b)
+ if (FalseSI->getTrueValue() == TrueVal && FalseSI->hasOneUse()) {
+ Value *Or = Builder->CreateOr(CondVal, FalseSI->getCondition());
+ SI.setOperand(0, Or);
+ SI.setOperand(2, FalseSI->getFalseValue());
+ return &SI;
+ }
}
}
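The rewrites themselves are unchanged here; the added guard only ensures both conditions have the same type (scalar vs. vector i1) before they are combined into one. The and-combining normal form is easy to verify for scalars:

#include <cassert>

// select(C0, select(C1, a, b), b) == select(C0 & C1, a, b).
int main() {
  const int a = 1, b = 2;
  const bool flags[] = {false, true};
  for (bool c0 : flags)
    for (bool c1 : flags)
      assert((c0 ? (c1 ? a : b) : b) == ((c0 && c1) ? a : b));
  return 0;
}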
diff --git a/lib/Transforms/InstCombine/InstCombineShifts.cpp b/lib/Transforms/InstCombine/InstCombineShifts.cpp
index b4976e0..a414ec6 100644
--- a/lib/Transforms/InstCombine/InstCombineShifts.cpp
+++ b/lib/Transforms/InstCombine/InstCombineShifts.cpp
@@ -187,7 +187,7 @@ static bool CanEvaluateShifted(Value *V, unsigned NumBits, bool isLeftShift,
/// GetShiftedValue - When CanEvaluateShifted returned true for an expression,
/// this value inserts the new computation that produces the shifted value.
static Value *GetShiftedValue(Value *V, unsigned NumBits, bool isLeftShift,
- InstCombiner &IC) {
+ InstCombiner &IC, const DataLayout &DL) {
// We can always evaluate constants shifted.
if (Constant *C = dyn_cast<Constant>(V)) {
if (isLeftShift)
@@ -196,8 +196,7 @@ static Value *GetShiftedValue(Value *V, unsigned NumBits, bool isLeftShift,
V = IC.Builder->CreateLShr(C, NumBits);
// If we got a constantexpr back, try to simplify it with TD info.
if (ConstantExpr *CE = dyn_cast<ConstantExpr>(V))
- V = ConstantFoldConstantExpression(CE, IC.getDataLayout(),
- IC.getTargetLibraryInfo());
+ V = ConstantFoldConstantExpression(CE, DL, IC.getTargetLibraryInfo());
return V;
}
@@ -210,8 +209,10 @@ static Value *GetShiftedValue(Value *V, unsigned NumBits, bool isLeftShift,
case Instruction::Or:
case Instruction::Xor:
// Bitwise operators can all be evaluated in shifted form.
- I->setOperand(0, GetShiftedValue(I->getOperand(0), NumBits,isLeftShift,IC));
- I->setOperand(1, GetShiftedValue(I->getOperand(1), NumBits,isLeftShift,IC));
+ I->setOperand(
+ 0, GetShiftedValue(I->getOperand(0), NumBits, isLeftShift, IC, DL));
+ I->setOperand(
+ 1, GetShiftedValue(I->getOperand(1), NumBits, isLeftShift, IC, DL));
return I;
case Instruction::Shl: {
@@ -297,8 +298,10 @@ static Value *GetShiftedValue(Value *V, unsigned NumBits, bool isLeftShift,
}
case Instruction::Select:
- I->setOperand(1, GetShiftedValue(I->getOperand(1), NumBits,isLeftShift,IC));
- I->setOperand(2, GetShiftedValue(I->getOperand(2), NumBits,isLeftShift,IC));
+ I->setOperand(
+ 1, GetShiftedValue(I->getOperand(1), NumBits, isLeftShift, IC, DL));
+ I->setOperand(
+ 2, GetShiftedValue(I->getOperand(2), NumBits, isLeftShift, IC, DL));
return I;
case Instruction::PHI: {
// We can change a phi if we can change all operands. Note that we never
@@ -306,8 +309,8 @@ static Value *GetShiftedValue(Value *V, unsigned NumBits, bool isLeftShift,
// instructions with a single use.
PHINode *PN = cast<PHINode>(I);
for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
- PN->setIncomingValue(i, GetShiftedValue(PN->getIncomingValue(i),
- NumBits, isLeftShift, IC));
+ PN->setIncomingValue(i, GetShiftedValue(PN->getIncomingValue(i), NumBits,
+ isLeftShift, IC, DL));
return PN;
}
}
@@ -337,8 +340,8 @@ Instruction *InstCombiner::FoldShiftByConstant(Value *Op0, Constant *Op1,
DEBUG(dbgs() << "ICE: GetShiftedValue propagating shift through expression"
" to eliminate shift:\n IN: " << *Op0 << "\n SH: " << I <<"\n");
- return ReplaceInstUsesWith(I,
- GetShiftedValue(Op0, COp1->getZExtValue(), isLeftShift, *this));
+ return ReplaceInstUsesWith(
+ I, GetShiftedValue(Op0, COp1->getZExtValue(), isLeftShift, *this, DL));
}
// See if we can simplify any instructions used by the instruction whose sole
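GetShiftedValue is sound for the bitwise cases because shifts distribute over them: shifting each operand is the same as shifting the result. Scalar spot-check:

#include <cassert>
#include <cstdint>

int main() {
  const unsigned n = 4;
  const uint32_t xs[] = {0u, 0xA5u, 0xFFFF0000u};
  const uint32_t ys[] = {0u, 0x5Au, 0x0000FFFFu};
  for (uint32_t x : xs)
    for (uint32_t y : ys) {
      assert(((x | y) << n) == ((x << n) | (y << n)));
      assert(((x & y) << n) == ((x << n) & (y << n)));
      assert(((x ^ y) << n) == ((x << n) ^ (y << n)));
    }
  return 0;
}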
diff --git a/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp b/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
index c5603aa..cd391d0 100644
--- a/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
+++ b/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
@@ -13,7 +13,6 @@
//===----------------------------------------------------------------------===//
#include "InstCombineInternal.h"
-#include "llvm/IR/DataLayout.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/PatternMatch.h"
@@ -70,8 +69,8 @@ bool InstCombiner::SimplifyDemandedInstructionBits(Instruction &Inst) {
APInt KnownZero(BitWidth, 0), KnownOne(BitWidth, 0);
APInt DemandedMask(APInt::getAllOnesValue(BitWidth));
- Value *V = SimplifyDemandedUseBits(&Inst, DemandedMask,
- KnownZero, KnownOne, 0, &Inst);
+ Value *V = SimplifyDemandedUseBits(&Inst, DemandedMask, KnownZero, KnownOne,
+ 0, &Inst);
if (!V) return false;
if (V == &Inst) return true;
ReplaceInstUsesWith(Inst, V);
@@ -84,9 +83,9 @@ bool InstCombiner::SimplifyDemandedInstructionBits(Instruction &Inst) {
bool InstCombiner::SimplifyDemandedBits(Use &U, APInt DemandedMask,
APInt &KnownZero, APInt &KnownOne,
unsigned Depth) {
- Value *NewVal = SimplifyDemandedUseBits(U.get(), DemandedMask,
- KnownZero, KnownOne, Depth,
- dyn_cast<Instruction>(U.getUser()));
+ Value *NewVal =
+ SimplifyDemandedUseBits(U.get(), DemandedMask, KnownZero, KnownOne, Depth,
+ dyn_cast<Instruction>(U.getUser()));
if (!NewVal) return false;
U = NewVal;
return true;
@@ -122,15 +121,12 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
assert(Depth <= 6 && "Limit Search Depth");
uint32_t BitWidth = DemandedMask.getBitWidth();
Type *VTy = V->getType();
- assert((DL || !VTy->isPointerTy()) &&
- "SimplifyDemandedBits needs to know bit widths!");
- assert((!DL || DL->getTypeSizeInBits(VTy->getScalarType()) == BitWidth) &&
- (!VTy->isIntOrIntVectorTy() ||
- VTy->getScalarSizeInBits() == BitWidth) &&
- KnownZero.getBitWidth() == BitWidth &&
- KnownOne.getBitWidth() == BitWidth &&
- "Value *V, DemandedMask, KnownZero and KnownOne "
- "must have same BitWidth");
+ assert(
+ (!VTy->isIntOrIntVectorTy() || VTy->getScalarSizeInBits() == BitWidth) &&
+ KnownZero.getBitWidth() == BitWidth &&
+ KnownOne.getBitWidth() == BitWidth &&
+ "Value *V, DemandedMask, KnownZero and KnownOne "
+ "must have same BitWidth");
if (ConstantInt *CI = dyn_cast<ConstantInt>(V)) {
// We know all of the bits for a constant!
KnownOne = CI->getValue() & DemandedMask;
@@ -174,9 +170,9 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
// this instruction has a simpler value in that context.
if (I->getOpcode() == Instruction::And) {
// If either the LHS or the RHS are Zero, the result is zero.
- computeKnownBits(I->getOperand(1), RHSKnownZero, RHSKnownOne, Depth+1,
+ computeKnownBits(I->getOperand(1), RHSKnownZero, RHSKnownOne, Depth + 1,
CxtI);
- computeKnownBits(I->getOperand(0), LHSKnownZero, LHSKnownOne, Depth+1,
+ computeKnownBits(I->getOperand(0), LHSKnownZero, LHSKnownOne, Depth + 1,
CxtI);
// If all of the demanded bits are known 1 on one side, return the other.
@@ -198,9 +194,9 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
// only bits from X or Y are demanded.
// If either the LHS or the RHS are One, the result is One.
- computeKnownBits(I->getOperand(1), RHSKnownZero, RHSKnownOne, Depth+1,
+ computeKnownBits(I->getOperand(1), RHSKnownZero, RHSKnownOne, Depth + 1,
CxtI);
- computeKnownBits(I->getOperand(0), LHSKnownZero, LHSKnownOne, Depth+1,
+ computeKnownBits(I->getOperand(0), LHSKnownZero, LHSKnownOne, Depth + 1,
CxtI);
// If all of the demanded bits are known zero on one side, return the
@@ -225,9 +221,9 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
// We can simplify (X^Y) -> X or Y in the user's context if we know that
// only bits from X or Y are demanded.
- computeKnownBits(I->getOperand(1), RHSKnownZero, RHSKnownOne, Depth+1,
+ computeKnownBits(I->getOperand(1), RHSKnownZero, RHSKnownOne, Depth + 1,
CxtI);
- computeKnownBits(I->getOperand(0), LHSKnownZero, LHSKnownOne, Depth+1,
+ computeKnownBits(I->getOperand(0), LHSKnownZero, LHSKnownOne, Depth + 1,
CxtI);
// If all of the demanded bits are known zero on one side, return the
@@ -256,10 +252,10 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
break;
case Instruction::And:
// If either the LHS or the RHS are Zero, the result is zero.
- if (SimplifyDemandedBits(I->getOperandUse(1), DemandedMask,
- RHSKnownZero, RHSKnownOne, Depth+1) ||
+ if (SimplifyDemandedBits(I->getOperandUse(1), DemandedMask, RHSKnownZero,
+ RHSKnownOne, Depth + 1) ||
SimplifyDemandedBits(I->getOperandUse(0), DemandedMask & ~RHSKnownZero,
- LHSKnownZero, LHSKnownOne, Depth+1))
+ LHSKnownZero, LHSKnownOne, Depth + 1))
return I;
assert(!(RHSKnownZero & RHSKnownOne) && "Bits known to be one AND zero?");
assert(!(LHSKnownZero & LHSKnownOne) && "Bits known to be one AND zero?");
@@ -294,10 +290,10 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
break;
case Instruction::Or:
// If either the LHS or the RHS are One, the result is One.
- if (SimplifyDemandedBits(I->getOperandUse(1), DemandedMask,
- RHSKnownZero, RHSKnownOne, Depth+1) ||
+ if (SimplifyDemandedBits(I->getOperandUse(1), DemandedMask, RHSKnownZero,
+ RHSKnownOne, Depth + 1) ||
SimplifyDemandedBits(I->getOperandUse(0), DemandedMask & ~RHSKnownOne,
- LHSKnownZero, LHSKnownOne, Depth+1))
+ LHSKnownZero, LHSKnownOne, Depth + 1))
return I;
assert(!(RHSKnownZero & RHSKnownOne) && "Bits known to be one AND zero?");
assert(!(LHSKnownZero & LHSKnownOne) && "Bits known to be one AND zero?");
@@ -336,10 +332,10 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
KnownOne = RHSKnownOne | LHSKnownOne;
break;
case Instruction::Xor: {
- if (SimplifyDemandedBits(I->getOperandUse(1), DemandedMask,
- RHSKnownZero, RHSKnownOne, Depth+1) ||
- SimplifyDemandedBits(I->getOperandUse(0), DemandedMask,
- LHSKnownZero, LHSKnownOne, Depth+1))
+ if (SimplifyDemandedBits(I->getOperandUse(1), DemandedMask, RHSKnownZero,
+ RHSKnownOne, Depth + 1) ||
+ SimplifyDemandedBits(I->getOperandUse(0), DemandedMask, LHSKnownZero,
+ LHSKnownOne, Depth + 1))
return I;
assert(!(RHSKnownZero & RHSKnownOne) && "Bits known to be one AND zero?");
assert(!(LHSKnownZero & LHSKnownOne) && "Bits known to be one AND zero?");
@@ -423,10 +419,10 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
break;
}
case Instruction::Select:
- if (SimplifyDemandedBits(I->getOperandUse(2), DemandedMask,
- RHSKnownZero, RHSKnownOne, Depth+1) ||
- SimplifyDemandedBits(I->getOperandUse(1), DemandedMask,
- LHSKnownZero, LHSKnownOne, Depth+1))
+ if (SimplifyDemandedBits(I->getOperandUse(2), DemandedMask, RHSKnownZero,
+ RHSKnownOne, Depth + 1) ||
+ SimplifyDemandedBits(I->getOperandUse(1), DemandedMask, LHSKnownZero,
+ LHSKnownOne, Depth + 1))
return I;
assert(!(RHSKnownZero & RHSKnownOne) && "Bits known to be one AND zero?");
assert(!(LHSKnownZero & LHSKnownOne) && "Bits known to be one AND zero?");
@@ -445,8 +441,8 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
DemandedMask = DemandedMask.zext(truncBf);
KnownZero = KnownZero.zext(truncBf);
KnownOne = KnownOne.zext(truncBf);
- if (SimplifyDemandedBits(I->getOperandUse(0), DemandedMask,
- KnownZero, KnownOne, Depth+1))
+ if (SimplifyDemandedBits(I->getOperandUse(0), DemandedMask, KnownZero,
+ KnownOne, Depth + 1))
return I;
DemandedMask = DemandedMask.trunc(BitWidth);
KnownZero = KnownZero.trunc(BitWidth);
@@ -471,8 +467,8 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
// Don't touch a vector-to-scalar bitcast.
return nullptr;
- if (SimplifyDemandedBits(I->getOperandUse(0), DemandedMask,
- KnownZero, KnownOne, Depth+1))
+ if (SimplifyDemandedBits(I->getOperandUse(0), DemandedMask, KnownZero,
+ KnownOne, Depth + 1))
return I;
assert(!(KnownZero & KnownOne) && "Bits known to be one AND zero?");
break;
@@ -483,8 +479,8 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
DemandedMask = DemandedMask.trunc(SrcBitWidth);
KnownZero = KnownZero.trunc(SrcBitWidth);
KnownOne = KnownOne.trunc(SrcBitWidth);
- if (SimplifyDemandedBits(I->getOperandUse(0), DemandedMask,
- KnownZero, KnownOne, Depth+1))
+ if (SimplifyDemandedBits(I->getOperandUse(0), DemandedMask, KnownZero,
+ KnownOne, Depth + 1))
return I;
DemandedMask = DemandedMask.zext(BitWidth);
KnownZero = KnownZero.zext(BitWidth);
@@ -510,8 +506,8 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
InputDemandedBits = InputDemandedBits.trunc(SrcBitWidth);
KnownZero = KnownZero.trunc(SrcBitWidth);
KnownOne = KnownOne.trunc(SrcBitWidth);
- if (SimplifyDemandedBits(I->getOperandUse(0), InputDemandedBits,
- KnownZero, KnownOne, Depth+1))
+ if (SimplifyDemandedBits(I->getOperandUse(0), InputDemandedBits, KnownZero,
+ KnownOne, Depth + 1))
return I;
InputDemandedBits = InputDemandedBits.zext(BitWidth);
KnownZero = KnownZero.zext(BitWidth);
@@ -552,7 +548,7 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
// Find information about known zero/one bits in the input.
if (SimplifyDemandedBits(I->getOperandUse(0), InDemandedBits,
- LHSKnownZero, LHSKnownOne, Depth+1))
+ LHSKnownZero, LHSKnownOne, Depth + 1))
return I;
// If the RHS of the add has bits set that can't affect the input, reduce
@@ -602,9 +598,9 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
// significant bit and all those below it.
APInt DemandedFromOps(APInt::getLowBitsSet(BitWidth, BitWidth-NLZ));
if (SimplifyDemandedBits(I->getOperandUse(0), DemandedFromOps,
- LHSKnownZero, LHSKnownOne, Depth+1) ||
+ LHSKnownZero, LHSKnownOne, Depth + 1) ||
SimplifyDemandedBits(I->getOperandUse(1), DemandedFromOps,
- LHSKnownZero, LHSKnownOne, Depth+1))
+ LHSKnownZero, LHSKnownOne, Depth + 1))
return I;
}
}
@@ -619,9 +615,9 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
uint32_t NLZ = DemandedMask.countLeadingZeros();
APInt DemandedFromOps(APInt::getLowBitsSet(BitWidth, BitWidth-NLZ));
if (SimplifyDemandedBits(I->getOperandUse(0), DemandedFromOps,
- LHSKnownZero, LHSKnownOne, Depth+1) ||
+ LHSKnownZero, LHSKnownOne, Depth + 1) ||
SimplifyDemandedBits(I->getOperandUse(1), DemandedFromOps,
- LHSKnownZero, LHSKnownOne, Depth+1))
+ LHSKnownZero, LHSKnownOne, Depth + 1))
return I;
}
@@ -662,8 +658,8 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
else if (IOp->hasNoUnsignedWrap())
DemandedMaskIn |= APInt::getHighBitsSet(BitWidth, ShiftAmt);
- if (SimplifyDemandedBits(I->getOperandUse(0), DemandedMaskIn,
- KnownZero, KnownOne, Depth+1))
+ if (SimplifyDemandedBits(I->getOperandUse(0), DemandedMaskIn, KnownZero,
+ KnownOne, Depth + 1))
return I;
assert(!(KnownZero & KnownOne) && "Bits known to be one AND zero?");
KnownZero <<= ShiftAmt;
@@ -686,8 +682,8 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
if (cast<LShrOperator>(I)->isExact())
DemandedMaskIn |= APInt::getLowBitsSet(BitWidth, ShiftAmt);
- if (SimplifyDemandedBits(I->getOperandUse(0), DemandedMaskIn,
- KnownZero, KnownOne, Depth+1))
+ if (SimplifyDemandedBits(I->getOperandUse(0), DemandedMaskIn, KnownZero,
+ KnownOne, Depth + 1))
return I;
assert(!(KnownZero & KnownOne) && "Bits known to be one AND zero?");
KnownZero = APIntOps::lshr(KnownZero, ShiftAmt);
@@ -731,8 +727,8 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
if (cast<AShrOperator>(I)->isExact())
DemandedMaskIn |= APInt::getLowBitsSet(BitWidth, ShiftAmt);
- if (SimplifyDemandedBits(I->getOperandUse(0), DemandedMaskIn,
- KnownZero, KnownOne, Depth+1))
+ if (SimplifyDemandedBits(I->getOperandUse(0), DemandedMaskIn, KnownZero,
+ KnownOne, Depth + 1))
return I;
assert(!(KnownZero & KnownOne) && "Bits known to be one AND zero?");
// Compute the new bits that are at the top now.
@@ -772,8 +768,8 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
APInt LowBits = RA - 1;
APInt Mask2 = LowBits | APInt::getSignBit(BitWidth);
- if (SimplifyDemandedBits(I->getOperandUse(0), Mask2,
- LHSKnownZero, LHSKnownOne, Depth+1))
+ if (SimplifyDemandedBits(I->getOperandUse(0), Mask2, LHSKnownZero,
+ LHSKnownOne, Depth + 1))
return I;
// The low bits of LHS are unchanged by the srem.
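The low bits pass through because, for a power-of-two divisor 2^k, the remainder differs from the dividend by a multiple of 2^k. A quick check with C++'s truncating %, which matches srem:

#include <cassert>
#include <cstdint>

int main() {
  const int k = 3, mask = (1 << k) - 1;
  const int32_t xs[] = {-9, -8, -5, 0, 5, 8, 100};
  for (int32_t x : xs)
    assert(((x % (1 << k)) & mask) == (x & mask)); // low k bits unchanged
  return 0;
}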
@@ -798,7 +794,7 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
// remainder is zero.
if (DemandedMask.isNegative() && KnownZero.isNonNegative()) {
APInt LHSKnownZero(BitWidth, 0), LHSKnownOne(BitWidth, 0);
- computeKnownBits(I->getOperand(0), LHSKnownZero, LHSKnownOne, Depth+1,
+ computeKnownBits(I->getOperand(0), LHSKnownZero, LHSKnownOne, Depth + 1,
CxtI);
// If it's known zero, our sign bit is also zero.
if (LHSKnownZero.isNegative())
@@ -808,10 +804,10 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
case Instruction::URem: {
APInt KnownZero2(BitWidth, 0), KnownOne2(BitWidth, 0);
APInt AllOnes = APInt::getAllOnesValue(BitWidth);
- if (SimplifyDemandedBits(I->getOperandUse(0), AllOnes,
- KnownZero2, KnownOne2, Depth+1) ||
- SimplifyDemandedBits(I->getOperandUse(1), AllOnes,
- KnownZero2, KnownOne2, Depth+1))
+ if (SimplifyDemandedBits(I->getOperandUse(0), AllOnes, KnownZero2,
+ KnownOne2, Depth + 1) ||
+ SimplifyDemandedBits(I->getOperandUse(1), AllOnes, KnownZero2,
+ KnownOne2, Depth + 1))
return I;
unsigned Leaders = KnownZero2.countLeadingOnes();
@@ -1051,7 +1047,7 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts,
// Note that we can't propagate undef elt info, because we don't know
// which elt is getting updated.
TmpV = SimplifyDemandedVectorElts(I->getOperand(0), DemandedElts,
- UndefElts2, Depth+1);
+ UndefElts2, Depth + 1);
if (TmpV) { I->setOperand(0, TmpV); MadeChange = true; }
break;
}
@@ -1069,7 +1065,7 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts,
APInt DemandedElts2 = DemandedElts;
DemandedElts2.clearBit(IdxNo);
TmpV = SimplifyDemandedVectorElts(I->getOperand(0), DemandedElts2,
- UndefElts, Depth+1);
+ UndefElts, Depth + 1);
if (TmpV) { I->setOperand(0, TmpV); MadeChange = true; }
// The inserted element is defined.
@@ -1097,12 +1093,12 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts,
APInt UndefElts4(LHSVWidth, 0);
TmpV = SimplifyDemandedVectorElts(I->getOperand(0), LeftDemanded,
- UndefElts4, Depth+1);
+ UndefElts4, Depth + 1);
if (TmpV) { I->setOperand(0, TmpV); MadeChange = true; }
APInt UndefElts3(LHSVWidth, 0);
TmpV = SimplifyDemandedVectorElts(I->getOperand(1), RightDemanded,
- UndefElts3, Depth+1);
+ UndefElts3, Depth + 1);
if (TmpV) { I->setOperand(1, TmpV); MadeChange = true; }
bool NewUndefElts = false;
@@ -1152,12 +1148,12 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts,
}
}
- TmpV = SimplifyDemandedVectorElts(I->getOperand(1), LeftDemanded,
- UndefElts, Depth+1);
+ TmpV = SimplifyDemandedVectorElts(I->getOperand(1), LeftDemanded, UndefElts,
+ Depth + 1);
if (TmpV) { I->setOperand(1, TmpV); MadeChange = true; }
TmpV = SimplifyDemandedVectorElts(I->getOperand(2), RightDemanded,
- UndefElts2, Depth+1);
+ UndefElts2, Depth + 1);
if (TmpV) { I->setOperand(2, TmpV); MadeChange = true; }
// Output elements are undefined if both are undefined.
@@ -1204,7 +1200,7 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts,
// div/rem demand all inputs, because they don't want divide by zero.
TmpV = SimplifyDemandedVectorElts(I->getOperand(0), InputDemandedElts,
- UndefElts2, Depth+1);
+ UndefElts2, Depth + 1);
if (TmpV) {
I->setOperand(0, TmpV);
MadeChange = true;
@@ -1238,11 +1234,11 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts,
case Instruction::Sub:
case Instruction::Mul:
// These binary ops demand all elements of both inputs.
- TmpV = SimplifyDemandedVectorElts(I->getOperand(0), DemandedElts,
- UndefElts, Depth+1);
+ TmpV = SimplifyDemandedVectorElts(I->getOperand(0), DemandedElts, UndefElts,
+ Depth + 1);
if (TmpV) { I->setOperand(0, TmpV); MadeChange = true; }
TmpV = SimplifyDemandedVectorElts(I->getOperand(1), DemandedElts,
- UndefElts2, Depth+1);
+ UndefElts2, Depth + 1);
if (TmpV) { I->setOperand(1, TmpV); MadeChange = true; }
// Output elements are undefined if both are undefined. Consider things
@@ -1251,8 +1247,8 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts,
break;
case Instruction::FPTrunc:
case Instruction::FPExt:
- TmpV = SimplifyDemandedVectorElts(I->getOperand(0), DemandedElts,
- UndefElts, Depth+1);
+ TmpV = SimplifyDemandedVectorElts(I->getOperand(0), DemandedElts, UndefElts,
+ Depth + 1);
if (TmpV) { I->setOperand(0, TmpV); MadeChange = true; }
break;
@@ -1273,10 +1269,10 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts,
case Intrinsic::x86_sse2_min_sd:
case Intrinsic::x86_sse2_max_sd:
TmpV = SimplifyDemandedVectorElts(II->getArgOperand(0), DemandedElts,
- UndefElts, Depth+1);
+ UndefElts, Depth + 1);
if (TmpV) { II->setArgOperand(0, TmpV); MadeChange = true; }
TmpV = SimplifyDemandedVectorElts(II->getArgOperand(1), DemandedElts,
- UndefElts2, Depth+1);
+ UndefElts2, Depth + 1);
if (TmpV) { II->setArgOperand(1, TmpV); MadeChange = true; }
// If only the low elt is demanded and this is a scalarizable intrinsic,
diff --git a/lib/Transforms/InstCombine/InstCombineVectorOps.cpp b/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
index e07efb5..b6beb65 100644
--- a/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
+++ b/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
@@ -202,8 +202,8 @@ Instruction *InstCombiner::visitExtractElementInst(ExtractElementInst &EI) {
APInt UndefElts(VectorWidth, 0);
APInt DemandedMask(VectorWidth, 0);
DemandedMask.setBit(IndexVal);
- if (Value *V = SimplifyDemandedVectorElts(EI.getOperand(0),
- DemandedMask, UndefElts)) {
+ if (Value *V = SimplifyDemandedVectorElts(EI.getOperand(0), DemandedMask,
+ UndefElts)) {
EI.setOperand(0, V);
return &EI;
}
@@ -733,7 +733,8 @@ static Value *BuildNew(Instruction *I, ArrayRef<Value*> NewOps) {
case Instruction::GetElementPtr: {
Value *Ptr = NewOps[0];
ArrayRef<Value*> Idx = NewOps.slice(1);
- GetElementPtrInst *GEP = GetElementPtrInst::Create(Ptr, Idx, "", I);
+ GetElementPtrInst *GEP = GetElementPtrInst::Create(
+ cast<GetElementPtrInst>(I)->getSourceElementType(), Ptr, Idx, "", I);
GEP->setIsInBounds(cast<GetElementPtrInst>(I)->isInBounds());
return GEP;
}
diff --git a/lib/Transforms/InstCombine/InstructionCombining.cpp b/lib/Transforms/InstCombine/InstructionCombining.cpp
index 88fcd53..90551e4 100644
--- a/lib/Transforms/InstCombine/InstructionCombining.cpp
+++ b/lib/Transforms/InstCombine/InstructionCombining.cpp
@@ -57,6 +57,7 @@
#include "llvm/IR/ValueHandle.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Utils/Local.h"
#include <algorithm>
@@ -75,7 +76,7 @@ STATISTIC(NumFactor , "Number of factorizations");
STATISTIC(NumReassoc , "Number of reassociations");
Value *InstCombiner::EmitGEPOffset(User *GEP) {
- return llvm::EmitGEPOffset(Builder, *getDataLayout(), GEP);
+ return llvm::EmitGEPOffset(Builder, DL, GEP);
}
/// ShouldChangeType - Return true if it is desirable to convert a computation
@@ -84,13 +85,10 @@ Value *InstCombiner::EmitGEPOffset(User *GEP) {
bool InstCombiner::ShouldChangeType(Type *From, Type *To) const {
assert(From->isIntegerTy() && To->isIntegerTy());
- // If we don't have DL, we don't know if the source/dest are legal.
- if (!DL) return false;
-
unsigned FromWidth = From->getPrimitiveSizeInBits();
unsigned ToWidth = To->getPrimitiveSizeInBits();
- bool FromLegal = DL->isLegalInteger(FromWidth);
- bool ToLegal = DL->isLegalInteger(ToWidth);
+ bool FromLegal = DL.isLegalInteger(FromWidth);
+ bool ToLegal = DL.isLegalInteger(ToWidth);
// If this is a legal integer from type, and the result would be an illegal
// type, don't do the transformation.
@@ -445,7 +443,7 @@ getBinOpsForFactorization(Instruction::BinaryOps TopLevelOpcode,
/// This tries to simplify binary operations by factorizing out common terms
/// (e. g. "(A*B)+(A*C)" -> "A*(B+C)").
static Value *tryFactorization(InstCombiner::BuilderTy *Builder,
- const DataLayout *DL, BinaryOperator &I,
+ const DataLayout &DL, BinaryOperator &I,
Instruction::BinaryOps InnerOpcode, Value *A,
Value *B, Value *C, Value *D) {
@@ -872,12 +870,9 @@ Instruction *InstCombiner::FoldOpIntoPhi(Instruction &I) {
/// will land us at the specified offset. If so, fill them into NewIndices and
/// return the resultant element type, otherwise return null.
Type *InstCombiner::FindElementAtOffset(Type *PtrTy, int64_t Offset,
- SmallVectorImpl<Value*> &NewIndices) {
+ SmallVectorImpl<Value *> &NewIndices) {
assert(PtrTy->isPtrOrPtrVectorTy());
- if (!DL)
- return nullptr;
-
Type *Ty = PtrTy->getPointerElementType();
if (!Ty->isSized())
return nullptr;
@@ -885,9 +880,9 @@ Type *InstCombiner::FindElementAtOffset(Type *PtrTy, int64_t Offset,
// Start with the index over the outer type. Note that the type size
// might be zero (even if the offset isn't zero) if the indexed type
// is something like [0 x {int, int}]
- Type *IntPtrTy = DL->getIntPtrType(PtrTy);
+ Type *IntPtrTy = DL.getIntPtrType(PtrTy);
int64_t FirstIdx = 0;
- if (int64_t TySize = DL->getTypeAllocSize(Ty)) {
+ if (int64_t TySize = DL.getTypeAllocSize(Ty)) {
FirstIdx = Offset/TySize;
Offset -= FirstIdx*TySize;
@@ -905,11 +900,11 @@ Type *InstCombiner::FindElementAtOffset(Type *PtrTy, int64_t Offset,
// Index into the types. If we fail, set OrigBase to null.
while (Offset) {
// Indexing into tail padding between struct/array elements.
- if (uint64_t(Offset*8) >= DL->getTypeSizeInBits(Ty))
+ if (uint64_t(Offset * 8) >= DL.getTypeSizeInBits(Ty))
return nullptr;
if (StructType *STy = dyn_cast<StructType>(Ty)) {
- const StructLayout *SL = DL->getStructLayout(STy);
+ const StructLayout *SL = DL.getStructLayout(STy);
assert(Offset < (int64_t)SL->getSizeInBytes() &&
"Offset must stay within the indexed type");
@@ -920,7 +915,7 @@ Type *InstCombiner::FindElementAtOffset(Type *PtrTy, int64_t Offset,
Offset -= SL->getElementOffset(Elt);
Ty = STy->getElementType(Elt);
} else if (ArrayType *AT = dyn_cast<ArrayType>(Ty)) {
- uint64_t EltSize = DL->getTypeAllocSize(AT->getElementType());
+ uint64_t EltSize = DL.getTypeAllocSize(AT->getElementType());
assert(EltSize && "Cannot index into a zero-sized array");
NewIndices.push_back(ConstantInt::get(IntPtrTy,Offset/EltSize));
Offset %= EltSize;
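The loop above is the standard offset-to-indices decomposition: divide by the element size at array levels, look up the containing field at struct levels. A plain C++ analogue, using sizeof/offsetof in place of StructLayout:

#include <cassert>
#include <cstddef>

struct Inner { int A; int B; };
struct Outer { Inner Elems[4]; };

int main() {
  // Byte offset of Elems[2].B within Outer.
  size_t offset = 2 * sizeof(Inner) + offsetof(Inner, B);
  size_t idx0 = offset / sizeof(Inner); // array index at the outer level
  size_t rem = offset % sizeof(Inner);
  size_t idx1 = rem / sizeof(int);      // field index inside Inner
  assert(idx0 == 2 && idx1 == 1);
  return 0;
}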
@@ -1214,7 +1209,8 @@ Value *InstCombiner::SimplifyVectorOp(BinaryOperator &Inst) {
// It may not be safe to reorder shuffles and things like div, urem, etc.
// because we may trap when executing those ops on unknown vector elements.
// See PR20059.
- if (!isSafeToSpeculativelyExecute(&Inst, DL)) return nullptr;
+ if (!isSafeToSpeculativelyExecute(&Inst))
+ return nullptr;
unsigned VWidth = cast<VectorType>(Inst.getType())->getNumElements();
Value *LHS = Inst.getOperand(0), *RHS = Inst.getOperand(1);
@@ -1300,37 +1296,37 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {
// Eliminate unneeded casts for indices, and replace indices which displace
// by multiples of a zero size type with zero.
- if (DL) {
- bool MadeChange = false;
- Type *IntPtrTy = DL->getIntPtrType(GEP.getPointerOperandType());
-
- gep_type_iterator GTI = gep_type_begin(GEP);
- for (User::op_iterator I = GEP.op_begin() + 1, E = GEP.op_end();
- I != E; ++I, ++GTI) {
- // Skip indices into struct types.
- SequentialType *SeqTy = dyn_cast<SequentialType>(*GTI);
- if (!SeqTy) continue;
-
- // If the element type has zero size then any index over it is equivalent
- // to an index of zero, so replace it with zero if it is not zero already.
- if (SeqTy->getElementType()->isSized() &&
- DL->getTypeAllocSize(SeqTy->getElementType()) == 0)
- if (!isa<Constant>(*I) || !cast<Constant>(*I)->isNullValue()) {
- *I = Constant::getNullValue(IntPtrTy);
- MadeChange = true;
- }
+ bool MadeChange = false;
+ Type *IntPtrTy = DL.getIntPtrType(GEP.getPointerOperandType());
+
+ gep_type_iterator GTI = gep_type_begin(GEP);
+ for (User::op_iterator I = GEP.op_begin() + 1, E = GEP.op_end(); I != E;
+ ++I, ++GTI) {
+ // Skip indices into struct types.
+ SequentialType *SeqTy = dyn_cast<SequentialType>(*GTI);
+ if (!SeqTy)
+ continue;
- Type *IndexTy = (*I)->getType();
- if (IndexTy != IntPtrTy) {
- // If we are using a wider index than needed for this platform, shrink
- // it to what we need. If narrower, sign-extend it to what we need.
- // This explicit cast can make subsequent optimizations more obvious.
- *I = Builder->CreateIntCast(*I, IntPtrTy, true);
+ // If the element type has zero size then any index over it is equivalent
+ // to an index of zero, so replace it with zero if it is not zero already.
+ if (SeqTy->getElementType()->isSized() &&
+ DL.getTypeAllocSize(SeqTy->getElementType()) == 0)
+ if (!isa<Constant>(*I) || !cast<Constant>(*I)->isNullValue()) {
+ *I = Constant::getNullValue(IntPtrTy);
MadeChange = true;
}
+
+ Type *IndexTy = (*I)->getType();
+ if (IndexTy != IntPtrTy) {
+ // If we are using a wider index than needed for this platform, shrink
+ // it to what we need. If narrower, sign-extend it to what we need.
+ // This explicit cast can make subsequent optimizations more obvious.
+ *I = Builder->CreateIntCast(*I, IntPtrTy, true);
+ MadeChange = true;
}
- if (MadeChange) return &GEP;
}
+ if (MadeChange)
+ return &GEP;
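
The sign-extension in the CreateIntCast call above is deliberate: GEP indices are signed, so a narrower index must be sign-extended rather than zero-extended to preserve negative displacements. A standalone illustration (assuming 64-bit pointers):

    #include <cstdint>
    #include <cstdio>

    int main() {
      int16_t NarrowIdx = -1;  // e.g. an i16 GEP index stepping backwards
      int64_t Sext = (int64_t)NarrowIdx;            // -1, correct
      int64_t Zext = (int64_t)(uint16_t)NarrowIdx;  // 65535, wrong element
      std::printf("sext=%lld zext=%lld\n", (long long)Sext, (long long)Zext);
    }
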
// Check to see if the inputs to the PHI node are getelementptr instructions.
if (PHINode *PN = dyn_cast<PHINode>(PtrOp)) {
@@ -1338,6 +1334,15 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {
if (!Op1)
return nullptr;
+ // Don't fold a GEP into itself through a PHI node. This can only happen
+ // through the back-edge of a loop. Folding a GEP into itself means that
+ // the value of the previous iteration needs to be stored in the meantime,
+ // thus requiring an additional register variable to be live, but not
+ // actually achieving anything (the GEP still needs to be executed once per
+ // loop iteration).
+ if (Op1 == &GEP)
+ return nullptr;
+
signed DI = -1;
for (auto I = PN->op_begin()+1, E = PN->op_end(); I !=E; ++I) {
@@ -1345,6 +1350,10 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {
if (!Op2 || Op1->getNumOperands() != Op2->getNumOperands())
return nullptr;
+ // As for Op1 above, don't try to fold a GEP into itself.
+ if (Op2 == &GEP)
+ return nullptr;
+
// Keep track of the type as we walk the GEP.
Type *CurTy = Op1->getOperand(0)->getType()->getScalarType();
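
An IR sketch of the situation the two early-outs above guard against (illustrative only, using the older GEP syntax that appears in this file's comments): the GEP being visited reaches the PHI through a loop back-edge, so folding it into the PHI would fold it into itself:

    loop:
      %p = phi i8* [ %base, %entry ], [ %p.next, %loop ]
      %p.next = getelementptr inbounds i8* %p, i64 1   ; Op1/Op2 == &GEP
      br i1 %cond, label %loop, label %exit
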
@@ -1481,19 +1490,22 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {
}
if (!Indices.empty())
- return (GEP.isInBounds() && Src->isInBounds()) ?
- GetElementPtrInst::CreateInBounds(Src->getOperand(0), Indices,
- GEP.getName()) :
- GetElementPtrInst::Create(Src->getOperand(0), Indices, GEP.getName());
+ return GEP.isInBounds() && Src->isInBounds()
+ ? GetElementPtrInst::CreateInBounds(
+ Src->getSourceElementType(), Src->getOperand(0), Indices,
+ GEP.getName())
+ : GetElementPtrInst::Create(Src->getSourceElementType(),
+ Src->getOperand(0), Indices,
+ GEP.getName());
}
- if (DL && GEP.getNumIndices() == 1) {
+ if (GEP.getNumIndices() == 1) {
unsigned AS = GEP.getPointerAddressSpace();
if (GEP.getOperand(1)->getType()->getScalarSizeInBits() ==
- DL->getPointerSizeInBits(AS)) {
+ DL.getPointerSizeInBits(AS)) {
Type *PtrTy = GEP.getPointerOperandType();
Type *Ty = PtrTy->getPointerElementType();
- uint64_t TyAllocSize = DL->getTypeAllocSize(Ty);
+ uint64_t TyAllocSize = DL.getTypeAllocSize(Ty);
bool Matched = false;
uint64_t C;
@@ -1562,8 +1574,8 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {
if (CATy->getElementType() == StrippedPtrTy->getElementType()) {
// -> GEP i8* X, ...
SmallVector<Value*, 8> Idx(GEP.idx_begin()+1, GEP.idx_end());
- GetElementPtrInst *Res =
- GetElementPtrInst::Create(StrippedPtr, Idx, GEP.getName());
+ GetElementPtrInst *Res = GetElementPtrInst::Create(
+ StrippedPtrTy->getElementType(), StrippedPtr, Idx, GEP.getName());
Res->setIsInBounds(GEP.isInBounds());
if (StrippedPtrTy->getAddressSpace() == GEP.getAddressSpace())
return Res;
@@ -1599,9 +1611,12 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {
// %0 = GEP [10 x i8] addrspace(1)* X, ...
// addrspacecast i8 addrspace(1)* %0 to i8*
SmallVector<Value*, 8> Idx(GEP.idx_begin(), GEP.idx_end());
- Value *NewGEP = GEP.isInBounds() ?
- Builder->CreateInBoundsGEP(StrippedPtr, Idx, GEP.getName()) :
- Builder->CreateGEP(StrippedPtr, Idx, GEP.getName());
+ Value *NewGEP =
+ GEP.isInBounds()
+ ? Builder->CreateInBoundsGEP(StrippedPtr, Idx,
+ GEP.getName())
+ : Builder->CreateGEP(StrippedPtrTy->getElementType(),
+ StrippedPtr, Idx, GEP.getName());
return new AddrSpaceCastInst(NewGEP, GEP.getType());
}
}
@@ -1612,14 +1627,16 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {
// into: %t1 = getelementptr [2 x i32]* %str, i32 0, i32 %V; bitcast
Type *SrcElTy = StrippedPtrTy->getElementType();
Type *ResElTy = PtrOp->getType()->getPointerElementType();
- if (DL && SrcElTy->isArrayTy() &&
- DL->getTypeAllocSize(SrcElTy->getArrayElementType()) ==
- DL->getTypeAllocSize(ResElTy)) {
- Type *IdxType = DL->getIntPtrType(GEP.getType());
+ if (SrcElTy->isArrayTy() &&
+ DL.getTypeAllocSize(SrcElTy->getArrayElementType()) ==
+ DL.getTypeAllocSize(ResElTy)) {
+ Type *IdxType = DL.getIntPtrType(GEP.getType());
Value *Idx[2] = { Constant::getNullValue(IdxType), GEP.getOperand(1) };
- Value *NewGEP = GEP.isInBounds() ?
- Builder->CreateInBoundsGEP(StrippedPtr, Idx, GEP.getName()) :
- Builder->CreateGEP(StrippedPtr, Idx, GEP.getName());
+ Value *NewGEP =
+ GEP.isInBounds()
+ ? Builder->CreateInBoundsGEP(StrippedPtr, Idx, GEP.getName())
+ : Builder->CreateGEP(StrippedPtrTy->getElementType(),
+ StrippedPtr, Idx, GEP.getName());
// V and GEP are both pointer types --> BitCast
return CastInst::CreatePointerBitCastOrAddrSpaceCast(NewGEP,
@@ -1630,11 +1647,11 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {
// %V = mul i64 %N, 4
// %t = getelementptr i8* bitcast (i32* %arr to i8*), i32 %V
// into: %t1 = getelementptr i32* %arr, i32 %N; bitcast
- if (DL && ResElTy->isSized() && SrcElTy->isSized()) {
+ if (ResElTy->isSized() && SrcElTy->isSized()) {
// Check that changing the type amounts to dividing the index by a scale
// factor.
- uint64_t ResSize = DL->getTypeAllocSize(ResElTy);
- uint64_t SrcSize = DL->getTypeAllocSize(SrcElTy);
+ uint64_t ResSize = DL.getTypeAllocSize(ResElTy);
+ uint64_t SrcSize = DL.getTypeAllocSize(SrcElTy);
if (ResSize && SrcSize % ResSize == 0) {
Value *Idx = GEP.getOperand(1);
unsigned BitWidth = Idx->getType()->getPrimitiveSizeInBits();
@@ -1642,7 +1659,7 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {
// Earlier transforms ensure that the index has type IntPtrType, which
// considerably simplifies the logic by eliminating implicit casts.
- assert(Idx->getType() == DL->getIntPtrType(GEP.getType()) &&
+ assert(Idx->getType() == DL.getIntPtrType(GEP.getType()) &&
"Index not cast to pointer width?");
bool NSW;
@@ -1650,9 +1667,12 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {
// Successfully decomposed Idx as NewIdx * Scale, form a new GEP.
// If the multiplication NewIdx * Scale may overflow then the new
// GEP may not be "inbounds".
- Value *NewGEP = GEP.isInBounds() && NSW ?
- Builder->CreateInBoundsGEP(StrippedPtr, NewIdx, GEP.getName()) :
- Builder->CreateGEP(StrippedPtr, NewIdx, GEP.getName());
+ Value *NewGEP =
+ GEP.isInBounds() && NSW
+ ? Builder->CreateInBoundsGEP(StrippedPtr, NewIdx,
+ GEP.getName())
+ : Builder->CreateGEP(StrippedPtrTy->getElementType(),
+ StrippedPtr, NewIdx, GEP.getName());
// The NewGEP must be pointer typed, so must the old one -> BitCast
return CastInst::CreatePointerBitCastOrAddrSpaceCast(NewGEP,
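
The decomposition above strips a known multiplication by the scale factor SrcSize/ResSize. A standalone sketch of the constant-index case (names are mine; LLVM's helper also looks through shl/mul instructions and tracks the NSW flag used above):

    #include <cstdint>
    #include <cstdio>

    // If Idx is provably NewIdx * Scale, recover NewIdx; only the trivial
    // constant case is handled here.
    static bool descale(int64_t Idx, int64_t Scale, int64_t &NewIdx) {
      if (Scale == 0 || Idx % Scale != 0)
        return false;
      NewIdx = Idx / Scale;
      return true;
    }

    int main() {
      // i8* view over an i32 object: SrcSize=4, ResSize=1 -> Scale=4.
      int64_t NewIdx;
      if (descale(/*Idx=*/8, /*Scale=*/4, NewIdx))
        std::printf("gep i32* %%arr, i64 %lld\n", (long long)NewIdx);  // 2
    }
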
@@ -1665,13 +1685,12 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {
// getelementptr i8* bitcast ([100 x double]* X to i8*), i32 %tmp
// (where tmp = 8*tmp2) into:
// getelementptr [100 x double]* %arr, i32 0, i32 %tmp2; bitcast
- if (DL && ResElTy->isSized() && SrcElTy->isSized() &&
- SrcElTy->isArrayTy()) {
+ if (ResElTy->isSized() && SrcElTy->isSized() && SrcElTy->isArrayTy()) {
// Check that changing to the array element type amounts to dividing the
// index by a scale factor.
- uint64_t ResSize = DL->getTypeAllocSize(ResElTy);
- uint64_t ArrayEltSize
- = DL->getTypeAllocSize(SrcElTy->getArrayElementType());
+ uint64_t ResSize = DL.getTypeAllocSize(ResElTy);
+ uint64_t ArrayEltSize =
+ DL.getTypeAllocSize(SrcElTy->getArrayElementType());
if (ResSize && ArrayEltSize % ResSize == 0) {
Value *Idx = GEP.getOperand(1);
unsigned BitWidth = Idx->getType()->getPrimitiveSizeInBits();
@@ -1679,7 +1698,7 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {
// Earlier transforms ensure that the index has type IntPtrType, which
// considerably simplifies the logic by eliminating implicit casts.
- assert(Idx->getType() == DL->getIntPtrType(GEP.getType()) &&
+ assert(Idx->getType() == DL.getIntPtrType(GEP.getType()) &&
"Index not cast to pointer width?");
bool NSW;
@@ -1688,13 +1707,12 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {
// If the multiplication NewIdx * Scale may overflow then the new
// GEP may not be "inbounds".
Value *Off[2] = {
- Constant::getNullValue(DL->getIntPtrType(GEP.getType())),
- NewIdx
- };
+ Constant::getNullValue(DL.getIntPtrType(GEP.getType())),
+ NewIdx};
Value *NewGEP = GEP.isInBounds() && NSW ?
Builder->CreateInBoundsGEP(StrippedPtr, Off, GEP.getName()) :
- Builder->CreateGEP(StrippedPtr, Off, GEP.getName());
+ Builder->CreateGEP(SrcElTy, StrippedPtr, Off, GEP.getName());
// The NewGEP must be pointer typed, so must the old one -> BitCast
return CastInst::CreatePointerBitCastOrAddrSpaceCast(NewGEP,
GEP.getType());
@@ -1704,9 +1722,6 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {
}
}
- if (!DL)
- return nullptr;
-
// addrspacecast between types is canonicalized as a bitcast, then an
// addrspacecast. To take advantage of the below bitcast + struct GEP, look
// through the addrspacecast.
@@ -1727,10 +1742,10 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {
if (BitCastInst *BCI = dyn_cast<BitCastInst>(PtrOp)) {
Value *Operand = BCI->getOperand(0);
PointerType *OpType = cast<PointerType>(Operand->getType());
- unsigned OffsetBits = DL->getPointerTypeSizeInBits(GEP.getType());
+ unsigned OffsetBits = DL.getPointerTypeSizeInBits(GEP.getType());
APInt Offset(OffsetBits, 0);
if (!isa<BitCastInst>(Operand) &&
- GEP.accumulateConstantOffset(*DL, Offset)) {
+ GEP.accumulateConstantOffset(DL, Offset)) {
// If this GEP instruction doesn't move the pointer, just replace the GEP
// with a bitcast of the real input to the dest type.
@@ -1761,7 +1776,7 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {
if (FindElementAtOffset(OpType, Offset.getSExtValue(), NewIndices)) {
Value *NGEP = GEP.isInBounds() ?
Builder->CreateInBoundsGEP(Operand, NewIndices) :
- Builder->CreateGEP(Operand, NewIndices);
+ Builder->CreateGEP(OpType->getElementType(), Operand, NewIndices);
if (NGEP->getType() == GEP.getType())
return ReplaceInstUsesWith(GEP, NGEP);
@@ -2012,6 +2027,15 @@ Instruction *InstCombiner::visitBranchInst(BranchInst &BI) {
return &BI;
}
+ // If the condition is irrelevant, remove the use so that other
+ // transforms on the condition become more effective.
+ if (BI.isConditional() &&
+ BI.getSuccessor(0) == BI.getSuccessor(1) &&
+ !isa<UndefValue>(BI.getCondition())) {
+ BI.setCondition(UndefValue::get(BI.getCondition()->getType()));
+ return &BI;
+ }
+
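
An IR sketch of this new transform (illustrative): both edges of the conditional branch reach the same block, so the condition value is dead and replacing it with undef lets other combines act on the condition's remaining uses:

    ; before
    br i1 %cmp, label %bb, label %bb
    ; after
    br i1 undef, label %bb, label %bb
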
// Canonicalize fcmp_one -> fcmp_oeq
FCmpInst::Predicate FPred; Value *Y;
if (match(&BI, m_Br(m_FCmp(FPred, m_Value(X), m_Value(Y)),
@@ -2051,7 +2075,7 @@ Instruction *InstCombiner::visitSwitchInst(SwitchInst &SI) {
Value *Cond = SI.getCondition();
unsigned BitWidth = cast<IntegerType>(Cond->getType())->getBitWidth();
APInt KnownZero(BitWidth, 0), KnownOne(BitWidth, 0);
- computeKnownBits(Cond, KnownZero, KnownOne);
+ computeKnownBits(Cond, KnownZero, KnownOne, 0, &SI);
unsigned LeadingKnownZeros = KnownZero.countLeadingOnes();
unsigned LeadingKnownOnes = KnownOne.countLeadingOnes();
@@ -2070,8 +2094,8 @@ Instruction *InstCombiner::visitSwitchInst(SwitchInst &SI) {
// x86 generates redundant zero-extension instructions if the operand is
// truncated to i8 or i16.
bool TruncCond = false;
- if (DL && BitWidth > NewWidth &&
- NewWidth >= DL->getLargestLegalIntTypeSize()) {
+ if (NewWidth > 0 && BitWidth > NewWidth &&
+ NewWidth >= DL.getLargestLegalIntTypeSize()) {
TruncCond = true;
IntegerType *Ty = IntegerType::get(SI.getContext(), NewWidth);
Builder->SetInsertPoint(&SI);
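
A standalone sketch of how the known leading bits bound the condition width (the max() combination mirrors how the elided code between these hunks derives NewWidth):

    #include <cstdint>
    #include <cstdio>

    static unsigned countLeadingOnes(uint32_t V) {
      unsigned N = 0;
      for (int Bit = 31; Bit >= 0 && ((V >> Bit) & 1); --Bit)
        ++N;
      return N;
    }

    int main() {
      // Suppose computeKnownBits proved the top 24 bits of the i32 condition
      // are zero (e.g. it was zero-extended from an i8 load).
      uint32_t KnownZero = 0xFFFFFF00, KnownOne = 0;
      unsigned LeadingKnownZeros = countLeadingOnes(KnownZero);
      unsigned LeadingKnownOnes = countLeadingOnes(KnownOne);
      unsigned Max = LeadingKnownZeros > LeadingKnownOnes ? LeadingKnownZeros
                                                          : LeadingKnownOnes;
      std::printf("truncate the switch condition to i%u\n", 32 - Max);  // i8
    }
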
@@ -2632,7 +2656,7 @@ bool InstCombiner::run() {
}
// Instruction isn't dead, see if we can constant propagate it.
- if (!I->use_empty() && isa<Constant>(I->getOperand(0)))
+ if (!I->use_empty() && isa<Constant>(I->getOperand(0))) {
if (Constant *C = ConstantFoldInstruction(I, DL, TLI)) {
DEBUG(dbgs() << "IC: ConstFold to: " << *C << " from: " << *I << '\n');
@@ -2643,6 +2667,7 @@ bool InstCombiner::run() {
MadeIRChange = true;
continue;
}
+ }
// See if we can trivially sink this instruction to a successor basic block.
if (I->hasOneUse()) {
@@ -2756,10 +2781,9 @@ bool InstCombiner::run() {
/// many instructions are dead or constant). Additionally, if we find a branch
/// whose condition is a known constant, we only visit the reachable successors.
///
-static bool AddReachableCodeToWorklist(BasicBlock *BB,
- SmallPtrSetImpl<BasicBlock*> &Visited,
+static bool AddReachableCodeToWorklist(BasicBlock *BB, const DataLayout &DL,
+ SmallPtrSetImpl<BasicBlock *> &Visited,
InstCombineWorklist &ICWorklist,
- const DataLayout *DL,
const TargetLibraryInfo *TLI) {
bool MadeIRChange = false;
SmallVector<BasicBlock*, 256> Worklist;
@@ -2797,23 +2821,22 @@ static bool AddReachableCodeToWorklist(BasicBlock *BB,
continue;
}
- if (DL) {
- // See if we can constant fold its operands.
- for (User::op_iterator i = Inst->op_begin(), e = Inst->op_end();
- i != e; ++i) {
- ConstantExpr *CE = dyn_cast<ConstantExpr>(i);
- if (CE == nullptr) continue;
+ // See if we can constant fold its operands.
+ for (User::op_iterator i = Inst->op_begin(), e = Inst->op_end(); i != e;
+ ++i) {
+ ConstantExpr *CE = dyn_cast<ConstantExpr>(i);
+ if (CE == nullptr)
+ continue;
- Constant*& FoldRes = FoldedConstants[CE];
- if (!FoldRes)
- FoldRes = ConstantFoldConstantExpression(CE, DL, TLI);
- if (!FoldRes)
- FoldRes = CE;
+ Constant *&FoldRes = FoldedConstants[CE];
+ if (!FoldRes)
+ FoldRes = ConstantFoldConstantExpression(CE, DL, TLI);
+ if (!FoldRes)
+ FoldRes = CE;
- if (FoldRes != CE) {
- *i = FoldRes;
- MadeIRChange = true;
- }
+ if (FoldRes != CE) {
+ *i = FoldRes;
+ MadeIRChange = true;
}
}
@@ -2867,7 +2890,7 @@ static bool AddReachableCodeToWorklist(BasicBlock *BB,
///
/// This also does basic constant propagation and other forward fixing to make
/// the combiner itself run much faster.
-static bool prepareICWorklistFromFunction(Function &F, const DataLayout *DL,
+static bool prepareICWorklistFromFunction(Function &F, const DataLayout &DL,
TargetLibraryInfo *TLI,
InstCombineWorklist &ICWorklist) {
bool MadeIRChange = false;
@@ -2877,7 +2900,7 @@ static bool prepareICWorklistFromFunction(Function &F, const DataLayout *DL,
// track of which blocks we visit.
SmallPtrSet<BasicBlock *, 64> Visited;
MadeIRChange |=
- AddReachableCodeToWorklist(F.begin(), Visited, ICWorklist, DL, TLI);
+ AddReachableCodeToWorklist(F.begin(), DL, Visited, ICWorklist, TLI);
// Do a quick scan over the function. If we find any blocks that are
// unreachable, remove any instructions inside of them. This prevents
@@ -2910,12 +2933,13 @@ static bool prepareICWorklistFromFunction(Function &F, const DataLayout *DL,
return MadeIRChange;
}
-static bool combineInstructionsOverFunction(
- Function &F, InstCombineWorklist &Worklist, AssumptionCache &AC,
- TargetLibraryInfo &TLI, DominatorTree &DT, const DataLayout *DL = nullptr,
- LoopInfo *LI = nullptr) {
+static bool
+combineInstructionsOverFunction(Function &F, InstCombineWorklist &Worklist,
+ AssumptionCache &AC, TargetLibraryInfo &TLI,
+ DominatorTree &DT, LoopInfo *LI = nullptr) {
// Minimizing size?
bool MinimizeSize = F.hasFnAttribute(Attribute::MinSize);
+ auto &DL = F.getParent()->getDataLayout();
/// Builder - This is an IRBuilder that automatically inserts new
/// instructions into the worklist when they are created.
@@ -2950,15 +2974,13 @@ static bool combineInstructionsOverFunction(
PreservedAnalyses InstCombinePass::run(Function &F,
AnalysisManager<Function> *AM) {
- auto *DL = F.getParent()->getDataLayout();
-
auto &AC = AM->getResult<AssumptionAnalysis>(F);
auto &DT = AM->getResult<DominatorTreeAnalysis>(F);
auto &TLI = AM->getResult<TargetLibraryAnalysis>(F);
auto *LI = AM->getCachedResult<LoopAnalysis>(F);
- if (!combineInstructionsOverFunction(F, Worklist, AC, TLI, DT, DL, LI))
+ if (!combineInstructionsOverFunction(F, Worklist, AC, TLI, DT, LI))
// No changes, all analyses are preserved.
return PreservedAnalyses::all();
@@ -3007,12 +3029,10 @@ bool InstructionCombiningPass::runOnFunction(Function &F) {
auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
// Optional analyses.
- auto *DLP = getAnalysisIfAvailable<DataLayoutPass>();
- auto *DL = DLP ? &DLP->getDataLayout() : nullptr;
auto *LIWP = getAnalysisIfAvailable<LoopInfoWrapperPass>();
auto *LI = LIWP ? &LIWP->getLoopInfo() : nullptr;
- return combineInstructionsOverFunction(F, Worklist, AC, TLI, DT, DL, LI);
+ return combineInstructionsOverFunction(F, Worklist, AC, TLI, DT, LI);
}
char InstructionCombiningPass::ID = 0;
diff --git a/lib/Transforms/Instrumentation/AddressSanitizer.cpp b/lib/Transforms/Instrumentation/AddressSanitizer.cpp
index 882aab0..978c857 100644
--- a/lib/Transforms/Instrumentation/AddressSanitizer.cpp
+++ b/lib/Transforms/Instrumentation/AddressSanitizer.cpp
@@ -24,6 +24,9 @@
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/Triple.h"
+#include "llvm/Analysis/MemoryBuiltins.h"
+#include "llvm/Analysis/TargetLibraryInfo.h"
+#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/CallSite.h"
#include "llvm/IR/DIBuilder.h"
#include "llvm/IR/DataLayout.h"
@@ -43,12 +46,14 @@
#include "llvm/Support/Debug.h"
#include "llvm/Support/Endian.h"
#include "llvm/Support/SwapByteOrder.h"
+#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Utils/ASanStackFrameLayout.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Cloning.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Utils/ModuleUtils.h"
+#include "llvm/Transforms/Utils/PromoteMemToReg.h"
#include <algorithm>
#include <string>
#include <system_error>
@@ -70,17 +75,15 @@ static const uint64_t kFreeBSD_ShadowOffset32 = 1ULL << 30;
static const uint64_t kFreeBSD_ShadowOffset64 = 1ULL << 46;
static const uint64_t kWindowsShadowOffset32 = 3ULL << 28;
-static const size_t kMinStackMallocSize = 1 << 6; // 64B
+static const size_t kMinStackMallocSize = 1 << 6; // 64B
static const size_t kMaxStackMallocSize = 1 << 16; // 64K
static const uintptr_t kCurrentStackFrameMagic = 0x41B58AB3;
static const uintptr_t kRetiredStackFrameMagic = 0x45E0360E;
static const char *const kAsanModuleCtorName = "asan.module_ctor";
static const char *const kAsanModuleDtorName = "asan.module_dtor";
-static const uint64_t kAsanCtorAndDtorPriority = 1;
+static const uint64_t kAsanCtorAndDtorPriority = 1;
static const char *const kAsanReportErrorTemplate = "__asan_report_";
-static const char *const kAsanReportLoadN = "__asan_report_load_n";
-static const char *const kAsanReportStoreN = "__asan_report_store_n";
static const char *const kAsanRegisterGlobalsName = "__asan_register_globals";
static const char *const kAsanUnregisterGlobalsName =
"__asan_unregister_globals";
@@ -90,7 +93,7 @@ static const char *const kAsanInitName = "__asan_init_v5";
static const char *const kAsanPtrCmp = "__sanitizer_ptr_cmp";
static const char *const kAsanPtrSub = "__sanitizer_ptr_sub";
static const char *const kAsanHandleNoReturnName = "__asan_handle_no_return";
-static const int kMaxAsanStackMallocSizeClass = 10;
+static const int kMaxAsanStackMallocSizeClass = 10;
static const char *const kAsanStackMallocNameTemplate = "__asan_stack_malloc_";
static const char *const kAsanStackFreeNameTemplate = "__asan_stack_free_";
static const char *const kAsanGenPrefix = "__asan_gen_";
@@ -103,10 +106,6 @@ static const char *const kAsanUnpoisonStackMemoryName =
static const char *const kAsanOptionDetectUAR =
"__asan_option_detect_stack_use_after_return";
-#ifndef NDEBUG
-static const int kAsanStackAfterReturnMagic = 0xf5;
-#endif
-
// Access sizes are powers of two: 1, 2, 4, 8, 16.
static const size_t kNumberOfAccessSizes = 5;
@@ -120,84 +119,110 @@ static const unsigned kAsanAllocaPartialVal2 = 0x000000cbU;
// This flag may need to be replaced with -f[no-]asan-reads.
static cl::opt<bool> ClInstrumentReads("asan-instrument-reads",
- cl::desc("instrument read instructions"), cl::Hidden, cl::init(true));
-static cl::opt<bool> ClInstrumentWrites("asan-instrument-writes",
- cl::desc("instrument write instructions"), cl::Hidden, cl::init(true));
-static cl::opt<bool> ClInstrumentAtomics("asan-instrument-atomics",
- cl::desc("instrument atomic instructions (rmw, cmpxchg)"),
- cl::Hidden, cl::init(true));
-static cl::opt<bool> ClAlwaysSlowPath("asan-always-slow-path",
- cl::desc("use instrumentation with slow path for all accesses"),
- cl::Hidden, cl::init(false));
+ cl::desc("instrument read instructions"),
+ cl::Hidden, cl::init(true));
+static cl::opt<bool> ClInstrumentWrites(
+ "asan-instrument-writes", cl::desc("instrument write instructions"),
+ cl::Hidden, cl::init(true));
+static cl::opt<bool> ClInstrumentAtomics(
+ "asan-instrument-atomics",
+ cl::desc("instrument atomic instructions (rmw, cmpxchg)"), cl::Hidden,
+ cl::init(true));
+static cl::opt<bool> ClAlwaysSlowPath(
+ "asan-always-slow-path",
+ cl::desc("use instrumentation with slow path for all accesses"), cl::Hidden,
+ cl::init(false));
// This flag limits the number of instructions to be instrumented
// in any given BB. Normally, this should be set to unlimited (INT_MAX),
// but due to http://llvm.org/bugs/show_bug.cgi?id=12652 we temporarily
// set it to 10000.
-static cl::opt<int> ClMaxInsnsToInstrumentPerBB("asan-max-ins-per-bb",
- cl::init(10000),
- cl::desc("maximal number of instructions to instrument in any given BB"),
- cl::Hidden);
+static cl::opt<int> ClMaxInsnsToInstrumentPerBB(
+ "asan-max-ins-per-bb", cl::init(10000),
+ cl::desc("maximal number of instructions to instrument in any given BB"),
+ cl::Hidden);
// This flag may need to be replaced with -f[no]asan-stack.
-static cl::opt<bool> ClStack("asan-stack",
- cl::desc("Handle stack memory"), cl::Hidden, cl::init(true));
+static cl::opt<bool> ClStack("asan-stack", cl::desc("Handle stack memory"),
+ cl::Hidden, cl::init(true));
static cl::opt<bool> ClUseAfterReturn("asan-use-after-return",
- cl::desc("Check return-after-free"), cl::Hidden, cl::init(true));
+ cl::desc("Check return-after-free"),
+ cl::Hidden, cl::init(true));
// This flag may need to be replaced with -f[no]asan-globals.
static cl::opt<bool> ClGlobals("asan-globals",
- cl::desc("Handle global objects"), cl::Hidden, cl::init(true));
+ cl::desc("Handle global objects"), cl::Hidden,
+ cl::init(true));
static cl::opt<bool> ClInitializers("asan-initialization-order",
- cl::desc("Handle C++ initializer order"), cl::Hidden, cl::init(true));
-static cl::opt<bool> ClInvalidPointerPairs("asan-detect-invalid-pointer-pair",
- cl::desc("Instrument <, <=, >, >=, - with pointer operands"),
- cl::Hidden, cl::init(false));
-static cl::opt<unsigned> ClRealignStack("asan-realign-stack",
- cl::desc("Realign stack to the value of this flag (power of two)"),
- cl::Hidden, cl::init(32));
+ cl::desc("Handle C++ initializer order"),
+ cl::Hidden, cl::init(true));
+static cl::opt<bool> ClInvalidPointerPairs(
+ "asan-detect-invalid-pointer-pair",
+ cl::desc("Instrument <, <=, >, >=, - with pointer operands"), cl::Hidden,
+ cl::init(false));
+static cl::opt<unsigned> ClRealignStack(
+ "asan-realign-stack",
+ cl::desc("Realign stack to the value of this flag (power of two)"),
+ cl::Hidden, cl::init(32));
static cl::opt<int> ClInstrumentationWithCallsThreshold(
"asan-instrumentation-with-call-threshold",
- cl::desc("If the function being instrumented contains more than "
- "this number of memory accesses, use callbacks instead of "
- "inline checks (-1 means never use callbacks)."),
- cl::Hidden, cl::init(7000));
+ cl::desc(
+ "If the function being instrumented contains more than "
+ "this number of memory accesses, use callbacks instead of "
+ "inline checks (-1 means never use callbacks)."),
+ cl::Hidden, cl::init(7000));
static cl::opt<std::string> ClMemoryAccessCallbackPrefix(
- "asan-memory-access-callback-prefix",
- cl::desc("Prefix for memory access callbacks"), cl::Hidden,
- cl::init("__asan_"));
+ "asan-memory-access-callback-prefix",
+ cl::desc("Prefix for memory access callbacks"), cl::Hidden,
+ cl::init("__asan_"));
static cl::opt<bool> ClInstrumentAllocas("asan-instrument-allocas",
- cl::desc("instrument dynamic allocas"), cl::Hidden, cl::init(false));
+ cl::desc("instrument dynamic allocas"),
+ cl::Hidden, cl::init(false));
+static cl::opt<bool> ClSkipPromotableAllocas(
+ "asan-skip-promotable-allocas",
+ cl::desc("Do not instrument promotable allocas"), cl::Hidden,
+ cl::init(true));
// These flags allow changing the shadow mapping.
// The shadow mapping looks like
// Shadow = (Mem >> scale) + (1 << offset_log)
static cl::opt<int> ClMappingScale("asan-mapping-scale",
- cl::desc("scale of asan shadow mapping"), cl::Hidden, cl::init(0));
+ cl::desc("scale of asan shadow mapping"),
+ cl::Hidden, cl::init(0));
// Optimization flags. Not user visible, used mostly for testing
// and benchmarking the tool.
-static cl::opt<bool> ClOpt("asan-opt",
- cl::desc("Optimize instrumentation"), cl::Hidden, cl::init(true));
-static cl::opt<bool> ClOptSameTemp("asan-opt-same-temp",
- cl::desc("Instrument the same temp just once"), cl::Hidden,
- cl::init(true));
+static cl::opt<bool> ClOpt("asan-opt", cl::desc("Optimize instrumentation"),
+ cl::Hidden, cl::init(true));
+static cl::opt<bool> ClOptSameTemp(
+ "asan-opt-same-temp", cl::desc("Instrument the same temp just once"),
+ cl::Hidden, cl::init(true));
static cl::opt<bool> ClOptGlobals("asan-opt-globals",
- cl::desc("Don't instrument scalar globals"), cl::Hidden, cl::init(true));
+ cl::desc("Don't instrument scalar globals"),
+ cl::Hidden, cl::init(true));
+static cl::opt<bool> ClOptStack(
+ "asan-opt-stack", cl::desc("Don't instrument scalar stack variables"),
+ cl::Hidden, cl::init(false));
-static cl::opt<bool> ClCheckLifetime("asan-check-lifetime",
- cl::desc("Use llvm.lifetime intrinsics to insert extra checks"),
- cl::Hidden, cl::init(false));
+static cl::opt<bool> ClCheckLifetime(
+ "asan-check-lifetime",
+ cl::desc("Use llvm.lifetime intrinsics to insert extra checks"), cl::Hidden,
+ cl::init(false));
static cl::opt<bool> ClDynamicAllocaStack(
"asan-stack-dynamic-alloca",
cl::desc("Use dynamic alloca to represent stack variables"), cl::Hidden,
cl::init(true));
+static cl::opt<uint32_t> ClForceExperiment(
+ "asan-force-experiment",
+ cl::desc("Force optimization experiment (for testing)"), cl::Hidden,
+ cl::init(0));
+
// Debug flags.
static cl::opt<int> ClDebug("asan-debug", cl::desc("debug"), cl::Hidden,
cl::init(0));
static cl::opt<int> ClDebugStack("asan-debug-stack", cl::desc("debug stack"),
cl::Hidden, cl::init(0));
-static cl::opt<std::string> ClDebugFunc("asan-debug-func",
- cl::Hidden, cl::desc("Debug func"));
+static cl::opt<std::string> ClDebugFunc("asan-debug-func", cl::Hidden,
+ cl::desc("Debug func"));
static cl::opt<int> ClDebugMin("asan-debug-min", cl::desc("Debug min inst"),
cl::Hidden, cl::init(-1));
static cl::opt<int> ClDebugMax("asan-debug-max", cl::desc("Debug max inst"),
@@ -207,10 +232,10 @@ STATISTIC(NumInstrumentedReads, "Number of instrumented reads");
STATISTIC(NumInstrumentedWrites, "Number of instrumented writes");
STATISTIC(NumInstrumentedDynamicAllocas,
"Number of instrumented dynamic allocas");
-STATISTIC(NumOptimizedAccessesToGlobalArray,
- "Number of optimized accesses to global arrays");
STATISTIC(NumOptimizedAccessesToGlobalVar,
"Number of optimized accesses to global vars");
+STATISTIC(NumOptimizedAccessesToStackVar,
+ "Number of optimized accesses to stack vars");
namespace {
/// Frontend-provided metadata for source location.
@@ -238,9 +263,7 @@ struct LocationMetadata {
class GlobalsMetadata {
public:
struct Entry {
- Entry()
- : SourceLoc(), Name(), IsDynInit(false),
- IsBlacklisted(false) {}
+ Entry() : SourceLoc(), Name(), IsDynInit(false), IsBlacklisted(false) {}
LocationMetadata SourceLoc;
StringRef Name;
bool IsDynInit;
@@ -249,19 +272,17 @@ class GlobalsMetadata {
GlobalsMetadata() : inited_(false) {}
- void init(Module& M) {
+ void init(Module &M) {
assert(!inited_);
inited_ = true;
NamedMDNode *Globals = M.getNamedMetadata("llvm.asan.globals");
- if (!Globals)
- return;
+ if (!Globals) return;
for (auto MDN : Globals->operands()) {
// Metadata node contains the global and the fields of "Entry".
assert(MDN->getNumOperands() == 5);
auto *GV = mdconst::extract_or_null<GlobalVariable>(MDN->getOperand(0));
// The optimizer may optimize away a global entirely.
- if (!GV)
- continue;
+ if (!GV) continue;
// We can already have an entry for GV if it was merged with another
// global.
Entry &E = Entries[GV];
@@ -286,7 +307,7 @@ class GlobalsMetadata {
private:
bool inited_;
- DenseMap<GlobalVariable*, Entry> Entries;
+ DenseMap<GlobalVariable *, Entry> Entries;
};
/// This struct defines the shadow mapping using the rule:
@@ -371,17 +392,36 @@ struct AddressSanitizer : public FunctionPass {
}
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.addRequired<DominatorTreeWrapperPass>();
+ AU.addRequired<TargetLibraryInfoWrapperPass>();
}
- void instrumentMop(Instruction *I, bool UseCalls);
+ uint64_t getAllocaSizeInBytes(AllocaInst *AI) const {
+ Type *Ty = AI->getAllocatedType();
+ uint64_t SizeInBytes =
+ AI->getModule()->getDataLayout().getTypeAllocSize(Ty);
+ return SizeInBytes;
+ }
+ /// Check if we want (and can) handle this alloca.
+ bool isInterestingAlloca(AllocaInst &AI) const;
+ /// If it is an interesting memory access, return the PointerOperand
+ /// and set IsWrite/Alignment. Otherwise return nullptr.
+ Value *isInterestingMemoryAccess(Instruction *I, bool *IsWrite,
+ uint64_t *TypeSize,
+ unsigned *Alignment) const;
+ void instrumentMop(ObjectSizeOffsetVisitor &ObjSizeVis, Instruction *I,
+ bool UseCalls, const DataLayout &DL);
void instrumentPointerComparisonOrSubtraction(Instruction *I);
void instrumentAddress(Instruction *OrigIns, Instruction *InsertBefore,
Value *Addr, uint32_t TypeSize, bool IsWrite,
- Value *SizeArgument, bool UseCalls);
+ Value *SizeArgument, bool UseCalls, uint32_t Exp);
+ void instrumentUnusualSizeOrAlignment(Instruction *I, Value *Addr,
+ uint32_t TypeSize, bool IsWrite,
+ Value *SizeArgument, bool UseCalls,
+ uint32_t Exp);
Value *createSlowPathCmp(IRBuilder<> &IRB, Value *AddrLong,
Value *ShadowValue, uint32_t TypeSize);
Instruction *generateCrashCode(Instruction *InsertBefore, Value *Addr,
bool IsWrite, size_t AccessSizeIndex,
- Value *SizeArgument);
+ Value *SizeArgument, uint32_t Exp);
void instrumentMemIntrinsic(MemIntrinsic *MI);
Value *memToShadow(Value *Shadow, IRBuilder<> &IRB);
bool runOnFunction(Function &F) override;
@@ -396,9 +436,10 @@ struct AddressSanitizer : public FunctionPass {
bool LooksLikeCodeInBug11395(Instruction *I);
bool GlobalIsLinkerInitialized(GlobalVariable *G);
+ bool isSafeAccess(ObjectSizeOffsetVisitor &ObjSizeVis, Value *Addr,
+ uint64_t TypeSize) const;
LLVMContext *C;
- const DataLayout *DL;
Triple TargetTriple;
int LongSize;
Type *IntptrTy;
@@ -408,12 +449,12 @@ struct AddressSanitizer : public FunctionPass {
Function *AsanInitFunction;
Function *AsanHandleNoReturnFunc;
Function *AsanPtrCmpFunction, *AsanPtrSubFunction;
- // This array is indexed by AccessIsWrite and log2(AccessSize).
- Function *AsanErrorCallback[2][kNumberOfAccessSizes];
- Function *AsanMemoryAccessCallback[2][kNumberOfAccessSizes];
- // This array is indexed by AccessIsWrite.
- Function *AsanErrorCallbackSized[2],
- *AsanMemoryAccessCallbackSized[2];
+ // This array is indexed by AccessIsWrite, Experiment and log2(AccessSize).
+ Function *AsanErrorCallback[2][2][kNumberOfAccessSizes];
+ Function *AsanMemoryAccessCallback[2][2][kNumberOfAccessSizes];
+ // This array is indexed by AccessIsWrite and Experiment.
+ Function *AsanErrorCallbackSized[2][2];
+ Function *AsanMemoryAccessCallbackSized[2][2];
Function *AsanMemmove, *AsanMemcpy, *AsanMemset;
InlineAsm *EmptyAsm;
GlobalsMetadata GlobalsMD;
@@ -426,9 +467,7 @@ class AddressSanitizerModule : public ModulePass {
AddressSanitizerModule() : ModulePass(ID) {}
bool runOnModule(Module &M) override;
static char ID; // Pass identification, replacement for typeid
- const char *getPassName() const override {
- return "AddressSanitizerModule";
- }
+ const char *getPassName() const override { return "AddressSanitizerModule"; }
private:
void initializeCallbacks(Module &M);
@@ -444,7 +483,6 @@ class AddressSanitizerModule : public ModulePass {
GlobalsMetadata GlobalsMD;
Type *IntptrTy;
LLVMContext *C;
- const DataLayout *DL;
Triple TargetTriple;
ShadowMapping Mapping;
Function *AsanPoisonGlobals;
@@ -471,12 +509,12 @@ struct FunctionStackPoisoner : public InstVisitor<FunctionStackPoisoner> {
Type *IntptrPtrTy;
ShadowMapping Mapping;
- SmallVector<AllocaInst*, 16> AllocaVec;
- SmallVector<Instruction*, 8> RetVec;
+ SmallVector<AllocaInst *, 16> AllocaVec;
+ SmallVector<Instruction *, 8> RetVec;
unsigned StackAlignment;
Function *AsanStackMallocFunc[kMaxAsanStackMallocSizeClass + 1],
- *AsanStackFreeFunc[kMaxAsanStackMallocSizeClass + 1];
+ *AsanStackFreeFunc[kMaxAsanStackMallocSizeClass + 1];
Function *AsanPoisonStackMemoryFunc, *AsanUnpoisonStackMemoryFunc;
// Stores a place and arguments of poisoning/unpoisoning call for alloca.
@@ -497,33 +535,38 @@ struct FunctionStackPoisoner : public InstVisitor<FunctionStackPoisoner> {
Value *LeftRzAddr;
Value *RightRzAddr;
bool Poison;
- explicit DynamicAllocaCall(AllocaInst *AI,
- Value *LeftRzAddr = nullptr,
- Value *RightRzAddr = nullptr)
- : AI(AI), LeftRzAddr(LeftRzAddr), RightRzAddr(RightRzAddr), Poison(true)
- {}
+ explicit DynamicAllocaCall(AllocaInst *AI, Value *LeftRzAddr = nullptr,
+ Value *RightRzAddr = nullptr)
+ : AI(AI),
+ LeftRzAddr(LeftRzAddr),
+ RightRzAddr(RightRzAddr),
+ Poison(true) {}
};
SmallVector<DynamicAllocaCall, 1> DynamicAllocaVec;
// Maps Value to an AllocaInst from which the Value is originated.
- typedef DenseMap<Value*, AllocaInst*> AllocaForValueMapTy;
+ typedef DenseMap<Value *, AllocaInst *> AllocaForValueMapTy;
AllocaForValueMapTy AllocaForValue;
bool HasNonEmptyInlineAsm;
std::unique_ptr<CallInst> EmptyInlineAsm;
FunctionStackPoisoner(Function &F, AddressSanitizer &ASan)
- : F(F), ASan(ASan), DIB(*F.getParent(), /*AllowUnresolved*/ false),
- C(ASan.C), IntptrTy(ASan.IntptrTy),
- IntptrPtrTy(PointerType::get(IntptrTy, 0)), Mapping(ASan.Mapping),
- StackAlignment(1 << Mapping.Scale), HasNonEmptyInlineAsm(false),
+ : F(F),
+ ASan(ASan),
+ DIB(*F.getParent(), /*AllowUnresolved*/ false),
+ C(ASan.C),
+ IntptrTy(ASan.IntptrTy),
+ IntptrPtrTy(PointerType::get(IntptrTy, 0)),
+ Mapping(ASan.Mapping),
+ StackAlignment(1 << Mapping.Scale),
+ HasNonEmptyInlineAsm(false),
EmptyInlineAsm(CallInst::Create(ASan.EmptyAsm)) {}
bool runOnFunction() {
if (!ClStack) return false;
// Collect alloca, ret, lifetime instructions etc.
- for (BasicBlock *BB : depth_first(&F.getEntryBlock()))
- visit(*BB);
+ for (BasicBlock *BB : depth_first(&F.getEntryBlock())) visit(*BB);
if (AllocaVec.empty() && DynamicAllocaVec.empty()) return false;
@@ -544,33 +587,31 @@ struct FunctionStackPoisoner : public InstVisitor<FunctionStackPoisoner> {
// ----------------------- Visitors.
/// \brief Collect all Ret instructions.
- void visitReturnInst(ReturnInst &RI) {
- RetVec.push_back(&RI);
- }
+ void visitReturnInst(ReturnInst &RI) { RetVec.push_back(&RI); }
// Unpoison dynamic allocas redzones.
void unpoisonDynamicAlloca(DynamicAllocaCall &AllocaCall) {
- if (!AllocaCall.Poison)
- return;
+ if (!AllocaCall.Poison) return;
for (auto Ret : RetVec) {
IRBuilder<> IRBRet(Ret);
PointerType *Int32PtrTy = PointerType::getUnqual(IRBRet.getInt32Ty());
Value *Zero = Constant::getNullValue(IRBRet.getInt32Ty());
Value *PartialRzAddr = IRBRet.CreateSub(AllocaCall.RightRzAddr,
ConstantInt::get(IntptrTy, 4));
- IRBRet.CreateStore(Zero, IRBRet.CreateIntToPtr(AllocaCall.LeftRzAddr,
- Int32PtrTy));
- IRBRet.CreateStore(Zero, IRBRet.CreateIntToPtr(PartialRzAddr,
- Int32PtrTy));
- IRBRet.CreateStore(Zero, IRBRet.CreateIntToPtr(AllocaCall.RightRzAddr,
- Int32PtrTy));
+ IRBRet.CreateStore(
+ Zero, IRBRet.CreateIntToPtr(AllocaCall.LeftRzAddr, Int32PtrTy));
+ IRBRet.CreateStore(Zero,
+ IRBRet.CreateIntToPtr(PartialRzAddr, Int32PtrTy));
+ IRBRet.CreateStore(
+ Zero, IRBRet.CreateIntToPtr(AllocaCall.RightRzAddr, Int32PtrTy));
}
}
// Right shift for BigEndian and left shift for LittleEndian.
Value *shiftAllocaMagic(Value *Val, IRBuilder<> &IRB, Value *Shift) {
- return ASan.DL->isLittleEndian() ? IRB.CreateShl(Val, Shift)
- : IRB.CreateLShr(Val, Shift);
+ auto &DL = F.getParent()->getDataLayout();
+ return DL.isLittleEndian() ? IRB.CreateShl(Val, Shift)
+ : IRB.CreateLShr(Val, Shift);
}
// Compute PartialRzMagic for dynamic alloca call. Since we don't know the
@@ -599,7 +640,7 @@ struct FunctionStackPoisoner : public InstVisitor<FunctionStackPoisoner> {
/// \brief Collect Alloca instructions we want (and can) handle.
void visitAllocaInst(AllocaInst &AI) {
- if (!isInterestingAlloca(AI)) return;
+ if (!ASan.isInterestingAlloca(AI)) return;
StackAlignment = std::max(StackAlignment, AI.getAlignment());
if (isDynamicAlloca(AI))
@@ -613,8 +654,7 @@ struct FunctionStackPoisoner : public InstVisitor<FunctionStackPoisoner> {
void visitIntrinsicInst(IntrinsicInst &II) {
if (!ClCheckLifetime) return;
Intrinsic::ID ID = II.getIntrinsicID();
- if (ID != Intrinsic::lifetime_start &&
- ID != Intrinsic::lifetime_end)
+ if (ID != Intrinsic::lifetime_start && ID != Intrinsic::lifetime_end)
return;
// Found lifetime intrinsic, add ASan instrumentation if necessary.
ConstantInt *Size = dyn_cast<ConstantInt>(II.getArgOperand(0));
@@ -644,8 +684,7 @@ struct FunctionStackPoisoner : public InstVisitor<FunctionStackPoisoner> {
bool doesDominateAllExits(const Instruction *I) const {
for (auto Ret : RetVec) {
- if (!ASan.getDominatorTree().dominates(I, Ret))
- return false;
+ if (!ASan.getDominatorTree().dominates(I, Ret)) return false;
}
return true;
}
@@ -653,19 +692,6 @@ struct FunctionStackPoisoner : public InstVisitor<FunctionStackPoisoner> {
bool isDynamicAlloca(AllocaInst &AI) const {
return AI.isArrayAllocation() || !AI.isStaticAlloca();
}
-
- // Check if we want (and can) handle this alloca.
- bool isInterestingAlloca(AllocaInst &AI) const {
- return (AI.getAllocatedType()->isSized() &&
- // alloca() may be called with 0 size, ignore it.
- getAllocaSizeInBytes(&AI) > 0);
- }
-
- uint64_t getAllocaSizeInBytes(AllocaInst *AI) const {
- Type *Ty = AI->getAllocatedType();
- uint64_t SizeInBytes = ASan.DL->getTypeAllocSize(Ty);
- return SizeInBytes;
- }
/// Finds alloca where the value comes from.
AllocaInst *findAllocaForValue(Value *V);
void poisonRedZones(ArrayRef<uint8_t> ShadowBytes, IRBuilder<> &IRB,
@@ -683,21 +709,25 @@ struct FunctionStackPoisoner : public InstVisitor<FunctionStackPoisoner> {
} // namespace
char AddressSanitizer::ID = 0;
-INITIALIZE_PASS_BEGIN(AddressSanitizer, "asan",
- "AddressSanitizer: detects use-after-free and out-of-bounds bugs.",
- false, false)
+INITIALIZE_PASS_BEGIN(
+ AddressSanitizer, "asan",
+ "AddressSanitizer: detects use-after-free and out-of-bounds bugs.", false,
+ false)
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
-INITIALIZE_PASS_END(AddressSanitizer, "asan",
- "AddressSanitizer: detects use-after-free and out-of-bounds bugs.",
- false, false)
+INITIALIZE_PASS_END(
+ AddressSanitizer, "asan",
+ "AddressSanitizer: detects use-after-free and out-of-bounds bugs.", false,
+ false)
FunctionPass *llvm::createAddressSanitizerFunctionPass() {
return new AddressSanitizer();
}
char AddressSanitizerModule::ID = 0;
-INITIALIZE_PASS(AddressSanitizerModule, "asan-module",
+INITIALIZE_PASS(
+ AddressSanitizerModule, "asan-module",
"AddressSanitizer: detects use-after-free and out-of-bounds bugs."
- "ModulePass", false, false)
+ "ModulePass",
+ false, false)
ModulePass *llvm::createAddressSanitizerModulePass() {
return new AddressSanitizerModule();
}
@@ -709,16 +739,15 @@ static size_t TypeSizeToSizeIndex(uint32_t TypeSize) {
}
// \brief Create a constant for Str so that we can pass it to the run-time lib.
-static GlobalVariable *createPrivateGlobalForString(
- Module &M, StringRef Str, bool AllowMerging) {
+static GlobalVariable *createPrivateGlobalForString(Module &M, StringRef Str,
+ bool AllowMerging) {
Constant *StrConst = ConstantDataArray::getString(M.getContext(), Str);
// We use private linkage for module-local strings. If they can be merged
// with another one, we set the unnamed_addr attribute.
GlobalVariable *GV =
new GlobalVariable(M, StrConst->getType(), true,
GlobalValue::PrivateLinkage, StrConst, kAsanGenPrefix);
- if (AllowMerging)
- GV->setUnnamedAddr(true);
+ if (AllowMerging) GV->setUnnamedAddr(true);
GV->setAlignment(1); // Strings may not be merged w/o setting align 1.
return GV;
}
@@ -747,8 +776,7 @@ static bool GlobalWasGeneratedByAsan(GlobalVariable *G) {
Value *AddressSanitizer::memToShadow(Value *Shadow, IRBuilder<> &IRB) {
// Shadow >> scale
Shadow = IRB.CreateLShr(Shadow, Mapping.Scale);
- if (Mapping.Offset == 0)
- return Shadow;
+ if (Mapping.Offset == 0) return Shadow;
// (Shadow >> scale) | offset
if (Mapping.OrShadowOffset)
return IRB.CreateOr(Shadow, ConstantInt::get(IntptrTy, Mapping.Offset));
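
A standalone model of memToShadow's arithmetic, using the classic x86_64 Linux mapping (Scale 3, Offset 0x7fff8000); the patch only reformats this function:

    #include <cstdint>
    #include <cstdio>

    int main() {
      const uint64_t Scale = 3;            // Mapping.Scale
      const uint64_t Offset = 0x7fff8000;  // Mapping.Offset
      uint64_t Addr = 0x602000000010ULL;
      // Shadow = (Mem >> Scale) + Offset; one shadow byte per 8 app bytes.
      uint64_t Shadow = (Addr >> Scale) + Offset;  // OrShadowOffset uses '|'
      std::printf("shadow(0x%llx) = 0x%llx\n", (unsigned long long)Addr,
                  (unsigned long long)Shadow);
    }
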
@@ -775,38 +803,61 @@ void AddressSanitizer::instrumentMemIntrinsic(MemIntrinsic *MI) {
MI->eraseFromParent();
}
-// If I is an interesting memory access, return the PointerOperand
-// and set IsWrite/Alignment. Otherwise return nullptr.
-static Value *isInterestingMemoryAccess(Instruction *I, bool *IsWrite,
- unsigned *Alignment) {
+/// Check if we want (and can) handle this alloca.
+bool AddressSanitizer::isInterestingAlloca(AllocaInst &AI) const {
+ return (AI.getAllocatedType()->isSized() &&
+ // alloca() may be called with 0 size, ignore it.
+ getAllocaSizeInBytes(&AI) > 0 &&
+ // We are only interested in allocas not promotable to registers.
+ // Promotable allocas are common under -O0.
+ (!ClSkipPromotableAllocas || !isAllocaPromotable(&AI)));
+}
+
+/// If I is an interesting memory access, return the PointerOperand
+/// and set IsWrite/Alignment. Otherwise return nullptr.
+Value *AddressSanitizer::isInterestingMemoryAccess(Instruction *I,
+ bool *IsWrite,
+ uint64_t *TypeSize,
+ unsigned *Alignment) const {
// Skip memory accesses inserted by another instrumentation.
- if (I->getMetadata("nosanitize"))
- return nullptr;
+ if (I->getMetadata("nosanitize")) return nullptr;
+
+ Value *PtrOperand = nullptr;
+ const DataLayout &DL = I->getModule()->getDataLayout();
if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
if (!ClInstrumentReads) return nullptr;
*IsWrite = false;
+ *TypeSize = DL.getTypeStoreSizeInBits(LI->getType());
*Alignment = LI->getAlignment();
- return LI->getPointerOperand();
- }
- if (StoreInst *SI = dyn_cast<StoreInst>(I)) {
+ PtrOperand = LI->getPointerOperand();
+ } else if (StoreInst *SI = dyn_cast<StoreInst>(I)) {
if (!ClInstrumentWrites) return nullptr;
*IsWrite = true;
+ *TypeSize = DL.getTypeStoreSizeInBits(SI->getValueOperand()->getType());
*Alignment = SI->getAlignment();
- return SI->getPointerOperand();
- }
- if (AtomicRMWInst *RMW = dyn_cast<AtomicRMWInst>(I)) {
+ PtrOperand = SI->getPointerOperand();
+ } else if (AtomicRMWInst *RMW = dyn_cast<AtomicRMWInst>(I)) {
if (!ClInstrumentAtomics) return nullptr;
*IsWrite = true;
+ *TypeSize = DL.getTypeStoreSizeInBits(RMW->getValOperand()->getType());
*Alignment = 0;
- return RMW->getPointerOperand();
- }
- if (AtomicCmpXchgInst *XCHG = dyn_cast<AtomicCmpXchgInst>(I)) {
+ PtrOperand = RMW->getPointerOperand();
+ } else if (AtomicCmpXchgInst *XCHG = dyn_cast<AtomicCmpXchgInst>(I)) {
if (!ClInstrumentAtomics) return nullptr;
*IsWrite = true;
+ *TypeSize = DL.getTypeStoreSizeInBits(XCHG->getCompareOperand()->getType());
*Alignment = 0;
- return XCHG->getPointerOperand();
+ PtrOperand = XCHG->getPointerOperand();
}
- return nullptr;
+
+ // Treat memory accesses to promotable allocas as non-interesting since they
+ // will not cause memory violations. This greatly speeds up the instrumented
+ // executable at -O0.
+ if (ClSkipPromotableAllocas)
+ if (auto AI = dyn_cast_or_null<AllocaInst>(PtrOperand))
+ return isInterestingAlloca(*AI) ? AI : nullptr;
+
+ return PtrOperand;
}
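
The refactor routes all four access kinds through a single PtrOperand and then applies the promotable-alloca filter once. A compact standalone model of the gating (the flag and enum names here are illustrative shorthands for the cl::opts above):

    #include <cstdio>

    enum Kind { Load, Store, AtomicRMW, CmpXchg, Other };

    // Reads, writes and atomics each have an opt-out flag; with the
    // skip-promotable-allocas flag on, accesses to promotable allocas
    // are never interesting.
    static bool interesting(Kind K, bool Reads, bool Writes, bool Atomics,
                            bool PromotableAlloca) {
      bool Gate = (K == Load)    ? Reads
                : (K == Store)   ? Writes
                : (K == AtomicRMW || K == CmpXchg) ? Atomics
                                                   : false;
      return Gate && !PromotableAlloca;
    }

    int main() {
      // A load from a promotable alloca is skipped (prints 0).
      std::printf("%d\n", interesting(Load, true, true, true, true));
    }
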
static bool isPointerOperand(Value *V) {
@@ -818,17 +869,15 @@ static bool isPointerOperand(Value *V) {
// the frontend.
static bool isInterestingPointerComparisonOrSubtraction(Instruction *I) {
if (ICmpInst *Cmp = dyn_cast<ICmpInst>(I)) {
- if (!Cmp->isRelational())
- return false;
+ if (!Cmp->isRelational()) return false;
} else if (BinaryOperator *BO = dyn_cast<BinaryOperator>(I)) {
- if (BO->getOpcode() != Instruction::Sub)
- return false;
+ if (BO->getOpcode() != Instruction::Sub) return false;
} else {
return false;
}
if (!isPointerOperand(I->getOperand(0)) ||
!isPointerOperand(I->getOperand(1)))
- return false;
+ return false;
return true;
}
@@ -839,8 +888,8 @@ bool AddressSanitizer::GlobalIsLinkerInitialized(GlobalVariable *G) {
return G->hasInitializer() && !GlobalsMD.get(G).IsDynInit;
}
-void
-AddressSanitizer::instrumentPointerComparisonOrSubtraction(Instruction *I) {
+void AddressSanitizer::instrumentPointerComparisonOrSubtraction(
+ Instruction *I) {
IRBuilder<> IRB(I);
Function *F = isa<ICmpInst>(I) ? AsanPtrCmpFunction : AsanPtrSubFunction;
Value *Param[2] = {I->getOperand(0), I->getOperand(1)};
@@ -851,38 +900,47 @@ AddressSanitizer::instrumentPointerComparisonOrSubtraction(Instruction *I) {
IRB.CreateCall2(F, Param[0], Param[1]);
}
-void AddressSanitizer::instrumentMop(Instruction *I, bool UseCalls) {
+void AddressSanitizer::instrumentMop(ObjectSizeOffsetVisitor &ObjSizeVis,
+ Instruction *I, bool UseCalls,
+ const DataLayout &DL) {
bool IsWrite = false;
unsigned Alignment = 0;
- Value *Addr = isInterestingMemoryAccess(I, &IsWrite, &Alignment);
+ uint64_t TypeSize = 0;
+ Value *Addr = isInterestingMemoryAccess(I, &IsWrite, &TypeSize, &Alignment);
assert(Addr);
+
+ // Optimization experiments.
+ // The experiments can be used to evaluate potential optimizations that remove
+ // instrumentation (assess false negatives). Instead of completely removing
+ // some instrumentation, you set Exp to a non-zero value (a mask of the
+ // optimization experiments that want to remove instrumentation of this
+ // instruction). If Exp is non-zero, this pass will emit special calls into
+ // the runtime (e.g. __asan_report_exp_load1 instead of __asan_report_load1).
+ // These calls make the runtime terminate the program in a special way (with
+ // a different exit status). Then you run the new compiler on a buggy corpus,
+ // collect the special terminations (ideally, you don't see them at all -- no
+ // false negatives) and make the decision on the optimization.
+ uint32_t Exp = ClForceExperiment;
+
if (ClOpt && ClOptGlobals) {
- if (GlobalVariable *G = dyn_cast<GlobalVariable>(Addr)) {
- // If initialization order checking is disabled, a simple access to a
- // dynamically initialized global is always valid.
- if (!ClInitializers || GlobalIsLinkerInitialized(G)) {
- NumOptimizedAccessesToGlobalVar++;
- return;
- }
- }
- ConstantExpr *CE = dyn_cast<ConstantExpr>(Addr);
- if (CE && CE->isGEPWithNoNotionalOverIndexing()) {
- if (GlobalVariable *G = dyn_cast<GlobalVariable>(CE->getOperand(0))) {
- if (CE->getOperand(1)->isNullValue() && GlobalIsLinkerInitialized(G)) {
- NumOptimizedAccessesToGlobalArray++;
- return;
- }
- }
+ // If initialization order checking is disabled, a simple access to a
+ // dynamically initialized global is always valid.
+ GlobalVariable *G = dyn_cast<GlobalVariable>(GetUnderlyingObject(Addr, DL));
+ if (G != NULL && (!ClInitializers || GlobalIsLinkerInitialized(G)) &&
+ isSafeAccess(ObjSizeVis, Addr, TypeSize)) {
+ NumOptimizedAccessesToGlobalVar++;
+ return;
}
}
- Type *OrigPtrTy = Addr->getType();
- Type *OrigTy = cast<PointerType>(OrigPtrTy)->getElementType();
-
- assert(OrigTy->isSized());
- uint32_t TypeSize = DL->getTypeStoreSizeInBits(OrigTy);
-
- assert((TypeSize % 8) == 0);
+ if (ClOpt && ClOptStack) {
+ // A direct inbounds access to a stack variable is always valid.
+ if (isa<AllocaInst>(GetUnderlyingObject(Addr, DL)) &&
+ isSafeAccess(ObjSizeVis, Addr, TypeSize)) {
+ NumOptimizedAccessesToStackVar++;
+ return;
+ }
+ }
if (IsWrite)
NumInstrumentedWrites++;
@@ -895,23 +953,10 @@ void AddressSanitizer::instrumentMop(Instruction *I, bool UseCalls) {
if ((TypeSize == 8 || TypeSize == 16 || TypeSize == 32 || TypeSize == 64 ||
TypeSize == 128) &&
(Alignment >= Granularity || Alignment == 0 || Alignment >= TypeSize / 8))
- return instrumentAddress(I, I, Addr, TypeSize, IsWrite, nullptr, UseCalls);
- // Instrument unusual size or unusual alignment.
- // We can not do it with a single check, so we do 1-byte check for the first
- // and the last bytes. We call __asan_report_*_n(addr, real_size) to be able
- // to report the actual access size.
- IRBuilder<> IRB(I);
- Value *Size = ConstantInt::get(IntptrTy, TypeSize / 8);
- Value *AddrLong = IRB.CreatePointerCast(Addr, IntptrTy);
- if (UseCalls) {
- IRB.CreateCall2(AsanMemoryAccessCallbackSized[IsWrite], AddrLong, Size);
- } else {
- Value *LastByte = IRB.CreateIntToPtr(
- IRB.CreateAdd(AddrLong, ConstantInt::get(IntptrTy, TypeSize / 8 - 1)),
- OrigPtrTy);
- instrumentAddress(I, I, Addr, 8, IsWrite, Size, false);
- instrumentAddress(I, I, LastByte, 8, IsWrite, Size, false);
- }
+ return instrumentAddress(I, I, Addr, TypeSize, IsWrite, nullptr, UseCalls,
+ Exp);
+ instrumentUnusualSizeOrAlignment(I, Addr, TypeSize, IsWrite, nullptr,
+ UseCalls, Exp);
}
// Validate the result of Module::getOrInsertFunction called for an interface
@@ -921,17 +966,34 @@ void AddressSanitizer::instrumentMop(Instruction *I, bool UseCalls) {
static Function *checkInterfaceFunction(Constant *FuncOrBitcast) {
if (isa<Function>(FuncOrBitcast)) return cast<Function>(FuncOrBitcast);
FuncOrBitcast->dump();
- report_fatal_error("trying to redefine an AddressSanitizer "
- "interface function");
+ report_fatal_error(
+ "trying to redefine an AddressSanitizer "
+ "interface function");
}
-Instruction *AddressSanitizer::generateCrashCode(
- Instruction *InsertBefore, Value *Addr,
- bool IsWrite, size_t AccessSizeIndex, Value *SizeArgument) {
+Instruction *AddressSanitizer::generateCrashCode(Instruction *InsertBefore,
+ Value *Addr, bool IsWrite,
+ size_t AccessSizeIndex,
+ Value *SizeArgument,
+ uint32_t Exp) {
IRBuilder<> IRB(InsertBefore);
- CallInst *Call = SizeArgument
- ? IRB.CreateCall2(AsanErrorCallbackSized[IsWrite], Addr, SizeArgument)
- : IRB.CreateCall(AsanErrorCallback[IsWrite][AccessSizeIndex], Addr);
+ Value *ExpVal = Exp == 0 ? nullptr : ConstantInt::get(IRB.getInt32Ty(), Exp);
+ CallInst *Call = nullptr;
+ if (SizeArgument) {
+ if (Exp == 0)
+ Call = IRB.CreateCall2(AsanErrorCallbackSized[IsWrite][0], Addr,
+ SizeArgument);
+ else
+ Call = IRB.CreateCall3(AsanErrorCallbackSized[IsWrite][1], Addr,
+ SizeArgument, ExpVal);
+ } else {
+ if (Exp == 0)
+ Call =
+ IRB.CreateCall(AsanErrorCallback[IsWrite][0][AccessSizeIndex], Addr);
+ else
+ Call = IRB.CreateCall2(AsanErrorCallback[IsWrite][1][AccessSizeIndex],
+ Addr, ExpVal);
+ }
// We don't do Call->setDoesNotReturn() because the BB already has
// UnreachableInst at the end.
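
A sketch of the report-callback naming the new [IsWrite][Exp] tables select between; the exact string composition lives in initializeCallbacks (not shown in this hunk), so this mirrors only the names cited in the comment earlier in the patch:

    #include <cstdio>
    #include <string>

    static std::string reportName(bool IsWrite, unsigned AccessSizeIndex,
                                  bool HasExp) {
      return std::string("__asan_report_") + (HasExp ? "exp_" : "") +
             (IsWrite ? "store" : "load") +
             std::to_string(1u << AccessSizeIndex);
    }

    int main() {
      std::printf("%s\n", reportName(false, 0, true).c_str());  // exp_load1
      std::printf("%s\n", reportName(true, 3, false).c_str());  // store8
    }
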
@@ -941,19 +1003,19 @@ Instruction *AddressSanitizer::generateCrashCode(
}
Value *AddressSanitizer::createSlowPathCmp(IRBuilder<> &IRB, Value *AddrLong,
- Value *ShadowValue,
- uint32_t TypeSize) {
+ Value *ShadowValue,
+ uint32_t TypeSize) {
size_t Granularity = 1 << Mapping.Scale;
// Addr & (Granularity - 1)
- Value *LastAccessedByte = IRB.CreateAnd(
- AddrLong, ConstantInt::get(IntptrTy, Granularity - 1));
+ Value *LastAccessedByte =
+ IRB.CreateAnd(AddrLong, ConstantInt::get(IntptrTy, Granularity - 1));
// (Addr & (Granularity - 1)) + size - 1
if (TypeSize / 8 > 1)
LastAccessedByte = IRB.CreateAdd(
LastAccessedByte, ConstantInt::get(IntptrTy, TypeSize / 8 - 1));
// (uint8_t) ((Addr & (Granularity-1)) + size - 1)
- LastAccessedByte = IRB.CreateIntCast(
- LastAccessedByte, ShadowValue->getType(), false);
+ LastAccessedByte =
+ IRB.CreateIntCast(LastAccessedByte, ShadowValue->getType(), false);
// ((uint8_t) ((Addr & (Granularity-1)) + size - 1)) >= ShadowValue
return IRB.CreateICmpSGE(LastAccessedByte, ShadowValue);
}
@@ -961,24 +1023,29 @@ Value *AddressSanitizer::createSlowPathCmp(IRBuilder<> &IRB, Value *AddrLong,
void AddressSanitizer::instrumentAddress(Instruction *OrigIns,
Instruction *InsertBefore, Value *Addr,
uint32_t TypeSize, bool IsWrite,
- Value *SizeArgument, bool UseCalls) {
+ Value *SizeArgument, bool UseCalls,
+ uint32_t Exp) {
IRBuilder<> IRB(InsertBefore);
Value *AddrLong = IRB.CreatePointerCast(Addr, IntptrTy);
size_t AccessSizeIndex = TypeSizeToSizeIndex(TypeSize);
if (UseCalls) {
- IRB.CreateCall(AsanMemoryAccessCallback[IsWrite][AccessSizeIndex],
- AddrLong);
+ if (Exp == 0)
+ IRB.CreateCall(AsanMemoryAccessCallback[IsWrite][0][AccessSizeIndex],
+ AddrLong);
+ else
+ IRB.CreateCall2(AsanMemoryAccessCallback[IsWrite][1][AccessSizeIndex],
+ AddrLong, ConstantInt::get(IRB.getInt32Ty(), Exp));
return;
}
- Type *ShadowTy = IntegerType::get(
- *C, std::max(8U, TypeSize >> Mapping.Scale));
+ Type *ShadowTy =
+ IntegerType::get(*C, std::max(8U, TypeSize >> Mapping.Scale));
Type *ShadowPtrTy = PointerType::get(ShadowTy, 0);
Value *ShadowPtr = memToShadow(AddrLong, IRB);
Value *CmpVal = Constant::getNullValue(ShadowTy);
- Value *ShadowValue = IRB.CreateLoad(
- IRB.CreateIntToPtr(ShadowPtr, ShadowPtrTy));
+ Value *ShadowValue =
+ IRB.CreateLoad(IRB.CreateIntToPtr(ShadowPtr, ShadowPtrTy));
Value *Cmp = IRB.CreateICmpNE(ShadowValue, CmpVal);
size_t Granularity = 1 << Mapping.Scale;
@@ -987,9 +1054,8 @@ void AddressSanitizer::instrumentAddress(Instruction *OrigIns,
if (ClAlwaysSlowPath || (TypeSize < 8 * Granularity)) {
// We use branch weights for the slow path check, to indicate that the slow
// path is rarely taken. This seems to be the case for SPEC benchmarks.
- TerminatorInst *CheckTerm =
- SplitBlockAndInsertIfThen(Cmp, InsertBefore, false,
- MDBuilder(*C).createBranchWeights(1, 100000));
+ TerminatorInst *CheckTerm = SplitBlockAndInsertIfThen(
+ Cmp, InsertBefore, false, MDBuilder(*C).createBranchWeights(1, 100000));
assert(dyn_cast<BranchInst>(CheckTerm)->isUnconditional());
BasicBlock *NextBB = CheckTerm->getSuccessor(0);
IRB.SetInsertPoint(CheckTerm);
@@ -1003,11 +1069,37 @@ void AddressSanitizer::instrumentAddress(Instruction *OrigIns,
CrashTerm = SplitBlockAndInsertIfThen(Cmp, InsertBefore, true);
}
- Instruction *Crash = generateCrashCode(
- CrashTerm, AddrLong, IsWrite, AccessSizeIndex, SizeArgument);
+ Instruction *Crash = generateCrashCode(CrashTerm, AddrLong, IsWrite,
+ AccessSizeIndex, SizeArgument, Exp);
Crash->setDebugLoc(OrigIns->getDebugLoc());
}
+// Instrument unusual size or unusual alignment.
+// We cannot do it with a single check, so we do a 1-byte check for the first
+// and the last bytes. We call __asan_report_*_n(addr, real_size) to be able
+// to report the actual access size.
+void AddressSanitizer::instrumentUnusualSizeOrAlignment(
+ Instruction *I, Value *Addr, uint32_t TypeSize, bool IsWrite,
+ Value *SizeArgument, bool UseCalls, uint32_t Exp) {
+ IRBuilder<> IRB(I);
+ Value *Size = ConstantInt::get(IntptrTy, TypeSize / 8);
+ Value *AddrLong = IRB.CreatePointerCast(Addr, IntptrTy);
+ if (UseCalls) {
+ if (Exp == 0)
+ IRB.CreateCall2(AsanMemoryAccessCallbackSized[IsWrite][0], AddrLong,
+ Size);
+ else
+ IRB.CreateCall3(AsanMemoryAccessCallbackSized[IsWrite][1], AddrLong, Size,
+ ConstantInt::get(IRB.getInt32Ty(), Exp));
+ } else {
+ Value *LastByte = IRB.CreateIntToPtr(
+ IRB.CreateAdd(AddrLong, ConstantInt::get(IntptrTy, TypeSize / 8 - 1)),
+ Addr->getType());
+ instrumentAddress(I, I, Addr, 8, IsWrite, Size, false, Exp);
+ instrumentAddress(I, I, LastByte, 8, IsWrite, Size, false, Exp);
+ }
+}
+
void AddressSanitizerModule::poisonOneInitializer(Function &GlobalInit,
GlobalValue *ModuleName) {
// Set up the arguments to our poison/unpoison functions.
@@ -1029,12 +1121,11 @@ void AddressSanitizerModule::createInitializerPoisonCalls(
ConstantArray *CA = cast<ConstantArray>(GV->getInitializer());
for (Use &OP : CA->operands()) {
- if (isa<ConstantAggregateZero>(OP))
- continue;
+ if (isa<ConstantAggregateZero>(OP)) continue;
ConstantStruct *CS = cast<ConstantStruct>(OP);
// Must have a function or null ptr.
- if (Function* F = dyn_cast<Function>(CS->getOperand(1))) {
+ if (Function *F = dyn_cast<Function>(CS->getOperand(1))) {
if (F->getName() == kAsanModuleCtorName) continue;
ConstantInt *Priority = dyn_cast<ConstantInt>(CS->getOperand(0));
// Don't instrument CTORs that will run before asan.module_ctor.
@@ -1059,13 +1150,11 @@ bool AddressSanitizerModule::ShouldInstrumentGlobal(GlobalVariable *G) {
G->getLinkage() != GlobalVariable::PrivateLinkage &&
G->getLinkage() != GlobalVariable::InternalLinkage)
return false;
- if (G->hasComdat())
- return false;
+ if (G->hasComdat()) return false;
// Two problems with thread-locals:
// - The address of the main thread's copy can't be computed at link-time.
// - Need to poison all copies, not just the main thread's one.
- if (G->isThreadLocal())
- return false;
+ if (G->isThreadLocal()) return false;
// For now, just ignore this Global if the alignment is large.
if (G->getAlignment() > MinRedzoneSizeForGlobal()) return false;
@@ -1076,10 +1165,8 @@ bool AddressSanitizerModule::ShouldInstrumentGlobal(GlobalVariable *G) {
StringRef ParsedSegment, ParsedSection;
unsigned TAA = 0, StubSize = 0;
bool TAAParsed;
- std::string ErrorCode =
- MCSectionMachO::ParseSectionSpecifier(Section, ParsedSegment,
- ParsedSection, TAA, TAAParsed,
- StubSize);
+ std::string ErrorCode = MCSectionMachO::ParseSectionSpecifier(
+ Section, ParsedSegment, ParsedSection, TAA, TAAParsed, StubSize);
if (!ErrorCode.empty()) {
report_fatal_error("Invalid section specifier '" + ParsedSection +
"': " + ErrorCode + ".");
@@ -1140,12 +1227,11 @@ void AddressSanitizerModule::initializeCallbacks(Module &M) {
AsanUnpoisonGlobals->setLinkage(Function::ExternalLinkage);
// Declare functions that register/unregister globals.
AsanRegisterGlobals = checkInterfaceFunction(M.getOrInsertFunction(
- kAsanRegisterGlobalsName, IRB.getVoidTy(),
- IntptrTy, IntptrTy, nullptr));
+ kAsanRegisterGlobalsName, IRB.getVoidTy(), IntptrTy, IntptrTy, nullptr));
AsanRegisterGlobals->setLinkage(Function::ExternalLinkage);
- AsanUnregisterGlobals = checkInterfaceFunction(M.getOrInsertFunction(
- kAsanUnregisterGlobalsName,
- IRB.getVoidTy(), IntptrTy, IntptrTy, nullptr));
+ AsanUnregisterGlobals = checkInterfaceFunction(
+ M.getOrInsertFunction(kAsanUnregisterGlobalsName, IRB.getVoidTy(),
+ IntptrTy, IntptrTy, nullptr));
AsanUnregisterGlobals->setLinkage(Function::ExternalLinkage);
}
@@ -1158,8 +1244,7 @@ bool AddressSanitizerModule::InstrumentGlobals(IRBuilder<> &IRB, Module &M) {
SmallVector<GlobalVariable *, 16> GlobalsToChange;
for (auto &G : M.globals()) {
- if (ShouldInstrumentGlobal(&G))
- GlobalsToChange.push_back(&G);
+ if (ShouldInstrumentGlobal(&G)) GlobalsToChange.push_back(&G);
}
size_t n = GlobalsToChange.size();
@@ -1184,8 +1269,9 @@ bool AddressSanitizerModule::InstrumentGlobals(IRBuilder<> &IRB, Module &M) {
// We shouldn't merge identical module names, as this string serves as a
// unique module ID at runtime.
GlobalVariable *ModuleName = createPrivateGlobalForString(
- M, M.getModuleIdentifier(), /*AllowMerging*/false);
+ M, M.getModuleIdentifier(), /*AllowMerging*/ false);
+ auto &DL = M.getDataLayout();
for (size_t i = 0; i < n; i++) {
static const uint64_t kMaxGlobalRedzone = 1 << 18;
GlobalVariable *G = GlobalsToChange[i];
@@ -1199,32 +1285,30 @@ bool AddressSanitizerModule::InstrumentGlobals(IRBuilder<> &IRB, Module &M) {
PointerType *PtrTy = cast<PointerType>(G->getType());
Type *Ty = PtrTy->getElementType();
- uint64_t SizeInBytes = DL->getTypeAllocSize(Ty);
+ uint64_t SizeInBytes = DL.getTypeAllocSize(Ty);
uint64_t MinRZ = MinRedzoneSizeForGlobal();
// MinRZ <= RZ <= kMaxGlobalRedzone
// and trying to make RZ ~ 1/4 of SizeInBytes.
- uint64_t RZ = std::max(MinRZ,
- std::min(kMaxGlobalRedzone,
- (SizeInBytes / MinRZ / 4) * MinRZ));
+ uint64_t RZ = std::max(
+ MinRZ, std::min(kMaxGlobalRedzone, (SizeInBytes / MinRZ / 4) * MinRZ));
uint64_t RightRedzoneSize = RZ;
// Round up to MinRZ
- if (SizeInBytes % MinRZ)
- RightRedzoneSize += MinRZ - (SizeInBytes % MinRZ);
+ if (SizeInBytes % MinRZ) RightRedzoneSize += MinRZ - (SizeInBytes % MinRZ);
assert(((RightRedzoneSize + SizeInBytes) % MinRZ) == 0);
Type *RightRedZoneTy = ArrayType::get(IRB.getInt8Ty(), RightRedzoneSize);
StructType *NewTy = StructType::get(Ty, RightRedZoneTy, nullptr);
- Constant *NewInitializer = ConstantStruct::get(
- NewTy, G->getInitializer(),
- Constant::getNullValue(RightRedZoneTy), nullptr);
+ Constant *NewInitializer =
+ ConstantStruct::get(NewTy, G->getInitializer(),
+ Constant::getNullValue(RightRedZoneTy), nullptr);
// Create a new global variable with enough space for a redzone.
GlobalValue::LinkageTypes Linkage = G->getLinkage();
if (G->isConstant() && Linkage == GlobalValue::PrivateLinkage)
Linkage = GlobalValue::InternalLinkage;
- GlobalVariable *NewGlobal = new GlobalVariable(
- M, NewTy, G->isConstant(), Linkage,
- NewInitializer, "", G, G->getThreadLocalMode());
+ GlobalVariable *NewGlobal =
+ new GlobalVariable(M, NewTy, G->isConstant(), Linkage, NewInitializer,
+ "", G, G->getThreadLocalMode());
NewGlobal->copyAttributesFrom(G);
NewGlobal->setAlignment(MinRZ);
@@ -1253,8 +1337,7 @@ bool AddressSanitizerModule::InstrumentGlobals(IRBuilder<> &IRB, Module &M) {
ConstantExpr::getPointerCast(ModuleName, IntptrTy),
ConstantInt::get(IntptrTy, MD.IsDynInit), SourceLoc, nullptr);
- if (ClInitializers && MD.IsDynInit)
- HasDynamicallyInitializedGlobals = true;
+ if (ClInitializers && MD.IsDynInit) HasDynamicallyInitializedGlobals = true;
DEBUG(dbgs() << "NEW GLOBAL: " << *NewGlobal << "\n");
}
@@ -1273,9 +1356,9 @@ bool AddressSanitizerModule::InstrumentGlobals(IRBuilder<> &IRB, Module &M) {
// We also need to unregister globals at the end, e.g. when a shared library
// gets closed.
- Function *AsanDtorFunction = Function::Create(
- FunctionType::get(Type::getVoidTy(*C), false),
- GlobalValue::InternalLinkage, kAsanModuleDtorName, &M);
+ Function *AsanDtorFunction =
+ Function::Create(FunctionType::get(Type::getVoidTy(*C), false),
+ GlobalValue::InternalLinkage, kAsanModuleDtorName, &M);
BasicBlock *AsanDtorBB = BasicBlock::Create(*C, "", AsanDtorFunction);
IRBuilder<> IRB_Dtor(ReturnInst::Create(*C, AsanDtorBB));
IRB_Dtor.CreateCall2(AsanUnregisterGlobals,
@@ -1288,12 +1371,8 @@ bool AddressSanitizerModule::InstrumentGlobals(IRBuilder<> &IRB, Module &M) {
}
bool AddressSanitizerModule::runOnModule(Module &M) {
- DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>();
- if (!DLP)
- return false;
- DL = &DLP->getDataLayout();
C = &(M.getContext());
- int LongSize = DL->getPointerSizeInBits();
+ int LongSize = M.getDataLayout().getPointerSizeInBits();
IntptrTy = Type::getIntNTy(*C, LongSize);
TargetTriple = Triple(M.getTargetTriple());
Mapping = getShadowMapping(TargetTriple, LongSize);
@@ -1305,8 +1384,7 @@ bool AddressSanitizerModule::runOnModule(Module &M) {
assert(CtorFunc);
IRBuilder<> IRB(CtorFunc->getEntryBlock().getTerminator());
- if (ClGlobals)
- Changed |= InstrumentGlobals(IRB, M);
+ if (ClGlobals) Changed |= InstrumentGlobals(IRB, M);
return Changed;
}
@@ -1314,33 +1392,34 @@ bool AddressSanitizerModule::runOnModule(Module &M) {
void AddressSanitizer::initializeCallbacks(Module &M) {
IRBuilder<> IRB(*C);
// Create __asan_report* callbacks.
- for (size_t AccessIsWrite = 0; AccessIsWrite <= 1; AccessIsWrite++) {
- for (size_t AccessSizeIndex = 0; AccessSizeIndex < kNumberOfAccessSizes;
- AccessSizeIndex++) {
- // IsWrite and TypeSize are encoded in the function name.
- std::string Suffix =
- (AccessIsWrite ? "store" : "load") + itostr(1 << AccessSizeIndex);
- AsanErrorCallback[AccessIsWrite][AccessSizeIndex] =
- checkInterfaceFunction(
- M.getOrInsertFunction(kAsanReportErrorTemplate + Suffix,
- IRB.getVoidTy(), IntptrTy, nullptr));
- AsanMemoryAccessCallback[AccessIsWrite][AccessSizeIndex] =
- checkInterfaceFunction(
- M.getOrInsertFunction(ClMemoryAccessCallbackPrefix + Suffix,
- IRB.getVoidTy(), IntptrTy, nullptr));
+ // IsWrite, TypeSize and Exp are encoded in the function name.
+ for (int Exp = 0; Exp < 2; Exp++) {
+ for (size_t AccessIsWrite = 0; AccessIsWrite <= 1; AccessIsWrite++) {
+ const std::string TypeStr = AccessIsWrite ? "store" : "load";
+ const std::string ExpStr = Exp ? "exp_" : "";
+ const Type *ExpType = Exp ? Type::getInt32Ty(*C) : nullptr;
+ AsanErrorCallbackSized[AccessIsWrite][Exp] =
+ checkInterfaceFunction(M.getOrInsertFunction(
+ kAsanReportErrorTemplate + ExpStr + TypeStr + "_n",
+ IRB.getVoidTy(), IntptrTy, IntptrTy, ExpType, nullptr));
+ AsanMemoryAccessCallbackSized[AccessIsWrite][Exp] =
+ checkInterfaceFunction(M.getOrInsertFunction(
+ ClMemoryAccessCallbackPrefix + ExpStr + TypeStr + "N",
+ IRB.getVoidTy(), IntptrTy, IntptrTy, ExpType, nullptr));
+ for (size_t AccessSizeIndex = 0; AccessSizeIndex < kNumberOfAccessSizes;
+ AccessSizeIndex++) {
+ const std::string Suffix = TypeStr + itostr(1 << AccessSizeIndex);
+ AsanErrorCallback[AccessIsWrite][Exp][AccessSizeIndex] =
+ checkInterfaceFunction(M.getOrInsertFunction(
+ kAsanReportErrorTemplate + ExpStr + Suffix, IRB.getVoidTy(),
+ IntptrTy, ExpType, nullptr));
+ AsanMemoryAccessCallback[AccessIsWrite][Exp][AccessSizeIndex] =
+ checkInterfaceFunction(M.getOrInsertFunction(
+ ClMemoryAccessCallbackPrefix + ExpStr + Suffix, IRB.getVoidTy(),
+ IntptrTy, ExpType, nullptr));
+ }
}
}
- AsanErrorCallbackSized[0] = checkInterfaceFunction(M.getOrInsertFunction(
- kAsanReportLoadN, IRB.getVoidTy(), IntptrTy, IntptrTy, nullptr));
- AsanErrorCallbackSized[1] = checkInterfaceFunction(M.getOrInsertFunction(
- kAsanReportStoreN, IRB.getVoidTy(), IntptrTy, IntptrTy, nullptr));
-
- AsanMemoryAccessCallbackSized[0] = checkInterfaceFunction(
- M.getOrInsertFunction(ClMemoryAccessCallbackPrefix + "loadN",
- IRB.getVoidTy(), IntptrTy, IntptrTy, nullptr));
- AsanMemoryAccessCallbackSized[1] = checkInterfaceFunction(
- M.getOrInsertFunction(ClMemoryAccessCallbackPrefix + "storeN",
- IRB.getVoidTy(), IntptrTy, IntptrTy, nullptr));
AsanMemmove = checkInterfaceFunction(M.getOrInsertFunction(
ClMemoryAccessCallbackPrefix + "memmove", IRB.getInt8PtrTy(),
@@ -1368,21 +1447,17 @@ void AddressSanitizer::initializeCallbacks(Module &M) {
// virtual
bool AddressSanitizer::doInitialization(Module &M) {
// Initialize the private fields. No one has accessed them before.
- DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>();
- if (!DLP)
- report_fatal_error("data layout missing");
- DL = &DLP->getDataLayout();
GlobalsMD.init(M);
C = &(M.getContext());
- LongSize = DL->getPointerSizeInBits();
+ LongSize = M.getDataLayout().getPointerSizeInBits();
IntptrTy = Type::getIntNTy(*C, LongSize);
TargetTriple = Triple(M.getTargetTriple());
- AsanCtorFunction = Function::Create(
- FunctionType::get(Type::getVoidTy(*C), false),
- GlobalValue::InternalLinkage, kAsanModuleCtorName, &M);
+ AsanCtorFunction =
+ Function::Create(FunctionType::get(Type::getVoidTy(*C), false),
+ GlobalValue::InternalLinkage, kAsanModuleCtorName, &M);
BasicBlock *AsanCtorBB = BasicBlock::Create(*C, "", AsanCtorFunction);
// call __asan_init in the module ctor.
IRBuilder<> IRB(ReturnInst::Create(*C, AsanCtorBB));
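Stepping back from the hunks above: initializeCallbacks() now derives every report/access callback name from three dimensions (Exp, IsWrite, access size). A rough standalone sketch of the resulting symbol scheme; the literal prefixes and kNumberOfAccessSizes == 5 are assumptions based on the usual ASan defaults, not shown in this diff:

#include <cstdio>
#include <string>

int main() {
  // Assumed values for kAsanReportErrorTemplate, the default
  // ClMemoryAccessCallbackPrefix, and kNumberOfAccessSizes.
  const std::string kReportPrefix = "__asan_report_";
  const std::string kAccessPrefix = "__asan_";
  const int kNumberOfAccessSizes = 5;  // 1, 2, 4, 8, 16 bytes

  for (int Exp = 0; Exp < 2; Exp++) {
    for (int IsWrite = 0; IsWrite <= 1; IsWrite++) {
      std::string TypeStr = IsWrite ? "store" : "load";
      std::string ExpStr = Exp ? "exp_" : "";
      // Sized variants, e.g. __asan_report_exp_load_n / __asan_exp_loadN.
      printf("%s\n", (kReportPrefix + ExpStr + TypeStr + "_n").c_str());
      printf("%s\n", (kAccessPrefix + ExpStr + TypeStr + "N").c_str());
      for (int SizeIndex = 0; SizeIndex < kNumberOfAccessSizes; SizeIndex++) {
        // Fixed-size variants, e.g. __asan_report_exp_store4.
        std::string Suffix = TypeStr + std::to_string(1 << SizeIndex);
        printf("%s\n", (kReportPrefix + ExpStr + Suffix).c_str());
        printf("%s\n", (kAccessPrefix + ExpStr + Suffix).c_str());
      }
    }
  }
}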
@@ -1424,22 +1499,21 @@ bool AddressSanitizer::runOnFunction(Function &F) {
// If needed, insert __asan_init before checking for SanitizeAddress attr.
maybeInsertAsanInitAtFunctionEntry(F);
- if (!F.hasFnAttribute(Attribute::SanitizeAddress))
- return false;
+ if (!F.hasFnAttribute(Attribute::SanitizeAddress)) return false;
- if (!ClDebugFunc.empty() && ClDebugFunc != F.getName())
- return false;
+ if (!ClDebugFunc.empty() && ClDebugFunc != F.getName()) return false;
// We want to instrument every address only once per basic block (unless there
// are calls between uses).
- SmallSet<Value*, 16> TempsToInstrument;
- SmallVector<Instruction*, 16> ToInstrument;
- SmallVector<Instruction*, 8> NoReturnCalls;
- SmallVector<BasicBlock*, 16> AllBlocks;
- SmallVector<Instruction*, 16> PointerComparisonsOrSubtracts;
+ SmallSet<Value *, 16> TempsToInstrument;
+ SmallVector<Instruction *, 16> ToInstrument;
+ SmallVector<Instruction *, 8> NoReturnCalls;
+ SmallVector<BasicBlock *, 16> AllBlocks;
+ SmallVector<Instruction *, 16> PointerComparisonsOrSubtracts;
int NumAllocas = 0;
bool IsWrite;
unsigned Alignment;
+ uint64_t TypeSize;
// Fill the set of memory operations to instrument.
for (auto &BB : F) {
@@ -1448,8 +1522,8 @@ bool AddressSanitizer::runOnFunction(Function &F) {
int NumInsnsPerBB = 0;
for (auto &Inst : BB) {
if (LooksLikeCodeInBug11395(&Inst)) return false;
- if (Value *Addr =
- isInterestingMemoryAccess(&Inst, &IsWrite, &Alignment)) {
+ if (Value *Addr = isInterestingMemoryAccess(&Inst, &IsWrite, &TypeSize,
+ &Alignment)) {
if (ClOpt && ClOptSameTemp) {
if (!TempsToInstrument.insert(Addr).second)
continue; // We've seen this temp in the current BB.
@@ -1461,21 +1535,18 @@ bool AddressSanitizer::runOnFunction(Function &F) {
} else if (isa<MemIntrinsic>(Inst)) {
// ok, take it.
} else {
- if (isa<AllocaInst>(Inst))
- NumAllocas++;
+ if (isa<AllocaInst>(Inst)) NumAllocas++;
CallSite CS(&Inst);
if (CS) {
// A call inside BB.
TempsToInstrument.clear();
- if (CS.doesNotReturn())
- NoReturnCalls.push_back(CS.getInstruction());
+ if (CS.doesNotReturn()) NoReturnCalls.push_back(CS.getInstruction());
}
continue;
}
ToInstrument.push_back(&Inst);
NumInsnsPerBB++;
- if (NumInsnsPerBB >= ClMaxInsnsToInstrumentPerBB)
- break;
+ if (NumInsnsPerBB >= ClMaxInsnsToInstrumentPerBB) break;
}
}
@@ -1484,13 +1555,20 @@ bool AddressSanitizer::runOnFunction(Function &F) {
ToInstrument.size() > (unsigned)ClInstrumentationWithCallsThreshold)
UseCalls = true;
+ const TargetLibraryInfo *TLI =
+ &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();
+ const DataLayout &DL = F.getParent()->getDataLayout();
+ ObjectSizeOffsetVisitor ObjSizeVis(DL, TLI, F.getContext(),
+ /*RoundToAlign=*/true);
+
// Instrument.
int NumInstrumented = 0;
for (auto Inst : ToInstrument) {
if (ClDebugMin < 0 || ClDebugMax < 0 ||
(NumInstrumented >= ClDebugMin && NumInstrumented <= ClDebugMax)) {
- if (isInterestingMemoryAccess(Inst, &IsWrite, &Alignment))
- instrumentMop(Inst, UseCalls);
+ if (isInterestingMemoryAccess(Inst, &IsWrite, &TypeSize, &Alignment))
+ instrumentMop(ObjSizeVis, Inst, UseCalls,
+ F.getParent()->getDataLayout());
else
instrumentMemIntrinsic(cast<MemIntrinsic>(Inst));
}
@@ -1549,10 +1627,9 @@ void FunctionStackPoisoner::initializeCallbacks(Module &M) {
IntptrTy, IntptrTy, nullptr));
}
-void
-FunctionStackPoisoner::poisonRedZones(ArrayRef<uint8_t> ShadowBytes,
- IRBuilder<> &IRB, Value *ShadowBase,
- bool DoPoison) {
+void FunctionStackPoisoner::poisonRedZones(ArrayRef<uint8_t> ShadowBytes,
+ IRBuilder<> &IRB, Value *ShadowBase,
+ bool DoPoison) {
size_t n = ShadowBytes.size();
size_t i = 0;
// We need to (un)poison n bytes of stack shadow. Poison as many as we can
@@ -1563,7 +1640,7 @@ FunctionStackPoisoner::poisonRedZones(ArrayRef<uint8_t> ShadowBytes,
for (; i + LargeStoreSizeInBytes - 1 < n; i += LargeStoreSizeInBytes) {
uint64_t Val = 0;
for (size_t j = 0; j < LargeStoreSizeInBytes; j++) {
- if (ASan.DL->isLittleEndian())
+ if (F.getParent()->getDataLayout().isLittleEndian())
Val |= (uint64_t)ShadowBytes[i + j] << (8 * j);
else
Val = (Val << 8) | ShadowBytes[i + j];
@@ -1582,9 +1659,8 @@ FunctionStackPoisoner::poisonRedZones(ArrayRef<uint8_t> ShadowBytes,
static int StackMallocSizeClass(uint64_t LocalStackSize) {
assert(LocalStackSize <= kMaxStackMallocSize);
uint64_t MaxSize = kMinStackMallocSize;
- for (int i = 0; ; i++, MaxSize *= 2)
- if (LocalStackSize <= MaxSize)
- return i;
+ for (int i = 0;; i++, MaxSize *= 2)
+ if (LocalStackSize <= MaxSize) return i;
llvm_unreachable("impossible LocalStackSize");
}
@@ -1596,18 +1672,21 @@ static int StackMallocSizeClass(uint64_t LocalStackSize) {
void FunctionStackPoisoner::SetShadowToStackAfterReturnInlined(
IRBuilder<> &IRB, Value *ShadowBase, int Size) {
assert(!(Size % 8));
- assert(kAsanStackAfterReturnMagic == 0xf5);
+
+ // kAsanStackAfterReturnMagic is 0xf5.
+ const uint64_t kAsanStackAfterReturnMagic64 = 0xf5f5f5f5f5f5f5f5ULL;
+
for (int i = 0; i < Size; i += 8) {
Value *p = IRB.CreateAdd(ShadowBase, ConstantInt::get(IntptrTy, i));
- IRB.CreateStore(ConstantInt::get(IRB.getInt64Ty(), 0xf5f5f5f5f5f5f5f5ULL),
- IRB.CreateIntToPtr(p, IRB.getInt64Ty()->getPointerTo()));
+ IRB.CreateStore(
+ ConstantInt::get(IRB.getInt64Ty(), kAsanStackAfterReturnMagic64),
+ IRB.CreateIntToPtr(p, IRB.getInt64Ty()->getPointerTo()));
}
}
static DebugLoc getFunctionEntryDebugLocation(Function &F) {
for (const auto &Inst : F.getEntryBlock())
- if (!isa<AllocaInst>(Inst))
- return Inst.getDebugLoc();
+ if (!isa<AllocaInst>(Inst)) return Inst.getDebugLoc();
return DebugLoc();
}
@@ -1664,9 +1743,9 @@ void FunctionStackPoisoner::poisonStack() {
SmallVector<ASanStackVariableDescription, 16> SVD;
SVD.reserve(AllocaVec.size());
for (AllocaInst *AI : AllocaVec) {
- ASanStackVariableDescription D = { AI->getName().data(),
- getAllocaSizeInBytes(AI),
- AI->getAlignment(), AI, 0};
+ ASanStackVariableDescription D = {AI->getName().data(),
+ ASan.getAllocaSizeInBytes(AI),
+ AI->getAlignment(), AI, 0};
SVD.push_back(D);
}
// Minimal header size (left redzone) is 4 pointers,
@@ -1757,19 +1836,19 @@ void FunctionStackPoisoner::poisonStack() {
BasePlus0);
// Write the frame description constant to redzone[1].
Value *BasePlus1 = IRB.CreateIntToPtr(
- IRB.CreateAdd(LocalStackBase, ConstantInt::get(IntptrTy, ASan.LongSize/8)),
- IntptrPtrTy);
+ IRB.CreateAdd(LocalStackBase,
+ ConstantInt::get(IntptrTy, ASan.LongSize / 8)),
+ IntptrPtrTy);
GlobalVariable *StackDescriptionGlobal =
createPrivateGlobalForString(*F.getParent(), L.DescriptionString,
- /*AllowMerging*/true);
- Value *Description = IRB.CreatePointerCast(StackDescriptionGlobal,
- IntptrTy);
+ /*AllowMerging*/ true);
+ Value *Description = IRB.CreatePointerCast(StackDescriptionGlobal, IntptrTy);
IRB.CreateStore(Description, BasePlus1);
// Write the PC to redzone[2].
Value *BasePlus2 = IRB.CreateIntToPtr(
- IRB.CreateAdd(LocalStackBase, ConstantInt::get(IntptrTy,
- 2 * ASan.LongSize/8)),
- IntptrPtrTy);
+ IRB.CreateAdd(LocalStackBase,
+ ConstantInt::get(IntptrTy, 2 * ASan.LongSize / 8)),
+ IntptrPtrTy);
IRB.CreateStore(IRB.CreatePointerCast(&F, IntptrTy), BasePlus2);
// Poison the stack redzones at the entry.
@@ -1830,8 +1909,7 @@ void FunctionStackPoisoner::poisonStack() {
}
// We are done. Remove the old unused alloca instructions.
- for (auto AI : AllocaVec)
- AI->eraseFromParent();
+ for (auto AI : AllocaVec) AI->eraseFromParent();
}
void FunctionStackPoisoner::poisonAlloca(Value *V, uint64_t Size,
@@ -1839,9 +1917,9 @@ void FunctionStackPoisoner::poisonAlloca(Value *V, uint64_t Size,
// For now just insert the call to ASan runtime.
Value *AddrArg = IRB.CreatePointerCast(V, IntptrTy);
Value *SizeArg = ConstantInt::get(IntptrTy, Size);
- IRB.CreateCall2(DoPoison ? AsanPoisonStackMemoryFunc
- : AsanUnpoisonStackMemoryFunc,
- AddrArg, SizeArg);
+ IRB.CreateCall2(
+ DoPoison ? AsanPoisonStackMemoryFunc : AsanUnpoisonStackMemoryFunc,
+ AddrArg, SizeArg);
}
// Handling llvm.lifetime intrinsics for a given %alloca:
@@ -1856,12 +1934,11 @@ void FunctionStackPoisoner::poisonAlloca(Value *V, uint64_t Size,
AllocaInst *FunctionStackPoisoner::findAllocaForValue(Value *V) {
if (AllocaInst *AI = dyn_cast<AllocaInst>(V))
// We're interested only in allocas we can handle.
- return isInterestingAlloca(*AI) ? AI : nullptr;
+ return ASan.isInterestingAlloca(*AI) ? AI : nullptr;
// See if we've already calculated (or started to calculate) alloca for a
// given value.
AllocaForValueMapTy::iterator I = AllocaForValue.find(V);
- if (I != AllocaForValue.end())
- return I->second;
+ if (I != AllocaForValue.end()) return I->second;
// Store 0 while we're calculating alloca for value V to avoid
// infinite recursion if the value references itself.
AllocaForValue[V] = nullptr;
@@ -1880,8 +1957,7 @@ AllocaInst *FunctionStackPoisoner::findAllocaForValue(Value *V) {
Res = IncValueAI;
}
}
- if (Res)
- AllocaForValue[V] = Res;
+ if (Res) AllocaForValue[V] = Res;
return Res;
}
@@ -1912,14 +1988,14 @@ Value *FunctionStackPoisoner::computePartialRzMagic(Value *PartialSize,
Value *Shift = IRB.CreateAnd(PartialSize, IRB.getInt32(~7));
unsigned Val1Int = kAsanAllocaPartialVal1;
unsigned Val2Int = kAsanAllocaPartialVal2;
- if (!ASan.DL->isLittleEndian()) {
+ if (!F.getParent()->getDataLayout().isLittleEndian()) {
Val1Int = sys::getSwappedBytes(Val1Int);
Val2Int = sys::getSwappedBytes(Val2Int);
}
Value *Val1 = shiftAllocaMagic(IRB.getInt32(Val1Int), IRB, Shift);
Value *PartialBits = IRB.CreateAnd(PartialSize, IRB.getInt32(7));
// For BigEndian get 0x000000YZ -> 0xYZ000000.
- if (ASan.DL->isBigEndian())
+ if (F.getParent()->getDataLayout().isBigEndian())
PartialBits = IRB.CreateShl(PartialBits, IRB.getInt32(24));
Value *Val2 = IRB.getInt32(Val2Int);
Value *Cond =
@@ -1953,7 +2029,8 @@ void FunctionStackPoisoner::handleDynamicAllocaCall(
// redzones, and OldSize is the number of allocated blocks with
// ElementSize size, so the allocated memory size in bytes is
// OldSize * ElementSize.
- unsigned ElementSize = ASan.DL->getTypeAllocSize(AI->getAllocatedType());
+ unsigned ElementSize =
+ F.getParent()->getDataLayout().getTypeAllocSize(AI->getAllocatedType());
Value *OldSize = IRB.CreateMul(AI->getArraySize(),
ConstantInt::get(IntptrTy, ElementSize));
@@ -2021,3 +2098,20 @@ void FunctionStackPoisoner::handleDynamicAllocaCall(
AI->eraseFromParent();
NumInstrumentedDynamicAllocas++;
}
+
+// isSafeAccess returns true if Addr is always inbounds with respect to its
+// base object. For example, it is a field access or an array access with
+// a constant inbounds index.
+bool AddressSanitizer::isSafeAccess(ObjectSizeOffsetVisitor &ObjSizeVis,
+ Value *Addr, uint64_t TypeSize) const {
+ SizeOffsetType SizeOffset = ObjSizeVis.compute(Addr);
+ if (!ObjSizeVis.bothKnown(SizeOffset)) return false;
+ uint64_t Size = SizeOffset.first.getZExtValue();
+ int64_t Offset = SizeOffset.second.getSExtValue();
+ // Three checks are required to ensure safety:
+ // . Offset >= 0 (since the offset is given from the base ptr)
+ // . Size >= Offset (unsigned)
+ // . Size - Offset >= NeededSize (unsigned)
+ return Offset >= 0 && Size >= uint64_t(Offset) &&
+ Size - uint64_t(Offset) >= TypeSize / 8;
+}
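The new isSafeAccess() above boils down to three unsigned-safe comparisons. A self-contained model with a couple of worked cases (illustrative, not part of the patch; NeededSize corresponds to TypeSize / 8 in the pass):

#include <cassert>
#include <cstdint>

// The access [Offset, Offset + NeededSize) must lie inside a base object
// of size Size; the comparison order avoids unsigned-subtraction wraparound.
bool IsSafeAccess(uint64_t Size, int64_t Offset, uint64_t NeededSize) {
  return Offset >= 0 &&                         // offset is from the base ptr
         Size >= uint64_t(Offset) &&            // offset is inside the object
         Size - uint64_t(Offset) >= NeededSize; // enough room past the offset
}

int main() {
  assert(IsSafeAccess(16, 8, 8));    // last 8 bytes of a 16-byte object: OK
  assert(!IsSafeAccess(16, 12, 8));  // would run 4 bytes past the end
  assert(!IsSafeAccess(16, -4, 8));  // negative offset: below the object
}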
diff --git a/lib/Transforms/Instrumentation/BoundsChecking.cpp b/lib/Transforms/Instrumentation/BoundsChecking.cpp
index 2b5f39c..8113834 100644
--- a/lib/Transforms/Instrumentation/BoundsChecking.cpp
+++ b/lib/Transforms/Instrumentation/BoundsChecking.cpp
@@ -16,6 +16,7 @@
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/MemoryBuiltins.h"
#include "llvm/Analysis/TargetFolder.h"
+#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InstIterator.h"
@@ -24,7 +25,6 @@
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/Analysis/TargetLibraryInfo.h"
using namespace llvm;
#define DEBUG_TYPE "bounds-checking"
@@ -49,12 +49,10 @@ namespace {
bool runOnFunction(Function &F) override;
void getAnalysisUsage(AnalysisUsage &AU) const override {
- AU.addRequired<DataLayoutPass>();
AU.addRequired<TargetLibraryInfoWrapperPass>();
}
private:
- const DataLayout *DL;
const TargetLibraryInfo *TLI;
ObjectSizeOffsetEvaluator *ObjSizeEval;
BuilderTy *Builder;
@@ -63,7 +61,7 @@ namespace {
BasicBlock *getTrapBB();
void emitBranchToTrap(Value *Cmp = nullptr);
- bool instrument(Value *Ptr, Value *Val);
+ bool instrument(Value *Ptr, Value *Val, const DataLayout &DL);
};
}
@@ -125,8 +123,9 @@ void BoundsChecking::emitBranchToTrap(Value *Cmp) {
/// result from the load or the value being stored. It is used to determine the
/// size of memory block that is touched.
/// Returns true if any change was made to the IR, false otherwise.
-bool BoundsChecking::instrument(Value *Ptr, Value *InstVal) {
- uint64_t NeededSize = DL->getTypeStoreSize(InstVal->getType());
+bool BoundsChecking::instrument(Value *Ptr, Value *InstVal,
+ const DataLayout &DL) {
+ uint64_t NeededSize = DL.getTypeStoreSize(InstVal->getType());
DEBUG(dbgs() << "Instrument " << *Ptr << " for " << Twine(NeededSize)
<< " bytes\n");
@@ -141,7 +140,7 @@ bool BoundsChecking::instrument(Value *Ptr, Value *InstVal) {
Value *Offset = SizeOffset.second;
ConstantInt *SizeCI = dyn_cast<ConstantInt>(Size);
- Type *IntTy = DL->getIntPtrType(Ptr->getType());
+ Type *IntTy = DL.getIntPtrType(Ptr->getType());
Value *NeededSizeVal = ConstantInt::get(IntTy, NeededSize);
// three checks are required to ensure safety:
@@ -165,7 +164,7 @@ bool BoundsChecking::instrument(Value *Ptr, Value *InstVal) {
}
bool BoundsChecking::runOnFunction(Function &F) {
- DL = &getAnalysis<DataLayoutPass>().getDataLayout();
+ const DataLayout &DL = F.getParent()->getDataLayout();
TLI = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();
TrapBB = nullptr;
@@ -192,13 +191,16 @@ bool BoundsChecking::runOnFunction(Function &F) {
Builder->SetInsertPoint(Inst);
if (LoadInst *LI = dyn_cast<LoadInst>(Inst)) {
- MadeChange |= instrument(LI->getPointerOperand(), LI);
+ MadeChange |= instrument(LI->getPointerOperand(), LI, DL);
} else if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) {
- MadeChange |= instrument(SI->getPointerOperand(), SI->getValueOperand());
+ MadeChange |=
+ instrument(SI->getPointerOperand(), SI->getValueOperand(), DL);
} else if (AtomicCmpXchgInst *AI = dyn_cast<AtomicCmpXchgInst>(Inst)) {
- MadeChange |= instrument(AI->getPointerOperand(),AI->getCompareOperand());
+ MadeChange |=
+ instrument(AI->getPointerOperand(), AI->getCompareOperand(), DL);
} else if (AtomicRMWInst *AI = dyn_cast<AtomicRMWInst>(Inst)) {
- MadeChange |= instrument(AI->getPointerOperand(), AI->getValOperand());
+ MadeChange |=
+ instrument(AI->getPointerOperand(), AI->getValOperand(), DL);
} else {
llvm_unreachable("unknown Instruction type");
}
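The BoundsChecking changes above are one instance of the commit-wide refactor: the DataLayout now hangs off the Module instead of a DataLayoutPass that could be absent. A compilable sketch of the new access pattern, assuming LLVM headers of this era:

#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Type.h"
using namespace llvm;

// Any pass can now reach the layout through the IR itself; no
// getAnalysisIfAvailable<DataLayoutPass>() guard is needed.
uint64_t storeSizeOf(Function &F, Type *Ty) {
  const DataLayout &DL = F.getParent()->getDataLayout();
  return DL.getTypeStoreSize(Ty);
}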
diff --git a/lib/Transforms/Instrumentation/DataFlowSanitizer.cpp b/lib/Transforms/Instrumentation/DataFlowSanitizer.cpp
index 6adf0d2..b3925ee 100644
--- a/lib/Transforms/Instrumentation/DataFlowSanitizer.cpp
+++ b/lib/Transforms/Instrumentation/DataFlowSanitizer.cpp
@@ -217,7 +217,6 @@ class DataFlowSanitizer : public ModulePass {
WK_Custom
};
- const DataLayout *DL;
Module *Mod;
LLVMContext *Ctx;
IntegerType *ShadowTy;
@@ -422,16 +421,13 @@ bool DataFlowSanitizer::doInitialization(Module &M) {
bool IsMIPS64 = TargetTriple.getArch() == llvm::Triple::mips64 ||
TargetTriple.getArch() == llvm::Triple::mips64el;
- DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>();
- if (!DLP)
- report_fatal_error("data layout missing");
- DL = &DLP->getDataLayout();
+ const DataLayout &DL = M.getDataLayout();
Mod = &M;
Ctx = &M.getContext();
ShadowTy = IntegerType::get(*Ctx, ShadowWidth);
ShadowPtrTy = PointerType::getUnqual(ShadowTy);
- IntptrTy = DL->getIntPtrType(*Ctx);
+ IntptrTy = DL.getIntPtrType(*Ctx);
ZeroShadow = ConstantInt::getSigned(ShadowTy, 0);
ShadowPtrMul = ConstantInt::getSigned(IntptrTy, ShadowWidth / 8);
if (IsX86_64)
@@ -593,9 +589,6 @@ Constant *DataFlowSanitizer::getOrBuildTrampolineFunction(FunctionType *FT,
}
bool DataFlowSanitizer::runOnModule(Module &M) {
- if (!DL)
- return false;
-
if (ABIList.isIn(M, "skip"))
return false;
@@ -1056,7 +1049,7 @@ Value *DFSanFunction::loadShadow(Value *Addr, uint64_t Size, uint64_t Align,
uint64_t ShadowAlign = Align * DFS.ShadowWidth / 8;
SmallVector<Value *, 2> Objs;
- GetUnderlyingObjects(Addr, Objs, DFS.DL);
+ GetUnderlyingObjects(Addr, Objs, Pos->getModule()->getDataLayout());
bool AllConstants = true;
for (SmallVector<Value *, 2>::iterator i = Objs.begin(), e = Objs.end();
i != e; ++i) {
@@ -1157,7 +1150,8 @@ Value *DFSanFunction::loadShadow(Value *Addr, uint64_t Size, uint64_t Align,
}
void DFSanVisitor::visitLoadInst(LoadInst &LI) {
- uint64_t Size = DFSF.DFS.DL->getTypeStoreSize(LI.getType());
+ auto &DL = LI.getModule()->getDataLayout();
+ uint64_t Size = DL.getTypeStoreSize(LI.getType());
if (Size == 0) {
DFSF.setShadow(&LI, DFSF.DFS.ZeroShadow);
return;
@@ -1167,7 +1161,7 @@ void DFSanVisitor::visitLoadInst(LoadInst &LI) {
if (ClPreserveAlignment) {
Align = LI.getAlignment();
if (Align == 0)
- Align = DFSF.DFS.DL->getABITypeAlignment(LI.getType());
+ Align = DL.getABITypeAlignment(LI.getType());
} else {
Align = 1;
}
@@ -1235,8 +1229,8 @@ void DFSanFunction::storeShadow(Value *Addr, uint64_t Size, uint64_t Align,
}
void DFSanVisitor::visitStoreInst(StoreInst &SI) {
- uint64_t Size =
- DFSF.DFS.DL->getTypeStoreSize(SI.getValueOperand()->getType());
+ auto &DL = SI.getModule()->getDataLayout();
+ uint64_t Size = DL.getTypeStoreSize(SI.getValueOperand()->getType());
if (Size == 0)
return;
@@ -1244,7 +1238,7 @@ void DFSanVisitor::visitStoreInst(StoreInst &SI) {
if (ClPreserveAlignment) {
Align = SI.getAlignment();
if (Align == 0)
- Align = DFSF.DFS.DL->getABITypeAlignment(SI.getValueOperand()->getType());
+ Align = DL.getABITypeAlignment(SI.getValueOperand()->getType());
} else {
Align = 1;
}
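A sketch of the per-instruction alignment fallback DFSan now performs, pulling the DataLayout from the instruction's module; this mirrors visitLoadInst() above and assumes the same era's LLVM API:

#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Module.h"
using namespace llvm;

// Honor an explicit alignment on the load; otherwise fall back to the ABI
// alignment for its type, taken from the module-level DataLayout.
unsigned shadowLoadAlign(LoadInst &LI, bool PreserveAlignment) {
  if (!PreserveAlignment) return 1;
  const DataLayout &DL = LI.getModule()->getDataLayout();
  unsigned Align = LI.getAlignment();
  return Align ? Align : DL.getABITypeAlignment(LI.getType());
}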
diff --git a/lib/Transforms/Instrumentation/GCOVProfiling.cpp b/lib/Transforms/Instrumentation/GCOVProfiling.cpp
index cb965fb..a793e69 100644
--- a/lib/Transforms/Instrumentation/GCOVProfiling.cpp
+++ b/lib/Transforms/Instrumentation/GCOVProfiling.cpp
@@ -47,6 +47,8 @@ using namespace llvm;
static cl::opt<std::string>
DefaultGCOVVersion("default-gcov-version", cl::init("402*"), cl::Hidden,
cl::ValueRequired);
+static cl::opt<bool> DefaultExitBlockBeforeBody("gcov-exit-block-before-body",
+ cl::init(false), cl::Hidden);
GCOVOptions GCOVOptions::getDefault() {
GCOVOptions Options;
@@ -55,6 +57,7 @@ GCOVOptions GCOVOptions::getDefault() {
Options.UseCfgChecksum = false;
Options.NoRedZone = false;
Options.FunctionNamesInData = true;
+ Options.ExitBlockBeforeBody = DefaultExitBlockBeforeBody;
if (DefaultGCOVVersion.size() != 4) {
llvm::report_fatal_error(std::string("Invalid -default-gcov-version: ") +
@@ -70,20 +73,10 @@ namespace {
class GCOVProfiler : public ModulePass {
public:
static char ID;
- GCOVProfiler() : ModulePass(ID), Options(GCOVOptions::getDefault()) {
- init();
- }
- GCOVProfiler(const GCOVOptions &Options) : ModulePass(ID), Options(Options){
+ GCOVProfiler() : GCOVProfiler(GCOVOptions::getDefault()) {}
+ GCOVProfiler(const GCOVOptions &Opts) : ModulePass(ID), Options(Opts) {
assert((Options.EmitNotes || Options.EmitData) &&
"GCOVProfiler asked to do nothing?");
- init();
- }
- const char *getPassName() const override {
- return "GCOV Profiler";
- }
-
- private:
- void init() {
ReversedVersion[0] = Options.Version[3];
ReversedVersion[1] = Options.Version[2];
ReversedVersion[2] = Options.Version[1];
@@ -91,6 +84,11 @@ namespace {
ReversedVersion[4] = '\0';
initializeGCOVProfilerPass(*PassRegistry::getPassRegistry());
}
+ const char *getPassName() const override {
+ return "GCOV Profiler";
+ }
+
+ private:
bool runOnModule(Module &M) override;
// Create the .gcno files for the Module based on DebugInfo.
@@ -312,7 +310,7 @@ namespace {
class GCOVFunction : public GCOVRecord {
public:
GCOVFunction(DISubprogram SP, raw_ostream *os, uint32_t Ident,
- bool UseCfgChecksum)
+ bool UseCfgChecksum, bool ExitBlockBeforeBody)
: SP(SP), Ident(Ident), UseCfgChecksum(UseCfgChecksum), CfgChecksum(0),
ReturnBlock(1, os) {
this->os = os;
@@ -322,11 +320,13 @@ namespace {
uint32_t i = 0;
for (auto &BB : *F) {
- // Skip index 1 (0, 2, 3, 4, ...) because that's assigned to the
- // ReturnBlock.
- bool first = i == 0;
- Blocks.insert(std::make_pair(&BB, GCOVBlock(i++ + !first, os)));
+ // Skip index 1 if it's assigned to the ReturnBlock.
+ if (i == 1 && ExitBlockBeforeBody)
+ ++i;
+ Blocks.insert(std::make_pair(&BB, GCOVBlock(i++, os)));
}
+ if (!ExitBlockBeforeBody)
+ ReturnBlock.Number = i;
std::string FunctionNameAndLine;
raw_string_ostream FNLOS(FunctionNameAndLine);
@@ -469,7 +469,7 @@ static bool functionHasLines(Function *F) {
if (Loc.isUnknown()) continue;
// Artificial lines such as calls to the global constructors.
- if (Loc.getLine() == 0) continue;
+ if (Loc.getLine() == 0) continue;
return true;
}
@@ -513,7 +513,8 @@ void GCOVProfiler::emitProfileNotes() {
EntryBlock.splitBasicBlock(It);
Funcs.push_back(make_unique<GCOVFunction>(SP, &out, FunctionIdent++,
- Options.UseCfgChecksum));
+ Options.UseCfgChecksum,
+ Options.ExitBlockBeforeBody));
GCOVFunction &Func = *Funcs.back();
for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB) {
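The block-numbering change in the GCOV hunks above is easier to see in isolation: with the new -gcov-exit-block-before-body flag, the synthetic return block keeps its constructor-assigned index 1 and the body skips that slot; otherwise the return block is renumbered after all body blocks. A small model (illustrative only):

#include <cstdio>
#include <vector>

std::vector<int> numberBlocks(int NumBodyBlocks, bool ExitBlockBeforeBody,
                              int &ReturnBlockNumber) {
  std::vector<int> BodyNumbers;
  int i = 0;
  for (int BB = 0; BB < NumBodyBlocks; ++BB) {
    // With the exit block up front it owns index 1, so the body skips it.
    if (i == 1 && ExitBlockBeforeBody) ++i;
    BodyNumbers.push_back(i++);
  }
  // Otherwise the exit block is numbered after all body blocks.
  ReturnBlockNumber = ExitBlockBeforeBody ? 1 : i;
  return BodyNumbers;
}

int main() {
  int Ret;
  auto Old = numberBlocks(3, true, Ret);   // body: 0, 2, 3; return: 1
  printf("return=%d body=%d,%d,%d\n", Ret, Old[0], Old[1], Old[2]);
  auto New = numberBlocks(3, false, Ret);  // body: 0, 1, 2; return: 3
  printf("return=%d body=%d,%d,%d\n", Ret, New[0], New[1], New[2]);
}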
diff --git a/lib/Transforms/Instrumentation/MemorySanitizer.cpp b/lib/Transforms/Instrumentation/MemorySanitizer.cpp
index 4152679..c2aa1e2 100644
--- a/lib/Transforms/Instrumentation/MemorySanitizer.cpp
+++ b/lib/Transforms/Instrumentation/MemorySanitizer.cpp
@@ -274,7 +274,6 @@ class MemorySanitizer : public FunctionPass {
MemorySanitizer(int TrackOrigins = 0)
: FunctionPass(ID),
TrackOrigins(std::max(TrackOrigins, (int)ClTrackOrigins)),
- DL(nullptr),
WarningFn(nullptr) {}
const char *getPassName() const override { return "MemorySanitizer"; }
bool runOnFunction(Function &F) override;
@@ -287,7 +286,6 @@ class MemorySanitizer : public FunctionPass {
/// \brief Track origins (allocation points) of uninitialized values.
int TrackOrigins;
- const DataLayout *DL;
LLVMContext *C;
Type *IntptrTy;
Type *OriginTy;
@@ -449,10 +447,7 @@ void MemorySanitizer::initializeCallbacks(Module &M) {
///
/// inserts a call to __msan_init to the module's constructor list.
bool MemorySanitizer::doInitialization(Module &M) {
- DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>();
- if (!DLP)
- report_fatal_error("data layout missing");
- DL = &DLP->getDataLayout();
+ auto &DL = M.getDataLayout();
Triple TargetTriple(M.getTargetTriple());
switch (TargetTriple.getOS()) {
@@ -604,7 +599,8 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
}
Value *originToIntptr(IRBuilder<> &IRB, Value *Origin) {
- unsigned IntptrSize = MS.DL->getTypeStoreSize(MS.IntptrTy);
+ const DataLayout &DL = F.getParent()->getDataLayout();
+ unsigned IntptrSize = DL.getTypeStoreSize(MS.IntptrTy);
if (IntptrSize == kOriginSize) return Origin;
assert(IntptrSize == kOriginSize * 2);
Origin = IRB.CreateIntCast(Origin, MS.IntptrTy, /* isSigned */ false);
@@ -614,8 +610,9 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
/// \brief Fill memory range with the given origin value.
void paintOrigin(IRBuilder<> &IRB, Value *Origin, Value *OriginPtr,
unsigned Size, unsigned Alignment) {
- unsigned IntptrAlignment = MS.DL->getABITypeAlignment(MS.IntptrTy);
- unsigned IntptrSize = MS.DL->getTypeStoreSize(MS.IntptrTy);
+ const DataLayout &DL = F.getParent()->getDataLayout();
+ unsigned IntptrAlignment = DL.getABITypeAlignment(MS.IntptrTy);
+ unsigned IntptrSize = DL.getTypeStoreSize(MS.IntptrTy);
assert(IntptrAlignment >= kMinOriginAlignment);
assert(IntptrSize >= kOriginSize);
@@ -643,8 +640,9 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
void storeOrigin(IRBuilder<> &IRB, Value *Addr, Value *Shadow, Value *Origin,
unsigned Alignment, bool AsCall) {
+ const DataLayout &DL = F.getParent()->getDataLayout();
unsigned OriginAlignment = std::max(kMinOriginAlignment, Alignment);
- unsigned StoreSize = MS.DL->getTypeStoreSize(Shadow->getType());
+ unsigned StoreSize = DL.getTypeStoreSize(Shadow->getType());
if (isa<StructType>(Shadow->getType())) {
paintOrigin(IRB, updateOrigin(Origin, IRB),
getOriginPtr(Addr, IRB, Alignment), StoreSize,
@@ -661,7 +659,7 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
}
unsigned TypeSizeInBits =
- MS.DL->getTypeSizeInBits(ConvertedShadow->getType());
+ DL.getTypeSizeInBits(ConvertedShadow->getType());
+ DL.getTypeSizeInBits(ConvertedShadow->getType());
unsigned SizeIndex = TypeSizeToSizeIndex(TypeSizeInBits);
if (AsCall && SizeIndex < kNumberOfAccessSizes) {
Value *Fn = MS.MaybeStoreOriginFn[SizeIndex];
@@ -731,8 +729,9 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
return;
}
- unsigned TypeSizeInBits =
- MS.DL->getTypeSizeInBits(ConvertedShadow->getType());
+ const DataLayout &DL = OrigIns->getModule()->getDataLayout();
+
+ unsigned TypeSizeInBits = DL.getTypeSizeInBits(ConvertedShadow->getType());
unsigned SizeIndex = TypeSizeToSizeIndex(TypeSizeInBits);
if (AsCall && SizeIndex < kNumberOfAccessSizes) {
Value *Fn = MS.MaybeWarningFn[SizeIndex];
@@ -772,7 +771,6 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
/// \brief Add MemorySanitizer instrumentation to a function.
bool runOnFunction() {
MS.initializeCallbacks(*F.getParent());
- if (!MS.DL) return false;
// In the presence of unreachable blocks, we may see Phi nodes with
// incoming nodes from such blocks. Since InstVisitor skips unreachable
@@ -828,8 +826,9 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
// This may return weird-sized types like i1.
if (IntegerType *IT = dyn_cast<IntegerType>(OrigTy))
return IT;
+ const DataLayout &DL = F.getParent()->getDataLayout();
if (VectorType *VT = dyn_cast<VectorType>(OrigTy)) {
- uint32_t EltSize = MS.DL->getTypeSizeInBits(VT->getElementType());
+ uint32_t EltSize = DL.getTypeSizeInBits(VT->getElementType());
return VectorType::get(IntegerType::get(*MS.C, EltSize),
VT->getNumElements());
}
@@ -845,7 +844,7 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
DEBUG(dbgs() << "getShadowTy: " << *ST << " ===> " << *Res << "\n");
return Res;
}
- uint32_t TypeSize = MS.DL->getTypeSizeInBits(OrigTy);
+ uint32_t TypeSize = DL.getTypeSizeInBits(OrigTy);
return IntegerType::get(*MS.C, TypeSize);
}
@@ -1038,14 +1037,16 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
Function *F = A->getParent();
IRBuilder<> EntryIRB(F->getEntryBlock().getFirstNonPHI());
unsigned ArgOffset = 0;
+ const DataLayout &DL = F->getParent()->getDataLayout();
for (auto &FArg : F->args()) {
if (!FArg.getType()->isSized()) {
DEBUG(dbgs() << "Arg is not sized\n");
continue;
}
- unsigned Size = FArg.hasByValAttr()
- ? MS.DL->getTypeAllocSize(FArg.getType()->getPointerElementType())
- : MS.DL->getTypeAllocSize(FArg.getType());
+ unsigned Size =
+ FArg.hasByValAttr()
+ ? DL.getTypeAllocSize(FArg.getType()->getPointerElementType())
+ : DL.getTypeAllocSize(FArg.getType());
if (A == &FArg) {
bool Overflow = ArgOffset + Size > kParamTLSSize;
Value *Base = getShadowPtrForArgument(&FArg, EntryIRB, ArgOffset);
@@ -1056,7 +1057,7 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
unsigned ArgAlign = FArg.getParamAlignment();
if (ArgAlign == 0) {
Type *EltType = A->getType()->getPointerElementType();
- ArgAlign = MS.DL->getABITypeAlignment(EltType);
+ ArgAlign = DL.getABITypeAlignment(EltType);
}
if (Overflow) {
// ParamTLS overflow.
@@ -2427,10 +2428,11 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
DEBUG(dbgs() << " Arg#" << i << ": " << *A <<
" Shadow: " << *ArgShadow << "\n");
bool ArgIsInitialized = false;
+ const DataLayout &DL = F.getParent()->getDataLayout();
if (CS.paramHasAttr(i + 1, Attribute::ByVal)) {
assert(A->getType()->isPointerTy() &&
"ByVal argument is not a pointer!");
- Size = MS.DL->getTypeAllocSize(A->getType()->getPointerElementType());
+ Size = DL.getTypeAllocSize(A->getType()->getPointerElementType());
if (ArgOffset + Size > kParamTLSSize) break;
unsigned ParamAlignment = CS.getParamAlignment(i + 1);
unsigned Alignment = std::min(ParamAlignment, kShadowTLSAlignment);
@@ -2438,7 +2440,7 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
getShadowPtr(A, Type::getInt8Ty(*MS.C), IRB),
Size, Alignment);
} else {
- Size = MS.DL->getTypeAllocSize(A->getType());
+ Size = DL.getTypeAllocSize(A->getType());
if (ArgOffset + Size > kParamTLSSize) break;
Store = IRB.CreateAlignedStore(ArgShadow, ArgShadowBase,
kShadowTLSAlignment);
@@ -2531,7 +2533,8 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
setShadow(&I, getCleanShadow(&I));
setOrigin(&I, getCleanOrigin());
IRBuilder<> IRB(I.getNextNode());
- uint64_t Size = MS.DL->getTypeAllocSize(I.getAllocatedType());
+ const DataLayout &DL = F.getParent()->getDataLayout();
+ uint64_t Size = DL.getTypeAllocSize(I.getAllocatedType());
if (PoisonStack && ClPoisonStackWithCall) {
IRB.CreateCall2(MS.MsanPoisonStackFn,
IRB.CreatePointerCast(&I, IRB.getInt8PtrTy()),
@@ -2723,6 +2726,7 @@ struct VarArgAMD64Helper : public VarArgHelper {
unsigned GpOffset = 0;
unsigned FpOffset = AMD64GpEndOffset;
unsigned OverflowOffset = AMD64FpEndOffset;
+ const DataLayout &DL = F.getParent()->getDataLayout();
for (CallSite::arg_iterator ArgIt = CS.arg_begin(), End = CS.arg_end();
ArgIt != End; ++ArgIt) {
Value *A = *ArgIt;
@@ -2732,7 +2736,7 @@ struct VarArgAMD64Helper : public VarArgHelper {
// ByVal arguments always go to the overflow area.
assert(A->getType()->isPointerTy());
Type *RealTy = A->getType()->getPointerElementType();
- uint64_t ArgSize = MS.DL->getTypeAllocSize(RealTy);
+ uint64_t ArgSize = DL.getTypeAllocSize(RealTy);
Value *Base = getShadowPtrForVAArgument(RealTy, IRB, OverflowOffset);
OverflowOffset += RoundUpToAlignment(ArgSize, 8);
IRB.CreateMemCpy(Base, MSV.getShadowPtr(A, IRB.getInt8Ty(), IRB),
@@ -2754,7 +2758,7 @@ struct VarArgAMD64Helper : public VarArgHelper {
FpOffset += 16;
break;
case AK_Memory:
- uint64_t ArgSize = MS.DL->getTypeAllocSize(A->getType());
+ uint64_t ArgSize = DL.getTypeAllocSize(A->getType());
Base = getShadowPtrForVAArgument(A->getType(), IRB, OverflowOffset);
OverflowOffset += RoundUpToAlignment(ArgSize, 8);
}
@@ -2862,11 +2866,12 @@ struct VarArgMIPS64Helper : public VarArgHelper {
void visitCallSite(CallSite &CS, IRBuilder<> &IRB) override {
unsigned VAArgOffset = 0;
+ const DataLayout &DL = F.getParent()->getDataLayout();
for (CallSite::arg_iterator ArgIt = CS.arg_begin() + 1, End = CS.arg_end();
ArgIt != End; ++ArgIt) {
Value *A = *ArgIt;
Value *Base;
- uint64_t ArgSize = MS.DL->getTypeAllocSize(A->getType());
+ uint64_t ArgSize = DL.getTypeAllocSize(A->getType());
#if defined(__MIPSEB__) || defined(MIPSEB)
// Adjust the shadow for arguments with size < 8 to match the placement
// of bits in a big-endian system.
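One subtlety in the MemorySanitizer hunks above: originToIntptr() widens the 4-byte origin id to intptr width when the two sizes differ. The replication step itself is cut off by the hunk boundary, so the sketch below assumes the standard Origin | (Origin << 32) pattern, which paints every 4-byte slot with the same origin id:

#include <cassert>
#include <cstdint>

// Model of originToIntptr() for a 64-bit intptr and a 4-byte origin
// (kOriginSize): zero-extend, then replicate into both halves.
uint64_t originToIntptr64(uint32_t Origin) {
  uint64_t O = Origin;     // zero-extend, as the unsigned IntCast does
  return (O << 32) | O;    // assumed replication: Origin | (Origin << 32)
}

int main() {
  assert(originToIntptr64(0xdeadbeef) == 0xdeadbeefdeadbeefULL);
}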
diff --git a/lib/Transforms/Instrumentation/SanitizerCoverage.cpp b/lib/Transforms/Instrumentation/SanitizerCoverage.cpp
index 8c56e87..289675e 100644
--- a/lib/Transforms/Instrumentation/SanitizerCoverage.cpp
+++ b/lib/Transforms/Instrumentation/SanitizerCoverage.cpp
@@ -59,6 +59,7 @@ static const char *const kSanCovWithCheckName = "__sanitizer_cov_with_check";
static const char *const kSanCovIndirCallName = "__sanitizer_cov_indir_call16";
static const char *const kSanCovTraceEnter = "__sanitizer_cov_trace_func_enter";
static const char *const kSanCovTraceBB = "__sanitizer_cov_trace_basic_block";
+static const char *const kSanCovTraceCmp = "__sanitizer_cov_trace_cmp";
static const char *const kSanCovModuleCtorName = "sancov.module_ctor";
static const uint64_t kSanCtorAndDtorPriority = 2;
@@ -72,7 +73,7 @@ static cl::opt<unsigned> ClCoverageBlockThreshold(
"sanitizer-coverage-block-threshold",
cl::desc("Use a callback with a guard check inside it if there are"
" more than this number of blocks."),
- cl::Hidden, cl::init(1000));
+ cl::Hidden, cl::init(500));
static cl::opt<bool>
ClExperimentalTracing("sanitizer-coverage-experimental-tracing",
@@ -80,6 +81,22 @@ static cl::opt<bool>
"callbacks at every basic block"),
cl::Hidden, cl::init(false));
+static cl::opt<bool>
+ ClExperimentalCMPTracing("sanitizer-coverage-experimental-trace-compares",
+ cl::desc("Experimental tracing of CMP and similar "
+ "instructions"),
+ cl::Hidden, cl::init(false));
+
+// Experimental 8-bit counters used as an additional search heuristic during
+// coverage-guided fuzzing.
+// The counters are not thread-friendly:
+// - contention on these counters may cause significant slowdown;
+// - the counter updates are racy and the results may be inaccurate.
+// They are also inaccurate due to 8-bit integer overflow.
+static cl::opt<bool> ClUse8bitCounters("sanitizer-coverage-8bit-counters",
+ cl::desc("Experimental 8-bit counters"),
+ cl::Hidden, cl::init(false));
+
namespace {
class SanitizerCoverageModule : public ModulePass {
@@ -94,26 +111,29 @@ class SanitizerCoverageModule : public ModulePass {
return "SanitizerCoverageModule";
}
- void getAnalysisUsage(AnalysisUsage &AU) const override {
- AU.addRequired<DataLayoutPass>();
- }
-
private:
void InjectCoverageForIndirectCalls(Function &F,
ArrayRef<Instruction *> IndirCalls);
- bool InjectCoverage(Function &F, ArrayRef<BasicBlock *> AllBlocks,
- ArrayRef<Instruction *> IndirCalls);
+ void InjectTraceForCmp(Function &F, ArrayRef<Instruction *> CmpTraceTargets);
+ bool InjectCoverage(Function &F, ArrayRef<BasicBlock *> AllBlocks);
+ void SetNoSanitizeMetadata(Instruction *I);
void InjectCoverageAtBlock(Function &F, BasicBlock &BB, bool UseCalls);
+ unsigned NumberOfInstrumentedBlocks() {
+ return SanCovFunction->getNumUses() + SanCovWithCheckFunction->getNumUses();
+ }
Function *SanCovFunction;
Function *SanCovWithCheckFunction;
Function *SanCovIndirCallFunction;
Function *SanCovModuleInit;
Function *SanCovTraceEnter, *SanCovTraceBB;
+ Function *SanCovTraceCmpFunction;
InlineAsm *EmptyAsm;
- Type *IntptrTy;
+ Type *IntptrTy, *Int64Ty;
LLVMContext *C;
+ const DataLayout *DL;
GlobalVariable *GuardArray;
+ GlobalVariable *EightBitCounterArray;
int CoverageLevel;
};
@@ -133,12 +153,13 @@ static Function *checkInterfaceFunction(Constant *FuncOrBitcast) {
bool SanitizerCoverageModule::runOnModule(Module &M) {
if (!CoverageLevel) return false;
C = &(M.getContext());
- DataLayoutPass *DLP = &getAnalysis<DataLayoutPass>();
- IntptrTy = Type::getIntNTy(*C, DLP->getDataLayout().getPointerSizeInBits());
+ DL = &M.getDataLayout();
+ IntptrTy = Type::getIntNTy(*C, DL->getPointerSizeInBits());
Type *VoidTy = Type::getVoidTy(*C);
IRBuilder<> IRB(*C);
Type *Int8PtrTy = PointerType::getUnqual(IRB.getInt8Ty());
Type *Int32PtrTy = PointerType::getUnqual(IRB.getInt32Ty());
+ Int64Ty = IRB.getInt64Ty();
Function *CtorFunc =
Function::Create(FunctionType::get(VoidTy, false),
@@ -152,9 +173,12 @@ bool SanitizerCoverageModule::runOnModule(Module &M) {
M.getOrInsertFunction(kSanCovWithCheckName, VoidTy, Int32PtrTy, nullptr));
SanCovIndirCallFunction = checkInterfaceFunction(M.getOrInsertFunction(
kSanCovIndirCallName, VoidTy, IntptrTy, IntptrTy, nullptr));
- SanCovModuleInit = checkInterfaceFunction(
- M.getOrInsertFunction(kSanCovModuleInitName, Type::getVoidTy(*C),
- Int32PtrTy, IntptrTy, Int8PtrTy, nullptr));
+ SanCovTraceCmpFunction = checkInterfaceFunction(M.getOrInsertFunction(
+ kSanCovTraceCmp, VoidTy, Int64Ty, Int64Ty, Int64Ty, nullptr));
+
+ SanCovModuleInit = checkInterfaceFunction(M.getOrInsertFunction(
+ kSanCovModuleInitName, Type::getVoidTy(*C), Int32PtrTy, IntptrTy,
+ Int8PtrTy, Int8PtrTy, nullptr));
SanCovModuleInit->setLinkage(Function::ExternalLinkage);
// We insert an empty inline asm after cov callbacks to avoid callback merge.
EmptyAsm = InlineAsm::get(FunctionType::get(IRB.getVoidTy(), false),
@@ -171,26 +195,49 @@ bool SanitizerCoverageModule::runOnModule(Module &M) {
// At this point we create a dummy array of guards because we don't
// know how many elements we will need.
Type *Int32Ty = IRB.getInt32Ty();
+ Type *Int8Ty = IRB.getInt8Ty();
+
GuardArray =
new GlobalVariable(M, Int32Ty, false, GlobalValue::ExternalLinkage,
nullptr, "__sancov_gen_cov_tmp");
+ if (ClUse8bitCounters)
+ EightBitCounterArray =
+ new GlobalVariable(M, Int8Ty, false, GlobalVariable::ExternalLinkage,
+ nullptr, "__sancov_gen_cov_tmp");
for (auto &F : M)
runOnFunction(F);
+ auto N = NumberOfInstrumentedBlocks();
+
// Now we know how many elements we need. Create an array of guards
// with one extra element at the beginning for the size.
- Type *Int32ArrayNTy =
- ArrayType::get(Int32Ty, SanCovFunction->getNumUses() + 1);
+ Type *Int32ArrayNTy = ArrayType::get(Int32Ty, N + 1);
GlobalVariable *RealGuardArray = new GlobalVariable(
M, Int32ArrayNTy, false, GlobalValue::PrivateLinkage,
Constant::getNullValue(Int32ArrayNTy), "__sancov_gen_cov");
+
// Replace the dummy array with the real one.
GuardArray->replaceAllUsesWith(
IRB.CreatePointerCast(RealGuardArray, Int32PtrTy));
GuardArray->eraseFromParent();
+ GlobalVariable *RealEightBitCounterArray;
+ if (ClUse8bitCounters) {
+ // Make sure the array is 16-aligned.
+ static const int kCounterAlignment = 16;
+ Type *Int8ArrayNTy =
+ ArrayType::get(Int8Ty, RoundUpToAlignment(N, kCounterAlignment));
+ RealEightBitCounterArray = new GlobalVariable(
+ M, Int8ArrayNTy, false, GlobalValue::PrivateLinkage,
+ Constant::getNullValue(Int8ArrayNTy), "__sancov_gen_cov_counter");
+ RealEightBitCounterArray->setAlignment(kCounterAlignment);
+ EightBitCounterArray->replaceAllUsesWith(
+ IRB.CreatePointerCast(RealEightBitCounterArray, Int8PtrTy));
+ EightBitCounterArray->eraseFromParent();
+ }
+
// Create variable for module (compilation unit) name
Constant *ModNameStrConst =
ConstantDataArray::getString(M.getContext(), M.getName(), true);
@@ -200,10 +247,13 @@ bool SanitizerCoverageModule::runOnModule(Module &M) {
// Call __sanitizer_cov_module_init
IRB.SetInsertPoint(CtorFunc->getEntryBlock().getTerminator());
- IRB.CreateCall3(SanCovModuleInit,
- IRB.CreatePointerCast(RealGuardArray, Int32PtrTy),
- ConstantInt::get(IntptrTy, SanCovFunction->getNumUses()),
- IRB.CreatePointerCast(ModuleName, Int8PtrTy));
+ IRB.CreateCall4(
+ SanCovModuleInit, IRB.CreatePointerCast(RealGuardArray, Int32PtrTy),
+ ConstantInt::get(IntptrTy, N),
+ ClUse8bitCounters
+ ? IRB.CreatePointerCast(RealEightBitCounterArray, Int8PtrTy)
+ : Constant::getNullValue(Int8PtrTy),
+ IRB.CreatePointerCast(ModuleName, Int8PtrTy));
return true;
}
@@ -215,23 +265,28 @@ bool SanitizerCoverageModule::runOnFunction(Function &F) {
SplitAllCriticalEdges(F);
SmallVector<Instruction*, 8> IndirCalls;
SmallVector<BasicBlock*, 16> AllBlocks;
+ SmallVector<Instruction*, 8> CmpTraceTargets;
for (auto &BB : F) {
AllBlocks.push_back(&BB);
- if (CoverageLevel >= 4)
- for (auto &Inst : BB) {
+ for (auto &Inst : BB) {
+ if (CoverageLevel >= 4) {
CallSite CS(&Inst);
if (CS && !CS.getCalledFunction())
IndirCalls.push_back(&Inst);
}
+ if (ClExperimentalCMPTracing)
+ if (isa<ICmpInst>(&Inst))
+ CmpTraceTargets.push_back(&Inst);
+ }
}
- InjectCoverage(F, AllBlocks, IndirCalls);
+ InjectCoverage(F, AllBlocks);
+ InjectCoverageForIndirectCalls(F, IndirCalls);
+ InjectTraceForCmp(F, CmpTraceTargets);
return true;
}
-bool
-SanitizerCoverageModule::InjectCoverage(Function &F,
- ArrayRef<BasicBlock *> AllBlocks,
- ArrayRef<Instruction *> IndirCalls) {
+bool SanitizerCoverageModule::InjectCoverage(Function &F,
+ ArrayRef<BasicBlock *> AllBlocks) {
if (!CoverageLevel) return false;
if (CoverageLevel == 1) {
@@ -241,7 +296,6 @@ SanitizerCoverageModule::InjectCoverage(Function &F,
InjectCoverageAtBlock(F, *BB,
ClCoverageBlockThreshold < AllBlocks.size());
}
- InjectCoverageForIndirectCalls(F, IndirCalls);
return true;
}
@@ -273,6 +327,32 @@ void SanitizerCoverageModule::InjectCoverageForIndirectCalls(
}
}
+void SanitizerCoverageModule::InjectTraceForCmp(
+ Function &F, ArrayRef<Instruction *> CmpTraceTargets) {
+ if (!ClExperimentalCMPTracing) return;
+ for (auto I : CmpTraceTargets) {
+ if (ICmpInst *ICMP = dyn_cast<ICmpInst>(I)) {
+ IRBuilder<> IRB(ICMP);
+ Value *A0 = ICMP->getOperand(0);
+ Value *A1 = ICMP->getOperand(1);
+ if (!A0->getType()->isIntegerTy()) continue;
+ uint64_t TypeSize = DL->getTypeStoreSizeInBits(A0->getType());
+ // __sanitizer_cov_trace_cmp((type_size << 32) | predicate, A0, A1);
+ IRB.CreateCall3(
+ SanCovTraceCmpFunction,
+ ConstantInt::get(Int64Ty, (TypeSize << 32) | ICMP->getPredicate()),
+ IRB.CreateIntCast(A0, Int64Ty, true),
+ IRB.CreateIntCast(A1, Int64Ty, true));
+ }
+ }
+}
+
+void SanitizerCoverageModule::SetNoSanitizeMetadata(Instruction *I) {
+ I->setMetadata(
+ I->getParent()->getParent()->getParent()->getMDKindID("nosanitize"),
+ MDNode::get(*C, None));
+}
+
void SanitizerCoverageModule::InjectCoverageAtBlock(Function &F, BasicBlock &BB,
bool UseCalls) {
BasicBlock::iterator IP = BB.getFirstInsertionPt(), BE = BB.end();
@@ -286,14 +366,15 @@ void SanitizerCoverageModule::InjectCoverageAtBlock(Function &F, BasicBlock &BB,
}
bool IsEntryBB = &BB == &F.getEntryBlock();
- DebugLoc EntryLoc =
- IsEntryBB ? IP->getDebugLoc().getFnDebugLoc(*C) : IP->getDebugLoc();
+ DebugLoc EntryLoc = IsEntryBB && !IP->getDebugLoc().isUnknown()
+ ? IP->getDebugLoc().getFnDebugLoc(*C)
+ : IP->getDebugLoc();
IRBuilder<> IRB(IP);
IRB.SetCurrentDebugLocation(EntryLoc);
SmallVector<Value *, 1> Indices;
Value *GuardP = IRB.CreateAdd(
IRB.CreatePointerCast(GuardArray, IntptrTy),
- ConstantInt::get(IntptrTy, (1 + SanCovFunction->getNumUses()) * 4));
+ ConstantInt::get(IntptrTy, (1 + NumberOfInstrumentedBlocks()) * 4));
Type *Int32PtrTy = PointerType::getUnqual(IRB.getInt32Ty());
GuardP = IRB.CreateIntToPtr(GuardP, Int32PtrTy);
if (UseCalls) {
@@ -302,8 +383,7 @@ void SanitizerCoverageModule::InjectCoverageAtBlock(Function &F, BasicBlock &BB,
LoadInst *Load = IRB.CreateLoad(GuardP);
Load->setAtomic(Monotonic);
Load->setAlignment(4);
- Load->setMetadata(F.getParent()->getMDKindID("nosanitize"),
- MDNode::get(*C, None));
+ SetNoSanitizeMetadata(Load);
Value *Cmp = IRB.CreateICmpSGE(Constant::getNullValue(Load->getType()), Load);
Instruction *Ins = SplitBlockAndInsertIfThen(
Cmp, IP, false, MDBuilder(*C).createBranchWeights(1, 100000));
@@ -314,6 +394,19 @@ void SanitizerCoverageModule::InjectCoverageAtBlock(Function &F, BasicBlock &BB,
IRB.CreateCall(EmptyAsm); // Avoids callback merge.
}
+ if (ClUse8bitCounters) {
+ IRB.SetInsertPoint(IP);
+ Value *P = IRB.CreateAdd(
+ IRB.CreatePointerCast(EightBitCounterArray, IntptrTy),
+ ConstantInt::get(IntptrTy, NumberOfInstrumentedBlocks() - 1));
+ P = IRB.CreateIntToPtr(P, IRB.getInt8PtrTy());
+ LoadInst *LI = IRB.CreateLoad(P);
+ Value *Inc = IRB.CreateAdd(LI, ConstantInt::get(IRB.getInt8Ty(), 1));
+ StoreInst *SI = IRB.CreateStore(Inc, P);
+ SetNoSanitizeMetadata(LI);
+ SetNoSanitizeMetadata(SI);
+ }
+
if (ClExperimentalTracing) {
// Experimental support for tracing.
// Insert a callback with the same guard variable as used for coverage.
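The packed first argument passed to __sanitizer_cov_trace_cmp in InjectTraceForCmp() above is easy to model in isolation. The predicate value 32 used below is an assumption (ICMP_EQ in LLVM's CmpInst predicate enum at the time):

#include <cassert>
#include <cstdint>

// (type_size_in_bits << 32) | icmp_predicate, as built in InjectTraceForCmp.
uint64_t packCmpArg(uint64_t TypeSizeInBits, uint32_t Predicate) {
  return (TypeSizeInBits << 32) | Predicate;
}

int main() {
  uint64_t Packed = packCmpArg(64, 32);  // a 64-bit ICMP_EQ (assumed value)
  assert((Packed >> 32) == 64);          // runtime recovers the operand size
  assert((Packed & 0xffffffff) == 32);   // ...and the comparison predicate
}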
diff --git a/lib/Transforms/Instrumentation/ThreadSanitizer.cpp b/lib/Transforms/Instrumentation/ThreadSanitizer.cpp
index e4a4911..c3ba722 100644
--- a/lib/Transforms/Instrumentation/ThreadSanitizer.cpp
+++ b/lib/Transforms/Instrumentation/ThreadSanitizer.cpp
@@ -19,14 +19,14 @@
// The rest is handled by the run-time library.
//===----------------------------------------------------------------------===//
-#include "llvm/Analysis/CaptureTracking.h"
-#include "llvm/Analysis/ValueTracking.h"
#include "llvm/Transforms/Instrumentation.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/StringExtras.h"
+#include "llvm/Analysis/CaptureTracking.h"
+#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/IRBuilder.h"
@@ -76,7 +76,7 @@ namespace {
/// ThreadSanitizer: instrument the code in module to find races.
struct ThreadSanitizer : public FunctionPass {
- ThreadSanitizer() : FunctionPass(ID), DL(nullptr) {}
+ ThreadSanitizer() : FunctionPass(ID) {}
const char *getPassName() const override;
bool runOnFunction(Function &F) override;
bool doInitialization(Module &M) override;
@@ -84,15 +84,15 @@ struct ThreadSanitizer : public FunctionPass {
private:
void initializeCallbacks(Module &M);
- bool instrumentLoadOrStore(Instruction *I);
- bool instrumentAtomic(Instruction *I);
+ bool instrumentLoadOrStore(Instruction *I, const DataLayout &DL);
+ bool instrumentAtomic(Instruction *I, const DataLayout &DL);
bool instrumentMemIntrinsic(Instruction *I);
- void chooseInstructionsToInstrument(SmallVectorImpl<Instruction*> &Local,
- SmallVectorImpl<Instruction*> &All);
+ void chooseInstructionsToInstrument(SmallVectorImpl<Instruction *> &Local,
+ SmallVectorImpl<Instruction *> &All,
+ const DataLayout &DL);
bool addrPointsToConstantData(Value *Addr);
- int getMemoryAccessFuncIndex(Value *Addr);
+ int getMemoryAccessFuncIndex(Value *Addr, const DataLayout &DL);
- const DataLayout *DL;
Type *IntptrTy;
IntegerType *OrdTy;
// Callbacks to run-time library are computed in doInitialization.
@@ -230,10 +230,7 @@ void ThreadSanitizer::initializeCallbacks(Module &M) {
}
bool ThreadSanitizer::doInitialization(Module &M) {
- DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>();
- if (!DLP)
- report_fatal_error("data layout missing");
- DL = &DLP->getDataLayout();
+ const DataLayout &DL = M.getDataLayout();
// Always insert a call to __tsan_init into the module's CTORs.
IRBuilder<> IRB(M.getContext());
@@ -285,8 +282,8 @@ bool ThreadSanitizer::addrPointsToConstantData(Value *Addr) {
// 'Local' is a vector of insns within the same BB (no calls between).
// 'All' is a vector of insns that will be instrumented.
void ThreadSanitizer::chooseInstructionsToInstrument(
- SmallVectorImpl<Instruction*> &Local,
- SmallVectorImpl<Instruction*> &All) {
+ SmallVectorImpl<Instruction *> &Local, SmallVectorImpl<Instruction *> &All,
+ const DataLayout &DL) {
SmallSet<Value*, 8> WriteTargets;
// Iterate from the end.
for (SmallVectorImpl<Instruction*>::reverse_iterator It = Local.rbegin(),
@@ -310,7 +307,7 @@ void ThreadSanitizer::chooseInstructionsToInstrument(
Value *Addr = isa<StoreInst>(*I)
? cast<StoreInst>(I)->getPointerOperand()
: cast<LoadInst>(I)->getPointerOperand();
- if (isa<AllocaInst>(GetUnderlyingObject(Addr, nullptr)) &&
+ if (isa<AllocaInst>(GetUnderlyingObject(Addr, DL)) &&
!PointerMayBeCaptured(Addr, true, true)) {
// The variable is addressable but not captured, so it cannot be
// referenced from a different thread and participate in a data race
@@ -338,7 +335,6 @@ static bool isAtomic(Instruction *I) {
}
bool ThreadSanitizer::runOnFunction(Function &F) {
- if (!DL) return false;
initializeCallbacks(*F.getParent());
SmallVector<Instruction*, 8> RetVec;
SmallVector<Instruction*, 8> AllLoadsAndStores;
@@ -348,6 +344,7 @@ bool ThreadSanitizer::runOnFunction(Function &F) {
bool Res = false;
bool HasCalls = false;
bool SanitizeFunction = F.hasFnAttribute(Attribute::SanitizeThread);
+ const DataLayout &DL = F.getParent()->getDataLayout();
// Traverse all instructions, collect loads/stores/returns, check for calls.
for (auto &BB : F) {
@@ -362,10 +359,11 @@ bool ThreadSanitizer::runOnFunction(Function &F) {
if (isa<MemIntrinsic>(Inst))
MemIntrinCalls.push_back(&Inst);
HasCalls = true;
- chooseInstructionsToInstrument(LocalLoadsAndStores, AllLoadsAndStores);
+ chooseInstructionsToInstrument(LocalLoadsAndStores, AllLoadsAndStores,
+ DL);
}
}
- chooseInstructionsToInstrument(LocalLoadsAndStores, AllLoadsAndStores);
+ chooseInstructionsToInstrument(LocalLoadsAndStores, AllLoadsAndStores, DL);
}
// We have collected all loads and stores.
@@ -375,14 +373,14 @@ bool ThreadSanitizer::runOnFunction(Function &F) {
// Instrument memory accesses only if we want to report bugs in the function.
if (ClInstrumentMemoryAccesses && SanitizeFunction)
for (auto Inst : AllLoadsAndStores) {
- Res |= instrumentLoadOrStore(Inst);
+ Res |= instrumentLoadOrStore(Inst, DL);
}
// Instrument atomic memory accesses in any case (they can be used to
// implement synchronization).
if (ClInstrumentAtomics)
for (auto Inst : AtomicAccesses) {
- Res |= instrumentAtomic(Inst);
+ Res |= instrumentAtomic(Inst, DL);
}
if (ClInstrumentMemIntrinsics && SanitizeFunction)
@@ -406,13 +404,14 @@ bool ThreadSanitizer::runOnFunction(Function &F) {
return Res;
}
-bool ThreadSanitizer::instrumentLoadOrStore(Instruction *I) {
+bool ThreadSanitizer::instrumentLoadOrStore(Instruction *I,
+ const DataLayout &DL) {
IRBuilder<> IRB(I);
bool IsWrite = isa<StoreInst>(*I);
Value *Addr = IsWrite
? cast<StoreInst>(I)->getPointerOperand()
: cast<LoadInst>(I)->getPointerOperand();
- int Idx = getMemoryAccessFuncIndex(Addr);
+ int Idx = getMemoryAccessFuncIndex(Addr, DL);
if (Idx < 0)
return false;
if (IsWrite && isVtableAccess(I)) {
@@ -443,7 +442,7 @@ bool ThreadSanitizer::instrumentLoadOrStore(Instruction *I) {
? cast<StoreInst>(I)->getAlignment()
: cast<LoadInst>(I)->getAlignment();
Type *OrigTy = cast<PointerType>(Addr->getType())->getElementType();
- const uint32_t TypeSize = DL->getTypeStoreSizeInBits(OrigTy);
+ const uint32_t TypeSize = DL.getTypeStoreSizeInBits(OrigTy);
Value *OnAccessFunc = nullptr;
if (Alignment == 0 || Alignment >= 8 || (Alignment % (TypeSize / 8)) == 0)
OnAccessFunc = IsWrite ? TsanWrite[Idx] : TsanRead[Idx];
@@ -504,11 +503,11 @@ bool ThreadSanitizer::instrumentMemIntrinsic(Instruction *I) {
// The following page contains more background information:
// http://www.hpl.hp.com/personal/Hans_Boehm/c++mm/
-bool ThreadSanitizer::instrumentAtomic(Instruction *I) {
+bool ThreadSanitizer::instrumentAtomic(Instruction *I, const DataLayout &DL) {
IRBuilder<> IRB(I);
if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
Value *Addr = LI->getPointerOperand();
- int Idx = getMemoryAccessFuncIndex(Addr);
+ int Idx = getMemoryAccessFuncIndex(Addr, DL);
if (Idx < 0)
return false;
const size_t ByteSize = 1 << Idx;
@@ -522,7 +521,7 @@ bool ThreadSanitizer::instrumentAtomic(Instruction *I) {
} else if (StoreInst *SI = dyn_cast<StoreInst>(I)) {
Value *Addr = SI->getPointerOperand();
- int Idx = getMemoryAccessFuncIndex(Addr);
+ int Idx = getMemoryAccessFuncIndex(Addr, DL);
if (Idx < 0)
return false;
const size_t ByteSize = 1 << Idx;
@@ -536,7 +535,7 @@ bool ThreadSanitizer::instrumentAtomic(Instruction *I) {
ReplaceInstWithInst(I, C);
} else if (AtomicRMWInst *RMWI = dyn_cast<AtomicRMWInst>(I)) {
Value *Addr = RMWI->getPointerOperand();
- int Idx = getMemoryAccessFuncIndex(Addr);
+ int Idx = getMemoryAccessFuncIndex(Addr, DL);
if (Idx < 0)
return false;
Function *F = TsanAtomicRMW[RMWI->getOperation()][Idx];
@@ -553,7 +552,7 @@ bool ThreadSanitizer::instrumentAtomic(Instruction *I) {
ReplaceInstWithInst(I, C);
} else if (AtomicCmpXchgInst *CASI = dyn_cast<AtomicCmpXchgInst>(I)) {
Value *Addr = CASI->getPointerOperand();
- int Idx = getMemoryAccessFuncIndex(Addr);
+ int Idx = getMemoryAccessFuncIndex(Addr, DL);
if (Idx < 0)
return false;
const size_t ByteSize = 1 << Idx;
@@ -583,11 +582,12 @@ bool ThreadSanitizer::instrumentAtomic(Instruction *I) {
return true;
}
-int ThreadSanitizer::getMemoryAccessFuncIndex(Value *Addr) {
+int ThreadSanitizer::getMemoryAccessFuncIndex(Value *Addr,
+ const DataLayout &DL) {
Type *OrigPtrTy = Addr->getType();
Type *OrigTy = cast<PointerType>(OrigPtrTy)->getElementType();
assert(OrigTy->isSized());
- uint32_t TypeSize = DL->getTypeStoreSizeInBits(OrigTy);
+ uint32_t TypeSize = DL.getTypeStoreSizeInBits(OrigTy);
if (TypeSize != 8 && TypeSize != 16 &&
TypeSize != 32 && TypeSize != 64 && TypeSize != 128) {
NumAccessesWithBadSize++;
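The index returned by getMemoryAccessFuncIndex selects among per-size runtime callbacks, and the callers above recover the access width as ByteSize = 1 << Idx. A standalone sketch of that mapping, covering only the sizes the function accepts (8/16/32/64/128 bits); the helper name is hypothetical:

    #include <cstdint>
    // Free-standing version of the size check in getMemoryAccessFuncIndex;
    // -1 tells the caller to skip instrumenting the access.
    int sizeToFuncIndex(uint32_t TypeSizeInBits) {
      switch (TypeSizeInBits) {
      case 8:   return 0;   // e.g. __tsan_read1 / __tsan_write1
      case 16:  return 1;
      case 32:  return 2;
      case 64:  return 3;
      case 128: return 4;
      default:  return -1;  // NumAccessesWithBadSize++ in the real code
      }
    }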
diff --git a/lib/Transforms/ObjCARC/ARCInstKind.cpp b/lib/Transforms/ObjCARC/ARCInstKind.cpp
index f1e9dce..72df9ab 100644
--- a/lib/Transforms/ObjCARC/ARCInstKind.cpp
+++ b/lib/Transforms/ObjCARC/ARCInstKind.cpp
@@ -168,6 +168,60 @@ ARCInstKind llvm::objcarc::GetFunctionClass(const Function *F) {
return ARCInstKind::CallOrUser;
}
+// A whitelist of intrinsics that we know do not use objc pointers or decrement
+// ref counts.
+static bool isInertIntrinsic(unsigned ID) {
+ // TODO: Make this into a covered switch.
+ switch (ID) {
+ case Intrinsic::returnaddress:
+ case Intrinsic::frameaddress:
+ case Intrinsic::stacksave:
+ case Intrinsic::stackrestore:
+ case Intrinsic::vastart:
+ case Intrinsic::vacopy:
+ case Intrinsic::vaend:
+ case Intrinsic::objectsize:
+ case Intrinsic::prefetch:
+ case Intrinsic::stackprotector:
+ case Intrinsic::eh_return_i32:
+ case Intrinsic::eh_return_i64:
+ case Intrinsic::eh_typeid_for:
+ case Intrinsic::eh_dwarf_cfa:
+ case Intrinsic::eh_sjlj_lsda:
+ case Intrinsic::eh_sjlj_functioncontext:
+ case Intrinsic::init_trampoline:
+ case Intrinsic::adjust_trampoline:
+ case Intrinsic::lifetime_start:
+ case Intrinsic::lifetime_end:
+ case Intrinsic::invariant_start:
+ case Intrinsic::invariant_end:
+ // Don't let dbg info affect our results.
+ case Intrinsic::dbg_declare:
+ case Intrinsic::dbg_value:
+ // Short cut: Some intrinsics obviously don't use ObjC pointers.
+ return true;
+ default:
+ return false;
+ }
+}
+
+// A whitelist of intrinsics that may use an objc pointer but that we know do
+// not decrement its ref count.
+static bool isUseOnlyIntrinsic(unsigned ID) {
+ // We are conservative: even though these intrinsics are unlikely to touch
+ // reference counts, we whitelist them for safety.
+ //
+ // TODO: Expand this into a covered switch. There is a lot more here.
+ switch (ID) {
+ case Intrinsic::memcpy:
+ case Intrinsic::memmove:
+ case Intrinsic::memset:
+ return true;
+ default:
+ return false;
+ }
+}
+
/// \brief Determine what kind of construct V is.
ARCInstKind llvm::objcarc::GetARCInstKind(const Value *V) {
if (const Instruction *I = dyn_cast<Instruction>(V)) {
@@ -180,49 +234,23 @@ ARCInstKind llvm::objcarc::GetARCInstKind(const Value *V) {
switch (I->getOpcode()) {
case Instruction::Call: {
const CallInst *CI = cast<CallInst>(I);
- // Check for calls to special functions.
+ // See if we have a function that we know something about.
if (const Function *F = CI->getCalledFunction()) {
ARCInstKind Class = GetFunctionClass(F);
if (Class != ARCInstKind::CallOrUser)
return Class;
-
- // None of the intrinsic functions do objc_release. For intrinsics, the
- // only question is whether or not they may be users.
- switch (F->getIntrinsicID()) {
- case Intrinsic::returnaddress:
- case Intrinsic::frameaddress:
- case Intrinsic::stacksave:
- case Intrinsic::stackrestore:
- case Intrinsic::vastart:
- case Intrinsic::vacopy:
- case Intrinsic::vaend:
- case Intrinsic::objectsize:
- case Intrinsic::prefetch:
- case Intrinsic::stackprotector:
- case Intrinsic::eh_return_i32:
- case Intrinsic::eh_return_i64:
- case Intrinsic::eh_typeid_for:
- case Intrinsic::eh_dwarf_cfa:
- case Intrinsic::eh_sjlj_lsda:
- case Intrinsic::eh_sjlj_functioncontext:
- case Intrinsic::init_trampoline:
- case Intrinsic::adjust_trampoline:
- case Intrinsic::lifetime_start:
- case Intrinsic::lifetime_end:
- case Intrinsic::invariant_start:
- case Intrinsic::invariant_end:
- // Don't let dbg info affect our results.
- case Intrinsic::dbg_declare:
- case Intrinsic::dbg_value:
- // Short cut: Some intrinsics obviously don't use ObjC pointers.
+ unsigned ID = F->getIntrinsicID();
+ if (isInertIntrinsic(ID))
return ARCInstKind::None;
- default:
- break;
- }
+ if (isUseOnlyIntrinsic(ID))
+ return ARCInstKind::User;
}
+
+ // Otherwise, be conservative.
return GetCallSiteClass(CI);
}
case Instruction::Invoke:
+ // Otherwise, be conservative.
return GetCallSiteClass(cast<InvokeInst>(I));
case Instruction::BitCast:
case Instruction::GetElementPtr:
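Factoring the old switch into isInertIntrinsic and isUseOnlyIntrinsic reduces call classification to two lookups plus a conservative fallback. A sketch of the resulting flow (classifyIntrinsicCall is an illustrative name; the real logic stays inline in GetARCInstKind):

    ARCInstKind classifyIntrinsicCall(const Function *F, const CallInst *CI) {
      unsigned ID = F->getIntrinsicID();
      if (isInertIntrinsic(ID))
        return ARCInstKind::None; // e.g. llvm.dbg.value, llvm.lifetime.start
      if (isUseOnlyIntrinsic(ID))
        return ARCInstKind::User; // e.g. llvm.memcpy: uses, never releases
      return GetCallSiteClass(CI); // unknown call: be conservative
    }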
diff --git a/lib/Transforms/ObjCARC/ARCRuntimeEntryPoints.h b/lib/Transforms/ObjCARC/ARCRuntimeEntryPoints.h
index e286dbc..87de33b 100644
--- a/lib/Transforms/ObjCARC/ARCRuntimeEntryPoints.h
+++ b/lib/Transforms/ObjCARC/ARCRuntimeEntryPoints.h
@@ -27,22 +27,22 @@
namespace llvm {
namespace objcarc {
+enum class ARCRuntimeEntryPointKind {
+ AutoreleaseRV,
+ Release,
+ Retain,
+ RetainBlock,
+ Autorelease,
+ StoreStrong,
+ RetainRV,
+ RetainAutorelease,
+ RetainAutoreleaseRV,
+};
+
/// Declarations for ObjC runtime functions and constants. These are initialized
/// lazily to avoid cluttering up the Module with unused declarations.
class ARCRuntimeEntryPoints {
public:
- enum EntryPointType {
- EPT_AutoreleaseRV,
- EPT_Release,
- EPT_Retain,
- EPT_RetainBlock,
- EPT_Autorelease,
- EPT_StoreStrong,
- EPT_RetainRV,
- EPT_RetainAutorelease,
- EPT_RetainAutoreleaseRV
- };
-
ARCRuntimeEntryPoints() : TheModule(nullptr),
AutoreleaseRV(nullptr),
Release(nullptr),
@@ -56,7 +56,7 @@ public:
~ARCRuntimeEntryPoints() { }
- void Initialize(Module *M) {
+ void init(Module *M) {
TheModule = M;
AutoreleaseRV = nullptr;
Release = nullptr;
@@ -69,30 +69,30 @@ public:
RetainAutoreleaseRV = nullptr;
}
- Constant *get(const EntryPointType entry) {
+ Constant *get(ARCRuntimeEntryPointKind kind) {
assert(TheModule != nullptr && "Not initialized.");
- switch (entry) {
- case EPT_AutoreleaseRV:
+ switch (kind) {
+ case ARCRuntimeEntryPointKind::AutoreleaseRV:
return getI8XRetI8XEntryPoint(AutoreleaseRV,
"objc_autoreleaseReturnValue", true);
- case EPT_Release:
+ case ARCRuntimeEntryPointKind::Release:
return getVoidRetI8XEntryPoint(Release, "objc_release");
- case EPT_Retain:
+ case ARCRuntimeEntryPointKind::Retain:
return getI8XRetI8XEntryPoint(Retain, "objc_retain", true);
- case EPT_RetainBlock:
+ case ARCRuntimeEntryPointKind::RetainBlock:
return getI8XRetI8XEntryPoint(RetainBlock, "objc_retainBlock", false);
- case EPT_Autorelease:
+ case ARCRuntimeEntryPointKind::Autorelease:
return getI8XRetI8XEntryPoint(Autorelease, "objc_autorelease", true);
- case EPT_StoreStrong:
+ case ARCRuntimeEntryPointKind::StoreStrong:
return getI8XRetI8XXI8XEntryPoint(StoreStrong, "objc_storeStrong");
- case EPT_RetainRV:
+ case ARCRuntimeEntryPointKind::RetainRV:
return getI8XRetI8XEntryPoint(RetainRV,
"objc_retainAutoreleasedReturnValue", true);
- case EPT_RetainAutorelease:
+ case ARCRuntimeEntryPointKind::RetainAutorelease:
return getI8XRetI8XEntryPoint(RetainAutorelease, "objc_retainAutorelease",
true);
- case EPT_RetainAutoreleaseRV:
+ case ARCRuntimeEntryPointKind::RetainAutoreleaseRV:
return getI8XRetI8XEntryPoint(RetainAutoreleaseRV,
"objc_retainAutoreleaseReturnValue", true);
}
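The move from the nested EntryPointType to a scoped enum is mechanical at call sites but tightens type checking: the enumerators no longer convert implicitly to int and no longer pollute the class scope. For example, as the ObjCARCContract hunks further down show:

    // Before: EP.get(ARCRuntimeEntryPoints::EPT_RetainRV)
    Constant *Decl = EP.get(ARCRuntimeEntryPointKind::RetainRV);
    cast<CallInst>(Retain)->setCalledFunction(Decl);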
diff --git a/lib/Transforms/ObjCARC/Android.mk b/lib/Transforms/ObjCARC/Android.mk
index 97c5a9d..e120fbe 100644
--- a/lib/Transforms/ObjCARC/Android.mk
+++ b/lib/Transforms/ObjCARC/Android.mk
@@ -9,6 +9,7 @@ transforms_objcarc_SRC_FILES := \
ObjCARC.cpp \
ObjCARCExpand.cpp \
ObjCARCOpts.cpp \
+ PtrState.cpp \
ProvenanceAnalysis.cpp \
ProvenanceAnalysisEvaluator.cpp
diff --git a/lib/Transforms/ObjCARC/BlotMapVector.h b/lib/Transforms/ObjCARC/BlotMapVector.h
new file mode 100644
index 0000000..d6439b6
--- /dev/null
+++ b/lib/Transforms/ObjCARC/BlotMapVector.h
@@ -0,0 +1,108 @@
+//===- BlotMapVector.h - A MapVector with the blot operation -*- C++ -*----===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ADT/DenseMap.h"
+#include <vector>
+#include <algorithm>
+
+namespace llvm {
+/// \brief An associative container with fast insertion-order (deterministic)
+/// iteration over its elements. Plus the special blot operation.
+template <class KeyT, class ValueT> class BlotMapVector {
+ /// Map keys to indices in Vector.
+ typedef DenseMap<KeyT, size_t> MapTy;
+ MapTy Map;
+
+ typedef std::vector<std::pair<KeyT, ValueT>> VectorTy;
+ /// Keys and values.
+ VectorTy Vector;
+
+public:
+ typedef typename VectorTy::iterator iterator;
+ typedef typename VectorTy::const_iterator const_iterator;
+ iterator begin() { return Vector.begin(); }
+ iterator end() { return Vector.end(); }
+ const_iterator begin() const { return Vector.begin(); }
+ const_iterator end() const { return Vector.end(); }
+
+#ifdef XDEBUG
+ ~BlotMapVector() {
+ assert(Vector.size() >= Map.size()); // May differ due to blotting.
+ for (typename MapTy::const_iterator I = Map.begin(), E = Map.end(); I != E;
+ ++I) {
+ assert(I->second < Vector.size());
+ assert(Vector[I->second].first == I->first);
+ }
+ for (typename VectorTy::const_iterator I = Vector.begin(), E = Vector.end();
+ I != E; ++I)
+ assert(!I->first || (Map.count(I->first) &&
+ Map[I->first] == size_t(I - Vector.begin())));
+ }
+#endif
+
+ ValueT &operator[](const KeyT &Arg) {
+ std::pair<typename MapTy::iterator, bool> Pair =
+ Map.insert(std::make_pair(Arg, size_t(0)));
+ if (Pair.second) {
+ size_t Num = Vector.size();
+ Pair.first->second = Num;
+ Vector.push_back(std::make_pair(Arg, ValueT()));
+ return Vector[Num].second;
+ }
+ return Vector[Pair.first->second].second;
+ }
+
+ std::pair<iterator, bool> insert(const std::pair<KeyT, ValueT> &InsertPair) {
+ std::pair<typename MapTy::iterator, bool> Pair =
+ Map.insert(std::make_pair(InsertPair.first, size_t(0)));
+ if (Pair.second) {
+ size_t Num = Vector.size();
+ Pair.first->second = Num;
+ Vector.push_back(InsertPair);
+ return std::make_pair(Vector.begin() + Num, true);
+ }
+ return std::make_pair(Vector.begin() + Pair.first->second, false);
+ }
+
+ iterator find(const KeyT &Key) {
+ typename MapTy::iterator It = Map.find(Key);
+ if (It == Map.end())
+ return Vector.end();
+ return Vector.begin() + It->second;
+ }
+
+ const_iterator find(const KeyT &Key) const {
+ typename MapTy::const_iterator It = Map.find(Key);
+ if (It == Map.end())
+ return Vector.end();
+ return Vector.begin() + It->second;
+ }
+
+ /// This is similar to erase, but instead of removing the element from the
+ /// vector, it just zeros out the key in the vector. This leaves iterators
+ /// intact, but clients must be prepared for zeroed-out keys when iterating.
+ void blot(const KeyT &Key) {
+ typename MapTy::iterator It = Map.find(Key);
+ if (It == Map.end())
+ return;
+ Vector[It->second].first = KeyT();
+ Map.erase(It);
+ }
+
+ void clear() {
+ Map.clear();
+ Vector.clear();
+ }
+
+ bool empty() const {
+ assert(Map.empty() == Vector.empty());
+ return Map.empty();
+ }
+};
+} // end namespace llvm
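A short usage sketch of the blot operation (the keys and Sum bookkeeping are hypothetical, not from the patch): blotting erases the map entry but only nulls out the key in the vector, so iterators and insertion order survive and iteration must skip blotted slots.

    const char *A = "a", *B = "b";
    BlotMapVector<const char *, int> M;
    M[A] = 1;
    M[B] = 2;
    M.blot(A);                    // map entry erased; vector slot key nulled
    assert(M.find(A) == M.end()); // find consults the map, so A is gone
    int Sum = 0;
    for (auto &KV : M)
      if (KV.first)               // clients must skip zeroed-out keys
        Sum += KV.second;         // Sum == 2: only B's value remains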
diff --git a/lib/Transforms/ObjCARC/CMakeLists.txt b/lib/Transforms/ObjCARC/CMakeLists.txt
index 2adea88..fbcae29 100644
--- a/lib/Transforms/ObjCARC/CMakeLists.txt
+++ b/lib/Transforms/ObjCARC/CMakeLists.txt
@@ -9,6 +9,7 @@ add_llvm_library(LLVMObjCARCOpts
DependencyAnalysis.cpp
ProvenanceAnalysis.cpp
ProvenanceAnalysisEvaluator.cpp
+ PtrState.cpp
ADDITIONAL_HEADER_DIRS
${LLVM_MAIN_INCLUDE_DIR}/llvm/Transforms
diff --git a/lib/Transforms/ObjCARC/DependencyAnalysis.cpp b/lib/Transforms/ObjCARC/DependencyAnalysis.cpp
index 4985d0e..b197c97 100644
--- a/lib/Transforms/ObjCARC/DependencyAnalysis.cpp
+++ b/lib/Transforms/ObjCARC/DependencyAnalysis.cpp
@@ -53,10 +53,12 @@ bool llvm::objcarc::CanAlterRefCount(const Instruction *Inst, const Value *Ptr,
if (AliasAnalysis::onlyReadsMemory(MRB))
return false;
if (AliasAnalysis::onlyAccessesArgPointees(MRB)) {
+ const DataLayout &DL = Inst->getModule()->getDataLayout();
for (ImmutableCallSite::arg_iterator I = CS.arg_begin(), E = CS.arg_end();
I != E; ++I) {
const Value *Op = *I;
- if (IsPotentialRetainableObjPtr(Op, *PA.getAA()) && PA.related(Ptr, Op))
+ if (IsPotentialRetainableObjPtr(Op, *PA.getAA()) &&
+ PA.related(Ptr, Op, DL))
return true;
}
return false;
@@ -87,6 +89,8 @@ bool llvm::objcarc::CanUse(const Instruction *Inst, const Value *Ptr,
if (Class == ARCInstKind::Call)
return false;
+ const DataLayout &DL = Inst->getModule()->getDataLayout();
+
// Consider various instructions which may have pointer arguments which are
// not "uses".
if (const ICmpInst *ICI = dyn_cast<ICmpInst>(Inst)) {
@@ -100,24 +104,26 @@ bool llvm::objcarc::CanUse(const Instruction *Inst, const Value *Ptr,
for (ImmutableCallSite::arg_iterator OI = CS.arg_begin(),
OE = CS.arg_end(); OI != OE; ++OI) {
const Value *Op = *OI;
- if (IsPotentialRetainableObjPtr(Op, *PA.getAA()) && PA.related(Ptr, Op))
+ if (IsPotentialRetainableObjPtr(Op, *PA.getAA()) &&
+ PA.related(Ptr, Op, DL))
return true;
}
return false;
} else if (const StoreInst *SI = dyn_cast<StoreInst>(Inst)) {
// Special-case stores, because we don't care about the stored value, just
// the store address.
- const Value *Op = GetUnderlyingObjCPtr(SI->getPointerOperand());
+ const Value *Op = GetUnderlyingObjCPtr(SI->getPointerOperand(), DL);
// If we can't tell what the underlying object was, assume there is a
// dependence.
- return IsPotentialRetainableObjPtr(Op, *PA.getAA()) && PA.related(Op, Ptr);
+ return IsPotentialRetainableObjPtr(Op, *PA.getAA()) &&
+ PA.related(Op, Ptr, DL);
}
// Check each operand for a match.
for (User::const_op_iterator OI = Inst->op_begin(), OE = Inst->op_end();
OI != OE; ++OI) {
const Value *Op = *OI;
- if (IsPotentialRetainableObjPtr(Op, *PA.getAA()) && PA.related(Ptr, Op))
+ if (IsPotentialRetainableObjPtr(Op, *PA.getAA()) && PA.related(Ptr, Op, DL))
return true;
}
return false;
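This is the recurring shape of the whole patch: rather than caching a DataLayout pointer on the pass, code reaches it through the IR whenever needed, which is why both hunks above start by pulling it off the instruction's module. The idiom in isolation, using calls the hunks themselves rely on:

    // Any Instruction can reach the module-level DataLayout.
    const DataLayout &DL = Inst->getModule()->getDataLayout();
    const Value *Op = GetUnderlyingObjCPtr(SI->getPointerOperand(), DL);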
diff --git a/lib/Transforms/ObjCARC/ObjCARC.h b/lib/Transforms/ObjCARC/ObjCARC.h
index df29f05..7595e2d 100644
--- a/lib/Transforms/ObjCARC/ObjCARC.h
+++ b/lib/Transforms/ObjCARC/ObjCARC.h
@@ -24,6 +24,7 @@
#define LLVM_LIB_TRANSFORMS_OBJCARC_OBJCARC_H
#include "llvm/ADT/StringSwitch.h"
+#include "llvm/ADT/Optional.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/Passes.h"
#include "llvm/Analysis/ValueTracking.h"
@@ -72,9 +73,10 @@ static inline bool ModuleHasARC(const Module &M) {
/// \brief This is a wrapper around getUnderlyingObject which also knows how to
/// look through objc_retain and objc_autorelease calls, which we know to return
/// their argument verbatim.
-static inline const Value *GetUnderlyingObjCPtr(const Value *V) {
+static inline const Value *GetUnderlyingObjCPtr(const Value *V,
+ const DataLayout &DL) {
for (;;) {
- V = GetUnderlyingObject(V);
+ V = GetUnderlyingObject(V, DL);
if (!IsForwarding(GetBasicARCInstKind(V)))
break;
V = cast<CallInst>(V)->getArgOperand(0);
@@ -257,6 +259,55 @@ static inline bool IsObjCIdentifiedObject(const Value *V) {
return false;
}
+enum class ARCMDKindID {
+ ImpreciseRelease,
+ CopyOnEscape,
+ NoObjCARCExceptions,
+};
+
+/// A cache of MDKinds used by various ARC optimizations.
+class ARCMDKindCache {
+ Module *M;
+
+ /// The Metadata Kind for clang.imprecise_release metadata.
+ llvm::Optional<unsigned> ImpreciseReleaseMDKind;
+
+ /// The Metadata Kind for clang.arc.copy_on_escape metadata.
+ llvm::Optional<unsigned> CopyOnEscapeMDKind;
+
+ /// The Metadata Kind for clang.arc.no_objc_arc_exceptions metadata.
+ llvm::Optional<unsigned> NoObjCARCExceptionsMDKind;
+
+public:
+ void init(Module *Mod) {
+ M = Mod;
+ ImpreciseReleaseMDKind = NoneType::None;
+ CopyOnEscapeMDKind = NoneType::None;
+ NoObjCARCExceptionsMDKind = NoneType::None;
+ }
+
+ unsigned get(ARCMDKindID ID) {
+ switch (ID) {
+ case ARCMDKindID::ImpreciseRelease:
+ if (!ImpreciseReleaseMDKind)
+ ImpreciseReleaseMDKind =
+ M->getContext().getMDKindID("clang.imprecise_release");
+ return *ImpreciseReleaseMDKind;
+ case ARCMDKindID::CopyOnEscape:
+ if (!CopyOnEscapeMDKind)
+ CopyOnEscapeMDKind =
+ M->getContext().getMDKindID("clang.arc.copy_on_escape");
+ return *CopyOnEscapeMDKind;
+ case ARCMDKindID::NoObjCARCExceptions:
+ if (!NoObjCARCExceptionsMDKind)
+ NoObjCARCExceptionsMDKind =
+ M->getContext().getMDKindID("clang.arc.no_objc_arc_exceptions");
+ return *NoObjCARCExceptionsMDKind;
+ }
+ llvm_unreachable("Covered switch isn't covered?!");
+ }
+};
+
} // end namespace objcarc
} // end namespace llvm
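ARCMDKindCache memoizes each getMDKindID result in an Optional, so the string lookup runs at most once per kind per module. A usage sketch (Inst and the ReleaseMetadata destination are assumed from the surrounding pass, not shown in this hunk):

    MDKindCache.init(F.getParent());
    unsigned IRKind = MDKindCache.get(ARCMDKindID::ImpreciseRelease);
    if (MDNode *ReleaseMD = Inst->getMetadata(IRKind))
      ReleaseMetadata = ReleaseMD; // release carries clang.imprecise_release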
diff --git a/lib/Transforms/ObjCARC/ObjCARCAliasAnalysis.cpp b/lib/Transforms/ObjCARC/ObjCARCAliasAnalysis.cpp
index be291a0..b1515e3 100644
--- a/lib/Transforms/ObjCARC/ObjCARCAliasAnalysis.cpp
+++ b/lib/Transforms/ObjCARC/ObjCARCAliasAnalysis.cpp
@@ -46,6 +46,11 @@ ImmutablePass *llvm::createObjCARCAliasAnalysisPass() {
return new ObjCARCAliasAnalysis();
}
+bool ObjCARCAliasAnalysis::doInitialization(Module &M) {
+ InitializeAliasAnalysis(this, &M.getDataLayout());
+ return true;
+}
+
void
ObjCARCAliasAnalysis::getAnalysisUsage(AnalysisUsage &AU) const {
AU.setPreservesAll();
@@ -69,8 +74,8 @@ ObjCARCAliasAnalysis::alias(const Location &LocA, const Location &LocB) {
// If that failed, climb to the underlying object, including climbing through
// ObjC-specific no-ops, and try making an imprecise alias query.
- const Value *UA = GetUnderlyingObjCPtr(SA);
- const Value *UB = GetUnderlyingObjCPtr(SB);
+ const Value *UA = GetUnderlyingObjCPtr(SA, *DL);
+ const Value *UB = GetUnderlyingObjCPtr(SB, *DL);
if (UA != SA || UB != SB) {
Result = AliasAnalysis::alias(Location(UA), Location(UB));
// We can't use MustAlias or PartialAlias results here because
@@ -99,7 +104,7 @@ ObjCARCAliasAnalysis::pointsToConstantMemory(const Location &Loc,
// If that failed, climb to the underlying object, including climbing through
// ObjC-specific no-ops, and try making an imprecise alias query.
- const Value *U = GetUnderlyingObjCPtr(S);
+ const Value *U = GetUnderlyingObjCPtr(S, *DL);
if (U != S)
return AliasAnalysis::pointsToConstantMemory(Location(U), OrLocal);
diff --git a/lib/Transforms/ObjCARC/ObjCARCAliasAnalysis.h b/lib/Transforms/ObjCARC/ObjCARCAliasAnalysis.h
index 3fcea4e..3c5a021 100644
--- a/lib/Transforms/ObjCARC/ObjCARCAliasAnalysis.h
+++ b/lib/Transforms/ObjCARC/ObjCARCAliasAnalysis.h
@@ -44,9 +44,7 @@ namespace objcarc {
}
private:
- void initializePass() override {
- InitializeAliasAnalysis(this);
- }
+ bool doInitialization(Module &M) override;
/// This method is used when a pass implements an analysis interface through
/// multiple inheritance. If needed, it should override this to adjust the
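The initializePass-to-doInitialization move recurs across the alias-analysis passes in this commit: doInitialization receives the Module, which now owns the DataLayout that InitializeAliasAnalysis wants. Sketched for a hypothetical pass, mirroring the ObjCARCAliasAnalysis hunk above:

    bool MyAA::doInitialization(Module &M) {
      InitializeAliasAnalysis(this, &M.getDataLayout()); // was: (this)
      return true;
    }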
diff --git a/lib/Transforms/ObjCARC/ObjCARCContract.cpp b/lib/Transforms/ObjCARC/ObjCARCContract.cpp
index 6473d3a..2a3139f 100644
--- a/lib/Transforms/ObjCARC/ObjCARCContract.cpp
+++ b/lib/Transforms/ObjCARC/ObjCARCContract.cpp
@@ -35,6 +35,7 @@
#include "llvm/IR/InlineAsm.h"
#include "llvm/IR/Operator.h"
#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
using namespace llvm;
using namespace llvm::objcarc;
@@ -134,7 +135,7 @@ bool ObjCARCContract::optimizeRetainCall(Function &F, Instruction *Retain) {
// We do not have to worry about tail calls/does not throw since
// retain/retainRV have the same properties.
- Constant *Decl = EP.get(ARCRuntimeEntryPoints::EPT_RetainRV);
+ Constant *Decl = EP.get(ARCRuntimeEntryPointKind::RetainRV);
cast<CallInst>(Retain)->setCalledFunction(Decl);
DEBUG(dbgs() << "New: " << *Retain << "\n");
@@ -181,8 +182,8 @@ bool ObjCARCContract::contractAutorelease(
" Retain: " << *Retain << "\n");
Constant *Decl = EP.get(Class == ARCInstKind::AutoreleaseRV
- ? ARCRuntimeEntryPoints::EPT_RetainAutoreleaseRV
- : ARCRuntimeEntryPoints::EPT_RetainAutorelease);
+ ? ARCRuntimeEntryPointKind::RetainAutoreleaseRV
+ : ARCRuntimeEntryPointKind::RetainAutorelease);
Retain->setCalledFunction(Decl);
DEBUG(dbgs() << " New RetainAutorelease: " << *Retain << "\n");
@@ -380,7 +381,7 @@ void ObjCARCContract::tryToContractReleaseIntoStoreStrong(Instruction *Release,
Args[0] = new BitCastInst(Args[0], I8XX, "", Store);
if (Args[1]->getType() != I8X)
Args[1] = new BitCastInst(Args[1], I8X, "", Store);
- Constant *Decl = EP.get(ARCRuntimeEntryPoints::EPT_StoreStrong);
+ Constant *Decl = EP.get(ARCRuntimeEntryPointKind::StoreStrong);
CallInst *StoreStrong = CallInst::Create(Decl, Args, "", Store);
StoreStrong->setDoesNotThrow();
StoreStrong->setDebugLoc(Store->getDebugLoc());
@@ -647,7 +648,7 @@ bool ObjCARCContract::doInitialization(Module &M) {
if (!Run)
return false;
- EP.Initialize(&M);
+ EP.init(&M);
// Initialize RetainRVMarker.
RetainRVMarker = nullptr;
diff --git a/lib/Transforms/ObjCARC/ObjCARCOpts.cpp b/lib/Transforms/ObjCARC/ObjCARCOpts.cpp
index f55b77f..4d75658 100644
--- a/lib/Transforms/ObjCARC/ObjCARCOpts.cpp
+++ b/lib/Transforms/ObjCARC/ObjCARCOpts.cpp
@@ -26,9 +26,11 @@
#include "ObjCARC.h"
#include "ARCRuntimeEntryPoints.h"
+#include "BlotMapVector.h"
#include "DependencyAnalysis.h"
#include "ObjCARCAliasAnalysis.h"
#include "ProvenanceAnalysis.h"
+#include "PtrState.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/DenseSet.h"
#include "llvm/ADT/STLExtras.h"
@@ -45,102 +47,6 @@ using namespace llvm::objcarc;
#define DEBUG_TYPE "objc-arc-opts"
-/// \defgroup MiscUtils Miscellaneous utilities that are not ARC specific.
-/// @{
-
-namespace {
- /// \brief An associative container with fast insertion-order (deterministic)
- /// iteration over its elements. Plus the special blot operation.
- template<class KeyT, class ValueT>
- class MapVector {
- /// Map keys to indices in Vector.
- typedef DenseMap<KeyT, size_t> MapTy;
- MapTy Map;
-
- typedef std::vector<std::pair<KeyT, ValueT> > VectorTy;
- /// Keys and values.
- VectorTy Vector;
-
- public:
- typedef typename VectorTy::iterator iterator;
- typedef typename VectorTy::const_iterator const_iterator;
- iterator begin() { return Vector.begin(); }
- iterator end() { return Vector.end(); }
- const_iterator begin() const { return Vector.begin(); }
- const_iterator end() const { return Vector.end(); }
-
-#ifdef XDEBUG
- ~MapVector() {
- assert(Vector.size() >= Map.size()); // May differ due to blotting.
- for (typename MapTy::const_iterator I = Map.begin(), E = Map.end();
- I != E; ++I) {
- assert(I->second < Vector.size());
- assert(Vector[I->second].first == I->first);
- }
- for (typename VectorTy::const_iterator I = Vector.begin(),
- E = Vector.end(); I != E; ++I)
- assert(!I->first ||
- (Map.count(I->first) &&
- Map[I->first] == size_t(I - Vector.begin())));
- }
-#endif
-
- ValueT &operator[](const KeyT &Arg) {
- std::pair<typename MapTy::iterator, bool> Pair =
- Map.insert(std::make_pair(Arg, size_t(0)));
- if (Pair.second) {
- size_t Num = Vector.size();
- Pair.first->second = Num;
- Vector.push_back(std::make_pair(Arg, ValueT()));
- return Vector[Num].second;
- }
- return Vector[Pair.first->second].second;
- }
-
- std::pair<iterator, bool>
- insert(const std::pair<KeyT, ValueT> &InsertPair) {
- std::pair<typename MapTy::iterator, bool> Pair =
- Map.insert(std::make_pair(InsertPair.first, size_t(0)));
- if (Pair.second) {
- size_t Num = Vector.size();
- Pair.first->second = Num;
- Vector.push_back(InsertPair);
- return std::make_pair(Vector.begin() + Num, true);
- }
- return std::make_pair(Vector.begin() + Pair.first->second, false);
- }
-
- iterator find(const KeyT &Key) {
- typename MapTy::iterator It = Map.find(Key);
- if (It == Map.end()) return Vector.end();
- return Vector.begin() + It->second;
- }
-
- const_iterator find(const KeyT &Key) const {
- typename MapTy::const_iterator It = Map.find(Key);
- if (It == Map.end()) return Vector.end();
- return Vector.begin() + It->second;
- }
-
- /// This is similar to erase, but instead of removing the element from the
- /// vector, it just zeros out the key in the vector. This leaves iterators
- /// intact, but clients must be prepared for zeroed-out keys when iterating.
- void blot(const KeyT &Key) {
- typename MapTy::iterator It = Map.find(Key);
- if (It == Map.end()) return;
- Vector[It->second].first = KeyT();
- Map.erase(It);
- }
-
- void clear() {
- Map.clear();
- Vector.clear();
- }
- };
-}
-
-/// @}
-///
/// \defgroup ARCUtilities Utility declarations/definitions specific to ARC.
/// @{
@@ -177,13 +83,14 @@ static const Value *FindSingleUseIdentifiedObject(const Value *Arg) {
/// This is a wrapper around getUnderlyingObjCPtr along the lines of
/// GetUnderlyingObjects except that it returns early when it sees the first
/// alloca.
-static inline bool AreAnyUnderlyingObjectsAnAlloca(const Value *V) {
+static inline bool AreAnyUnderlyingObjectsAnAlloca(const Value *V,
+ const DataLayout &DL) {
SmallPtrSet<const Value *, 4> Visited;
SmallVector<const Value *, 4> Worklist;
Worklist.push_back(V);
do {
const Value *P = Worklist.pop_back_val();
- P = GetUnderlyingObjCPtr(P);
+ P = GetUnderlyingObjCPtr(P, DL);
if (isa<AllocaInst>(P))
return true;
@@ -270,293 +177,6 @@ STATISTIC(NumReleasesAfterOpt,
#endif
namespace {
- /// \enum Sequence
- ///
- /// \brief A sequence of states that a pointer may go through in which an
- /// objc_retain and objc_release are actually needed.
- enum Sequence {
- S_None,
- S_Retain, ///< objc_retain(x).
- S_CanRelease, ///< foo(x) -- x could possibly see a ref count decrement.
- S_Use, ///< any use of x.
- S_Stop, ///< like S_Release, but code motion is stopped.
- S_Release, ///< objc_release(x).
- S_MovableRelease ///< objc_release(x), !clang.imprecise_release.
- };
-
- raw_ostream &operator<<(raw_ostream &OS, const Sequence S)
- LLVM_ATTRIBUTE_UNUSED;
- raw_ostream &operator<<(raw_ostream &OS, const Sequence S) {
- switch (S) {
- case S_None:
- return OS << "S_None";
- case S_Retain:
- return OS << "S_Retain";
- case S_CanRelease:
- return OS << "S_CanRelease";
- case S_Use:
- return OS << "S_Use";
- case S_Release:
- return OS << "S_Release";
- case S_MovableRelease:
- return OS << "S_MovableRelease";
- case S_Stop:
- return OS << "S_Stop";
- }
- llvm_unreachable("Unknown sequence type.");
- }
-}
-
-static Sequence MergeSeqs(Sequence A, Sequence B, bool TopDown) {
- // The easy cases.
- if (A == B)
- return A;
- if (A == S_None || B == S_None)
- return S_None;
-
- if (A > B) std::swap(A, B);
- if (TopDown) {
- // Choose the side which is further along in the sequence.
- if ((A == S_Retain || A == S_CanRelease) &&
- (B == S_CanRelease || B == S_Use))
- return B;
- } else {
- // Choose the side which is further along in the sequence.
- if ((A == S_Use || A == S_CanRelease) &&
- (B == S_Use || B == S_Release || B == S_Stop || B == S_MovableRelease))
- return A;
- // If both sides are releases, choose the more conservative one.
- if (A == S_Stop && (B == S_Release || B == S_MovableRelease))
- return A;
- if (A == S_Release && B == S_MovableRelease)
- return A;
- }
-
- return S_None;
-}
-
-namespace {
- /// \brief Unidirectional information about either a
- /// retain-decrement-use-release sequence or release-use-decrement-retain
- /// reverse sequence.
- struct RRInfo {
- /// After an objc_retain, the reference count of the referenced
- /// object is known to be positive. Similarly, before an objc_release, the
- /// reference count of the referenced object is known to be positive. If
- /// there are retain-release pairs in code regions where the retain count
- /// is known to be positive, they can be eliminated, regardless of any side
- /// effects between them.
- ///
- /// Also, a retain+release pair nested within another retain+release
- /// pair all on the known same pointer value can be eliminated, regardless
- /// of any intervening side effects.
- ///
- /// KnownSafe is true when either of these conditions is satisfied.
- bool KnownSafe;
-
- /// True of the objc_release calls are all marked with the "tail" keyword.
- bool IsTailCallRelease;
-
- /// If the Calls are objc_release calls and they all have a
- /// clang.imprecise_release tag, this is the metadata tag.
- MDNode *ReleaseMetadata;
-
- /// For a top-down sequence, the set of objc_retains or
- /// objc_retainBlocks. For bottom-up, the set of objc_releases.
- SmallPtrSet<Instruction *, 2> Calls;
-
- /// The set of optimal insert positions for moving calls in the opposite
- /// sequence.
- SmallPtrSet<Instruction *, 2> ReverseInsertPts;
-
- /// If this is true, we cannot perform code motion but can still remove
- /// retain/release pairs.
- bool CFGHazardAfflicted;
-
- RRInfo() :
- KnownSafe(false), IsTailCallRelease(false), ReleaseMetadata(nullptr),
- CFGHazardAfflicted(false) {}
-
- void clear();
-
- /// Conservatively merge the two RRInfo. Returns true if a partial merge has
- /// occurred, false otherwise.
- bool Merge(const RRInfo &Other);
-
- };
-}
-
-void RRInfo::clear() {
- KnownSafe = false;
- IsTailCallRelease = false;
- ReleaseMetadata = nullptr;
- Calls.clear();
- ReverseInsertPts.clear();
- CFGHazardAfflicted = false;
-}
-
-bool RRInfo::Merge(const RRInfo &Other) {
- // Conservatively merge the ReleaseMetadata information.
- if (ReleaseMetadata != Other.ReleaseMetadata)
- ReleaseMetadata = nullptr;
-
- // Conservatively merge the boolean state.
- KnownSafe &= Other.KnownSafe;
- IsTailCallRelease &= Other.IsTailCallRelease;
- CFGHazardAfflicted |= Other.CFGHazardAfflicted;
-
- // Merge the call sets.
- Calls.insert(Other.Calls.begin(), Other.Calls.end());
-
- // Merge the insert point sets. If there are any differences,
- // that makes this a partial merge.
- bool Partial = ReverseInsertPts.size() != Other.ReverseInsertPts.size();
- for (Instruction *Inst : Other.ReverseInsertPts)
- Partial |= ReverseInsertPts.insert(Inst).second;
- return Partial;
-}
-
-namespace {
- /// \brief This class summarizes several per-pointer runtime properties which
- /// are propogated through the flow graph.
- class PtrState {
- /// True if the reference count is known to be incremented.
- bool KnownPositiveRefCount;
-
- /// True if we've seen an opportunity for partial RR elimination, such as
- /// pushing calls into a CFG triangle or into one side of a CFG diamond.
- bool Partial;
-
- /// The current position in the sequence.
- unsigned char Seq : 8;
-
- /// Unidirectional information about the current sequence.
- RRInfo RRI;
-
- public:
- PtrState() : KnownPositiveRefCount(false), Partial(false),
- Seq(S_None) {}
-
-
- bool IsKnownSafe() const {
- return RRI.KnownSafe;
- }
-
- void SetKnownSafe(const bool NewValue) {
- RRI.KnownSafe = NewValue;
- }
-
- bool IsTailCallRelease() const {
- return RRI.IsTailCallRelease;
- }
-
- void SetTailCallRelease(const bool NewValue) {
- RRI.IsTailCallRelease = NewValue;
- }
-
- bool IsTrackingImpreciseReleases() const {
- return RRI.ReleaseMetadata != nullptr;
- }
-
- const MDNode *GetReleaseMetadata() const {
- return RRI.ReleaseMetadata;
- }
-
- void SetReleaseMetadata(MDNode *NewValue) {
- RRI.ReleaseMetadata = NewValue;
- }
-
- bool IsCFGHazardAfflicted() const {
- return RRI.CFGHazardAfflicted;
- }
-
- void SetCFGHazardAfflicted(const bool NewValue) {
- RRI.CFGHazardAfflicted = NewValue;
- }
-
- void SetKnownPositiveRefCount() {
- DEBUG(dbgs() << "Setting Known Positive.\n");
- KnownPositiveRefCount = true;
- }
-
- void ClearKnownPositiveRefCount() {
- DEBUG(dbgs() << "Clearing Known Positive.\n");
- KnownPositiveRefCount = false;
- }
-
- bool HasKnownPositiveRefCount() const {
- return KnownPositiveRefCount;
- }
-
- void SetSeq(Sequence NewSeq) {
- DEBUG(dbgs() << "Old: " << Seq << "; New: " << NewSeq << "\n");
- Seq = NewSeq;
- }
-
- Sequence GetSeq() const {
- return static_cast<Sequence>(Seq);
- }
-
- void ClearSequenceProgress() {
- ResetSequenceProgress(S_None);
- }
-
- void ResetSequenceProgress(Sequence NewSeq) {
- DEBUG(dbgs() << "Resetting sequence progress.\n");
- SetSeq(NewSeq);
- Partial = false;
- RRI.clear();
- }
-
- void Merge(const PtrState &Other, bool TopDown);
-
- void InsertCall(Instruction *I) {
- RRI.Calls.insert(I);
- }
-
- void InsertReverseInsertPt(Instruction *I) {
- RRI.ReverseInsertPts.insert(I);
- }
-
- void ClearReverseInsertPts() {
- RRI.ReverseInsertPts.clear();
- }
-
- bool HasReverseInsertPts() const {
- return !RRI.ReverseInsertPts.empty();
- }
-
- const RRInfo &GetRRInfo() const {
- return RRI;
- }
- };
-}
-
-void
-PtrState::Merge(const PtrState &Other, bool TopDown) {
- Seq = MergeSeqs(GetSeq(), Other.GetSeq(), TopDown);
- KnownPositiveRefCount &= Other.KnownPositiveRefCount;
-
- // If we're not in a sequence (anymore), drop all associated state.
- if (Seq == S_None) {
- Partial = false;
- RRI.clear();
- } else if (Partial || Other.Partial) {
- // If we're doing a merge on a path that's previously seen a partial
- // merge, conservatively drop the sequence, to avoid doing partial
- // RR elimination. If the branch predicates for the two merge differ,
- // mixing them is unsafe.
- ClearSequenceProgress();
- } else {
- // Otherwise merge the other PtrState's RRInfo into our RRInfo. At this
- // point, we know that currently we are not partial. Stash whether or not
- // the merge operation caused us to undergo a partial merging of reverse
- // insertion points.
- Partial = RRI.Merge(Other.RRI);
- }
-}
-
-namespace {
/// \brief Per-BasicBlock state.
class BBState {
/// The number of unique control paths from the entry which can reach this
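The Sequence enum and MergeSeqs deleted above migrate to PtrState.{h,cpp} (added elsewhere in this commit) with the lattice unchanged. Spot checks of the bottom-up merge, restated from the removed code:

    assert(MergeSeqs(S_Use, S_Release, /*TopDown=*/false) == S_Use);
    assert(MergeSeqs(S_Stop, S_MovableRelease, /*TopDown=*/false) == S_Stop);
    // Disagreement outside the use/release window collapses the sequence:
    assert(MergeSeqs(S_Retain, S_Release, /*TopDown=*/false) == S_None);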
@@ -566,20 +186,18 @@ namespace {
/// The number of unique control paths to exits from this block.
unsigned BottomUpPathCount;
- /// A type for PerPtrTopDown and PerPtrBottomUp.
- typedef MapVector<const Value *, PtrState> MapTy;
-
/// The top-down traversal uses this to record information known about a
/// pointer at the bottom of each block.
- MapTy PerPtrTopDown;
+ BlotMapVector<const Value *, TopDownPtrState> PerPtrTopDown;
/// The bottom-up traversal uses this to record information known about a
/// pointer at the top of each block.
- MapTy PerPtrBottomUp;
+ BlotMapVector<const Value *, BottomUpPtrState> PerPtrBottomUp;
/// Effective predecessors of the current block ignoring ignorable edges and
/// ignored backedges.
SmallVector<BasicBlock *, 2> Preds;
+
/// Effective successors of the current block ignoring ignorable edges and
/// ignored backedges.
SmallVector<BasicBlock *, 2> Succs;
@@ -589,26 +207,38 @@ namespace {
BBState() : TopDownPathCount(0), BottomUpPathCount(0) { }
- typedef MapTy::iterator ptr_iterator;
- typedef MapTy::const_iterator ptr_const_iterator;
+ typedef decltype(PerPtrTopDown)::iterator top_down_ptr_iterator;
+ typedef decltype(PerPtrTopDown)::const_iterator const_top_down_ptr_iterator;
- ptr_iterator top_down_ptr_begin() { return PerPtrTopDown.begin(); }
- ptr_iterator top_down_ptr_end() { return PerPtrTopDown.end(); }
- ptr_const_iterator top_down_ptr_begin() const {
+ top_down_ptr_iterator top_down_ptr_begin() { return PerPtrTopDown.begin(); }
+ top_down_ptr_iterator top_down_ptr_end() { return PerPtrTopDown.end(); }
+ const_top_down_ptr_iterator top_down_ptr_begin() const {
return PerPtrTopDown.begin();
}
- ptr_const_iterator top_down_ptr_end() const {
+ const_top_down_ptr_iterator top_down_ptr_end() const {
return PerPtrTopDown.end();
}
+ bool hasTopDownPtrs() const {
+ return !PerPtrTopDown.empty();
+ }
+
+ typedef decltype(PerPtrBottomUp)::iterator bottom_up_ptr_iterator;
+ typedef decltype(
+ PerPtrBottomUp)::const_iterator const_bottom_up_ptr_iterator;
- ptr_iterator bottom_up_ptr_begin() { return PerPtrBottomUp.begin(); }
- ptr_iterator bottom_up_ptr_end() { return PerPtrBottomUp.end(); }
- ptr_const_iterator bottom_up_ptr_begin() const {
+ bottom_up_ptr_iterator bottom_up_ptr_begin() {
return PerPtrBottomUp.begin();
}
- ptr_const_iterator bottom_up_ptr_end() const {
+ bottom_up_ptr_iterator bottom_up_ptr_end() { return PerPtrBottomUp.end(); }
+ const_bottom_up_ptr_iterator bottom_up_ptr_begin() const {
+ return PerPtrBottomUp.begin();
+ }
+ const_bottom_up_ptr_iterator bottom_up_ptr_end() const {
return PerPtrBottomUp.end();
}
+ bool hasBottomUpPtrs() const {
+ return !PerPtrBottomUp.empty();
+ }
/// Mark this block as being an entry block, which has one path from the
/// entry by definition.
@@ -621,20 +251,20 @@ namespace {
/// Attempt to find the PtrState object describing the top down state for
/// pointer Arg. Return a new initialized PtrState describing the top down
/// state for Arg if we do not find one.
- PtrState &getPtrTopDownState(const Value *Arg) {
+ TopDownPtrState &getPtrTopDownState(const Value *Arg) {
return PerPtrTopDown[Arg];
}
/// Attempt to find the PtrState object describing the bottom up state for
/// pointer Arg. Return a new initialized PtrState describing the bottom up
/// state for Arg if we do not find one.
- PtrState &getPtrBottomUpState(const Value *Arg) {
+ BottomUpPtrState &getPtrBottomUpState(const Value *Arg) {
return PerPtrBottomUp[Arg];
}
/// Attempt to find the PtrState object describing the bottom up state for
/// pointer Arg.
- ptr_iterator findPtrBottomUpState(const Value *Arg) {
+ bottom_up_ptr_iterator findPtrBottomUpState(const Value *Arg) {
return PerPtrBottomUp.find(Arg);
}
@@ -685,6 +315,11 @@ namespace {
const unsigned BBState::OverflowOccurredValue = 0xffffffff;
}
+namespace llvm {
+raw_ostream &operator<<(raw_ostream &OS,
+ BBState &BBState) LLVM_ATTRIBUTE_UNUSED;
+}
+
void BBState::InitFromPred(const BBState &Other) {
PerPtrTopDown = Other.PerPtrTopDown;
TopDownPathCount = Other.TopDownPathCount;
@@ -724,19 +359,18 @@ void BBState::MergePred(const BBState &Other) {
// For each entry in the other set, if our set has an entry with the same key,
// merge the entries. Otherwise, copy the entry and merge it with an empty
// entry.
- for (ptr_const_iterator MI = Other.top_down_ptr_begin(),
- ME = Other.top_down_ptr_end(); MI != ME; ++MI) {
- std::pair<ptr_iterator, bool> Pair = PerPtrTopDown.insert(*MI);
- Pair.first->second.Merge(Pair.second ? PtrState() : MI->second,
+ for (auto MI = Other.top_down_ptr_begin(), ME = Other.top_down_ptr_end();
+ MI != ME; ++MI) {
+ auto Pair = PerPtrTopDown.insert(*MI);
+ Pair.first->second.Merge(Pair.second ? TopDownPtrState() : MI->second,
/*TopDown=*/true);
}
// For each entry in our set, if the other set doesn't have an entry with the
// same key, force it to merge with an empty entry.
- for (ptr_iterator MI = top_down_ptr_begin(),
- ME = top_down_ptr_end(); MI != ME; ++MI)
+ for (auto MI = top_down_ptr_begin(), ME = top_down_ptr_end(); MI != ME; ++MI)
if (Other.PerPtrTopDown.find(MI->first) == Other.PerPtrTopDown.end())
- MI->second.Merge(PtrState(), /*TopDown=*/true);
+ MI->second.Merge(TopDownPtrState(), /*TopDown=*/true);
}
/// The bottom-up traversal uses this to merge information about successors to
@@ -768,304 +402,80 @@ void BBState::MergeSucc(const BBState &Other) {
// For each entry in the other set, if our set has an entry with the
// same key, merge the entries. Otherwise, copy the entry and merge
// it with an empty entry.
- for (ptr_const_iterator MI = Other.bottom_up_ptr_begin(),
- ME = Other.bottom_up_ptr_end(); MI != ME; ++MI) {
- std::pair<ptr_iterator, bool> Pair = PerPtrBottomUp.insert(*MI);
- Pair.first->second.Merge(Pair.second ? PtrState() : MI->second,
+ for (auto MI = Other.bottom_up_ptr_begin(), ME = Other.bottom_up_ptr_end();
+ MI != ME; ++MI) {
+ auto Pair = PerPtrBottomUp.insert(*MI);
+ Pair.first->second.Merge(Pair.second ? BottomUpPtrState() : MI->second,
/*TopDown=*/false);
}
// For each entry in our set, if the other set doesn't have an entry
// with the same key, force it to merge with an empty entry.
- for (ptr_iterator MI = bottom_up_ptr_begin(),
- ME = bottom_up_ptr_end(); MI != ME; ++MI)
+ for (auto MI = bottom_up_ptr_begin(), ME = bottom_up_ptr_end(); MI != ME;
+ ++MI)
if (Other.PerPtrBottomUp.find(MI->first) == Other.PerPtrBottomUp.end())
- MI->second.Merge(PtrState(), /*TopDown=*/false);
+ MI->second.Merge(BottomUpPtrState(), /*TopDown=*/false);
}
-// Only enable ARC Annotations if we are building a debug version of
-// libObjCARCOpts.
-#ifndef NDEBUG
-#define ARC_ANNOTATIONS
-#endif
-
-// Define some macros along the lines of DEBUG and some helper functions to make
-// it cleaner to create annotations in the source code and to no-op when not
-// building in debug mode.
-#ifdef ARC_ANNOTATIONS
-
-#include "llvm/Support/CommandLine.h"
-
-/// Enable/disable ARC sequence annotations.
-static cl::opt<bool>
-EnableARCAnnotations("enable-objc-arc-annotations", cl::init(false),
- cl::desc("Enable emission of arc data flow analysis "
- "annotations"));
-static cl::opt<bool>
-DisableCheckForCFGHazards("disable-objc-arc-checkforcfghazards", cl::init(false),
- cl::desc("Disable check for cfg hazards when "
- "annotating"));
-static cl::opt<std::string>
-ARCAnnotationTargetIdentifier("objc-arc-annotation-target-identifier",
- cl::init(""),
- cl::desc("filter out all data flow annotations "
- "but those that apply to the given "
- "target llvm identifier."));
-
-/// This function appends a unique ARCAnnotationProvenanceSourceMDKind id to an
-/// instruction so that we can track backwards when post processing via the llvm
-/// arc annotation processor tool. If the function is an
-static MDString *AppendMDNodeToSourcePtr(unsigned NodeId,
- Value *Ptr) {
- MDString *Hash = nullptr;
-
- // If pointer is a result of an instruction and it does not have a source
- // MDNode it, attach a new MDNode onto it. If pointer is a result of
- // an instruction and does have a source MDNode attached to it, return a
- // reference to said Node. Otherwise just return 0.
- if (Instruction *Inst = dyn_cast<Instruction>(Ptr)) {
- MDNode *Node;
- if (!(Node = Inst->getMetadata(NodeId))) {
- // We do not have any node. Generate and attatch the hash MDString to the
- // instruction.
-
- // We just use an MDString to ensure that this metadata gets written out
- // of line at the module level and to provide a very simple format
- // encoding the information herein. Both of these makes it simpler to
- // parse the annotations by a simple external program.
- std::string Str;
- raw_string_ostream os(Str);
- os << "(" << Inst->getParent()->getParent()->getName() << ",%"
- << Inst->getName() << ")";
-
- Hash = MDString::get(Inst->getContext(), os.str());
- Inst->setMetadata(NodeId, MDNode::get(Inst->getContext(),Hash));
- } else {
- // We have a node. Grab its hash and return it.
- assert(Node->getNumOperands() == 1 &&
- "An ARCAnnotationProvenanceSourceMDKind can only have 1 operand.");
- Hash = cast<MDString>(Node->getOperand(0));
+raw_ostream &llvm::operator<<(raw_ostream &OS, BBState &BBInfo) {
+ // Dump the pointers we are tracking.
+ OS << " TopDown State:\n";
+ if (!BBInfo.hasTopDownPtrs()) {
+ OS << " NONE!\n";
+ } else {
+ for (auto I = BBInfo.top_down_ptr_begin(), E = BBInfo.top_down_ptr_end();
+ I != E; ++I) {
+ const PtrState &P = I->second;
+ OS << " Ptr: " << *I->first
+ << "\n KnownSafe: " << (P.IsKnownSafe()?"true":"false")
+ << "\n ImpreciseRelease: "
+ << (P.IsTrackingImpreciseReleases()?"true":"false") << "\n"
+ << " HasCFGHazards: "
+ << (P.IsCFGHazardAfflicted()?"true":"false") << "\n"
+ << " KnownPositive: "
+ << (P.HasKnownPositiveRefCount()?"true":"false") << "\n"
+ << " Seq: "
+ << P.GetSeq() << "\n";
}
- } else if (Argument *Arg = dyn_cast<Argument>(Ptr)) {
- std::string str;
- raw_string_ostream os(str);
- os << "(" << Arg->getParent()->getName() << ",%" << Arg->getName()
- << ")";
- Hash = MDString::get(Arg->getContext(), os.str());
- }
-
- return Hash;
-}
-
-static std::string SequenceToString(Sequence A) {
- std::string str;
- raw_string_ostream os(str);
- os << A;
- return os.str();
-}
-
-/// Helper function to change a Sequence into a String object using our overload
-/// for raw_ostream so we only have printing code in one location.
-static MDString *SequenceToMDString(LLVMContext &Context,
- Sequence A) {
- return MDString::get(Context, SequenceToString(A));
-}
-
-/// A simple function to generate a MDNode which describes the change in state
-/// for Value *Ptr caused by Instruction *Inst.
-static void AppendMDNodeToInstForPtr(unsigned NodeId,
- Instruction *Inst,
- Value *Ptr,
- MDString *PtrSourceMDNodeID,
- Sequence OldSeq,
- Sequence NewSeq) {
- MDNode *Node = nullptr;
- Metadata *tmp[3] = {PtrSourceMDNodeID,
- SequenceToMDString(Inst->getContext(), OldSeq),
- SequenceToMDString(Inst->getContext(), NewSeq)};
- Node = MDNode::get(Inst->getContext(), tmp);
-
- Inst->setMetadata(NodeId, Node);
-}
-
-/// Add to the beginning of the basic block llvm.ptr.annotations which show the
-/// state of a pointer at the entrance to a basic block.
-static void GenerateARCBBEntranceAnnotation(const char *Name, BasicBlock *BB,
- Value *Ptr, Sequence Seq) {
- // If we have a target identifier, make sure that we match it before
- // continuing.
- if(!ARCAnnotationTargetIdentifier.empty() &&
- !Ptr->getName().equals(ARCAnnotationTargetIdentifier))
- return;
-
- Module *M = BB->getParent()->getParent();
- LLVMContext &C = M->getContext();
- Type *I8X = PointerType::getUnqual(Type::getInt8Ty(C));
- Type *I8XX = PointerType::getUnqual(I8X);
- Type *Params[] = {I8XX, I8XX};
- FunctionType *FTy = FunctionType::get(Type::getVoidTy(C), Params,
- /*isVarArg=*/false);
- Constant *Callee = M->getOrInsertFunction(Name, FTy);
-
- IRBuilder<> Builder(BB, BB->getFirstInsertionPt());
-
- Value *PtrName;
- StringRef Tmp = Ptr->getName();
- if (nullptr == (PtrName = M->getGlobalVariable(Tmp, true))) {
- Value *ActualPtrName = Builder.CreateGlobalStringPtr(Tmp,
- Tmp + "_STR");
- PtrName = new GlobalVariable(*M, I8X, true, GlobalVariable::InternalLinkage,
- cast<Constant>(ActualPtrName), Tmp);
- }
-
- Value *S;
- std::string SeqStr = SequenceToString(Seq);
- if (nullptr == (S = M->getGlobalVariable(SeqStr, true))) {
- Value *ActualPtrName = Builder.CreateGlobalStringPtr(SeqStr,
- SeqStr + "_STR");
- S = new GlobalVariable(*M, I8X, true, GlobalVariable::InternalLinkage,
- cast<Constant>(ActualPtrName), SeqStr);
- }
-
- Builder.CreateCall2(Callee, PtrName, S);
-}
-
-/// Add to the end of the basic block llvm.ptr.annotations which show the state
-/// of the pointer at the bottom of the basic block.
-static void GenerateARCBBTerminatorAnnotation(const char *Name, BasicBlock *BB,
- Value *Ptr, Sequence Seq) {
- // If we have a target identifier, make sure that we match it before emitting
- // an annotation.
- if(!ARCAnnotationTargetIdentifier.empty() &&
- !Ptr->getName().equals(ARCAnnotationTargetIdentifier))
- return;
-
- Module *M = BB->getParent()->getParent();
- LLVMContext &C = M->getContext();
- Type *I8X = PointerType::getUnqual(Type::getInt8Ty(C));
- Type *I8XX = PointerType::getUnqual(I8X);
- Type *Params[] = {I8XX, I8XX};
- FunctionType *FTy = FunctionType::get(Type::getVoidTy(C), Params,
- /*isVarArg=*/false);
- Constant *Callee = M->getOrInsertFunction(Name, FTy);
-
- IRBuilder<> Builder(BB, std::prev(BB->end()));
-
- Value *PtrName;
- StringRef Tmp = Ptr->getName();
- if (nullptr == (PtrName = M->getGlobalVariable(Tmp, true))) {
- Value *ActualPtrName = Builder.CreateGlobalStringPtr(Tmp,
- Tmp + "_STR");
- PtrName = new GlobalVariable(*M, I8X, true, GlobalVariable::InternalLinkage,
- cast<Constant>(ActualPtrName), Tmp);
}
- Value *S;
- std::string SeqStr = SequenceToString(Seq);
- if (nullptr == (S = M->getGlobalVariable(SeqStr, true))) {
- Value *ActualPtrName = Builder.CreateGlobalStringPtr(SeqStr,
- SeqStr + "_STR");
- S = new GlobalVariable(*M, I8X, true, GlobalVariable::InternalLinkage,
- cast<Constant>(ActualPtrName), SeqStr);
+ OS << " BottomUp State:\n";
+ if (!BBInfo.hasBottomUpPtrs()) {
+ OS << " NONE!\n";
+ } else {
+ for (auto I = BBInfo.bottom_up_ptr_begin(), E = BBInfo.bottom_up_ptr_end();
+ I != E; ++I) {
+ const PtrState &P = I->second;
+ OS << " Ptr: " << *I->first
+ << "\n KnownSafe: " << (P.IsKnownSafe()?"true":"false")
+ << "\n ImpreciseRelease: "
+ << (P.IsTrackingImpreciseReleases()?"true":"false") << "\n"
+ << " HasCFGHazards: "
+ << (P.IsCFGHazardAfflicted()?"true":"false") << "\n"
+ << " KnownPositive: "
+ << (P.HasKnownPositiveRefCount()?"true":"false") << "\n"
+ << " Seq: "
+ << P.GetSeq() << "\n";
+ }
}
- Builder.CreateCall2(Callee, PtrName, S);
-}
-/// Adds a source annotation to pointer and a state change annotation to Inst
-/// referencing the source annotation and the old/new state of pointer.
-static void GenerateARCAnnotation(unsigned InstMDId,
- unsigned PtrMDId,
- Instruction *Inst,
- Value *Ptr,
- Sequence OldSeq,
- Sequence NewSeq) {
- if (EnableARCAnnotations) {
- // If we have a target identifier, make sure that we match it before
- // emitting an annotation.
- if(!ARCAnnotationTargetIdentifier.empty() &&
- !Ptr->getName().equals(ARCAnnotationTargetIdentifier))
- return;
-
- // First generate the source annotation on our pointer. This will return an
- // MDString* if Ptr actually comes from an instruction implying we can put
- // in a source annotation. If AppendMDNodeToSourcePtr returns 0 (i.e. NULL),
- // then we know that our pointer is from an Argument so we put a reference
- // to the argument number.
- //
- // The point of this is to make it easy for the
- // llvm-arc-annotation-processor tool to cross reference where the source
- // pointer is in the LLVM IR since the LLVM IR parser does not submit such
- // information via debug info for backends to use (since why would anyone
- // need such a thing from LLVM IR besides in non-standard cases
- // [i.e. this]).
- MDString *SourcePtrMDNode =
- AppendMDNodeToSourcePtr(PtrMDId, Ptr);
- AppendMDNodeToInstForPtr(InstMDId, Inst, Ptr, SourcePtrMDNode, OldSeq,
- NewSeq);
- }
+ return OS;
}
-// The actual interface for accessing the above functionality is defined via
-// some simple macros which are defined below. We do this so that the user does
-// not need to pass in what metadata id is needed resulting in cleaner code and
-// additionally since it provides an easy way to conditionally no-op all
-// annotation support in a non-debug build.
-
-/// Use this macro to annotate a sequence state change when processing
-/// instructions bottom up,
-#define ANNOTATE_BOTTOMUP(inst, ptr, old, new) \
- GenerateARCAnnotation(ARCAnnotationBottomUpMDKind, \
- ARCAnnotationProvenanceSourceMDKind, (inst), \
- const_cast<Value*>(ptr), (old), (new))
-/// Use this macro to annotate a sequence state change when processing
-/// instructions top down.
-#define ANNOTATE_TOPDOWN(inst, ptr, old, new) \
- GenerateARCAnnotation(ARCAnnotationTopDownMDKind, \
- ARCAnnotationProvenanceSourceMDKind, (inst), \
- const_cast<Value*>(ptr), (old), (new))
-
-#define ANNOTATE_BB(_states, _bb, _name, _type, _direction) \
- do { \
- if (EnableARCAnnotations) { \
- for(BBState::ptr_const_iterator I = (_states)._direction##_ptr_begin(), \
- E = (_states)._direction##_ptr_end(); I != E; ++I) { \
- Value *Ptr = const_cast<Value*>(I->first); \
- Sequence Seq = I->second.GetSeq(); \
- GenerateARCBB ## _type ## Annotation(_name, (_bb), Ptr, Seq); \
- } \
- } \
- } while (0)
-
-#define ANNOTATE_BOTTOMUP_BBSTART(_states, _basicblock) \
- ANNOTATE_BB(_states, _basicblock, "llvm.arc.annotation.bottomup.bbstart", \
- Entrance, bottom_up)
-#define ANNOTATE_BOTTOMUP_BBEND(_states, _basicblock) \
- ANNOTATE_BB(_states, _basicblock, "llvm.arc.annotation.bottomup.bbend", \
- Terminator, bottom_up)
-#define ANNOTATE_TOPDOWN_BBSTART(_states, _basicblock) \
- ANNOTATE_BB(_states, _basicblock, "llvm.arc.annotation.topdown.bbstart", \
- Entrance, top_down)
-#define ANNOTATE_TOPDOWN_BBEND(_states, _basicblock) \
- ANNOTATE_BB(_states, _basicblock, "llvm.arc.annotation.topdown.bbend", \
- Terminator, top_down)
-
-#else // !ARC_ANNOTATION
-// If annotations are off, noop.
-#define ANNOTATE_BOTTOMUP(inst, ptr, old, new)
-#define ANNOTATE_TOPDOWN(inst, ptr, old, new)
-#define ANNOTATE_BOTTOMUP_BBSTART(states, basicblock)
-#define ANNOTATE_BOTTOMUP_BBEND(states, basicblock)
-#define ANNOTATE_TOPDOWN_BBSTART(states, basicblock)
-#define ANNOTATE_TOPDOWN_BBEND(states, basicblock)
-#endif // !ARC_ANNOTATION
-
namespace {
+
/// \brief The main ARC optimization pass.
class ObjCARCOpt : public FunctionPass {
bool Changed;
ProvenanceAnalysis PA;
+
+ /// A cache of references to runtime entry point constants.
ARCRuntimeEntryPoints EP;
+ /// A cache of MDKinds that can be passed into other functions to propagate
+ /// MDKind identifiers.
+ ARCMDKindCache MDKindCache;
+
// This is used to track if a pointer is stored into an alloca.
DenseSet<const Value *> MultiOwnersSet;
@@ -1076,24 +486,6 @@ namespace {
/// is in fact used in the current function.
unsigned UsedInThisFunction;
- /// The Metadata Kind for clang.imprecise_release metadata.
- unsigned ImpreciseReleaseMDKind;
-
- /// The Metadata Kind for clang.arc.copy_on_escape metadata.
- unsigned CopyOnEscapeMDKind;
-
- /// The Metadata Kind for clang.arc.no_objc_arc_exceptions metadata.
- unsigned NoObjCARCExceptionsMDKind;
-
-#ifdef ARC_ANNOTATIONS
- /// The Metadata Kind for llvm.arc.annotation.bottomup metadata.
- unsigned ARCAnnotationBottomUpMDKind;
- /// The Metadata Kind for llvm.arc.annotation.topdown metadata.
- unsigned ARCAnnotationTopDownMDKind;
- /// The Metadata Kind for llvm.arc.annotation.provenancesource metadata.
- unsigned ARCAnnotationProvenanceSourceMDKind;
-#endif // ARC_ANNOATIONS
-
bool OptimizeRetainRVCall(Function &F, Instruction *RetainRV);
void OptimizeAutoreleaseRVCall(Function &F, Instruction *AutoreleaseRV,
ARCInstKind &Class);
@@ -1102,47 +494,41 @@ namespace {
void CheckForCFGHazards(const BasicBlock *BB,
DenseMap<const BasicBlock *, BBState> &BBStates,
BBState &MyStates) const;
- bool VisitInstructionBottomUp(Instruction *Inst,
- BasicBlock *BB,
- MapVector<Value *, RRInfo> &Retains,
+ bool VisitInstructionBottomUp(Instruction *Inst, BasicBlock *BB,
+ BlotMapVector<Value *, RRInfo> &Retains,
BBState &MyStates);
bool VisitBottomUp(BasicBlock *BB,
DenseMap<const BasicBlock *, BBState> &BBStates,
- MapVector<Value *, RRInfo> &Retains);
+ BlotMapVector<Value *, RRInfo> &Retains);
bool VisitInstructionTopDown(Instruction *Inst,
DenseMap<Value *, RRInfo> &Releases,
BBState &MyStates);
bool VisitTopDown(BasicBlock *BB,
DenseMap<const BasicBlock *, BBState> &BBStates,
DenseMap<Value *, RRInfo> &Releases);
- bool Visit(Function &F,
- DenseMap<const BasicBlock *, BBState> &BBStates,
- MapVector<Value *, RRInfo> &Retains,
+ bool Visit(Function &F, DenseMap<const BasicBlock *, BBState> &BBStates,
+ BlotMapVector<Value *, RRInfo> &Retains,
DenseMap<Value *, RRInfo> &Releases);
void MoveCalls(Value *Arg, RRInfo &RetainsToMove, RRInfo &ReleasesToMove,
- MapVector<Value *, RRInfo> &Retains,
+ BlotMapVector<Value *, RRInfo> &Retains,
DenseMap<Value *, RRInfo> &Releases,
- SmallVectorImpl<Instruction *> &DeadInsts,
- Module *M);
-
- bool ConnectTDBUTraversals(DenseMap<const BasicBlock *, BBState> &BBStates,
- MapVector<Value *, RRInfo> &Retains,
- DenseMap<Value *, RRInfo> &Releases,
- Module *M,
- SmallVectorImpl<Instruction *> &NewRetains,
- SmallVectorImpl<Instruction *> &NewReleases,
- SmallVectorImpl<Instruction *> &DeadInsts,
- RRInfo &RetainsToMove,
- RRInfo &ReleasesToMove,
- Value *Arg,
- bool KnownSafe,
- bool &AnyPairsCompletelyEliminated);
+ SmallVectorImpl<Instruction *> &DeadInsts, Module *M);
+
+ bool
+ PairUpRetainsAndReleases(DenseMap<const BasicBlock *, BBState> &BBStates,
+ BlotMapVector<Value *, RRInfo> &Retains,
+ DenseMap<Value *, RRInfo> &Releases, Module *M,
+ SmallVectorImpl<Instruction *> &NewRetains,
+ SmallVectorImpl<Instruction *> &NewReleases,
+ SmallVectorImpl<Instruction *> &DeadInsts,
+ RRInfo &RetainsToMove, RRInfo &ReleasesToMove,
+ Value *Arg, bool KnownSafe,
+ bool &AnyPairsCompletelyEliminated);
bool PerformCodePlacement(DenseMap<const BasicBlock *, BBState> &BBStates,
- MapVector<Value *, RRInfo> &Retains,
- DenseMap<Value *, RRInfo> &Releases,
- Module *M);
+ BlotMapVector<Value *, RRInfo> &Retains,
+ DenseMap<Value *, RRInfo> &Releases, Module *M);
void OptimizeWeakCalls(Function &F);
@@ -1238,7 +624,7 @@ ObjCARCOpt::OptimizeRetainRVCall(Function &F, Instruction *RetainRV) {
"objc_retain since the operand is not a return value.\n"
"Old = " << *RetainRV << "\n");
- Constant *NewDecl = EP.get(ARCRuntimeEntryPoints::EPT_Retain);
+ Constant *NewDecl = EP.get(ARCRuntimeEntryPointKind::Retain);
cast<CallInst>(RetainRV)->setCalledFunction(NewDecl);
DEBUG(dbgs() << "New = " << *RetainRV << "\n");
@@ -1274,7 +660,7 @@ void ObjCARCOpt::OptimizeAutoreleaseRVCall(Function &F,
"Old = " << *AutoreleaseRV << "\n");
CallInst *AutoreleaseRVCI = cast<CallInst>(AutoreleaseRV);
- Constant *NewDecl = EP.get(ARCRuntimeEntryPoints::EPT_Autorelease);
+ Constant *NewDecl = EP.get(ARCRuntimeEntryPointKind::Autorelease);
AutoreleaseRVCI->setCalledFunction(NewDecl);
AutoreleaseRVCI->setTailCall(false); // Never tail call objc_autorelease.
Class = ARCInstKind::Autorelease;
@@ -1380,10 +766,11 @@ void ObjCARCOpt::OptimizeIndividualCalls(Function &F) {
// Create the declaration lazily.
LLVMContext &C = Inst->getContext();
- Constant *Decl = EP.get(ARCRuntimeEntryPoints::EPT_Release);
+ Constant *Decl = EP.get(ARCRuntimeEntryPointKind::Release);
CallInst *NewCall = CallInst::Create(Decl, Call->getArgOperand(0), "",
Call);
- NewCall->setMetadata(ImpreciseReleaseMDKind, MDNode::get(C, None));
+ NewCall->setMetadata(MDKindCache.get(ARCMDKindID::ImpreciseRelease),
+ MDNode::get(C, None));
DEBUG(dbgs() << "Replacing autorelease{,RV}(x) with objc_release(x) "
"since x is otherwise unused.\nOld: " << *Call << "\nNew: "
@@ -1547,7 +934,7 @@ void ObjCARCOpt::OptimizeIndividualCalls(Function &F) {
/// no CFG hazards by checking the states of various bottom up pointers.
static void CheckForUseCFGHazard(const Sequence SuccSSeq,
const bool SuccSRRIKnownSafe,
- PtrState &S,
+ TopDownPtrState &S,
bool &SomeSuccHasSame,
bool &AllSuccsHaveSame,
bool &NotAllSeqEqualButKnownSafe,
@@ -1585,7 +972,7 @@ static void CheckForUseCFGHazard(const Sequence SuccSSeq,
/// pointers.
static void CheckForCanReleaseCFGHazard(const Sequence SuccSSeq,
const bool SuccSRRIKnownSafe,
- PtrState &S,
+ TopDownPtrState &S,
bool &SomeSuccHasSame,
bool &AllSuccsHaveSame,
bool &NotAllSeqEqualButKnownSafe) {
@@ -1618,9 +1005,9 @@ ObjCARCOpt::CheckForCFGHazards(const BasicBlock *BB,
BBState &MyStates) const {
// If any top-down local-use or possible-dec has a succ which is earlier in
// the sequence, forget it.
- for (BBState::ptr_iterator I = MyStates.top_down_ptr_begin(),
- E = MyStates.top_down_ptr_end(); I != E; ++I) {
- PtrState &S = I->second;
+ for (auto I = MyStates.top_down_ptr_begin(), E = MyStates.top_down_ptr_end();
+ I != E; ++I) {
+ TopDownPtrState &S = I->second;
const Sequence Seq = I->second.GetSeq();
// We only care about S_Retain, S_CanRelease, and S_Use.
@@ -1646,7 +1033,7 @@ ObjCARCOpt::CheckForCFGHazards(const BasicBlock *BB,
const DenseMap<const BasicBlock *, BBState>::iterator BBI =
BBStates.find(*SI);
assert(BBI != BBStates.end());
- const PtrState &SuccS = BBI->second.getPtrBottomUpState(Arg);
+ const BottomUpPtrState &SuccS = BBI->second.getPtrBottomUpState(Arg);
const Sequence SuccSSeq = SuccS.GetSeq();
// If bottom up, the pointer is in an S_None state, clear the sequence
@@ -1705,44 +1092,21 @@ ObjCARCOpt::CheckForCFGHazards(const BasicBlock *BB,
}
}
-bool
-ObjCARCOpt::VisitInstructionBottomUp(Instruction *Inst,
- BasicBlock *BB,
- MapVector<Value *, RRInfo> &Retains,
- BBState &MyStates) {
+bool ObjCARCOpt::VisitInstructionBottomUp(
+ Instruction *Inst, BasicBlock *BB, BlotMapVector<Value *, RRInfo> &Retains,
+ BBState &MyStates) {
bool NestingDetected = false;
ARCInstKind Class = GetARCInstKind(Inst);
const Value *Arg = nullptr;
- DEBUG(dbgs() << "Class: " << Class << "\n");
+ DEBUG(dbgs() << " Class: " << Class << "\n");
switch (Class) {
case ARCInstKind::Release: {
Arg = GetArgRCIdentityRoot(Inst);
- PtrState &S = MyStates.getPtrBottomUpState(Arg);
-
- // If we see two releases in a row on the same pointer. If so, make
- // a note, and we'll cicle back to revisit it after we've
- // hopefully eliminated the second release, which may allow us to
- // eliminate the first release too.
- // Theoretically we could implement removal of nested retain+release
- // pairs by making PtrState hold a stack of states, but this is
- // simple and avoids adding overhead for the non-nested case.
- if (S.GetSeq() == S_Release || S.GetSeq() == S_MovableRelease) {
- DEBUG(dbgs() << "Found nested releases (i.e. a release pair)\n");
- NestingDetected = true;
- }
-
- MDNode *ReleaseMetadata = Inst->getMetadata(ImpreciseReleaseMDKind);
- Sequence NewSeq = ReleaseMetadata ? S_MovableRelease : S_Release;
- ANNOTATE_BOTTOMUP(Inst, Arg, S.GetSeq(), NewSeq);
- S.ResetSequenceProgress(NewSeq);
- S.SetReleaseMetadata(ReleaseMetadata);
- S.SetKnownSafe(S.HasKnownPositiveRefCount());
- S.SetTailCallRelease(cast<CallInst>(Inst)->isTailCall());
- S.InsertCall(Inst);
- S.SetKnownPositiveRefCount();
+ BottomUpPtrState &S = MyStates.getPtrBottomUpState(Arg);
+ NestingDetected |= S.InitBottomUp(MDKindCache, Inst);
break;
}
case ARCInstKind::RetainBlock:
@@ -1753,35 +1117,16 @@ ObjCARCOpt::VisitInstructionBottomUp(Instruction *Inst,
case ARCInstKind::Retain:
case ARCInstKind::RetainRV: {
Arg = GetArgRCIdentityRoot(Inst);
-
- PtrState &S = MyStates.getPtrBottomUpState(Arg);
- S.SetKnownPositiveRefCount();
-
- Sequence OldSeq = S.GetSeq();
- switch (OldSeq) {
- case S_Stop:
- case S_Release:
- case S_MovableRelease:
- case S_Use:
- // If OldSeq is not S_Use or OldSeq is S_Use and we are tracking an
- // imprecise release, clear our reverse insertion points.
- if (OldSeq != S_Use || S.IsTrackingImpreciseReleases())
- S.ClearReverseInsertPts();
- // FALL THROUGH
- case S_CanRelease:
- // Don't do retain+release tracking for ARCInstKind::RetainRV,
- // because it's
- // better to let it remain as the first instruction after a call.
- if (Class != ARCInstKind::RetainRV)
+ BottomUpPtrState &S = MyStates.getPtrBottomUpState(Arg);
+ if (S.MatchWithRetain()) {
+ // Don't do retain+release tracking for ARCInstKind::RetainRV, because
+ // it's better to let it remain as the first instruction after a call.
+ if (Class != ARCInstKind::RetainRV) {
+ DEBUG(llvm::dbgs() << " Matching with: " << *Inst << "\n");
Retains[Inst] = S.GetRRInfo();
+ }
S.ClearSequenceProgress();
- break;
- case S_None:
- break;
- case S_Retain:
- llvm_unreachable("bottom-up pointer in retain state!");
}
- ANNOTATE_BOTTOMUP(Inst, Arg, OldSeq, S.GetSeq());
// A retain moving bottom up can be a use.
break;
}
@@ -1807,9 +1152,10 @@ ObjCARCOpt::VisitInstructionBottomUp(Instruction *Inst,
// in the presence of allocas we only unconditionally remove pointers if
// both our retain and our release are KnownSafe.
if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) {
- if (AreAnyUnderlyingObjectsAnAlloca(SI->getPointerOperand())) {
- BBState::ptr_iterator I = MyStates.findPtrBottomUpState(
- GetRCIdentityRoot(SI->getValueOperand()));
+ const DataLayout &DL = BB->getModule()->getDataLayout();
+ if (AreAnyUnderlyingObjectsAnAlloca(SI->getPointerOperand(), DL)) {
+ auto I = MyStates.findPtrBottomUpState(
+ GetRCIdentityRoot(SI->getValueOperand()));
if (I != MyStates.bottom_up_ptr_end())
MultiOwnersSet.insert(I->first);
}
@@ -1821,90 +1167,26 @@ ObjCARCOpt::VisitInstructionBottomUp(Instruction *Inst,
// Consider any other possible effects of this instruction on each
// pointer being tracked.
- for (BBState::ptr_iterator MI = MyStates.bottom_up_ptr_begin(),
- ME = MyStates.bottom_up_ptr_end(); MI != ME; ++MI) {
+ for (auto MI = MyStates.bottom_up_ptr_begin(),
+ ME = MyStates.bottom_up_ptr_end();
+ MI != ME; ++MI) {
const Value *Ptr = MI->first;
if (Ptr == Arg)
continue; // Handled above.
- PtrState &S = MI->second;
- Sequence Seq = S.GetSeq();
+ BottomUpPtrState &S = MI->second;
- // Check for possible releases.
- if (CanAlterRefCount(Inst, Ptr, PA, Class)) {
- DEBUG(dbgs() << "CanAlterRefCount: Seq: " << Seq << "; " << *Ptr
- << "\n");
- S.ClearKnownPositiveRefCount();
- switch (Seq) {
- case S_Use:
- S.SetSeq(S_CanRelease);
- ANNOTATE_BOTTOMUP(Inst, Ptr, Seq, S.GetSeq());
- continue;
- case S_CanRelease:
- case S_Release:
- case S_MovableRelease:
- case S_Stop:
- case S_None:
- break;
- case S_Retain:
- llvm_unreachable("bottom-up pointer in retain state!");
- }
- }
+ if (S.HandlePotentialAlterRefCount(Inst, Ptr, PA, Class))
+ continue;
- // Check for possible direct uses.
- switch (Seq) {
- case S_Release:
- case S_MovableRelease:
- if (CanUse(Inst, Ptr, PA, Class)) {
- DEBUG(dbgs() << "CanUse: Seq: " << Seq << "; " << *Ptr
- << "\n");
- assert(!S.HasReverseInsertPts());
- // If this is an invoke instruction, we're scanning it as part of
- // one of its successor blocks, since we can't insert code after it
- // in its own block, and we don't want to split critical edges.
- if (isa<InvokeInst>(Inst))
- S.InsertReverseInsertPt(BB->getFirstInsertionPt());
- else
- S.InsertReverseInsertPt(std::next(BasicBlock::iterator(Inst)));
- S.SetSeq(S_Use);
- ANNOTATE_BOTTOMUP(Inst, Ptr, Seq, S_Use);
- } else if (Seq == S_Release && IsUser(Class)) {
- DEBUG(dbgs() << "PreciseReleaseUse: Seq: " << Seq << "; " << *Ptr
- << "\n");
- // Non-movable releases depend on any possible objc pointer use.
- S.SetSeq(S_Stop);
- ANNOTATE_BOTTOMUP(Inst, Ptr, S_Release, S_Stop);
- assert(!S.HasReverseInsertPts());
- // As above; handle invoke specially.
- if (isa<InvokeInst>(Inst))
- S.InsertReverseInsertPt(BB->getFirstInsertionPt());
- else
- S.InsertReverseInsertPt(std::next(BasicBlock::iterator(Inst)));
- }
- break;
- case S_Stop:
- if (CanUse(Inst, Ptr, PA, Class)) {
- DEBUG(dbgs() << "PreciseStopUse: Seq: " << Seq << "; " << *Ptr
- << "\n");
- S.SetSeq(S_Use);
- ANNOTATE_BOTTOMUP(Inst, Ptr, Seq, S_Use);
- }
- break;
- case S_CanRelease:
- case S_Use:
- case S_None:
- break;
- case S_Retain:
- llvm_unreachable("bottom-up pointer in retain state!");
- }
+ S.HandlePotentialUse(BB, Inst, Ptr, PA, Class);
}
return NestingDetected;
}
-bool
-ObjCARCOpt::VisitBottomUp(BasicBlock *BB,
- DenseMap<const BasicBlock *, BBState> &BBStates,
- MapVector<Value *, RRInfo> &Retains) {
+bool ObjCARCOpt::VisitBottomUp(BasicBlock *BB,
+ DenseMap<const BasicBlock *, BBState> &BBStates,
+ BlotMapVector<Value *, RRInfo> &Retains) {
DEBUG(dbgs() << "\n== ObjCARCOpt::VisitBottomUp ==\n");
@@ -1929,9 +1211,8 @@ ObjCARCOpt::VisitBottomUp(BasicBlock *BB,
}
}
- // If ARC Annotations are enabled, output the current state of pointers at the
- // bottom of the basic block.
- ANNOTATE_BOTTOMUP_BBEND(MyStates, BB);
+ DEBUG(llvm::dbgs() << "Before:\n" << BBStates[BB] << "\n"
+ << "Performing Dataflow:\n");
// Visit all the instructions, bottom-up.
for (BasicBlock::iterator I = BB->end(), E = BB->begin(); I != E; --I) {
@@ -1941,7 +1222,7 @@ ObjCARCOpt::VisitBottomUp(BasicBlock *BB,
if (isa<InvokeInst>(Inst))
continue;
- DEBUG(dbgs() << "Visiting " << *Inst << "\n");
+ DEBUG(dbgs() << " Visiting " << *Inst << "\n");
NestingDetected |= VisitInstructionBottomUp(Inst, BB, Retains, MyStates);
}
@@ -1956,9 +1237,7 @@ ObjCARCOpt::VisitBottomUp(BasicBlock *BB,
NestingDetected |= VisitInstructionBottomUp(II, BB, Retains, MyStates);
}
- // If ARC Annotations are enabled, output the current state of pointers at the
- // top of the basic block.
- ANNOTATE_BOTTOMUP_BBSTART(MyStates, BB);
+ DEBUG(llvm::dbgs() << "\nFinal State:\n" << BBStates[BB] << "\n");
return NestingDetected;
}
@@ -1971,144 +1250,63 @@ ObjCARCOpt::VisitInstructionTopDown(Instruction *Inst,
ARCInstKind Class = GetARCInstKind(Inst);
const Value *Arg = nullptr;
+ DEBUG(llvm::dbgs() << " Class: " << Class << "\n");
+
switch (Class) {
case ARCInstKind::RetainBlock:
// In OptimizeIndividualCalls, we have strength reduced all optimizable
// objc_retainBlocks to objc_retains. Thus at this point any
- // objc_retainBlocks that we see are not optimizable.
+ // objc_retainBlocks that we see are not optimizable. We need to break since
+ // a retain can be a potential use.
break;
case ARCInstKind::Retain:
case ARCInstKind::RetainRV: {
Arg = GetArgRCIdentityRoot(Inst);
-
- PtrState &S = MyStates.getPtrTopDownState(Arg);
-
- // Don't do retain+release tracking for ARCInstKind::RetainRV, because
- // it's
- // better to let it remain as the first instruction after a call.
- if (Class != ARCInstKind::RetainRV) {
- // If we see two retains in a row on the same pointer. If so, make
- // a note, and we'll cicle back to revisit it after we've
- // hopefully eliminated the second retain, which may allow us to
- // eliminate the first retain too.
- // Theoretically we could implement removal of nested retain+release
- // pairs by making PtrState hold a stack of states, but this is
- // simple and avoids adding overhead for the non-nested case.
- if (S.GetSeq() == S_Retain)
- NestingDetected = true;
-
- ANNOTATE_TOPDOWN(Inst, Arg, S.GetSeq(), S_Retain);
- S.ResetSequenceProgress(S_Retain);
- S.SetKnownSafe(S.HasKnownPositiveRefCount());
- S.InsertCall(Inst);
- }
-
- S.SetKnownPositiveRefCount();
-
+ TopDownPtrState &S = MyStates.getPtrTopDownState(Arg);
+ NestingDetected |= S.InitTopDown(Class, Inst);
// A retain can be a potential use; procede to the generic checking
// code below.
break;
}
case ARCInstKind::Release: {
Arg = GetArgRCIdentityRoot(Inst);
-
- PtrState &S = MyStates.getPtrTopDownState(Arg);
- S.ClearKnownPositiveRefCount();
-
- Sequence OldSeq = S.GetSeq();
-
- MDNode *ReleaseMetadata = Inst->getMetadata(ImpreciseReleaseMDKind);
-
- switch (OldSeq) {
- case S_Retain:
- case S_CanRelease:
- if (OldSeq == S_Retain || ReleaseMetadata != nullptr)
- S.ClearReverseInsertPts();
- // FALL THROUGH
- case S_Use:
- S.SetReleaseMetadata(ReleaseMetadata);
- S.SetTailCallRelease(cast<CallInst>(Inst)->isTailCall());
+ TopDownPtrState &S = MyStates.getPtrTopDownState(Arg);
+    // Try to form a tentative pair between this release instruction and the
+    // top down pointers that we are tracking.
+    if (S.MatchWithRelease(MDKindCache, Inst)) {
+      // If we succeed, copy S's RRInfo into the Releases map, then clear S.
+ DEBUG(llvm::dbgs() << " Matching with: " << *Inst << "\n");
Releases[Inst] = S.GetRRInfo();
- ANNOTATE_TOPDOWN(Inst, Arg, S.GetSeq(), S_None);
S.ClearSequenceProgress();
- break;
- case S_None:
- break;
- case S_Stop:
- case S_Release:
- case S_MovableRelease:
- llvm_unreachable("top-down pointer in release state!");
}
break;
}
case ARCInstKind::AutoreleasepoolPop:
// Conservatively, clear MyStates for all known pointers.
MyStates.clearTopDownPointers();
- return NestingDetected;
+ return false;
case ARCInstKind::AutoreleasepoolPush:
case ARCInstKind::None:
- // These are irrelevant.
- return NestingDetected;
+    // These cannot be uses of a pointer we are tracking.
+ return false;
default:
break;
}
// Consider any other possible effects of this instruction on each
// pointer being tracked.
- for (BBState::ptr_iterator MI = MyStates.top_down_ptr_begin(),
- ME = MyStates.top_down_ptr_end(); MI != ME; ++MI) {
+ for (auto MI = MyStates.top_down_ptr_begin(),
+ ME = MyStates.top_down_ptr_end();
+ MI != ME; ++MI) {
const Value *Ptr = MI->first;
if (Ptr == Arg)
continue; // Handled above.
- PtrState &S = MI->second;
- Sequence Seq = S.GetSeq();
-
- // Check for possible releases.
- if (CanAlterRefCount(Inst, Ptr, PA, Class)) {
- DEBUG(dbgs() << "CanAlterRefCount: Seq: " << Seq << "; " << *Ptr
- << "\n");
- S.ClearKnownPositiveRefCount();
- switch (Seq) {
- case S_Retain:
- S.SetSeq(S_CanRelease);
- ANNOTATE_TOPDOWN(Inst, Ptr, Seq, S_CanRelease);
- assert(!S.HasReverseInsertPts());
- S.InsertReverseInsertPt(Inst);
-
- // One call can't cause a transition from S_Retain to S_CanRelease
- // and S_CanRelease to S_Use. If we've made the first transition,
- // we're done.
- continue;
- case S_Use:
- case S_CanRelease:
- case S_None:
- break;
- case S_Stop:
- case S_Release:
- case S_MovableRelease:
- llvm_unreachable("top-down pointer in release state!");
- }
- }
+ TopDownPtrState &S = MI->second;
+ if (S.HandlePotentialAlterRefCount(Inst, Ptr, PA, Class))
+ continue;
- // Check for possible direct uses.
- switch (Seq) {
- case S_CanRelease:
- if (CanUse(Inst, Ptr, PA, Class)) {
- DEBUG(dbgs() << "CanUse: Seq: " << Seq << "; " << *Ptr
- << "\n");
- S.SetSeq(S_Use);
- ANNOTATE_TOPDOWN(Inst, Ptr, Seq, S_Use);
- }
- break;
- case S_Retain:
- case S_Use:
- case S_None:
- break;
- case S_Stop:
- case S_Release:
- case S_MovableRelease:
- llvm_unreachable("top-down pointer in release state!");
- }
+ S.HandlePotentialUse(Inst, Ptr, PA, Class);
}
return NestingDetected;
@@ -2140,27 +1338,22 @@ ObjCARCOpt::VisitTopDown(BasicBlock *BB,
}
}
- // If ARC Annotations are enabled, output the current state of pointers at the
- // top of the basic block.
- ANNOTATE_TOPDOWN_BBSTART(MyStates, BB);
+ DEBUG(llvm::dbgs() << "Before:\n" << BBStates[BB] << "\n"
+ << "Performing Dataflow:\n");
// Visit all the instructions, top-down.
for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I) {
Instruction *Inst = I;
- DEBUG(dbgs() << "Visiting " << *Inst << "\n");
+ DEBUG(dbgs() << " Visiting " << *Inst << "\n");
NestingDetected |= VisitInstructionTopDown(Inst, Releases, MyStates);
}
- // If ARC Annotations are enabled, output the current state of pointers at the
- // bottom of the basic block.
- ANNOTATE_TOPDOWN_BBEND(MyStates, BB);
-
-#ifdef ARC_ANNOTATIONS
- if (!(EnableARCAnnotations && DisableCheckForCFGHazards))
-#endif
+ DEBUG(llvm::dbgs() << "\nState Before Checking for CFG Hazards:\n"
+ << BBStates[BB] << "\n\n");
CheckForCFGHazards(BB, BBStates, MyStates);
+ DEBUG(llvm::dbgs() << "Final State:\n" << BBStates[BB] << "\n");
return NestingDetected;
}
@@ -2246,11 +1439,10 @@ ComputePostOrders(Function &F,
}
// Visit the function both top-down and bottom-up.
-bool
-ObjCARCOpt::Visit(Function &F,
- DenseMap<const BasicBlock *, BBState> &BBStates,
- MapVector<Value *, RRInfo> &Retains,
- DenseMap<Value *, RRInfo> &Releases) {
+bool ObjCARCOpt::Visit(Function &F,
+ DenseMap<const BasicBlock *, BBState> &BBStates,
+ BlotMapVector<Value *, RRInfo> &Retains,
+ DenseMap<Value *, RRInfo> &Releases) {
// Use reverse-postorder traversals, because we magically know that loops
// will be well behaved, i.e. they won't repeatedly call retain on a single
@@ -2260,7 +1452,7 @@ ObjCARCOpt::Visit(Function &F,
SmallVector<BasicBlock *, 16> PostOrder;
SmallVector<BasicBlock *, 16> ReverseCFGPostOrder;
ComputePostOrders(F, PostOrder, ReverseCFGPostOrder,
- NoObjCARCExceptionsMDKind,
+ MDKindCache.get(ARCMDKindID::NoObjCARCExceptions),
BBStates);
// Use reverse-postorder on the reverse CFG for bottom-up.
@@ -2281,10 +1473,9 @@ ObjCARCOpt::Visit(Function &F,
}
/// Move the calls in RetainsToMove and ReleasesToMove.
-void ObjCARCOpt::MoveCalls(Value *Arg,
- RRInfo &RetainsToMove,
+void ObjCARCOpt::MoveCalls(Value *Arg, RRInfo &RetainsToMove,
RRInfo &ReleasesToMove,
- MapVector<Value *, RRInfo> &Retains,
+ BlotMapVector<Value *, RRInfo> &Retains,
DenseMap<Value *, RRInfo> &Releases,
SmallVectorImpl<Instruction *> &DeadInsts,
Module *M) {
@@ -2297,7 +1488,7 @@ void ObjCARCOpt::MoveCalls(Value *Arg,
for (Instruction *InsertPt : ReleasesToMove.ReverseInsertPts) {
Value *MyArg = ArgTy == ParamTy ? Arg :
new BitCastInst(Arg, ParamTy, "", InsertPt);
- Constant *Decl = EP.get(ARCRuntimeEntryPoints::EPT_Retain);
+ Constant *Decl = EP.get(ARCRuntimeEntryPointKind::Retain);
CallInst *Call = CallInst::Create(Decl, MyArg, "", InsertPt);
Call->setDoesNotThrow();
Call->setTailCall();
@@ -2308,11 +1499,11 @@ void ObjCARCOpt::MoveCalls(Value *Arg,
for (Instruction *InsertPt : RetainsToMove.ReverseInsertPts) {
Value *MyArg = ArgTy == ParamTy ? Arg :
new BitCastInst(Arg, ParamTy, "", InsertPt);
- Constant *Decl = EP.get(ARCRuntimeEntryPoints::EPT_Release);
+ Constant *Decl = EP.get(ARCRuntimeEntryPointKind::Release);
CallInst *Call = CallInst::Create(Decl, MyArg, "", InsertPt);
// Attach a clang.imprecise_release metadata tag, if appropriate.
if (MDNode *M = ReleasesToMove.ReleaseMetadata)
- Call->setMetadata(ImpreciseReleaseMDKind, M);
+ Call->setMetadata(MDKindCache.get(ARCMDKindID::ImpreciseRelease), M);
Call->setDoesNotThrow();
if (ReleasesToMove.IsTailCallRelease)
Call->setTailCall();
@@ -2335,20 +1526,15 @@ void ObjCARCOpt::MoveCalls(Value *Arg,
}
-bool
-ObjCARCOpt::ConnectTDBUTraversals(DenseMap<const BasicBlock *, BBState>
- &BBStates,
- MapVector<Value *, RRInfo> &Retains,
- DenseMap<Value *, RRInfo> &Releases,
- Module *M,
- SmallVectorImpl<Instruction *> &NewRetains,
- SmallVectorImpl<Instruction *> &NewReleases,
- SmallVectorImpl<Instruction *> &DeadInsts,
- RRInfo &RetainsToMove,
- RRInfo &ReleasesToMove,
- Value *Arg,
- bool KnownSafe,
- bool &AnyPairsCompletelyEliminated) {
+bool ObjCARCOpt::PairUpRetainsAndReleases(
+ DenseMap<const BasicBlock *, BBState> &BBStates,
+ BlotMapVector<Value *, RRInfo> &Retains,
+ DenseMap<Value *, RRInfo> &Releases, Module *M,
+ SmallVectorImpl<Instruction *> &NewRetains,
+ SmallVectorImpl<Instruction *> &NewReleases,
+ SmallVectorImpl<Instruction *> &DeadInsts, RRInfo &RetainsToMove,
+ RRInfo &ReleasesToMove, Value *Arg, bool KnownSafe,
+ bool &AnyPairsCompletelyEliminated) {
// If a pair happens in a region where it is known that the reference count
// is already incremented, we can similarly ignore possible decrements unless
// we are dealing with a retainable object with multiple provenance sources.
@@ -2369,15 +1555,14 @@ ObjCARCOpt::ConnectTDBUTraversals(DenseMap<const BasicBlock *, BBState>
for (SmallVectorImpl<Instruction *>::const_iterator
NI = NewRetains.begin(), NE = NewRetains.end(); NI != NE; ++NI) {
Instruction *NewRetain = *NI;
- MapVector<Value *, RRInfo>::const_iterator It = Retains.find(NewRetain);
+ auto It = Retains.find(NewRetain);
assert(It != Retains.end());
const RRInfo &NewRetainRRI = It->second;
KnownSafeTD &= NewRetainRRI.KnownSafe;
MultipleOwners =
MultipleOwners || MultiOwnersSet.count(GetArgRCIdentityRoot(NewRetain));
for (Instruction *NewRetainRelease : NewRetainRRI.Calls) {
- DenseMap<Value *, RRInfo>::const_iterator Jt =
- Releases.find(NewRetainRelease);
+ auto Jt = Releases.find(NewRetainRelease);
if (Jt == Releases.end())
return false;
const RRInfo &NewRetainReleaseRRI = Jt->second;
@@ -2446,15 +1631,13 @@ ObjCARCOpt::ConnectTDBUTraversals(DenseMap<const BasicBlock *, BBState>
for (SmallVectorImpl<Instruction *>::const_iterator
NI = NewReleases.begin(), NE = NewReleases.end(); NI != NE; ++NI) {
Instruction *NewRelease = *NI;
- DenseMap<Value *, RRInfo>::const_iterator It =
- Releases.find(NewRelease);
+ auto It = Releases.find(NewRelease);
assert(It != Releases.end());
const RRInfo &NewReleaseRRI = It->second;
KnownSafeBU &= NewReleaseRRI.KnownSafe;
CFGHazardAfflicted |= NewReleaseRRI.CFGHazardAfflicted;
for (Instruction *NewReleaseRetain : NewReleaseRRI.Calls) {
- MapVector<Value *, RRInfo>::const_iterator Jt =
- Retains.find(NewReleaseRetain);
+ auto Jt = Retains.find(NewReleaseRetain);
if (Jt == Retains.end())
return false;
const RRInfo &NewReleaseRetainRRI = Jt->second;
@@ -2506,11 +1689,8 @@ ObjCARCOpt::ConnectTDBUTraversals(DenseMap<const BasicBlock *, BBState>
if (NewRetains.empty()) break;
}
- // If the pointer is known incremented in 1 direction and we do not have
- // MultipleOwners, we can safely remove the retain/releases. Otherwise we need
- // to be known safe in both directions.
- bool UnconditionallySafe = (KnownSafeTD && KnownSafeBU) ||
- ((KnownSafeTD || KnownSafeBU) && !MultipleOwners);
+ // We can only remove pointers if we are known safe in both directions.
+ bool UnconditionallySafe = KnownSafeTD && KnownSafeBU;
if (UnconditionallySafe) {
RetainsToMove.ReverseInsertPts.clear();
ReleasesToMove.ReverseInsertPts.clear();
@@ -2540,12 +1720,6 @@ ObjCARCOpt::ConnectTDBUTraversals(DenseMap<const BasicBlock *, BBState>
if (OldDelta != 0)
return false;
-#ifdef ARC_ANNOTATIONS
- // Do not move calls if ARC annotations are requested.
- if (EnableARCAnnotations)
- return false;
-#endif // ARC_ANNOTATIONS
-
Changed = true;
assert(OldCount != 0 && "Unreachable code?");
NumRRs += OldCount - NewCount;
@@ -2558,12 +1732,10 @@ ObjCARCOpt::ConnectTDBUTraversals(DenseMap<const BasicBlock *, BBState>
/// Identify pairings between the retains and releases, and delete and/or move
/// them.
-bool
-ObjCARCOpt::PerformCodePlacement(DenseMap<const BasicBlock *, BBState>
- &BBStates,
- MapVector<Value *, RRInfo> &Retains,
- DenseMap<Value *, RRInfo> &Releases,
- Module *M) {
+bool ObjCARCOpt::PerformCodePlacement(
+ DenseMap<const BasicBlock *, BBState> &BBStates,
+ BlotMapVector<Value *, RRInfo> &Retains,
+ DenseMap<Value *, RRInfo> &Releases, Module *M) {
DEBUG(dbgs() << "\n== ObjCARCOpt::PerformCodePlacement ==\n");
bool AnyPairsCompletelyEliminated = false;
@@ -2574,8 +1746,9 @@ ObjCARCOpt::PerformCodePlacement(DenseMap<const BasicBlock *, BBState>
SmallVector<Instruction *, 8> DeadInsts;
// Visit each retain.
- for (MapVector<Value *, RRInfo>::const_iterator I = Retains.begin(),
- E = Retains.end(); I != E; ++I) {
+ for (BlotMapVector<Value *, RRInfo>::const_iterator I = Retains.begin(),
+ E = Retains.end();
+ I != E; ++I) {
Value *V = I->first;
if (!V) continue; // blotted
@@ -2602,11 +1775,10 @@ ObjCARCOpt::PerformCodePlacement(DenseMap<const BasicBlock *, BBState>
// Connect the dots between the top-down-collected RetainsToMove and
// bottom-up-collected ReleasesToMove to form sets of related calls.
NewRetains.push_back(Retain);
- bool PerformMoveCalls =
- ConnectTDBUTraversals(BBStates, Retains, Releases, M, NewRetains,
- NewReleases, DeadInsts, RetainsToMove,
- ReleasesToMove, Arg, KnownSafe,
- AnyPairsCompletelyEliminated);
+ bool PerformMoveCalls = PairUpRetainsAndReleases(
+ BBStates, Retains, Releases, M, NewRetains, NewReleases, DeadInsts,
+ RetainsToMove, ReleasesToMove, Arg, KnownSafe,
+ AnyPairsCompletelyEliminated);
if (PerformMoveCalls) {
// Ok, everything checks out and we're all set. Let's move/delete some
@@ -2678,7 +1850,7 @@ void ObjCARCOpt::OptimizeWeakCalls(Function &F) {
Changed = true;
// If the load has a builtin retain, insert a plain retain for it.
if (Class == ARCInstKind::LoadWeakRetained) {
- Constant *Decl = EP.get(ARCRuntimeEntryPoints::EPT_Retain);
+ Constant *Decl = EP.get(ARCRuntimeEntryPointKind::Retain);
CallInst *CI = CallInst::Create(Decl, EarlierCall, "", Call);
CI->setTailCall();
}
@@ -2707,7 +1879,7 @@ void ObjCARCOpt::OptimizeWeakCalls(Function &F) {
Changed = true;
// If the load has a builtin retain, insert a plain retain for it.
if (Class == ARCInstKind::LoadWeakRetained) {
- Constant *Decl = EP.get(ARCRuntimeEntryPoints::EPT_Retain);
+ Constant *Decl = EP.get(ARCRuntimeEntryPointKind::Retain);
CallInst *CI = CallInst::Create(Decl, EarlierCall, "", Call);
CI->setTailCall();
}
@@ -2795,7 +1967,7 @@ bool ObjCARCOpt::OptimizeSequences(Function &F) {
// map stays valid when we get around to rewriting code and calls get
// replaced by arguments.
DenseMap<Value *, RRInfo> Releases;
- MapVector<Value *, RRInfo> Retains;
+ BlotMapVector<Value *, RRInfo> Retains;
// This is used during the traversal of the function to track the
// states for each identified object at each block.
@@ -2828,8 +2000,7 @@ HasSafePathToPredecessorCall(const Value *Arg, Instruction *Retain,
if (DepInsts.size() != 1)
return false;
- CallInst *Call =
- dyn_cast_or_null<CallInst>(*DepInsts.begin());
+ auto *Call = dyn_cast_or_null<CallInst>(*DepInsts.begin());
// Check that the pointer is the return value of the call.
if (!Call || Arg != Call)
@@ -2857,8 +2028,7 @@ FindPredecessorRetainWithSafePath(const Value *Arg, BasicBlock *BB,
if (DepInsts.size() != 1)
return nullptr;
- CallInst *Retain =
- dyn_cast_or_null<CallInst>(*DepInsts.begin());
+ auto *Retain = dyn_cast_or_null<CallInst>(*DepInsts.begin());
// Check that we found a retain with the same argument.
if (!Retain || !IsRetain(GetBasicARCInstKind(Retain)) ||
@@ -2883,8 +2053,7 @@ FindPredecessorAutoreleaseWithSafePath(const Value *Arg, BasicBlock *BB,
if (DepInsts.size() != 1)
return nullptr;
- CallInst *Autorelease =
- dyn_cast_or_null<CallInst>(*DepInsts.begin());
+ auto *Autorelease = dyn_cast_or_null<CallInst>(*DepInsts.begin());
if (!Autorelease)
return nullptr;
ARCInstKind AutoreleaseClass = GetBasicARCInstKind(Autorelease);
@@ -2999,28 +2168,13 @@ bool ObjCARCOpt::doInitialization(Module &M) {
if (!Run)
return false;
- // Identify the imprecise release metadata kind.
- ImpreciseReleaseMDKind =
- M.getContext().getMDKindID("clang.imprecise_release");
- CopyOnEscapeMDKind =
- M.getContext().getMDKindID("clang.arc.copy_on_escape");
- NoObjCARCExceptionsMDKind =
- M.getContext().getMDKindID("clang.arc.no_objc_arc_exceptions");
-#ifdef ARC_ANNOTATIONS
- ARCAnnotationBottomUpMDKind =
- M.getContext().getMDKindID("llvm.arc.annotation.bottomup");
- ARCAnnotationTopDownMDKind =
- M.getContext().getMDKindID("llvm.arc.annotation.topdown");
- ARCAnnotationProvenanceSourceMDKind =
- M.getContext().getMDKindID("llvm.arc.annotation.provenancesource");
-#endif // ARC_ANNOTATIONS
-
// Intuitively, objc_retain and others are nocapture, however in practice
// they are not, because they return their argument value. And objc_release
// calls finalizers which can have arbitrary side effects.
+ MDKindCache.init(&M);
// Initialize our runtime entry point cache.
- EP.Initialize(&M);
+ EP.init(&M);
return false;
}
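The per-pass MDKind fields removed above are folded into a single per-module cache. A minimal sketch of the idea, assuming only what this diff shows (the real ARCMDKindCache is declared elsewhere in the patch; the sketch's names are placeholders):

    // Sketch only: resolve the metadata kind IDs once per module, then hand
    // out the cached unsigned IDs. Mirrors the removed getMDKindID() lookups.
    struct MDKindCacheSketch {
      unsigned ImpreciseRelease;
      void init(Module *M) {
        ImpreciseRelease = M->getContext().getMDKindID("clang.imprecise_release");
      }
      unsigned get() const { return ImpreciseRelease; }
    };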
diff --git a/lib/Transforms/ObjCARC/ProvenanceAnalysis.cpp b/lib/Transforms/ObjCARC/ProvenanceAnalysis.cpp
index 410abfc..15ad8dc 100644
--- a/lib/Transforms/ObjCARC/ProvenanceAnalysis.cpp
+++ b/lib/Transforms/ObjCARC/ProvenanceAnalysis.cpp
@@ -32,20 +32,22 @@ using namespace llvm::objcarc;
bool ProvenanceAnalysis::relatedSelect(const SelectInst *A,
const Value *B) {
+ const DataLayout &DL = A->getModule()->getDataLayout();
// If the values are Selects with the same condition, we can do a more precise
// check: just check for relations between the values on corresponding arms.
if (const SelectInst *SB = dyn_cast<SelectInst>(B))
if (A->getCondition() == SB->getCondition())
- return related(A->getTrueValue(), SB->getTrueValue()) ||
- related(A->getFalseValue(), SB->getFalseValue());
+ return related(A->getTrueValue(), SB->getTrueValue(), DL) ||
+ related(A->getFalseValue(), SB->getFalseValue(), DL);
// Check both arms of the Select node individually.
- return related(A->getTrueValue(), B) ||
- related(A->getFalseValue(), B);
+ return related(A->getTrueValue(), B, DL) ||
+ related(A->getFalseValue(), B, DL);
}
bool ProvenanceAnalysis::relatedPHI(const PHINode *A,
const Value *B) {
+ const DataLayout &DL = A->getModule()->getDataLayout();
// If the values are PHIs in the same block, we can do a more precise as well
// as efficient check: just check for relations between the values on
// corresponding edges.
@@ -53,7 +55,7 @@ bool ProvenanceAnalysis::relatedPHI(const PHINode *A,
if (PNB->getParent() == A->getParent()) {
for (unsigned i = 0, e = A->getNumIncomingValues(); i != e; ++i)
if (related(A->getIncomingValue(i),
- PNB->getIncomingValueForBlock(A->getIncomingBlock(i))))
+ PNB->getIncomingValueForBlock(A->getIncomingBlock(i)), DL))
return true;
return false;
}
@@ -62,7 +64,7 @@ bool ProvenanceAnalysis::relatedPHI(const PHINode *A,
SmallPtrSet<const Value *, 4> UniqueSrc;
for (unsigned i = 0, e = A->getNumIncomingValues(); i != e; ++i) {
const Value *PV1 = A->getIncomingValue(i);
- if (UniqueSrc.insert(PV1).second && related(PV1, B))
+ if (UniqueSrc.insert(PV1).second && related(PV1, B, DL))
return true;
}
@@ -103,11 +105,11 @@ static bool IsStoredObjCPointer(const Value *P) {
return false;
}
-bool ProvenanceAnalysis::relatedCheck(const Value *A,
- const Value *B) {
+bool ProvenanceAnalysis::relatedCheck(const Value *A, const Value *B,
+ const DataLayout &DL) {
// Skip past provenance pass-throughs.
- A = GetUnderlyingObjCPtr(A);
- B = GetUnderlyingObjCPtr(B);
+ A = GetUnderlyingObjCPtr(A, DL);
+ B = GetUnderlyingObjCPtr(B, DL);
// Quick check.
if (A == B)
@@ -159,8 +161,8 @@ bool ProvenanceAnalysis::relatedCheck(const Value *A,
return true;
}
-bool ProvenanceAnalysis::related(const Value *A,
- const Value *B) {
+bool ProvenanceAnalysis::related(const Value *A, const Value *B,
+ const DataLayout &DL) {
// Begin by inserting a conservative value into the map. If the insertion
// fails, we have the answer already. If it succeeds, leave it there until we
// compute the real answer to guard against recursive queries.
@@ -170,7 +172,7 @@ bool ProvenanceAnalysis::related(const Value *A,
if (!Pair.second)
return Pair.first->second;
- bool Result = relatedCheck(A, B);
+ bool Result = relatedCheck(A, B, DL);
CachedResults[ValuePairTy(A, B)] = Result;
return Result;
}
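The memoization above guards against recursive queries by seeding the cache before computing. A compressed restatement, assuming the seeding line from the unchanged context is the usual DenseMap insert (that line is not shown in this hunk):

    // Seed a conservative "true" so a recursive query for the same pair
    // terminates immediately; overwrite it with the real answer afterwards.
    auto Pair = CachedResults.insert(std::make_pair(ValuePairTy(A, B), true));
    if (!Pair.second)
      return Pair.first->second;
    bool Result = relatedCheck(A, B, DL);
    CachedResults[ValuePairTy(A, B)] = Result;
    return Result;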
diff --git a/lib/Transforms/ObjCARC/ProvenanceAnalysis.h b/lib/Transforms/ObjCARC/ProvenanceAnalysis.h
index 4b5f4d8..0ac41d3 100644
--- a/lib/Transforms/ObjCARC/ProvenanceAnalysis.h
+++ b/lib/Transforms/ObjCARC/ProvenanceAnalysis.h
@@ -30,6 +30,7 @@
namespace llvm {
class Value;
class AliasAnalysis;
+ class DataLayout;
class PHINode;
class SelectInst;
}
@@ -53,7 +54,7 @@ class ProvenanceAnalysis {
typedef DenseMap<ValuePairTy, bool> CachedResultsTy;
CachedResultsTy CachedResults;
- bool relatedCheck(const Value *A, const Value *B);
+ bool relatedCheck(const Value *A, const Value *B, const DataLayout &DL);
bool relatedSelect(const SelectInst *A, const Value *B);
bool relatedPHI(const PHINode *A, const Value *B);
@@ -67,7 +68,7 @@ public:
AliasAnalysis *getAA() const { return AA; }
- bool related(const Value *A, const Value *B);
+ bool related(const Value *A, const Value *B, const DataLayout &DL);
void clear() {
CachedResults.clear();
diff --git a/lib/Transforms/ObjCARC/ProvenanceAnalysisEvaluator.cpp b/lib/Transforms/ObjCARC/ProvenanceAnalysisEvaluator.cpp
index d836632..0be75af 100644
--- a/lib/Transforms/ObjCARC/ProvenanceAnalysisEvaluator.cpp
+++ b/lib/Transforms/ObjCARC/ProvenanceAnalysisEvaluator.cpp
@@ -14,6 +14,7 @@
#include "llvm/Analysis/Passes.h"
#include "llvm/IR/InstIterator.h"
#include "llvm/IR/Function.h"
+#include "llvm/IR/Module.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
@@ -65,6 +66,7 @@ bool PAEval::runOnFunction(Function &F) {
ProvenanceAnalysis PA;
PA.setAA(&getAnalysis<AliasAnalysis>());
+ const DataLayout &DL = F.getParent()->getDataLayout();
for (Value *V1 : Values) {
StringRef NameV1 = getName(V1);
@@ -73,7 +75,7 @@ bool PAEval::runOnFunction(Function &F) {
if (NameV1 >= NameV2)
continue;
errs() << NameV1 << " and " << NameV2;
- if (PA.related(V1, V2))
+ if (PA.related(V1, V2, DL))
errs() << " are related.\n";
else
errs() << " are not related.\n";
diff --git a/lib/Transforms/ObjCARC/PtrState.cpp b/lib/Transforms/ObjCARC/PtrState.cpp
new file mode 100644
index 0000000..ae20e7e
--- /dev/null
+++ b/lib/Transforms/ObjCARC/PtrState.cpp
@@ -0,0 +1,404 @@
+//===--- PtrState.cpp -----------------------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "PtrState.h"
+#include "DependencyAnalysis.h"
+#include "ObjCARC.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+
+using namespace llvm;
+using namespace llvm::objcarc;
+
+#define DEBUG_TYPE "objc-arc-ptr-state"
+
+//===----------------------------------------------------------------------===//
+// Utility
+//===----------------------------------------------------------------------===//
+
+raw_ostream &llvm::objcarc::operator<<(raw_ostream &OS, const Sequence S) {
+ switch (S) {
+ case S_None:
+ return OS << "S_None";
+ case S_Retain:
+ return OS << "S_Retain";
+ case S_CanRelease:
+ return OS << "S_CanRelease";
+ case S_Use:
+ return OS << "S_Use";
+ case S_Release:
+ return OS << "S_Release";
+ case S_MovableRelease:
+ return OS << "S_MovableRelease";
+ case S_Stop:
+ return OS << "S_Stop";
+ }
+ llvm_unreachable("Unknown sequence type.");
+}
+
+//===----------------------------------------------------------------------===//
+// Sequence
+//===----------------------------------------------------------------------===//
+
+static Sequence MergeSeqs(Sequence A, Sequence B, bool TopDown) {
+ // The easy cases.
+ if (A == B)
+ return A;
+ if (A == S_None || B == S_None)
+ return S_None;
+
+ if (A > B)
+ std::swap(A, B);
+ if (TopDown) {
+ // Choose the side which is further along in the sequence.
+ if ((A == S_Retain || A == S_CanRelease) &&
+ (B == S_CanRelease || B == S_Use))
+ return B;
+ } else {
+ // Choose the side which is further along in the sequence.
+ if ((A == S_Use || A == S_CanRelease) &&
+ (B == S_Use || B == S_Release || B == S_Stop || B == S_MovableRelease))
+ return A;
+ // If both sides are releases, choose the more conservative one.
+ if (A == S_Stop && (B == S_Release || B == S_MovableRelease))
+ return A;
+ if (A == S_Release && B == S_MovableRelease)
+ return A;
+ }
+
+ return S_None;
+}
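A rough feel for the merge lattice, assuming the enum order declared in PtrState.h below (MergeSeqs is file-static, so these are illustrative assertions rather than callable code):

    // Illustrative only; not part of the patch.
    assert(MergeSeqs(S_Retain, S_Use, /*TopDown=*/true) == S_Use);    // further along wins
    assert(MergeSeqs(S_Use, S_Release, /*TopDown=*/false) == S_Use);  // further along wins
    assert(MergeSeqs(S_Stop, S_MovableRelease, /*TopDown=*/false) == S_Stop); // conservative
    assert(MergeSeqs(S_Retain, S_MovableRelease, /*TopDown=*/true) == S_None); // incompatible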
+
+//===----------------------------------------------------------------------===//
+// RRInfo
+//===----------------------------------------------------------------------===//
+
+void RRInfo::clear() {
+ KnownSafe = false;
+ IsTailCallRelease = false;
+ ReleaseMetadata = nullptr;
+ Calls.clear();
+ ReverseInsertPts.clear();
+ CFGHazardAfflicted = false;
+}
+
+bool RRInfo::Merge(const RRInfo &Other) {
+ // Conservatively merge the ReleaseMetadata information.
+ if (ReleaseMetadata != Other.ReleaseMetadata)
+ ReleaseMetadata = nullptr;
+
+ // Conservatively merge the boolean state.
+ KnownSafe &= Other.KnownSafe;
+ IsTailCallRelease &= Other.IsTailCallRelease;
+ CFGHazardAfflicted |= Other.CFGHazardAfflicted;
+
+ // Merge the call sets.
+ Calls.insert(Other.Calls.begin(), Other.Calls.end());
+
+ // Merge the insert point sets. If there are any differences,
+ // that makes this a partial merge.
+ bool Partial = ReverseInsertPts.size() != Other.ReverseInsertPts.size();
+ for (Instruction *Inst : Other.ReverseInsertPts)
+ Partial |= ReverseInsertPts.insert(Inst).second;
+ return Partial;
+}
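The Partial flag in practice, with placeholder Instruction pointers I1 and I2 (a sketch, not patch code):

    RRInfo A, B;                    // both empty: merging them is not partial
    A.ReverseInsertPts.insert(I1);  // I1 != I2, hypothetical instructions
    B.ReverseInsertPts.insert(I2);
    bool Partial = A.Merge(B);      // A's set becomes {I1, I2}; Partial == true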
+
+//===----------------------------------------------------------------------===//
+// PtrState
+//===----------------------------------------------------------------------===//
+
+void PtrState::SetKnownPositiveRefCount() {
+ DEBUG(dbgs() << " Setting Known Positive.\n");
+ KnownPositiveRefCount = true;
+}
+
+void PtrState::ClearKnownPositiveRefCount() {
+ DEBUG(dbgs() << " Clearing Known Positive.\n");
+ KnownPositiveRefCount = false;
+}
+
+void PtrState::SetSeq(Sequence NewSeq) {
+ DEBUG(dbgs() << " Old: " << GetSeq() << "; New: " << NewSeq << "\n");
+ Seq = NewSeq;
+}
+
+void PtrState::ResetSequenceProgress(Sequence NewSeq) {
+ DEBUG(dbgs() << " Resetting sequence progress.\n");
+ SetSeq(NewSeq);
+ Partial = false;
+ RRI.clear();
+}
+
+void PtrState::Merge(const PtrState &Other, bool TopDown) {
+ Seq = MergeSeqs(GetSeq(), Other.GetSeq(), TopDown);
+ KnownPositiveRefCount &= Other.KnownPositiveRefCount;
+
+ // If we're not in a sequence (anymore), drop all associated state.
+ if (Seq == S_None) {
+ Partial = false;
+ RRI.clear();
+ } else if (Partial || Other.Partial) {
+ // If we're doing a merge on a path that's previously seen a partial
+ // merge, conservatively drop the sequence, to avoid doing partial
+    // RR elimination. If the branch predicates for the two merges differ,
+ // mixing them is unsafe.
+ ClearSequenceProgress();
+ } else {
+ // Otherwise merge the other PtrState's RRInfo into our RRInfo. At this
+ // point, we know that currently we are not partial. Stash whether or not
+ // the merge operation caused us to undergo a partial merging of reverse
+ // insertion points.
+ Partial = RRI.Merge(Other.RRI);
+ }
+}
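Stated compactly (a restatement of the branches above, not new behavior):

    // 1. merged Seq == S_None  -> drop Partial and RRI outright;
    // 2. either side Partial   -> ClearSequenceProgress(): mixing insert points
    //                             gathered under different branch predicates
    //                             is unsafe;
    // 3. otherwise             -> RRI.Merge(Other.RRI), which may set Partial.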
+
+//===----------------------------------------------------------------------===//
+// BottomUpPtrState
+//===----------------------------------------------------------------------===//
+
+bool BottomUpPtrState::InitBottomUp(ARCMDKindCache &Cache, Instruction *I) {
+  // Check whether we see two releases in a row on the same pointer. If so,
+  // make a note, and we'll circle back to revisit it after we've hopefully
+  // eliminated the second release, which may allow us to eliminate the first
+  // release too.
+ // Theoretically we could implement removal of nested retain+release
+ // pairs by making PtrState hold a stack of states, but this is
+ // simple and avoids adding overhead for the non-nested case.
+ bool NestingDetected = false;
+ if (GetSeq() == S_Release || GetSeq() == S_MovableRelease) {
+ DEBUG(dbgs() << " Found nested releases (i.e. a release pair)\n");
+ NestingDetected = true;
+ }
+
+ MDNode *ReleaseMetadata =
+ I->getMetadata(Cache.get(ARCMDKindID::ImpreciseRelease));
+ Sequence NewSeq = ReleaseMetadata ? S_MovableRelease : S_Release;
+ ResetSequenceProgress(NewSeq);
+ SetReleaseMetadata(ReleaseMetadata);
+ SetKnownSafe(HasKnownPositiveRefCount());
+ SetTailCallRelease(cast<CallInst>(I)->isTailCall());
+ InsertCall(I);
+ SetKnownPositiveRefCount();
+ return NestingDetected;
+}
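For instance, feeding one pointer's state two releases in a row reports nesting on the second call (Cache, R1 and R2 are placeholders for an ARCMDKindCache and two objc_release call instructions):

    BottomUpPtrState S;
    bool N1 = S.InitBottomUp(Cache, R1); // Seq was S_None: no nesting yet
    bool N2 = S.InitBottomUp(Cache, R2); // Seq was a release state: true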
+
+bool BottomUpPtrState::MatchWithRetain() {
+ SetKnownPositiveRefCount();
+
+ Sequence OldSeq = GetSeq();
+ switch (OldSeq) {
+ case S_Stop:
+ case S_Release:
+ case S_MovableRelease:
+ case S_Use:
+ // If OldSeq is not S_Use or OldSeq is S_Use and we are tracking an
+ // imprecise release, clear our reverse insertion points.
+ if (OldSeq != S_Use || IsTrackingImpreciseReleases())
+ ClearReverseInsertPts();
+ // FALL THROUGH
+ case S_CanRelease:
+ return true;
+ case S_None:
+ return false;
+ case S_Retain:
+ llvm_unreachable("bottom-up pointer in retain state!");
+ }
+ llvm_unreachable("Sequence unknown enum value");
+}
+
+bool BottomUpPtrState::HandlePotentialAlterRefCount(Instruction *Inst,
+ const Value *Ptr,
+ ProvenanceAnalysis &PA,
+ ARCInstKind Class) {
+ Sequence S = GetSeq();
+
+ // Check for possible releases.
+ if (!CanAlterRefCount(Inst, Ptr, PA, Class))
+ return false;
+
+ DEBUG(dbgs() << " CanAlterRefCount: Seq: " << S << "; " << *Ptr
+ << "\n");
+ switch (S) {
+ case S_Use:
+ SetSeq(S_CanRelease);
+ return true;
+ case S_CanRelease:
+ case S_Release:
+ case S_MovableRelease:
+ case S_Stop:
+ case S_None:
+ return false;
+ case S_Retain:
+ llvm_unreachable("bottom-up pointer in retain state!");
+ }
+ llvm_unreachable("Sequence unknown enum value");
+}
+
+void BottomUpPtrState::HandlePotentialUse(BasicBlock *BB, Instruction *Inst,
+ const Value *Ptr,
+ ProvenanceAnalysis &PA,
+ ARCInstKind Class) {
+ // Check for possible direct uses.
+ switch (GetSeq()) {
+ case S_Release:
+ case S_MovableRelease:
+ if (CanUse(Inst, Ptr, PA, Class)) {
+ DEBUG(dbgs() << " CanUse: Seq: " << GetSeq() << "; " << *Ptr
+ << "\n");
+ assert(!HasReverseInsertPts());
+ // If this is an invoke instruction, we're scanning it as part of
+ // one of its successor blocks, since we can't insert code after it
+ // in its own block, and we don't want to split critical edges.
+ if (isa<InvokeInst>(Inst))
+ InsertReverseInsertPt(BB->getFirstInsertionPt());
+ else
+ InsertReverseInsertPt(std::next(BasicBlock::iterator(Inst)));
+ SetSeq(S_Use);
+ } else if (Seq == S_Release && IsUser(Class)) {
+ DEBUG(dbgs() << " PreciseReleaseUse: Seq: " << GetSeq() << "; "
+ << *Ptr << "\n");
+ // Non-movable releases depend on any possible objc pointer use.
+ SetSeq(S_Stop);
+ assert(!HasReverseInsertPts());
+ // As above; handle invoke specially.
+ if (isa<InvokeInst>(Inst))
+ InsertReverseInsertPt(BB->getFirstInsertionPt());
+ else
+ InsertReverseInsertPt(std::next(BasicBlock::iterator(Inst)));
+ }
+ break;
+ case S_Stop:
+ if (CanUse(Inst, Ptr, PA, Class)) {
+ DEBUG(dbgs() << " PreciseStopUse: Seq: " << GetSeq() << "; "
+ << *Ptr << "\n");
+ SetSeq(S_Use);
+ }
+ break;
+ case S_CanRelease:
+ case S_Use:
+ case S_None:
+ break;
+ case S_Retain:
+ llvm_unreachable("bottom-up pointer in retain state!");
+ }
+}
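The invoke special case in isolation (a sketch under this tree's iterator conventions, not patch code):

    // Code cannot be inserted after an invoke inside its own block, so the
    // reverse insert point is hoisted to the top of the successor block that
    // is currently being scanned.
    Instruction *RevInsertPt =
        isa<InvokeInst>(Inst) ? &*BB->getFirstInsertionPt()
                              : &*std::next(BasicBlock::iterator(Inst));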
+
+//===----------------------------------------------------------------------===//
+// TopDownPtrState
+//===----------------------------------------------------------------------===//
+
+bool TopDownPtrState::InitTopDown(ARCInstKind Kind, Instruction *I) {
+ bool NestingDetected = false;
+  // Don't do retain+release tracking for ARCInstKind::RetainRV, because it's
+  // better to let it remain as the first instruction after a call.
+ if (Kind != ARCInstKind::RetainRV) {
+    // Check whether we see two retains in a row on the same pointer. If so,
+    // make a note, and we'll circle back to revisit it after we've hopefully
+    // eliminated the second retain, which may allow us to eliminate the first
+    // retain too.
+ // Theoretically we could implement removal of nested retain+release
+ // pairs by making PtrState hold a stack of states, but this is
+ // simple and avoids adding overhead for the non-nested case.
+ if (GetSeq() == S_Retain)
+ NestingDetected = true;
+
+ ResetSequenceProgress(S_Retain);
+ SetKnownSafe(HasKnownPositiveRefCount());
+ InsertCall(I);
+ }
+
+ SetKnownPositiveRefCount();
+ return NestingDetected;
+}
+
+bool TopDownPtrState::MatchWithRelease(ARCMDKindCache &Cache,
+ Instruction *Release) {
+ ClearKnownPositiveRefCount();
+
+ Sequence OldSeq = GetSeq();
+
+ MDNode *ReleaseMetadata =
+ Release->getMetadata(Cache.get(ARCMDKindID::ImpreciseRelease));
+
+ switch (OldSeq) {
+ case S_Retain:
+ case S_CanRelease:
+ if (OldSeq == S_Retain || ReleaseMetadata != nullptr)
+ ClearReverseInsertPts();
+ // FALL THROUGH
+ case S_Use:
+ SetReleaseMetadata(ReleaseMetadata);
+ SetTailCallRelease(cast<CallInst>(Release)->isTailCall());
+ return true;
+ case S_None:
+ return false;
+ case S_Stop:
+ case S_Release:
+ case S_MovableRelease:
+ llvm_unreachable("top-down pointer in bottom up state!");
+ }
+ llvm_unreachable("Sequence unknown enum value");
+}
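Putting the two top-down halves together (RetainInst, ReleaseInst, Cache and the Releases map are placeholders; this mirrors VisitInstructionTopDown above):

    TopDownPtrState S;
    S.InitTopDown(ARCInstKind::Retain, RetainInst); // Seq -> S_Retain
    if (S.MatchWithRelease(Cache, ReleaseInst)) {   // true from S_Retain/S_CanRelease/S_Use
      Releases[ReleaseInst] = S.GetRRInfo();        // tentative pair recorded
      S.ClearSequenceProgress();
    }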
+
+bool TopDownPtrState::HandlePotentialAlterRefCount(Instruction *Inst,
+ const Value *Ptr,
+ ProvenanceAnalysis &PA,
+ ARCInstKind Class) {
+ // Check for possible releases.
+ if (!CanAlterRefCount(Inst, Ptr, PA, Class))
+ return false;
+
+ DEBUG(dbgs() << " CanAlterRefCount: Seq: " << GetSeq() << "; " << *Ptr
+ << "\n");
+ ClearKnownPositiveRefCount();
+ switch (GetSeq()) {
+ case S_Retain:
+ SetSeq(S_CanRelease);
+ assert(!HasReverseInsertPts());
+ InsertReverseInsertPt(Inst);
+
+ // One call can't cause a transition from S_Retain to S_CanRelease
+ // and S_CanRelease to S_Use. If we've made the first transition,
+ // we're done.
+ return true;
+ case S_Use:
+ case S_CanRelease:
+ case S_None:
+ return false;
+ case S_Stop:
+ case S_Release:
+ case S_MovableRelease:
+ llvm_unreachable("top-down pointer in release state!");
+ }
+ llvm_unreachable("covered switch is not covered!?");
+}
+
+void TopDownPtrState::HandlePotentialUse(Instruction *Inst, const Value *Ptr,
+ ProvenanceAnalysis &PA,
+ ARCInstKind Class) {
+ // Check for possible direct uses.
+ switch (GetSeq()) {
+ case S_CanRelease:
+ if (!CanUse(Inst, Ptr, PA, Class))
+ return;
+ DEBUG(dbgs() << " CanUse: Seq: " << GetSeq() << "; " << *Ptr
+ << "\n");
+ SetSeq(S_Use);
+ return;
+ case S_Retain:
+ case S_Use:
+ case S_None:
+ return;
+ case S_Stop:
+ case S_Release:
+ case S_MovableRelease:
+ llvm_unreachable("top-down pointer in release state!");
+ }
+}
diff --git a/lib/Transforms/ObjCARC/PtrState.h b/lib/Transforms/ObjCARC/PtrState.h
new file mode 100644
index 0000000..e45e1ea
--- /dev/null
+++ b/lib/Transforms/ObjCARC/PtrState.h
@@ -0,0 +1,210 @@
+//===--- PtrState.h - ARC State for a Ptr -------------------*- C++ -*-----===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains declarations for the ARC state associated with a ptr. It
+// is only used by the ARC Sequence Dataflow computation. By separating this
+// from the actual dataflow, it is easier to consider the mechanics of the ARC
+// optimization separate from the actual predicates being used.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TRANSFORMS_OBJCARC_PTRSTATE_H
+#define LLVM_LIB_TRANSFORMS_OBJCARC_PTRSTATE_H
+
+#include "ARCInstKind.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/IR/Instruction.h"
+#include "llvm/IR/Value.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Support/Debug.h"
+
+namespace llvm {
+namespace objcarc {
+
+class ARCMDKindCache;
+class ProvenanceAnalysis;
+
+/// \enum Sequence
+///
+/// \brief A sequence of states that a pointer may go through in which an
+/// objc_retain and objc_release are actually needed.
+enum Sequence {
+ S_None,
+ S_Retain, ///< objc_retain(x).
+ S_CanRelease, ///< foo(x) -- x could possibly see a ref count decrement.
+ S_Use, ///< any use of x.
+ S_Stop, ///< like S_Release, but code motion is stopped.
+ S_Release, ///< objc_release(x).
+ S_MovableRelease ///< objc_release(x), !clang.imprecise_release.
+};
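Read bottom-up, a typical pointer walks this sequence in reverse program order (an illustration of the states above, not additional semantics):

    // objc_release(x)         -> S_Release (S_MovableRelease with the
    //                            clang.imprecise_release tag)
    // use of x                -> S_Use
    // call that may decrement -> S_CanRelease
    // objc_retain(x)          -> MatchWithRetain() pairs it; sequence resets.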
+
+raw_ostream &operator<<(raw_ostream &OS,
+ const Sequence S) LLVM_ATTRIBUTE_UNUSED;
+
+/// \brief Unidirectional information about either a
+/// retain-decrement-use-release sequence or release-use-decrement-retain
+/// reverse sequence.
+struct RRInfo {
+ /// After an objc_retain, the reference count of the referenced
+ /// object is known to be positive. Similarly, before an objc_release, the
+ /// reference count of the referenced object is known to be positive. If
+ /// there are retain-release pairs in code regions where the retain count
+ /// is known to be positive, they can be eliminated, regardless of any side
+ /// effects between them.
+ ///
+ /// Also, a retain+release pair nested within another retain+release
+ /// pair all on the known same pointer value can be eliminated, regardless
+ /// of any intervening side effects.
+ ///
+ /// KnownSafe is true when either of these conditions is satisfied.
+ bool KnownSafe;
+
+  /// True if the objc_release calls are all marked with the "tail" keyword.
+ bool IsTailCallRelease;
+
+ /// If the Calls are objc_release calls and they all have a
+ /// clang.imprecise_release tag, this is the metadata tag.
+ MDNode *ReleaseMetadata;
+
+ /// For a top-down sequence, the set of objc_retains or
+ /// objc_retainBlocks. For bottom-up, the set of objc_releases.
+ SmallPtrSet<Instruction *, 2> Calls;
+
+ /// The set of optimal insert positions for moving calls in the opposite
+ /// sequence.
+ SmallPtrSet<Instruction *, 2> ReverseInsertPts;
+
+ /// If this is true, we cannot perform code motion but can still remove
+ /// retain/release pairs.
+ bool CFGHazardAfflicted;
+
+ RRInfo()
+ : KnownSafe(false), IsTailCallRelease(false), ReleaseMetadata(nullptr),
+ CFGHazardAfflicted(false) {}
+
+ void clear();
+
+ /// Conservatively merge the two RRInfo. Returns true if a partial merge has
+ /// occurred, false otherwise.
+ bool Merge(const RRInfo &Other);
+};
+
+/// \brief This class summarizes several per-pointer runtime properties which
+/// are propagated through the flow graph.
+class PtrState {
+protected:
+ /// True if the reference count is known to be incremented.
+ bool KnownPositiveRefCount;
+
+ /// True if we've seen an opportunity for partial RR elimination, such as
+ /// pushing calls into a CFG triangle or into one side of a CFG diamond.
+ bool Partial;
+
+ /// The current position in the sequence.
+ unsigned char Seq : 8;
+
+ /// Unidirectional information about the current sequence.
+ RRInfo RRI;
+
+ PtrState() : KnownPositiveRefCount(false), Partial(false), Seq(S_None) {}
+
+public:
+ bool IsKnownSafe() const { return RRI.KnownSafe; }
+
+ void SetKnownSafe(const bool NewValue) { RRI.KnownSafe = NewValue; }
+
+ bool IsTailCallRelease() const { return RRI.IsTailCallRelease; }
+
+ void SetTailCallRelease(const bool NewValue) {
+ RRI.IsTailCallRelease = NewValue;
+ }
+
+ bool IsTrackingImpreciseReleases() const {
+ return RRI.ReleaseMetadata != nullptr;
+ }
+
+ const MDNode *GetReleaseMetadata() const { return RRI.ReleaseMetadata; }
+
+ void SetReleaseMetadata(MDNode *NewValue) { RRI.ReleaseMetadata = NewValue; }
+
+ bool IsCFGHazardAfflicted() const { return RRI.CFGHazardAfflicted; }
+
+ void SetCFGHazardAfflicted(const bool NewValue) {
+ RRI.CFGHazardAfflicted = NewValue;
+ }
+
+ void SetKnownPositiveRefCount();
+ void ClearKnownPositiveRefCount();
+
+ bool HasKnownPositiveRefCount() const { return KnownPositiveRefCount; }
+
+ void SetSeq(Sequence NewSeq);
+
+ Sequence GetSeq() const { return static_cast<Sequence>(Seq); }
+
+ void ClearSequenceProgress() { ResetSequenceProgress(S_None); }
+
+ void ResetSequenceProgress(Sequence NewSeq);
+ void Merge(const PtrState &Other, bool TopDown);
+
+ void InsertCall(Instruction *I) { RRI.Calls.insert(I); }
+
+ void InsertReverseInsertPt(Instruction *I) { RRI.ReverseInsertPts.insert(I); }
+
+ void ClearReverseInsertPts() { RRI.ReverseInsertPts.clear(); }
+
+ bool HasReverseInsertPts() const { return !RRI.ReverseInsertPts.empty(); }
+
+ const RRInfo &GetRRInfo() const { return RRI; }
+};
+
+struct BottomUpPtrState : PtrState {
+ BottomUpPtrState() : PtrState() {}
+
+  /// (Re-)Initialize this bottom-up pointer, returning true if we detected a
+  /// pointer with nested releases.
+ bool InitBottomUp(ARCMDKindCache &Cache, Instruction *I);
+
+  /// Return true if this set of releases can be paired with a retain.
+  /// Modifies state appropriately to reflect that the matching occurred if
+  /// it is successful.
+ ///
+ /// It is assumed that one has already checked that the RCIdentity of the
+ /// retain and the RCIdentity of this ptr state are the same.
+ bool MatchWithRetain();
+
+ void HandlePotentialUse(BasicBlock *BB, Instruction *Inst, const Value *Ptr,
+ ProvenanceAnalysis &PA, ARCInstKind Class);
+ bool HandlePotentialAlterRefCount(Instruction *Inst, const Value *Ptr,
+ ProvenanceAnalysis &PA, ARCInstKind Class);
+};
+
+struct TopDownPtrState : PtrState {
+ TopDownPtrState() : PtrState() {}
+
+  /// (Re-)Initialize this top-down pointer, returning true if we detected a
+  /// pointer with nested retains.
+ bool InitTopDown(ARCInstKind Kind, Instruction *I);
+
+  /// Return true if this set of retains can be paired with the given
+  /// release. Modifies state appropriately to reflect that the matching
+  /// occurred.
+ bool MatchWithRelease(ARCMDKindCache &Cache, Instruction *Release);
+
+ void HandlePotentialUse(Instruction *Inst, const Value *Ptr,
+ ProvenanceAnalysis &PA, ARCInstKind Class);
+
+ bool HandlePotentialAlterRefCount(Instruction *Inst, const Value *Ptr,
+ ProvenanceAnalysis &PA, ARCInstKind Class);
+};
+
+} // end namespace objcarc
+} // end namespace llvm
+
+#endif
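
The Sequence enum above is a per-pointer state machine, driven bottom-up (from releases toward retains) and top-down (from retains toward releases). A rough standalone sketch of the bottom-up direction only; the transition table here is a simplification for illustration, not the pass's actual logic, which lives in the ObjCARC sources and also consults ProvenanceAnalysis:

    // Illustrative-only model of the bottom-up walk over states mirroring
    // the Sequence values declared in the header above.
    enum Seq { S_None, S_Retain, S_CanRelease, S_Use, S_Stop, S_Release,
               S_MovableRelease };

    enum class Event { Release, Use, CanDecrementRefCount, Retain };

    static Seq stepBottomUp(Seq S, Event E) {
      switch (E) {
      case Event::Release: // objc_release(x) starts a bottom-up sequence
        return S == S_None ? S_Release : S;
      case Event::Use:     // any use of x keeps the pointer live here
        return (S == S_Release || S == S_MovableRelease) ? S_Use : S;
      case Event::CanDecrementRefCount: // foo(x) might drop the ref count
        return (S == S_Use || S == S_Release || S == S_MovableRelease)
                   ? S_CanRelease
                   : S;
      case Event::Retain:  // objc_retain(x) can complete the pair (via RRInfo)
        return S_None;
      }
      return S; // unreachable with a covered switch
    }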
diff --git a/lib/Transforms/Scalar/AlignmentFromAssumptions.cpp b/lib/Transforms/Scalar/AlignmentFromAssumptions.cpp
index 5c74885..5aa2b97 100644
--- a/lib/Transforms/Scalar/AlignmentFromAssumptions.cpp
+++ b/lib/Transforms/Scalar/AlignmentFromAssumptions.cpp
@@ -23,15 +23,15 @@
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/LoopInfo.h"
-#include "llvm/Analysis/ValueTracking.h"
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
+#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Intrinsics.h"
-#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/Module.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
@@ -71,7 +71,6 @@ struct AlignmentFromAssumptions : public FunctionPass {
ScalarEvolution *SE;
DominatorTree *DT;
- const DataLayout *DL;
bool extractAlignmentInfo(CallInst *I, Value *&AAPtr, const SCEV *&AlignSCEV,
const SCEV *&OffSCEV);
@@ -123,7 +122,7 @@ static unsigned getNewAlignmentDiff(const SCEV *DiffSCEV,
// If the displacement is not an exact multiple, but the remainder is a
// constant, then return this remainder (but only if it is a power of 2).
- uint64_t DiffUnitsAbs = abs64(DiffUnits);
+ uint64_t DiffUnitsAbs = std::abs(DiffUnits);
if (isPowerOf2_64(DiffUnitsAbs))
return (unsigned) DiffUnitsAbs;
}
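
The remainder trick in getNewAlignmentDiff is self-contained enough to model directly: if the byte displacement from a known-aligned address is itself a power of two, that power of two is a guaranteed alignment. A minimal sketch, with isPowerOf2_64 hand-rolled so it compiles standalone:

    #include <cstdint>
    #include <cstdlib>

    // Fallback shown in the hunk above: when the displacement is not an
    // exact multiple of the known alignment, its absolute value still
    // guarantees alignment if it is a power of two.
    static unsigned alignmentFromDiff(int64_t DiffUnits) {
      uint64_t DiffUnitsAbs = std::abs(DiffUnits); // r233350 swaps abs64() for this
      bool IsPow2 = DiffUnitsAbs && (DiffUnitsAbs & (DiffUnitsAbs - 1)) == 0;
      return IsPow2 ? static_cast<unsigned>(DiffUnitsAbs) : 0; // 0: nothing known
    }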
@@ -316,7 +315,7 @@ bool AlignmentFromAssumptions::processAssumption(CallInst *ACall) {
continue;
if (Instruction *K = dyn_cast<Instruction>(J))
- if (isValidAssumeForContext(ACall, K, DL, DT))
+ if (isValidAssumeForContext(ACall, K, DT))
WorkList.push_back(K);
}
@@ -400,7 +399,7 @@ bool AlignmentFromAssumptions::processAssumption(CallInst *ACall) {
Visited.insert(J);
for (User *UJ : J->users()) {
Instruction *K = cast<Instruction>(UJ);
- if (!Visited.count(K) && isValidAssumeForContext(ACall, K, DL, DT))
+ if (!Visited.count(K) && isValidAssumeForContext(ACall, K, DT))
WorkList.push_back(K);
}
}
@@ -413,8 +412,6 @@ bool AlignmentFromAssumptions::runOnFunction(Function &F) {
auto &AC = getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
SE = &getAnalysis<ScalarEvolution>();
DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
- DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>();
- DL = DLP ? &DLP->getDataLayout() : nullptr;
NewDestAlignments.clear();
NewSrcAlignments.clear();
diff --git a/lib/Transforms/Scalar/Android.mk b/lib/Transforms/Scalar/Android.mk
index ed803cd..cf30f39 100644
--- a/lib/Transforms/Scalar/Android.mk
+++ b/lib/Transforms/Scalar/Android.mk
@@ -20,6 +20,7 @@ transforms_scalar_SRC_FILES := \
LoopDeletion.cpp \
LoopIdiomRecognize.cpp \
LoopInstSimplify.cpp \
+ LoopInterchange.cpp \
LoopRerollPass.cpp \
LoopRotation.cpp \
LoopStrengthReduce.cpp \
diff --git a/lib/Transforms/Scalar/BDCE.cpp b/lib/Transforms/Scalar/BDCE.cpp
index c7bd79d..09c605e 100644
--- a/lib/Transforms/Scalar/BDCE.cpp
+++ b/lib/Transforms/Scalar/BDCE.cpp
@@ -64,7 +64,6 @@ struct BDCE : public FunctionPass {
APInt &KnownZero2, APInt &KnownOne2);
AssumptionCache *AC;
- const DataLayout *DL;
DominatorTree *DT;
};
}
@@ -95,20 +94,21 @@ void BDCE::determineLiveOperandBits(const Instruction *UserI,
// however, want to do this twice, so we cache the result in APInts that live
// in the caller. For the two-relevant-operands case, both operand values are
// provided here.
- auto ComputeKnownBits = [&](unsigned BitWidth, const Value *V1,
- const Value *V2) {
- KnownZero = APInt(BitWidth, 0);
- KnownOne = APInt(BitWidth, 0);
- computeKnownBits(const_cast<Value*>(V1), KnownZero, KnownOne, DL, 0, AC,
- UserI, DT);
-
- if (V2) {
- KnownZero2 = APInt(BitWidth, 0);
- KnownOne2 = APInt(BitWidth, 0);
- computeKnownBits(const_cast<Value*>(V2), KnownZero2, KnownOne2, DL, 0, AC,
- UserI, DT);
- }
- };
+ auto ComputeKnownBits =
+ [&](unsigned BitWidth, const Value *V1, const Value *V2) {
+ const DataLayout &DL = I->getModule()->getDataLayout();
+ KnownZero = APInt(BitWidth, 0);
+ KnownOne = APInt(BitWidth, 0);
+ computeKnownBits(const_cast<Value *>(V1), KnownZero, KnownOne, DL, 0,
+ AC, UserI, DT);
+
+ if (V2) {
+ KnownZero2 = APInt(BitWidth, 0);
+ KnownOne2 = APInt(BitWidth, 0);
+ computeKnownBits(const_cast<Value *>(V2), KnownZero2, KnownOne2, DL,
+ 0, AC, UserI, DT);
+ }
+ };
switch (UserI->getOpcode()) {
default: break;
@@ -263,7 +263,6 @@ bool BDCE::runOnFunction(Function& F) {
return false;
AC = &getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
- DL = F.getParent()->getDataLayout();
DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
DenseMap<Instruction *, APInt> AliveBits;
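
The cached known-bits pairs computed by the lambda above feed per-opcode rules for which operand bits are alive. A toy model of the AND case, under the simplifying assumption that only the other operand's known-zero bits are consulted (the in-tree rule additionally refines with known-one bits):

    #include <cstdint>

    // Toy known-bits record, standing in for the cached APInt pairs.
    struct KnownBits32 {
      uint32_t Zero = 0; // bits proven 0
      uint32_t One = 0;  // bits proven 1
    };

    // For "and a, b": where b is known zero, the result bit is zero no
    // matter what a holds, so those bits of a are not alive.
    static uint32_t liveBitsOfAndOperand(uint32_t AliveOut,
                                         const KnownBits32 &OtherOp) {
      return AliveOut & ~OtherOp.Zero;
    }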
diff --git a/lib/Transforms/Scalar/CMakeLists.txt b/lib/Transforms/Scalar/CMakeLists.txt
index d297eb1..d12fdb7 100644
--- a/lib/Transforms/Scalar/CMakeLists.txt
+++ b/lib/Transforms/Scalar/CMakeLists.txt
@@ -18,6 +18,7 @@ add_llvm_library(LLVMScalarOpts
LoopDeletion.cpp
LoopIdiomRecognize.cpp
LoopInstSimplify.cpp
+ LoopInterchange.cpp
LoopRerollPass.cpp
LoopRotation.cpp
LoopStrengthReduce.cpp
diff --git a/lib/Transforms/Scalar/ConstantHoisting.cpp b/lib/Transforms/Scalar/ConstantHoisting.cpp
index e3aab4b..4288742 100644
--- a/lib/Transforms/Scalar/ConstantHoisting.cpp
+++ b/lib/Transforms/Scalar/ConstantHoisting.cpp
@@ -43,6 +43,7 @@
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/Pass.h"
#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
#include <tuple>
using namespace llvm;
diff --git a/lib/Transforms/Scalar/ConstantProp.cpp b/lib/Transforms/Scalar/ConstantProp.cpp
index 29d4e05..c974ebb 100644
--- a/lib/Transforms/Scalar/ConstantProp.cpp
+++ b/lib/Transforms/Scalar/ConstantProp.cpp
@@ -22,7 +22,6 @@
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/ConstantFolding.h"
#include "llvm/IR/Constant.h"
-#include "llvm/IR/DataLayout.h"
#include "llvm/IR/InstIterator.h"
#include "llvm/IR/Instruction.h"
#include "llvm/Pass.h"
@@ -68,8 +67,7 @@ bool ConstantPropagation::runOnFunction(Function &F) {
WorkList.insert(&*i);
}
bool Changed = false;
- DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>();
- const DataLayout *DL = DLP ? &DLP->getDataLayout() : nullptr;
+ const DataLayout &DL = F.getParent()->getDataLayout();
TargetLibraryInfo *TLI =
&getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();
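
This hunk shows the pattern the whole commit applies: DataLayout stops being an optional analysis result and becomes a by-reference property of the Module. A minimal sketch of the new lookup path, written against the API this commit targets:

    #include "llvm/IR/DataLayout.h"
    #include "llvm/IR/Function.h"
    #include "llvm/IR/Module.h"

    // After this change a pass asks the IR itself for the layout; there is
    // no nullable DataLayoutPass result left to thread through helpers.
    static const llvm::DataLayout &layoutFor(llvm::Function &F) {
      return F.getParent()->getDataLayout();
    }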
diff --git a/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp b/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp
index 5a3b5cf..912d527 100644
--- a/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp
+++ b/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp
@@ -19,6 +19,7 @@
#include "llvm/IR/Constants.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Instructions.h"
+#include "llvm/IR/Module.h"
#include "llvm/Pass.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
@@ -126,8 +127,9 @@ bool CorrelatedValuePropagation::processPHI(PHINode *P) {
Changed = true;
}
- // FIXME: Provide DL, TLI, DT, AT to SimplifyInstruction.
- if (Value *V = SimplifyInstruction(P)) {
+ // FIXME: Provide TLI, DT, AT to SimplifyInstruction.
+ const DataLayout &DL = BB->getModule()->getDataLayout();
+ if (Value *V = SimplifyInstruction(P, DL)) {
P->replaceAllUsesWith(V);
P->eraseFromParent();
Changed = true;
diff --git a/lib/Transforms/Scalar/DeadStoreElimination.cpp b/lib/Transforms/Scalar/DeadStoreElimination.cpp
index c2ce1d5..cb8981b 100644
--- a/lib/Transforms/Scalar/DeadStoreElimination.cpp
+++ b/lib/Transforms/Scalar/DeadStoreElimination.cpp
@@ -23,6 +23,7 @@
#include "llvm/Analysis/CaptureTracking.h"
#include "llvm/Analysis/MemoryBuiltins.h"
#include "llvm/Analysis/MemoryDependenceAnalysis.h"
+#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
@@ -33,7 +34,7 @@
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/Pass.h"
#include "llvm/Support/Debug.h"
-#include "llvm/Analysis/TargetLibraryInfo.h"
+#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Utils/Local.h"
using namespace llvm;
@@ -78,7 +79,8 @@ namespace {
bool HandleFree(CallInst *F);
bool handleEndBlock(BasicBlock &BB);
void RemoveAccessedObjects(const AliasAnalysis::Location &LoadedLoc,
- SmallSetVector<Value*, 16> &DeadStackObjects);
+ SmallSetVector<Value *, 16> &DeadStackObjects,
+ const DataLayout &DL);
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.setPreservesCFG();
@@ -194,18 +196,12 @@ static bool hasMemoryWrite(Instruction *I, const TargetLibraryInfo *TLI) {
/// describe the memory operations for this instruction.
static AliasAnalysis::Location
getLocForWrite(Instruction *Inst, AliasAnalysis &AA) {
- const DataLayout *DL = AA.getDataLayout();
if (StoreInst *SI = dyn_cast<StoreInst>(Inst))
return AA.getLocation(SI);
if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(Inst)) {
// memcpy/memmove/memset.
AliasAnalysis::Location Loc = AA.getLocationForDest(MI);
- // If we don't have target data around, an unknown size in Location means
- // that we should use the size of the pointee type. This isn't valid for
- // memset/memcpy, which writes more than an i8.
- if (Loc.Size == AliasAnalysis::UnknownSize && DL == nullptr)
- return AliasAnalysis::Location();
return Loc;
}
@@ -215,11 +211,6 @@ getLocForWrite(Instruction *Inst, AliasAnalysis &AA) {
switch (II->getIntrinsicID()) {
default: return AliasAnalysis::Location(); // Unhandled intrinsic.
case Intrinsic::init_trampoline:
- // If we don't have target data around, an unknown size in Location means
- // that we should use the size of the pointee type. This isn't valid for
- // init.trampoline, which writes more than an i8.
- if (!DL) return AliasAnalysis::Location();
-
// FIXME: We don't know the size of the trampoline, so we can't really
// handle it here.
return AliasAnalysis::Location(II->getArgOperand(0));
@@ -321,9 +312,10 @@ static Value *getStoredPointerOperand(Instruction *I) {
return CS.getArgument(0);
}
-static uint64_t getPointerSize(const Value *V, AliasAnalysis &AA) {
+static uint64_t getPointerSize(const Value *V, const DataLayout &DL,
+ const TargetLibraryInfo *TLI) {
uint64_t Size;
- if (getObjectSize(V, Size, AA.getDataLayout(), AA.getTargetLibraryInfo()))
+ if (getObjectSize(V, Size, DL, TLI))
return Size;
return AliasAnalysis::UnknownSize;
}
@@ -343,10 +335,9 @@ namespace {
/// overwritten by 'Later', or 'OverwriteUnknown' if nothing can be determined
static OverwriteResult isOverwrite(const AliasAnalysis::Location &Later,
const AliasAnalysis::Location &Earlier,
- AliasAnalysis &AA,
- int64_t &EarlierOff,
- int64_t &LaterOff) {
- const DataLayout *DL = AA.getDataLayout();
+ const DataLayout &DL,
+ const TargetLibraryInfo *TLI,
+ int64_t &EarlierOff, int64_t &LaterOff) {
const Value *P1 = Earlier.Ptr->stripPointerCasts();
const Value *P2 = Later.Ptr->stripPointerCasts();
@@ -367,7 +358,7 @@ static OverwriteResult isOverwrite(const AliasAnalysis::Location &Later,
// Otherwise, we have to have size information, and the later store has to be
// larger than the earlier one.
if (Later.Size == AliasAnalysis::UnknownSize ||
- Earlier.Size == AliasAnalysis::UnknownSize || DL == nullptr)
+ Earlier.Size == AliasAnalysis::UnknownSize)
return OverwriteUnknown;
// Check to see if the later store is to the entire object (either a global,
@@ -382,7 +373,7 @@ static OverwriteResult isOverwrite(const AliasAnalysis::Location &Later,
return OverwriteUnknown;
// If the "Later" store is to a recognizable object, get its size.
- uint64_t ObjectSize = getPointerSize(UO2, AA);
+ uint64_t ObjectSize = getPointerSize(UO2, DL, TLI);
if (ObjectSize != AliasAnalysis::UnknownSize)
if (ObjectSize == Later.Size && ObjectSize >= Earlier.Size)
return OverwriteComplete;
@@ -560,8 +551,10 @@ bool DSE::runOnBasicBlock(BasicBlock &BB) {
if (isRemovable(DepWrite) &&
!isPossibleSelfRead(Inst, Loc, DepWrite, *AA)) {
int64_t InstWriteOffset, DepWriteOffset;
- OverwriteResult OR = isOverwrite(Loc, DepLoc, *AA,
- DepWriteOffset, InstWriteOffset);
+ const DataLayout &DL = BB.getModule()->getDataLayout();
+ OverwriteResult OR =
+ isOverwrite(Loc, DepLoc, DL, AA->getTargetLibraryInfo(),
+ DepWriteOffset, InstWriteOffset);
if (OR == OverwriteComplete) {
DEBUG(dbgs() << "DSE: Remove Dead Store:\n DEAD: "
<< *DepWrite << "\n KILLER: " << *Inst << '\n');
@@ -655,6 +648,7 @@ bool DSE::HandleFree(CallInst *F) {
AliasAnalysis::Location Loc = AliasAnalysis::Location(F->getOperand(0));
SmallVector<BasicBlock *, 16> Blocks;
Blocks.push_back(F->getParent());
+ const DataLayout &DL = F->getModule()->getDataLayout();
while (!Blocks.empty()) {
BasicBlock *BB = Blocks.pop_back_val();
@@ -668,7 +662,7 @@ bool DSE::HandleFree(CallInst *F) {
break;
Value *DepPointer =
- GetUnderlyingObject(getStoredPointerOperand(Dependency));
+ GetUnderlyingObject(getStoredPointerOperand(Dependency), DL);
// Check for aliasing.
if (!AA->isMustAlias(F->getArgOperand(0), DepPointer))
@@ -728,6 +722,8 @@ bool DSE::handleEndBlock(BasicBlock &BB) {
if (AI->hasByValOrInAllocaAttr())
DeadStackObjects.insert(AI);
+ const DataLayout &DL = BB.getModule()->getDataLayout();
+
// Scan the basic block backwards
for (BasicBlock::iterator BBI = BB.end(); BBI != BB.begin(); ){
--BBI;
@@ -736,7 +732,7 @@ bool DSE::handleEndBlock(BasicBlock &BB) {
if (hasMemoryWrite(BBI, TLI) && isRemovable(BBI)) {
// See through pointer-to-pointer bitcasts
SmallVector<Value *, 4> Pointers;
- GetUnderlyingObjects(getStoredPointerOperand(BBI), Pointers);
+ GetUnderlyingObjects(getStoredPointerOperand(BBI), Pointers, DL);
// Stores to stack values are valid candidates for removal.
bool AllDead = true;
@@ -799,8 +795,8 @@ bool DSE::handleEndBlock(BasicBlock &BB) {
// the call is live.
DeadStackObjects.remove_if([&](Value *I) {
// See if the call site touches the value.
- AliasAnalysis::ModRefResult A =
- AA->getModRefInfo(CS, I, getPointerSize(I, *AA));
+ AliasAnalysis::ModRefResult A = AA->getModRefInfo(
+ CS, I, getPointerSize(I, DL, AA->getTargetLibraryInfo()));
return A == AliasAnalysis::ModRef || A == AliasAnalysis::Ref;
});
@@ -835,7 +831,7 @@ bool DSE::handleEndBlock(BasicBlock &BB) {
// Remove any allocas from the DeadPointer set that are loaded, as this
// makes any stores above the access live.
- RemoveAccessedObjects(LoadedLoc, DeadStackObjects);
+ RemoveAccessedObjects(LoadedLoc, DeadStackObjects, DL);
// If all of the allocas were clobbered by the access then we're not going
// to find anything else to process.
@@ -850,8 +846,9 @@ bool DSE::handleEndBlock(BasicBlock &BB) {
/// of the stack objects in the DeadStackObjects set. If so, they become live
/// because the location is being loaded.
void DSE::RemoveAccessedObjects(const AliasAnalysis::Location &LoadedLoc,
- SmallSetVector<Value*, 16> &DeadStackObjects) {
- const Value *UnderlyingPointer = GetUnderlyingObject(LoadedLoc.Ptr);
+ SmallSetVector<Value *, 16> &DeadStackObjects,
+ const DataLayout &DL) {
+ const Value *UnderlyingPointer = GetUnderlyingObject(LoadedLoc.Ptr, DL);
// A constant can't be in the dead pointer set.
if (isa<Constant>(UnderlyingPointer))
@@ -867,7 +864,8 @@ void DSE::RemoveAccessedObjects(const AliasAnalysis::Location &LoadedLoc,
// Remove objects that could alias LoadedLoc.
DeadStackObjects.remove_if([&](Value *I) {
// See if the loaded location could alias the stack location.
- AliasAnalysis::Location StackLoc(I, getPointerSize(I, *AA));
+ AliasAnalysis::Location StackLoc(
+ I, getPointerSize(I, DL, AA->getTargetLibraryInfo()));
return !AA->isNoAlias(StackLoc, LoadedLoc);
});
}
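
Once both stores are resolved to the same underlying object with constant offsets, isOverwrite reduces to an interval-containment test. A standalone sketch of that final check (the UnknownSize sentinel handling above is omitted):

    #include <cstdint>

    // The later store makes the earlier one dead iff the earlier byte range
    // [EarlierOff, EarlierOff + EarlierSize) is contained in the later
    // range [LaterOff, LaterOff + LaterSize) within the same object.
    static bool laterCompletelyOverwrites(int64_t EarlierOff,
                                          uint64_t EarlierSize,
                                          int64_t LaterOff,
                                          uint64_t LaterSize) {
      return LaterOff <= EarlierOff &&
             static_cast<uint64_t>(EarlierOff - LaterOff) + EarlierSize <=
                 LaterSize;
    }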
diff --git a/lib/Transforms/Scalar/EarlyCSE.cpp b/lib/Transforms/Scalar/EarlyCSE.cpp
index 9309623..d5b9e03 100644
--- a/lib/Transforms/Scalar/EarlyCSE.cpp
+++ b/lib/Transforms/Scalar/EarlyCSE.cpp
@@ -18,6 +18,7 @@
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/InstructionSimplify.h"
+#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Dominators.h"
@@ -27,7 +28,7 @@
#include "llvm/Pass.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/RecyclingAllocator.h"
-#include "llvm/Analysis/TargetLibraryInfo.h"
+#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Utils/Local.h"
#include <deque>
@@ -263,7 +264,6 @@ namespace {
class EarlyCSE {
public:
Function &F;
- const DataLayout *DL;
const TargetLibraryInfo &TLI;
const TargetTransformInfo &TTI;
DominatorTree &DT;
@@ -308,11 +308,10 @@ public:
unsigned CurrentGeneration;
/// \brief Set up the EarlyCSE runner for a particular function.
- EarlyCSE(Function &F, const DataLayout *DL, const TargetLibraryInfo &TLI,
+ EarlyCSE(Function &F, const TargetLibraryInfo &TLI,
const TargetTransformInfo &TTI, DominatorTree &DT,
AssumptionCache &AC)
- : F(F), DL(DL), TLI(TLI), TTI(TTI), DT(DT), AC(AC), CurrentGeneration(0) {
- }
+ : F(F), TLI(TLI), TTI(TTI), DT(DT), AC(AC), CurrentGeneration(0) {}
bool run();
@@ -469,6 +468,7 @@ bool EarlyCSE::processNode(DomTreeNode *Node) {
Instruction *LastStore = nullptr;
bool Changed = false;
+ const DataLayout &DL = BB->getModule()->getDataLayout();
// See if any instructions in the block can be eliminated. If so, do it. If
// not, add them to AvailableValues.
@@ -685,14 +685,12 @@ bool EarlyCSE::run() {
PreservedAnalyses EarlyCSEPass::run(Function &F,
AnalysisManager<Function> *AM) {
- const DataLayout *DL = F.getParent()->getDataLayout();
-
auto &TLI = AM->getResult<TargetLibraryAnalysis>(F);
auto &TTI = AM->getResult<TargetIRAnalysis>(F);
auto &DT = AM->getResult<DominatorTreeAnalysis>(F);
auto &AC = AM->getResult<AssumptionAnalysis>(F);
- EarlyCSE CSE(F, DL, TLI, TTI, DT, AC);
+ EarlyCSE CSE(F, TLI, TTI, DT, AC);
if (!CSE.run())
return PreservedAnalyses::all();
@@ -724,14 +722,12 @@ public:
if (skipOptnoneFunction(F))
return false;
- DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>();
- auto *DL = DLP ? &DLP->getDataLayout() : nullptr;
auto &TLI = getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();
auto &TTI = getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
auto &AC = getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
- EarlyCSE CSE(F, DL, TLI, TTI, DT, AC);
+ EarlyCSE CSE(F, TLI, TTI, DT, AC);
return CSE.run();
}
diff --git a/lib/Transforms/Scalar/GVN.cpp b/lib/Transforms/Scalar/GVN.cpp
index 73a1f25..c73e60f 100644
--- a/lib/Transforms/Scalar/GVN.cpp
+++ b/lib/Transforms/Scalar/GVN.cpp
@@ -33,6 +33,7 @@
#include "llvm/Analysis/MemoryBuiltins.h"
#include "llvm/Analysis/MemoryDependenceAnalysis.h"
#include "llvm/Analysis/PHITransAddr.h"
+#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Dominators.h"
@@ -45,7 +46,7 @@
#include "llvm/Support/Allocator.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
-#include "llvm/Analysis/TargetLibraryInfo.h"
+#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Utils/SSAUpdater.h"
@@ -584,14 +585,13 @@ namespace {
/// Emit code into this block to adjust the value defined here to the
/// specified type. This handles various coercion cases.
- Value *MaterializeAdjustedValue(Type *LoadTy, GVN &gvn) const;
+ Value *MaterializeAdjustedValue(LoadInst *LI, GVN &gvn) const;
};
class GVN : public FunctionPass {
bool NoLoads;
MemoryDependenceAnalysis *MD;
DominatorTree *DT;
- const DataLayout *DL;
const TargetLibraryInfo *TLI;
AssumptionCache *AC;
SetVector<BasicBlock *> DeadBlocks;
@@ -630,7 +630,6 @@ namespace {
InstrsToErase.push_back(I);
}
- const DataLayout *getDataLayout() const { return DL; }
DominatorTree &getDominatorTree() const { return *DT; }
AliasAnalysis *getAliasAnalysis() const { return VN.getAliasAnalysis(); }
MemoryDependenceAnalysis &getMemDep() const { return *MD; }
@@ -956,8 +955,9 @@ static int AnalyzeLoadFromClobberingWrite(Type *LoadTy, Value *LoadPtr,
return -1;
int64_t StoreOffset = 0, LoadOffset = 0;
- Value *StoreBase = GetPointerBaseWithConstantOffset(WritePtr,StoreOffset,&DL);
- Value *LoadBase = GetPointerBaseWithConstantOffset(LoadPtr, LoadOffset, &DL);
+ Value *StoreBase =
+ GetPointerBaseWithConstantOffset(WritePtr, StoreOffset, DL);
+ Value *LoadBase = GetPointerBaseWithConstantOffset(LoadPtr, LoadOffset, DL);
if (StoreBase != LoadBase)
return -1;
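
The base/offset decomposition above gates all of the clobber forwarding in this file: unless both pointers strip down to one base plus constant offsets, no byte-range reasoning is possible. A toy version, assuming the -1 "cannot analyze" sentinel these Analyze* helpers use:

    #include <cstdint>

    // Stand-in for GetPointerBaseWithConstantOffset's result.
    struct BaseAndOffset {
      const void *Base;
      int64_t Offset; // bytes from Base
    };

    // Byte offset of the load into the store, or -1 when the bases differ
    // or the load starts before the store.
    static int64_t loadOffsetInsideStore(BaseAndOffset Store,
                                         BaseAndOffset Load) {
      if (Store.Base != Load.Base || Load.Offset < Store.Offset)
        return -1;
      return Load.Offset - Store.Offset;
    }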
@@ -1021,13 +1021,13 @@ static int AnalyzeLoadFromClobberingWrite(Type *LoadTy, Value *LoadPtr,
/// This function is called when we have a
/// memdep query of a load that ends up being a clobbering store.
static int AnalyzeLoadFromClobberingStore(Type *LoadTy, Value *LoadPtr,
- StoreInst *DepSI,
- const DataLayout &DL) {
+ StoreInst *DepSI) {
// Cannot handle reading from store of first-class aggregate yet.
if (DepSI->getValueOperand()->getType()->isStructTy() ||
DepSI->getValueOperand()->getType()->isArrayTy())
return -1;
+ const DataLayout &DL = DepSI->getModule()->getDataLayout();
Value *StorePtr = DepSI->getPointerOperand();
uint64_t StoreSize =DL.getTypeSizeInBits(DepSI->getValueOperand()->getType());
return AnalyzeLoadFromClobberingWrite(LoadTy, LoadPtr,
@@ -1052,11 +1052,11 @@ static int AnalyzeLoadFromClobberingLoad(Type *LoadTy, Value *LoadPtr,
// then we should widen it!
int64_t LoadOffs = 0;
const Value *LoadBase =
- GetPointerBaseWithConstantOffset(LoadPtr, LoadOffs, &DL);
+ GetPointerBaseWithConstantOffset(LoadPtr, LoadOffs, DL);
unsigned LoadSize = DL.getTypeStoreSize(LoadTy);
- unsigned Size = MemoryDependenceAnalysis::
- getLoadLoadClobberFullWidthSize(LoadBase, LoadOffs, LoadSize, DepLI, DL);
+ unsigned Size = MemoryDependenceAnalysis::getLoadLoadClobberFullWidthSize(
+ LoadBase, LoadOffs, LoadSize, DepLI);
if (Size == 0) return -1;
return AnalyzeLoadFromClobberingWrite(LoadTy, LoadPtr, DepPtr, Size*8, DL);
@@ -1086,7 +1086,7 @@ static int AnalyzeLoadFromClobberingMemInst(Type *LoadTy, Value *LoadPtr,
Constant *Src = dyn_cast<Constant>(MTI->getSource());
if (!Src) return -1;
- GlobalVariable *GV = dyn_cast<GlobalVariable>(GetUnderlyingObject(Src, &DL));
+ GlobalVariable *GV = dyn_cast<GlobalVariable>(GetUnderlyingObject(Src, DL));
if (!GV || !GV->isConstant()) return -1;
// See if the access is within the bounds of the transfer.
@@ -1104,7 +1104,7 @@ static int AnalyzeLoadFromClobberingMemInst(Type *LoadTy, Value *LoadPtr,
ConstantInt::get(Type::getInt64Ty(Src->getContext()), (unsigned)Offset);
Src = ConstantExpr::getGetElementPtr(Src, OffsetCst);
Src = ConstantExpr::getBitCast(Src, PointerType::get(LoadTy, AS));
- if (ConstantFoldLoadFromConstPtr(Src, &DL))
+ if (ConstantFoldLoadFromConstPtr(Src, DL))
return Offset;
return -1;
}
@@ -1157,7 +1157,7 @@ static Value *GetStoreValueForLoad(Value *SrcVal, unsigned Offset,
static Value *GetLoadValueForLoad(LoadInst *SrcVal, unsigned Offset,
Type *LoadTy, Instruction *InsertPt,
GVN &gvn) {
- const DataLayout &DL = *gvn.getDataLayout();
+ const DataLayout &DL = SrcVal->getModule()->getDataLayout();
// If Offset+LoadTy exceeds the size of SrcVal, then we must be wanting to
// widen SrcVal out to a larger load.
unsigned SrcValSize = DL.getTypeStoreSize(SrcVal->getType());
@@ -1265,7 +1265,7 @@ static Value *GetMemInstValueForLoad(MemIntrinsic *SrcInst, unsigned Offset,
ConstantInt::get(Type::getInt64Ty(Src->getContext()), (unsigned)Offset);
Src = ConstantExpr::getGetElementPtr(Src, OffsetCst);
Src = ConstantExpr::getBitCast(Src, PointerType::get(LoadTy, AS));
- return ConstantFoldLoadFromConstPtr(Src, &DL);
+ return ConstantFoldLoadFromConstPtr(Src, DL);
}
@@ -1281,7 +1281,7 @@ static Value *ConstructSSAForLoadSet(LoadInst *LI,
gvn.getDominatorTree().properlyDominates(ValuesPerBlock[0].BB,
LI->getParent())) {
assert(!ValuesPerBlock[0].isUndefValue() && "Dead BB dominate this block");
- return ValuesPerBlock[0].MaterializeAdjustedValue(LI->getType(), gvn);
+ return ValuesPerBlock[0].MaterializeAdjustedValue(LI, gvn);
}
// Otherwise, we have to construct SSA form.
@@ -1289,8 +1289,6 @@ static Value *ConstructSSAForLoadSet(LoadInst *LI,
SSAUpdater SSAUpdate(&NewPHIs);
SSAUpdate.Initialize(LI->getType(), LI->getName());
- Type *LoadTy = LI->getType();
-
for (unsigned i = 0, e = ValuesPerBlock.size(); i != e; ++i) {
const AvailableValueInBlock &AV = ValuesPerBlock[i];
BasicBlock *BB = AV.BB;
@@ -1298,7 +1296,7 @@ static Value *ConstructSSAForLoadSet(LoadInst *LI,
if (SSAUpdate.HasValueForBlock(BB))
continue;
- SSAUpdate.AddAvailableValue(BB, AV.MaterializeAdjustedValue(LoadTy, gvn));
+ SSAUpdate.AddAvailableValue(BB, AV.MaterializeAdjustedValue(LI, gvn));
}
// Perform PHI construction.
@@ -1326,16 +1324,16 @@ static Value *ConstructSSAForLoadSet(LoadInst *LI,
return V;
}
-Value *AvailableValueInBlock::MaterializeAdjustedValue(Type *LoadTy, GVN &gvn) const {
+Value *AvailableValueInBlock::MaterializeAdjustedValue(LoadInst *LI,
+ GVN &gvn) const {
Value *Res;
+ Type *LoadTy = LI->getType();
+ const DataLayout &DL = LI->getModule()->getDataLayout();
if (isSimpleValue()) {
Res = getSimpleValue();
if (Res->getType() != LoadTy) {
- const DataLayout *DL = gvn.getDataLayout();
- assert(DL && "Need target data to handle type mismatch case");
- Res = GetStoreValueForLoad(Res, Offset, LoadTy, BB->getTerminator(),
- *DL);
-
+ Res = GetStoreValueForLoad(Res, Offset, LoadTy, BB->getTerminator(), DL);
+
DEBUG(dbgs() << "GVN COERCED NONLOCAL VAL:\nOffset: " << Offset << " "
<< *getSimpleValue() << '\n'
<< *Res << '\n' << "\n\n\n");
@@ -1353,10 +1351,8 @@ Value *AvailableValueInBlock::MaterializeAdjustedValue(Type *LoadTy, GVN &gvn) c
<< *Res << '\n' << "\n\n\n");
}
} else if (isMemIntrinValue()) {
- const DataLayout *DL = gvn.getDataLayout();
- assert(DL && "Need target data to handle type mismatch case");
- Res = GetMemInstValueForLoad(getMemIntrinValue(), Offset,
- LoadTy, BB->getTerminator(), *DL);
+ Res = GetMemInstValueForLoad(getMemIntrinValue(), Offset, LoadTy,
+ BB->getTerminator(), DL);
DEBUG(dbgs() << "GVN COERCED NONLOCAL MEM INTRIN:\nOffset: " << Offset
<< " " << *getMemIntrinValue() << '\n'
<< *Res << '\n' << "\n\n\n");
@@ -1383,6 +1379,7 @@ void GVN::AnalyzeLoadAvailability(LoadInst *LI, LoadDepVect &Deps,
// dependencies that produce an unknown value for the load (such as a call
// that could potentially clobber the load).
unsigned NumDeps = Deps.size();
+ const DataLayout &DL = LI->getModule()->getDataLayout();
for (unsigned i = 0, e = NumDeps; i != e; ++i) {
BasicBlock *DepBB = Deps[i].getBB();
MemDepResult DepInfo = Deps[i].getResult();
@@ -1409,9 +1406,9 @@ void GVN::AnalyzeLoadAvailability(LoadInst *LI, LoadDepVect &Deps,
// read by the load, we can extract the bits we need for the load from the
// stored value.
if (StoreInst *DepSI = dyn_cast<StoreInst>(DepInfo.getInst())) {
- if (DL && Address) {
- int Offset = AnalyzeLoadFromClobberingStore(LI->getType(), Address,
- DepSI, *DL);
+ if (Address) {
+ int Offset =
+ AnalyzeLoadFromClobberingStore(LI->getType(), Address, DepSI);
if (Offset != -1) {
ValuesPerBlock.push_back(AvailableValueInBlock::get(DepBB,
DepSI->getValueOperand(),
@@ -1428,9 +1425,9 @@ void GVN::AnalyzeLoadAvailability(LoadInst *LI, LoadDepVect &Deps,
if (LoadInst *DepLI = dyn_cast<LoadInst>(DepInfo.getInst())) {
// If this is a clobber and L is the first instruction in its block, then
// we have the first instruction in the entry block.
- if (DepLI != LI && Address && DL) {
- int Offset = AnalyzeLoadFromClobberingLoad(LI->getType(), Address,
- DepLI, *DL);
+ if (DepLI != LI && Address) {
+ int Offset =
+ AnalyzeLoadFromClobberingLoad(LI->getType(), Address, DepLI, DL);
if (Offset != -1) {
ValuesPerBlock.push_back(AvailableValueInBlock::getLoad(DepBB,DepLI,
@@ -1443,9 +1440,9 @@ void GVN::AnalyzeLoadAvailability(LoadInst *LI, LoadDepVect &Deps,
// If the clobbering value is a memset/memcpy/memmove, see if we can
// forward a value on from it.
if (MemIntrinsic *DepMI = dyn_cast<MemIntrinsic>(DepInfo.getInst())) {
- if (DL && Address) {
+ if (Address) {
int Offset = AnalyzeLoadFromClobberingMemInst(LI->getType(), Address,
- DepMI, *DL);
+ DepMI, DL);
if (Offset != -1) {
ValuesPerBlock.push_back(AvailableValueInBlock::getMI(DepBB, DepMI,
Offset));
@@ -1484,8 +1481,8 @@ void GVN::AnalyzeLoadAvailability(LoadInst *LI, LoadDepVect &Deps,
if (S->getValueOperand()->getType() != LI->getType()) {
// If the stored value is larger or equal to the loaded value, we can
// reuse it.
- if (!DL || !CanCoerceMustAliasedValueToLoad(S->getValueOperand(),
- LI->getType(), *DL)) {
+ if (!CanCoerceMustAliasedValueToLoad(S->getValueOperand(),
+ LI->getType(), DL)) {
UnavailableBlocks.push_back(DepBB);
continue;
}
@@ -1501,7 +1498,7 @@ void GVN::AnalyzeLoadAvailability(LoadInst *LI, LoadDepVect &Deps,
if (LD->getType() != LI->getType()) {
// If the stored value is larger or equal to the loaded value, we can
// reuse it.
- if (!DL || !CanCoerceMustAliasedValueToLoad(LD, LI->getType(),*DL)) {
+ if (!CanCoerceMustAliasedValueToLoad(LD, LI->getType(), DL)) {
UnavailableBlocks.push_back(DepBB);
continue;
}
@@ -1613,6 +1610,7 @@ bool GVN::PerformLoadPRE(LoadInst *LI, AvailValInBlkVect &ValuesPerBlock,
// Check if the load can safely be moved to all the unavailable predecessors.
bool CanDoPRE = true;
+ const DataLayout &DL = LI->getModule()->getDataLayout();
SmallVector<Instruction*, 8> NewInsts;
for (auto &PredLoad : PredLoads) {
BasicBlock *UnavailablePred = PredLoad.first;
@@ -1833,10 +1831,11 @@ bool GVN::processLoad(LoadInst *L) {
// ... to a pointer that has been loaded from before...
MemDepResult Dep = MD->getDependency(L);
+ const DataLayout &DL = L->getModule()->getDataLayout();
// If we have a clobber and target data is around, see if this is a clobber
// that we can fix up through code synthesis.
- if (Dep.isClobber() && DL) {
+ if (Dep.isClobber()) {
// Check to see if we have something like this:
// store i32 123, i32* %P
// %A = bitcast i32* %P to i8*
@@ -1849,12 +1848,11 @@ bool GVN::processLoad(LoadInst *L) {
// access code.
Value *AvailVal = nullptr;
if (StoreInst *DepSI = dyn_cast<StoreInst>(Dep.getInst())) {
- int Offset = AnalyzeLoadFromClobberingStore(L->getType(),
- L->getPointerOperand(),
- DepSI, *DL);
+ int Offset = AnalyzeLoadFromClobberingStore(
+ L->getType(), L->getPointerOperand(), DepSI);
if (Offset != -1)
AvailVal = GetStoreValueForLoad(DepSI->getValueOperand(), Offset,
- L->getType(), L, *DL);
+ L->getType(), L, DL);
}
// Check to see if we have something like this:
@@ -1867,9 +1865,8 @@ bool GVN::processLoad(LoadInst *L) {
if (DepLI == L)
return false;
- int Offset = AnalyzeLoadFromClobberingLoad(L->getType(),
- L->getPointerOperand(),
- DepLI, *DL);
+ int Offset = AnalyzeLoadFromClobberingLoad(
+ L->getType(), L->getPointerOperand(), DepLI, DL);
if (Offset != -1)
AvailVal = GetLoadValueForLoad(DepLI, Offset, L->getType(), L, *this);
}
@@ -1877,11 +1874,10 @@ bool GVN::processLoad(LoadInst *L) {
// If the clobbering value is a memset/memcpy/memmove, see if we can forward
// a value on from it.
if (MemIntrinsic *DepMI = dyn_cast<MemIntrinsic>(Dep.getInst())) {
- int Offset = AnalyzeLoadFromClobberingMemInst(L->getType(),
- L->getPointerOperand(),
- DepMI, *DL);
+ int Offset = AnalyzeLoadFromClobberingMemInst(
+ L->getType(), L->getPointerOperand(), DepMI, DL);
if (Offset != -1)
- AvailVal = GetMemInstValueForLoad(DepMI, Offset, L->getType(), L, *DL);
+ AvailVal = GetMemInstValueForLoad(DepMI, Offset, L->getType(), L, DL);
}
if (AvailVal) {
@@ -1932,17 +1928,13 @@ bool GVN::processLoad(LoadInst *L) {
// actually have the same type. See if we know how to reuse the stored
// value (depending on its type).
if (StoredVal->getType() != L->getType()) {
- if (DL) {
- StoredVal = CoerceAvailableValueToLoadType(StoredVal, L->getType(),
- L, *DL);
- if (!StoredVal)
- return false;
-
- DEBUG(dbgs() << "GVN COERCED STORE:\n" << *DepSI << '\n' << *StoredVal
- << '\n' << *L << "\n\n\n");
- }
- else
+ StoredVal =
+ CoerceAvailableValueToLoadType(StoredVal, L->getType(), L, DL);
+ if (!StoredVal)
return false;
+
+ DEBUG(dbgs() << "GVN COERCED STORE:\n" << *DepSI << '\n' << *StoredVal
+ << '\n' << *L << "\n\n\n");
}
// Remove it!
@@ -1961,17 +1953,12 @@ bool GVN::processLoad(LoadInst *L) {
// the same type. See if we know how to reuse the previously loaded value
// (depending on its type).
if (DepLI->getType() != L->getType()) {
- if (DL) {
- AvailableVal = CoerceAvailableValueToLoadType(DepLI, L->getType(),
- L, *DL);
- if (!AvailableVal)
- return false;
-
- DEBUG(dbgs() << "GVN COERCED LOAD:\n" << *DepLI << "\n" << *AvailableVal
- << "\n" << *L << "\n\n\n");
- }
- else
+ AvailableVal = CoerceAvailableValueToLoadType(DepLI, L->getType(), L, DL);
+ if (!AvailableVal)
return false;
+
+ DEBUG(dbgs() << "GVN COERCED LOAD:\n" << *DepLI << "\n" << *AvailableVal
+ << "\n" << *L << "\n\n\n");
}
// Remove it!
@@ -2239,6 +2226,7 @@ bool GVN::processInstruction(Instruction *I) {
// to value numbering it. Value numbering often exposes redundancies, for
// example if it determines that %y is equal to %x then the instruction
// "%z = and i32 %x, %y" becomes "%z = and i32 %x, %x" which we now simplify.
+ const DataLayout &DL = I->getModule()->getDataLayout();
if (Value *V = SimplifyInstruction(I, DL, TLI, DT, AC)) {
I->replaceAllUsesWith(V);
if (MD && V->getType()->getScalarType()->isPointerTy())
@@ -2357,8 +2345,6 @@ bool GVN::runOnFunction(Function& F) {
if (!NoLoads)
MD = &getAnalysis<MemoryDependenceAnalysis>();
DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
- DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>();
- DL = DLP ? &DLP->getDataLayout() : nullptr;
AC = &getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
TLI = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();
VN.setAliasAnalysis(&getAnalysis<AliasAnalysis>());
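
All of the coercion paths touched above (GetStoreValueForLoad, GetLoadValueForLoad, GetMemInstValueForLoad) end in the same bit-level operation: reinterpret part of a wider stored value as the loaded type. A simplified little-endian sketch; the real code derives equivalent shift amounts from DataLayout, including on big-endian targets:

    #include <cstdint>
    #include <cstring>

    // Forward a 32-bit load reading bytes [Offset, Offset+4) of a
    // previously stored 64-bit value. The caller guarantees
    // Offset + 4 <= 8, as AnalyzeLoadFromClobberingWrite does.
    static uint32_t forwardFromStore(uint64_t StoredVal, unsigned Offset) {
      uint32_t Loaded;
      std::memcpy(&Loaded,
                  reinterpret_cast<const unsigned char *>(&StoredVal) + Offset,
                  sizeof(Loaded));
      return Loaded;
    }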
diff --git a/lib/Transforms/Scalar/IndVarSimplify.cpp b/lib/Transforms/Scalar/IndVarSimplify.cpp
index f99ebbc..51e8041 100644
--- a/lib/Transforms/Scalar/IndVarSimplify.cpp
+++ b/lib/Transforms/Scalar/IndVarSimplify.cpp
@@ -31,6 +31,7 @@
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/LoopPass.h"
#include "llvm/Analysis/ScalarEvolutionExpander.h"
+#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/CFG.h"
@@ -44,7 +45,6 @@
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Utils/SimplifyIndVar.h"
@@ -73,7 +73,6 @@ namespace {
LoopInfo *LI;
ScalarEvolution *SE;
DominatorTree *DT;
- const DataLayout *DL;
TargetLibraryInfo *TLI;
const TargetTransformInfo *TTI;
@@ -82,8 +81,8 @@ namespace {
public:
static char ID; // Pass identification, replacement for typeid
- IndVarSimplify() : LoopPass(ID), LI(nullptr), SE(nullptr), DT(nullptr),
- DL(nullptr), Changed(false) {
+ IndVarSimplify()
+ : LoopPass(ID), LI(nullptr), SE(nullptr), DT(nullptr), Changed(false) {
initializeIndVarSimplifyPass(*PassRegistry::getPassRegistry());
}
@@ -663,14 +662,14 @@ namespace {
/// extended by this sign or zero extend operation. This is used to determine
/// the final width of the IV before actually widening it.
static void visitIVCast(CastInst *Cast, WideIVInfo &WI, ScalarEvolution *SE,
- const DataLayout *DL, const TargetTransformInfo *TTI) {
+ const TargetTransformInfo *TTI) {
bool IsSigned = Cast->getOpcode() == Instruction::SExt;
if (!IsSigned && Cast->getOpcode() != Instruction::ZExt)
return;
Type *Ty = Cast->getType();
uint64_t Width = SE->getTypeSizeInBits(Ty);
- if (DL && !DL->isLegalInteger(Width))
+ if (!Cast->getModule()->getDataLayout().isLegalInteger(Width))
return;
// Cast is either an sext or zext up to this point.
@@ -1201,7 +1200,6 @@ PHINode *WidenIV::CreateWideIV(SCEVExpander &Rewriter) {
namespace {
class IndVarSimplifyVisitor : public IVVisitor {
ScalarEvolution *SE;
- const DataLayout *DL;
const TargetTransformInfo *TTI;
PHINode *IVPhi;
@@ -1209,9 +1207,9 @@ namespace {
WideIVInfo WI;
IndVarSimplifyVisitor(PHINode *IV, ScalarEvolution *SCEV,
- const DataLayout *DL, const TargetTransformInfo *TTI,
+ const TargetTransformInfo *TTI,
const DominatorTree *DTree)
- : SE(SCEV), DL(DL), TTI(TTI), IVPhi(IV) {
+ : SE(SCEV), TTI(TTI), IVPhi(IV) {
DT = DTree;
WI.NarrowIV = IVPhi;
if (ReduceLiveIVs)
@@ -1219,9 +1217,7 @@ namespace {
}
// Implement the interface used by simplifyUsersOfIV.
- void visitCast(CastInst *Cast) override {
- visitIVCast(Cast, WI, SE, DL, TTI);
- }
+ void visitCast(CastInst *Cast) override { visitIVCast(Cast, WI, SE, TTI); }
};
}
@@ -1255,7 +1251,7 @@ void IndVarSimplify::SimplifyAndExtend(Loop *L,
PHINode *CurrIV = LoopPhis.pop_back_val();
// Information about sign/zero extensions of CurrIV.
- IndVarSimplifyVisitor Visitor(CurrIV, SE, DL, TTI, DT);
+ IndVarSimplifyVisitor Visitor(CurrIV, SE, TTI, DT);
Changed |= simplifyUsersOfIV(CurrIV, SE, &LPM, DeadInsts, &Visitor);
@@ -1521,9 +1517,8 @@ static bool AlmostDeadIV(PHINode *Phi, BasicBlock *LatchBlock, Value *Cond) {
/// FIXME: Accept non-unit stride as long as SCEV can reduce BECount * Stride.
/// This is difficult in general for SCEV because of potential overflow. But we
/// could at least handle constant BECounts.
-static PHINode *
-FindLoopCounter(Loop *L, const SCEV *BECount,
- ScalarEvolution *SE, DominatorTree *DT, const DataLayout *DL) {
+static PHINode *FindLoopCounter(Loop *L, const SCEV *BECount,
+ ScalarEvolution *SE, DominatorTree *DT) {
uint64_t BCWidth = SE->getTypeSizeInBits(BECount->getType());
Value *Cond =
@@ -1552,7 +1547,8 @@ FindLoopCounter(Loop *L, const SCEV *BECount,
// AR may be wider than BECount. With eq/ne tests overflow is immaterial.
// AR may not be a narrower type, or we may never exit.
uint64_t PhiWidth = SE->getTypeSizeInBits(AR->getType());
- if (PhiWidth < BCWidth || (DL && !DL->isLegalInteger(PhiWidth)))
+ if (PhiWidth < BCWidth ||
+ !L->getHeader()->getModule()->getDataLayout().isLegalInteger(PhiWidth))
continue;
const SCEV *Step = dyn_cast<SCEVConstant>(AR->getStepRecurrence(*SE));
@@ -1705,51 +1701,15 @@ LinearFunctionTestReplace(Loop *L,
// compare against the post-incremented value, otherwise we must compare
// against the preincremented value.
if (L->getExitingBlock() == L->getLoopLatch()) {
+ // Add one to the "backedge-taken" count to get the trip count.
+ // This addition may overflow, which is valid as long as the comparison is
+ // truncated to BackedgeTakenCount->getType().
+ IVCount = SE->getAddExpr(BackedgeTakenCount,
+ SE->getConstant(BackedgeTakenCount->getType(), 1));
// The BackedgeTaken expression contains the number of times that the
// backedge branches to the loop header. This is one less than the
// number of times the loop executes, so use the incremented indvar.
- llvm::Value *IncrementedIndvar =
- IndVar->getIncomingValueForBlock(L->getExitingBlock());
- const auto *IncrementedIndvarSCEV =
- cast<SCEVAddRecExpr>(SE->getSCEV(IncrementedIndvar));
- // It is unsafe to use the incremented indvar if it has a wrapping flag, we
- // don't want to compare against a poison value. Check the SCEV that
- // corresponds to the incremented indvar, the SCEVExpander will only insert
- // flags in the IR if the SCEV originally had wrapping flags.
- // FIXME: In theory, SCEV could drop flags even though they exist in IR.
- // A more robust solution would involve getting a new expression for
- // CmpIndVar by applying non-NSW/NUW AddExprs.
- auto WrappingFlags =
- ScalarEvolution::setFlags(SCEV::FlagNUW, SCEV::FlagNSW);
- const SCEV *IVInit = IncrementedIndvarSCEV->getStart();
- if (SE->getTypeSizeInBits(IVInit->getType()) >
- SE->getTypeSizeInBits(IVCount->getType()))
- IVInit = SE->getTruncateExpr(IVInit, IVCount->getType());
- unsigned BitWidth = SE->getTypeSizeInBits(IVCount->getType());
- Type *WideTy = IntegerType::get(SE->getContext(), BitWidth + 1);
- // Check if InitIV + BECount+1 requires sign/zero extension.
- // If not, clear the corresponding flag from WrappingFlags because it is not
- // necessary for those flags in the IncrementedIndvarSCEV expression.
- if (SE->getSignExtendExpr(SE->getAddExpr(IVInit, BackedgeTakenCount),
- WideTy) ==
- SE->getAddExpr(SE->getSignExtendExpr(IVInit, WideTy),
- SE->getSignExtendExpr(BackedgeTakenCount, WideTy)))
- WrappingFlags = ScalarEvolution::clearFlags(WrappingFlags, SCEV::FlagNSW);
- if (SE->getZeroExtendExpr(SE->getAddExpr(IVInit, BackedgeTakenCount),
- WideTy) ==
- SE->getAddExpr(SE->getZeroExtendExpr(IVInit, WideTy),
- SE->getZeroExtendExpr(BackedgeTakenCount, WideTy)))
- WrappingFlags = ScalarEvolution::clearFlags(WrappingFlags, SCEV::FlagNUW);
- if (!ScalarEvolution::maskFlags(IncrementedIndvarSCEV->getNoWrapFlags(),
- WrappingFlags)) {
- // Add one to the "backedge-taken" count to get the trip count.
- // This addition may overflow, which is valid as long as the comparison is
- // truncated to BackedgeTakenCount->getType().
- IVCount =
- SE->getAddExpr(BackedgeTakenCount,
- SE->getConstant(BackedgeTakenCount->getType(), 1));
- CmpIndVar = IncrementedIndvar;
- }
+ CmpIndVar = IndVar->getIncomingValueForBlock(L->getExitingBlock());
}
Value *ExitCnt = genLoopLimit(IndVar, IVCount, L, Rewriter, SE);
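
The overflow remark in the new comment is worth spelling out: the +1 may wrap the narrow type, but since the exit compare runs at that same width, both sides wrap in lockstep and equality still fires on exactly the right iteration. A sketch at a fixed 32-bit width:

    #include <cstdint>

    // IVCount = BackedgeTakenCount + 1 may wrap (e.g. BTC == UINT32_MAX),
    // yet "IncrementedIV == IVCount" is also evaluated modulo 2^32, so the
    // loop still exits on the correct trip.
    static bool exitConditionHolds(uint32_t IncrementedIV,
                                   uint32_t BackedgeTakenCount) {
      uint32_t TripCount = BackedgeTakenCount + 1; // intentional modular add
      return IncrementedIV == TripCount;
    }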
@@ -1932,12 +1892,11 @@ bool IndVarSimplify::runOnLoop(Loop *L, LPPassManager &LPM) {
LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
SE = &getAnalysis<ScalarEvolution>();
DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
- DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>();
- DL = DLP ? &DLP->getDataLayout() : nullptr;
auto *TLIP = getAnalysisIfAvailable<TargetLibraryInfoWrapperPass>();
TLI = TLIP ? &TLIP->getTLI() : nullptr;
auto *TTIP = getAnalysisIfAvailable<TargetTransformInfoWrapperPass>();
TTI = TTIP ? &TTIP->getTTI(*L->getHeader()->getParent()) : nullptr;
+ const DataLayout &DL = L->getHeader()->getModule()->getDataLayout();
DeadInsts.clear();
Changed = false;
@@ -1949,7 +1908,7 @@ bool IndVarSimplify::runOnLoop(Loop *L, LPPassManager &LPM) {
const SCEV *BackedgeTakenCount = SE->getBackedgeTakenCount(L);
// Create a rewriter object which we'll use to transform the code with.
- SCEVExpander Rewriter(*SE, "indvars");
+ SCEVExpander Rewriter(*SE, DL, "indvars");
#ifndef NDEBUG
Rewriter.setDebugType(DEBUG_TYPE);
#endif
@@ -1978,7 +1937,7 @@ bool IndVarSimplify::runOnLoop(Loop *L, LPPassManager &LPM) {
// If we have a trip count expression, rewrite the loop's exit condition
// using it. We can currently only handle loops with a single exit.
if (canExpandBackedgeTakenCount(L, SE) && needsLFTR(L, DT)) {
- PHINode *IndVar = FindLoopCounter(L, BackedgeTakenCount, SE, DT, DL);
+ PHINode *IndVar = FindLoopCounter(L, BackedgeTakenCount, SE, DT);
if (IndVar) {
// Check preconditions for proper SCEVExpander operation. SCEV does not
// express SCEVExpander's dependencies, such as LoopSimplify. Instead any
diff --git a/lib/Transforms/Scalar/InductiveRangeCheckElimination.cpp b/lib/Transforms/Scalar/InductiveRangeCheckElimination.cpp
index 8559e63..cbdacad 100644
--- a/lib/Transforms/Scalar/InductiveRangeCheckElimination.cpp
+++ b/lib/Transforms/Scalar/InductiveRangeCheckElimination.cpp
@@ -42,7 +42,6 @@
//===----------------------------------------------------------------------===//
#include "llvm/ADT/Optional.h"
-
#include "llvm/Analysis/BranchProbabilityInfo.h"
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/LoopInfo.h"
@@ -51,27 +50,23 @@
#include "llvm/Analysis/ScalarEvolutionExpander.h"
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
#include "llvm/Analysis/ValueTracking.h"
-
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
-#include "llvm/IR/Instructions.h"
#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/Instructions.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/IR/ValueHandle.h"
#include "llvm/IR/Verifier.h"
-
+#include "llvm/Pass.h"
#include "llvm/Support/Debug.h"
-
+#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Cloning.h"
#include "llvm/Transforms/Utils/LoopUtils.h"
#include "llvm/Transforms/Utils/SimplifyIndVar.h"
#include "llvm/Transforms/Utils/UnrollLoop.h"
-
-#include "llvm/Pass.h"
-
#include <array>
using namespace llvm;
@@ -82,6 +77,9 @@ static cl::opt<unsigned> LoopSizeCutoff("irce-loop-size-cutoff", cl::Hidden,
static cl::opt<bool> PrintChangedLoops("irce-print-changed-loops", cl::Hidden,
cl::init(false));
+static cl::opt<bool> PrintRangeChecks("irce-print-range-checks", cl::Hidden,
+ cl::init(false));
+
static cl::opt<int> MaxExitProbReciprocal("irce-max-exit-prob-reciprocal",
cl::Hidden, cl::init(10));
@@ -96,23 +94,41 @@ namespace {
///
/// and
///
-/// 2. a condition that is provably true for some range of values taken by the
-/// containing loop's induction variable.
-///
-/// Currently all inductive range checks are branches conditional on an
-/// expression of the form
+/// 2. a condition that is provably true for some contiguous range of values
+/// taken by the containing loop's induction variable.
///
-/// 0 <= (Offset + Scale * I) < Length
-///
-/// where `I' is the canonical induction variable of a loop to which Offset and
-/// Scale are loop invariant, and Length is >= 0. Currently the 'false' branch
-/// is considered cold, looking at profiling data to verify that is a TODO.
-
class InductiveRangeCheck {
+ // Classifies a range check
+ enum RangeCheckKind : unsigned {
+ // Range check of the form "0 <= I".
+ RANGE_CHECK_LOWER = 1,
+
+ // Range check of the form "I < L" where L is known positive.
+ RANGE_CHECK_UPPER = 2,
+
+ // The logical and of the RANGE_CHECK_LOWER and RANGE_CHECK_UPPER
+ // conditions.
+ RANGE_CHECK_BOTH = RANGE_CHECK_LOWER | RANGE_CHECK_UPPER,
+
+ // Unrecognized range check condition.
+ RANGE_CHECK_UNKNOWN = (unsigned)-1
+ };
+
+ static const char *rangeCheckKindToStr(RangeCheckKind);
+
const SCEV *Offset;
const SCEV *Scale;
Value *Length;
BranchInst *Branch;
+ RangeCheckKind Kind;
+
+ static RangeCheckKind parseRangeCheckICmp(Loop *L, ICmpInst *ICI,
+ ScalarEvolution &SE, Value *&Index,
+ Value *&Length);
+
+ static InductiveRangeCheck::RangeCheckKind
+ parseRangeCheck(Loop *L, ScalarEvolution &SE, Value *Condition,
+ const SCEV *&Index, Value *&UpperLimit);
InductiveRangeCheck() :
Offset(nullptr), Scale(nullptr), Length(nullptr), Branch(nullptr) { }
@@ -124,13 +140,17 @@ public:
void print(raw_ostream &OS) const {
OS << "InductiveRangeCheck:\n";
+ OS << " Kind: " << rangeCheckKindToStr(Kind) << "\n";
OS << " Offset: ";
Offset->print(OS);
OS << " Scale: ";
Scale->print(OS);
OS << " Length: ";
- Length->print(OS);
- OS << " Branch: ";
+ if (Length)
+ Length->print(OS);
+ else
+ OS << "(null)";
+ OS << "\n Branch: ";
getBranch()->print(OS);
OS << "\n";
}
@@ -207,160 +227,156 @@ char InductiveRangeCheckElimination::ID = 0;
INITIALIZE_PASS(InductiveRangeCheckElimination, "irce",
"Inductive range check elimination", false, false)
-static bool IsLowerBoundCheck(Value *Check, Value *&IndexV) {
- using namespace llvm::PatternMatch;
+const char *InductiveRangeCheck::rangeCheckKindToStr(
+ InductiveRangeCheck::RangeCheckKind RCK) {
+ switch (RCK) {
+ case InductiveRangeCheck::RANGE_CHECK_UNKNOWN:
+ return "RANGE_CHECK_UNKNOWN";
- ICmpInst::Predicate Pred = ICmpInst::BAD_ICMP_PREDICATE;
- Value *LHS = nullptr, *RHS = nullptr;
+ case InductiveRangeCheck::RANGE_CHECK_UPPER:
+ return "RANGE_CHECK_UPPER";
- if (!match(Check, m_ICmp(Pred, m_Value(LHS), m_Value(RHS))))
- return false;
+ case InductiveRangeCheck::RANGE_CHECK_LOWER:
+ return "RANGE_CHECK_LOWER";
+
+ case InductiveRangeCheck::RANGE_CHECK_BOTH:
+ return "RANGE_CHECK_BOTH";
+ }
+
+ llvm_unreachable("unknown range check type!");
+}
+
+/// Parse a single ICmp instruction, `ICI`, into a range check. If `ICI`
+/// cannot be interpreted as a range check, return `RANGE_CHECK_UNKNOWN` and
+/// set `Index` and `Length` to `nullptr`. Otherwise set `Index` to the value
+/// being range checked, and set `Length` to the upper limit `Index` is being
+/// range checked with if (and only if) the range check type is stronger or
+/// equal to RANGE_CHECK_UPPER.
+///
+InductiveRangeCheck::RangeCheckKind
+InductiveRangeCheck::parseRangeCheckICmp(Loop *L, ICmpInst *ICI,
+ ScalarEvolution &SE, Value *&Index,
+ Value *&Length) {
+
+ auto IsNonNegativeAndNotLoopVarying = [&SE, L](Value *V) {
+ const SCEV *S = SE.getSCEV(V);
+ if (isa<SCEVCouldNotCompute>(S))
+ return false;
+
+ return SE.getLoopDisposition(S, L) == ScalarEvolution::LoopInvariant &&
+ SE.isKnownNonNegative(S);
+ };
+
+ using namespace llvm::PatternMatch;
+
+ ICmpInst::Predicate Pred = ICI->getPredicate();
+ Value *LHS = ICI->getOperand(0);
+ Value *RHS = ICI->getOperand(1);
switch (Pred) {
default:
- return false;
+ return RANGE_CHECK_UNKNOWN;
case ICmpInst::ICMP_SLE:
std::swap(LHS, RHS);
// fallthrough
case ICmpInst::ICMP_SGE:
- if (!match(RHS, m_ConstantInt<0>()))
- return false;
- IndexV = LHS;
- return true;
+ if (match(RHS, m_ConstantInt<0>())) {
+ Index = LHS;
+ return RANGE_CHECK_LOWER;
+ }
+ return RANGE_CHECK_UNKNOWN;
case ICmpInst::ICMP_SLT:
std::swap(LHS, RHS);
// fallthrough
case ICmpInst::ICMP_SGT:
- if (!match(RHS, m_ConstantInt<-1>()))
- return false;
- IndexV = LHS;
- return true;
- }
-}
-
-static bool IsUpperBoundCheck(Value *Check, Value *Index, Value *&UpperLimit) {
- using namespace llvm::PatternMatch;
-
- ICmpInst::Predicate Pred = ICmpInst::BAD_ICMP_PREDICATE;
- Value *LHS = nullptr, *RHS = nullptr;
-
- if (!match(Check, m_ICmp(Pred, m_Value(LHS), m_Value(RHS))))
- return false;
+ if (match(RHS, m_ConstantInt<-1>())) {
+ Index = LHS;
+ return RANGE_CHECK_LOWER;
+ }
- switch (Pred) {
- default:
- return false;
+ if (IsNonNegativeAndNotLoopVarying(LHS)) {
+ Index = RHS;
+ Length = LHS;
+ return RANGE_CHECK_UPPER;
+ }
+ return RANGE_CHECK_UNKNOWN;
- case ICmpInst::ICMP_SGT:
+ case ICmpInst::ICMP_ULT:
std::swap(LHS, RHS);
// fallthrough
- case ICmpInst::ICMP_SLT:
- if (LHS != Index)
- return false;
- UpperLimit = RHS;
- return true;
-
case ICmpInst::ICMP_UGT:
- std::swap(LHS, RHS);
- // fallthrough
- case ICmpInst::ICMP_ULT:
- if (LHS != Index)
- return false;
- UpperLimit = RHS;
- return true;
+ if (IsNonNegativeAndNotLoopVarying(LHS)) {
+ Index = RHS;
+ Length = LHS;
+ return RANGE_CHECK_BOTH;
+ }
+ return RANGE_CHECK_UNKNOWN;
}
+
+ llvm_unreachable("default clause returns!");
}
-/// Split a condition into something semantically equivalent to (0 <= I <
-/// Limit), both comparisons signed and Len loop invariant on L and positive.
-/// On success, return true and set Index to I and UpperLimit to Limit. Return
-/// false on failure (we may still write to UpperLimit and Index on failure).
-/// It does not try to interpret I as a loop index.
-///
-static bool SplitRangeCheckCondition(Loop *L, ScalarEvolution &SE,
+/// Parses an arbitrary condition into a range check. `Length` is set only if
+/// the range check is recognized to be `RANGE_CHECK_UPPER` or stronger.
+InductiveRangeCheck::RangeCheckKind
+InductiveRangeCheck::parseRangeCheck(Loop *L, ScalarEvolution &SE,
Value *Condition, const SCEV *&Index,
- Value *&UpperLimit) {
-
- // TODO: currently this catches some silly cases like comparing "%idx slt 1".
- // Our transformations are still correct, but less likely to be profitable in
- // those cases. We have to come up with some heuristics that pick out the
- // range checks that are more profitable to clone a loop for. This function
- // in general can be made more robust.
-
+ Value *&Length) {
using namespace llvm::PatternMatch;
Value *A = nullptr;
Value *B = nullptr;
- ICmpInst::Predicate Pred = ICmpInst::BAD_ICMP_PREDICATE;
-
- // In these early checks we assume that the matched UpperLimit is positive.
- // We'll verify that fact later, before returning true.
if (match(Condition, m_And(m_Value(A), m_Value(B)))) {
- Value *IndexV = nullptr;
- Value *ExpectedUpperBoundCheck = nullptr;
+ Value *IndexA = nullptr, *IndexB = nullptr;
+ Value *LengthA = nullptr, *LengthB = nullptr;
+ ICmpInst *ICmpA = dyn_cast<ICmpInst>(A), *ICmpB = dyn_cast<ICmpInst>(B);
- if (IsLowerBoundCheck(A, IndexV))
- ExpectedUpperBoundCheck = B;
- else if (IsLowerBoundCheck(B, IndexV))
- ExpectedUpperBoundCheck = A;
- else
- return false;
+ if (!ICmpA || !ICmpB)
+ return InductiveRangeCheck::RANGE_CHECK_UNKNOWN;
- if (!IsUpperBoundCheck(ExpectedUpperBoundCheck, IndexV, UpperLimit))
- return false;
+ auto RCKindA = parseRangeCheckICmp(L, ICmpA, SE, IndexA, LengthA);
+ auto RCKindB = parseRangeCheckICmp(L, ICmpB, SE, IndexB, LengthB);
- Index = SE.getSCEV(IndexV);
+ if (RCKindA == InductiveRangeCheck::RANGE_CHECK_UNKNOWN ||
+ RCKindB == InductiveRangeCheck::RANGE_CHECK_UNKNOWN)
+ return InductiveRangeCheck::RANGE_CHECK_UNKNOWN;
- if (isa<SCEVCouldNotCompute>(Index))
- return false;
+ if (IndexA != IndexB)
+ return InductiveRangeCheck::RANGE_CHECK_UNKNOWN;
- } else if (match(Condition, m_ICmp(Pred, m_Value(A), m_Value(B)))) {
- switch (Pred) {
- default:
- return false;
+ if (LengthA != nullptr && LengthB != nullptr && LengthA != LengthB)
+ return InductiveRangeCheck::RANGE_CHECK_UNKNOWN;
- case ICmpInst::ICMP_SGT:
- std::swap(A, B);
- // fall through
- case ICmpInst::ICMP_SLT:
- UpperLimit = B;
- Index = SE.getSCEV(A);
- if (isa<SCEVCouldNotCompute>(Index) || !SE.isKnownNonNegative(Index))
- return false;
- break;
+ Index = SE.getSCEV(IndexA);
+ if (isa<SCEVCouldNotCompute>(Index))
+ return InductiveRangeCheck::RANGE_CHECK_UNKNOWN;
- case ICmpInst::ICMP_UGT:
- std::swap(A, B);
- // fall through
- case ICmpInst::ICMP_ULT:
- UpperLimit = B;
- Index = SE.getSCEV(A);
- if (isa<SCEVCouldNotCompute>(Index))
- return false;
- break;
- }
- } else {
- return false;
+ Length = LengthA == nullptr ? LengthB : LengthA;
+
+ return (InductiveRangeCheck::RangeCheckKind)(RCKindA | RCKindB);
}
- const SCEV *UpperLimitSCEV = SE.getSCEV(UpperLimit);
- if (isa<SCEVCouldNotCompute>(UpperLimitSCEV) ||
- !SE.isKnownNonNegative(UpperLimitSCEV))
- return false;
+ if (ICmpInst *ICI = dyn_cast<ICmpInst>(Condition)) {
+ Value *IndexVal = nullptr;
- if (SE.getLoopDisposition(UpperLimitSCEV, L) !=
- ScalarEvolution::LoopInvariant) {
- DEBUG(dbgs() << " in function: " << L->getHeader()->getParent()->getName()
- << " ";
- dbgs() << " UpperLimit is not loop invariant: "
- << UpperLimit->getName() << "\n";);
- return false;
+ auto RCKind = parseRangeCheckICmp(L, ICI, SE, IndexVal, Length);
+
+ if (RCKind == InductiveRangeCheck::RANGE_CHECK_UNKNOWN)
+ return InductiveRangeCheck::RANGE_CHECK_UNKNOWN;
+
+ Index = SE.getSCEV(IndexVal);
+ if (isa<SCEVCouldNotCompute>(Index))
+ return InductiveRangeCheck::RANGE_CHECK_UNKNOWN;
+
+ return RCKind;
}
- return true;
+ return InductiveRangeCheck::RANGE_CHECK_UNKNOWN;
}
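For orientation, these are the guard shapes the parser classifies, written as
hypothetical source-level checks (the real input is the lowered IR branch
condition; all names here are illustrative):

// Hypothetical guards and the RangeCheckKind they should map to:
//   if (0 <= i && i < len) { ... }  // RANGE_CHECK_BOTH, Length = len
//   if (i < len)           { ... }  // RANGE_CHECK_UPPER, Length = len
//   if (0 <= i)            { ... }  // RANGE_CHECK_LOWER, Length stays null
// An unsigned "i <u len" with len known non-negative also yields
// RANGE_CHECK_BOTH, since the unsigned compare subsumes the lower bound.
void accessChecked(int *a, int i, int len) {
  if (0 <= i && i < len) // the branch condition handed to parseRangeCheck
    a[i] = 0;
}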
@@ -380,10 +396,15 @@ InductiveRangeCheck::create(InductiveRangeCheck::AllocatorTy &A, BranchInst *BI,
Value *Length = nullptr;
const SCEV *IndexSCEV = nullptr;
- if (!SplitRangeCheckCondition(L, SE, BI->getCondition(), IndexSCEV, Length))
+ auto RCKind = InductiveRangeCheck::parseRangeCheck(L, SE, BI->getCondition(),
+ IndexSCEV, Length);
+
+ if (RCKind == InductiveRangeCheck::RANGE_CHECK_UNKNOWN)
return nullptr;
- assert(IndexSCEV && Length && "contract with SplitRangeCheckCondition!");
+ assert(IndexSCEV && "contract with parseRangeCheck!");
+ assert((!(RCKind & InductiveRangeCheck::RANGE_CHECK_UPPER) || Length) &&
+ "contract with parseRangeCheck!");
const SCEVAddRecExpr *IndexAddRec = dyn_cast<SCEVAddRecExpr>(IndexSCEV);
bool IsAffineIndex =
@@ -397,6 +418,7 @@ InductiveRangeCheck::create(InductiveRangeCheck::AllocatorTy &A, BranchInst *BI,
IRC->Offset = IndexAddRec->getStart();
IRC->Scale = IndexAddRec->getStepRecurrence(SE);
IRC->Branch = BI;
+ IRC->Kind = RCKind;
return IRC;
}
@@ -685,30 +707,40 @@ LoopStructure::parseLoopStructure(ScalarEvolution &SE, BranchProbabilityInfo &BP
}
}
- auto IsInductionVar = [&SE](const SCEVAddRecExpr *AR, bool &IsIncreasing) {
- if (!AR->isAffine())
- return false;
+ auto HasNoSignedWrap = [&](const SCEVAddRecExpr *AR) {
+ if (AR->getNoWrapFlags(SCEV::FlagNSW))
+ return true;
IntegerType *Ty = cast<IntegerType>(AR->getType());
IntegerType *WideTy =
IntegerType::get(Ty->getContext(), Ty->getBitWidth() * 2);
- // Currently we only work with induction variables that have been proved to
- // not wrap. This restriction can potentially be lifted in the future.
-
const SCEVAddRecExpr *ExtendAfterOp =
dyn_cast<SCEVAddRecExpr>(SE.getSignExtendExpr(AR, WideTy));
- if (!ExtendAfterOp)
- return false;
+ if (ExtendAfterOp) {
+ const SCEV *ExtendedStart = SE.getSignExtendExpr(AR->getStart(), WideTy);
+ const SCEV *ExtendedStep =
+ SE.getSignExtendExpr(AR->getStepRecurrence(SE), WideTy);
- const SCEV *ExtendedStart = SE.getSignExtendExpr(AR->getStart(), WideTy);
- const SCEV *ExtendedStep =
- SE.getSignExtendExpr(AR->getStepRecurrence(SE), WideTy);
+ bool NoSignedWrap = ExtendAfterOp->getStart() == ExtendedStart &&
+ ExtendAfterOp->getStepRecurrence(SE) == ExtendedStep;
+
+ if (NoSignedWrap)
+ return true;
+ }
+
+ // We may have proved this when computing the sign extension above.
+ return AR->getNoWrapFlags(SCEV::FlagNSW) != SCEV::FlagAnyWrap;
+ };
+
+ auto IsInductionVar = [&](const SCEVAddRecExpr *AR, bool &IsIncreasing) {
+ if (!AR->isAffine())
+ return false;
- bool NoSignedWrap = ExtendAfterOp->getStart() == ExtendedStart &&
- ExtendAfterOp->getStepRecurrence(SE) == ExtendedStep;
+ // Currently we only work with induction variables that have been proved to
+ // not wrap. This restriction can potentially be lifted in the future.
- if (!NoSignedWrap)
+ if (!HasNoSignedWrap(AR))
return false;
if (const SCEVConstant *StepExpr =
@@ -791,9 +823,10 @@ LoopStructure::parseLoopStructure(ScalarEvolution &SE, BranchProbabilityInfo &BP
"loop variant exit count doesn't make sense!");
assert(!L.contains(LatchExit) && "expected an exit block!");
-
- Value *IndVarStartV = SCEVExpander(SE, "irce").expandCodeFor(
- IndVarStart, IndVarTy, &*Preheader->rbegin());
+ const DataLayout &DL = Preheader->getModule()->getDataLayout();
+ Value *IndVarStartV =
+ SCEVExpander(SE, DL, "irce")
+ .expandCodeFor(IndVarStart, IndVarTy, &*Preheader->rbegin());
IndVarStartV->setName("indvar.start");
LoopStructure Result;
@@ -831,12 +864,35 @@ LoopConstrainer::calculateSubRanges() const {
const SCEV *End = SE.getSCEV(MainLoopStructure.LoopExitAt);
bool Increasing = MainLoopStructure.IndVarIncreasing;
+
// We compute `Smallest` and `Greatest` such that [Smallest, Greatest) is the
// range of values the induction variable takes.
- const SCEV *Smallest =
- Increasing ? Start : SE.getAddExpr(End, SE.getSCEV(One));
- const SCEV *Greatest =
- Increasing ? End : SE.getAddExpr(Start, SE.getSCEV(One));
+
+ const SCEV *Smallest = nullptr, *Greatest = nullptr;
+
+ if (Increasing) {
+ Smallest = Start;
+ Greatest = End;
+ } else {
+ // These two computations may sign-overflow. Here is why that is okay:
+ //
+ // We know that the induction variable does not sign-overflow on any
+ // iteration except the last one, and it starts at `Start` and ends at
+ // `End`, decrementing by one every time.
+ //
+ // * if `Smallest` sign-overflows we know `End` is `INT_SMAX`. Since the
+ // induction variable is decreasing, we know that the smallest value
+ // the loop body is actually executed with is `INT_SMIN` == `Smallest`.
+ //
+ // * if `Greatest` sign-overflows, we know it can only be `INT_SMIN`. In
+ // that case, `Clamp` will always return `Smallest` and
+ // [`Result.LowLimit`, `Result.HighLimit`) = [`Smallest`, `Smallest`)
+ // will be an empty range. Returning an empty range is always safe.
+ //
+
+ Smallest = SE.getAddExpr(End, SE.getSCEV(One));
+ Greatest = SE.getAddExpr(Start, SE.getSCEV(One));
+ }
auto Clamp = [this, Smallest, Greatest](const SCEV *S) {
return SE.getSMaxExpr(Smallest, SE.getSMinExpr(Greatest, S));
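A worked version of the two overflow cases above, shrunk to 8-bit values (a
standalone sketch; it relies on two's-complement wraparound, as on LLVM hosts):

#include <cstdint>
#include <cstdio>

int main() {
  // Case 1: Smallest = End + 1 sign-overflows only when End == INT8_MAX, and
  // then it wraps to INT8_MIN -- exactly the smallest value a decreasing
  // 8-bit induction variable can execute with, so the wrap is benign.
  int8_t End = INT8_MAX;
  int8_t Smallest = (int8_t)(End + 1);
  printf("Smallest = %d\n", Smallest); // prints -128 == INT8_MIN

  // Case 2: Greatest = Start + 1 sign-overflows only when Start == INT8_MAX,
  // also yielding INT8_MIN; Clamp then produces the empty range
  // [Smallest, Smallest), and running zero iterations is always safe.
  int8_t Start = INT8_MAX;
  int8_t Greatest = (int8_t)(Start + 1);
  printf("Greatest = %d\n", Greatest); // prints -128 == INT8_MIN
  return 0;
}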
@@ -1132,7 +1188,7 @@ bool LoopConstrainer::run() {
IntegerType *IVTy =
cast<IntegerType>(MainLoopStructure.IndVarNext->getType());
- SCEVExpander Expander(SE, "irce");
+ SCEVExpander Expander(SE, F.getParent()->getDataLayout(), "irce");
Instruction *InsertPt = OriginalPreheader->getTerminator();
// It would have been better to make `PreLoop' and `PostLoop'
@@ -1293,8 +1349,19 @@ InductiveRangeCheck::computeSafeIterationSpace(ScalarEvolution &SE,
const SCEV *M = SE.getMinusSCEV(C, A);
const SCEV *Begin = SE.getNegativeSCEV(M);
- const SCEV *End = SE.getMinusSCEV(SE.getSCEV(getLength()), M);
+ const SCEV *UpperLimit = nullptr;
+
+ // We strengthen "0 <= I" to "0 <= I < INT_SMAX" and "I < L" to "0 <= I < L".
+ // We can potentially do much better here.
+ if (Value *V = getLength()) {
+ UpperLimit = SE.getSCEV(V);
+ } else {
+ assert(Kind == InductiveRangeCheck::RANGE_CHECK_LOWER && "invariant!");
+ unsigned BitWidth = cast<IntegerType>(IndVar->getType())->getBitWidth();
+ UpperLimit = SE.getConstant(APInt::getSignedMaxValue(BitWidth));
+ }
+ const SCEV *End = SE.getMinusSCEV(UpperLimit, M);
return InductiveRangeCheck::Range(Begin, End);
}
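A minimal sketch of the strengthening just described, using LLVM's APInt
directly (illustrative only; it assumes nothing beyond the APInt calls shown):

#include "llvm/ADT/APInt.h"
#include <cstdio>

using namespace llvm;

int main() {
  // For a lower-bound-only check "0 <= I" on an i32 index, the missing upper
  // limit is taken to be the largest signed 32-bit value, i.e. INT_SMAX.
  APInt UpperLimit = APInt::getSignedMaxValue(32);
  printf("%s\n", UpperLimit.toString(10, /*Signed=*/true).c_str()); // 2147483647
  return 0;
}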
@@ -1344,12 +1411,18 @@ bool InductiveRangeCheckElimination::runOnLoop(Loop *L, LPPassManager &LPM) {
if (RangeChecks.empty())
return false;
- DEBUG(dbgs() << "irce: looking at loop "; L->print(dbgs());
- dbgs() << "irce: loop has " << RangeChecks.size()
- << " inductive range checks: \n";
- for (InductiveRangeCheck *IRC : RangeChecks)
- IRC->print(dbgs());
- );
+ auto PrintRecognizedRangeChecks = [&](raw_ostream &OS) {
+ OS << "irce: looking at loop "; L->print(OS);
+ OS << "irce: loop has " << RangeChecks.size()
+ << " inductive range checks: \n";
+ for (InductiveRangeCheck *IRC : RangeChecks)
+ IRC->print(OS);
+ };
+
+ DEBUG(PrintRecognizedRangeChecks(dbgs()));
+
+ if (PrintRangeChecks)
+ PrintRecognizedRangeChecks(errs());
const char *FailureReason = nullptr;
Optional<LoopStructure> MaybeLoopStructure =
diff --git a/lib/Transforms/Scalar/JumpThreading.cpp b/lib/Transforms/Scalar/JumpThreading.cpp
index 8b54abd..83ac915 100644
--- a/lib/Transforms/Scalar/JumpThreading.cpp
+++ b/lib/Transforms/Scalar/JumpThreading.cpp
@@ -23,6 +23,7 @@
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/LazyValueInfo.h"
#include "llvm/Analysis/Loads.h"
+#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/LLVMContext.h"
@@ -32,7 +33,6 @@
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Utils/SSAUpdater.h"
@@ -78,7 +78,6 @@ namespace {
/// revectored to the false side of the second if.
///
class JumpThreading : public FunctionPass {
- const DataLayout *DL;
TargetLibraryInfo *TLI;
LazyValueInfo *LVI;
#ifdef NDEBUG
@@ -159,8 +158,6 @@ bool JumpThreading::runOnFunction(Function &F) {
return false;
DEBUG(dbgs() << "Jump threading on function '" << F.getName() << "'\n");
- DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>();
- DL = DLP ? &DLP->getDataLayout() : nullptr;
TLI = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();
LVI = &getAnalysis<LazyValueInfo>();
@@ -505,6 +502,7 @@ ComputeValueKnownInPredecessors(Value *V, BasicBlock *BB, PredValueInfo &Result,
assert(Preference == WantInteger && "Compares only produce integers");
PHINode *PN = dyn_cast<PHINode>(Cmp->getOperand(0));
if (PN && PN->getParent() == BB) {
+ const DataLayout &DL = PN->getModule()->getDataLayout();
// We can do this simplification if any comparisons fold to true or false.
// See if any do.
for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
@@ -709,7 +707,8 @@ bool JumpThreading::ProcessBlock(BasicBlock *BB) {
// Run constant folding to see if we can reduce the condition to a simple
// constant.
if (Instruction *I = dyn_cast<Instruction>(Condition)) {
- Value *SimpleVal = ConstantFoldInstruction(I, DL, TLI);
+ Value *SimpleVal =
+ ConstantFoldInstruction(I, BB->getModule()->getDataLayout(), TLI);
if (SimpleVal) {
I->replaceAllUsesWith(SimpleVal);
I->eraseFromParent();
@@ -1521,7 +1520,7 @@ bool JumpThreading::ThreadEdge(BasicBlock *BB,
// At this point, the IR is fully up to date and consistent. Do a quick scan
// over the new instructions and zap any that are constants or dead. This
// frequently happens because of phi translation.
- SimplifyInstructionsInBlock(NewBB, DL, TLI);
+ SimplifyInstructionsInBlock(NewBB, TLI);
// Threaded an edge!
++NumThreads;
@@ -1586,7 +1585,6 @@ bool JumpThreading::DuplicateCondBranchOnPHIIntoPred(BasicBlock *BB,
BasicBlock::iterator BI = BB->begin();
for (; PHINode *PN = dyn_cast<PHINode>(BI); ++BI)
ValueMapping[PN] = PN->getIncomingValueForBlock(PredBB);
-
// Clone the non-phi instructions of BB into PredBB, keeping track of the
// mapping and using it to remap operands in the cloned instructions.
for (; BI != BB->end(); ++BI) {
@@ -1603,7 +1601,8 @@ bool JumpThreading::DuplicateCondBranchOnPHIIntoPred(BasicBlock *BB,
// If this instruction can be simplified after the operands are updated,
// just use the simplified value instead. This frequently happens due to
// phi translation.
- if (Value *IV = SimplifyInstruction(New, DL)) {
+ if (Value *IV =
+ SimplifyInstruction(New, BB->getModule()->getDataLayout())) {
delete New;
ValueMapping[BI] = IV;
} else {
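The recurring mechanical change in this file and the ones that follow is the
same API migration: instead of caching a `const DataLayout *` obtained from the
now-removed `DataLayoutPass`, each pass queries the layout from IR it already
holds. A minimal sketch of the new idiom (not part of the patch; the function
name is hypothetical):

#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Module.h"

using namespace llvm;

// Any IR object that can reach its Module can reach the DataLayout.
static void dataLayoutIdiom(Instruction &I, BasicBlock &BB, Function &F) {
  const DataLayout &FromInst = I.getModule()->getDataLayout();
  const DataLayout &FromBlock = BB.getModule()->getDataLayout();
  const DataLayout &FromFunc = F.getParent()->getDataLayout();
  (void)FromInst; (void)FromBlock; (void)FromFunc;
}

The same migration explains the SCEVExpander changes throughout: its
constructor now takes the DataLayout explicitly, as in
`SCEVExpander(SE, DL, "irce")`.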
diff --git a/lib/Transforms/Scalar/LICM.cpp b/lib/Transforms/Scalar/LICM.cpp
index 14af38b..1333b02 100644
--- a/lib/Transforms/Scalar/LICM.cpp
+++ b/lib/Transforms/Scalar/LICM.cpp
@@ -38,6 +38,7 @@
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/LoopPass.h"
#include "llvm/Analysis/ScalarEvolution.h"
+#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/CFG.h"
#include "llvm/IR/Constants.h"
@@ -52,7 +53,6 @@
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Utils/LoopUtils.h"
#include "llvm/Transforms/Utils/SSAUpdater.h"
@@ -76,21 +76,21 @@ static bool isNotUsedInLoop(Instruction &I, Loop *CurLoop);
static bool hoist(Instruction &I, BasicBlock *Preheader);
static bool sink(Instruction &I, LoopInfo *LI, DominatorTree *DT,
Loop *CurLoop, AliasSetTracker *CurAST );
-static bool isGuaranteedToExecute(Instruction &Inst, DominatorTree *DT,
- Loop *CurLoop, LICMSafetyInfo * SafetyInfo);
-static bool isSafeToExecuteUnconditionally(Instruction &Inst,DominatorTree *DT,
- const DataLayout *DL, Loop *CurLoop,
- LICMSafetyInfo * SafetyInfo);
+static bool isGuaranteedToExecute(Instruction &Inst, DominatorTree *DT,
+ Loop *CurLoop, LICMSafetyInfo *SafetyInfo);
+static bool isSafeToExecuteUnconditionally(Instruction &Inst, DominatorTree *DT,
+ Loop *CurLoop,
+ LICMSafetyInfo *SafetyInfo);
static bool pointerInvalidatedByLoop(Value *V, uint64_t Size,
const AAMDNodes &AAInfo,
AliasSetTracker *CurAST);
static Instruction *CloneInstructionInExitBlock(Instruction &I,
BasicBlock &ExitBlock,
PHINode &PN, LoopInfo *LI);
-static bool canSinkOrHoistInst(Instruction &I, AliasAnalysis *AA,
- DominatorTree *DT, const DataLayout *DL,
- Loop *CurLoop, AliasSetTracker *CurAST,
- LICMSafetyInfo * SafetyInfo);
+static bool canSinkOrHoistInst(Instruction &I, AliasAnalysis *AA,
+ DominatorTree *DT, Loop *CurLoop,
+ AliasSetTracker *CurAST,
+ LICMSafetyInfo *SafetyInfo);
namespace {
struct LICM : public LoopPass {
@@ -130,7 +130,6 @@ namespace {
LoopInfo *LI; // Current LoopInfo
DominatorTree *DT; // Dominator Tree for the current Loop.
- const DataLayout *DL; // DataLayout for constant folding.
TargetLibraryInfo *TLI; // TargetLibraryInfo for constant folding.
// State that is updated as we process loops.
@@ -181,8 +180,6 @@ bool LICM::runOnLoop(Loop *L, LPPassManager &LPM) {
AA = &getAnalysis<AliasAnalysis>();
DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
- DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>();
- DL = DLP ? &DLP->getDataLayout() : nullptr;
TLI = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();
assert(L->isLCSSAForm(*DT) && "Loop is not in LCSSA form.");
@@ -235,10 +232,10 @@ bool LICM::runOnLoop(Loop *L, LPPassManager &LPM) {
// instructions, we perform another pass to hoist them out of the loop.
//
if (L->hasDedicatedExits())
- Changed |= sinkRegion(DT->getNode(L->getHeader()), AA, LI, DT, DL, TLI,
- CurLoop, CurAST, &SafetyInfo);
+ Changed |= sinkRegion(DT->getNode(L->getHeader()), AA, LI, DT, TLI, CurLoop,
+ CurAST, &SafetyInfo);
if (Preheader)
- Changed |= hoistRegion(DT->getNode(L->getHeader()), AA, LI, DT, DL, TLI,
+ Changed |= hoistRegion(DT->getNode(L->getHeader()), AA, LI, DT, TLI,
CurLoop, CurAST, &SafetyInfo);
// Now that all loop invariants have been removed from the loop, promote any
@@ -291,10 +288,9 @@ bool LICM::runOnLoop(Loop *L, LPPassManager &LPM) {
/// first order w.r.t the DominatorTree. This allows us to visit uses before
/// definitions, allowing us to sink a loop body in one pass without iteration.
///
-bool llvm::sinkRegion(DomTreeNode *N, AliasAnalysis *AA, LoopInfo *LI,
- DominatorTree *DT, const DataLayout *DL,
- TargetLibraryInfo *TLI, Loop *CurLoop,
- AliasSetTracker *CurAST, LICMSafetyInfo * SafetyInfo) {
+bool llvm::sinkRegion(DomTreeNode *N, AliasAnalysis *AA, LoopInfo *LI,
+ DominatorTree *DT, TargetLibraryInfo *TLI, Loop *CurLoop,
+ AliasSetTracker *CurAST, LICMSafetyInfo *SafetyInfo) {
// Verify inputs.
assert(N != nullptr && AA != nullptr && LI != nullptr &&
@@ -311,8 +307,8 @@ bool llvm::sinkRegion(DomTreeNode *N, AliasAnalysis *AA, LoopInfo *LI,
// We are processing blocks in reverse dfo, so process children first.
const std::vector<DomTreeNode*> &Children = N->getChildren();
for (unsigned i = 0, e = Children.size(); i != e; ++i)
- Changed |= sinkRegion(Children[i], AA, LI, DT, DL, TLI, CurLoop,
- CurAST, SafetyInfo);
+ Changed |=
+ sinkRegion(Children[i], AA, LI, DT, TLI, CurLoop, CurAST, SafetyInfo);
// Only need to process the contents of this block if it is not part of a
// subloop (which would already have been processed).
if (inSubLoop(BB,CurLoop,LI)) return Changed;
@@ -336,8 +332,8 @@ bool llvm::sinkRegion(DomTreeNode *N, AliasAnalysis *AA, LoopInfo *LI,
// outside of the loop. In this case, it doesn't even matter if the
// operands of the instruction are loop invariant.
//
- if (isNotUsedInLoop(I, CurLoop) &&
- canSinkOrHoistInst(I, AA, DT, DL, CurLoop, CurAST, SafetyInfo)) {
+ if (isNotUsedInLoop(I, CurLoop) &&
+ canSinkOrHoistInst(I, AA, DT, CurLoop, CurAST, SafetyInfo)) {
++II;
Changed |= sink(I, LI, DT, CurLoop, CurAST);
}
@@ -350,10 +346,9 @@ bool llvm::sinkRegion(DomTreeNode *N, AliasAnalysis *AA, LoopInfo *LI,
/// order w.r.t the DominatorTree. This allows us to visit definitions before
/// uses, allowing us to hoist a loop body in one pass without iteration.
///
-bool llvm::hoistRegion(DomTreeNode *N, AliasAnalysis *AA, LoopInfo *LI,
- DominatorTree *DT, const DataLayout *DL,
- TargetLibraryInfo *TLI, Loop *CurLoop,
- AliasSetTracker *CurAST, LICMSafetyInfo *SafetyInfo) {
+bool llvm::hoistRegion(DomTreeNode *N, AliasAnalysis *AA, LoopInfo *LI,
+ DominatorTree *DT, TargetLibraryInfo *TLI, Loop *CurLoop,
+ AliasSetTracker *CurAST, LICMSafetyInfo *SafetyInfo) {
// Verify inputs.
assert(N != nullptr && AA != nullptr && LI != nullptr &&
DT != nullptr && CurLoop != nullptr && CurAST != nullptr &&
@@ -372,7 +367,8 @@ bool llvm::hoistRegion(DomTreeNode *N, AliasAnalysis *AA, LoopInfo *LI,
// Try constant folding this instruction. If all the operands are
// constants, it is technically hoistable, but it would be better to just
// fold it.
- if (Constant *C = ConstantFoldInstruction(&I, DL, TLI)) {
+ if (Constant *C = ConstantFoldInstruction(
+ &I, I.getModule()->getDataLayout(), TLI)) {
DEBUG(dbgs() << "LICM folding inst: " << I << " --> " << *C << '\n');
CurAST->copyValue(&I, C);
CurAST->deleteValue(&I);
@@ -385,16 +381,16 @@ bool llvm::hoistRegion(DomTreeNode *N, AliasAnalysis *AA, LoopInfo *LI,
// if all of the operands of the instruction are loop invariant and if it
// is safe to hoist the instruction.
//
- if (CurLoop->hasLoopInvariantOperands(&I) &&
- canSinkOrHoistInst(I, AA, DT, DL, CurLoop, CurAST, SafetyInfo) &&
- isSafeToExecuteUnconditionally(I, DT, DL, CurLoop, SafetyInfo))
+ if (CurLoop->hasLoopInvariantOperands(&I) &&
+ canSinkOrHoistInst(I, AA, DT, CurLoop, CurAST, SafetyInfo) &&
+ isSafeToExecuteUnconditionally(I, DT, CurLoop, SafetyInfo))
Changed |= hoist(I, CurLoop->getLoopPreheader());
}
const std::vector<DomTreeNode*> &Children = N->getChildren();
for (unsigned i = 0, e = Children.size(); i != e; ++i)
- Changed |= hoistRegion(Children[i], AA, LI, DT, DL, TLI, CurLoop,
- CurAST, SafetyInfo);
+ Changed |=
+ hoistRegion(Children[i], AA, LI, DT, TLI, CurLoop, CurAST, SafetyInfo);
return Changed;
}
@@ -424,10 +420,9 @@ void llvm::computeLICMSafetyInfo(LICMSafetyInfo * SafetyInfo, Loop * CurLoop) {
/// canSinkOrHoistInst - Return true if the hoister and sinker can handle this
/// instruction.
///
-bool canSinkOrHoistInst(Instruction &I, AliasAnalysis *AA,
- DominatorTree *DT, const DataLayout *DL,
- Loop *CurLoop, AliasSetTracker *CurAST,
- LICMSafetyInfo * SafetyInfo) {
+bool canSinkOrHoistInst(Instruction &I, AliasAnalysis *AA, DominatorTree *DT,
+ Loop *CurLoop, AliasSetTracker *CurAST,
+ LICMSafetyInfo *SafetyInfo) {
// Loads have extra constraints we have to verify before we can hoist them.
if (LoadInst *LI = dyn_cast<LoadInst>(&I)) {
if (!LI->isUnordered())
@@ -487,7 +482,7 @@ bool canSinkOrHoistInst(Instruction &I, AliasAnalysis *AA,
!isa<InsertValueInst>(I))
return false;
- return isSafeToExecuteUnconditionally(I, DT, DL, CurLoop, SafetyInfo);
+ return isSafeToExecuteUnconditionally(I, DT, CurLoop, SafetyInfo);
}
/// Returns true if a PHINode is a trivially replaceable with an
@@ -643,10 +638,10 @@ static bool hoist(Instruction &I, BasicBlock *Preheader) {
/// or if it is a trapping instruction and is guaranteed to execute.
///
static bool isSafeToExecuteUnconditionally(Instruction &Inst, DominatorTree *DT,
- const DataLayout *DL, Loop *CurLoop,
- LICMSafetyInfo * SafetyInfo) {
+ Loop *CurLoop,
+ LICMSafetyInfo *SafetyInfo) {
// If it is not a trapping instruction, it is always safe to hoist.
- if (isSafeToSpeculativelyExecute(&Inst, DL))
+ if (isSafeToSpeculativelyExecute(&Inst))
return true;
return isGuaranteedToExecute(Inst, DT, CurLoop, SafetyInfo);
diff --git a/lib/Transforms/Scalar/LoadCombine.cpp b/lib/Transforms/Scalar/LoadCombine.cpp
index 11e4d76..1f33f72 100644
--- a/lib/Transforms/Scalar/LoadCombine.cpp
+++ b/lib/Transforms/Scalar/LoadCombine.cpp
@@ -12,17 +12,17 @@
//===----------------------------------------------------------------------===//
#include "llvm/Transforms/Scalar.h"
-
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/AliasSetTracker.h"
#include "llvm/Analysis/TargetFolder.h"
-#include "llvm/Pass.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Function.h"
-#include "llvm/IR/Instructions.h"
#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/Module.h"
+#include "llvm/Pass.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
@@ -52,13 +52,10 @@ struct LoadPOPPair {
class LoadCombine : public BasicBlockPass {
LLVMContext *C;
- const DataLayout *DL;
AliasAnalysis *AA;
public:
- LoadCombine()
- : BasicBlockPass(ID),
- C(nullptr), DL(nullptr), AA(nullptr) {
+ LoadCombine() : BasicBlockPass(ID), C(nullptr), AA(nullptr) {
initializeSROAPass(*PassRegistry::getPassRegistry());
}
@@ -85,12 +82,6 @@ private:
bool LoadCombine::doInitialization(Function &F) {
DEBUG(dbgs() << "LoadCombine function: " << F.getName() << "\n");
C = &F.getContext();
- DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>();
- if (!DLP) {
- DEBUG(dbgs() << " Skipping LoadCombine -- no target data!\n");
- return false;
- }
- DL = &DLP->getDataLayout();
return true;
}
@@ -100,9 +91,10 @@ PointerOffsetPair LoadCombine::getPointerOffsetPair(LoadInst &LI) {
POP.Offset = 0;
while (isa<BitCastInst>(POP.Pointer) || isa<GetElementPtrInst>(POP.Pointer)) {
if (auto *GEP = dyn_cast<GetElementPtrInst>(POP.Pointer)) {
- unsigned BitWidth = DL->getPointerTypeSizeInBits(GEP->getType());
+ auto &DL = LI.getModule()->getDataLayout();
+ unsigned BitWidth = DL.getPointerTypeSizeInBits(GEP->getType());
APInt Offset(BitWidth, 0);
- if (GEP->accumulateConstantOffset(*DL, Offset))
+ if (GEP->accumulateConstantOffset(DL, Offset))
POP.Offset += Offset.getZExtValue();
else
// Can't handle GEPs with variable indices.
@@ -145,7 +137,8 @@ bool LoadCombine::aggregateLoads(SmallVectorImpl<LoadPOPPair> &Loads) {
if (PrevOffset == -1ull) {
BaseLoad = L.Load;
PrevOffset = L.POP.Offset;
- PrevSize = DL->getTypeStoreSize(L.Load->getType());
+ PrevSize = L.Load->getModule()->getDataLayout().getTypeStoreSize(
+ L.Load->getType());
AggregateLoads.push_back(L);
continue;
}
@@ -164,7 +157,8 @@ bool LoadCombine::aggregateLoads(SmallVectorImpl<LoadPOPPair> &Loads) {
// FIXME: We may want to handle this case.
continue;
PrevOffset = L.POP.Offset;
- PrevSize = DL->getTypeStoreSize(L.Load->getType());
+ PrevSize = L.Load->getModule()->getDataLayout().getTypeStoreSize(
+ L.Load->getType());
AggregateLoads.push_back(L);
}
if (combineLoads(AggregateLoads))
@@ -215,7 +209,8 @@ bool LoadCombine::combineLoads(SmallVectorImpl<LoadPOPPair> &Loads) {
for (const auto &L : Loads) {
Builder->SetInsertPoint(L.Load);
Value *V = Builder->CreateExtractInteger(
- *DL, NewLoad, cast<IntegerType>(L.Load->getType()),
+ L.Load->getModule()->getDataLayout(), NewLoad,
+ cast<IntegerType>(L.Load->getType()),
L.POP.Offset - Loads[0].POP.Offset, "combine.extract");
L.Load->replaceAllUsesWith(V);
}
@@ -225,13 +220,13 @@ bool LoadCombine::combineLoads(SmallVectorImpl<LoadPOPPair> &Loads) {
}
bool LoadCombine::runOnBasicBlock(BasicBlock &BB) {
- if (skipOptnoneFunction(BB) || !DL)
+ if (skipOptnoneFunction(BB))
return false;
AA = &getAnalysis<AliasAnalysis>();
- IRBuilder<true, TargetFolder>
- TheBuilder(BB.getContext(), TargetFolder(DL));
+ IRBuilder<true, TargetFolder> TheBuilder(
+ BB.getContext(), TargetFolder(BB.getModule()->getDataLayout()));
Builder = &TheBuilder;
DenseMap<const Value *, SmallVector<LoadPOPPair, 8>> LoadMap;
diff --git a/lib/Transforms/Scalar/LoopIdiomRecognize.cpp b/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
index 243c624..7bc2917 100644
--- a/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
+++ b/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
@@ -47,6 +47,7 @@
#include "llvm/Analysis/LoopPass.h"
#include "llvm/Analysis/ScalarEvolutionExpander.h"
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
+#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/DataLayout.h"
@@ -56,7 +57,6 @@
#include "llvm/IR/Module.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Transforms/Utils/Local.h"
using namespace llvm;
@@ -130,7 +130,6 @@ namespace {
class LoopIdiomRecognize : public LoopPass {
Loop *CurLoop;
- const DataLayout *DL;
DominatorTree *DT;
ScalarEvolution *SE;
TargetLibraryInfo *TLI;
@@ -139,7 +138,10 @@ namespace {
static char ID;
explicit LoopIdiomRecognize() : LoopPass(ID) {
initializeLoopIdiomRecognizePass(*PassRegistry::getPassRegistry());
- DL = nullptr; DT = nullptr; SE = nullptr; TLI = nullptr; TTI = nullptr;
+ DT = nullptr;
+ SE = nullptr;
+ TLI = nullptr;
+ TTI = nullptr;
}
bool runOnLoop(Loop *L, LPPassManager &LPM) override;
@@ -179,14 +181,6 @@ namespace {
AU.addRequired<TargetTransformInfoWrapperPass>();
}
- const DataLayout *getDataLayout() {
- if (DL)
- return DL;
- DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>();
- DL = DLP ? &DLP->getDataLayout() : nullptr;
- return DL;
- }
-
DominatorTree *getDominatorTree() {
return DT ? DT
: (DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree());
@@ -625,10 +619,6 @@ bool LoopIdiomRecognize::runOnCountableLoop() {
if (BECst->getValue()->getValue() == 0)
return false;
- // We require target data for now.
- if (!getDataLayout())
- return false;
-
// set DT
(void)getDominatorTree();
@@ -742,7 +732,8 @@ bool LoopIdiomRecognize::processLoopStore(StoreInst *SI, const SCEV *BECount) {
Value *StorePtr = SI->getPointerOperand();
// Reject stores that are so large that they overflow an unsigned.
- uint64_t SizeInBits = DL->getTypeSizeInBits(StoredVal->getType());
+ auto &DL = CurLoop->getHeader()->getModule()->getDataLayout();
+ uint64_t SizeInBits = DL.getTypeSizeInBits(StoredVal->getType());
if ((SizeInBits & 7) || (SizeInBits >> 32) != 0)
return false;
@@ -917,7 +908,7 @@ processLoopStridedStore(Value *DestPtr, unsigned StoreSize,
// but it can be turned into memset_pattern if the target supports it.
Value *SplatValue = isBytewiseValue(StoredVal);
Constant *PatternValue = nullptr;
-
+ auto &DL = CurLoop->getHeader()->getModule()->getDataLayout();
unsigned DestAS = DestPtr->getType()->getPointerAddressSpace();
// If we're allowed to form a memset, and the stored value would be acceptable
@@ -928,9 +919,8 @@ processLoopStridedStore(Value *DestPtr, unsigned StoreSize,
CurLoop->isLoopInvariant(SplatValue)) {
// Keep and use SplatValue.
PatternValue = nullptr;
- } else if (DestAS == 0 &&
- TLI->has(LibFunc::memset_pattern16) &&
- (PatternValue = getMemSetPatternValue(StoredVal, *DL))) {
+ } else if (DestAS == 0 && TLI->has(LibFunc::memset_pattern16) &&
+ (PatternValue = getMemSetPatternValue(StoredVal, DL))) {
// Don't create memset_pattern16s with address spaces.
// It looks like we can use PatternValue!
SplatValue = nullptr;
@@ -945,7 +935,7 @@ processLoopStridedStore(Value *DestPtr, unsigned StoreSize,
// header. This allows us to insert code for it in the preheader.
BasicBlock *Preheader = CurLoop->getLoopPreheader();
IRBuilder<> Builder(Preheader->getTerminator());
- SCEVExpander Expander(*SE, "loop-idiom");
+ SCEVExpander Expander(*SE, DL, "loop-idiom");
Type *DestInt8PtrTy = Builder.getInt8PtrTy(DestAS);
@@ -1005,7 +995,7 @@ processLoopStridedStore(Value *DestPtr, unsigned StoreSize,
// Otherwise we should form a memset_pattern16. PatternValue is known to be
// an constant array of 16-bytes. Plop the value into a mergable global.
GlobalVariable *GV = new GlobalVariable(*M, PatternValue->getType(), true,
- GlobalValue::InternalLinkage,
+ GlobalValue::PrivateLinkage,
PatternValue, ".memset_pattern");
GV->setUnnamedAddr(true); // Ok to merge these.
GV->setAlignment(16);
@@ -1042,7 +1032,8 @@ processLoopStoreOfLoopLoad(StoreInst *SI, unsigned StoreSize,
// header. This allows us to insert code for it in the preheader.
BasicBlock *Preheader = CurLoop->getLoopPreheader();
IRBuilder<> Builder(Preheader->getTerminator());
- SCEVExpander Expander(*SE, "loop-idiom");
+ const DataLayout &DL = Preheader->getModule()->getDataLayout();
+ SCEVExpander Expander(*SE, DL, "loop-idiom");
// Okay, we have a strided store "p[i]" of a loaded value. We can turn
// this into a memcpy in the loop preheader now if we want. However, this
diff --git a/lib/Transforms/Scalar/LoopInstSimplify.cpp b/lib/Transforms/Scalar/LoopInstSimplify.cpp
index 6dc600e..e125026 100644
--- a/lib/Transforms/Scalar/LoopInstSimplify.cpp
+++ b/lib/Transforms/Scalar/LoopInstSimplify.cpp
@@ -77,8 +77,6 @@ bool LoopInstSimplify::runOnLoop(Loop *L, LPPassManager &LPM) {
getAnalysisIfAvailable<DominatorTreeWrapperPass>();
DominatorTree *DT = DTWP ? &DTWP->getDomTree() : nullptr;
LoopInfo *LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
- DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>();
- const DataLayout *DL = DLP ? &DLP->getDataLayout() : nullptr;
const TargetLibraryInfo *TLI =
&getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();
auto &AC = getAnalysis<AssumptionCacheTracker>().getAssumptionCache(
@@ -110,6 +108,7 @@ bool LoopInstSimplify::runOnLoop(Loop *L, LPPassManager &LPM) {
WorklistItem Item = VisitStack.pop_back_val();
BasicBlock *BB = Item.getPointer();
bool IsSubloopHeader = Item.getInt();
+ const DataLayout &DL = L->getHeader()->getModule()->getDataLayout();
// Simplify instructions in the current basic block.
for (BasicBlock::iterator BI = BB->begin(), BE = BB->end(); BI != BE;) {
diff --git a/lib/Transforms/Scalar/LoopInterchange.cpp b/lib/Transforms/Scalar/LoopInterchange.cpp
new file mode 100644
index 0000000..f7626c5
--- /dev/null
+++ b/lib/Transforms/Scalar/LoopInterchange.cpp
@@ -0,0 +1,1154 @@
+//===- LoopInterchange.cpp - Loop interchange pass------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass implements the loop interchange transform, interchanging loops to
+// provide more cache-friendly memory access patterns.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Analysis/AliasSetTracker.h"
+#include "llvm/Analysis/AssumptionCache.h"
+#include "llvm/Analysis/BlockFrequencyInfo.h"
+#include "llvm/Analysis/CodeMetrics.h"
+#include "llvm/Analysis/DependenceAnalysis.h"
+#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/Analysis/LoopIterator.h"
+#include "llvm/Analysis/LoopPass.h"
+#include "llvm/Analysis/ScalarEvolution.h"
+#include "llvm/Analysis/ScalarEvolutionExpander.h"
+#include "llvm/Analysis/ScalarEvolutionExpressions.h"
+#include "llvm/Analysis/TargetTransformInfo.h"
+#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/IR/Dominators.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/InstIterator.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include "llvm/Transforms/Utils/LoopUtils.h"
+#include "llvm/Transforms/Utils/SSAUpdater.h"
+using namespace llvm;
+
+#define DEBUG_TYPE "loop-interchange"
+
+namespace {
+
+typedef SmallVector<Loop *, 8> LoopVector;
+
+// TODO: Check if we can use a sparse matrix here.
+typedef std::vector<std::vector<char>> CharMatrix;
+
+// Maximum number of dependencies that can be handled in the dependency matrix.
+static const unsigned MaxMemInstrCount = 100;
+
+// Maximum loop depth supported.
+static const unsigned MaxLoopNestDepth = 10;
+
+struct LoopInterchange;
+
+#ifdef DUMP_DEP_MATRICIES
+void printDepMatrix(CharMatrix &DepMatrix) {
+ for (auto I = DepMatrix.begin(), E = DepMatrix.end(); I != E; ++I) {
+ std::vector<char> Vec = *I;
+ for (auto II = Vec.begin(), EE = Vec.end(); II != EE; ++II)
+ DEBUG(dbgs() << *II << " ");
+ DEBUG(dbgs() << "\n");
+ }
+}
+#endif
+
+bool populateDependencyMatrix(CharMatrix &DepMatrix, unsigned Level, Loop *L,
+ DependenceAnalysis *DA) {
+ typedef SmallVector<Value *, 16> ValueVector;
+ ValueVector MemInstr;
+
+ if (Level > MaxLoopNestDepth) {
+ DEBUG(dbgs() << "Cannot handle loops of depth greater than "
+ << MaxLoopNestDepth << "\n");
+ return false;
+ }
+
+ // For each block.
+ for (Loop::block_iterator BB = L->block_begin(), BE = L->block_end();
+ BB != BE; ++BB) {
+ // Scan the BB and collect legal loads and stores.
+ for (BasicBlock::iterator I = (*BB)->begin(), E = (*BB)->end(); I != E;
+ ++I) {
+ Instruction *Ins = dyn_cast<Instruction>(I);
+ if (!Ins)
+ return false;
+ LoadInst *Ld = dyn_cast<LoadInst>(I);
+ StoreInst *St = dyn_cast<StoreInst>(I);
+ if (!St && !Ld)
+ continue;
+ if (Ld && !Ld->isSimple())
+ return false;
+ if (St && !St->isSimple())
+ return false;
+ MemInstr.push_back(I);
+ }
+ }
+
+ DEBUG(dbgs() << "Found " << MemInstr.size()
+ << " Loads and Stores to analyze\n");
+
+ ValueVector::iterator I, IE, J, JE;
+
+ for (I = MemInstr.begin(), IE = MemInstr.end(); I != IE; ++I) {
+ for (J = I, JE = MemInstr.end(); J != JE; ++J) {
+ std::vector<char> Dep;
+ Instruction *Src = dyn_cast<Instruction>(*I);
+ Instruction *Des = dyn_cast<Instruction>(*J);
+ if (Src == Des)
+ continue;
+ if (isa<LoadInst>(Src) && isa<LoadInst>(Des))
+ continue;
+ if (auto D = DA->depends(Src, Des, true)) {
+ DEBUG(dbgs() << "Found Dependency between Src=" << Src << " Des=" << Des
+ << "\n");
+ if (D->isFlow()) {
+ // TODO: Handle flow dependence. Check if it is sufficient to populate
+ // the dependence matrix with the direction reversed.
+ DEBUG(dbgs() << "Flow dependence not handled");
+ return false;
+ }
+ if (D->isAnti()) {
+ DEBUG(dbgs() << "Found Anti dependence \n");
+ unsigned Levels = D->getLevels();
+ char Direction;
+ for (unsigned II = 1; II <= Levels; ++II) {
+ const SCEV *Distance = D->getDistance(II);
+ const SCEVConstant *SCEVConst =
+ dyn_cast_or_null<SCEVConstant>(Distance);
+ if (SCEVConst) {
+ const ConstantInt *CI = SCEVConst->getValue();
+ if (CI->isNegative())
+ Direction = '<';
+ else if (CI->isZero())
+ Direction = '=';
+ else
+ Direction = '>';
+ Dep.push_back(Direction);
+ } else if (D->isScalar(II)) {
+ Direction = 'S';
+ Dep.push_back(Direction);
+ } else {
+ unsigned Dir = D->getDirection(II);
+ if (Dir == Dependence::DVEntry::LT ||
+ Dir == Dependence::DVEntry::LE)
+ Direction = '<';
+ else if (Dir == Dependence::DVEntry::GT ||
+ Dir == Dependence::DVEntry::GE)
+ Direction = '>';
+ else if (Dir == Dependence::DVEntry::EQ)
+ Direction = '=';
+ else
+ Direction = '*';
+ Dep.push_back(Direction);
+ }
+ }
+ while (Dep.size() != Level) {
+ Dep.push_back('I');
+ }
+
+ DepMatrix.push_back(Dep);
+ if (DepMatrix.size() > MaxMemInstrCount) {
+ DEBUG(dbgs() << "Cannot handle more than " << MaxMemInstrCount
+ << " dependencies inside loop\n");
+ return false;
+ }
+ }
+ }
+ }
+ }
+
+ // If we don't have a DepMatrix, we cannot check legality; return false.
+ if (DepMatrix.empty())
+ return false;
+ return true;
+}
+
+// A loop is moved from index 'From' to index 'To'. Update the dependence
+// matrix by exchanging the two columns.
+void interChangeDepedencies(CharMatrix &DepMatrix, unsigned FromIndx,
+ unsigned ToIndx) {
+ unsigned numRows = DepMatrix.size();
+ for (unsigned i = 0; i < numRows; ++i) {
+ char TmpVal = DepMatrix[i][ToIndx];
+ DepMatrix[i][ToIndx] = DepMatrix[i][FromIndx];
+ DepMatrix[i][FromIndx] = TmpVal;
+ }
+}
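To make the encoding concrete, here is a hypothetical depth-2 nest and the
column swap an interchange performs on its direction matrix (a standalone
sketch, not the pass's own code):

#include <cstdio>
#include <utility>
#include <vector>

typedef std::vector<std::vector<char>> CharMatrix;

int main() {
  // Hypothetical nest:
  //   for (i ...)                    // column 0
  //     for (j ...)                  // column 1
  //       A[i][j] = A[i][j-1] + k;   // dependence carried by j: row "=,<"
  CharMatrix DepMatrix = {{'=', '<'}};

  // Interchanging loops 0 and 1 exchanges the two columns, row by row, just
  // as interChangeDepedencies above does with a manual temporary.
  for (auto &Row : DepMatrix)
    std::swap(Row[0], Row[1]);

  printf("%c %c\n", DepMatrix[0][0], DepMatrix[0][1]); // prints "< ="
  return 0;
}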
+
+// Checks if the outermost non-'=', non-'S', non-'I' dependence in the
+// dependence matrix row is '>'.
+bool isOuterMostDepPositive(CharMatrix &DepMatrix, unsigned Row,
+ unsigned Column) {
+ for (unsigned i = 0; i <= Column; ++i) {
+ if (DepMatrix[Row][i] == '<')
+ return false;
+ if (DepMatrix[Row][i] == '>')
+ return true;
+ }
+ // All dependencies were '=', 'S' or 'I'.
+ return false;
+}
+
+// Checks if no dependence exists in the dependency matrix in Row before Column.
+bool containsNoDependence(CharMatrix &DepMatrix, unsigned Row,
+ unsigned Column) {
+ for (unsigned i = 0; i < Column; ++i) {
+ if (DepMatrix[Row][i] != '=' && DepMatrix[Row][i] != 'S' &&
+ DepMatrix[Row][i] != 'I')
+ return false;
+ }
+ return true;
+}
+
+bool validDepInterchange(CharMatrix &DepMatrix, unsigned Row,
+ unsigned OuterLoopId, char InnerDep, char OuterDep) {
+
+ if (isOuterMostDepPositive(DepMatrix, Row, OuterLoopId))
+ return false;
+
+ if (InnerDep == OuterDep)
+ return true;
+
+ // It is legal to interchange if and only if after interchange no row has a
+ // '>' direction as the leftmost non-'='.
+
+ if (InnerDep == '=' || InnerDep == 'S' || InnerDep == 'I')
+ return true;
+
+ if (InnerDep == '<')
+ return true;
+
+ if (InnerDep == '>') {
+ // If OuterLoopId represents the outermost loop, interchanging will make
+ // '>' the first dependency, which is illegal.
+ if (OuterLoopId == 0)
+ return false;
+
+ // If all dependencies before OuterLoopId are '=', 'S' or 'I', interchanging
+ // would make '>' the outermost non-'=' dependency in this row; only when
+ // some earlier column carries a '<' does the row remain legal.
+ if (!containsNoDependence(DepMatrix, Row, OuterLoopId))
+ return true;
+ }
+
+ return false;
+}
+
+// Checks if it is legal to interchange 2 loops.
+// [Theorem] A permutation of the loops in a perfect nest is legal if and only if
+// the direction matrix, after the same permutation is applied to its columns,
+// has no ">" direction as the leftmost non-"=" direction in any row.
+bool isLegalToInterChangeLoops(CharMatrix &DepMatrix, unsigned InnerLoopId,
+ unsigned OuterLoopId) {
+
+ unsigned NumRows = DepMatrix.size();
+ // For each row check if it is valid to interchange.
+ for (unsigned Row = 0; Row < NumRows; ++Row) {
+ char InnerDep = DepMatrix[Row][InnerLoopId];
+ char OuterDep = DepMatrix[Row][OuterLoopId];
+ if (InnerDep == '*' || OuterDep == '*')
+ return false;
+ else if (!validDepInterchange(DepMatrix, Row, OuterLoopId, InnerDep,
+ OuterDep))
+ return false;
+ }
+ return true;
+}
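The theorem's per-row test can be restated standalone (a minimal sketch,
independent of the pass's data structures):

#include <cassert>
#include <vector>

// A row is legal iff its leftmost non-'=' direction (ignoring 'S' and 'I'),
// read in the permuted column order, is not '>'.
static bool rowLegal(const std::vector<char> &Row) {
  for (char D : Row) {
    if (D == '=' || D == 'S' || D == 'I')
      continue;
    return D != '>';
  }
  return true;
}

int main() {
  assert(rowLegal({'<', '>'}));  // original order: leftmost non-'=' is '<'
  assert(!rowLegal({'>', '<'})); // after interchange: '>' leads, so illegal
  assert(rowLegal({'=', '<'}));  // '=' entries are skipped
  return 0;
}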
+
+static void populateWorklist(Loop &L, SmallVector<LoopVector, 8> &V) {
+
+ DEBUG(dbgs() << "Calling populateWorklist called\n");
+ LoopVector LoopList;
+ Loop *CurrentLoop = &L;
+ std::vector<Loop *> vec = CurrentLoop->getSubLoopsVector();
+ while (vec.size() != 0) {
+ // The current loop has multiple subloops, hence it is not tightly nested.
+ // Discard the loops collected so far; this nest is not a candidate.
+ if (vec.size() != 1) {
+ LoopList.clear();
+ return;
+ }
+ LoopList.push_back(CurrentLoop);
+ CurrentLoop = *(vec.begin());
+ vec = CurrentLoop->getSubLoopsVector();
+ }
+ LoopList.push_back(CurrentLoop);
+ V.push_back(LoopList);
+}
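For reference, the nest shape populateWorklist accepts is a single chain of
subloops; any level with sibling loops causes the whole candidate to be
discarded. Hypothetical source-level examples:

// Accepted: every level has exactly one subloop (a tight chain).
void chain(int **A, int N, int M) {
  for (int i = 0; i < N; ++i)
    for (int j = 0; j < M; ++j)
      A[i][j] = 0;
}

// Rejected: the outer loop has two sibling subloops, so LoopList is cleared
// and nothing is added to the worklist.
void siblings(int **A, int N, int M) {
  for (int i = 0; i < N; ++i) {
    for (int j = 0; j < M; ++j)
      A[i][j] = 0;
    for (int k = 0; k < M; ++k)
      A[i][k] += 1;
  }
}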
+
+static PHINode *getInductionVariable(Loop *L, ScalarEvolution *SE) {
+ PHINode *InnerIndexVar = L->getCanonicalInductionVariable();
+ if (InnerIndexVar)
+ return InnerIndexVar;
+ if (L->getLoopLatch() == nullptr || L->getLoopPredecessor() == nullptr)
+ return nullptr;
+ for (BasicBlock::iterator I = L->getHeader()->begin(); isa<PHINode>(I); ++I) {
+ PHINode *PhiVar = cast<PHINode>(I);
+ Type *PhiTy = PhiVar->getType();
+ if (!PhiTy->isIntegerTy() && !PhiTy->isFloatingPointTy() &&
+ !PhiTy->isPointerTy())
+ return nullptr;
+ const SCEVAddRecExpr *AddRec =
+ dyn_cast<SCEVAddRecExpr>(SE->getSCEV(PhiVar));
+ if (!AddRec || !AddRec->isAffine())
+ continue;
+ const SCEV *Step = AddRec->getStepRecurrence(*SE);
+ const SCEVConstant *C = dyn_cast<SCEVConstant>(Step);
+ if (!C)
+ continue;
+ // Found the induction variable.
+ // FIXME: Handle loops with more than one induction variable. Note that,
+ // currently, legality makes sure we have only one induction variable.
+ return PhiVar;
+ }
+ return nullptr;
+}
+
+/// LoopInterchangeLegality checks if it is legal to interchange the loop.
+class LoopInterchangeLegality {
+public:
+ LoopInterchangeLegality(Loop *Outer, Loop *Inner, ScalarEvolution *SE,
+ LoopInterchange *Pass)
+ : OuterLoop(Outer), InnerLoop(Inner), SE(SE), CurrentPass(Pass) {}
+
+ /// Check if the loops can be interchanged.
+ bool canInterchangeLoops(unsigned InnerLoopId, unsigned OuterLoopId,
+ CharMatrix &DepMatrix);
+ /// Check if the loop structure is understood. We do not handle triangular
+ /// loops for now.
+ bool isLoopStructureUnderstood(PHINode *InnerInductionVar);
+
+ bool currentLimitations();
+
+private:
+ bool tightlyNested(Loop *Outer, Loop *Inner);
+
+ Loop *OuterLoop;
+ Loop *InnerLoop;
+
+ /// Scev analysis.
+ ScalarEvolution *SE;
+ LoopInterchange *CurrentPass;
+};
+
+/// LoopInterchangeProfitability checks if it is profitable to interchange the
+/// loop.
+class LoopInterchangeProfitability {
+public:
+ LoopInterchangeProfitability(Loop *Outer, Loop *Inner, ScalarEvolution *SE)
+ : OuterLoop(Outer), InnerLoop(Inner), SE(SE) {}
+
+ /// Check if the loop interchange is profitable
+ bool isProfitable(unsigned InnerLoopId, unsigned OuterLoopId,
+ CharMatrix &DepMatrix);
+
+private:
+ int getInstrOrderCost();
+
+ Loop *OuterLoop;
+ Loop *InnerLoop;
+
+ /// Scev analysis.
+ ScalarEvolution *SE;
+};
+
+/// LoopInterchangeTransform interchanges the loop
+class LoopInterchangeTransform {
+public:
+ LoopInterchangeTransform(Loop *Outer, Loop *Inner, ScalarEvolution *SE,
+ LoopInfo *LI, DominatorTree *DT,
+ LoopInterchange *Pass, BasicBlock *LoopNestExit)
+ : OuterLoop(Outer), InnerLoop(Inner), SE(SE), LI(LI), DT(DT),
+ LoopExit(LoopNestExit) {}
+
+ /// Interchange OuterLoop and InnerLoop.
+ bool transform();
+ void restructureLoops(Loop *InnerLoop, Loop *OuterLoop);
+ void removeChildLoop(Loop *OuterLoop, Loop *InnerLoop);
+
+private:
+ void splitInnerLoopLatch(Instruction *);
+ void splitOuterLoopLatch();
+ void splitInnerLoopHeader();
+ bool adjustLoopLinks();
+ void adjustLoopPreheaders();
+ void adjustOuterLoopPreheader();
+ void adjustInnerLoopPreheader();
+ bool adjustLoopBranches();
+
+ Loop *OuterLoop;
+ Loop *InnerLoop;
+
+ /// Scev analysis.
+ ScalarEvolution *SE;
+ LoopInfo *LI;
+ DominatorTree *DT;
+ BasicBlock *LoopExit;
+};
+
+// Main LoopInterchange Pass
+struct LoopInterchange : public FunctionPass {
+ static char ID;
+ ScalarEvolution *SE;
+ LoopInfo *LI;
+ DependenceAnalysis *DA;
+ DominatorTree *DT;
+ LoopInterchange()
+ : FunctionPass(ID), SE(nullptr), LI(nullptr), DA(nullptr), DT(nullptr) {
+ initializeLoopInterchangePass(*PassRegistry::getPassRegistry());
+ }
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.addRequired<ScalarEvolution>();
+ AU.addRequired<AliasAnalysis>();
+ AU.addRequired<DominatorTreeWrapperPass>();
+ AU.addRequired<LoopInfoWrapperPass>();
+ AU.addRequired<DependenceAnalysis>();
+ AU.addRequiredID(LoopSimplifyID);
+ AU.addRequiredID(LCSSAID);
+ }
+
+ bool runOnFunction(Function &F) override {
+ SE = &getAnalysis<ScalarEvolution>();
+ LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
+ DA = &getAnalysis<DependenceAnalysis>();
+ auto *DTWP = getAnalysisIfAvailable<DominatorTreeWrapperPass>();
+ DT = DTWP ? &DTWP->getDomTree() : nullptr;
+ // Build up a worklist of loop pairs to analyze.
+ SmallVector<LoopVector, 8> Worklist;
+
+ for (Loop *L : *LI)
+ populateWorklist(*L, Worklist);
+
+ DEBUG(dbgs() << "Worklist size = " << Worklist.size() << "\n");
+ bool Changed = false;
+ while (!Worklist.empty()) {
+ LoopVector LoopList = Worklist.pop_back_val();
+ Changed |= processLoopList(LoopList);
+ }
+ return Changed;
+ }
+
+ bool isComputableLoopNest(LoopVector LoopList) {
+ for (auto I = LoopList.begin(), E = LoopList.end(); I != E; ++I) {
+ Loop *L = *I;
+ const SCEV *ExitCountOuter = SE->getBackedgeTakenCount(L);
+ if (ExitCountOuter == SE->getCouldNotCompute()) {
+ DEBUG(dbgs() << "Couldn't compute Backedge count\n");
+ return false;
+ }
+ if (L->getNumBackEdges() != 1) {
+ DEBUG(dbgs() << "NumBackEdges is not equal to 1\n");
+ return false;
+ }
+ if (!L->getExitingBlock()) {
+ DEBUG(dbgs() << "Loop Doesn't have unique exit block\n");
+ return false;
+ }
+ }
+ return true;
+ }
+
+ unsigned selectLoopForInterchange(LoopVector LoopList) {
+ // TODO: Add a better heuristic to select the loop to be interchanged based
+ // on the dependence matrix. Currently we select the innermost loop.
+ return LoopList.size() - 1;
+ }
+
+ bool processLoopList(LoopVector LoopList) {
+ bool Changed = false;
+ bool containsLCSSAPHI = false;
+ CharMatrix DependencyMatrix;
+ if (LoopList.size() < 2) {
+ DEBUG(dbgs() << "Loop doesn't contain minimum nesting level.\n");
+ return false;
+ }
+ if (!isComputableLoopNest(LoopList)) {
+ DEBUG(dbgs() << "Not vaild loop candidate for interchange\n");
+ return false;
+ }
+ Loop *OuterMostLoop = *(LoopList.begin());
+
+ DEBUG(dbgs() << "Processing LoopList of size = " << LoopList.size()
+ << "\n");
+
+ if (!populateDependencyMatrix(DependencyMatrix, LoopList.size(),
+ OuterMostLoop, DA)) {
+ DEBUG(dbgs() << "Populating Dependency matrix failed\n");
+ return false;
+ }
+#ifdef DUMP_DEP_MATRICIES
+ DEBUG(dbgs() << "Dependence before inter change \n");
+ printDepMatrix(DependencyMatrix);
+#endif
+
+ BasicBlock *OuterMostLoopLatch = OuterMostLoop->getLoopLatch();
+ BranchInst *OuterMostLoopLatchBI =
+ dyn_cast<BranchInst>(OuterMostLoopLatch->getTerminator());
+ if (!OuterMostLoopLatchBI)
+ return false;
+
+ // Since we currently do not handle LCSSA PHIs, any failure in the loop
+ // condition will now branch to LoopNestExit.
+ // TODO: This should be removed once we handle LCSSA PHI nodes.
+
+ // Get the Outermost loop exit.
+ BasicBlock *LoopNestExit;
+ if (OuterMostLoopLatchBI->getSuccessor(0) == OuterMostLoop->getHeader())
+ LoopNestExit = OuterMostLoopLatchBI->getSuccessor(1);
+ else
+ LoopNestExit = OuterMostLoopLatchBI->getSuccessor(0);
+
+ for (auto I = LoopList.begin(), E = LoopList.end(); I != E; ++I) {
+ Loop *L = *I;
+ BasicBlock *Latch = L->getLoopLatch();
+ BasicBlock *Header = L->getHeader();
+ if (Latch && Latch != Header && isa<PHINode>(Latch->begin())) {
+ containsLCSSAPHI = true;
+ break;
+ }
+ }
+
+ // TODO: Handle LCSSA PHIs. They are currently not handled; support them by
+ // splitting the loop latch and adjusting loop links accordingly.
+ if (containsLCSSAPHI)
+ return false;
+
+ unsigned SelecLoopId = selectLoopForInterchange(LoopList);
+ // Move the selected loop outwards to the best possible position.
+ for (unsigned i = SelecLoopId; i > 0; i--) {
+ bool Interchanged =
+ processLoop(LoopList, i, i - 1, LoopNestExit, DependencyMatrix);
+ if (!Interchanged)
+ return Changed;
+ // Loops were interchanged; reflect the same in LoopList.
+ std::swap(LoopList[i - 1], LoopList[i]);
+
+ // Update the DependencyMatrix
+ interChangeDepedencies(DependencyMatrix, i, i - 1);
+
+#ifdef DUMP_DEP_MATRICIES
+ DEBUG(dbgs() << "Dependence after inter change \n");
+ printDepMatrix(DependencyMatrix);
+#endif
+ Changed |= Interchanged;
+ }
+ return Changed;
+ }
+
+ bool processLoop(LoopVector LoopList, unsigned InnerLoopId,
+ unsigned OuterLoopId, BasicBlock *LoopNestExit,
+ std::vector<std::vector<char>> &DependencyMatrix) {
+
+ DEBUG(dbgs() << "Processing Innder Loop Id = " << InnerLoopId
+ << " and OuterLoopId = " << OuterLoopId << "\n");
+ Loop *InnerLoop = LoopList[InnerLoopId];
+ Loop *OuterLoop = LoopList[OuterLoopId];
+
+ LoopInterchangeLegality LIL(OuterLoop, InnerLoop, SE, this);
+ if (!LIL.canInterchangeLoops(InnerLoopId, OuterLoopId, DependencyMatrix)) {
+ DEBUG(dbgs() << "Not interchanging Loops. Cannot prove legality\n");
+ return false;
+ }
+ DEBUG(dbgs() << "Loops are legal to interchange\n");
+ LoopInterchangeProfitability LIP(OuterLoop, InnerLoop, SE);
+ if (!LIP.isProfitable(InnerLoopId, OuterLoopId, DependencyMatrix)) {
+ DEBUG(dbgs() << "Interchanging Loops not profitable\n");
+ return false;
+ }
+
+ LoopInterchangeTransform LIT(OuterLoop, InnerLoop, SE, LI, DT, this,
+ LoopNestExit);
+ LIT.transform();
+ DEBUG(dbgs() << "Loops interchanged\n");
+ return true;
+ }
+};
+
+} // end of namespace
+
+static bool containsUnsafeInstructions(BasicBlock *BB) {
+ for (auto I = BB->begin(), E = BB->end(); I != E; ++I) {
+ if (I->mayHaveSideEffects() || I->mayReadFromMemory())
+ return true;
+ }
+ return false;
+}
+
+bool LoopInterchangeLegality::tightlyNested(Loop *OuterLoop, Loop *InnerLoop) {
+ BasicBlock *OuterLoopHeader = OuterLoop->getHeader();
+ BasicBlock *InnerLoopPreHeader = InnerLoop->getLoopPreheader();
+ BasicBlock *OuterLoopLatch = OuterLoop->getLoopLatch();
+
+ DEBUG(dbgs() << "Checking if Loops are Tightly Nested\n");
+
+ // A perfectly nested loop will not have any branch in between the outer and
+ // inner blocks, i.e., the outer header will branch only to the inner
+ // preheader or the outer loop latch.
+ BranchInst *outerLoopHeaderBI =
+ dyn_cast<BranchInst>(OuterLoopHeader->getTerminator());
+ if (!outerLoopHeaderBI)
+ return false;
+ unsigned num = outerLoopHeaderBI->getNumSuccessors();
+ for (unsigned i = 0; i < num; i++) {
+ if (outerLoopHeaderBI->getSuccessor(i) != InnerLoopPreHeader &&
+ outerLoopHeaderBI->getSuccessor(i) != OuterLoopLatch)
+ return false;
+ }
+
+ DEBUG(dbgs() << "Checking instructions in Loop header and Loop latch \n");
+ // We do not have any basic blocks in between; now make sure the outer header
+ // and the outer loop latch don't contain any unsafe instructions.
+ if (containsUnsafeInstructions(OuterLoopHeader) ||
+ containsUnsafeInstructions(OuterLoopLatch))
+ return false;
+
+ DEBUG(dbgs() << "Loops are perfectly nested \n");
+ // We have a perfect loop nest.
+ return true;
+}
+
+static unsigned getPHICount(BasicBlock *BB) {
+ unsigned PhiCount = 0;
+ for (auto I = BB->begin(); isa<PHINode>(I); ++I)
+ PhiCount++;
+ return PhiCount;
+}
+
+bool LoopInterchangeLegality::isLoopStructureUnderstood(
+ PHINode *InnerInduction) {
+
+ unsigned Num = InnerInduction->getNumOperands();
+ BasicBlock *InnerLoopPreheader = InnerLoop->getLoopPreheader();
+ for (unsigned i = 0; i < Num; ++i) {
+ Value *Val = InnerInduction->getOperand(i);
+ if (isa<Constant>(Val))
+ continue;
+ Instruction *I = dyn_cast<Instruction>(Val);
+ if (!I)
+ return false;
+ // TODO: Handle triangular loops.
+ // e.g. for(int i=0;i<N;i++)
+ // for(int j=i;j<N;j++)
+ unsigned IncomBlockIndx = PHINode::getIncomingValueNumForOperand(i);
+ if (InnerInduction->getIncomingBlock(IncomBlockIndx) ==
+ InnerLoopPreheader &&
+ !OuterLoop->isLoopInvariant(I)) {
+ return false;
+ }
+ }
+ return true;
+}
+
+// This function returns true if the transform hits one of its current
+// limitations, as a result of which we do not proceed.
+bool LoopInterchangeLegality::currentLimitations() {
+
+ BasicBlock *InnerLoopPreHeader = InnerLoop->getLoopPreheader();
+ BasicBlock *InnerLoopHeader = InnerLoop->getHeader();
+ BasicBlock *OuterLoopHeader = OuterLoop->getHeader();
+ BasicBlock *InnerLoopLatch = InnerLoop->getLoopLatch();
+ BasicBlock *OuterLoopLatch = OuterLoop->getLoopLatch();
+
+ PHINode *InnerInductionVar;
+ PHINode *OuterInductionVar;
+
+ // We currently handle only 1 induction variable inside the loop. We also do
+ // not handle reductions as of now.
+ if (getPHICount(InnerLoopHeader) > 1)
+ return true;
+
+ if (getPHICount(OuterLoopHeader) > 1)
+ return true;
+
+ InnerInductionVar = getInductionVariable(InnerLoop, SE);
+ OuterInductionVar = getInductionVariable(OuterLoop, SE);
+
+ if (!OuterInductionVar || !InnerInductionVar) {
+ DEBUG(dbgs() << "Induction variable not found\n");
+ return true;
+ }
+
+ // TODO: Triangular loops are not handled for now.
+ if (!isLoopStructureUnderstood(InnerInductionVar)) {
+ DEBUG(dbgs() << "Loop structure not understood by pass\n");
+ return true;
+ }
+
+ // TODO: Loops with LCSSA PHI's are currently not handled.
+ if (isa<PHINode>(OuterLoopLatch->begin())) {
+ DEBUG(dbgs() << "Found and LCSSA PHI in outer loop latch\n");
+ return true;
+ }
+ if (InnerLoopLatch != InnerLoopHeader &&
+ isa<PHINode>(InnerLoopLatch->begin())) {
+ DEBUG(dbgs() << "Found and LCSSA PHI in inner loop latch\n");
+ return true;
+ }
+
+ // TODO: Current limitation: since we split the inner loop latch at the point
+ // where the induction variable is incremented (induction.next), we cannot
+ // have more than one user of induction.next, since that would result in
+ // broken code after the split.
+ // e.g.
+ // for(i=0;i<N;i++) {
+ // for(j = 0;j<M;j++) {
+ // A[j+1][i+2] = A[j][i]+k;
+ // }
+ // }
+ bool FoundInduction = false;
+ Instruction *InnerIndexVarInc = nullptr;
+ if (InnerInductionVar->getIncomingBlock(0) == InnerLoopPreHeader)
+ InnerIndexVarInc =
+ dyn_cast<Instruction>(InnerInductionVar->getIncomingValue(1));
+ else
+ InnerIndexVarInc =
+ dyn_cast<Instruction>(InnerInductionVar->getIncomingValue(0));
+
+ if (!InnerIndexVarInc)
+ return true;
+
+ // Since we split the inner loop latch on this induction variable, make sure
+ // we do not have any instruction between the induction variable increment
+ // and the branch instruction.
+
+ for (auto I = InnerLoopLatch->rbegin(), E = InnerLoopLatch->rend();
+ I != E && !FoundInduction; ++I) {
+ if (isa<BranchInst>(*I) || isa<CmpInst>(*I) || isa<TruncInst>(*I))
+ continue;
+ const Instruction &Ins = *I;
+ // We found an instruction. If it is not the induction variable increment,
+ // it is not safe to split this loop latch.
+ if (!Ins.isIdenticalTo(InnerIndexVarInc))
+ return true;
+ else
+ FoundInduction = true;
+ }
+ // We reached the end of the loop latch without finding the induction
+ // variable increment; report this as a current limitation.
+ if (!FoundInduction)
+ return true;
+
+ return false;
+}
+
+bool LoopInterchangeLegality::canInterchangeLoops(unsigned InnerLoopId,
+ unsigned OuterLoopId,
+ CharMatrix &DepMatrix) {
+
+ if (!isLegalToInterChangeLoops(DepMatrix, InnerLoopId, OuterLoopId)) {
+ DEBUG(dbgs() << "Failed interchange InnerLoopId = " << InnerLoopId
+ << "and OuterLoopId = " << OuterLoopId
+ << "due to dependence\n");
+ return false;
+ }
+
+ // Create unique preheaders if we do not already have them.
+ BasicBlock *OuterLoopPreHeader = OuterLoop->getLoopPreheader();
+ BasicBlock *InnerLoopPreHeader = InnerLoop->getLoopPreheader();
+
+ // Create a unique outer preheader -
+ // 1) If OuterLoop preheader is not present.
+ // 2) If OuterLoop Preheader is same as OuterLoop Header
+ // 3) If OuterLoop Preheader is same as Header of the previous loop.
+ // 4) If OuterLoop Preheader is Entry node.
+ if (!OuterLoopPreHeader || OuterLoopPreHeader == OuterLoop->getHeader() ||
+ isa<PHINode>(OuterLoopPreHeader->begin()) ||
+ !OuterLoopPreHeader->getUniquePredecessor()) {
+ OuterLoopPreHeader = InsertPreheaderForLoop(OuterLoop, CurrentPass);
+ }
+
+ if (!InnerLoopPreHeader || InnerLoopPreHeader == InnerLoop->getHeader() ||
+ InnerLoopPreHeader == OuterLoop->getHeader()) {
+ InnerLoopPreHeader = InsertPreheaderForLoop(InnerLoop, CurrentPass);
+ }
+
+ // Check if the loops are tightly nested.
+ if (!tightlyNested(OuterLoop, InnerLoop)) {
+ DEBUG(dbgs() << "Loops not tightly nested\n");
+ return false;
+ }
+
+ // TODO: The loops could not be interchanged due to current limitations in the
+ // transform module.
+ if (currentLimitations()) {
+ DEBUG(dbgs() << "Not legal because of current transform limitation\n");
+ return false;
+ }
+
+ return true;
+}
+
+int LoopInterchangeProfitability::getInstrOrderCost() {
+ unsigned GoodOrder, BadOrder;
+ BadOrder = GoodOrder = 0;
+ for (auto BI = InnerLoop->block_begin(), BE = InnerLoop->block_end();
+ BI != BE; ++BI) {
+ for (auto I = (*BI)->begin(), E = (*BI)->end(); I != E; ++I) {
+ const Instruction &Ins = *I;
+ if (const GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(&Ins)) {
+ unsigned NumOp = GEP->getNumOperands();
+ bool FoundInnerInduction = false;
+ bool FoundOuterInduction = false;
+ for (unsigned i = 0; i < NumOp; ++i) {
+ const SCEV *OperandVal = SE->getSCEV(GEP->getOperand(i));
+ const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(OperandVal);
+ if (!AR)
+ continue;
+
+ // If we find the inner induction after an outer induction e.g.
+ // for(int i=0;i<N;i++)
+ // for(int j=0;j<N;j++)
+ // A[i][j] = A[i-1][j-1]+k;
+ // then it is a good order.
+ if (AR->getLoop() == InnerLoop) {
+ // We found an InnerLoop induction after OuterLoop induction. It is
+ // a good order.
+ FoundInnerInduction = true;
+ if (FoundOuterInduction) {
+ GoodOrder++;
+ break;
+ }
+ }
+ // If we find the outer induction after an inner induction e.g.
+ // for(int i=0;i<N;i++)
+ // for(int j=0;j<N;j++)
+ // A[j][i] = A[j-1][i-1]+k;
+ // then it is a bad order.
+ if (AR->getLoop() == OuterLoop) {
+ // We found an OuterLoop induction after InnerLoop induction. It is
+ // a bad order.
+ FoundOuterInduction = true;
+ if (FoundInnerInduction) {
+ BadOrder++;
+ break;
+ }
+ }
+ }
+ }
+ }
+ }
+ return GoodOrder - BadOrder;
+}
+
+static bool isProfitableForVectorization(unsigned InnerLoopId,
+                                         unsigned OuterLoopId,
+                                         CharMatrix &DepMatrix) {
+ // TODO: Improve this heuristic to catch more cases.
+  // If the inner loop is loop independent or doesn't carry any dependency, it
+  // is profitable to move it to the outer position.
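+  // For illustration: a dependence matrix row whose entry at OuterLoopId is
+  // '=' and whose entry at InnerLoopId is 'S' or 'I' passes both checks
+  // below, so the interchange is treated as profitable for parallelism.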
+ unsigned Row = DepMatrix.size();
+ for (unsigned i = 0; i < Row; ++i) {
+ if (DepMatrix[i][InnerLoopId] != 'S' && DepMatrix[i][InnerLoopId] != 'I')
+ return false;
+ // TODO: We need to improve this heuristic.
+ if (DepMatrix[i][OuterLoopId] != '=')
+ return false;
+ }
+  // If the outer loop carries a dependence and the inner loop is loop
+  // independent, it is profitable to interchange them to enable parallelism.
+ return true;
+}
+
+bool LoopInterchangeProfitability::isProfitable(unsigned InnerLoopId,
+ unsigned OuterLoopId,
+ CharMatrix &DepMatrix) {
+
+  // TODO: Add better profitability checks.
+  // e.g.
+  // 1) Construct a dependency matrix and move the loop with no loop-carried
+  //    dependence to the innermost position to enable vectorization.
+
+  // This is a rough cost estimation algorithm. It counts the good and bad
+  // orderings of the induction variables in the instructions and allows
+  // reordering when the number of bad orderings exceeds the good ones.
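+  // For example, with two accesses in the bad order (like A[j][i]) and one in
+  // the good order (like A[i][j]), getInstrOrderCost() returns 1 - 2 = -1 and
+  // the interchange is accepted below.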
+  int Cost = getInstrOrderCost();
+ DEBUG(dbgs() << "Cost = " << Cost << "\n");
+ if (Cost < 0)
+ return true;
+
+  // It is not profitable as per the current cache profitability model, but
+  // check whether we can move this loop outside to improve parallelism.
+  bool ImprovesPar =
+      isProfitableForVectorization(InnerLoopId, OuterLoopId, DepMatrix);
+ return ImprovesPar;
+}
+
+void LoopInterchangeTransform::removeChildLoop(Loop *OuterLoop,
+ Loop *InnerLoop) {
+ for (Loop::iterator I = OuterLoop->begin(), E = OuterLoop->end(); I != E;
+ ++I) {
+ if (*I == InnerLoop) {
+ OuterLoop->removeChildLoop(I);
+ return;
+ }
+ }
+ assert(false && "Couldn't find loop");
+}
+
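+// For illustration: given a nest Parent { Outer { Inner { A, B } } },
+// restructureLoops(Inner, Outer) produces Parent { Inner { Outer { A, B } } };
+// the inner loop takes the outer loop's place in the loop tree, and the outer
+// loop adopts the inner loop's former children.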
+void LoopInterchangeTransform::restructureLoops(Loop *InnerLoop,
+ Loop *OuterLoop) {
+ Loop *OuterLoopParent = OuterLoop->getParentLoop();
+ if (OuterLoopParent) {
+ // Remove the loop from its parent loop.
+ removeChildLoop(OuterLoopParent, OuterLoop);
+ removeChildLoop(OuterLoop, InnerLoop);
+ OuterLoopParent->addChildLoop(InnerLoop);
+ } else {
+ removeChildLoop(OuterLoop, InnerLoop);
+ LI->changeTopLevelLoop(OuterLoop, InnerLoop);
+ }
+
+  // Transfer the inner loop's children to the outer loop. Note that
+  // removeChildLoop invalidates iterators, so pop from the front instead of
+  // iterating over the range.
+  while (!InnerLoop->empty())
+    OuterLoop->addChildLoop(InnerLoop->removeChildLoop(InnerLoop->begin()));
+
+ InnerLoop->addChildLoop(OuterLoop);
+}
+
+bool LoopInterchangeTransform::transform() {
+
+ DEBUG(dbgs() << "transform\n");
+ bool Transformed = false;
+ Instruction *InnerIndexVar;
+
+  if (InnerLoop->getSubLoops().empty()) {
+ BasicBlock *InnerLoopPreHeader = InnerLoop->getLoopPreheader();
+ DEBUG(dbgs() << "Calling Split Inner Loop\n");
+ PHINode *InductionPHI = getInductionVariable(InnerLoop, SE);
+ if (!InductionPHI) {
+ DEBUG(dbgs() << "Failed to find the point to split loop latch \n");
+ return false;
+ }
+
+ if (InductionPHI->getIncomingBlock(0) == InnerLoopPreHeader)
+ InnerIndexVar = dyn_cast<Instruction>(InductionPHI->getIncomingValue(1));
+ else
+ InnerIndexVar = dyn_cast<Instruction>(InductionPHI->getIncomingValue(0));
+
+    // Split at the place where the induction variable is
+    // incremented/decremented.
+    // TODO: This splitting logic may not always work. Fix this.
+ splitInnerLoopLatch(InnerIndexVar);
+ DEBUG(dbgs() << "splitInnerLoopLatch Done\n");
+
+    // Split the inner loop's PHI nodes out into a separate basic block.
+ splitInnerLoopHeader();
+ DEBUG(dbgs() << "splitInnerLoopHeader Done\n");
+ }
+
+ Transformed |= adjustLoopLinks();
+ if (!Transformed) {
+ DEBUG(dbgs() << "adjustLoopLinks Failed\n");
+ return false;
+ }
+
+ restructureLoops(InnerLoop, OuterLoop);
+ return true;
+}
+
+void LoopInterchangeTransform::splitInnerLoopLatch(Instruction *Inc) {
+ BasicBlock *InnerLoopLatch = InnerLoop->getLoopLatch();
+ BasicBlock *InnerLoopLatchPred = InnerLoopLatch;
+ InnerLoopLatch = SplitBlock(InnerLoopLatchPred, Inc, DT, LI);
+}
+
+void LoopInterchangeTransform::splitOuterLoopLatch() {
+ BasicBlock *OuterLoopLatch = OuterLoop->getLoopLatch();
+ BasicBlock *OuterLatchLcssaPhiBlock = OuterLoopLatch;
+ OuterLoopLatch = SplitBlock(OuterLatchLcssaPhiBlock,
+ OuterLoopLatch->getFirstNonPHI(), DT, LI);
+}
+
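+// For illustration (block and value names are examples), splitInnerLoopHeader
+// turns a header such as:
+//   inner.header:
+//     %j = phi i32 [ 0, %preheader ], [ %j.next, %latch ]
+//     %t = add i32 %j, 1
+//     ...
+// into a header containing only the PHI nodes, followed by a new block that
+// holds the remaining instructions.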
+void LoopInterchangeTransform::splitInnerLoopHeader() {
+
+ // Split the inner loop header out.
+ BasicBlock *InnerLoopHeader = InnerLoop->getHeader();
+ SplitBlock(InnerLoopHeader, InnerLoopHeader->getFirstNonPHI(), DT, LI);
+
+ DEBUG(dbgs() << "Output of splitInnerLoopHeader InnerLoopHeaderSucc & "
+ "InnerLoopHeader \n");
+}
+
+/// \brief Move all instructions except the terminator from FromBB right before
+/// InsertBefore
+static void moveBBContents(BasicBlock *FromBB, Instruction *InsertBefore) {
+ auto &ToList = InsertBefore->getParent()->getInstList();
+ auto &FromList = FromBB->getInstList();
+
+ ToList.splice(InsertBefore, FromList, FromList.begin(),
+ FromBB->getTerminator());
+}
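+
+// For illustration: after moveBBContents(FromBB, InsertBefore), FromBB holds
+// only its terminator; the helpers below use this to exchange the contents of
+// the inner and outer preheaders.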
+
+void LoopInterchangeTransform::adjustOuterLoopPreheader() {
+ BasicBlock *OuterLoopPreHeader = OuterLoop->getLoopPreheader();
+ BasicBlock *InnerPreHeader = InnerLoop->getLoopPreheader();
+
+ moveBBContents(OuterLoopPreHeader, InnerPreHeader->getTerminator());
+}
+
+void LoopInterchangeTransform::adjustInnerLoopPreheader() {
+ BasicBlock *InnerLoopPreHeader = InnerLoop->getLoopPreheader();
+ BasicBlock *OuterHeader = OuterLoop->getHeader();
+
+ moveBBContents(InnerLoopPreHeader, OuterHeader->getTerminator());
+}
+
+bool LoopInterchangeTransform::adjustLoopBranches() {
+
+ DEBUG(dbgs() << "adjustLoopBranches called\n");
+ // Adjust the loop preheader
+ BasicBlock *InnerLoopHeader = InnerLoop->getHeader();
+ BasicBlock *OuterLoopHeader = OuterLoop->getHeader();
+ BasicBlock *InnerLoopLatch = InnerLoop->getLoopLatch();
+ BasicBlock *OuterLoopLatch = OuterLoop->getLoopLatch();
+ BasicBlock *OuterLoopPreHeader = OuterLoop->getLoopPreheader();
+ BasicBlock *InnerLoopPreHeader = InnerLoop->getLoopPreheader();
+ BasicBlock *OuterLoopPredecessor = OuterLoopPreHeader->getUniquePredecessor();
+ BasicBlock *InnerLoopLatchPredecessor =
+ InnerLoopLatch->getUniquePredecessor();
+ BasicBlock *InnerLoopLatchSuccessor;
+ BasicBlock *OuterLoopLatchSuccessor;
+
+ BranchInst *OuterLoopLatchBI =
+ dyn_cast<BranchInst>(OuterLoopLatch->getTerminator());
+ BranchInst *InnerLoopLatchBI =
+ dyn_cast<BranchInst>(InnerLoopLatch->getTerminator());
+ BranchInst *OuterLoopHeaderBI =
+ dyn_cast<BranchInst>(OuterLoopHeader->getTerminator());
+ BranchInst *InnerLoopHeaderBI =
+ dyn_cast<BranchInst>(InnerLoopHeader->getTerminator());
+
+ if (!OuterLoopPredecessor || !InnerLoopLatchPredecessor ||
+ !OuterLoopLatchBI || !InnerLoopLatchBI || !OuterLoopHeaderBI ||
+ !InnerLoopHeaderBI)
+ return false;
+
+ BranchInst *InnerLoopLatchPredecessorBI =
+ dyn_cast<BranchInst>(InnerLoopLatchPredecessor->getTerminator());
+ BranchInst *OuterLoopPredecessorBI =
+ dyn_cast<BranchInst>(OuterLoopPredecessor->getTerminator());
+
+ if (!OuterLoopPredecessorBI || !InnerLoopLatchPredecessorBI)
+ return false;
+  BasicBlock *InnerLoopHeaderSuccessor =
+      InnerLoopHeader->getUniqueSuccessor();
+  if (!InnerLoopHeaderSuccessor)
+    return false;
+
+ // Adjust Loop Preheader and headers
+
+ unsigned NumSucc = OuterLoopPredecessorBI->getNumSuccessors();
+ for (unsigned i = 0; i < NumSucc; ++i) {
+ if (OuterLoopPredecessorBI->getSuccessor(i) == OuterLoopPreHeader)
+ OuterLoopPredecessorBI->setSuccessor(i, InnerLoopPreHeader);
+ }
+
+ NumSucc = OuterLoopHeaderBI->getNumSuccessors();
+ for (unsigned i = 0; i < NumSucc; ++i) {
+ if (OuterLoopHeaderBI->getSuccessor(i) == OuterLoopLatch)
+ OuterLoopHeaderBI->setSuccessor(i, LoopExit);
+ else if (OuterLoopHeaderBI->getSuccessor(i) == InnerLoopPreHeader)
+      OuterLoopHeaderBI->setSuccessor(i, InnerLoopHeaderSuccessor);
+ }
+
+ BranchInst::Create(OuterLoopPreHeader, InnerLoopHeaderBI);
+ InnerLoopHeaderBI->eraseFromParent();
+
+ // -------------Adjust loop latches-----------
+ if (InnerLoopLatchBI->getSuccessor(0) == InnerLoopHeader)
+ InnerLoopLatchSuccessor = InnerLoopLatchBI->getSuccessor(1);
+ else
+ InnerLoopLatchSuccessor = InnerLoopLatchBI->getSuccessor(0);
+
+ NumSucc = InnerLoopLatchPredecessorBI->getNumSuccessors();
+ for (unsigned i = 0; i < NumSucc; ++i) {
+ if (InnerLoopLatchPredecessorBI->getSuccessor(i) == InnerLoopLatch)
+ InnerLoopLatchPredecessorBI->setSuccessor(i, InnerLoopLatchSuccessor);
+ }
+
+ if (OuterLoopLatchBI->getSuccessor(0) == OuterLoopHeader)
+ OuterLoopLatchSuccessor = OuterLoopLatchBI->getSuccessor(1);
+ else
+ OuterLoopLatchSuccessor = OuterLoopLatchBI->getSuccessor(0);
+
+ if (InnerLoopLatchBI->getSuccessor(1) == InnerLoopLatchSuccessor)
+ InnerLoopLatchBI->setSuccessor(1, OuterLoopLatchSuccessor);
+ else
+ InnerLoopLatchBI->setSuccessor(0, OuterLoopLatchSuccessor);
+
+ if (OuterLoopLatchBI->getSuccessor(0) == OuterLoopLatchSuccessor) {
+ OuterLoopLatchBI->setSuccessor(0, InnerLoopLatch);
+ } else {
+ OuterLoopLatchBI->setSuccessor(1, InnerLoopLatch);
+ }
+
+ return true;
+}
+
+void LoopInterchangeTransform::adjustLoopPreheaders() {
+
+  // We have interchanged the preheaders, so we need to interchange their
+  // contents as well: the contents of the inner preheader were previously
+  // executed inside the outer loop.
+ BasicBlock *OuterLoopPreHeader = OuterLoop->getLoopPreheader();
+ BasicBlock *InnerLoopPreHeader = InnerLoop->getLoopPreheader();
+ BasicBlock *OuterLoopHeader = OuterLoop->getHeader();
+ BranchInst *InnerTermBI =
+ cast<BranchInst>(InnerLoopPreHeader->getTerminator());
+
+ BasicBlock *HeaderSplit =
+ SplitBlock(OuterLoopHeader, OuterLoopHeader->getTerminator(), DT, LI);
+ Instruction *InsPoint = HeaderSplit->getFirstNonPHI();
+  // These instructions should now be executed inside the loop.
+  // Move the instructions into a new block after the outer header.
+  moveBBContents(InnerLoopPreHeader, InsPoint);
+  // These instructions were previously not executed inside the loop, so move
+  // them to the old inner loop preheader.
+ moveBBContents(OuterLoopPreHeader, InnerTermBI);
+}
+
+bool LoopInterchangeTransform::adjustLoopLinks() {
+
+ // Adjust all branches in the inner and outer loop.
+ bool Changed = adjustLoopBranches();
+ if (Changed)
+ adjustLoopPreheaders();
+ return Changed;
+}
+
+char LoopInterchange::ID = 0;
+INITIALIZE_PASS_BEGIN(LoopInterchange, "loop-interchange",
+ "Interchanges loops for cache reuse", false, false)
+INITIALIZE_AG_DEPENDENCY(AliasAnalysis)
+INITIALIZE_PASS_DEPENDENCY(DependenceAnalysis)
+INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(ScalarEvolution)
+INITIALIZE_PASS_DEPENDENCY(LoopSimplify)
+INITIALIZE_PASS_DEPENDENCY(LCSSA)
+INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
+
+INITIALIZE_PASS_END(LoopInterchange, "loop-interchange",
+ "Interchanges loops for cache reuse", false, false)
+
+Pass *llvm::createLoopInterchangePass() { return new LoopInterchange(); }
diff --git a/lib/Transforms/Scalar/LoopRerollPass.cpp b/lib/Transforms/Scalar/LoopRerollPass.cpp
index fdf7e3b..ed103e6 100644
--- a/lib/Transforms/Scalar/LoopRerollPass.cpp
+++ b/lib/Transforms/Scalar/LoopRerollPass.cpp
@@ -23,6 +23,7 @@
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Analysis/ScalarEvolutionExpander.h"
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
+#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Dominators.h"
@@ -30,7 +31,6 @@
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Utils/LoopUtils.h"
@@ -160,7 +160,6 @@ namespace {
AliasAnalysis *AA;
LoopInfo *LI;
ScalarEvolution *SE;
- const DataLayout *DL;
TargetLibraryInfo *TLI;
DominatorTree *DT;
@@ -367,10 +366,8 @@ namespace {
struct DAGRootTracker {
DAGRootTracker(LoopReroll *Parent, Loop *L, Instruction *IV,
ScalarEvolution *SE, AliasAnalysis *AA,
- TargetLibraryInfo *TLI, const DataLayout *DL)
- : Parent(Parent), L(L), SE(SE), AA(AA), TLI(TLI),
- DL(DL), IV(IV) {
- }
+ TargetLibraryInfo *TLI)
+ : Parent(Parent), L(L), SE(SE), AA(AA), TLI(TLI), IV(IV) {}
/// Stage 1: Find all the DAG roots for the induction variable.
bool findRoots();
@@ -416,7 +413,6 @@ namespace {
ScalarEvolution *SE;
AliasAnalysis *AA;
TargetLibraryInfo *TLI;
- const DataLayout *DL;
// The loop induction variable.
Instruction *IV;
@@ -1131,7 +1127,7 @@ bool LoopReroll::DAGRootTracker::validate(ReductionTracker &Reductions) {
// needed because otherwise isSafeToSpeculativelyExecute returns
// false on PHI nodes.
if (!isa<PHINode>(I) && !isSimpleLoadStore(I) &&
- !isSafeToSpeculativelyExecute(I, DL))
+ !isSafeToSpeculativelyExecute(I))
// Intervening instructions cause side effects.
FutureSideEffects = true;
}
@@ -1161,11 +1157,10 @@ bool LoopReroll::DAGRootTracker::validate(ReductionTracker &Reductions) {
// side effects, and this instruction might also, then we can't reorder
// them, and this matching fails. As an exception, we allow the alias
// set tracker to handle regular (simple) load/store dependencies.
- if (FutureSideEffects &&
- ((!isSimpleLoadStore(BaseInst) &&
- !isSafeToSpeculativelyExecute(BaseInst, DL)) ||
- (!isSimpleLoadStore(RootInst) &&
- !isSafeToSpeculativelyExecute(RootInst, DL)))) {
+ if (FutureSideEffects && ((!isSimpleLoadStore(BaseInst) &&
+ !isSafeToSpeculativelyExecute(BaseInst)) ||
+ (!isSimpleLoadStore(RootInst) &&
+ !isSafeToSpeculativelyExecute(RootInst)))) {
DEBUG(dbgs() << "LRR: iteration root match failed at " << *BaseInst <<
" vs. " << *RootInst <<
" (side effects prevent reordering)\n");
@@ -1272,6 +1267,7 @@ void LoopReroll::DAGRootTracker::replace(const SCEV *IterCount) {
++J;
}
+ const DataLayout &DL = Header->getModule()->getDataLayout();
// We need to create a new induction variable for each different BaseInst.
for (auto &DRS : RootSets) {
@@ -1284,7 +1280,7 @@ void LoopReroll::DAGRootTracker::replace(const SCEV *IterCount) {
SE->getConstant(RealIVSCEV->getType(), 1),
L, SCEV::FlagAnyWrap));
{ // Limit the lifetime of SCEVExpander.
- SCEVExpander Expander(*SE, "reroll");
+ SCEVExpander Expander(*SE, DL, "reroll");
Value *NewIV = Expander.expandCodeFor(H, IV->getType(), Header->begin());
for (auto &KV : Uses) {
@@ -1324,7 +1320,7 @@ void LoopReroll::DAGRootTracker::replace(const SCEV *IterCount) {
}
}
- SimplifyInstructionsInBlock(Header, DL, TLI);
+ SimplifyInstructionsInBlock(Header, TLI);
DeleteDeadPHIs(Header, TLI);
}
@@ -1448,7 +1444,7 @@ void LoopReroll::ReductionTracker::replaceSelected() {
bool LoopReroll::reroll(Instruction *IV, Loop *L, BasicBlock *Header,
const SCEV *IterCount,
ReductionTracker &Reductions) {
- DAGRootTracker DAGRoots(this, L, IV, SE, AA, TLI, DL);
+ DAGRootTracker DAGRoots(this, L, IV, SE, AA, TLI);
if (!DAGRoots.findRoots())
return false;
@@ -1477,8 +1473,6 @@ bool LoopReroll::runOnLoop(Loop *L, LPPassManager &LPM) {
LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
SE = &getAnalysis<ScalarEvolution>();
TLI = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();
- DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>();
- DL = DLP ? &DLP->getDataLayout() : nullptr;
DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
BasicBlock *Header = L->getHeader();
diff --git a/lib/Transforms/Scalar/LoopRotation.cpp b/lib/Transforms/Scalar/LoopRotation.cpp
index 4d12349..a675e12 100644
--- a/lib/Transforms/Scalar/LoopRotation.cpp
+++ b/lib/Transforms/Scalar/LoopRotation.cpp
@@ -24,8 +24,10 @@
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/Module.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Utils/SSAUpdater.h"
@@ -412,6 +414,8 @@ bool LoopRotate::rotateLoop(Loop *L, bool SimplifiedLatch) {
for (; PHINode *PN = dyn_cast<PHINode>(I); ++I)
ValueMap[PN] = PN->getIncomingValueForBlock(OrigPreheader);
+ const DataLayout &DL = L->getHeader()->getModule()->getDataLayout();
+
// For the rest of the instructions, either hoist to the OrigPreheader if
// possible or create a clone in the OldPreHeader if not.
TerminatorInst *LoopEntryBranch = OrigPreheader->getTerminator();
@@ -442,8 +446,8 @@ bool LoopRotate::rotateLoop(Loop *L, bool SimplifiedLatch) {
// With the operands remapped, see if the instruction constant folds or is
// otherwise simplifyable. This commonly occurs because the entry from PHI
// nodes allows icmps and other instructions to fold.
- // FIXME: Provide DL, TLI, DT, AC to SimplifyInstruction.
- Value *V = SimplifyInstruction(C);
+ // FIXME: Provide TLI, DT, AC to SimplifyInstruction.
+ Value *V = SimplifyInstruction(C, DL);
if (V && LI->replacementPreservesLCSSAForm(C, V)) {
// If so, then delete the temporary instruction and stick the folded value
// in the map.
diff --git a/lib/Transforms/Scalar/LoopStrengthReduce.cpp b/lib/Transforms/Scalar/LoopStrengthReduce.cpp
index 318065e..8445d5f 100644
--- a/lib/Transforms/Scalar/LoopStrengthReduce.cpp
+++ b/lib/Transforms/Scalar/LoopStrengthReduce.cpp
@@ -68,6 +68,7 @@
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/Module.h"
#include "llvm/IR/ValueHandle.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
@@ -3825,7 +3826,7 @@ void LSRInstance::GenerateCrossUseConstantOffsets() {
if (C->getValue()->isNegative() !=
(NewF.BaseOffset < 0) &&
(C->getValue()->getValue().abs() * APInt(BitWidth, F.Scale))
- .ule(abs64(NewF.BaseOffset)))
+ .ule(std::abs(NewF.BaseOffset)))
continue;
// OK, looks good.
@@ -3856,7 +3857,7 @@ void LSRInstance::GenerateCrossUseConstantOffsets() {
J != JE; ++J)
if (const SCEVConstant *C = dyn_cast<SCEVConstant>(*J))
if ((C->getValue()->getValue() + NewF.BaseOffset).abs().slt(
- abs64(NewF.BaseOffset)) &&
+ std::abs(NewF.BaseOffset)) &&
(C->getValue()->getValue() +
NewF.BaseOffset).countTrailingZeros() >=
countTrailingZeros<uint64_t>(NewF.BaseOffset))
@@ -4823,7 +4824,8 @@ LSRInstance::ImplementSolution(const SmallVectorImpl<const Formula *> &Solution,
// we can remove them after we are done working.
SmallVector<WeakVH, 16> DeadInsts;
- SCEVExpander Rewriter(SE, "lsr");
+ SCEVExpander Rewriter(SE, L->getHeader()->getModule()->getDataLayout(),
+ "lsr");
#ifndef NDEBUG
Rewriter.setDebugType(DEBUG_TYPE);
#endif
@@ -5093,7 +5095,8 @@ bool LoopStrengthReduce::runOnLoop(Loop *L, LPPassManager & /*LPM*/) {
Changed |= DeleteDeadPHIs(L->getHeader());
if (EnablePhiElim && L->isLoopSimplifyForm()) {
SmallVector<WeakVH, 16> DeadInsts;
- SCEVExpander Rewriter(getAnalysis<ScalarEvolution>(), "lsr");
+ const DataLayout &DL = L->getHeader()->getModule()->getDataLayout();
+ SCEVExpander Rewriter(getAnalysis<ScalarEvolution>(), DL, "lsr");
#ifndef NDEBUG
Rewriter.setDebugType(DEBUG_TYPE);
#endif
diff --git a/lib/Transforms/Scalar/LoopUnrollPass.cpp b/lib/Transforms/Scalar/LoopUnrollPass.cpp
index 924be16..600cbde 100644
--- a/lib/Transforms/Scalar/LoopUnrollPass.cpp
+++ b/lib/Transforms/Scalar/LoopUnrollPass.cpp
@@ -16,6 +16,7 @@
#include "llvm/ADT/SetVector.h"
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/CodeMetrics.h"
+#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/LoopPass.h"
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
@@ -23,14 +24,13 @@
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DiagnosticInfo.h"
#include "llvm/IR/Dominators.h"
+#include "llvm/IR/InstVisitor.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Metadata.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Utils/UnrollLoop.h"
-#include "llvm/IR/InstVisitor.h"
-#include "llvm/Analysis/InstructionSimplify.h"
#include <climits>
using namespace llvm;
@@ -259,6 +259,7 @@ static bool isLoadFromConstantInitializer(Value *V) {
return false;
}
+namespace {
struct FindConstantPointers {
bool LoadCanBeConstantFolded;
bool IndexIsConstant;
@@ -356,11 +357,12 @@ class UnrollAnalyzer : public InstVisitor<UnrollAnalyzer, bool> {
if (Constant *SimpleRHS = SimplifiedValues.lookup(RHS))
RHS = SimpleRHS;
Value *SimpleV = nullptr;
+ const DataLayout &DL = I.getModule()->getDataLayout();
if (auto FI = dyn_cast<FPMathOperator>(&I))
SimpleV =
- SimplifyFPBinOp(I.getOpcode(), LHS, RHS, FI->getFastMathFlags());
+ SimplifyFPBinOp(I.getOpcode(), LHS, RHS, FI->getFastMathFlags(), DL);
else
- SimpleV = SimplifyBinOp(I.getOpcode(), LHS, RHS);
+ SimpleV = SimplifyBinOp(I.getOpcode(), LHS, RHS, DL);
if (SimpleV && CountedInstructions.insert(&I).second)
NumberOfOptimizedInstructions += TTI.getUserCost(&I);
@@ -540,6 +542,7 @@ public:
return NumberOfOptimizedInstructions;
}
};
+} // namespace
// Complete loop unrolling can make some loads constant, and we need to know if
// that would expose any further optimization opportunities.
@@ -619,6 +622,11 @@ static bool HasUnrollDisablePragma(const Loop *L) {
return GetUnrollMetadataForLoop(L, "llvm.loop.unroll.disable");
}
+// Returns true if the loop has a runtime unroll(disable) pragma.
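+// For illustration, this metadata typically appears on the loop via its
+// !llvm.loop annotation, whose operands include
+// !{!"llvm.loop.unroll.runtime.disable"}.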
+static bool HasRuntimeUnrollDisablePragma(const Loop *L) {
+ return GetUnrollMetadataForLoop(L, "llvm.loop.unroll.runtime.disable");
+}
+
// If loop has an unroll_count pragma return the (necessarily
// positive) value from the pragma. Otherwise return 0.
static unsigned UnrollCountPragmaValue(const Loop *L) {
@@ -807,6 +815,9 @@ bool LoopUnroll::runOnLoop(Loop *L, LPPassManager &LPM) {
// Reduce count based on the type of unrolling and the threshold values.
unsigned OriginalCount = Count;
bool AllowRuntime = UserRuntime ? CurrentRuntime : UP.Runtime;
+ if (HasRuntimeUnrollDisablePragma(L)) {
+ AllowRuntime = false;
+ }
if (Unrolling == Partial) {
bool AllowPartial = UserAllowPartial ? CurrentAllowPartial : UP.Partial;
if (!AllowPartial && !CountSetExplicitly) {
diff --git a/lib/Transforms/Scalar/LoopUnswitch.cpp b/lib/Transforms/Scalar/LoopUnswitch.cpp
index 987dc96..988d2af 100644
--- a/lib/Transforms/Scalar/LoopUnswitch.cpp
+++ b/lib/Transforms/Scalar/LoopUnswitch.cpp
@@ -42,6 +42,7 @@
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Instructions.h"
+#include "llvm/IR/Module.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
@@ -1082,6 +1083,7 @@ void LoopUnswitch::RewriteLoopBodyWithConditionConstant(Loop *L, Value *LIC,
/// pass.
///
void LoopUnswitch::SimplifyCode(std::vector<Instruction*> &Worklist, Loop *L) {
+ const DataLayout &DL = L->getHeader()->getModule()->getDataLayout();
while (!Worklist.empty()) {
Instruction *I = Worklist.back();
Worklist.pop_back();
@@ -1104,7 +1106,7 @@ void LoopUnswitch::SimplifyCode(std::vector<Instruction*> &Worklist, Loop *L) {
// See if instruction simplification can hack this up. This is common for
// things like "select false, X, Y" after unswitching made the condition be
// 'false'. TODO: update the domtree properly so we can pass it here.
- if (Value *V = SimplifyInstruction(I))
+ if (Value *V = SimplifyInstruction(I, DL))
if (LI->replacementPreservesLCSSAForm(I, V)) {
ReplaceUsesOfWith(I, V, Worklist, L, LPM);
continue;
diff --git a/lib/Transforms/Scalar/MemCpyOptimizer.cpp b/lib/Transforms/Scalar/MemCpyOptimizer.cpp
index 006b885..2b5a078 100644
--- a/lib/Transforms/Scalar/MemCpyOptimizer.cpp
+++ b/lib/Transforms/Scalar/MemCpyOptimizer.cpp
@@ -18,6 +18,7 @@
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/MemoryDependenceAnalysis.h"
+#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Dominators.h"
@@ -28,7 +29,6 @@
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Transforms/Utils/Local.h"
#include <list>
using namespace llvm;
@@ -41,7 +41,8 @@ STATISTIC(NumMoveToCpy, "Number of memmoves converted to memcpy");
STATISTIC(NumCpyToSet, "Number of memcpys converted to memset");
static int64_t GetOffsetFromIndex(const GEPOperator *GEP, unsigned Idx,
- bool &VariableIdxFound, const DataLayout &TD){
+ bool &VariableIdxFound,
+ const DataLayout &DL) {
// Skip over the first indices.
gep_type_iterator GTI = gep_type_begin(GEP);
for (unsigned i = 1; i != Idx; ++i, ++GTI)
@@ -57,13 +58,13 @@ static int64_t GetOffsetFromIndex(const GEPOperator *GEP, unsigned Idx,
// Handle struct indices, which add their field offset to the pointer.
if (StructType *STy = dyn_cast<StructType>(*GTI)) {
- Offset += TD.getStructLayout(STy)->getElementOffset(OpC->getZExtValue());
+ Offset += DL.getStructLayout(STy)->getElementOffset(OpC->getZExtValue());
continue;
}
// Otherwise, we have a sequential type like an array or vector. Multiply
// the index by the ElementSize.
- uint64_t Size = TD.getTypeAllocSize(GTI.getIndexedType());
+ uint64_t Size = DL.getTypeAllocSize(GTI.getIndexedType());
Offset += Size*OpC->getSExtValue();
}
@@ -74,7 +75,7 @@ static int64_t GetOffsetFromIndex(const GEPOperator *GEP, unsigned Idx,
/// constant offset, and return that constant offset. For example, Ptr1 might
/// be &A[42], and Ptr2 might be &A[40]. In this case offset would be -8.
static bool IsPointerOffset(Value *Ptr1, Value *Ptr2, int64_t &Offset,
- const DataLayout &TD) {
+ const DataLayout &DL) {
Ptr1 = Ptr1->stripPointerCasts();
Ptr2 = Ptr2->stripPointerCasts();
@@ -92,12 +93,12 @@ static bool IsPointerOffset(Value *Ptr1, Value *Ptr2, int64_t &Offset,
// If one pointer is a GEP and the other isn't, then see if the GEP is a
// constant offset from the base, as in "P" and "gep P, 1".
if (GEP1 && !GEP2 && GEP1->getOperand(0)->stripPointerCasts() == Ptr2) {
- Offset = -GetOffsetFromIndex(GEP1, 1, VariableIdxFound, TD);
+ Offset = -GetOffsetFromIndex(GEP1, 1, VariableIdxFound, DL);
return !VariableIdxFound;
}
if (GEP2 && !GEP1 && GEP2->getOperand(0)->stripPointerCasts() == Ptr1) {
- Offset = GetOffsetFromIndex(GEP2, 1, VariableIdxFound, TD);
+ Offset = GetOffsetFromIndex(GEP2, 1, VariableIdxFound, DL);
return !VariableIdxFound;
}
@@ -115,8 +116,8 @@ static bool IsPointerOffset(Value *Ptr1, Value *Ptr2, int64_t &Offset,
if (GEP1->getOperand(Idx) != GEP2->getOperand(Idx))
break;
- int64_t Offset1 = GetOffsetFromIndex(GEP1, Idx, VariableIdxFound, TD);
- int64_t Offset2 = GetOffsetFromIndex(GEP2, Idx, VariableIdxFound, TD);
+ int64_t Offset1 = GetOffsetFromIndex(GEP1, Idx, VariableIdxFound, DL);
+ int64_t Offset2 = GetOffsetFromIndex(GEP2, Idx, VariableIdxFound, DL);
if (VariableIdxFound) return false;
Offset = Offset2-Offset1;
@@ -150,12 +151,11 @@ struct MemsetRange {
/// TheStores - The actual stores that make up this range.
SmallVector<Instruction*, 16> TheStores;
- bool isProfitableToUseMemset(const DataLayout &TD) const;
-
+ bool isProfitableToUseMemset(const DataLayout &DL) const;
};
} // end anon namespace
-bool MemsetRange::isProfitableToUseMemset(const DataLayout &TD) const {
+bool MemsetRange::isProfitableToUseMemset(const DataLayout &DL) const {
// If we found more than 4 stores to merge or 16 bytes, use memset.
if (TheStores.size() >= 4 || End-Start >= 16) return true;
@@ -183,7 +183,7 @@ bool MemsetRange::isProfitableToUseMemset(const DataLayout &TD) const {
// size. If so, check to see whether we will end up actually reducing the
// number of stores used.
unsigned Bytes = unsigned(End-Start);
- unsigned MaxIntSize = TD.getLargestLegalIntTypeSize();
+ unsigned MaxIntSize = DL.getLargestLegalIntTypeSize();
if (MaxIntSize == 0)
MaxIntSize = 1;
unsigned NumPointerStores = Bytes / MaxIntSize;
@@ -314,14 +314,12 @@ namespace {
class MemCpyOpt : public FunctionPass {
MemoryDependenceAnalysis *MD;
TargetLibraryInfo *TLI;
- const DataLayout *DL;
public:
static char ID; // Pass identification, replacement for typeid
MemCpyOpt() : FunctionPass(ID) {
initializeMemCpyOptPass(*PassRegistry::getPassRegistry());
MD = nullptr;
TLI = nullptr;
- DL = nullptr;
}
bool runOnFunction(Function &F) override;
@@ -377,13 +375,13 @@ INITIALIZE_PASS_END(MemCpyOpt, "memcpyopt", "MemCpy Optimization",
/// attempts to merge them together into a memcpy/memset.
Instruction *MemCpyOpt::tryMergingIntoMemset(Instruction *StartInst,
Value *StartPtr, Value *ByteVal) {
- if (!DL) return nullptr;
+ const DataLayout &DL = StartInst->getModule()->getDataLayout();
// Okay, so we now have a single store that can be splatable. Scan to find
// all subsequent stores of the same value to offset from the same pointer.
// Join these together into ranges, so we can decide whether contiguous blocks
// are stored.
- MemsetRanges Ranges(*DL);
+ MemsetRanges Ranges(DL);
BasicBlock::iterator BI = StartInst;
for (++BI; !isa<TerminatorInst>(BI); ++BI) {
@@ -406,8 +404,8 @@ Instruction *MemCpyOpt::tryMergingIntoMemset(Instruction *StartInst,
// Check to see if this store is to a constant offset from the start ptr.
int64_t Offset;
- if (!IsPointerOffset(StartPtr, NextStore->getPointerOperand(),
- Offset, *DL))
+ if (!IsPointerOffset(StartPtr, NextStore->getPointerOperand(), Offset,
+ DL))
break;
Ranges.addStore(Offset, NextStore);
@@ -420,7 +418,7 @@ Instruction *MemCpyOpt::tryMergingIntoMemset(Instruction *StartInst,
// Check to see if this store is to a constant offset from the start ptr.
int64_t Offset;
- if (!IsPointerOffset(StartPtr, MSI->getDest(), Offset, *DL))
+ if (!IsPointerOffset(StartPtr, MSI->getDest(), Offset, DL))
break;
Ranges.addMemSet(Offset, MSI);
@@ -452,7 +450,7 @@ Instruction *MemCpyOpt::tryMergingIntoMemset(Instruction *StartInst,
if (Range.TheStores.size() == 1) continue;
// If it is profitable to lower this range to memset, do so now.
- if (!Range.isProfitableToUseMemset(*DL))
+ if (!Range.isProfitableToUseMemset(DL))
continue;
// Otherwise, we do want to transform this! Create a new memset.
@@ -464,7 +462,7 @@ Instruction *MemCpyOpt::tryMergingIntoMemset(Instruction *StartInst,
if (Alignment == 0) {
Type *EltType =
cast<PointerType>(StartPtr->getType())->getElementType();
- Alignment = DL->getABITypeAlignment(EltType);
+ Alignment = DL.getABITypeAlignment(EltType);
}
AMemSet =
@@ -494,8 +492,7 @@ Instruction *MemCpyOpt::tryMergingIntoMemset(Instruction *StartInst,
bool MemCpyOpt::processStore(StoreInst *SI, BasicBlock::iterator &BBI) {
if (!SI->isSimple()) return false;
-
- if (!DL) return false;
+ const DataLayout &DL = SI->getModule()->getDataLayout();
// Detect cases where we're performing call slot forwarding, but
// happen to be using a load-store pair to implement it, rather than
@@ -525,16 +522,16 @@ bool MemCpyOpt::processStore(StoreInst *SI, BasicBlock::iterator &BBI) {
if (C) {
unsigned storeAlign = SI->getAlignment();
if (!storeAlign)
- storeAlign = DL->getABITypeAlignment(SI->getOperand(0)->getType());
+ storeAlign = DL.getABITypeAlignment(SI->getOperand(0)->getType());
unsigned loadAlign = LI->getAlignment();
if (!loadAlign)
- loadAlign = DL->getABITypeAlignment(LI->getType());
+ loadAlign = DL.getABITypeAlignment(LI->getType());
- bool changed = performCallSlotOptzn(LI,
- SI->getPointerOperand()->stripPointerCasts(),
- LI->getPointerOperand()->stripPointerCasts(),
- DL->getTypeStoreSize(SI->getOperand(0)->getType()),
- std::min(storeAlign, loadAlign), C);
+ bool changed = performCallSlotOptzn(
+ LI, SI->getPointerOperand()->stripPointerCasts(),
+ LI->getPointerOperand()->stripPointerCasts(),
+ DL.getTypeStoreSize(SI->getOperand(0)->getType()),
+ std::min(storeAlign, loadAlign), C);
if (changed) {
MD->removeInstruction(SI);
SI->eraseFromParent();
@@ -606,15 +603,13 @@ bool MemCpyOpt::performCallSlotOptzn(Instruction *cpy,
if (!srcAlloca)
return false;
- // Check that all of src is copied to dest.
- if (!DL) return false;
-
ConstantInt *srcArraySize = dyn_cast<ConstantInt>(srcAlloca->getArraySize());
if (!srcArraySize)
return false;
- uint64_t srcSize = DL->getTypeAllocSize(srcAlloca->getAllocatedType()) *
- srcArraySize->getZExtValue();
+ const DataLayout &DL = cpy->getModule()->getDataLayout();
+ uint64_t srcSize = DL.getTypeAllocSize(srcAlloca->getAllocatedType()) *
+ srcArraySize->getZExtValue();
if (cpyLen < srcSize)
return false;
@@ -628,8 +623,8 @@ bool MemCpyOpt::performCallSlotOptzn(Instruction *cpy,
if (!destArraySize)
return false;
- uint64_t destSize = DL->getTypeAllocSize(A->getAllocatedType()) *
- destArraySize->getZExtValue();
+ uint64_t destSize = DL.getTypeAllocSize(A->getAllocatedType()) *
+ destArraySize->getZExtValue();
if (destSize < srcSize)
return false;
@@ -648,7 +643,7 @@ bool MemCpyOpt::performCallSlotOptzn(Instruction *cpy,
return false;
}
- uint64_t destSize = DL->getTypeAllocSize(StructTy);
+ uint64_t destSize = DL.getTypeAllocSize(StructTy);
if (destSize < srcSize)
return false;
}
@@ -659,7 +654,7 @@ bool MemCpyOpt::performCallSlotOptzn(Instruction *cpy,
// Check that dest points to memory that is at least as aligned as src.
unsigned srcAlign = srcAlloca->getAlignment();
if (!srcAlign)
- srcAlign = DL->getABITypeAlignment(srcAlloca->getAllocatedType());
+ srcAlign = DL.getABITypeAlignment(srcAlloca->getAllocatedType());
bool isDestSufficientlyAligned = srcAlign <= cpyAlign;
// If dest is not aligned enough and we can't increase its alignment then
// bail out.
@@ -959,12 +954,11 @@ bool MemCpyOpt::processMemMove(MemMoveInst *M) {
/// processByValArgument - This is called on every byval argument in call sites.
bool MemCpyOpt::processByValArgument(CallSite CS, unsigned ArgNo) {
- if (!DL) return false;
-
+ const DataLayout &DL = CS.getCaller()->getParent()->getDataLayout();
// Find out what feeds this byval argument.
Value *ByValArg = CS.getArgument(ArgNo);
Type *ByValTy = cast<PointerType>(ByValArg->getType())->getElementType();
- uint64_t ByValSize = DL->getTypeAllocSize(ByValTy);
+ uint64_t ByValSize = DL.getTypeAllocSize(ByValTy);
MemDepResult DepInfo =
MD->getPointerDependencyFrom(AliasAnalysis::Location(ByValArg, ByValSize),
true, CS.getInstruction(),
@@ -997,8 +991,8 @@ bool MemCpyOpt::processByValArgument(CallSite CS, unsigned ArgNo) {
*CS->getParent()->getParent());
DominatorTree &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
if (MDep->getAlignment() < ByValAlign &&
- getOrEnforceKnownAlignment(MDep->getSource(), ByValAlign, DL, &AC,
- CS.getInstruction(), &DT) < ByValAlign)
+ getOrEnforceKnownAlignment(MDep->getSource(), ByValAlign, DL,
+ CS.getInstruction(), &AC, &DT) < ByValAlign)
return false;
// Verify that the copied-from memory doesn't change in between the memcpy and
@@ -1077,8 +1071,6 @@ bool MemCpyOpt::runOnFunction(Function &F) {
bool MadeChange = false;
MD = &getAnalysis<MemoryDependenceAnalysis>();
- DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>();
- DL = DLP ? &DLP->getDataLayout() : nullptr;
TLI = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();
// If we don't have at least memset and memcpy, there is little point of doing
diff --git a/lib/Transforms/Scalar/MergedLoadStoreMotion.cpp b/lib/Transforms/Scalar/MergedLoadStoreMotion.cpp
index 8fad63f..73f4296 100644
--- a/lib/Transforms/Scalar/MergedLoadStoreMotion.cpp
+++ b/lib/Transforms/Scalar/MergedLoadStoreMotion.cpp
@@ -81,12 +81,13 @@
#include "llvm/Analysis/Loads.h"
#include "llvm/Analysis/MemoryBuiltins.h"
#include "llvm/Analysis/MemoryDependenceAnalysis.h"
+#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/IR/Metadata.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/Support/Allocator.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
-#include "llvm/Analysis/TargetLibraryInfo.h"
+#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/SSAUpdater.h"
#include <vector>
diff --git a/lib/Transforms/Scalar/Reassociate.cpp b/lib/Transforms/Scalar/Reassociate.cpp
index 98016b4..307cc73 100644
--- a/lib/Transforms/Scalar/Reassociate.cpp
+++ b/lib/Transforms/Scalar/Reassociate.cpp
@@ -321,10 +321,8 @@ unsigned Reassociate::getRank(Value *V) {
// If this is a not or neg instruction, do not count it for rank. This
// assures us that X and ~X will have the same rank.
- Type *Ty = V->getType();
- if ((!Ty->isIntegerTy() && !Ty->isFloatingPointTy()) ||
- (!BinaryOperator::isNot(I) && !BinaryOperator::isNeg(I) &&
- !BinaryOperator::isFNeg(I)))
+ if (!BinaryOperator::isNot(I) && !BinaryOperator::isNeg(I) &&
+ !BinaryOperator::isFNeg(I))
++Rank;
DEBUG(dbgs() << "Calculated Rank[" << V->getName() << "] = " << Rank << "\n");
@@ -351,7 +349,7 @@ void Reassociate::canonicalizeOperands(Instruction *I) {
static BinaryOperator *CreateAdd(Value *S1, Value *S2, const Twine &Name,
Instruction *InsertBefore, Value *FlagsOp) {
- if (S1->getType()->isIntegerTy())
+ if (S1->getType()->isIntOrIntVectorTy())
return BinaryOperator::CreateAdd(S1, S2, Name, InsertBefore);
else {
BinaryOperator *Res =
@@ -363,7 +361,7 @@ static BinaryOperator *CreateAdd(Value *S1, Value *S2, const Twine &Name,
static BinaryOperator *CreateMul(Value *S1, Value *S2, const Twine &Name,
Instruction *InsertBefore, Value *FlagsOp) {
- if (S1->getType()->isIntegerTy())
+ if (S1->getType()->isIntOrIntVectorTy())
return BinaryOperator::CreateMul(S1, S2, Name, InsertBefore);
else {
BinaryOperator *Res =
@@ -375,7 +373,7 @@ static BinaryOperator *CreateMul(Value *S1, Value *S2, const Twine &Name,
static BinaryOperator *CreateNeg(Value *S1, const Twine &Name,
Instruction *InsertBefore, Value *FlagsOp) {
- if (S1->getType()->isIntegerTy())
+ if (S1->getType()->isIntOrIntVectorTy())
return BinaryOperator::CreateNeg(S1, Name, InsertBefore);
else {
BinaryOperator *Res = BinaryOperator::CreateFNeg(S1, Name, InsertBefore);
@@ -388,8 +386,8 @@ static BinaryOperator *CreateNeg(Value *S1, const Twine &Name,
///
static BinaryOperator *LowerNegateToMultiply(Instruction *Neg) {
Type *Ty = Neg->getType();
- Constant *NegOne = Ty->isIntegerTy() ? ConstantInt::getAllOnesValue(Ty)
- : ConstantFP::get(Ty, -1.0);
+ Constant *NegOne = Ty->isIntOrIntVectorTy() ?
+ ConstantInt::getAllOnesValue(Ty) : ConstantFP::get(Ty, -1.0);
BinaryOperator *Res = CreateMul(Neg->getOperand(1), NegOne, "", Neg, Neg);
Neg->setOperand(1, Constant::getNullValue(Ty)); // Drop use of op.
@@ -872,7 +870,7 @@ void Reassociate::RewriteExprTree(BinaryOperator *I,
Constant *Undef = UndefValue::get(I->getType());
NewOp = BinaryOperator::Create(Instruction::BinaryOps(Opcode),
Undef, Undef, "", I);
- if (NewOp->getType()->isFloatingPointTy())
+ if (NewOp->getType()->isFPOrFPVectorTy())
NewOp->setFastMathFlags(I->getFastMathFlags());
} else {
NewOp = NodesToRewrite.pop_back_val();
@@ -1520,8 +1518,8 @@ Value *Reassociate::OptimizeAdd(Instruction *I,
// Insert a new multiply.
Type *Ty = TheOp->getType();
- Constant *C = Ty->isIntegerTy() ? ConstantInt::get(Ty, NumFound)
- : ConstantFP::get(Ty, NumFound);
+ Constant *C = Ty->isIntOrIntVectorTy() ?
+ ConstantInt::get(Ty, NumFound) : ConstantFP::get(Ty, NumFound);
Instruction *Mul = CreateMul(TheOp, C, "factor", I, I);
// Now that we have inserted a multiply, optimize it. This allows us to
@@ -1661,7 +1659,7 @@ Value *Reassociate::OptimizeAdd(Instruction *I,
// from an expression will drop a use of maxocc, and this can cause
// RemoveFactorFromExpression on successive values to behave differently.
Instruction *DummyInst =
- I->getType()->isIntegerTy()
+ I->getType()->isIntOrIntVectorTy()
? BinaryOperator::CreateAdd(MaxOccVal, MaxOccVal)
: BinaryOperator::CreateFAdd(MaxOccVal, MaxOccVal);
@@ -1792,7 +1790,7 @@ static Value *buildMultiplyTree(IRBuilder<> &Builder,
Value *LHS = Ops.pop_back_val();
do {
- if (LHS->getType()->isIntegerTy())
+ if (LHS->getType()->isIntOrIntVectorTy())
LHS = Builder.CreateMul(LHS, Ops.pop_back_val());
else
LHS = Builder.CreateFMul(LHS, Ops.pop_back_val());
@@ -2090,8 +2088,9 @@ void Reassociate::OptimizeInst(Instruction *I) {
if (I->isCommutative())
canonicalizeOperands(I);
- // Don't optimize vector instructions.
- if (I->getType()->isVectorTy())
+ // TODO: We should optimize vector Xor instructions, but they are
+ // currently unsupported.
+ if (I->getType()->isVectorTy() && I->getOpcode() == Instruction::Xor)
return;
// Don't optimize floating point instructions that don't have unsafe algebra.
@@ -2170,9 +2169,6 @@ void Reassociate::OptimizeInst(Instruction *I) {
}
void Reassociate::ReassociateExpression(BinaryOperator *I) {
- assert(!I->getType()->isVectorTy() &&
- "Reassociation of vector instructions is not supported.");
-
// First, walk the expression tree, linearizing the tree, collecting the
// operand information.
SmallVector<RepeatedValue, 8> Tree;
diff --git a/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp b/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp
index ca9ab54..f5d21ff 100644
--- a/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp
+++ b/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp
@@ -548,9 +548,6 @@ public:
}
PhiState(Value *b) : status(Base), base(b) {}
PhiState() : status(Unknown), base(nullptr) {}
- PhiState(const PhiState &other) : status(other.status), base(other.base) {
- assert(status != Base || base);
- }
Status getStatus() const { return status; }
Value *getBase() const { return base; }
@@ -684,12 +681,19 @@ static Value *findBasePointer(Value *I, DefiningValueMapTy &cache,
states[def] = PhiState();
// Recursively fill in all phis & selects reachable from the initial one
// for which we don't already know a definite base value for
- // PERF: Yes, this is as horribly inefficient as it looks.
+ // TODO: This should be rewritten with a worklist
bool done = false;
while (!done) {
done = true;
+ // Since we're adding elements to 'states' as we run, we can't keep
+ // iterators into the set.
+ SmallVector<Value*, 16> Keys;
+ Keys.reserve(states.size());
for (auto Pair : states) {
- Value *v = Pair.first;
+ Value *V = Pair.first;
+ Keys.push_back(V);
+ }
+ for (Value *v : Keys) {
assert(!isKnownBaseResult(v) && "why did it get added?");
if (PHINode *phi = dyn_cast<PHINode>(v)) {
assert(phi->getNumIncomingValues() > 0 &&
@@ -730,10 +734,12 @@ static Value *findBasePointer(Value *I, DefiningValueMapTy &cache,
// have reached conflict state. The current version seems too conservative.
bool progress = true;
- size_t oldSize = 0;
while (progress) {
- oldSize = states.size();
+#ifndef NDEBUG
+ size_t oldSize = states.size();
+#endif
progress = false;
+    // We're only changing mapped values in this loop, so it is safe to keep
+    // iterators.
for (auto Pair : states) {
MeetPhiStates calculateMeet(states);
Value *v = Pair.first;
@@ -768,46 +774,58 @@ static Value *findBasePointer(Value *I, DefiningValueMapTy &cache,
}
// Insert Phis for all conflicts
+ // We want to keep naming deterministic in the loop that follows, so
+ // sort the keys before iteration. This is useful in allowing us to
+ // write stable tests. Note that there is no invalidation issue here.
+ SmallVector<Value*, 16> Keys;
+ Keys.reserve(states.size());
for (auto Pair : states) {
- Instruction *v = cast<Instruction>(Pair.first);
- PhiState state = Pair.second;
+ Value *V = Pair.first;
+ Keys.push_back(V);
+ }
+ std::sort(Keys.begin(), Keys.end(), order_by_name);
+ // TODO: adjust naming patterns to avoid this order of iteration dependency
+ for (Value *V : Keys) {
+ Instruction *v = cast<Instruction>(V);
+ PhiState state = states[V];
assert(!isKnownBaseResult(v) && "why did it get added?");
assert(!state.isUnknown() && "Optimistic algorithm didn't complete!");
- if (state.isConflict()) {
- if (isa<PHINode>(v)) {
- int num_preds =
- std::distance(pred_begin(v->getParent()), pred_end(v->getParent()));
- assert(num_preds > 0 && "how did we reach here");
- PHINode *phi = PHINode::Create(v->getType(), num_preds, "base_phi", v);
- NewInsertedDefs.insert(phi);
- // Add metadata marking this as a base value
- auto *const_1 = ConstantInt::get(
- Type::getInt32Ty(
- v->getParent()->getParent()->getParent()->getContext()),
- 1);
- auto MDConst = ConstantAsMetadata::get(const_1);
- MDNode *md = MDNode::get(
- v->getParent()->getParent()->getParent()->getContext(), MDConst);
- phi->setMetadata("is_base_value", md);
- states[v] = PhiState(PhiState::Conflict, phi);
- } else if (SelectInst *sel = dyn_cast<SelectInst>(v)) {
- // The undef will be replaced later
- UndefValue *undef = UndefValue::get(sel->getType());
- SelectInst *basesel = SelectInst::Create(sel->getCondition(), undef,
- undef, "base_select", sel);
- NewInsertedDefs.insert(basesel);
- // Add metadata marking this as a base value
- auto *const_1 = ConstantInt::get(
- Type::getInt32Ty(
- v->getParent()->getParent()->getParent()->getContext()),
- 1);
- auto MDConst = ConstantAsMetadata::get(const_1);
- MDNode *md = MDNode::get(
- v->getParent()->getParent()->getParent()->getContext(), MDConst);
- basesel->setMetadata("is_base_value", md);
- states[v] = PhiState(PhiState::Conflict, basesel);
- } else
- llvm_unreachable("unknown conflict type");
+ if (!state.isConflict())
+ continue;
+
+ if (isa<PHINode>(v)) {
+ int num_preds =
+ std::distance(pred_begin(v->getParent()), pred_end(v->getParent()));
+ assert(num_preds > 0 && "how did we reach here");
+ PHINode *phi = PHINode::Create(v->getType(), num_preds, "base_phi", v);
+ NewInsertedDefs.insert(phi);
+ // Add metadata marking this as a base value
+ auto *const_1 = ConstantInt::get(
+ Type::getInt32Ty(
+ v->getParent()->getParent()->getParent()->getContext()),
+ 1);
+ auto MDConst = ConstantAsMetadata::get(const_1);
+ MDNode *md = MDNode::get(
+ v->getParent()->getParent()->getParent()->getContext(), MDConst);
+ phi->setMetadata("is_base_value", md);
+ states[v] = PhiState(PhiState::Conflict, phi);
+ } else {
+ SelectInst *sel = cast<SelectInst>(v);
+ // The undef will be replaced later
+ UndefValue *undef = UndefValue::get(sel->getType());
+ SelectInst *basesel = SelectInst::Create(sel->getCondition(), undef,
+ undef, "base_select", sel);
+ NewInsertedDefs.insert(basesel);
+ // Add metadata marking this as a base value
+ auto *const_1 = ConstantInt::get(
+ Type::getInt32Ty(
+ v->getParent()->getParent()->getParent()->getContext()),
+ 1);
+ auto MDConst = ConstantAsMetadata::get(const_1);
+ MDNode *md = MDNode::get(
+ v->getParent()->getParent()->getParent()->getContext(), MDConst);
+ basesel->setMetadata("is_base_value", md);
+ states[v] = PhiState(PhiState::Conflict, basesel);
}
}
@@ -818,97 +836,98 @@ static Value *findBasePointer(Value *I, DefiningValueMapTy &cache,
assert(!isKnownBaseResult(v) && "why did it get added?");
assert(!state.isUnknown() && "Optimistic algorithm didn't complete!");
- if (state.isConflict()) {
- if (PHINode *basephi = dyn_cast<PHINode>(state.getBase())) {
- PHINode *phi = cast<PHINode>(v);
- unsigned NumPHIValues = phi->getNumIncomingValues();
- for (unsigned i = 0; i < NumPHIValues; i++) {
- Value *InVal = phi->getIncomingValue(i);
- BasicBlock *InBB = phi->getIncomingBlock(i);
-
- // If we've already seen InBB, add the same incoming value
- // we added for it earlier. The IR verifier requires phi
- // nodes with multiple entries from the same basic block
- // to have the same incoming value for each of those
- // entries. If we don't do this check here and basephi
- // has a different type than base, we'll end up adding two
- // bitcasts (and hence two distinct values) as incoming
- // values for the same basic block.
-
- int blockIndex = basephi->getBasicBlockIndex(InBB);
- if (blockIndex != -1) {
- Value *oldBase = basephi->getIncomingValue(blockIndex);
- basephi->addIncoming(oldBase, InBB);
+ if (!state.isConflict())
+ continue;
+
+ if (PHINode *basephi = dyn_cast<PHINode>(state.getBase())) {
+ PHINode *phi = cast<PHINode>(v);
+ unsigned NumPHIValues = phi->getNumIncomingValues();
+ for (unsigned i = 0; i < NumPHIValues; i++) {
+ Value *InVal = phi->getIncomingValue(i);
+ BasicBlock *InBB = phi->getIncomingBlock(i);
+
+ // If we've already seen InBB, add the same incoming value
+ // we added for it earlier. The IR verifier requires phi
+ // nodes with multiple entries from the same basic block
+ // to have the same incoming value for each of those
+ // entries. If we don't do this check here and basephi
+ // has a different type than base, we'll end up adding two
+ // bitcasts (and hence two distinct values) as incoming
+ // values for the same basic block.
+
+ int blockIndex = basephi->getBasicBlockIndex(InBB);
+ if (blockIndex != -1) {
+ Value *oldBase = basephi->getIncomingValue(blockIndex);
+ basephi->addIncoming(oldBase, InBB);
#ifndef NDEBUG
- Value *base = findBaseOrBDV(InVal, cache);
- if (!isKnownBaseResult(base)) {
- // Either conflict or base.
- assert(states.count(base));
- base = states[base].getBase();
- assert(base != nullptr && "unknown PhiState!");
- assert(NewInsertedDefs.count(base) &&
- "should have already added this in a prev. iteration!");
- }
-
- // In essense this assert states: the only way two
- // values incoming from the same basic block may be
- // different is by being different bitcasts of the same
- // value. A cleanup that remains TODO is changing
- // findBaseOrBDV to return an llvm::Value of the correct
- // type (and still remain pure). This will remove the
- // need to add bitcasts.
- assert(base->stripPointerCasts() == oldBase->stripPointerCasts() &&
- "sanity -- findBaseOrBDV should be pure!");
-#endif
- continue;
- }
-
- // Find either the defining value for the PHI or the normal base for
- // a non-phi node
Value *base = findBaseOrBDV(InVal, cache);
if (!isKnownBaseResult(base)) {
// Either conflict or base.
assert(states.count(base));
base = states[base].getBase();
assert(base != nullptr && "unknown PhiState!");
+ assert(NewInsertedDefs.count(base) &&
+ "should have already added this in a prev. iteration!");
}
- assert(base && "can't be null");
- // Must use original input BB since base may not be Instruction
- // The cast is needed since base traversal may strip away bitcasts
- if (base->getType() != basephi->getType()) {
- base = new BitCastInst(base, basephi->getType(), "cast",
- InBB->getTerminator());
- NewInsertedDefs.insert(base);
- }
- basephi->addIncoming(base, InBB);
+
+          // In essence this assert states: the only way two
+ // values incoming from the same basic block may be
+ // different is by being different bitcasts of the same
+ // value. A cleanup that remains TODO is changing
+ // findBaseOrBDV to return an llvm::Value of the correct
+ // type (and still remain pure). This will remove the
+ // need to add bitcasts.
+ assert(base->stripPointerCasts() == oldBase->stripPointerCasts() &&
+ "sanity -- findBaseOrBDV should be pure!");
+#endif
+ continue;
}
- assert(basephi->getNumIncomingValues() == NumPHIValues);
- } else if (SelectInst *basesel = dyn_cast<SelectInst>(state.getBase())) {
- SelectInst *sel = cast<SelectInst>(v);
- // Operand 1 & 2 are true, false path respectively. TODO: refactor to
- // something more safe and less hacky.
- for (int i = 1; i <= 2; i++) {
- Value *InVal = sel->getOperand(i);
- // Find either the defining value for the PHI or the normal base for
- // a non-phi node
- Value *base = findBaseOrBDV(InVal, cache);
- if (!isKnownBaseResult(base)) {
- // Either conflict or base.
- assert(states.count(base));
- base = states[base].getBase();
- assert(base != nullptr && "unknown PhiState!");
- }
- assert(base && "can't be null");
- // Must use original input BB since base may not be Instruction
- // The cast is needed since base traversal may strip away bitcasts
- if (base->getType() != basesel->getType()) {
- base = new BitCastInst(base, basesel->getType(), "cast", basesel);
- NewInsertedDefs.insert(base);
- }
- basesel->setOperand(i, base);
+
+ // Find either the defining value for the PHI or the normal base for
+ // a non-phi node
+ Value *base = findBaseOrBDV(InVal, cache);
+ if (!isKnownBaseResult(base)) {
+ // Either conflict or base.
+ assert(states.count(base));
+ base = states[base].getBase();
+ assert(base != nullptr && "unknown PhiState!");
}
- } else
- llvm_unreachable("unexpected conflict type");
+ assert(base && "can't be null");
+ // Must use original input BB since base may not be Instruction
+ // The cast is needed since base traversal may strip away bitcasts
+ if (base->getType() != basephi->getType()) {
+ base = new BitCastInst(base, basephi->getType(), "cast",
+ InBB->getTerminator());
+ NewInsertedDefs.insert(base);
+ }
+ basephi->addIncoming(base, InBB);
+ }
+ assert(basephi->getNumIncomingValues() == NumPHIValues);
+ } else {
+ SelectInst *basesel = cast<SelectInst>(state.getBase());
+ SelectInst *sel = cast<SelectInst>(v);
+ // Operand 1 & 2 are true, false path respectively. TODO: refactor to
+ // something more safe and less hacky.
+ for (int i = 1; i <= 2; i++) {
+ Value *InVal = sel->getOperand(i);
+ // Find either the defining value for the PHI or the normal base for
+ // a non-phi node
+ Value *base = findBaseOrBDV(InVal, cache);
+ if (!isKnownBaseResult(base)) {
+ // Either conflict or base.
+ assert(states.count(base));
+ base = states[base].getBase();
+ assert(base != nullptr && "unknown PhiState!");
+ }
+ assert(base && "can't be null");
+ // Must use original input BB since base may not be Instruction
+ // The cast is needed since base traversal may strip away bitcasts
+ if (base->getType() != basesel->getType()) {
+ base = new BitCastInst(base, basesel->getType(), "cast", basesel);
+ NewInsertedDefs.insert(base);
+ }
+ basesel->setOperand(i, base);
+ }
}
}
@@ -964,7 +983,13 @@ static void findBasePointers(const StatepointLiveSetTy &live,
DenseMap<llvm::Value *, llvm::Value *> &PointerToBase,
DominatorTree *DT, DefiningValueMapTy &DVCache,
DenseSet<llvm::Value *> &NewInsertedDefs) {
- for (Value *ptr : live) {
+ // For the naming of values inserted to be deterministic - which makes for
+ // much cleaner and more stable tests - we need to assign an order to the
+ // live values. DenseSets do not provide a deterministic order across runs.
+ SmallVector<Value*, 64> Temp;
+ Temp.insert(Temp.end(), live.begin(), live.end());
+ std::sort(Temp.begin(), Temp.end(), order_by_name);
+ for (Value *ptr : Temp) {
Value *base = findBasePointer(ptr, DVCache, NewInsertedDefs);
assert(base && "failed to find base pointer");
PointerToBase[ptr] = base;
@@ -993,10 +1018,19 @@ static void findBasePointers(DominatorTree &DT, DefiningValueMapTy &DVCache,
findBasePointers(result.liveset, PointerToBase, &DT, DVCache, NewInsertedDefs);
if (PrintBasePointers) {
+ // Note: Need to print these in a stable order since this is checked in
+ // some tests.
errs() << "Base Pairs (w/o Relocation):\n";
+ SmallVector<Value*, 64> Temp;
+ Temp.reserve(PointerToBase.size());
for (auto Pair : PointerToBase) {
- errs() << " derived %" << Pair.first->getName() << " base %"
- << Pair.second->getName() << "\n";
+ Temp.push_back(Pair.first);
+ }
+ std::sort(Temp.begin(), Temp.end(), order_by_name);
+ for (Value *Ptr : Temp) {
+ Value *Base = PointerToBase[Ptr];
+ errs() << " derived %" << Ptr->getName() << " base %"
+ << Base->getName() << "\n";
}
}
@@ -1131,11 +1165,11 @@ static AttributeSet legalizeCallAttributes(AttributeSet AS) {
/// statepointToken - statepoint instruction to which relocates should be
/// bound.
/// Builder - Llvm IR builder to be used to construct new calls.
-void CreateGCRelocates(ArrayRef<llvm::Value *> liveVariables,
- const int liveStart,
- ArrayRef<llvm::Value *> basePtrs,
- Instruction *statepointToken, IRBuilder<> Builder) {
-
+static void CreateGCRelocates(ArrayRef<llvm::Value *> liveVariables,
+ const int liveStart,
+ ArrayRef<llvm::Value *> basePtrs,
+ Instruction *statepointToken,
+ IRBuilder<> Builder) {
SmallVector<Instruction *, 64> NewDefs;
NewDefs.reserve(liveVariables.size());
@@ -1559,8 +1593,18 @@ static void relocationViaAlloca(
    // The store must be inserted after the load; otherwise the store would
    // land in the alloca's use list and an extra load would be inserted
    // before it.
StoreInst *store = new StoreInst(def, alloca);
- if (isa<Instruction>(def)) {
- store->insertAfter(cast<Instruction>(def));
+ if (Instruction *inst = dyn_cast<Instruction>(def)) {
+ if (InvokeInst *invoke = dyn_cast<InvokeInst>(inst)) {
+        // InvokeInst is a TerminatorInst, so the store needs to be inserted
+        // into its normal destination block.
+ BasicBlock *normalDest = invoke->getNormalDest();
+ store->insertBefore(normalDest->getFirstNonPHI());
+ } else {
+ assert(!inst->isTerminator() &&
+ "The only TerminatorInst that can produce a value is "
+ "InvokeInst which is handled above.");
+ store->insertAfter(inst);
+ }
} else {
assert((isa<Argument>(def) || isa<GlobalVariable>(def) ||
(isa<Constant>(def) && cast<Constant>(def)->isNullValue())) &&
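The invoke special case generalizes to a small rule: a value defined by an InvokeInst is only available on the normal edge, so "insert after the definition" has to be interpreted per instruction kind. A minimal sketch of that rule, under the same assumptions as the patch:

    #include "llvm/IR/BasicBlock.h"
    #include "llvm/IR/Instructions.h"

    // Place User at the earliest point where Def's value is available.
    static void insertAfterDefinition(llvm::Instruction *Def,
                                      llvm::Instruction *User) {
      if (auto *Invoke = llvm::dyn_cast<llvm::InvokeInst>(Def))
        // The result only exists on the normal edge, never after the invoke
        // itself (the terminator position) or in the unwind destination.
        User->insertBefore(Invoke->getNormalDest()->getFirstNonPHI());
      else
        User->insertAfter(Def);
    }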
diff --git a/lib/Transforms/Scalar/SCCP.cpp b/lib/Transforms/Scalar/SCCP.cpp
index 05b9608..875a007 100644
--- a/lib/Transforms/Scalar/SCCP.cpp
+++ b/lib/Transforms/Scalar/SCCP.cpp
@@ -25,6 +25,7 @@
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/ConstantFolding.h"
+#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/IR/CallSite.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
@@ -35,7 +36,6 @@
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Transforms/IPO.h"
#include "llvm/Transforms/Utils/Local.h"
#include <algorithm>
@@ -154,7 +154,7 @@ namespace {
/// Constant Propagation.
///
class SCCPSolver : public InstVisitor<SCCPSolver> {
- const DataLayout *DL;
+ const DataLayout &DL;
const TargetLibraryInfo *TLI;
SmallPtrSet<BasicBlock*, 8> BBExecutable; // The BBs that are executable.
DenseMap<Value*, LatticeVal> ValueState; // The state each value is in.
@@ -206,8 +206,8 @@ class SCCPSolver : public InstVisitor<SCCPSolver> {
typedef std::pair<BasicBlock*, BasicBlock*> Edge;
DenseSet<Edge> KnownFeasibleEdges;
public:
- SCCPSolver(const DataLayout *DL, const TargetLibraryInfo *tli)
- : DL(DL), TLI(tli) {}
+ SCCPSolver(const DataLayout &DL, const TargetLibraryInfo *tli)
+ : DL(DL), TLI(tli) {}
/// MarkBlockExecutable - This method can be used by clients to mark all of
/// the blocks that are known to be intrinsically live in the processed unit.
@@ -1561,8 +1561,7 @@ bool SCCP::runOnFunction(Function &F) {
return false;
DEBUG(dbgs() << "SCCP on function '" << F.getName() << "'\n");
- const DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>();
- const DataLayout *DL = DLP ? &DLP->getDataLayout() : nullptr;
+ const DataLayout &DL = F.getParent()->getDataLayout();
const TargetLibraryInfo *TLI =
&getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();
SCCPSolver Solver(DL, TLI);
@@ -1691,8 +1690,7 @@ static bool AddressIsTaken(const GlobalValue *GV) {
}
bool IPSCCP::runOnModule(Module &M) {
- DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>();
- const DataLayout *DL = DLP ? &DLP->getDataLayout() : nullptr;
+ const DataLayout &DL = M.getDataLayout();
const TargetLibraryInfo *TLI =
&getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();
SCCPSolver Solver(DL, TLI);
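Both hunks above are instances of the patch's recurring mechanical change: DataLayout is now a property of the Module, so the optional DataLayoutPass query and the nullable pointer disappear. The new idiom, in isolation:

    #include <cstdint>
    #include "llvm/IR/DataLayout.h"
    #include "llvm/IR/Function.h"
    #include "llvm/IR/Module.h"

    // Every Function reaches a valid DataLayout through its parent Module,
    // so callees can take `const DataLayout &` and skip the null checks.
    static uint64_t allocSizeOf(llvm::Function &F, llvm::Type *Ty) {
      const llvm::DataLayout &DL = F.getParent()->getDataLayout();
      return DL.getTypeAllocSize(Ty);
    }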
diff --git a/lib/Transforms/Scalar/SROA.cpp b/lib/Transforms/Scalar/SROA.cpp
index f69c750..06b000f 100644
--- a/lib/Transforms/Scalar/SROA.cpp
+++ b/lib/Transforms/Scalar/SROA.cpp
@@ -247,7 +247,7 @@ public:
/// hold.
void insert(ArrayRef<Slice> NewSlices) {
int OldSize = Slices.size();
- std::move(NewSlices.begin(), NewSlices.end(), std::back_inserter(Slices));
+ Slices.append(NewSlices.begin(), NewSlices.end());
auto SliceI = Slices.begin() + OldSize;
std::sort(SliceI, Slices.end());
std::inplace_merge(Slices.begin(), SliceI, Slices.end());
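The append/sort/inplace_merge sequence keeps the container sorted in roughly O(n + k log k) for k new slices rather than re-sorting everything; only the freshly appended tail is unsorted. A standalone demonstration of the same sequence with plain std::vector:

    #include <algorithm>
    #include <cstddef>
    #include <iostream>
    #include <vector>

    int main() {
      std::vector<int> slices = {1, 4, 9};           // invariant: sorted
      std::vector<int> fresh = {7, 2, 5};            // new, unsorted
      std::size_t old = slices.size();
      slices.insert(slices.end(), fresh.begin(), fresh.end());
      std::sort(slices.begin() + old, slices.end()); // sort only the tail
      std::inplace_merge(slices.begin(), slices.begin() + old, slices.end());
      for (int s : slices)
        std::cout << s << ' ';                       // 1 2 4 5 7 9
      return 0;
    }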
@@ -701,6 +701,7 @@ private:
    // by writing out the code here where we have the underlying allocation
// size readily available.
APInt GEPOffset = Offset;
+ const DataLayout &DL = GEPI.getModule()->getDataLayout();
for (gep_type_iterator GTI = gep_type_begin(GEPI),
GTE = gep_type_end(GEPI);
GTI != GTE; ++GTI) {
@@ -750,6 +751,7 @@ private:
if (!IsOffsetKnown)
return PI.setAborted(&LI);
+ const DataLayout &DL = LI.getModule()->getDataLayout();
uint64_t Size = DL.getTypeStoreSize(LI.getType());
return handleLoadOrStore(LI.getType(), LI, Offset, Size, LI.isVolatile());
}
@@ -761,6 +763,7 @@ private:
if (!IsOffsetKnown)
return PI.setAborted(&SI);
+ const DataLayout &DL = SI.getModule()->getDataLayout();
uint64_t Size = DL.getTypeStoreSize(ValOp->getType());
// If this memory access can be shown to *statically* extend outside the
@@ -898,6 +901,7 @@ private:
SmallVector<std::pair<Instruction *, Instruction *>, 4> Uses;
Visited.insert(Root);
Uses.push_back(std::make_pair(cast<Instruction>(*U), Root));
+ const DataLayout &DL = Root->getModule()->getDataLayout();
// If there are no loads or stores, the access is dead. We mark that as
// a size zero access.
Size = 0;
@@ -1194,7 +1198,6 @@ class SROA : public FunctionPass {
const bool RequiresDomTree;
LLVMContext *C;
- const DataLayout *DL;
DominatorTree *DT;
AssumptionCache *AC;
@@ -1243,7 +1246,7 @@ class SROA : public FunctionPass {
public:
SROA(bool RequiresDomTree = true)
: FunctionPass(ID), RequiresDomTree(RequiresDomTree), C(nullptr),
- DL(nullptr), DT(nullptr) {
+ DT(nullptr) {
initializeSROAPass(*PassRegistry::getPassRegistry());
}
bool runOnFunction(Function &F) override;
@@ -1349,7 +1352,7 @@ static Type *findCommonType(AllocaSlices::const_iterator B,
///
/// FIXME: This should be hoisted into a generic utility, likely in
/// Transforms/Util/Local.h
-static bool isSafePHIToSpeculate(PHINode &PN, const DataLayout *DL = nullptr) {
+static bool isSafePHIToSpeculate(PHINode &PN) {
// For now, we can only do this promotion if the load is in the same block
// as the PHI, and if there are no stores between the phi and load.
// TODO: Allow recursive phi users.
@@ -1381,6 +1384,8 @@ static bool isSafePHIToSpeculate(PHINode &PN, const DataLayout *DL = nullptr) {
if (!HaveLoad)
return false;
+ const DataLayout &DL = PN.getModule()->getDataLayout();
+
// We can only transform this if it is safe to push the loads into the
// predecessor blocks. The only thing to watch out for is that we can't put
// a possibly trapping load in the predecessor if it is a critical edge.
@@ -1403,7 +1408,7 @@ static bool isSafePHIToSpeculate(PHINode &PN, const DataLayout *DL = nullptr) {
// is already a load in the block, then we can move the load to the pred
// block.
if (InVal->isDereferenceablePointer(DL) ||
- isSafeToLoadUnconditionally(InVal, TI, MaxAlign, DL))
+ isSafeToLoadUnconditionally(InVal, TI, MaxAlign))
continue;
return false;
@@ -1468,10 +1473,10 @@ static void speculatePHINodeLoads(PHINode &PN) {
///
/// We can do this to a select if its only uses are loads and if the operand
/// to the select can be loaded unconditionally.
-static bool isSafeSelectToSpeculate(SelectInst &SI,
- const DataLayout *DL = nullptr) {
+static bool isSafeSelectToSpeculate(SelectInst &SI) {
Value *TValue = SI.getTrueValue();
Value *FValue = SI.getFalseValue();
+ const DataLayout &DL = SI.getModule()->getDataLayout();
bool TDerefable = TValue->isDereferenceablePointer(DL);
bool FDerefable = FValue->isDereferenceablePointer(DL);
@@ -1484,10 +1489,10 @@ static bool isSafeSelectToSpeculate(SelectInst &SI,
// absolutely (e.g. allocas) or at this point because we can see other
// accesses to it.
if (!TDerefable &&
- !isSafeToLoadUnconditionally(TValue, LI, LI->getAlignment(), DL))
+ !isSafeToLoadUnconditionally(TValue, LI, LI->getAlignment()))
return false;
if (!FDerefable &&
- !isSafeToLoadUnconditionally(FValue, LI, LI->getAlignment(), DL))
+ !isSafeToLoadUnconditionally(FValue, LI, LI->getAlignment()))
return false;
}
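For reference, the rewrite that isSafeSelectToSpeculate guards looks roughly like the sketch below: a load of a select becomes a select of two loads. The in-tree speculateSelectInstLoads also carries over alignment and handles several loads per select; this is the minimal shape only.

    #include "llvm/IR/IRBuilder.h"
    #include "llvm/IR/Instructions.h"

    // Sketch: load (select c, a, b)  ==>  select c, (load a), (load b).
    static void speculateSelectLoad(llvm::SelectInst *SI, llvm::LoadInst *LI) {
      llvm::IRBuilder<> B(LI);
      llvm::Value *TV = B.CreateLoad(SI->getTrueValue(), LI->getName() + ".t");
      llvm::Value *FV = B.CreateLoad(SI->getFalseValue(), LI->getName() + ".f");
      llvm::Value *V = B.CreateSelect(SI->getCondition(), TV, FV, LI->getName());
      LI->replaceAllUsesWith(V);
      LI->eraseFromParent();
    }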
@@ -3699,6 +3704,7 @@ bool SROA::presplitLoadsAndStores(AllocaInst &AI, AllocaSlices &AS) {
// them to the alloca slices.
SmallDenseMap<LoadInst *, std::vector<LoadInst *>, 1> SplitLoadsMap;
std::vector<LoadInst *> SplitLoads;
+ const DataLayout &DL = AI.getModule()->getDataLayout();
for (LoadInst *LI : Loads) {
SplitLoads.clear();
@@ -3724,10 +3730,10 @@ bool SROA::presplitLoadsAndStores(AllocaInst &AI, AllocaSlices &AS) {
auto *PartTy = Type::getIntNTy(Ty->getContext(), PartSize * 8);
auto *PartPtrTy = PartTy->getPointerTo(LI->getPointerAddressSpace());
LoadInst *PLoad = IRB.CreateAlignedLoad(
- getAdjustedPtr(IRB, *DL, BasePtr,
- APInt(DL->getPointerSizeInBits(), PartOffset),
+ getAdjustedPtr(IRB, DL, BasePtr,
+ APInt(DL.getPointerSizeInBits(), PartOffset),
PartPtrTy, BasePtr->getName() + "."),
- getAdjustedAlignment(LI, PartOffset, *DL), /*IsVolatile*/ false,
+ getAdjustedAlignment(LI, PartOffset, DL), /*IsVolatile*/ false,
LI->getName());
// Append this load onto the list of split loads so we can find it later
@@ -3777,10 +3783,10 @@ bool SROA::presplitLoadsAndStores(AllocaInst &AI, AllocaSlices &AS) {
PLoad->getType()->getPointerTo(SI->getPointerAddressSpace());
StoreInst *PStore = IRB.CreateAlignedStore(
- PLoad, getAdjustedPtr(IRB, *DL, StoreBasePtr,
- APInt(DL->getPointerSizeInBits(), PartOffset),
+ PLoad, getAdjustedPtr(IRB, DL, StoreBasePtr,
+ APInt(DL.getPointerSizeInBits(), PartOffset),
PartPtrTy, StoreBasePtr->getName() + "."),
- getAdjustedAlignment(SI, PartOffset, *DL), /*IsVolatile*/ false);
+ getAdjustedAlignment(SI, PartOffset, DL), /*IsVolatile*/ false);
(void)PStore;
DEBUG(dbgs() << " +" << PartOffset << ":" << *PStore << "\n");
}
@@ -3857,20 +3863,20 @@ bool SROA::presplitLoadsAndStores(AllocaInst &AI, AllocaSlices &AS) {
} else {
IRB.SetInsertPoint(BasicBlock::iterator(LI));
PLoad = IRB.CreateAlignedLoad(
- getAdjustedPtr(IRB, *DL, LoadBasePtr,
- APInt(DL->getPointerSizeInBits(), PartOffset),
+ getAdjustedPtr(IRB, DL, LoadBasePtr,
+ APInt(DL.getPointerSizeInBits(), PartOffset),
PartPtrTy, LoadBasePtr->getName() + "."),
- getAdjustedAlignment(LI, PartOffset, *DL), /*IsVolatile*/ false,
+ getAdjustedAlignment(LI, PartOffset, DL), /*IsVolatile*/ false,
LI->getName());
}
// And store this partition.
IRB.SetInsertPoint(BasicBlock::iterator(SI));
StoreInst *PStore = IRB.CreateAlignedStore(
- PLoad, getAdjustedPtr(IRB, *DL, StoreBasePtr,
- APInt(DL->getPointerSizeInBits(), PartOffset),
+ PLoad, getAdjustedPtr(IRB, DL, StoreBasePtr,
+ APInt(DL.getPointerSizeInBits(), PartOffset),
PartPtrTy, StoreBasePtr->getName() + "."),
- getAdjustedAlignment(SI, PartOffset, *DL), /*IsVolatile*/ false);
+ getAdjustedAlignment(SI, PartOffset, DL), /*IsVolatile*/ false);
// Now build a new slice for the alloca.
NewSlices.push_back(
@@ -3970,25 +3976,26 @@ AllocaInst *SROA::rewritePartition(AllocaInst &AI, AllocaSlices &AS,
// won't always succeed, in which case we fall back to a legal integer type
// or an i8 array of an appropriate size.
Type *SliceTy = nullptr;
+ const DataLayout &DL = AI.getModule()->getDataLayout();
if (Type *CommonUseTy = findCommonType(P.begin(), P.end(), P.endOffset()))
- if (DL->getTypeAllocSize(CommonUseTy) >= P.size())
+ if (DL.getTypeAllocSize(CommonUseTy) >= P.size())
SliceTy = CommonUseTy;
if (!SliceTy)
- if (Type *TypePartitionTy = getTypePartition(*DL, AI.getAllocatedType(),
+ if (Type *TypePartitionTy = getTypePartition(DL, AI.getAllocatedType(),
P.beginOffset(), P.size()))
SliceTy = TypePartitionTy;
if ((!SliceTy || (SliceTy->isArrayTy() &&
SliceTy->getArrayElementType()->isIntegerTy())) &&
- DL->isLegalInteger(P.size() * 8))
+ DL.isLegalInteger(P.size() * 8))
SliceTy = Type::getIntNTy(*C, P.size() * 8);
if (!SliceTy)
SliceTy = ArrayType::get(Type::getInt8Ty(*C), P.size());
- assert(DL->getTypeAllocSize(SliceTy) >= P.size());
+ assert(DL.getTypeAllocSize(SliceTy) >= P.size());
- bool IsIntegerPromotable = isIntegerWideningViable(P, SliceTy, *DL);
+ bool IsIntegerPromotable = isIntegerWideningViable(P, SliceTy, DL);
VectorType *VecTy =
- IsIntegerPromotable ? nullptr : isVectorPromotionViable(P, *DL);
+ IsIntegerPromotable ? nullptr : isVectorPromotionViable(P, DL);
if (VecTy)
SliceTy = VecTy;
@@ -4010,12 +4017,12 @@ AllocaInst *SROA::rewritePartition(AllocaInst &AI, AllocaSlices &AS,
// The minimum alignment which users can rely on when the explicit
// alignment is omitted or zero is that required by the ABI for this
// type.
- Alignment = DL->getABITypeAlignment(AI.getAllocatedType());
+ Alignment = DL.getABITypeAlignment(AI.getAllocatedType());
}
Alignment = MinAlign(Alignment, P.beginOffset());
// If we will get at least this much alignment from the type alone, leave
// the alloca's alignment unconstrained.
- if (Alignment <= DL->getABITypeAlignment(SliceTy))
+ if (Alignment <= DL.getABITypeAlignment(SliceTy))
Alignment = 0;
NewAI = new AllocaInst(
SliceTy, nullptr, Alignment,
@@ -4035,7 +4042,7 @@ AllocaInst *SROA::rewritePartition(AllocaInst &AI, AllocaSlices &AS,
SmallPtrSet<PHINode *, 8> PHIUsers;
SmallPtrSet<SelectInst *, 8> SelectUsers;
- AllocaSliceRewriter Rewriter(*DL, AS, *this, AI, *NewAI, P.beginOffset(),
+ AllocaSliceRewriter Rewriter(DL, AS, *this, AI, *NewAI, P.beginOffset(),
P.endOffset(), IsIntegerPromotable, VecTy,
PHIUsers, SelectUsers);
bool Promotable = true;
@@ -4057,7 +4064,7 @@ AllocaInst *SROA::rewritePartition(AllocaInst &AI, AllocaSlices &AS,
for (SmallPtrSetImpl<PHINode *>::iterator I = PHIUsers.begin(),
E = PHIUsers.end();
I != E; ++I)
- if (!isSafePHIToSpeculate(**I, DL)) {
+ if (!isSafePHIToSpeculate(**I)) {
Promotable = false;
PHIUsers.clear();
SelectUsers.clear();
@@ -4066,7 +4073,7 @@ AllocaInst *SROA::rewritePartition(AllocaInst &AI, AllocaSlices &AS,
for (SmallPtrSetImpl<SelectInst *>::iterator I = SelectUsers.begin(),
E = SelectUsers.end();
I != E; ++I)
- if (!isSafeSelectToSpeculate(**I, DL)) {
+ if (!isSafeSelectToSpeculate(**I)) {
Promotable = false;
PHIUsers.clear();
SelectUsers.clear();
@@ -4110,6 +4117,7 @@ bool SROA::splitAlloca(AllocaInst &AI, AllocaSlices &AS) {
unsigned NumPartitions = 0;
bool Changed = false;
+ const DataLayout &DL = AI.getModule()->getDataLayout();
// First try to pre-split loads and stores.
Changed |= presplitLoadsAndStores(AI, AS);
@@ -4127,7 +4135,7 @@ bool SROA::splitAlloca(AllocaInst &AI, AllocaSlices &AS) {
// confident that the above handling of splittable loads and stores is
// completely sufficient before we forcibly disable the remaining handling.
if (S.beginOffset() == 0 &&
- S.endOffset() >= DL->getTypeAllocSize(AI.getAllocatedType()))
+ S.endOffset() >= DL.getTypeAllocSize(AI.getAllocatedType()))
continue;
if (isa<LoadInst>(S.getUse()->getUser()) ||
isa<StoreInst>(S.getUse()->getUser())) {
@@ -4155,7 +4163,7 @@ bool SROA::splitAlloca(AllocaInst &AI, AllocaSlices &AS) {
Changed = true;
if (NewAI != &AI) {
uint64_t SizeOfByte = 8;
- uint64_t AllocaSize = DL->getTypeSizeInBits(NewAI->getAllocatedType());
+ uint64_t AllocaSize = DL.getTypeSizeInBits(NewAI->getAllocatedType());
// Don't include any padding.
uint64_t Size = std::min(AllocaSize, P.size() * SizeOfByte);
Pieces.push_back(Piece(NewAI, P.beginOffset() * SizeOfByte, Size));
@@ -4236,21 +4244,22 @@ bool SROA::runOnAlloca(AllocaInst &AI) {
AI.eraseFromParent();
return true;
}
+ const DataLayout &DL = AI.getModule()->getDataLayout();
// Skip alloca forms that this analysis can't handle.
if (AI.isArrayAllocation() || !AI.getAllocatedType()->isSized() ||
- DL->getTypeAllocSize(AI.getAllocatedType()) == 0)
+ DL.getTypeAllocSize(AI.getAllocatedType()) == 0)
return false;
bool Changed = false;
// First, split any FCA loads and stores touching this alloca to promote
// better splitting and promotion opportunities.
- AggLoadStoreRewriter AggRewriter(*DL);
+ AggLoadStoreRewriter AggRewriter(DL);
Changed |= AggRewriter.rewrite(AI);
// Build the slices using a recursive instruction-visiting builder.
- AllocaSlices AS(*DL, AI);
+ AllocaSlices AS(DL, AI);
DEBUG(AS.print(dbgs()));
if (AS.isEscaped())
return Changed;
@@ -4423,12 +4432,6 @@ bool SROA::runOnFunction(Function &F) {
DEBUG(dbgs() << "SROA function: " << F.getName() << "\n");
C = &F.getContext();
- DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>();
- if (!DLP) {
- DEBUG(dbgs() << " Skipping SROA -- no target data!\n");
- return false;
- }
- DL = &DLP->getDataLayout();
DominatorTreeWrapperPass *DTWP =
getAnalysisIfAvailable<DominatorTreeWrapperPass>();
DT = DTWP ? &DTWP->getDomTree() : nullptr;
diff --git a/lib/Transforms/Scalar/SampleProfile.cpp b/lib/Transforms/Scalar/SampleProfile.cpp
index c7232a9..3e7cf04 100644
--- a/lib/Transforms/Scalar/SampleProfile.cpp
+++ b/lib/Transforms/Scalar/SampleProfile.cpp
@@ -217,6 +217,9 @@ void SampleProfileLoader::printBlockWeight(raw_ostream &OS, BasicBlock *BB) {
/// \returns The profiled weight of I.
unsigned SampleProfileLoader::getInstWeight(Instruction &Inst) {
DebugLoc DLoc = Inst.getDebugLoc();
+ if (DLoc.isUnknown())
+ return 0;
+
unsigned Lineno = DLoc.getLine();
if (Lineno < HeaderLineno)
return 0;
diff --git a/lib/Transforms/Scalar/Scalar.cpp b/lib/Transforms/Scalar/Scalar.cpp
index 621633b..6cc8411 100644
--- a/lib/Transforms/Scalar/Scalar.cpp
+++ b/lib/Transforms/Scalar/Scalar.cpp
@@ -48,6 +48,7 @@ void llvm::initializeScalarOpts(PassRegistry &Registry) {
initializeLoopDeletionPass(Registry);
initializeLoopAccessAnalysisPass(Registry);
initializeLoopInstSimplifyPass(Registry);
+ initializeLoopInterchangePass(Registry);
initializeLoopRotatePass(Registry);
initializeLoopStrengthReducePass(Registry);
initializeLoopRerollPass(Registry);
@@ -209,7 +210,6 @@ void LLVMAddDemoteMemoryToRegisterPass(LLVMPassManagerRef PM) {
void LLVMAddVerifierPass(LLVMPassManagerRef PM) {
unwrap(PM)->add(createVerifierPass());
- // FIXME: should this also add createDebugInfoVerifierPass()?
}
void LLVMAddCorrelatedValuePropagationPass(LLVMPassManagerRef PM) {
diff --git a/lib/Transforms/Scalar/ScalarReplAggregates.cpp b/lib/Transforms/Scalar/ScalarReplAggregates.cpp
index 5c49a55..acd8585 100644
--- a/lib/Transforms/Scalar/ScalarReplAggregates.cpp
+++ b/lib/Transforms/Scalar/ScalarReplAggregates.cpp
@@ -89,7 +89,6 @@ namespace {
private:
bool HasDomTree;
- const DataLayout *DL;
/// DeadInsts - Keep track of instructions we have made dead, so that
/// we can remove them after we are done working.
@@ -159,9 +158,10 @@ namespace {
void isSafeMemAccess(uint64_t Offset, uint64_t MemSize,
Type *MemOpType, bool isStore, AllocaInfo &Info,
Instruction *TheAccess, bool AllowWholeAccess);
- bool TypeHasComponent(Type *T, uint64_t Offset, uint64_t Size);
- uint64_t FindElementAndOffset(Type *&T, uint64_t &Offset,
- Type *&IdxTy);
+ bool TypeHasComponent(Type *T, uint64_t Offset, uint64_t Size,
+ const DataLayout &DL);
+ uint64_t FindElementAndOffset(Type *&T, uint64_t &Offset, Type *&IdxTy,
+ const DataLayout &DL);
void DoScalarReplacement(AllocaInst *AI,
std::vector<AllocaInst*> &WorkList);
@@ -699,9 +699,9 @@ void ConvertToScalarInfo::ConvertUsesToScalar(Value *Ptr, AllocaInst *NewAI,
// If the source and destination are both to the same alloca, then this is
// a noop copy-to-self, just delete it. Otherwise, emit a load and store
// as appropriate.
- AllocaInst *OrigAI = cast<AllocaInst>(GetUnderlyingObject(Ptr, &DL, 0));
+ AllocaInst *OrigAI = cast<AllocaInst>(GetUnderlyingObject(Ptr, DL, 0));
- if (GetUnderlyingObject(MTI->getSource(), &DL, 0) != OrigAI) {
+ if (GetUnderlyingObject(MTI->getSource(), DL, 0) != OrigAI) {
// Dest must be OrigAI, change this to be a load from the original
// pointer (bitcasted), then a store to our new alloca.
assert(MTI->getRawDest() == Ptr && "Neither use is of pointer?");
@@ -717,7 +717,7 @@ void ConvertToScalarInfo::ConvertUsesToScalar(Value *Ptr, AllocaInst *NewAI,
LoadInst *SrcVal = Builder.CreateLoad(SrcPtr, "srcval");
SrcVal->setAlignment(MTI->getAlignment());
Builder.CreateStore(SrcVal, NewAI);
- } else if (GetUnderlyingObject(MTI->getDest(), &DL, 0) != OrigAI) {
+ } else if (GetUnderlyingObject(MTI->getDest(), DL, 0) != OrigAI) {
// Src must be OrigAI, change this to be a load from NewAI then a store
// through the original dest pointer (bitcasted).
assert(MTI->getRawSource() == Ptr && "Neither use is of pointer?");
@@ -1032,17 +1032,8 @@ bool SROA::runOnFunction(Function &F) {
if (skipOptnoneFunction(F))
return false;
- DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>();
- DL = DLP ? &DLP->getDataLayout() : nullptr;
-
bool Changed = performPromotion(F);
- // FIXME: ScalarRepl currently depends on DataLayout more than it
- // theoretically needs to. It should be refactored in order to support
- // target-independent IR. Until this is done, just skip the actual
- // scalar-replacement portion of this pass.
- if (!DL) return Changed;
-
while (1) {
bool LocalChange = performScalarRepl(F);
if (!LocalChange) break; // No need to repromote if no scalarrepl
@@ -1148,7 +1139,8 @@ public:
///
/// We can do this to a select if its only uses are loads and if the operand to
/// the select can be loaded unconditionally.
-static bool isSafeSelectToSpeculate(SelectInst *SI, const DataLayout *DL) {
+static bool isSafeSelectToSpeculate(SelectInst *SI) {
+ const DataLayout &DL = SI->getModule()->getDataLayout();
bool TDerefable = SI->getTrueValue()->isDereferenceablePointer(DL);
bool FDerefable = SI->getFalseValue()->isDereferenceablePointer(DL);
@@ -1158,11 +1150,13 @@ static bool isSafeSelectToSpeculate(SelectInst *SI, const DataLayout *DL) {
  // Both operands of the select need to be dereferenceable, either absolutely
  // (e.g. allocas) or at this point because we can see other accesses to them.
- if (!TDerefable && !isSafeToLoadUnconditionally(SI->getTrueValue(), LI,
- LI->getAlignment(), DL))
+ if (!TDerefable &&
+ !isSafeToLoadUnconditionally(SI->getTrueValue(), LI,
+ LI->getAlignment()))
return false;
- if (!FDerefable && !isSafeToLoadUnconditionally(SI->getFalseValue(), LI,
- LI->getAlignment(), DL))
+ if (!FDerefable &&
+ !isSafeToLoadUnconditionally(SI->getFalseValue(), LI,
+ LI->getAlignment()))
return false;
}
@@ -1185,7 +1179,7 @@ static bool isSafeSelectToSpeculate(SelectInst *SI, const DataLayout *DL) {
///
/// We can do this to a PHI if its only uses are loads and if the operands to
/// the PHI can be loaded unconditionally.
-static bool isSafePHIToSpeculate(PHINode *PN, const DataLayout *DL) {
+static bool isSafePHIToSpeculate(PHINode *PN) {
// For now, we can only do this promotion if the load is in the same block as
// the PHI, and if there are no stores between the phi and load.
// TODO: Allow recursive phi users.
@@ -1209,6 +1203,8 @@ static bool isSafePHIToSpeculate(PHINode *PN, const DataLayout *DL) {
MaxAlign = std::max(MaxAlign, LI->getAlignment());
}
+ const DataLayout &DL = PN->getModule()->getDataLayout();
+
// Okay, we know that we have one or more loads in the same block as the PHI.
// We can transform this if it is safe to push the loads into the predecessor
// blocks. The only thing to watch out for is that we can't put a possibly
@@ -1234,7 +1230,7 @@ static bool isSafePHIToSpeculate(PHINode *PN, const DataLayout *DL) {
// If this pointer is always safe to load, or if we can prove that there is
// already a load in the block, then we can move the load to the pred block.
if (InVal->isDereferenceablePointer(DL) ||
- isSafeToLoadUnconditionally(InVal, Pred->getTerminator(), MaxAlign, DL))
+ isSafeToLoadUnconditionally(InVal, Pred->getTerminator(), MaxAlign))
continue;
return false;
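As with the select case, the predicate exists to license a rewrite that floats the load up each incoming edge. A minimal sketch of that rewrite (the in-tree version additionally tracks the maximum load alignment and reuses loads already present in a predecessor):

    #include "llvm/IR/IRBuilder.h"
    #include "llvm/IR/Instructions.h"

    // Sketch: load (phi [a, p1], [b, p2])  ==>  phi [(load a), p1], ...
    static void speculatePHILoad(llvm::PHINode *PN, llvm::LoadInst *LI) {
      llvm::IRBuilder<> B(PN);
      llvm::PHINode *NewPN =
          B.CreatePHI(LI->getType(), PN->getNumIncomingValues(), LI->getName());
      for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
        llvm::BasicBlock *Pred = PN->getIncomingBlock(i);
        llvm::IRBuilder<> PB(Pred->getTerminator());
        NewPN->addIncoming(PB.CreateLoad(PN->getIncomingValue(i)), Pred);
      }
      LI->replaceAllUsesWith(NewPN);
      LI->eraseFromParent();
    }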
@@ -1248,7 +1244,7 @@ static bool isSafePHIToSpeculate(PHINode *PN, const DataLayout *DL) {
/// direct (non-volatile) loads and stores to it. If the alloca is close but
/// not quite there, this will transform the code to allow promotion. As such,
/// it is a non-pure predicate.
-static bool tryToMakeAllocaBePromotable(AllocaInst *AI, const DataLayout *DL) {
+static bool tryToMakeAllocaBePromotable(AllocaInst *AI, const DataLayout &DL) {
SetVector<Instruction*, SmallVector<Instruction*, 4>,
SmallPtrSet<Instruction*, 4> > InstsToRewrite;
for (User *U : AI->users()) {
@@ -1279,7 +1275,7 @@ static bool tryToMakeAllocaBePromotable(AllocaInst *AI, const DataLayout *DL) {
// If it is safe to turn "load (select c, AI, ptr)" into a select of two
// loads, then we can transform this by rewriting the select.
- if (!isSafeSelectToSpeculate(SI, DL))
+ if (!isSafeSelectToSpeculate(SI))
return false;
InstsToRewrite.insert(SI);
@@ -1294,7 +1290,7 @@ static bool tryToMakeAllocaBePromotable(AllocaInst *AI, const DataLayout *DL) {
// If it is safe to turn "load (phi [AI, ptr, ...])" into a PHI of loads
// in the pred blocks, then we can transform this by rewriting the PHI.
- if (!isSafePHIToSpeculate(PN, DL))
+ if (!isSafePHIToSpeculate(PN))
return false;
InstsToRewrite.insert(PN);
@@ -1416,6 +1412,7 @@ static bool tryToMakeAllocaBePromotable(AllocaInst *AI, const DataLayout *DL) {
bool SROA::performPromotion(Function &F) {
std::vector<AllocaInst*> Allocas;
+ const DataLayout &DL = F.getParent()->getDataLayout();
DominatorTree *DT = nullptr;
if (HasDomTree)
DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
@@ -1479,6 +1476,7 @@ bool SROA::ShouldAttemptScalarRepl(AllocaInst *AI) {
//
bool SROA::performScalarRepl(Function &F) {
std::vector<AllocaInst*> WorkList;
+ const DataLayout &DL = F.getParent()->getDataLayout();
// Scan the entry basic block, adding allocas to the worklist.
BasicBlock &BB = F.getEntryBlock();
@@ -1508,7 +1506,7 @@ bool SROA::performScalarRepl(Function &F) {
// transform the allocation instruction if it is an array allocation
// (allocations OF arrays are ok though), and an allocation of a scalar
// value cannot be decomposed at all.
- uint64_t AllocaSize = DL->getTypeAllocSize(AI->getAllocatedType());
+ uint64_t AllocaSize = DL.getTypeAllocSize(AI->getAllocatedType());
// Do not promote [0 x %struct].
if (AllocaSize == 0) continue;
@@ -1531,8 +1529,9 @@ bool SROA::performScalarRepl(Function &F) {
// promoted itself. If so, we don't want to transform it needlessly. Note
  // that we can't just check based on the type: the alloca may be of type i32
  // but have pointer arithmetic that sets byte 3 of it, or something similar.
- if (AllocaInst *NewAI = ConvertToScalarInfo(
- (unsigned)AllocaSize, *DL, ScalarLoadThreshold).TryConvert(AI)) {
+ if (AllocaInst *NewAI =
+ ConvertToScalarInfo((unsigned)AllocaSize, DL, ScalarLoadThreshold)
+ .TryConvert(AI)) {
NewAI->takeName(AI);
AI->eraseFromParent();
++NumConverted;
@@ -1610,6 +1609,7 @@ void SROA::DeleteDeadInstructions() {
/// referenced by this instruction.
void SROA::isSafeForScalarRepl(Instruction *I, uint64_t Offset,
AllocaInfo &Info) {
+ const DataLayout &DL = I->getModule()->getDataLayout();
for (Use &U : I->uses()) {
Instruction *User = cast<Instruction>(U.getUser());
@@ -1632,8 +1632,8 @@ void SROA::isSafeForScalarRepl(Instruction *I, uint64_t Offset,
if (!LI->isSimple())
return MarkUnsafe(Info, User);
Type *LIType = LI->getType();
- isSafeMemAccess(Offset, DL->getTypeAllocSize(LIType),
- LIType, false, Info, LI, true /*AllowWholeAccess*/);
+ isSafeMemAccess(Offset, DL.getTypeAllocSize(LIType), LIType, false, Info,
+ LI, true /*AllowWholeAccess*/);
Info.hasALoadOrStore = true;
} else if (StoreInst *SI = dyn_cast<StoreInst>(User)) {
@@ -1642,8 +1642,8 @@ void SROA::isSafeForScalarRepl(Instruction *I, uint64_t Offset,
return MarkUnsafe(Info, User);
Type *SIType = SI->getOperand(0)->getType();
- isSafeMemAccess(Offset, DL->getTypeAllocSize(SIType),
- SIType, true, Info, SI, true /*AllowWholeAccess*/);
+ isSafeMemAccess(Offset, DL.getTypeAllocSize(SIType), SIType, true, Info,
+ SI, true /*AllowWholeAccess*/);
Info.hasALoadOrStore = true;
} else if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(User)) {
if (II->getIntrinsicID() != Intrinsic::lifetime_start &&
@@ -1675,6 +1675,7 @@ void SROA::isSafePHISelectUseForScalarRepl(Instruction *I, uint64_t Offset,
if (!Info.CheckedPHIs.insert(PN).second)
return;
+ const DataLayout &DL = I->getModule()->getDataLayout();
for (User *U : I->users()) {
Instruction *UI = cast<Instruction>(U);
@@ -1691,8 +1692,8 @@ void SROA::isSafePHISelectUseForScalarRepl(Instruction *I, uint64_t Offset,
if (!LI->isSimple())
return MarkUnsafe(Info, UI);
Type *LIType = LI->getType();
- isSafeMemAccess(Offset, DL->getTypeAllocSize(LIType),
- LIType, false, Info, LI, false /*AllowWholeAccess*/);
+ isSafeMemAccess(Offset, DL.getTypeAllocSize(LIType), LIType, false, Info,
+ LI, false /*AllowWholeAccess*/);
Info.hasALoadOrStore = true;
} else if (StoreInst *SI = dyn_cast<StoreInst>(UI)) {
@@ -1701,8 +1702,8 @@ void SROA::isSafePHISelectUseForScalarRepl(Instruction *I, uint64_t Offset,
return MarkUnsafe(Info, UI);
Type *SIType = SI->getOperand(0)->getType();
- isSafeMemAccess(Offset, DL->getTypeAllocSize(SIType),
- SIType, true, Info, SI, false /*AllowWholeAccess*/);
+ isSafeMemAccess(Offset, DL.getTypeAllocSize(SIType), SIType, true, Info,
+ SI, false /*AllowWholeAccess*/);
Info.hasALoadOrStore = true;
} else if (isa<PHINode>(UI) || isa<SelectInst>(UI)) {
isSafePHISelectUseForScalarRepl(UI, Offset, Info);
@@ -1746,9 +1747,11 @@ void SROA::isSafeGEP(GetElementPtrInst *GEPI,
// constant part of the offset.
if (NonConstant)
Indices.pop_back();
- Offset += DL->getIndexedOffset(GEPI->getPointerOperandType(), Indices);
- if (!TypeHasComponent(Info.AI->getAllocatedType(), Offset,
- NonConstantIdxSize))
+
+ const DataLayout &DL = GEPI->getModule()->getDataLayout();
+ Offset += DL.getIndexedOffset(GEPI->getPointerOperandType(), Indices);
+ if (!TypeHasComponent(Info.AI->getAllocatedType(), Offset, NonConstantIdxSize,
+ DL))
MarkUnsafe(Info, GEPI);
}
@@ -1803,9 +1806,10 @@ void SROA::isSafeMemAccess(uint64_t Offset, uint64_t MemSize,
Type *MemOpType, bool isStore,
AllocaInfo &Info, Instruction *TheAccess,
bool AllowWholeAccess) {
+ const DataLayout &DL = TheAccess->getModule()->getDataLayout();
// Check if this is a load/store of the entire alloca.
if (Offset == 0 && AllowWholeAccess &&
- MemSize == DL->getTypeAllocSize(Info.AI->getAllocatedType())) {
+ MemSize == DL.getTypeAllocSize(Info.AI->getAllocatedType())) {
// This can be safe for MemIntrinsics (where MemOpType is 0) and integer
// loads/stores (which are essentially the same as the MemIntrinsics with
// regard to copying padding between elements). But, if an alloca is
@@ -1828,7 +1832,7 @@ void SROA::isSafeMemAccess(uint64_t Offset, uint64_t MemSize,
}
// Check if the offset/size correspond to a component within the alloca type.
Type *T = Info.AI->getAllocatedType();
- if (TypeHasComponent(T, Offset, MemSize)) {
+ if (TypeHasComponent(T, Offset, MemSize, DL)) {
Info.hasSubelementAccess = true;
return;
}
@@ -1838,24 +1842,25 @@ void SROA::isSafeMemAccess(uint64_t Offset, uint64_t MemSize,
/// TypeHasComponent - Return true if T has a component type with the
/// specified offset and size. If Size is zero, do not check the size.
-bool SROA::TypeHasComponent(Type *T, uint64_t Offset, uint64_t Size) {
+bool SROA::TypeHasComponent(Type *T, uint64_t Offset, uint64_t Size,
+ const DataLayout &DL) {
Type *EltTy;
uint64_t EltSize;
if (StructType *ST = dyn_cast<StructType>(T)) {
- const StructLayout *Layout = DL->getStructLayout(ST);
+ const StructLayout *Layout = DL.getStructLayout(ST);
unsigned EltIdx = Layout->getElementContainingOffset(Offset);
EltTy = ST->getContainedType(EltIdx);
- EltSize = DL->getTypeAllocSize(EltTy);
+ EltSize = DL.getTypeAllocSize(EltTy);
Offset -= Layout->getElementOffset(EltIdx);
} else if (ArrayType *AT = dyn_cast<ArrayType>(T)) {
EltTy = AT->getElementType();
- EltSize = DL->getTypeAllocSize(EltTy);
+ EltSize = DL.getTypeAllocSize(EltTy);
if (Offset >= AT->getNumElements() * EltSize)
return false;
Offset %= EltSize;
} else if (VectorType *VT = dyn_cast<VectorType>(T)) {
EltTy = VT->getElementType();
- EltSize = DL->getTypeAllocSize(EltTy);
+ EltSize = DL.getTypeAllocSize(EltTy);
if (Offset >= VT->getNumElements() * EltSize)
return false;
Offset %= EltSize;
@@ -1867,7 +1872,7 @@ bool SROA::TypeHasComponent(Type *T, uint64_t Offset, uint64_t Size) {
// Check if the component spans multiple elements.
if (Offset + Size > EltSize)
return false;
- return TypeHasComponent(EltTy, Offset, Size);
+ return TypeHasComponent(EltTy, Offset, Size, DL);
}
/// RewriteForScalarRepl - Alloca AI is being split into NewElts, so rewrite
@@ -1876,6 +1881,7 @@ bool SROA::TypeHasComponent(Type *T, uint64_t Offset, uint64_t Size) {
/// instruction.
void SROA::RewriteForScalarRepl(Instruction *I, AllocaInst *AI, uint64_t Offset,
SmallVectorImpl<AllocaInst *> &NewElts) {
+ const DataLayout &DL = I->getModule()->getDataLayout();
for (Value::use_iterator UI = I->use_begin(), E = I->use_end(); UI!=E;) {
Use &TheUse = *UI++;
Instruction *User = cast<Instruction>(TheUse.getUser());
@@ -1893,8 +1899,7 @@ void SROA::RewriteForScalarRepl(Instruction *I, AllocaInst *AI, uint64_t Offset,
if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(User)) {
ConstantInt *Length = dyn_cast<ConstantInt>(MI->getLength());
uint64_t MemSize = Length->getZExtValue();
- if (Offset == 0 &&
- MemSize == DL->getTypeAllocSize(AI->getAllocatedType()))
+ if (Offset == 0 && MemSize == DL.getTypeAllocSize(AI->getAllocatedType()))
RewriteMemIntrinUserOfAlloca(MI, I, AI, NewElts);
// Otherwise the intrinsic can only touch a single element and the
// address operand will be updated, so nothing else needs to be done.
@@ -1930,8 +1935,8 @@ void SROA::RewriteForScalarRepl(Instruction *I, AllocaInst *AI, uint64_t Offset,
LI->replaceAllUsesWith(Insert);
DeadInsts.push_back(LI);
} else if (LIType->isIntegerTy() &&
- DL->getTypeAllocSize(LIType) ==
- DL->getTypeAllocSize(AI->getAllocatedType())) {
+ DL.getTypeAllocSize(LIType) ==
+ DL.getTypeAllocSize(AI->getAllocatedType())) {
// If this is a load of the entire alloca to an integer, rewrite it.
RewriteLoadUserOfWholeAlloca(LI, AI, NewElts);
}
@@ -1957,8 +1962,8 @@ void SROA::RewriteForScalarRepl(Instruction *I, AllocaInst *AI, uint64_t Offset,
}
DeadInsts.push_back(SI);
} else if (SIType->isIntegerTy() &&
- DL->getTypeAllocSize(SIType) ==
- DL->getTypeAllocSize(AI->getAllocatedType())) {
+ DL.getTypeAllocSize(SIType) ==
+ DL.getTypeAllocSize(AI->getAllocatedType())) {
// If this is a store of the entire alloca from an integer, rewrite it.
RewriteStoreUserOfWholeAlloca(SI, AI, NewElts);
}
@@ -2001,7 +2006,8 @@ void SROA::RewriteBitCast(BitCastInst *BC, AllocaInst *AI, uint64_t Offset,
Type *T = AI->getAllocatedType();
uint64_t EltOffset = 0;
Type *IdxTy;
- uint64_t Idx = FindElementAndOffset(T, EltOffset, IdxTy);
+ uint64_t Idx = FindElementAndOffset(T, EltOffset, IdxTy,
+ BC->getModule()->getDataLayout());
Instruction *Val = NewElts[Idx];
if (Val->getType() != BC->getDestTy()) {
Val = new BitCastInst(Val, BC->getDestTy(), "", BC);
@@ -2016,11 +2022,12 @@ void SROA::RewriteBitCast(BitCastInst *BC, AllocaInst *AI, uint64_t Offset,
/// Sets T to the type of the element and Offset to the offset within that
/// element. IdxTy is set to the type of the index result to be used in a
/// GEP instruction.
-uint64_t SROA::FindElementAndOffset(Type *&T, uint64_t &Offset,
- Type *&IdxTy) {
+uint64_t SROA::FindElementAndOffset(Type *&T, uint64_t &Offset, Type *&IdxTy,
+ const DataLayout &DL) {
uint64_t Idx = 0;
+
if (StructType *ST = dyn_cast<StructType>(T)) {
- const StructLayout *Layout = DL->getStructLayout(ST);
+ const StructLayout *Layout = DL.getStructLayout(ST);
Idx = Layout->getElementContainingOffset(Offset);
T = ST->getContainedType(Idx);
Offset -= Layout->getElementOffset(Idx);
@@ -2028,7 +2035,7 @@ uint64_t SROA::FindElementAndOffset(Type *&T, uint64_t &Offset,
return Idx;
} else if (ArrayType *AT = dyn_cast<ArrayType>(T)) {
T = AT->getElementType();
- uint64_t EltSize = DL->getTypeAllocSize(T);
+ uint64_t EltSize = DL.getTypeAllocSize(T);
Idx = Offset / EltSize;
Offset -= Idx * EltSize;
IdxTy = Type::getInt64Ty(T->getContext());
@@ -2036,7 +2043,7 @@ uint64_t SROA::FindElementAndOffset(Type *&T, uint64_t &Offset,
}
VectorType *VT = cast<VectorType>(T);
T = VT->getElementType();
- uint64_t EltSize = DL->getTypeAllocSize(T);
+ uint64_t EltSize = DL.getTypeAllocSize(T);
Idx = Offset / EltSize;
Offset -= Idx * EltSize;
IdxTy = Type::getInt64Ty(T->getContext());
@@ -2049,6 +2056,7 @@ uint64_t SROA::FindElementAndOffset(Type *&T, uint64_t &Offset,
void SROA::RewriteGEP(GetElementPtrInst *GEPI, AllocaInst *AI, uint64_t Offset,
SmallVectorImpl<AllocaInst *> &NewElts) {
uint64_t OldOffset = Offset;
+ const DataLayout &DL = GEPI->getModule()->getDataLayout();
SmallVector<Value*, 8> Indices(GEPI->op_begin() + 1, GEPI->op_end());
// If the GEP was dynamic then it must have been a dynamic vector lookup.
// In this case, it must be the last GEP operand which is dynamic so keep that
@@ -2057,19 +2065,19 @@ void SROA::RewriteGEP(GetElementPtrInst *GEPI, AllocaInst *AI, uint64_t Offset,
Value* NonConstantIdx = nullptr;
if (!GEPI->hasAllConstantIndices())
NonConstantIdx = Indices.pop_back_val();
- Offset += DL->getIndexedOffset(GEPI->getPointerOperandType(), Indices);
+ Offset += DL.getIndexedOffset(GEPI->getPointerOperandType(), Indices);
RewriteForScalarRepl(GEPI, AI, Offset, NewElts);
Type *T = AI->getAllocatedType();
Type *IdxTy;
- uint64_t OldIdx = FindElementAndOffset(T, OldOffset, IdxTy);
+ uint64_t OldIdx = FindElementAndOffset(T, OldOffset, IdxTy, DL);
if (GEPI->getOperand(0) == AI)
OldIdx = ~0ULL; // Force the GEP to be rewritten.
T = AI->getAllocatedType();
uint64_t EltOffset = Offset;
- uint64_t Idx = FindElementAndOffset(T, EltOffset, IdxTy);
+ uint64_t Idx = FindElementAndOffset(T, EltOffset, IdxTy, DL);
// If this GEP does not move the pointer across elements of the alloca
  // being split, then it does not need to be rewritten.
@@ -2080,7 +2088,7 @@ void SROA::RewriteGEP(GetElementPtrInst *GEPI, AllocaInst *AI, uint64_t Offset,
SmallVector<Value*, 8> NewArgs;
NewArgs.push_back(Constant::getNullValue(i32Ty));
while (EltOffset != 0) {
- uint64_t EltIdx = FindElementAndOffset(T, EltOffset, IdxTy);
+ uint64_t EltIdx = FindElementAndOffset(T, EltOffset, IdxTy, DL);
NewArgs.push_back(ConstantInt::get(IdxTy, EltIdx));
}
if (NonConstantIdx) {
@@ -2114,9 +2122,10 @@ void SROA::RewriteLifetimeIntrinsic(IntrinsicInst *II, AllocaInst *AI,
// Put matching lifetime markers on everything from Offset up to
// Offset+OldSize.
Type *AIType = AI->getAllocatedType();
+ const DataLayout &DL = II->getModule()->getDataLayout();
uint64_t NewOffset = Offset;
Type *IdxTy;
- uint64_t Idx = FindElementAndOffset(AIType, NewOffset, IdxTy);
+ uint64_t Idx = FindElementAndOffset(AIType, NewOffset, IdxTy, DL);
IRBuilder<> Builder(II);
uint64_t Size = OldSize->getLimitedValue();
@@ -2129,7 +2138,7 @@ void SROA::RewriteLifetimeIntrinsic(IntrinsicInst *II, AllocaInst *AI,
V = Builder.CreateGEP(V, Builder.getInt64(NewOffset));
IdxTy = NewElts[Idx]->getAllocatedType();
- uint64_t EltSize = DL->getTypeAllocSize(IdxTy) - NewOffset;
+ uint64_t EltSize = DL.getTypeAllocSize(IdxTy) - NewOffset;
if (EltSize > Size) {
EltSize = Size;
Size = 0;
@@ -2145,7 +2154,7 @@ void SROA::RewriteLifetimeIntrinsic(IntrinsicInst *II, AllocaInst *AI,
for (; Idx != NewElts.size() && Size; ++Idx) {
IdxTy = NewElts[Idx]->getAllocatedType();
- uint64_t EltSize = DL->getTypeAllocSize(IdxTy);
+ uint64_t EltSize = DL.getTypeAllocSize(IdxTy);
if (EltSize > Size) {
EltSize = Size;
Size = 0;
@@ -2221,6 +2230,7 @@ SROA::RewriteMemIntrinUserOfAlloca(MemIntrinsic *MI, Instruction *Inst,
bool SROADest = MI->getRawDest() == Inst;
Constant *Zero = Constant::getNullValue(Type::getInt32Ty(MI->getContext()));
+ const DataLayout &DL = MI->getModule()->getDataLayout();
for (unsigned i = 0, e = NewElts.size(); i != e; ++i) {
// If this is a memcpy/memmove, emit a GEP of the other element address.
@@ -2237,10 +2247,10 @@ SROA::RewriteMemIntrinUserOfAlloca(MemIntrinsic *MI, Instruction *Inst,
PointerType *OtherPtrTy = cast<PointerType>(OtherPtr->getType());
Type *OtherTy = OtherPtrTy->getElementType();
if (StructType *ST = dyn_cast<StructType>(OtherTy)) {
- EltOffset = DL->getStructLayout(ST)->getElementOffset(i);
+ EltOffset = DL.getStructLayout(ST)->getElementOffset(i);
} else {
Type *EltTy = cast<SequentialType>(OtherTy)->getElementType();
- EltOffset = DL->getTypeAllocSize(EltTy)*i;
+ EltOffset = DL.getTypeAllocSize(EltTy) * i;
}
// The alignment of the other pointer is the guaranteed alignment of the
@@ -2281,7 +2291,7 @@ SROA::RewriteMemIntrinUserOfAlloca(MemIntrinsic *MI, Instruction *Inst,
Type *ValTy = EltTy->getScalarType();
// Construct an integer with the right value.
- unsigned EltSize = DL->getTypeSizeInBits(ValTy);
+ unsigned EltSize = DL.getTypeSizeInBits(ValTy);
APInt OneVal(EltSize, CI->getZExtValue());
APInt TotalVal(OneVal);
// Set each byte.
@@ -2311,7 +2321,7 @@ SROA::RewriteMemIntrinUserOfAlloca(MemIntrinsic *MI, Instruction *Inst,
// this element.
}
- unsigned EltSize = DL->getTypeAllocSize(EltTy);
+ unsigned EltSize = DL.getTypeAllocSize(EltTy);
if (!EltSize)
continue;
@@ -2345,12 +2355,13 @@ SROA::RewriteStoreUserOfWholeAlloca(StoreInst *SI, AllocaInst *AI,
// and store the element value to the individual alloca.
Value *SrcVal = SI->getOperand(0);
Type *AllocaEltTy = AI->getAllocatedType();
- uint64_t AllocaSizeBits = DL->getTypeAllocSizeInBits(AllocaEltTy);
+ const DataLayout &DL = SI->getModule()->getDataLayout();
+ uint64_t AllocaSizeBits = DL.getTypeAllocSizeInBits(AllocaEltTy);
IRBuilder<> Builder(SI);
// Handle tail padding by extending the operand
- if (DL->getTypeSizeInBits(SrcVal->getType()) != AllocaSizeBits)
+ if (DL.getTypeSizeInBits(SrcVal->getType()) != AllocaSizeBits)
SrcVal = Builder.CreateZExt(SrcVal,
IntegerType::get(SI->getContext(), AllocaSizeBits));
@@ -2360,15 +2371,15 @@ SROA::RewriteStoreUserOfWholeAlloca(StoreInst *SI, AllocaInst *AI,
// There are two forms here: AI could be an array or struct. Both cases
// have different ways to compute the element offset.
if (StructType *EltSTy = dyn_cast<StructType>(AllocaEltTy)) {
- const StructLayout *Layout = DL->getStructLayout(EltSTy);
+ const StructLayout *Layout = DL.getStructLayout(EltSTy);
for (unsigned i = 0, e = NewElts.size(); i != e; ++i) {
// Get the number of bits to shift SrcVal to get the value.
Type *FieldTy = EltSTy->getElementType(i);
uint64_t Shift = Layout->getElementOffsetInBits(i);
- if (DL->isBigEndian())
- Shift = AllocaSizeBits-Shift-DL->getTypeAllocSizeInBits(FieldTy);
+ if (DL.isBigEndian())
+ Shift = AllocaSizeBits - Shift - DL.getTypeAllocSizeInBits(FieldTy);
Value *EltVal = SrcVal;
if (Shift) {
@@ -2377,7 +2388,7 @@ SROA::RewriteStoreUserOfWholeAlloca(StoreInst *SI, AllocaInst *AI,
}
// Truncate down to an integer of the right size.
- uint64_t FieldSizeBits = DL->getTypeSizeInBits(FieldTy);
+ uint64_t FieldSizeBits = DL.getTypeSizeInBits(FieldTy);
// Ignore zero sized fields like {}, they obviously contain no data.
if (FieldSizeBits == 0) continue;
@@ -2402,12 +2413,12 @@ SROA::RewriteStoreUserOfWholeAlloca(StoreInst *SI, AllocaInst *AI,
} else {
ArrayType *ATy = cast<ArrayType>(AllocaEltTy);
Type *ArrayEltTy = ATy->getElementType();
- uint64_t ElementOffset = DL->getTypeAllocSizeInBits(ArrayEltTy);
- uint64_t ElementSizeBits = DL->getTypeSizeInBits(ArrayEltTy);
+ uint64_t ElementOffset = DL.getTypeAllocSizeInBits(ArrayEltTy);
+ uint64_t ElementSizeBits = DL.getTypeSizeInBits(ArrayEltTy);
uint64_t Shift;
- if (DL->isBigEndian())
+ if (DL.isBigEndian())
Shift = AllocaSizeBits-ElementOffset;
else
Shift = 0;
@@ -2441,7 +2452,7 @@ SROA::RewriteStoreUserOfWholeAlloca(StoreInst *SI, AllocaInst *AI,
}
new StoreInst(EltVal, DestField, SI);
- if (DL->isBigEndian())
+ if (DL.isBigEndian())
Shift -= ElementOffset;
else
Shift += ElementOffset;
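The shift bookkeeping above is easiest to sanity-check numerically. For a 64-bit alloca holding two 32-bit fields, the little-endian shift is just the field's bit offset, while big-endian mirrors it from the top (Shift = AllocaSizeBits - Offset - FieldSizeBits). A standalone check:

    #include <cstdint>
    #include <iostream>

    int main() {
      const uint64_t AllocaBits = 64, FieldBits = 32;
      for (uint64_t OffBits : {0u, 32u}) {
        uint64_t LE = OffBits;                          // field at its offset
        uint64_t BE = AllocaBits - OffBits - FieldBits; // mirrored from top
        std::cout << "field@" << OffBits << ": LE shift " << LE
                  << ", BE shift " << BE << '\n';       // 0/32, then 32/0
      }
      return 0;
    }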
@@ -2459,7 +2470,8 @@ SROA::RewriteLoadUserOfWholeAlloca(LoadInst *LI, AllocaInst *AI,
// Extract each element out of the NewElts according to its structure offset
// and form the result value.
Type *AllocaEltTy = AI->getAllocatedType();
- uint64_t AllocaSizeBits = DL->getTypeAllocSizeInBits(AllocaEltTy);
+ const DataLayout &DL = LI->getModule()->getDataLayout();
+ uint64_t AllocaSizeBits = DL.getTypeAllocSizeInBits(AllocaEltTy);
DEBUG(dbgs() << "PROMOTING LOAD OF WHOLE ALLOCA: " << *AI << '\n' << *LI
<< '\n');
@@ -2469,10 +2481,10 @@ SROA::RewriteLoadUserOfWholeAlloca(LoadInst *LI, AllocaInst *AI,
const StructLayout *Layout = nullptr;
uint64_t ArrayEltBitOffset = 0;
if (StructType *EltSTy = dyn_cast<StructType>(AllocaEltTy)) {
- Layout = DL->getStructLayout(EltSTy);
+ Layout = DL.getStructLayout(EltSTy);
} else {
Type *ArrayEltTy = cast<ArrayType>(AllocaEltTy)->getElementType();
- ArrayEltBitOffset = DL->getTypeAllocSizeInBits(ArrayEltTy);
+ ArrayEltBitOffset = DL.getTypeAllocSizeInBits(ArrayEltTy);
}
Value *ResultVal =
@@ -2484,7 +2496,7 @@ SROA::RewriteLoadUserOfWholeAlloca(LoadInst *LI, AllocaInst *AI,
Value *SrcField = NewElts[i];
Type *FieldTy =
cast<PointerType>(SrcField->getType())->getElementType();
- uint64_t FieldSizeBits = DL->getTypeSizeInBits(FieldTy);
+ uint64_t FieldSizeBits = DL.getTypeSizeInBits(FieldTy);
// Ignore zero sized fields like {}, they obviously contain no data.
if (FieldSizeBits == 0) continue;
@@ -2515,7 +2527,7 @@ SROA::RewriteLoadUserOfWholeAlloca(LoadInst *LI, AllocaInst *AI,
else // Array case.
Shift = i*ArrayEltBitOffset;
- if (DL->isBigEndian())
+ if (DL.isBigEndian())
Shift = AllocaSizeBits-Shift-FieldIntTy->getBitWidth();
if (Shift) {
@@ -2532,7 +2544,7 @@ SROA::RewriteLoadUserOfWholeAlloca(LoadInst *LI, AllocaInst *AI,
}
// Handle tail padding by truncating the result
- if (DL->getTypeSizeInBits(LI->getType()) != AllocaSizeBits)
+ if (DL.getTypeSizeInBits(LI->getType()) != AllocaSizeBits)
ResultVal = new TruncInst(ResultVal, LI->getType(), "", LI);
LI->replaceAllUsesWith(ResultVal);
@@ -2589,13 +2601,15 @@ bool SROA::isSafeAllocaToScalarRepl(AllocaInst *AI) {
return false;
}
+ const DataLayout &DL = AI->getModule()->getDataLayout();
+
// Okay, we know all the users are promotable. If the aggregate is a memcpy
// source and destination, we have to be careful. In particular, the memcpy
// could be moving around elements that live in structure padding of the LLVM
// types, but may actually be used. In these cases, we refuse to promote the
// struct.
if (Info.isMemCpySrc && Info.isMemCpyDst &&
- HasPadding(AI->getAllocatedType(), *DL))
+ HasPadding(AI->getAllocatedType(), DL))
return false;
// If the alloca never has an access to just *part* of it, but is accessed
diff --git a/lib/Transforms/Scalar/Scalarizer.cpp b/lib/Transforms/Scalar/Scalarizer.cpp
index 6036c09..a457cba 100644
--- a/lib/Transforms/Scalar/Scalarizer.cpp
+++ b/lib/Transforms/Scalar/Scalarizer.cpp
@@ -165,7 +165,7 @@ private:
void gather(Instruction *, const ValueVector &);
bool canTransferMetadata(unsigned Kind);
void transferMetadata(Instruction *, const ValueVector &);
- bool getVectorLayout(Type *, unsigned, VectorLayout &);
+ bool getVectorLayout(Type *, unsigned, VectorLayout &, const DataLayout &);
bool finish();
template<typename T> bool splitBinary(Instruction &, const T &);
@@ -173,7 +173,6 @@ private:
ScatterMap Scattered;
GatherList Gathered;
unsigned ParallelLoopAccessMDKind;
- const DataLayout *DL;
bool ScalarizeLoadStore;
};
@@ -248,8 +247,6 @@ bool Scalarizer::doInitialization(Module &M) {
}
bool Scalarizer::runOnFunction(Function &F) {
- DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>();
- DL = DLP ? &DLP->getDataLayout() : nullptr;
for (Function::iterator BBI = F.begin(), BBE = F.end(); BBI != BBE; ++BBI) {
BasicBlock *BB = BBI;
for (BasicBlock::iterator II = BB->begin(), IE = BB->end(); II != IE;) {
@@ -345,10 +342,7 @@ void Scalarizer::transferMetadata(Instruction *Op, const ValueVector &CV) {
// Try to fill in Layout from Ty, returning true on success. Alignment is
// the alignment of the vector, or 0 if the ABI default should be used.
bool Scalarizer::getVectorLayout(Type *Ty, unsigned Alignment,
- VectorLayout &Layout) {
- if (!DL)
- return false;
-
+ VectorLayout &Layout, const DataLayout &DL) {
// Make sure we're dealing with a vector.
Layout.VecTy = dyn_cast<VectorType>(Ty);
if (!Layout.VecTy)
@@ -356,15 +350,15 @@ bool Scalarizer::getVectorLayout(Type *Ty, unsigned Alignment,
// Check that we're dealing with full-byte elements.
Layout.ElemTy = Layout.VecTy->getElementType();
- if (DL->getTypeSizeInBits(Layout.ElemTy) !=
- DL->getTypeStoreSizeInBits(Layout.ElemTy))
+ if (DL.getTypeSizeInBits(Layout.ElemTy) !=
+ DL.getTypeStoreSizeInBits(Layout.ElemTy))
return false;
if (Alignment)
Layout.VecAlign = Alignment;
else
- Layout.VecAlign = DL->getABITypeAlignment(Layout.VecTy);
- Layout.ElemSize = DL->getTypeStoreSize(Layout.ElemTy);
+ Layout.VecAlign = DL.getABITypeAlignment(Layout.VecTy);
+ Layout.ElemSize = DL.getTypeStoreSize(Layout.ElemTy);
return true;
}
@@ -456,7 +450,7 @@ bool Scalarizer::visitGetElementPtrInst(GetElementPtrInst &GEPI) {
Indices.resize(NumIndices);
for (unsigned J = 0; J < NumIndices; ++J)
Indices[J] = Ops[J][I];
- Res[I] = Builder.CreateGEP(Base[I], Indices,
+ Res[I] = Builder.CreateGEP(GEPI.getSourceElementType(), Base[I], Indices,
GEPI.getName() + ".i" + Twine(I));
if (GEPI.isInBounds())
if (GetElementPtrInst *NewGEPI = dyn_cast<GetElementPtrInst>(Res[I]))
@@ -595,7 +589,8 @@ bool Scalarizer::visitLoadInst(LoadInst &LI) {
return false;
VectorLayout Layout;
- if (!getVectorLayout(LI.getType(), LI.getAlignment(), Layout))
+ if (!getVectorLayout(LI.getType(), LI.getAlignment(), Layout,
+ LI.getModule()->getDataLayout()))
return false;
unsigned NumElems = Layout.VecTy->getNumElements();
@@ -619,7 +614,8 @@ bool Scalarizer::visitStoreInst(StoreInst &SI) {
VectorLayout Layout;
Value *FullValue = SI.getValueOperand();
- if (!getVectorLayout(FullValue->getType(), SI.getAlignment(), Layout))
+ if (!getVectorLayout(FullValue->getType(), SI.getAlignment(), Layout,
+ SI.getModule()->getDataLayout()))
return false;
unsigned NumElems = Layout.VecTy->getNumElements();
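Threading the layout through getVectorLayout keeps the interesting check visible: scalarized loads and stores are only sound when each element occupies whole bytes. The core of that check, extracted as a sketch:

    #include "llvm/IR/DataLayout.h"
    #include "llvm/IR/DerivedTypes.h"

    // A vector is byte-wise scalarizable only if element size == store size,
    // i.e. elements have no padding bits (rules out types like <N x i1>).
    static bool elemsAreByteSized(llvm::VectorType *VT,
                                  const llvm::DataLayout &DL) {
      llvm::Type *ElemTy = VT->getElementType();
      return DL.getTypeSizeInBits(ElemTy) == DL.getTypeStoreSizeInBits(ElemTy);
    }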
diff --git a/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp b/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp
index bffe8df..1a04d74 100644
--- a/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp
+++ b/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp
@@ -199,18 +199,15 @@ class ConstantOffsetExtractor {
/// new index representing the remainder (equal to the original index minus
/// the constant offset), or nullptr if we cannot extract a constant offset.
/// \p Idx The given GEP index
- /// \p DL The datalayout of the module
/// \p GEP The given GEP
- static Value *Extract(Value *Idx, const DataLayout *DL,
- GetElementPtrInst *GEP);
+ static Value *Extract(Value *Idx, GetElementPtrInst *GEP);
/// Looks for a constant offset from the given GEP index without extracting
  /// it. It returns the numeric value of the extracted constant offset (0 on
  /// failure). The meaning of the arguments is the same as in Extract.
- static int64_t Find(Value *Idx, const DataLayout *DL, GetElementPtrInst *GEP);
+ static int64_t Find(Value *Idx, GetElementPtrInst *GEP);
private:
- ConstantOffsetExtractor(const DataLayout *Layout, Instruction *InsertionPt)
- : DL(Layout), IP(InsertionPt) {}
+ ConstantOffsetExtractor(Instruction *InsertionPt) : IP(InsertionPt) {}
/// Searches the expression that computes V for a non-zero constant C s.t.
  /// V can be reassociated into the form V' + C. If the search is
  /// successful, returns C and updates UserChain as a def-use chain from C to V;
@@ -294,8 +291,6 @@ class ConstantOffsetExtractor {
/// A data structure used in rebuildWithoutConstOffset. Contains all
/// sext/zext instructions along UserChain.
SmallVector<CastInst *, 16> ExtInsts;
- /// The data layout of the module. Used in ComputeKnownBits.
- const DataLayout *DL;
Instruction *IP; /// Insertion position of cloned instructions.
};
@@ -312,19 +307,10 @@ class SeparateConstOffsetFromGEP : public FunctionPass {
}
void getAnalysisUsage(AnalysisUsage &AU) const override {
- AU.addRequired<DataLayoutPass>();
AU.addRequired<TargetTransformInfoWrapperPass>();
AU.setPreservesCFG();
}
- bool doInitialization(Module &M) override {
- DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>();
- if (DLP == nullptr)
- report_fatal_error("data layout missing");
- DL = &DLP->getDataLayout();
- return false;
- }
-
bool runOnFunction(Function &F) override;
private:
@@ -372,7 +358,6 @@ class SeparateConstOffsetFromGEP : public FunctionPass {
/// Verified in @i32_add in split-gep.ll
bool canonicalizeArrayIndicesToPointerSize(GetElementPtrInst *GEP);
- const DataLayout *DL;
const TargetMachine *TM;
/// Whether to lower a GEP with multiple indices into arithmetic operations or
/// multiple GEPs with a single index.
@@ -386,7 +371,6 @@ INITIALIZE_PASS_BEGIN(
"Split GEPs to a variadic base and a constant offset for better CSE", false,
false)
INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
-INITIALIZE_PASS_DEPENDENCY(DataLayoutPass)
INITIALIZE_PASS_END(
SeparateConstOffsetFromGEP, "separate-const-offset-from-gep",
"Split GEPs to a variadic base and a constant offset for better CSE", false,
@@ -647,9 +631,8 @@ Value *ConstantOffsetExtractor::removeConstOffset(unsigned ChainIndex) {
return BO;
}
-Value *ConstantOffsetExtractor::Extract(Value *Idx, const DataLayout *DL,
- GetElementPtrInst *GEP) {
- ConstantOffsetExtractor Extractor(DL, GEP);
+Value *ConstantOffsetExtractor::Extract(Value *Idx, GetElementPtrInst *GEP) {
+ ConstantOffsetExtractor Extractor(GEP);
// Find a non-zero constant offset first.
APInt ConstantOffset =
Extractor.find(Idx, /* SignExtended */ false, /* ZeroExtended */ false,
@@ -660,10 +643,9 @@ Value *ConstantOffsetExtractor::Extract(Value *Idx, const DataLayout *DL,
return Extractor.rebuildWithoutConstOffset();
}
-int64_t ConstantOffsetExtractor::Find(Value *Idx, const DataLayout *DL,
- GetElementPtrInst *GEP) {
+int64_t ConstantOffsetExtractor::Find(Value *Idx, GetElementPtrInst *GEP) {
  // If Idx is an index of an inbounds GEP, Idx is guaranteed to be non-negative.
- return ConstantOffsetExtractor(DL, GEP)
+ return ConstantOffsetExtractor(GEP)
.find(Idx, /* SignExtended */ false, /* ZeroExtended */ false,
GEP->isInBounds())
.getSExtValue();
@@ -674,6 +656,7 @@ void ConstantOffsetExtractor::ComputeKnownBits(Value *V, APInt &KnownOne,
IntegerType *IT = cast<IntegerType>(V->getType());
KnownOne = APInt(IT->getBitWidth(), 0);
KnownZero = APInt(IT->getBitWidth(), 0);
+ const DataLayout &DL = IP->getModule()->getDataLayout();
llvm::computeKnownBits(V, KnownZero, KnownOne, DL, 0);
}
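NoCommonBits is what lets the extractor treat 'or' like 'add': when known-zero bits show that two operands can never have a set bit in common, a + b and a | b coincide (no carries), so a constant or'ed in can be pulled out as an offset. The identity itself:

    #include <cstdint>
    #include <iostream>

    int main() {
      uint32_t a = 0xF0, b = 0x0C; // disjoint bit patterns: no carries occur
      std::cout << (a + b) << " == " << (a | b) << '\n'; // 252 == 252
      return 0;
    }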
@@ -689,7 +672,8 @@ bool ConstantOffsetExtractor::NoCommonBits(Value *LHS, Value *RHS) const {
bool SeparateConstOffsetFromGEP::canonicalizeArrayIndicesToPointerSize(
GetElementPtrInst *GEP) {
bool Changed = false;
- Type *IntPtrTy = DL->getIntPtrType(GEP->getType());
+ const DataLayout &DL = GEP->getModule()->getDataLayout();
+ Type *IntPtrTy = DL.getIntPtrType(GEP->getType());
gep_type_iterator GTI = gep_type_begin(*GEP);
for (User::op_iterator I = GEP->op_begin() + 1, E = GEP->op_end();
I != E; ++I, ++GTI) {
@@ -710,18 +694,19 @@ SeparateConstOffsetFromGEP::accumulateByteOffset(GetElementPtrInst *GEP,
NeedsExtraction = false;
int64_t AccumulativeByteOffset = 0;
gep_type_iterator GTI = gep_type_begin(*GEP);
+ const DataLayout &DL = GEP->getModule()->getDataLayout();
for (unsigned I = 1, E = GEP->getNumOperands(); I != E; ++I, ++GTI) {
if (isa<SequentialType>(*GTI)) {
// Tries to extract a constant offset from this GEP index.
int64_t ConstantOffset =
- ConstantOffsetExtractor::Find(GEP->getOperand(I), DL, GEP);
+ ConstantOffsetExtractor::Find(GEP->getOperand(I), GEP);
if (ConstantOffset != 0) {
NeedsExtraction = true;
// A GEP may have multiple indices. We accumulate the extracted
// constant offset to a byte offset, and later offset the remainder of
// the original GEP with this byte offset.
AccumulativeByteOffset +=
- ConstantOffset * DL->getTypeAllocSize(GTI.getIndexedType());
+ ConstantOffset * DL.getTypeAllocSize(GTI.getIndexedType());
}
} else if (LowerGEP) {
StructType *StTy = cast<StructType>(*GTI);
@@ -730,7 +715,7 @@ SeparateConstOffsetFromGEP::accumulateByteOffset(GetElementPtrInst *GEP,
if (Field != 0) {
NeedsExtraction = true;
AccumulativeByteOffset +=
- DL->getStructLayout(StTy)->getElementOffset(Field);
+ DL.getStructLayout(StTy)->getElementOffset(Field);
}
}
}
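Stripped of the extractor machinery, the accumulation reduces to: sequential indices contribute index * sizeof(element), struct fields contribute their layout offset. A simplified sketch that only sees literal ConstantInt indices (the pass's ConstantOffsetExtractor::Find also digs constants out of add/sext/or chains):

    #include <cstdint>
    #include "llvm/IR/Constants.h"
    #include "llvm/IR/DataLayout.h"
    #include "llvm/IR/DerivedTypes.h"
    #include "llvm/IR/GetElementPtrTypeIterator.h"
    #include "llvm/IR/Instructions.h"

    static int64_t constantByteOffset(llvm::GetElementPtrInst *GEP,
                                      const llvm::DataLayout &DL) {
      int64_t Off = 0;
      llvm::gep_type_iterator GTI = llvm::gep_type_begin(*GEP);
      for (unsigned I = 1, E = GEP->getNumOperands(); I != E; ++I, ++GTI) {
        auto *CI = llvm::dyn_cast<llvm::ConstantInt>(GEP->getOperand(I));
        if (!CI)
          continue; // variable index: contributes no constant part
        if (auto *StTy = llvm::dyn_cast<llvm::StructType>(*GTI))
          Off += DL.getStructLayout(StTy)->getElementOffset(
              static_cast<unsigned>(CI->getZExtValue()));
        else
          Off += CI->getSExtValue() *
                 static_cast<int64_t>(DL.getTypeAllocSize(GTI.getIndexedType()));
      }
      return Off;
    }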
@@ -740,7 +725,8 @@ SeparateConstOffsetFromGEP::accumulateByteOffset(GetElementPtrInst *GEP,
void SeparateConstOffsetFromGEP::lowerToSingleIndexGEPs(
GetElementPtrInst *Variadic, int64_t AccumulativeByteOffset) {
IRBuilder<> Builder(Variadic);
- Type *IntPtrTy = DL->getIntPtrType(Variadic->getType());
+ const DataLayout &DL = Variadic->getModule()->getDataLayout();
+ Type *IntPtrTy = DL.getIntPtrType(Variadic->getType());
Type *I8PtrTy =
Builder.getInt8PtrTy(Variadic->getType()->getPointerAddressSpace());
@@ -760,7 +746,7 @@ void SeparateConstOffsetFromGEP::lowerToSingleIndexGEPs(
continue;
APInt ElementSize = APInt(IntPtrTy->getIntegerBitWidth(),
- DL->getTypeAllocSize(GTI.getIndexedType()));
+ DL.getTypeAllocSize(GTI.getIndexedType()));
// Scale the index by element size.
if (ElementSize != 1) {
if (ElementSize.isPowerOf2()) {
@@ -791,7 +777,8 @@ void
SeparateConstOffsetFromGEP::lowerToArithmetics(GetElementPtrInst *Variadic,
int64_t AccumulativeByteOffset) {
IRBuilder<> Builder(Variadic);
- Type *IntPtrTy = DL->getIntPtrType(Variadic->getType());
+ const DataLayout &DL = Variadic->getModule()->getDataLayout();
+ Type *IntPtrTy = DL.getIntPtrType(Variadic->getType());
Value *ResultPtr = Builder.CreatePtrToInt(Variadic->getOperand(0), IntPtrTy);
gep_type_iterator GTI = gep_type_begin(*Variadic);
@@ -807,7 +794,7 @@ SeparateConstOffsetFromGEP::lowerToArithmetics(GetElementPtrInst *Variadic,
continue;
APInt ElementSize = APInt(IntPtrTy->getIntegerBitWidth(),
- DL->getTypeAllocSize(GTI.getIndexedType()));
+ DL.getTypeAllocSize(GTI.getIndexedType()));
// Scale the index by element size.
if (ElementSize != 1) {
if (ElementSize.isPowerOf2()) {
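
Both lowering paths scale each variadic index by its element size, preferring a shift when the size is a power of two. A hedged, self-contained sketch of that scaling decision, with the IRBuilder calls replaced by plain integer arithmetic:

#include <cstdint>

// Sketch: multiply Idx by ElementSize, using a shift when the size is a
// power of two, mirroring the CreateShl/CreateMul choice in the pass.
int64_t scaleIndex(int64_t Idx, uint64_t ElementSize) {
  if (ElementSize == 1)
    return Idx;                          // no scaling needed
  if ((ElementSize & (ElementSize - 1)) == 0) {
    unsigned Log2 = 0;
    while ((ElementSize >> Log2) != 1) ++Log2;
    return Idx << Log2;                  // power of two: shift
  }
  return Idx * (int64_t)ElementSize;     // general case: multiply
}
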
@@ -880,8 +867,7 @@ bool SeparateConstOffsetFromGEP::splitGEP(GetElementPtrInst *GEP) {
if (isa<SequentialType>(*GTI)) {
// Splits this GEP index into a variadic part and a constant offset, and
// uses the variadic part as the new index.
- Value *NewIdx =
- ConstantOffsetExtractor::Extract(GEP->getOperand(I), DL, GEP);
+ Value *NewIdx = ConstantOffsetExtractor::Extract(GEP->getOperand(I), GEP);
if (NewIdx != nullptr) {
GEP->setOperand(I, NewIdx);
}
@@ -958,15 +944,17 @@ bool SeparateConstOffsetFromGEP::splitGEP(GetElementPtrInst *GEP) {
// Per ANSI C standard, signed / unsigned = unsigned and signed % unsigned =
 // unsigned. Therefore, we cast ElementTypeSizeOfGEP to signed because it is
// used with unsigned integers later.
+ const DataLayout &DL = GEP->getModule()->getDataLayout();
int64_t ElementTypeSizeOfGEP = static_cast<int64_t>(
- DL->getTypeAllocSize(GEP->getType()->getElementType()));
- Type *IntPtrTy = DL->getIntPtrType(GEP->getType());
+ DL.getTypeAllocSize(GEP->getType()->getElementType()));
+ Type *IntPtrTy = DL.getIntPtrType(GEP->getType());
if (AccumulativeByteOffset % ElementTypeSizeOfGEP == 0) {
    // Very likely. As long as %gep is naturally aligned, the byte offset we
// extracted should be a multiple of sizeof(*%gep).
int64_t Index = AccumulativeByteOffset / ElementTypeSizeOfGEP;
- NewGEP = GetElementPtrInst::Create(
- NewGEP, ConstantInt::get(IntPtrTy, Index, true), GEP->getName(), GEP);
+ NewGEP = GetElementPtrInst::Create(GEP->getResultElementType(), NewGEP,
+ ConstantInt::get(IntPtrTy, Index, true),
+ GEP->getName(), GEP);
} else {
// Unlikely but possible. For example,
// #pragma pack(1)
@@ -986,8 +974,9 @@ bool SeparateConstOffsetFromGEP::splitGEP(GetElementPtrInst *GEP) {
GEP->getPointerAddressSpace());
NewGEP = new BitCastInst(NewGEP, I8PtrTy, "", GEP);
NewGEP = GetElementPtrInst::Create(
- NewGEP, ConstantInt::get(IntPtrTy, AccumulativeByteOffset, true),
- "uglygep", GEP);
+ Type::getInt8Ty(GEP->getContext()), NewGEP,
+ ConstantInt::get(IntPtrTy, AccumulativeByteOffset, true), "uglygep",
+ GEP);
if (GEP->getType() != I8PtrTy)
NewGEP = new BitCastInst(NewGEP, GEP->getType(), GEP->getName(), GEP);
}
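
The rewrite above offsets the new GEP by AccumulativeByteOffset: when the byte offset divides the element size evenly it becomes an ordinary index, otherwise the pointer is bumped through an i8* "uglygep". The same decision in plain C++ pointer arithmetic, as an illustrative sketch:

#include <cstdint>

// Sketch: apply a byte offset to a T* the way splitGEP does. If the offset
// is a multiple of sizeof(T), index in units of T; otherwise fall back to
// byte (char*) arithmetic, the "uglygep" path.
template <typename T>
T *applyByteOffset(T *P, int64_t ByteOff) {
  if (ByteOff % (int64_t)sizeof(T) == 0)
    return P + ByteOff / (int64_t)sizeof(T);   // natural indexing
  return reinterpret_cast<T *>(
      reinterpret_cast<char *>(P) + ByteOff);  // byte-wise "uglygep"
}
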
diff --git a/lib/Transforms/Scalar/SimplifyCFGPass.cpp b/lib/Transforms/Scalar/SimplifyCFGPass.cpp
index fb8fe38..8566cd9 100644
--- a/lib/Transforms/Scalar/SimplifyCFGPass.cpp
+++ b/lib/Transforms/Scalar/SimplifyCFGPass.cpp
@@ -127,7 +127,7 @@ static bool mergeEmptyReturnBlocks(Function &F) {
/// iterativelySimplifyCFG - Call SimplifyCFG on all the blocks in the function,
/// iterating until no more changes are made.
static bool iterativelySimplifyCFG(Function &F, const TargetTransformInfo &TTI,
- const DataLayout *DL, AssumptionCache *AC,
+ AssumptionCache *AC,
unsigned BonusInstThreshold) {
bool Changed = false;
bool LocalChange = true;
@@ -137,7 +137,7 @@ static bool iterativelySimplifyCFG(Function &F, const TargetTransformInfo &TTI,
// Loop over all of the basic blocks and remove them if they are unneeded...
//
for (Function::iterator BBIt = F.begin(); BBIt != F.end(); ) {
- if (SimplifyCFG(BBIt++, TTI, BonusInstThreshold, DL, AC)) {
+ if (SimplifyCFG(BBIt++, TTI, BonusInstThreshold, AC)) {
LocalChange = true;
++NumSimpl;
}
@@ -148,11 +148,10 @@ static bool iterativelySimplifyCFG(Function &F, const TargetTransformInfo &TTI,
}
static bool simplifyFunctionCFG(Function &F, const TargetTransformInfo &TTI,
- const DataLayout *DL, AssumptionCache *AC,
- int BonusInstThreshold) {
+ AssumptionCache *AC, int BonusInstThreshold) {
bool EverChanged = removeUnreachableBlocks(F);
EverChanged |= mergeEmptyReturnBlocks(F);
- EverChanged |= iterativelySimplifyCFG(F, TTI, DL, AC, BonusInstThreshold);
+ EverChanged |= iterativelySimplifyCFG(F, TTI, AC, BonusInstThreshold);
// If neither pass changed anything, we're done.
if (!EverChanged) return false;
@@ -166,7 +165,7 @@ static bool simplifyFunctionCFG(Function &F, const TargetTransformInfo &TTI,
return true;
do {
- EverChanged = iterativelySimplifyCFG(F, TTI, DL, AC, BonusInstThreshold);
+ EverChanged = iterativelySimplifyCFG(F, TTI, AC, BonusInstThreshold);
EverChanged |= removeUnreachableBlocks(F);
} while (EverChanged);
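
simplifyFunctionCFG keeps re-running the per-block simplification until a whole sweep makes no change. The driver is an ordinary fixed-point loop; a minimal sketch of the pattern, with a stand-in simplify step instead of SimplifyCFG:

#include <vector>

// Sketch: iterate a local simplification until it stops firing, as
// iterativelySimplifyCFG does over basic blocks. simplifyOne() stands in
// for SimplifyCFG on a single block.
bool iterateToFixedPoint(std::vector<int> &Blocks,
                         bool (*simplifyOne)(int &)) {
  bool Changed = false, LocalChange = true;
  while (LocalChange) {
    LocalChange = false;
    for (int &B : Blocks)
      if (simplifyOne(B)) {   // one block improved; try everything again
        LocalChange = true;
        Changed = true;
      }
  }
  return Changed;
}
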
@@ -181,11 +180,10 @@ SimplifyCFGPass::SimplifyCFGPass(int BonusInstThreshold)
PreservedAnalyses SimplifyCFGPass::run(Function &F,
AnalysisManager<Function> *AM) {
- auto *DL = F.getParent()->getDataLayout();
auto &TTI = AM->getResult<TargetIRAnalysis>(F);
auto &AC = AM->getResult<AssumptionAnalysis>(F);
- if (!simplifyFunctionCFG(F, TTI, DL, &AC, BonusInstThreshold))
+ if (!simplifyFunctionCFG(F, TTI, &AC, BonusInstThreshold))
return PreservedAnalyses::none();
return PreservedAnalyses::all();
@@ -207,9 +205,7 @@ struct CFGSimplifyPass : public FunctionPass {
&getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
const TargetTransformInfo &TTI =
getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
- DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>();
- const DataLayout *DL = DLP ? &DLP->getDataLayout() : nullptr;
- return simplifyFunctionCFG(F, TTI, DL, AC, BonusInstThreshold);
+ return simplifyFunctionCFG(F, TTI, AC, BonusInstThreshold);
}
void getAnalysisUsage(AnalysisUsage &AU) const override {
diff --git a/lib/Transforms/Scalar/Sink.cpp b/lib/Transforms/Scalar/Sink.cpp
index d0ee0a6..b169d56 100644
--- a/lib/Transforms/Scalar/Sink.cpp
+++ b/lib/Transforms/Scalar/Sink.cpp
@@ -21,6 +21,7 @@
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/Module.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
@@ -35,7 +36,6 @@ namespace {
DominatorTree *DT;
LoopInfo *LI;
AliasAnalysis *AA;
- const DataLayout *DL;
public:
static char ID; // Pass identification
@@ -100,8 +100,6 @@ bool Sinking::runOnFunction(Function &F) {
DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
AA = &getAnalysis<AliasAnalysis>();
- DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>();
- DL = DLP ? &DLP->getDataLayout() : nullptr;
bool MadeChange, EverMadeChange = false;
@@ -196,7 +194,7 @@ bool Sinking::IsAcceptableTarget(Instruction *Inst,
if (SuccToSinkTo->getUniquePredecessor() != Inst->getParent()) {
// We cannot sink a load across a critical edge - there may be stores in
// other code paths.
- if (!isSafeToSpeculativelyExecute(Inst, DL))
+ if (!isSafeToSpeculativelyExecute(Inst))
return false;
// We don't want to sink across a critical edge if we don't dominate the
diff --git a/lib/Transforms/Scalar/StraightLineStrengthReduce.cpp b/lib/Transforms/Scalar/StraightLineStrengthReduce.cpp
index 4edc86c..e71031c 100644
--- a/lib/Transforms/Scalar/StraightLineStrengthReduce.cpp
+++ b/lib/Transforms/Scalar/StraightLineStrengthReduce.cpp
@@ -15,19 +15,30 @@
//
// There are many optimizations we can perform in the domain of SLSR. This file
// for now contains only an initial step. Specifically, we look for strength
-// reduction candidate in the form of
+// reduction candidates in two forms:
//
-// (B + i) * S
+// Form 1: (B + i) * S
+// Form 2: &B[i * S]
//
-// where B and S are integer constants or variables, and i is a constant
-// integer. If we found two such candidates
+// where S is an integer variable, and i is a constant integer. If we found two
+// candidates
//
-// S1: X = (B + i) * S S2: Y = (B + i') * S
+// S1: X = (B + i) * S
+// S2: Y = (B + i') * S
+//
+// or
+//
+// S1: X = &B[i * S]
+// S2: Y = &B[i' * S]
//
// and S1 dominates S2, we call S1 a basis of S2, and can replace S2 with
//
// Y = X + (i' - i) * S
//
+// or
+//
+// Y = &X[(i' - i) * S]
+//
// where (i' - i) * S is folded to the extent possible. When S2 has multiple
// bases, we pick the one that is closest to S2, or S2's "immediate" basis.
//
@@ -35,8 +46,6 @@
//
// - Handle candidates in the form of B + i * S
//
-// - Handle candidates in the form of pointer arithmetics. e.g., B[i * S]
-//
 // - Floating point arithmetic when fast math is enabled.
//
// - SLSR may decrease ILP at the architecture level. Targets that are very
@@ -45,6 +54,10 @@
#include <vector>
#include "llvm/ADT/DenseSet.h"
+#include "llvm/ADT/FoldingSet.h"
+#include "llvm/Analysis/ScalarEvolution.h"
+#include "llvm/Analysis/TargetTransformInfo.h"
+#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Module.h"
@@ -58,14 +71,30 @@ using namespace PatternMatch;
namespace {
class StraightLineStrengthReduce : public FunctionPass {
- public:
+public:
// SLSR candidate. Such a candidate must be in the form of
// (Base + Index) * Stride
+ // or
+ // Base[..][Index * Stride][..]
struct Candidate : public ilist_node<Candidate> {
- Candidate(Value *B = nullptr, ConstantInt *Idx = nullptr,
- Value *S = nullptr, Instruction *I = nullptr)
- : Base(B), Index(Idx), Stride(S), Ins(I), Basis(nullptr) {}
- Value *Base;
+ enum Kind {
+ Invalid, // reserved for the default constructor
+ Mul, // (B + i) * S
+ GEP, // &B[..][i * S][..]
+ };
+
+ Candidate()
+ : CandidateKind(Invalid), Base(nullptr), Index(nullptr),
+ Stride(nullptr), Ins(nullptr), Basis(nullptr) {}
+ Candidate(Kind CT, const SCEV *B, ConstantInt *Idx, Value *S,
+ Instruction *I)
+ : CandidateKind(CT), Base(B), Index(Idx), Stride(S), Ins(I),
+ Basis(nullptr) {}
+ Kind CandidateKind;
+ const SCEV *Base;
+ // Note that Index and Stride of a GEP candidate may not have the same
+ // integer type. In that case, during rewriting, Stride will be
+ // sign-extended or truncated to Index's type.
ConstantInt *Index;
Value *Stride;
// The instruction this candidate corresponds to. It helps us to rewrite a
@@ -90,33 +119,70 @@ class StraightLineStrengthReduce : public FunctionPass {
static char ID;
- StraightLineStrengthReduce() : FunctionPass(ID), DT(nullptr) {
+ StraightLineStrengthReduce()
+ : FunctionPass(ID), DL(nullptr), DT(nullptr), TTI(nullptr) {
initializeStraightLineStrengthReducePass(*PassRegistry::getPassRegistry());
}
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.addRequired<DominatorTreeWrapperPass>();
+ AU.addRequired<ScalarEvolution>();
+ AU.addRequired<TargetTransformInfoWrapperPass>();
// We do not modify the shape of the CFG.
AU.setPreservesCFG();
}
+ bool doInitialization(Module &M) override {
+ DL = &M.getDataLayout();
+ return false;
+ }
+
bool runOnFunction(Function &F) override;
- private:
+private:
// Returns true if Basis is a basis for C, i.e., Basis dominates C and they
// share the same base and stride.
bool isBasisFor(const Candidate &Basis, const Candidate &C);
// Checks whether I is in a candidate form. If so, adds all the matching forms
// to Candidates, and tries to find the immediate basis for each of them.
void allocateCandidateAndFindBasis(Instruction *I);
- // Given that I is in the form of "(B + Idx) * S", adds this form to
- // Candidates, and finds its immediate basis.
- void allocateCandidateAndFindBasis(Value *B, ConstantInt *Idx, Value *S,
+ // Allocate candidates and find bases for Mul instructions.
+ void allocateCandidateAndFindBasisForMul(Instruction *I);
+ // Splits LHS into Base + Index and, if that succeeds, calls
+ // allocateCandidateAndFindBasis.
+ void allocateCandidateAndFindBasisForMul(Value *LHS, Value *RHS,
+ Instruction *I);
+ // Allocate candidates and find bases for GetElementPtr instructions.
+ void allocateCandidateAndFindBasisForGEP(GetElementPtrInst *GEP);
+ // A helper function that scales Idx with ElementSize before invoking
+ // allocateCandidateAndFindBasis.
+ void allocateCandidateAndFindBasisForGEP(const SCEV *B, ConstantInt *Idx,
+ Value *S, uint64_t ElementSize,
+ Instruction *I);
+ // Adds the given form <CT, B, Idx, S> to Candidates, and finds its immediate
+ // basis.
+ void allocateCandidateAndFindBasis(Candidate::Kind CT, const SCEV *B,
+ ConstantInt *Idx, Value *S,
Instruction *I);
// Rewrites candidate C with respect to Basis.
void rewriteCandidateWithBasis(const Candidate &C, const Candidate &Basis);
+ // A helper function that factors ArrayIdx into a product of a stride and a
+ // constant index, and invokes allocateCandidateAndFindBasis with the
+ // factorings.
+ void factorArrayIndex(Value *ArrayIdx, const SCEV *Base, uint64_t ElementSize,
+ GetElementPtrInst *GEP);
+ // Emit code that computes the "bump" from Basis to C. If the candidate is a
+ // GEP and the bump is not divisible by the element size of the GEP, this
+ // function sets the BumpWithUglyGEP flag to notify its caller to bump the
+ // basis using an ugly GEP.
+ static Value *emitBump(const Candidate &Basis, const Candidate &C,
+ IRBuilder<> &Builder, const DataLayout *DL,
+ bool &BumpWithUglyGEP);
+ const DataLayout *DL;
DominatorTree *DT;
+ ScalarEvolution *SE;
+ TargetTransformInfo *TTI;
ilist<Candidate> Candidates;
// Temporarily holds all instructions that are unlinked (but not deleted) by
// rewriteCandidateWithBasis. These instructions will be actually removed
@@ -129,6 +195,8 @@ char StraightLineStrengthReduce::ID = 0;
INITIALIZE_PASS_BEGIN(StraightLineStrengthReduce, "slsr",
"Straight line strength reduction", false, false)
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(ScalarEvolution)
+INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
INITIALIZE_PASS_END(StraightLineStrengthReduce, "slsr",
"Straight line strength reduction", false, false)
@@ -141,9 +209,47 @@ bool StraightLineStrengthReduce::isBasisFor(const Candidate &Basis,
return (Basis.Ins != C.Ins && // skip the same instruction
// Basis must dominate C in order to rewrite C with respect to Basis.
DT->dominates(Basis.Ins->getParent(), C.Ins->getParent()) &&
- // They share the same base and stride.
+ // They share the same base, stride, and candidate kind.
Basis.Base == C.Base &&
- Basis.Stride == C.Stride);
+ Basis.Stride == C.Stride &&
+ Basis.CandidateKind == C.CandidateKind);
+}
+
+static bool isCompletelyFoldable(GetElementPtrInst *GEP,
+ const TargetTransformInfo *TTI,
+ const DataLayout *DL) {
+ GlobalVariable *BaseGV = nullptr;
+ int64_t BaseOffset = 0;
+ bool HasBaseReg = false;
+ int64_t Scale = 0;
+
+ if (GlobalVariable *GV = dyn_cast<GlobalVariable>(GEP->getPointerOperand()))
+ BaseGV = GV;
+ else
+ HasBaseReg = true;
+
+ gep_type_iterator GTI = gep_type_begin(GEP);
+ for (auto I = GEP->idx_begin(); I != GEP->idx_end(); ++I, ++GTI) {
+ if (isa<SequentialType>(*GTI)) {
+ int64_t ElementSize = DL->getTypeAllocSize(GTI.getIndexedType());
+ if (ConstantInt *ConstIdx = dyn_cast<ConstantInt>(*I)) {
+ BaseOffset += ConstIdx->getSExtValue() * ElementSize;
+ } else {
+ // Needs scale register.
+ if (Scale != 0) {
+ // No addressing mode takes two scale registers.
+ return false;
+ }
+ Scale = ElementSize;
+ }
+ } else {
+ StructType *STy = cast<StructType>(*GTI);
+ uint64_t Field = cast<ConstantInt>(*I)->getZExtValue();
+ BaseOffset += DL->getStructLayout(STy)->getElementOffset(Field);
+ }
+ }
+ return TTI->isLegalAddressingMode(GEP->getType()->getElementType(), BaseGV,
+ BaseOffset, HasBaseReg, Scale);
}
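
The new isCompletelyFoldable walks the GEP's indices and folds them into the (BaseGV, BaseOffset, Scale) shape that isLegalAddressingMode accepts: constant indices accumulate into the offset, and at most one variable index may claim the scale. A standalone sketch of that accumulation over hypothetical index records:

#include <cstdint>
#include <vector>

struct IdxRec {
  bool IsConst;        // constant array index?
  int64_t Value;       // the constant, if IsConst
  int64_t ElementSize; // byte size of the indexed element
};

// Sketch: returns false if the address needs two scaled registers, which
// no addressing mode provides; otherwise yields BaseOffset and Scale.
bool foldIntoAddressingMode(const std::vector<IdxRec> &Idxs,
                            int64_t &BaseOffset, int64_t &Scale) {
  BaseOffset = 0;
  Scale = 0;
  for (const IdxRec &R : Idxs) {
    if (R.IsConst) {
      BaseOffset += R.Value * R.ElementSize; // fold into the offset
    } else {
      if (Scale != 0)
        return false;       // second variable index: not foldable
      Scale = R.ElementSize;
    }
  }
  return true;
}
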
// TODO: We currently implement an algorithm whose time complexity is linear to
@@ -153,11 +259,17 @@ bool StraightLineStrengthReduce::isBasisFor(const Candidate &Basis,
// table is indexed by the base and the stride of a candidate. Therefore,
// finding the immediate basis of a candidate boils down to one hash-table look
// up.
-void StraightLineStrengthReduce::allocateCandidateAndFindBasis(Value *B,
- ConstantInt *Idx,
- Value *S,
- Instruction *I) {
- Candidate C(B, Idx, S, I);
+void StraightLineStrengthReduce::allocateCandidateAndFindBasis(
+ Candidate::Kind CT, const SCEV *B, ConstantInt *Idx, Value *S,
+ Instruction *I) {
+ if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(I)) {
+ // If &B[Idx * S] fits into an addressing mode, do not turn it into
+ // non-free computation.
+ if (isCompletelyFoldable(GEP, TTI, DL))
+ return;
+ }
+
+ Candidate C(CT, B, Idx, S, I);
// Try to compute the immediate basis of C.
unsigned NumIterations = 0;
// Limit the scan radius to avoid running forever.
@@ -176,60 +288,209 @@ void StraightLineStrengthReduce::allocateCandidateAndFindBasis(Value *B,
}
void StraightLineStrengthReduce::allocateCandidateAndFindBasis(Instruction *I) {
+ switch (I->getOpcode()) {
+ case Instruction::Mul:
+ allocateCandidateAndFindBasisForMul(I);
+ break;
+ case Instruction::GetElementPtr:
+ allocateCandidateAndFindBasisForGEP(cast<GetElementPtrInst>(I));
+ break;
+ }
+}
+
+void StraightLineStrengthReduce::allocateCandidateAndFindBasisForMul(
+ Value *LHS, Value *RHS, Instruction *I) {
Value *B = nullptr;
ConstantInt *Idx = nullptr;
- // "(Base + Index) * Stride" must be a Mul instruction at the first hand.
- if (I->getOpcode() == Instruction::Mul) {
- if (IntegerType *ITy = dyn_cast<IntegerType>(I->getType())) {
- Value *LHS = I->getOperand(0), *RHS = I->getOperand(1);
- for (unsigned Swapped = 0; Swapped < 2; ++Swapped) {
- // Only handle the canonical operand ordering.
- if (match(LHS, m_Add(m_Value(B), m_ConstantInt(Idx)))) {
- // If LHS is in the form of "Base + Index", then I is in the form of
- // "(Base + Index) * RHS".
- allocateCandidateAndFindBasis(B, Idx, RHS, I);
- } else {
- // Otherwise, at least try the form (LHS + 0) * RHS.
- allocateCandidateAndFindBasis(LHS, ConstantInt::get(ITy, 0), RHS, I);
- }
- // Swap LHS and RHS so that we also cover the cases where LHS is the
- // stride.
- if (LHS == RHS)
- break;
- std::swap(LHS, RHS);
- }
- }
+ // Only handle the canonical operand ordering.
+ if (match(LHS, m_Add(m_Value(B), m_ConstantInt(Idx)))) {
+ // If LHS is in the form of "Base + Index", then I is in the form of
+ // "(Base + Index) * RHS".
+ allocateCandidateAndFindBasis(Candidate::Mul, SE->getSCEV(B), Idx, RHS, I);
+ } else {
+ // Otherwise, at least try the form (LHS + 0) * RHS.
+ ConstantInt *Zero = ConstantInt::get(cast<IntegerType>(I->getType()), 0);
+ allocateCandidateAndFindBasis(Candidate::Mul, SE->getSCEV(LHS), Zero, RHS,
+ I);
+ }
+}
+
+void StraightLineStrengthReduce::allocateCandidateAndFindBasisForMul(
+ Instruction *I) {
+ // Try matching (B + i) * S.
+ // TODO: we could extend SLSR to float and vector types.
+ if (!isa<IntegerType>(I->getType()))
+ return;
+
+ Value *LHS = I->getOperand(0), *RHS = I->getOperand(1);
+ allocateCandidateAndFindBasisForMul(LHS, RHS, I);
+ if (LHS != RHS) {
+ // Symmetrically, try to split RHS to Base + Index.
+ allocateCandidateAndFindBasisForMul(RHS, LHS, I);
+ }
+}
+
+void StraightLineStrengthReduce::allocateCandidateAndFindBasisForGEP(
+ const SCEV *B, ConstantInt *Idx, Value *S, uint64_t ElementSize,
+ Instruction *I) {
+ // I = B + sext(Idx *nsw S) *nsw ElementSize
+ // = B + (sext(Idx) * ElementSize) * sext(S)
+ // Casting to IntegerType is safe because we skipped vector GEPs.
+ IntegerType *IntPtrTy = cast<IntegerType>(DL->getIntPtrType(I->getType()));
+ ConstantInt *ScaledIdx = ConstantInt::get(
+ IntPtrTy, Idx->getSExtValue() * (int64_t)ElementSize, true);
+ allocateCandidateAndFindBasis(Candidate::GEP, B, ScaledIdx, S, I);
+}
+
+void StraightLineStrengthReduce::factorArrayIndex(Value *ArrayIdx,
+ const SCEV *Base,
+ uint64_t ElementSize,
+ GetElementPtrInst *GEP) {
+ // At least, ArrayIdx = ArrayIdx *s 1.
+ allocateCandidateAndFindBasisForGEP(
+ Base, ConstantInt::get(cast<IntegerType>(ArrayIdx->getType()), 1),
+ ArrayIdx, ElementSize, GEP);
+ Value *LHS = nullptr;
+ ConstantInt *RHS = nullptr;
+ // TODO: handle shl. e.g., we could treat (S << 2) as (S * 4).
+ //
+ // One alternative is matching the SCEV of ArrayIdx instead of ArrayIdx
+ // itself. This would allow us to handle the shl case for free. However,
+ // matching SCEVs has two issues:
+ //
+ // 1. this would complicate rewriting because the rewriting procedure
+ // would have to translate SCEVs back to IR instructions. This translation
+ // is difficult when LHS is further evaluated to a composite SCEV.
+ //
+ // 2. ScalarEvolution is designed to be control-flow oblivious. It tends
+ // to strip nsw/nuw flags which are critical for SLSR to trace into
+ // sext'ed multiplication.
+ if (match(ArrayIdx, m_NSWMul(m_Value(LHS), m_ConstantInt(RHS)))) {
+ // SLSR is currently unsafe if i * S may overflow.
+ // GEP = Base + sext(LHS *nsw RHS) *nsw ElementSize
+ allocateCandidateAndFindBasisForGEP(Base, RHS, LHS, ElementSize, GEP);
+ }
+}
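
factorArrayIndex always records the trivial factoring ArrayIdx * 1 and, when the index is itself an nsw multiply LHS * RHS with RHS constant, the tighter factoring as well. A sketch of the (Stride, ConstIndex) pairs it emits for a hypothetical index expression:

#include <cstdint>
#include <utility>
#include <vector>

// Sketch: produce the (Stride, ConstIndex) factorings SLSR would consider
// for an array index. HasKnownMul models "ArrayIdx = LHS *nsw RHS" with
// RHS constant; when absent, only the trivial ArrayIdx * 1 pair applies.
std::vector<std::pair<int64_t, int64_t>>
factorings(int64_t ArrayIdx, bool HasKnownMul, int64_t LHS, int64_t RHS) {
  std::vector<std::pair<int64_t, int64_t>> Out;
  Out.push_back({ArrayIdx, 1});      // ArrayIdx = ArrayIdx * 1
  if (HasKnownMul)
    Out.push_back({LHS, RHS});       // ArrayIdx = LHS * RHS (nsw)
  return Out;
}
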
+
+void StraightLineStrengthReduce::allocateCandidateAndFindBasisForGEP(
+ GetElementPtrInst *GEP) {
+ // TODO: handle vector GEPs
+ if (GEP->getType()->isVectorTy())
+ return;
+
+ const SCEV *GEPExpr = SE->getSCEV(GEP);
+ Type *IntPtrTy = DL->getIntPtrType(GEP->getType());
+
+ gep_type_iterator GTI = gep_type_begin(GEP);
+ for (auto I = GEP->idx_begin(); I != GEP->idx_end(); ++I) {
+ if (!isa<SequentialType>(*GTI++))
+ continue;
+ Value *ArrayIdx = *I;
+ // Compute the byte offset of this index.
+ uint64_t ElementSize = DL->getTypeAllocSize(*GTI);
+ const SCEV *ElementSizeExpr = SE->getSizeOfExpr(IntPtrTy, *GTI);
+ const SCEV *ArrayIdxExpr = SE->getSCEV(ArrayIdx);
+ ArrayIdxExpr = SE->getTruncateOrSignExtend(ArrayIdxExpr, IntPtrTy);
+ const SCEV *LocalOffset =
+ SE->getMulExpr(ArrayIdxExpr, ElementSizeExpr, SCEV::FlagNSW);
+ // The base of this candidate equals GEPExpr less the byte offset of this
+ // index.
+ const SCEV *Base = SE->getMinusSCEV(GEPExpr, LocalOffset);
+ factorArrayIndex(ArrayIdx, Base, ElementSize, GEP);
+ // When ArrayIdx is the sext of a value, we try to factor that value as
+ // well. Handling this case is important because array indices are
+ // typically sign-extended to the pointer size.
+ Value *TruncatedArrayIdx = nullptr;
+ if (match(ArrayIdx, m_SExt(m_Value(TruncatedArrayIdx))))
+ factorArrayIndex(TruncatedArrayIdx, Base, ElementSize, GEP);
}
}
+// A helper function that unifies the bitwidth of A and B.
+static void unifyBitWidth(APInt &A, APInt &B) {
+ if (A.getBitWidth() < B.getBitWidth())
+ A = A.sext(B.getBitWidth());
+ else if (A.getBitWidth() > B.getBitWidth())
+ B = B.sext(A.getBitWidth());
+}
+
+Value *StraightLineStrengthReduce::emitBump(const Candidate &Basis,
+ const Candidate &C,
+ IRBuilder<> &Builder,
+ const DataLayout *DL,
+ bool &BumpWithUglyGEP) {
+ APInt Idx = C.Index->getValue(), BasisIdx = Basis.Index->getValue();
+ unifyBitWidth(Idx, BasisIdx);
+ APInt IndexOffset = Idx - BasisIdx;
+
+ BumpWithUglyGEP = false;
+ if (Basis.CandidateKind == Candidate::GEP) {
+ APInt ElementSize(
+ IndexOffset.getBitWidth(),
+ DL->getTypeAllocSize(
+ cast<GetElementPtrInst>(Basis.Ins)->getType()->getElementType()));
+ APInt Q, R;
+ APInt::sdivrem(IndexOffset, ElementSize, Q, R);
+ if (R.getSExtValue() == 0)
+ IndexOffset = Q;
+ else
+ BumpWithUglyGEP = true;
+ }
+ // Compute Bump = C - Basis = (i' - i) * S.
+ // Common case 1: if (i' - i) is 1, Bump = S.
+ if (IndexOffset.getSExtValue() == 1)
+ return C.Stride;
+ // Common case 2: if (i' - i) is -1, Bump = -S.
+ if (IndexOffset.getSExtValue() == -1)
+ return Builder.CreateNeg(C.Stride);
+ // Otherwise, Bump = (i' - i) * sext/trunc(S).
+ ConstantInt *Delta = ConstantInt::get(Basis.Ins->getContext(), IndexOffset);
+ Value *ExtendedStride = Builder.CreateSExtOrTrunc(C.Stride, Delta->getType());
+ return Builder.CreateMul(ExtendedStride, Delta);
+}
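
emitBump first widens the two indices to a common bit width, and for GEP candidates converts the index delta from bytes back to element units when it divides evenly. A plain-integer sketch of the same computation (APInt is replaced by int64_t, so the widening step disappears):

#include <cstdint>

// Sketch: compute the "bump" multiplier (i' - i) and, for a GEP candidate,
// decide whether the byte delta can be expressed in whole elements or the
// caller must fall back to byte-wise addressing (the "ugly GEP").
int64_t computeBump(int64_t Idx, int64_t BasisIdx, bool IsGEP,
                    int64_t ElementSize, bool &BumpWithUglyGEP) {
  int64_t IndexOffset = Idx - BasisIdx;
  BumpWithUglyGEP = false;
  if (IsGEP) {
    if (IndexOffset % ElementSize == 0)
      IndexOffset /= ElementSize;   // express the bump in elements
    else
      BumpWithUglyGEP = true;       // leave it in bytes
  }
  return IndexOffset;               // the caller multiplies by the stride,
                                    // short-circuiting the +1/-1 cases
}
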
+
void StraightLineStrengthReduce::rewriteCandidateWithBasis(
const Candidate &C, const Candidate &Basis) {
+ assert(C.CandidateKind == Basis.CandidateKind && C.Base == Basis.Base &&
+ C.Stride == Basis.Stride);
+
// An instruction can correspond to multiple candidates. Therefore, instead of
// simply deleting an instruction when we rewrite it, we mark its parent as
// nullptr (i.e. unlink it) so that we can skip the candidates whose
// instruction is already rewritten.
if (!C.Ins->getParent())
return;
- assert(C.Base == Basis.Base && C.Stride == Basis.Stride);
- // Basis = (B + i) * S
- // C = (B + i') * S
- // ==>
- // C = Basis + (i' - i) * S
+
IRBuilder<> Builder(C.Ins);
- ConstantInt *IndexOffset = ConstantInt::get(
- C.Ins->getContext(), C.Index->getValue() - Basis.Index->getValue());
- Value *Reduced;
- // TODO: preserve nsw/nuw in some cases.
- if (IndexOffset->isOne()) {
- // If (i' - i) is 1, fold C into Basis + S.
- Reduced = Builder.CreateAdd(Basis.Ins, C.Stride);
- } else if (IndexOffset->isMinusOne()) {
- // If (i' - i) is -1, fold C into Basis - S.
- Reduced = Builder.CreateSub(Basis.Ins, C.Stride);
- } else {
- Value *Bump = Builder.CreateMul(C.Stride, IndexOffset);
+ bool BumpWithUglyGEP;
+ Value *Bump = emitBump(Basis, C, Builder, DL, BumpWithUglyGEP);
+ Value *Reduced = nullptr; // equivalent to but weaker than C.Ins
+ switch (C.CandidateKind) {
+ case Candidate::Mul:
Reduced = Builder.CreateAdd(Basis.Ins, Bump);
- }
+ break;
+ case Candidate::GEP:
+ {
+ Type *IntPtrTy = DL->getIntPtrType(C.Ins->getType());
+ if (BumpWithUglyGEP) {
+ // C = (char *)Basis + Bump
+ unsigned AS = Basis.Ins->getType()->getPointerAddressSpace();
+ Type *CharTy = Type::getInt8PtrTy(Basis.Ins->getContext(), AS);
+ Reduced = Builder.CreateBitCast(Basis.Ins, CharTy);
+ // We only considered inbounds GEP as candidates.
+ Reduced = Builder.CreateInBoundsGEP(Reduced, Bump);
+ Reduced = Builder.CreateBitCast(Reduced, C.Ins->getType());
+ } else {
+ // C = gep Basis, Bump
+ // Canonicalize bump to pointer size.
+ Bump = Builder.CreateSExtOrTrunc(Bump, IntPtrTy);
+ Reduced = Builder.CreateInBoundsGEP(Basis.Ins, Bump);
+ }
+ }
+ break;
+ default:
+ llvm_unreachable("C.CandidateKind is invalid");
+ };
Reduced->takeName(C.Ins);
C.Ins->replaceAllUsesWith(Reduced);
C.Ins->dropAllReferences();
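
For a GEP candidate the rewrite either indexes off the basis directly or, when the bump is not element-aligned, round-trips through an i8* so it can add raw bytes. The same two shapes in plain C++ pointer arithmetic, as an illustrative sketch:

#include <cstdint>

// Sketch of the two GEP rewrite shapes above: Basis is the already-computed
// pointer, Bump is in elements for the clean path and in bytes for the
// "ugly" path.
int32_t *rewriteClean(int32_t *Basis, int64_t BumpInElements) {
  return Basis + BumpInElements;                // gep Basis, Bump
}
int32_t *rewriteUgly(int32_t *Basis, int64_t BumpInBytes) {
  char *Raw = reinterpret_cast<char *>(Basis);  // bitcast to i8*
  return reinterpret_cast<int32_t *>(Raw + BumpInBytes);
}
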
@@ -243,15 +504,15 @@ bool StraightLineStrengthReduce::runOnFunction(Function &F) {
if (skipOptnoneFunction(F))
return false;
+ TTI = &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
+ SE = &getAnalysis<ScalarEvolution>();
// Traverse the dominator tree in the depth-first order. This order makes sure
// all bases of a candidate are in Candidates when we process it.
for (auto node = GraphTraits<DominatorTree *>::nodes_begin(DT);
node != GraphTraits<DominatorTree *>::nodes_end(DT); ++node) {
- BasicBlock *B = node->getBlock();
- for (auto I = B->begin(); I != B->end(); ++I) {
- allocateCandidateAndFindBasis(I);
- }
+ for (auto &I : *node->getBlock())
+ allocateCandidateAndFindBasis(&I);
}
// Rewrite candidates in the reverse depth-first order. This order makes sure
diff --git a/lib/Transforms/Scalar/StructurizeCFG.cpp b/lib/Transforms/Scalar/StructurizeCFG.cpp
index aaf6f9a..6c3ce58 100644
--- a/lib/Transforms/Scalar/StructurizeCFG.cpp
+++ b/lib/Transforms/Scalar/StructurizeCFG.cpp
@@ -9,8 +9,8 @@
#include "llvm/Transforms/Scalar.h"
#include "llvm/ADT/MapVector.h"
-#include "llvm/ADT/SCCIterator.h"
#include "llvm/ADT/PostOrderIterator.h"
+#include "llvm/ADT/SCCIterator.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/RegionInfo.h"
#include "llvm/Analysis/RegionIterator.h"
@@ -18,6 +18,7 @@
#include "llvm/IR/Module.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Utils/SSAUpdater.h"
using namespace llvm;
diff --git a/lib/Transforms/Scalar/TailRecursionElimination.cpp b/lib/Transforms/Scalar/TailRecursionElimination.cpp
index 715ddeb..9eef132 100644
--- a/lib/Transforms/Scalar/TailRecursionElimination.cpp
+++ b/lib/Transforms/Scalar/TailRecursionElimination.cpp
@@ -54,8 +54,8 @@
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/Statistic.h"
-#include "llvm/Analysis/CaptureTracking.h"
#include "llvm/Analysis/CFG.h"
+#include "llvm/Analysis/CaptureTracking.h"
#include "llvm/Analysis/InlineCost.h"
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/Loads.h"
@@ -87,7 +87,6 @@ STATISTIC(NumAccumAdded, "Number of accumulators introduced");
namespace {
struct TailCallElim : public FunctionPass {
const TargetTransformInfo *TTI;
- const DataLayout *DL;
static char ID; // Pass identification, replacement for typeid
TailCallElim() : FunctionPass(ID) {
@@ -159,8 +158,6 @@ bool TailCallElim::runOnFunction(Function &F) {
if (skipOptnoneFunction(F))
return false;
- DL = F.getParent()->getDataLayout();
-
bool AllCallsAreTailCalls = false;
bool Modified = markTails(F, AllCallsAreTailCalls);
if (AllCallsAreTailCalls)
@@ -392,10 +389,9 @@ bool TailCallElim::runTRE(Function &F) {
SmallVector<PHINode*, 8> ArgumentPHIs;
bool MadeChange = false;
- // CanTRETailMarkedCall - If false, we cannot perform TRE on tail calls
- // marked with the 'tail' attribute, because doing so would cause the stack
- // size to increase (real TRE would deallocate variable sized allocas, TRE
- // doesn't).
+ // If false, we cannot perform TRE on tail calls marked with the 'tail'
+ // attribute, because doing so would cause the stack size to increase (real
+ // TRE would deallocate variable sized allocas, TRE doesn't).
bool CanTRETailMarkedCall = CanTRE(F);
// Change any tail recursive calls to loops.
@@ -404,28 +400,19 @@ bool TailCallElim::runTRE(Function &F) {
// alloca' is changed from being a static alloca to being a dynamic alloca.
// Until this is resolved, disable this transformation if that would ever
// happen. This bug is PR962.
- SmallVector<BasicBlock*, 8> BBToErase;
- for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB) {
+ for (Function::iterator BBI = F.begin(), E = F.end(); BBI != E; /*in loop*/) {
+ BasicBlock *BB = BBI++; // FoldReturnAndProcessPred may delete BB.
if (ReturnInst *Ret = dyn_cast<ReturnInst>(BB->getTerminator())) {
bool Change = ProcessReturningBlock(Ret, OldEntry, TailCallsAreMarkedTail,
ArgumentPHIs, !CanTRETailMarkedCall);
- if (!Change && BB->getFirstNonPHIOrDbg() == Ret) {
+ if (!Change && BB->getFirstNonPHIOrDbg() == Ret)
Change = FoldReturnAndProcessPred(BB, Ret, OldEntry,
TailCallsAreMarkedTail, ArgumentPHIs,
!CanTRETailMarkedCall);
- // FoldReturnAndProcessPred may have emptied some BB. Remember to
- // erase them.
- if (Change && BB->empty())
- BBToErase.push_back(BB);
-
- }
MadeChange |= Change;
}
}
- for (auto BB: BBToErase)
- BB->eraseFromParent();
-
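
The loop now advances BBI before processing BB because FoldReturnAndProcessPred may erase BB outright, which would invalidate any iterator still pointing at it. The idiom, shown on a std::list as a hedged stand-in for the function's block list:

#include <list>

// Sketch: erase elements while walking a list by advancing the iterator
// first, mirroring "BasicBlock *BB = BBI++;" above.
void eraseEvens(std::list<int> &L) {
  for (auto It = L.begin(), E = L.end(); It != E; /* in loop */) {
    auto Cur = It++;        // advance before possibly erasing *Cur
    if (*Cur % 2 == 0)
      L.erase(Cur);         // safe: It already points past Cur
  }
}
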
// If we eliminated any tail recursions, it's possible that we inserted some
// silly PHI nodes which just merge an initial value (the incoming operand)
// with themselves. Check to see if we did and clean up our mess if so. This
@@ -435,7 +422,7 @@ bool TailCallElim::runTRE(Function &F) {
PHINode *PN = ArgumentPHIs[i];
// If the PHI Node is a dynamic constant, replace it with the value it is.
- if (Value *PNV = SimplifyInstruction(PN)) {
+ if (Value *PNV = SimplifyInstruction(PN, F.getParent()->getDataLayout())) {
PN->replaceAllUsesWith(PNV);
PN->eraseFromParent();
}
@@ -445,7 +432,7 @@ bool TailCallElim::runTRE(Function &F) {
}
-/// CanMoveAboveCall - Return true if it is safe to move the specified
+/// Return true if it is safe to move the specified
/// instruction from after the call to before the call, assuming that all
/// instructions between the call and this instruction are movable.
///
@@ -464,7 +451,7 @@ bool TailCallElim::CanMoveAboveCall(Instruction *I, CallInst *CI) {
// being loaded from.
if (CI->mayWriteToMemory() ||
!isSafeToLoadUnconditionally(L->getPointerOperand(), L,
- L->getAlignment(), DL))
+ L->getAlignment()))
return false;
}
}
@@ -480,13 +467,11 @@ bool TailCallElim::CanMoveAboveCall(Instruction *I, CallInst *CI) {
return true;
}
-// isDynamicConstant - Return true if the specified value is the same when the
-// return would exit as it was when the initial iteration of the recursive
-// function was executed.
-//
-// We currently handle static constants and arguments that are not modified as
-// part of the recursion.
-//
+/// Return true if the specified value is the same when the return would exit
+/// as it was when the initial iteration of the recursive function was executed.
+///
+/// We currently handle static constants and arguments that are not modified as
+/// part of the recursion.
static bool isDynamicConstant(Value *V, CallInst *CI, ReturnInst *RI) {
if (isa<Constant>(V)) return true; // Static constants are always dyn consts
@@ -518,10 +503,9 @@ static bool isDynamicConstant(Value *V, CallInst *CI, ReturnInst *RI) {
return false;
}
-// getCommonReturnValue - Check to see if the function containing the specified
-// tail call consistently returns the same runtime-constant value at all exit
-// points except for IgnoreRI. If so, return the returned value.
-//
+/// Check to see if the function containing the specified tail call consistently
+/// returns the same runtime-constant value at all exit points except for
+/// IgnoreRI. If so, return the returned value.
static Value *getCommonReturnValue(ReturnInst *IgnoreRI, CallInst *CI) {
Function *F = CI->getParent()->getParent();
Value *ReturnedValue = nullptr;
@@ -545,10 +529,9 @@ static Value *getCommonReturnValue(ReturnInst *IgnoreRI, CallInst *CI) {
return ReturnedValue;
}
-/// CanTransformAccumulatorRecursion - If the specified instruction can be
-/// transformed using accumulator recursion elimination, return the constant
-/// which is the start of the accumulator value. Otherwise return null.
-///
+/// If the specified instruction can be transformed using accumulator recursion
+/// elimination, return the constant which is the start of the accumulator
+/// value. Otherwise return null.
Value *TailCallElim::CanTransformAccumulatorRecursion(Instruction *I,
CallInst *CI) {
if (!I->isAssociative() || !I->isCommutative()) return nullptr;
@@ -836,14 +819,11 @@ bool TailCallElim::FoldReturnAndProcessPred(BasicBlock *BB,
ReturnInst *RI = FoldReturnIntoUncondBranch(Ret, BB, Pred);
// Cleanup: if all predecessors of BB have been eliminated by
- // FoldReturnIntoUncondBranch, we would like to delete it, but we
- // can not just nuke it as it is being used as an iterator by our caller.
- // Just empty it, and the caller will erase it when it is safe to do so.
- // It is important to empty it, because the ret instruction in there is
- // still using a value which EliminateRecursiveTailCall will attempt
- // to remove.
+ // FoldReturnIntoUncondBranch, delete it. It is important to empty it,
+ // because the ret instruction in there is still using a value which
+ // EliminateRecursiveTailCall will attempt to remove.
if (!BB->hasAddressTaken() && pred_begin(BB) == pred_end(BB))
- BB->getInstList().clear();
+ BB->eraseFromParent();
EliminateRecursiveTailCall(CI, RI, OldEntry, TailCallsAreMarkedTail,
ArgumentPHIs,
diff --git a/lib/Transforms/Utils/BuildLibCalls.cpp b/lib/Transforms/Utils/BuildLibCalls.cpp
index 762a83f..671cbfe 100644
--- a/lib/Transforms/Utils/BuildLibCalls.cpp
+++ b/lib/Transforms/Utils/BuildLibCalls.cpp
@@ -33,7 +33,7 @@ Value *llvm::CastToCStr(Value *V, IRBuilder<> &B) {
/// EmitStrLen - Emit a call to the strlen function to the builder, for the
/// specified pointer. This always returns an integer value of size intptr_t.
-Value *llvm::EmitStrLen(Value *Ptr, IRBuilder<> &B, const DataLayout *TD,
+Value *llvm::EmitStrLen(Value *Ptr, IRBuilder<> &B, const DataLayout &DL,
const TargetLibraryInfo *TLI) {
if (!TLI->has(LibFunc::strlen))
return nullptr;
@@ -45,12 +45,9 @@ Value *llvm::EmitStrLen(Value *Ptr, IRBuilder<> &B, const DataLayout *TD,
AS[1] = AttributeSet::get(M->getContext(), AttributeSet::FunctionIndex, AVs);
LLVMContext &Context = B.GetInsertBlock()->getContext();
- Constant *StrLen = M->getOrInsertFunction("strlen",
- AttributeSet::get(M->getContext(),
- AS),
- TD->getIntPtrType(Context),
- B.getInt8PtrTy(),
- nullptr);
+ Constant *StrLen = M->getOrInsertFunction(
+ "strlen", AttributeSet::get(M->getContext(), AS),
+ DL.getIntPtrType(Context), B.getInt8PtrTy(), nullptr);
CallInst *CI = B.CreateCall(StrLen, CastToCStr(Ptr, B), "strlen");
if (const Function *F = dyn_cast<Function>(StrLen->stripPointerCasts()))
CI->setCallingConv(F->getCallingConv());
@@ -62,7 +59,7 @@ Value *llvm::EmitStrLen(Value *Ptr, IRBuilder<> &B, const DataLayout *TD,
/// specified pointer. Ptr is required to be some pointer type, MaxLen must
/// be of size_t type, and the return value has 'intptr_t' type.
Value *llvm::EmitStrNLen(Value *Ptr, Value *MaxLen, IRBuilder<> &B,
- const DataLayout *TD, const TargetLibraryInfo *TLI) {
+ const DataLayout &DL, const TargetLibraryInfo *TLI) {
if (!TLI->has(LibFunc::strnlen))
return nullptr;
@@ -73,13 +70,10 @@ Value *llvm::EmitStrNLen(Value *Ptr, Value *MaxLen, IRBuilder<> &B,
AS[1] = AttributeSet::get(M->getContext(), AttributeSet::FunctionIndex, AVs);
LLVMContext &Context = B.GetInsertBlock()->getContext();
- Constant *StrNLen = M->getOrInsertFunction("strnlen",
- AttributeSet::get(M->getContext(),
- AS),
- TD->getIntPtrType(Context),
- B.getInt8PtrTy(),
- TD->getIntPtrType(Context),
- nullptr);
+ Constant *StrNLen =
+ M->getOrInsertFunction("strnlen", AttributeSet::get(M->getContext(), AS),
+ DL.getIntPtrType(Context), B.getInt8PtrTy(),
+ DL.getIntPtrType(Context), nullptr);
CallInst *CI = B.CreateCall2(StrNLen, CastToCStr(Ptr, B), MaxLen, "strnlen");
if (const Function *F = dyn_cast<Function>(StrNLen->stripPointerCasts()))
CI->setCallingConv(F->getCallingConv());
@@ -91,7 +85,7 @@ Value *llvm::EmitStrNLen(Value *Ptr, Value *MaxLen, IRBuilder<> &B,
/// specified pointer and character. Ptr is required to be some pointer type,
/// and the return value has 'i8*' type.
Value *llvm::EmitStrChr(Value *Ptr, char C, IRBuilder<> &B,
- const DataLayout *TD, const TargetLibraryInfo *TLI) {
+ const TargetLibraryInfo *TLI) {
if (!TLI->has(LibFunc::strchr))
return nullptr;
@@ -114,9 +108,8 @@ Value *llvm::EmitStrChr(Value *Ptr, char C, IRBuilder<> &B,
}
/// EmitStrNCmp - Emit a call to the strncmp function to the builder.
-Value *llvm::EmitStrNCmp(Value *Ptr1, Value *Ptr2, Value *Len,
- IRBuilder<> &B, const DataLayout *TD,
- const TargetLibraryInfo *TLI) {
+Value *llvm::EmitStrNCmp(Value *Ptr1, Value *Ptr2, Value *Len, IRBuilder<> &B,
+ const DataLayout &DL, const TargetLibraryInfo *TLI) {
if (!TLI->has(LibFunc::strncmp))
return nullptr;
@@ -128,13 +121,9 @@ Value *llvm::EmitStrNCmp(Value *Ptr1, Value *Ptr2, Value *Len,
AS[2] = AttributeSet::get(M->getContext(), AttributeSet::FunctionIndex, AVs);
LLVMContext &Context = B.GetInsertBlock()->getContext();
- Value *StrNCmp = M->getOrInsertFunction("strncmp",
- AttributeSet::get(M->getContext(),
- AS),
- B.getInt32Ty(),
- B.getInt8PtrTy(),
- B.getInt8PtrTy(),
- TD->getIntPtrType(Context), nullptr);
+ Value *StrNCmp = M->getOrInsertFunction(
+ "strncmp", AttributeSet::get(M->getContext(), AS), B.getInt32Ty(),
+ B.getInt8PtrTy(), B.getInt8PtrTy(), DL.getIntPtrType(Context), nullptr);
CallInst *CI = B.CreateCall3(StrNCmp, CastToCStr(Ptr1, B),
CastToCStr(Ptr2, B), Len, "strncmp");
@@ -147,8 +136,7 @@ Value *llvm::EmitStrNCmp(Value *Ptr1, Value *Ptr2, Value *Len,
/// EmitStrCpy - Emit a call to the strcpy function to the builder, for the
/// specified pointer arguments.
Value *llvm::EmitStrCpy(Value *Dst, Value *Src, IRBuilder<> &B,
- const DataLayout *TD, const TargetLibraryInfo *TLI,
- StringRef Name) {
+ const TargetLibraryInfo *TLI, StringRef Name) {
if (!TLI->has(LibFunc::strcpy))
return nullptr;
@@ -170,8 +158,7 @@ Value *llvm::EmitStrCpy(Value *Dst, Value *Src, IRBuilder<> &B,
/// EmitStrNCpy - Emit a call to the strncpy function to the builder, for the
/// specified pointer arguments.
-Value *llvm::EmitStrNCpy(Value *Dst, Value *Src, Value *Len,
- IRBuilder<> &B, const DataLayout *TD,
+Value *llvm::EmitStrNCpy(Value *Dst, Value *Src, Value *Len, IRBuilder<> &B,
const TargetLibraryInfo *TLI, StringRef Name) {
if (!TLI->has(LibFunc::strncpy))
return nullptr;
@@ -198,7 +185,7 @@ Value *llvm::EmitStrNCpy(Value *Dst, Value *Src, Value *Len,
/// This expects that the Len and ObjSize have type 'intptr_t' and Dst/Src
/// are pointers.
Value *llvm::EmitMemCpyChk(Value *Dst, Value *Src, Value *Len, Value *ObjSize,
- IRBuilder<> &B, const DataLayout *TD,
+ IRBuilder<> &B, const DataLayout &DL,
const TargetLibraryInfo *TLI) {
if (!TLI->has(LibFunc::memcpy_chk))
return nullptr;
@@ -208,13 +195,10 @@ Value *llvm::EmitMemCpyChk(Value *Dst, Value *Src, Value *Len, Value *ObjSize,
AS = AttributeSet::get(M->getContext(), AttributeSet::FunctionIndex,
Attribute::NoUnwind);
LLVMContext &Context = B.GetInsertBlock()->getContext();
- Value *MemCpy = M->getOrInsertFunction("__memcpy_chk",
- AttributeSet::get(M->getContext(), AS),
- B.getInt8PtrTy(),
- B.getInt8PtrTy(),
- B.getInt8PtrTy(),
- TD->getIntPtrType(Context),
- TD->getIntPtrType(Context), nullptr);
+ Value *MemCpy = M->getOrInsertFunction(
+ "__memcpy_chk", AttributeSet::get(M->getContext(), AS), B.getInt8PtrTy(),
+ B.getInt8PtrTy(), B.getInt8PtrTy(), DL.getIntPtrType(Context),
+ DL.getIntPtrType(Context), nullptr);
Dst = CastToCStr(Dst, B);
Src = CastToCStr(Src, B);
CallInst *CI = B.CreateCall4(MemCpy, Dst, Src, Len, ObjSize);
@@ -225,9 +209,8 @@ Value *llvm::EmitMemCpyChk(Value *Dst, Value *Src, Value *Len, Value *ObjSize,
/// EmitMemChr - Emit a call to the memchr function. This assumes that Ptr is
/// a pointer, Val is an i32 value, and Len is an 'intptr_t' value.
-Value *llvm::EmitMemChr(Value *Ptr, Value *Val,
- Value *Len, IRBuilder<> &B, const DataLayout *TD,
- const TargetLibraryInfo *TLI) {
+Value *llvm::EmitMemChr(Value *Ptr, Value *Val, Value *Len, IRBuilder<> &B,
+ const DataLayout &DL, const TargetLibraryInfo *TLI) {
if (!TLI->has(LibFunc::memchr))
return nullptr;
@@ -236,13 +219,9 @@ Value *llvm::EmitMemChr(Value *Ptr, Value *Val,
Attribute::AttrKind AVs[2] = { Attribute::ReadOnly, Attribute::NoUnwind };
AS = AttributeSet::get(M->getContext(), AttributeSet::FunctionIndex, AVs);
LLVMContext &Context = B.GetInsertBlock()->getContext();
- Value *MemChr = M->getOrInsertFunction("memchr",
- AttributeSet::get(M->getContext(), AS),
- B.getInt8PtrTy(),
- B.getInt8PtrTy(),
- B.getInt32Ty(),
- TD->getIntPtrType(Context),
- nullptr);
+ Value *MemChr = M->getOrInsertFunction(
+ "memchr", AttributeSet::get(M->getContext(), AS), B.getInt8PtrTy(),
+ B.getInt8PtrTy(), B.getInt32Ty(), DL.getIntPtrType(Context), nullptr);
CallInst *CI = B.CreateCall3(MemChr, CastToCStr(Ptr, B), Val, Len, "memchr");
if (const Function *F = dyn_cast<Function>(MemChr->stripPointerCasts()))
@@ -252,9 +231,8 @@ Value *llvm::EmitMemChr(Value *Ptr, Value *Val,
}
/// EmitMemCmp - Emit a call to the memcmp function.
-Value *llvm::EmitMemCmp(Value *Ptr1, Value *Ptr2,
- Value *Len, IRBuilder<> &B, const DataLayout *TD,
- const TargetLibraryInfo *TLI) {
+Value *llvm::EmitMemCmp(Value *Ptr1, Value *Ptr2, Value *Len, IRBuilder<> &B,
+ const DataLayout &DL, const TargetLibraryInfo *TLI) {
if (!TLI->has(LibFunc::memcmp))
return nullptr;
@@ -266,12 +244,9 @@ Value *llvm::EmitMemCmp(Value *Ptr1, Value *Ptr2,
AS[2] = AttributeSet::get(M->getContext(), AttributeSet::FunctionIndex, AVs);
LLVMContext &Context = B.GetInsertBlock()->getContext();
- Value *MemCmp = M->getOrInsertFunction("memcmp",
- AttributeSet::get(M->getContext(), AS),
- B.getInt32Ty(),
- B.getInt8PtrTy(),
- B.getInt8PtrTy(),
- TD->getIntPtrType(Context), nullptr);
+ Value *MemCmp = M->getOrInsertFunction(
+ "memcmp", AttributeSet::get(M->getContext(), AS), B.getInt32Ty(),
+ B.getInt8PtrTy(), B.getInt8PtrTy(), DL.getIntPtrType(Context), nullptr);
CallInst *CI = B.CreateCall3(MemCmp, CastToCStr(Ptr1, B), CastToCStr(Ptr2, B),
Len, "memcmp");
@@ -339,7 +314,7 @@ Value *llvm::EmitBinaryFloatFnCall(Value *Op1, Value *Op2, StringRef Name,
/// EmitPutChar - Emit a call to the putchar function. This assumes that Char
/// is an integer.
-Value *llvm::EmitPutChar(Value *Char, IRBuilder<> &B, const DataLayout *TD,
+Value *llvm::EmitPutChar(Value *Char, IRBuilder<> &B,
const TargetLibraryInfo *TLI) {
if (!TLI->has(LibFunc::putchar))
return nullptr;
@@ -361,7 +336,7 @@ Value *llvm::EmitPutChar(Value *Char, IRBuilder<> &B, const DataLayout *TD,
/// EmitPutS - Emit a call to the puts function. This assumes that Str is
/// some pointer.
-Value *llvm::EmitPutS(Value *Str, IRBuilder<> &B, const DataLayout *TD,
+Value *llvm::EmitPutS(Value *Str, IRBuilder<> &B,
const TargetLibraryInfo *TLI) {
if (!TLI->has(LibFunc::puts))
return nullptr;
@@ -386,7 +361,7 @@ Value *llvm::EmitPutS(Value *Str, IRBuilder<> &B, const DataLayout *TD,
/// EmitFPutC - Emit a call to the fputc function. This assumes that Char is
/// an integer and File is a pointer to FILE.
Value *llvm::EmitFPutC(Value *Char, Value *File, IRBuilder<> &B,
- const DataLayout *TD, const TargetLibraryInfo *TLI) {
+ const TargetLibraryInfo *TLI) {
if (!TLI->has(LibFunc::fputc))
return nullptr;
@@ -419,7 +394,7 @@ Value *llvm::EmitFPutC(Value *Char, Value *File, IRBuilder<> &B,
 /// EmitFPutS - Emit a call to the fputs function. Str is required to be a
/// pointer and File is a pointer to FILE.
Value *llvm::EmitFPutS(Value *Str, Value *File, IRBuilder<> &B,
- const DataLayout *TD, const TargetLibraryInfo *TLI) {
+ const TargetLibraryInfo *TLI) {
if (!TLI->has(LibFunc::fputs))
return nullptr;
@@ -450,9 +425,8 @@ Value *llvm::EmitFPutS(Value *Str, Value *File, IRBuilder<> &B,
/// EmitFWrite - Emit a call to the fwrite function. This assumes that Ptr is
/// a pointer, Size is an 'intptr_t', and File is a pointer to FILE.
-Value *llvm::EmitFWrite(Value *Ptr, Value *Size, Value *File,
- IRBuilder<> &B, const DataLayout *TD,
- const TargetLibraryInfo *TLI) {
+Value *llvm::EmitFWrite(Value *Ptr, Value *Size, Value *File, IRBuilder<> &B,
+ const DataLayout &DL, const TargetLibraryInfo *TLI) {
if (!TLI->has(LibFunc::fwrite))
return nullptr;
@@ -466,21 +440,18 @@ Value *llvm::EmitFWrite(Value *Ptr, Value *Size, Value *File,
StringRef FWriteName = TLI->getName(LibFunc::fwrite);
Constant *F;
if (File->getType()->isPointerTy())
- F = M->getOrInsertFunction(FWriteName,
- AttributeSet::get(M->getContext(), AS),
- TD->getIntPtrType(Context),
- B.getInt8PtrTy(),
- TD->getIntPtrType(Context),
- TD->getIntPtrType(Context),
- File->getType(), nullptr);
+ F = M->getOrInsertFunction(
+ FWriteName, AttributeSet::get(M->getContext(), AS),
+ DL.getIntPtrType(Context), B.getInt8PtrTy(), DL.getIntPtrType(Context),
+ DL.getIntPtrType(Context), File->getType(), nullptr);
else
- F = M->getOrInsertFunction(FWriteName, TD->getIntPtrType(Context),
- B.getInt8PtrTy(),
- TD->getIntPtrType(Context),
- TD->getIntPtrType(Context),
- File->getType(), nullptr);
- CallInst *CI = B.CreateCall4(F, CastToCStr(Ptr, B), Size,
- ConstantInt::get(TD->getIntPtrType(Context), 1), File);
+ F = M->getOrInsertFunction(FWriteName, DL.getIntPtrType(Context),
+ B.getInt8PtrTy(), DL.getIntPtrType(Context),
+ DL.getIntPtrType(Context), File->getType(),
+ nullptr);
+ CallInst *CI =
+ B.CreateCall4(F, CastToCStr(Ptr, B), Size,
+ ConstantInt::get(DL.getIntPtrType(Context), 1), File);
if (const Function *Fn = dyn_cast<Function>(F->stripPointerCasts()))
CI->setCallingConv(Fn->getCallingConv());
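
Each Emit* helper follows the same shape: bail out if TLI says the libcall is unavailable, get-or-insert a declaration whose integer types come from DataLayout, then build the call. A self-contained sketch of the get-or-insert half over a plain symbol table (the real Module API is deliberately not used here):

#include <map>
#include <string>
#include <vector>

struct FuncDecl {
  std::string Name;
  std::string RetType;                  // e.g. the intptr_t type from DL
  std::vector<std::string> ParamTypes;
};

// Sketch: return the existing declaration for Name, or insert one with the
// given signature, mirroring Module::getOrInsertFunction's contract.
FuncDecl &getOrInsertFunction(std::map<std::string, FuncDecl> &Symbols,
                              const std::string &Name,
                              const std::string &RetType,
                              const std::vector<std::string> &Params) {
  auto It = Symbols.find(Name);
  if (It != Symbols.end())
    return It->second;                  // already declared: reuse it
  FuncDecl &D = Symbols[Name];
  D.Name = Name;
  D.RetType = RetType;
  D.ParamTypes = Params;
  return D;
}
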
diff --git a/lib/Transforms/Utils/CloneFunction.cpp b/lib/Transforms/Utils/CloneFunction.cpp
index 09279b6..f04ea9c 100644
--- a/lib/Transforms/Utils/CloneFunction.cpp
+++ b/lib/Transforms/Utils/CloneFunction.cpp
@@ -34,7 +34,7 @@
#include <map>
using namespace llvm;
-// CloneBasicBlock - See comments in Cloning.h
+/// See comments in Cloning.h.
BasicBlock *llvm::CloneBasicBlock(const BasicBlock *BB,
ValueToValueMapTy &VMap,
const Twine &NameSuffix, Function *F,
@@ -202,7 +202,7 @@ static void CloneDebugInfoMetadata(Function *NewFunc, const Function *OldFunc,
}
}
-/// CloneFunction - Return a copy of the specified function, but without
+/// Return a copy of the specified function, but without
/// embedding the function into another module. Also, any references specified
/// in the VMap are changed to refer to their mapped value instead of the
/// original one. If any of the arguments to the function are in the VMap,
@@ -250,8 +250,7 @@ Function *llvm::CloneFunction(const Function *F, ValueToValueMapTy &VMap,
namespace {
- /// PruningFunctionCloner - This class is a private class used to implement
- /// the CloneAndPruneFunctionInto method.
+ /// This is a private class used to implement CloneAndPruneFunctionInto.
struct PruningFunctionCloner {
Function *NewFunc;
const Function *OldFunc;
@@ -259,23 +258,18 @@ namespace {
bool ModuleLevelChanges;
const char *NameSuffix;
ClonedCodeInfo *CodeInfo;
- const DataLayout *DL;
CloningDirector *Director;
ValueMapTypeRemapper *TypeMapper;
ValueMaterializer *Materializer;
public:
PruningFunctionCloner(Function *newFunc, const Function *oldFunc,
- ValueToValueMapTy &valueMap,
- bool moduleLevelChanges,
- const char *nameSuffix,
- ClonedCodeInfo *codeInfo,
- const DataLayout *DL,
+ ValueToValueMapTy &valueMap, bool moduleLevelChanges,
+ const char *nameSuffix, ClonedCodeInfo *codeInfo,
CloningDirector *Director)
- : NewFunc(newFunc), OldFunc(oldFunc),
- VMap(valueMap), ModuleLevelChanges(moduleLevelChanges),
- NameSuffix(nameSuffix), CodeInfo(codeInfo), DL(DL),
- Director(Director) {
+ : NewFunc(newFunc), OldFunc(oldFunc), VMap(valueMap),
+ ModuleLevelChanges(moduleLevelChanges), NameSuffix(nameSuffix),
+ CodeInfo(codeInfo), Director(Director) {
// These are optional components. The Director may return null.
if (Director) {
TypeMapper = Director->getTypeRemapper();
@@ -286,7 +280,7 @@ namespace {
}
}
- /// CloneBlock - The specified block is found to be reachable, clone it and
+ /// The specified block is found to be reachable, clone it and
/// anything that it can reach.
void CloneBlock(const BasicBlock *BB,
BasicBlock::const_iterator StartingInst,
@@ -294,7 +288,7 @@ namespace {
};
}
-/// CloneBlock - The specified block is found to be reachable, clone it and
+/// The specified block is found to be reachable, clone it and
/// anything that it can reach.
void PruningFunctionCloner::CloneBlock(const BasicBlock *BB,
BasicBlock::const_iterator StartingInst,
@@ -360,7 +354,8 @@ void PruningFunctionCloner::CloneBlock(const BasicBlock *BB,
// If we can simplify this instruction to some other value, simply add
// a mapping to that value rather than inserting a new instruction into
// the basic block.
- if (Value *V = SimplifyInstruction(NewInst, DL)) {
+ if (Value *V =
+ SimplifyInstruction(NewInst, BB->getModule()->getDataLayout())) {
// On the off-chance that this simplifies to an instruction in the old
// function, map it back into the new function.
if (Value *MappedV = VMap.lookup(V))
@@ -397,6 +392,14 @@ void PruningFunctionCloner::CloneBlock(const BasicBlock *BB,
// terminator into the new basic block in this case.
if (Action == CloningDirector::StopCloningBB)
return;
+ if (Action == CloningDirector::CloneSuccessors) {
+ // If the director says to skip with a terminator instruction, we still
+ // need to clone this block's successors.
+ const TerminatorInst *TI = BB->getTerminator();
+ for (unsigned i = 0, e = TI->getNumSuccessors(); i != e; ++i)
+ ToClone.push_back(TI->getSuccessor(i));
+ return;
+ }
assert(Action != CloningDirector::SkipInstruction &&
"SkipInstruction is not valid for terminators.");
}
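
CloneSuccessors tells the pruning cloner to skip the terminator itself but still enqueue its successors, keeping the reachability worklist complete. A small sketch of that worklist discipline over an adjacency list:

#include <set>
#include <vector>

// Sketch: depth-first reachability over block successors, the traversal
// that ToClone drives in CloneBlock. Succs[B] lists B's successor ids.
std::set<int> reachable(int Entry,
                        const std::vector<std::vector<int>> &Succs) {
  std::set<int> Seen;
  std::vector<int> ToClone{Entry};
  while (!ToClone.empty()) {
    int B = ToClone.back();
    ToClone.pop_back();
    if (!Seen.insert(B).second)
      continue;                    // already cloned
    for (int S : Succs[B])
      ToClone.push_back(S);        // clone this block's successors too
  }
  return Seen;
}
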
@@ -455,10 +458,9 @@ void PruningFunctionCloner::CloneBlock(const BasicBlock *BB,
}
}
-/// CloneAndPruneIntoFromInst - This works like CloneAndPruneFunctionInto, except
-/// that it does not clone the entire function. Instead it starts at an
-/// instruction provided by the caller and copies (and prunes) only the code
-/// reachable from that instruction.
+/// This works like CloneAndPruneFunctionInto, except that it does not clone the
+/// entire function. Instead it starts at an instruction provided by the caller
+/// and copies (and prunes) only the code reachable from that instruction.
void llvm::CloneAndPruneIntoFromInst(Function *NewFunc, const Function *OldFunc,
const Instruction *StartingInst,
ValueToValueMapTy &VMap,
@@ -466,7 +468,6 @@ void llvm::CloneAndPruneIntoFromInst(Function *NewFunc, const Function *OldFunc,
SmallVectorImpl<ReturnInst *> &Returns,
const char *NameSuffix,
ClonedCodeInfo *CodeInfo,
- const DataLayout *DL,
CloningDirector *Director) {
assert(NameSuffix && "NameSuffix cannot be null!");
@@ -488,7 +489,7 @@ void llvm::CloneAndPruneIntoFromInst(Function *NewFunc, const Function *OldFunc,
#endif
PruningFunctionCloner PFC(NewFunc, OldFunc, VMap, ModuleLevelChanges,
- NameSuffix, CodeInfo, DL, Director);
+ NameSuffix, CodeInfo, Director);
const BasicBlock *StartingBB;
if (StartingInst)
StartingBB = StartingInst->getParent();
@@ -523,11 +524,18 @@ void llvm::CloneAndPruneIntoFromInst(Function *NewFunc, const Function *OldFunc,
// Handle PHI nodes specially, as we have to remove references to dead
// blocks.
- for (BasicBlock::const_iterator I = BI->begin(), E = BI->end(); I != E; ++I)
- if (const PHINode *PN = dyn_cast<PHINode>(I))
- PHIToResolve.push_back(PN);
- else
+ for (BasicBlock::const_iterator I = BI->begin(), E = BI->end(); I != E; ++I) {
+ // PHI nodes may have been remapped to non-PHI nodes by the caller or
+ // during the cloning process.
+ if (const PHINode *PN = dyn_cast<PHINode>(I)) {
+ if (isa<PHINode>(VMap[PN]))
+ PHIToResolve.push_back(PN);
+ else
+ break;
+ } else {
break;
+ }
+ }
// Finally, remap the terminator instructions, as those can't be remapped
// until all BBs are mapped.
@@ -626,10 +634,10 @@ void llvm::CloneAndPruneIntoFromInst(Function *NewFunc, const Function *OldFunc,
// node).
for (unsigned Idx = 0, Size = PHIToResolve.size(); Idx != Size; ++Idx)
if (PHINode *PN = dyn_cast<PHINode>(VMap[PHIToResolve[Idx]]))
- recursivelySimplifyInstruction(PN, DL);
+ recursivelySimplifyInstruction(PN);
// Now that the inlined function body has been fully constructed, go through
- // and zap unconditional fall-through branches. This happen all the time when
+ // and zap unconditional fall-through branches. This happens all the time when
// specializing code: code specialization turns conditional branches into
// uncond branches, and this code folds them.
Function::iterator Begin = cast<BasicBlock>(VMap[StartingBB]);
@@ -680,7 +688,7 @@ void llvm::CloneAndPruneIntoFromInst(Function *NewFunc, const Function *OldFunc,
// Do not increment I, iteratively merge all things this block branches to.
}
- // Make a final pass over the basic blocks from theh old function to gather
+ // Make a final pass over the basic blocks from the old function to gather
// any return instructions which survived folding. We have to do this here
// because we can iteratively remove and merge returns above.
for (Function::iterator I = cast<BasicBlock>(VMap[StartingBB]),
@@ -691,7 +699,7 @@ void llvm::CloneAndPruneIntoFromInst(Function *NewFunc, const Function *OldFunc,
}
-/// CloneAndPruneFunctionInto - This works exactly like CloneFunctionInto,
+/// This works exactly like CloneFunctionInto,
/// except that it does some simple constant prop and DCE on the fly. The
/// effect of this is to copy significantly less code in cases where (for
/// example) a function call with constant arguments is inlined, and those
@@ -704,9 +712,8 @@ void llvm::CloneAndPruneFunctionInto(Function *NewFunc, const Function *OldFunc,
SmallVectorImpl<ReturnInst*> &Returns,
const char *NameSuffix,
ClonedCodeInfo *CodeInfo,
- const DataLayout *DL,
Instruction *TheCall) {
- CloneAndPruneIntoFromInst(NewFunc, OldFunc, OldFunc->front().begin(),
- VMap, ModuleLevelChanges, Returns, NameSuffix,
- CodeInfo, DL, nullptr);
+ CloneAndPruneIntoFromInst(NewFunc, OldFunc, OldFunc->front().begin(), VMap,
+ ModuleLevelChanges, Returns, NameSuffix, CodeInfo,
+ nullptr);
}
diff --git a/lib/Transforms/Utils/CodeExtractor.cpp b/lib/Transforms/Utils/CodeExtractor.cpp
index e70a7d6..ab89b41 100644
--- a/lib/Transforms/Utils/CodeExtractor.cpp
+++ b/lib/Transforms/Utils/CodeExtractor.cpp
@@ -332,11 +332,11 @@ Function *CodeExtractor::constructFunction(const ValueSet &inputs,
DEBUG(dbgs() << **i << ", ");
DEBUG(dbgs() << ")\n");
+ StructType *StructTy;
if (AggregateArgs && (inputs.size() + outputs.size() > 0)) {
- PointerType *StructPtr =
- PointerType::getUnqual(StructType::get(M->getContext(), paramTy));
+ StructTy = StructType::get(M->getContext(), paramTy);
paramTy.clear();
- paramTy.push_back(StructPtr);
+ paramTy.push_back(PointerType::getUnqual(StructTy));
}
FunctionType *funcType =
FunctionType::get(RetTy, paramTy, false);
@@ -364,8 +364,8 @@ Function *CodeExtractor::constructFunction(const ValueSet &inputs,
Idx[0] = Constant::getNullValue(Type::getInt32Ty(header->getContext()));
Idx[1] = ConstantInt::get(Type::getInt32Ty(header->getContext()), i);
TerminatorInst *TI = newFunction->begin()->getTerminator();
- GetElementPtrInst *GEP =
- GetElementPtrInst::Create(AI, Idx, "gep_" + inputs[i]->getName(), TI);
+ GetElementPtrInst *GEP = GetElementPtrInst::Create(
+ StructTy, AI, Idx, "gep_" + inputs[i]->getName(), TI);
RewriteVal = new LoadInst(GEP, "loadgep_" + inputs[i]->getName(), TI);
} else
RewriteVal = AI++;
@@ -447,6 +447,7 @@ emitCallAndSwitchStatement(Function *newFunction, BasicBlock *codeReplacer,
}
}
+ StructType *StructArgTy = nullptr;
AllocaInst *Struct = nullptr;
if (AggregateArgs && (inputs.size() + outputs.size() > 0)) {
std::vector<Type*> ArgTypes;
@@ -455,7 +456,7 @@ emitCallAndSwitchStatement(Function *newFunction, BasicBlock *codeReplacer,
ArgTypes.push_back((*v)->getType());
// Allocate a struct at the beginning of this function
- Type *StructArgTy = StructType::get(newFunction->getContext(), ArgTypes);
+ StructArgTy = StructType::get(newFunction->getContext(), ArgTypes);
Struct =
new AllocaInst(StructArgTy, nullptr, "structArg",
codeReplacer->getParent()->begin()->begin());
@@ -465,9 +466,8 @@ emitCallAndSwitchStatement(Function *newFunction, BasicBlock *codeReplacer,
Value *Idx[2];
Idx[0] = Constant::getNullValue(Type::getInt32Ty(Context));
Idx[1] = ConstantInt::get(Type::getInt32Ty(Context), i);
- GetElementPtrInst *GEP =
- GetElementPtrInst::Create(Struct, Idx,
- "gep_" + StructValues[i]->getName());
+ GetElementPtrInst *GEP = GetElementPtrInst::Create(
+ StructArgTy, Struct, Idx, "gep_" + StructValues[i]->getName());
codeReplacer->getInstList().push_back(GEP);
StoreInst *SI = new StoreInst(StructValues[i], GEP);
codeReplacer->getInstList().push_back(SI);
@@ -491,9 +491,8 @@ emitCallAndSwitchStatement(Function *newFunction, BasicBlock *codeReplacer,
Value *Idx[2];
Idx[0] = Constant::getNullValue(Type::getInt32Ty(Context));
Idx[1] = ConstantInt::get(Type::getInt32Ty(Context), FirstOut + i);
- GetElementPtrInst *GEP
- = GetElementPtrInst::Create(Struct, Idx,
- "gep_reload_" + outputs[i]->getName());
+ GetElementPtrInst *GEP = GetElementPtrInst::Create(
+ StructArgTy, Struct, Idx, "gep_reload_" + outputs[i]->getName());
codeReplacer->getInstList().push_back(GEP);
Output = GEP;
} else {
@@ -606,10 +605,9 @@ emitCallAndSwitchStatement(Function *newFunction, BasicBlock *codeReplacer,
Idx[0] = Constant::getNullValue(Type::getInt32Ty(Context));
Idx[1] = ConstantInt::get(Type::getInt32Ty(Context),
FirstOut+out);
- GetElementPtrInst *GEP =
- GetElementPtrInst::Create(OAI, Idx,
- "gep_" + outputs[out]->getName(),
- NTRet);
+ GetElementPtrInst *GEP = GetElementPtrInst::Create(
+ StructArgTy, OAI, Idx, "gep_" + outputs[out]->getName(),
+ NTRet);
new StoreInst(outputs[out], GEP, NTRet);
} else {
new StoreInst(outputs[out], OAI, NTRet);
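
Every GEP rewrite in this file follows the same then-new API: `GetElementPtrInst::Create` takes the source element type as an explicit first argument instead of recovering it from the pointer operand's type. A minimal sketch of the pattern with placeholder names (`Ctx`, `StructTy`, `StructPtr`, `FieldNo`, and `InsertBefore` are assumptions, not from the patch):

    // New-style struct-field GEP: the pointee type is stated up front.
    llvm::Value *Idx[2] = {
        llvm::Constant::getNullValue(llvm::Type::getInt32Ty(Ctx)),   // deref
        llvm::ConstantInt::get(llvm::Type::getInt32Ty(Ctx), FieldNo) // field
    };
    llvm::GetElementPtrInst *GEP = llvm::GetElementPtrInst::Create(
        StructTy, StructPtr, Idx, "gep_field", InsertBefore);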
diff --git a/lib/Transforms/Utils/CtorUtils.cpp b/lib/Transforms/Utils/CtorUtils.cpp
index 26875e8..dc95089 100644
--- a/lib/Transforms/Utils/CtorUtils.cpp
+++ b/lib/Transforms/Utils/CtorUtils.cpp
@@ -11,14 +11,15 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/ADT/BitVector.h"
#include "llvm/Transforms/Utils/CtorUtils.h"
+#include "llvm/ADT/BitVector.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GlobalVariable.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Module.h"
#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
#define DEBUG_TYPE "ctor_utils"
diff --git a/lib/Transforms/Utils/InlineFunction.cpp b/lib/Transforms/Utils/InlineFunction.cpp
index c2ef1ac..df3e1d4 100644
--- a/lib/Transforms/Utils/InlineFunction.cpp
+++ b/lib/Transforms/Utils/InlineFunction.cpp
@@ -89,7 +89,7 @@ namespace {
CallerLPad = cast<LandingPadInst>(I);
}
- /// getOuterResumeDest - The outer unwind destination is the target of
+ /// The outer unwind destination is the target of
/// unwind edges introduced for calls within the inlined function.
BasicBlock *getOuterResumeDest() const {
return OuterResumeDest;
@@ -99,17 +99,16 @@ namespace {
LandingPadInst *getLandingPadInst() const { return CallerLPad; }
- /// forwardResume - Forward the 'resume' instruction to the caller's landing
- /// pad block. When the landing pad block has only one predecessor, this is
+ /// Forward the 'resume' instruction to the caller's landing pad block.
+ /// When the landing pad block has only one predecessor, this is
/// a simple branch. When there is more than one predecessor, we need to
/// split the landing pad block after the landingpad instruction and jump
/// to there.
void forwardResume(ResumeInst *RI,
SmallPtrSetImpl<LandingPadInst*> &InlinedLPads);
- /// addIncomingPHIValuesFor - Add incoming-PHI values to the unwind
- /// destination block for the given basic block, using the values for the
- /// original invoke's source block.
+ /// Add incoming-PHI values to the unwind destination block for the given
+ /// basic block, using the values for the original invoke's source block.
void addIncomingPHIValuesFor(BasicBlock *BB) const {
addIncomingPHIValuesForInto(BB, OuterResumeDest);
}
@@ -124,7 +123,7 @@ namespace {
};
}
-/// getInnerResumeDest - Get or create a target for the branch from ResumeInsts.
+/// Get or create a target for the branch from ResumeInsts.
BasicBlock *InvokeInliningInfo::getInnerResumeDest() {
if (InnerResumeDest) return InnerResumeDest;
@@ -159,8 +158,8 @@ BasicBlock *InvokeInliningInfo::getInnerResumeDest() {
return InnerResumeDest;
}
-/// forwardResume - Forward the 'resume' instruction to the caller's landing pad
-/// block. When the landing pad block has only one predecessor, this is a simple
+/// Forward the 'resume' instruction to the caller's landing pad block.
+/// When the landing pad block has only one predecessor, this is a simple
/// branch. When there is more than one predecessor, we need to split the
/// landing pad block after the landingpad instruction and jump to there.
void InvokeInliningInfo::forwardResume(ResumeInst *RI,
@@ -178,9 +177,9 @@ void InvokeInliningInfo::forwardResume(ResumeInst *RI,
RI->eraseFromParent();
}
-/// HandleCallsInBlockInlinedThroughInvoke - When we inline a basic block into
-/// an invoke, we have to turn all of the calls that can throw into
-/// invokes. This function analyze BB to see if there are any calls, and if so,
+/// When we inline a basic block into an invoke,
+/// we have to turn all of the calls that can throw into invokes.
+/// This function analyzes BB to see if there are any calls, and if so,
/// it rewrites them to be invokes that jump to InvokeDest and fills in the PHI
/// nodes in that block with the values specified in InvokeDestPHIValues.
static void HandleCallsInBlockInlinedThroughInvoke(BasicBlock *BB,
@@ -228,7 +227,7 @@ static void HandleCallsInBlockInlinedThroughInvoke(BasicBlock *BB,
}
}
-/// HandleInlinedInvoke - If we inlined an invoke site, we need to convert calls
+/// If we inlined an invoke site, we need to convert calls
/// in the body of the inlined function into invokes.
///
/// II is the invoke instruction being inlined. FirstNewBlock is the first
@@ -279,8 +278,8 @@ static void HandleInlinedInvoke(InvokeInst *II, BasicBlock *FirstNewBlock,
InvokeDest->removePredecessor(II->getParent());
}
-/// CloneAliasScopeMetadata - When inlining a function that contains noalias
-/// scope metadata, this metadata needs to be cloned so that the inlined blocks
+/// When inlining a function that contains noalias scope metadata,
+/// this metadata needs to be cloned so that the inlined blocks
/// have different "unqiue scopes" at every call site. Were this not done, then
/// aliasing scopes from a function inlined into a caller multiple times could
/// not be differentiated (and this would lead to miscompiles because the
@@ -391,12 +390,12 @@ static void CloneAliasScopeMetadata(CallSite CS, ValueToValueMapTy &VMap) {
}
}
-/// AddAliasScopeMetadata - If the inlined function has noalias arguments, then
-/// add new alias scopes for each noalias argument, tag the mapped noalias
+/// If the inlined function has noalias arguments,
+/// then add new alias scopes for each noalias argument, tag the mapped noalias
/// parameters with noalias metadata specifying the new scope, and tag all
/// non-derived loads, stores and memory intrinsics with the new alias scopes.
static void AddAliasScopeMetadata(CallSite CS, ValueToValueMapTy &VMap,
- const DataLayout *DL, AliasAnalysis *AA) {
+ const DataLayout &DL, AliasAnalysis *AA) {
if (!EnableNoAliasConversion)
return;
@@ -622,8 +621,9 @@ static void AddAliasScopeMetadata(CallSite CS, ValueToValueMapTy &VMap,
/// If the inlined function has non-byval align arguments, then
/// add @llvm.assume-based alignment assumptions to preserve this information.
static void AddAlignmentAssumptions(CallSite CS, InlineFunctionInfo &IFI) {
- if (!PreserveAlignmentAssumptions || !IFI.DL)
+ if (!PreserveAlignmentAssumptions)
return;
+ auto &DL = CS.getCaller()->getParent()->getDataLayout();
// To avoid inserting redundant assumptions, we should check for assumptions
// already in the caller. To do this, we might need a DT of the caller.
@@ -645,20 +645,20 @@ static void AddAlignmentAssumptions(CallSite CS, InlineFunctionInfo &IFI) {
// If we can already prove the asserted alignment in the context of the
// caller, then don't bother inserting the assumption.
Value *Arg = CS.getArgument(I->getArgNo());
- if (getKnownAlignment(Arg, IFI.DL,
+ if (getKnownAlignment(Arg, DL, CS.getInstruction(),
&IFI.ACT->getAssumptionCache(*CalledFunc),
- CS.getInstruction(), &DT) >= Align)
+ &DT) >= Align)
continue;
- IRBuilder<>(CS.getInstruction()).CreateAlignmentAssumption(*IFI.DL, Arg,
- Align);
+ IRBuilder<>(CS.getInstruction())
+ .CreateAlignmentAssumption(DL, Arg, Align);
}
}
}
-/// UpdateCallGraphAfterInlining - Once we have cloned code over from a callee
-/// into the caller, update the specified callgraph to reflect the changes we
-/// made. Note that it's possible that not all code was copied over, so only
+/// Once we have cloned code over from a callee into the caller,
+/// update the specified callgraph to reflect the changes we made.
+/// Note that it's possible that not all code was copied over, so only
/// some edges of the callgraph may remain.
static void UpdateCallGraphAfterInlining(CallSite CS,
Function::iterator FirstNewBlock,
@@ -693,8 +693,15 @@ static void UpdateCallGraphAfterInlining(CallSite CS,
// If the call was inlined, but then constant folded, there is no edge to
// add. Check for this case.
Instruction *NewCall = dyn_cast<Instruction>(VMI->second);
- if (!NewCall) continue;
+ if (!NewCall)
+ continue;
+ // We do not treat intrinsic calls like real function calls because we
+ // expect them to become inline code; do not add an edge for an intrinsic.
+ CallSite CS = CallSite(NewCall);
+ if (CS && CS.getCalledFunction() && CS.getCalledFunction()->isIntrinsic())
+ continue;
+
// Remember that this call site got inlined for the client of
// InlineFunction.
IFI.InlinedCalls.push_back(NewCall);
@@ -726,11 +733,7 @@ static void HandleByValArgumentInit(Value *Dst, Value *Src, Module *M,
Type *AggTy = cast<PointerType>(Src->getType())->getElementType();
IRBuilder<> Builder(InsertBlock->begin());
- Value *Size;
- if (IFI.DL == nullptr)
- Size = ConstantExpr::getSizeOf(AggTy);
- else
- Size = Builder.getInt64(IFI.DL->getTypeStoreSize(AggTy));
+ Value *Size = Builder.getInt64(M->getDataLayout().getTypeStoreSize(AggTy));
// Always generate a memcpy of alignment 1 here because we don't know
// the alignment of the src pointer. Other optimizations can infer
@@ -738,7 +741,7 @@ static void HandleByValArgumentInit(Value *Dst, Value *Src, Module *M,
Builder.CreateMemCpy(Dst, Src, Size, /*Align=*/1);
}
-/// HandleByValArgument - When inlining a call site that has a byval argument,
+/// When inlining a call site that has a byval argument,
/// we have to make the implicit memcpy explicit by adding it.
static Value *HandleByValArgument(Value *Arg, Instruction *TheCall,
const Function *CalledFunc,
@@ -759,11 +762,13 @@ static Value *HandleByValArgument(Value *Arg, Instruction *TheCall,
if (ByValAlignment <= 1) // 0 = unspecified, 1 = no particular alignment.
return Arg;
+ const DataLayout &DL = Caller->getParent()->getDataLayout();
+
// If the pointer is already known to be sufficiently aligned, or if we can
// round it up to a larger alignment, then we don't need a temporary.
- if (getOrEnforceKnownAlignment(Arg, ByValAlignment, IFI.DL,
- &IFI.ACT->getAssumptionCache(*Caller),
- TheCall) >= ByValAlignment)
+ if (getOrEnforceKnownAlignment(Arg, ByValAlignment, DL, TheCall,
+ &IFI.ACT->getAssumptionCache(*Caller)) >=
+ ByValAlignment)
return Arg;
// Otherwise, we have to make a memcpy to get a safe alignment. This is bad
@@ -771,10 +776,9 @@ static Value *HandleByValArgument(Value *Arg, Instruction *TheCall,
}
// Create the alloca. If we have DataLayout, use nice alignment.
- unsigned Align = 1;
- if (IFI.DL)
- Align = IFI.DL->getPrefTypeAlignment(AggTy);
-
+ unsigned Align =
+ Caller->getParent()->getDataLayout().getPrefTypeAlignment(AggTy);
+
// If the byval had an alignment specified, we *must* use at least that
// alignment, as it is required by the byval argument (and uses of the
// pointer inside the callee).
@@ -789,8 +793,7 @@ static Value *HandleByValArgument(Value *Arg, Instruction *TheCall,
return NewAlloca;
}
-// isUsedByLifetimeMarker - Check whether this Value is used by a lifetime
-// intrinsic.
+// Check whether this Value is used by a lifetime intrinsic.
static bool isUsedByLifetimeMarker(Value *V) {
for (User *U : V->users()) {
if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(U)) {
@@ -805,7 +808,7 @@ static bool isUsedByLifetimeMarker(Value *V) {
return false;
}
-// hasLifetimeMarkers - Check whether the given alloca already has
+// Check whether the given alloca already has
// lifetime.start or lifetime.end intrinsics.
static bool hasLifetimeMarkers(AllocaInst *AI) {
Type *Ty = AI->getType();
@@ -862,7 +865,7 @@ updateInlinedAtInfo(DebugLoc DL, MDLocation *InlinedAtNode,
return DebugLoc::get(DL.getLine(), DL.getCol(), DL.getScope(Ctx), Last);
}
-/// fixupLineNumbers - Update inlined instructions' line numbers to
+/// Update inlined instructions' line numbers
/// to encode location where these instructions are inlined.
static void fixupLineNumbers(Function *Fn, Function::iterator FI,
Instruction *TheCall) {
@@ -920,10 +923,9 @@ static void fixupLineNumbers(Function *Fn, Function::iterator FI,
}
}
-/// InlineFunction - This function inlines the called function into the basic
-/// block of the caller. This returns false if it is not possible to inline
-/// this call. The program is still in a well defined state if this occurs
-/// though.
+/// This function inlines the called function into the basic block of the
+/// caller. This returns false if it is not possible to inline this call.
+/// The program is still in a well-defined state if this occurs, though.
///
/// Note that this only does one level of inlining. For example, if the
/// instruction 'call B' is inlined, and 'B' calls 'C', then the call to 'C' now
@@ -1008,6 +1010,8 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI,
// Keep a list of pair (dst, src) to emit byval initializations.
SmallVector<std::pair<Value*, Value*>, 4> ByValInit;
+ auto &DL = Caller->getParent()->getDataLayout();
+
assert(CalledFunc->arg_size() == CS.arg_size() &&
"No varargs calls can be inlined!");
@@ -1042,9 +1046,9 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI,
// have no dead or constant instructions leftover after inlining occurs
// (which can happen, e.g., because an argument was constant), but we'll be
// happy with whatever the cloner can do.
- CloneAndPruneFunctionInto(Caller, CalledFunc, VMap,
+ CloneAndPruneFunctionInto(Caller, CalledFunc, VMap,
/*ModuleLevelChanges=*/false, Returns, ".i",
- &InlinedFunctionInfo, IFI.DL, TheCall);
+ &InlinedFunctionInfo, TheCall);
// Remember the first block that is newly cloned over.
FirstNewBlock = LastBlock; ++FirstNewBlock;
@@ -1065,7 +1069,7 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI,
CloneAliasScopeMetadata(CS, VMap);
// Add noalias metadata if necessary.
- AddAliasScopeMetadata(CS, VMap, IFI.DL, IFI.AA);
+ AddAliasScopeMetadata(CS, VMap, DL, IFI.AA);
// FIXME: We could register any cloned assumptions instead of clearing the
// whole function's cache.
@@ -1173,18 +1177,17 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI,
ConstantInt *AllocaSize = nullptr;
if (ConstantInt *AIArraySize =
dyn_cast<ConstantInt>(AI->getArraySize())) {
- if (IFI.DL) {
- Type *AllocaType = AI->getAllocatedType();
- uint64_t AllocaTypeSize = IFI.DL->getTypeAllocSize(AllocaType);
- uint64_t AllocaArraySize = AIArraySize->getLimitedValue();
- assert(AllocaArraySize > 0 && "array size of AllocaInst is zero");
- // Check that array size doesn't saturate uint64_t and doesn't
- // overflow when it's multiplied by type size.
- if (AllocaArraySize != ~0ULL &&
- UINT64_MAX / AllocaArraySize >= AllocaTypeSize) {
- AllocaSize = ConstantInt::get(Type::getInt64Ty(AI->getContext()),
- AllocaArraySize * AllocaTypeSize);
- }
+ auto &DL = Caller->getParent()->getDataLayout();
+ Type *AllocaType = AI->getAllocatedType();
+ uint64_t AllocaTypeSize = DL.getTypeAllocSize(AllocaType);
+ uint64_t AllocaArraySize = AIArraySize->getLimitedValue();
+ assert(AllocaArraySize > 0 && "array size of AllocaInst is zero");
+ // Check that array size doesn't saturate uint64_t and doesn't
+ // overflow when it's multiplied by type size.
+ if (AllocaArraySize != ~0ULL &&
+ UINT64_MAX / AllocaArraySize >= AllocaTypeSize) {
+ AllocaSize = ConstantInt::get(Type::getInt64Ty(AI->getContext()),
+ AllocaArraySize * AllocaTypeSize);
}
}
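
The guard above is the subtle part: `getLimitedValue()` saturates to ~0ULL, and the division check rejects any product that would wrap. A small worked sketch with hypothetical numbers:

    // AllocaSize = ArraySize * TypeSize only when the product is exact.
    uint64_t TypeSize = 16, ArraySize = 1ULL << 60;
    bool Overflows =
        ArraySize == ~0ULL || UINT64_MAX / ArraySize < TypeSize;
    // 2^60 * 16 == 2^64 wraps uint64_t, so Overflows is true here and no
    // constant size is attached to the lifetime markers.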
@@ -1445,7 +1448,8 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI,
// the entries are the same or undef). If so, remove the PHI so it doesn't
// block other optimizations.
if (PHI) {
- if (Value *V = SimplifyInstruction(PHI, IFI.DL, nullptr, nullptr,
+ auto &DL = Caller->getParent()->getDataLayout();
+ if (Value *V = SimplifyInstruction(PHI, DL, nullptr, nullptr,
&IFI.ACT->getAssumptionCache(*Caller))) {
PHI->replaceAllUsesWith(V);
PHI->eraseFromParent();
diff --git a/lib/Transforms/Utils/Local.cpp b/lib/Transforms/Utils/Local.cpp
index 4830568..bd15f9e 100644
--- a/lib/Transforms/Utils/Local.cpp
+++ b/lib/Transforms/Utils/Local.cpp
@@ -17,8 +17,8 @@
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/Statistic.h"
-#include "llvm/Analysis/LibCallSemantics.h"
#include "llvm/Analysis/InstructionSimplify.h"
+#include "llvm/Analysis/LibCallSemantics.h"
#include "llvm/Analysis/MemoryBuiltins.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/CFG.h"
@@ -417,7 +417,7 @@ bool llvm::RecursivelyDeleteDeadPHINode(PHINode *PN,
///
/// This returns true if it changed the code, note that it can delete
/// instructions in other blocks as well in this block.
-bool llvm::SimplifyInstructionsInBlock(BasicBlock *BB, const DataLayout *TD,
+bool llvm::SimplifyInstructionsInBlock(BasicBlock *BB,
const TargetLibraryInfo *TLI) {
bool MadeChange = false;
@@ -434,7 +434,7 @@ bool llvm::SimplifyInstructionsInBlock(BasicBlock *BB, const DataLayout *TD,
Instruction *Inst = BI++;
WeakVH BIHandle(BI);
- if (recursivelySimplifyInstruction(Inst, TD, TLI)) {
+ if (recursivelySimplifyInstruction(Inst, TLI)) {
MadeChange = true;
if (BIHandle != BI)
BI = BB->begin();
@@ -464,8 +464,7 @@ bool llvm::SimplifyInstructionsInBlock(BasicBlock *BB, const DataLayout *TD,
///
/// .. and delete the predecessor corresponding to the '1', this will attempt to
/// recursively fold the and to 0.
-void llvm::RemovePredecessorAndSimplify(BasicBlock *BB, BasicBlock *Pred,
- DataLayout *TD) {
+void llvm::RemovePredecessorAndSimplify(BasicBlock *BB, BasicBlock *Pred) {
// This only adjusts blocks with PHI nodes.
if (!isa<PHINode>(BB->begin()))
return;
@@ -480,7 +479,7 @@ void llvm::RemovePredecessorAndSimplify(BasicBlock *BB, BasicBlock *Pred,
PhiIt = &*++BasicBlock::iterator(cast<Instruction>(PhiIt));
Value *OldPhiIt = PhiIt;
- if (!recursivelySimplifyInstruction(PN, TD))
+ if (!recursivelySimplifyInstruction(PN))
continue;
// If recursive simplification ended up deleting the next PHI node we would
@@ -900,13 +899,14 @@ bool llvm::EliminateDuplicatePHINodes(BasicBlock *BB) {
/// their preferred alignment from the beginning.
///
static unsigned enforceKnownAlignment(Value *V, unsigned Align,
- unsigned PrefAlign, const DataLayout *TD) {
+ unsigned PrefAlign,
+ const DataLayout &DL) {
V = V->stripPointerCasts();
if (AllocaInst *AI = dyn_cast<AllocaInst>(V)) {
// If the preferred alignment is greater than the natural stack alignment
// then don't round up. This avoids dynamic stack realignment.
- if (TD && TD->exceedsNaturalStackAlignment(PrefAlign))
+ if (DL.exceedsNaturalStackAlignment(PrefAlign))
return Align;
// If there is a requested alignment and if this is an alloca, round up.
if (AI->getAlignment() >= PrefAlign)
@@ -945,13 +945,13 @@ static unsigned enforceKnownAlignment(Value *V, unsigned Align,
/// and it is more than the alignment of the ultimate object, see if we can
/// increase the alignment of the ultimate object, making this check succeed.
unsigned llvm::getOrEnforceKnownAlignment(Value *V, unsigned PrefAlign,
- const DataLayout *DL,
- AssumptionCache *AC,
+ const DataLayout &DL,
const Instruction *CxtI,
+ AssumptionCache *AC,
const DominatorTree *DT) {
assert(V->getType()->isPointerTy() &&
"getOrEnforceKnownAlignment expects a pointer!");
- unsigned BitWidth = DL ? DL->getPointerTypeSizeInBits(V->getType()) : 64;
+ unsigned BitWidth = DL.getPointerTypeSizeInBits(V->getType());
APInt KnownZero(BitWidth, 0), KnownOne(BitWidth, 0);
computeKnownBits(V, KnownZero, KnownOne, DL, 0, AC, CxtI, DT);
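
The reference-taking `getOrEnforceKnownAlignment` above ultimately turns known-zero low bits into an alignment. A standalone sketch of that step, assuming `KnownZero` as computed by the `computeKnownBits` call (the exact shift cap used upstream may differ):

    // A pointer whose low k bits are provably zero is 2^k-aligned.
    unsigned TrailZ = KnownZero.countTrailingOnes(); // low bits known zero
    unsigned Align = 1u << std::min(TrailZ, 29u);    // keep shift in range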
diff --git a/lib/Transforms/Utils/LoopSimplify.cpp b/lib/Transforms/Utils/LoopSimplify.cpp
index a0f8268..90dfaba 100644
--- a/lib/Transforms/Utils/LoopSimplify.cpp
+++ b/lib/Transforms/Utils/LoopSimplify.cpp
@@ -57,8 +57,10 @@
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/Module.h"
#include "llvm/IR/Type.h"
#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Utils/LoopUtils.h"
@@ -209,10 +211,11 @@ static void addBlockAndPredsToSet(BasicBlock *InputBB, BasicBlock *StopBlock,
static PHINode *findPHIToPartitionLoops(Loop *L, AliasAnalysis *AA,
DominatorTree *DT,
AssumptionCache *AC) {
+ const DataLayout &DL = L->getHeader()->getModule()->getDataLayout();
for (BasicBlock::iterator I = L->getHeader()->begin(); isa<PHINode>(I); ) {
PHINode *PN = cast<PHINode>(I);
++I;
- if (Value *V = SimplifyInstruction(PN, nullptr, nullptr, DT, AC)) {
+ if (Value *V = SimplifyInstruction(PN, DL, nullptr, DT, AC)) {
// This is a degenerate PHI already, don't modify it!
PN->replaceAllUsesWith(V);
if (AA) AA->deleteValue(PN);
@@ -476,7 +479,7 @@ static BasicBlock *insertUniqueBackedgeBlock(Loop *L, BasicBlock *Preheader,
/// explicit if they accepted the analysis directly and then updated it.
static bool simplifyOneLoop(Loop *L, SmallVectorImpl<Loop *> &Worklist,
AliasAnalysis *AA, DominatorTree *DT, LoopInfo *LI,
- ScalarEvolution *SE, Pass *PP, const DataLayout *DL,
+ ScalarEvolution *SE, Pass *PP,
AssumptionCache *AC) {
bool Changed = false;
ReprocessLoop:
@@ -608,13 +611,15 @@ ReprocessLoop:
}
}
+ const DataLayout &DL = L->getHeader()->getModule()->getDataLayout();
+
// Scan over the PHI nodes in the loop header. Since they now have only two
// incoming values (the loop is canonicalized), we may have simplified the PHI
// down to 'X = phi [X, Y]', which should be replaced with 'Y'.
PHINode *PN;
for (BasicBlock::iterator I = L->getHeader()->begin();
(PN = dyn_cast<PHINode>(I++)); )
- if (Value *V = SimplifyInstruction(PN, nullptr, nullptr, DT, AC)) {
+ if (Value *V = SimplifyInstruction(PN, DL, nullptr, DT, AC)) {
if (AA) AA->deleteValue(PN);
if (SE) SE->forgetValue(PN);
PN->replaceAllUsesWith(V);
@@ -676,7 +681,8 @@ ReprocessLoop:
// The block has now been cleared of all instructions except for
// a comparison and a conditional branch. SimplifyCFG may be able
// to fold it now.
- if (!FoldBranchToCommonDest(BI, DL)) continue;
+ if (!FoldBranchToCommonDest(BI))
+ continue;
// Success. The block is now dead, so remove it from the loop,
// update the dominator tree and delete it.
@@ -714,7 +720,7 @@ ReprocessLoop:
bool llvm::simplifyLoop(Loop *L, DominatorTree *DT, LoopInfo *LI, Pass *PP,
AliasAnalysis *AA, ScalarEvolution *SE,
- const DataLayout *DL, AssumptionCache *AC) {
+ AssumptionCache *AC) {
bool Changed = false;
// Worklist maintains our depth-first queue of loops in this nest to process.
@@ -731,7 +737,7 @@ bool llvm::simplifyLoop(Loop *L, DominatorTree *DT, LoopInfo *LI, Pass *PP,
while (!Worklist.empty())
Changed |= simplifyOneLoop(Worklist.pop_back_val(), Worklist, AA, DT, LI,
- SE, PP, DL, AC);
+ SE, PP, AC);
return Changed;
}
@@ -749,7 +755,6 @@ namespace {
DominatorTree *DT;
LoopInfo *LI;
ScalarEvolution *SE;
- const DataLayout *DL;
AssumptionCache *AC;
bool runOnFunction(Function &F) override;
@@ -797,13 +802,11 @@ bool LoopSimplify::runOnFunction(Function &F) {
LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
SE = getAnalysisIfAvailable<ScalarEvolution>();
- DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>();
- DL = DLP ? &DLP->getDataLayout() : nullptr;
AC = &getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
// Simplify each loop nest in the function.
for (LoopInfo::iterator I = LI->begin(), E = LI->end(); I != E; ++I)
- Changed |= simplifyLoop(*I, DT, LI, this, AA, SE, DL, AC);
+ Changed |= simplifyLoop(*I, DT, LI, this, AA, SE, AC);
return Changed;
}
diff --git a/lib/Transforms/Utils/LoopUnroll.cpp b/lib/Transforms/Utils/LoopUnroll.cpp
index accb731..6b3aa02 100644
--- a/lib/Transforms/Utils/LoopUnroll.cpp
+++ b/lib/Transforms/Utils/LoopUnroll.cpp
@@ -26,8 +26,8 @@
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/DataLayout.h"
-#include "llvm/IR/Dominators.h"
#include "llvm/IR/DiagnosticInfo.h"
+#include "llvm/IR/Dominators.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
@@ -500,6 +500,7 @@ bool llvm::UnrollLoop(Loop *L, unsigned Count, unsigned TripCount,
// At this point, the code is well formed. We now do a quick sweep over the
// inserted code, doing constant propagation and dead code elimination as we
// go.
+ const DataLayout &DL = Header->getModule()->getDataLayout();
const std::vector<BasicBlock*> &NewLoopBlocks = L->getBlocks();
for (std::vector<BasicBlock*>::const_iterator BB = NewLoopBlocks.begin(),
BBE = NewLoopBlocks.end(); BB != BBE; ++BB)
@@ -508,7 +509,7 @@ bool llvm::UnrollLoop(Loop *L, unsigned Count, unsigned TripCount,
if (isInstructionTriviallyDead(Inst))
(*BB)->getInstList().erase(Inst);
- else if (Value *V = SimplifyInstruction(Inst))
+ else if (Value *V = SimplifyInstruction(Inst, DL))
if (LI->replacementPreservesLCSSAForm(Inst, V)) {
Inst->replaceAllUsesWith(V);
(*BB)->getInstList().erase(Inst);
@@ -531,9 +532,7 @@ bool llvm::UnrollLoop(Loop *L, unsigned Count, unsigned TripCount,
if (!OuterL && !CompletelyUnroll)
OuterL = L;
if (OuterL) {
- DataLayoutPass *DLP = PP->getAnalysisIfAvailable<DataLayoutPass>();
- const DataLayout *DL = DLP ? &DLP->getDataLayout() : nullptr;
- simplifyLoop(OuterL, DT, LI, PP, /*AliasAnalysis*/ nullptr, SE, DL, AC);
+ simplifyLoop(OuterL, DT, LI, PP, /*AliasAnalysis*/ nullptr, SE, AC);
// LCSSA must be performed on the outermost affected loop. The unrolled
// loop's last loop latch is guaranteed to be in the outermost loop after
diff --git a/lib/Transforms/Utils/LoopUnrollRuntime.cpp b/lib/Transforms/Utils/LoopUnrollRuntime.cpp
index 91b688c..381d8fc 100644
--- a/lib/Transforms/Utils/LoopUnrollRuntime.cpp
+++ b/lib/Transforms/Utils/LoopUnrollRuntime.cpp
@@ -31,6 +31,7 @@
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Metadata.h"
+#include "llvm/IR/Module.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Scalar.h"
@@ -339,10 +340,11 @@ bool llvm::UnrollRuntimeLoopProlog(Loop *L, unsigned Count, LoopInfo *LI,
BasicBlock *PEnd = SplitEdge(PH, Header, DT, LI);
BasicBlock *NewPH = SplitBlock(PEnd, PEnd->getTerminator(), DT, LI);
BranchInst *PreHeaderBR = cast<BranchInst>(PH->getTerminator());
+ const DataLayout &DL = Header->getModule()->getDataLayout();
// Compute the number of extra iterations required, which is:
// extra iterations = run-time trip count % (loop unroll factor + 1)
- SCEVExpander Expander(*SE, "loop-unroll");
+ SCEVExpander Expander(*SE, DL, "loop-unroll");
Value *TripCount = Expander.expandCodeFor(TripCountSC, TripCountSC->getType(),
PreHeaderBR);
Value *BECount = Expander.expandCodeFor(BECountSC, BECountSC->getType(),
diff --git a/lib/Transforms/Utils/LowerSwitch.cpp b/lib/Transforms/Utils/LowerSwitch.cpp
index b3bdae4..e0e0e90 100644
--- a/lib/Transforms/Utils/LowerSwitch.cpp
+++ b/lib/Transforms/Utils/LowerSwitch.cpp
@@ -14,17 +14,17 @@
//===----------------------------------------------------------------------===//
#include "llvm/Transforms/Scalar.h"
-#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/ADT/STLExtras.h"
+#include "llvm/IR/CFG.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/LLVMContext.h"
-#include "llvm/IR/CFG.h"
#include "llvm/Pass.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/UnifyFunctionExitNodes.h"
#include <algorithm>
using namespace llvm;
@@ -175,11 +175,16 @@ static void fixPhis(BasicBlock *SuccBB, BasicBlock *OrigBB, BasicBlock *NewBB,
// Remove additional occurrences coming from condensed cases and keep the
// number of incoming values equal to the number of branches to SuccBB.
+ SmallVector<unsigned, 8> Indices;
for (++Idx; LocalNumMergedCases > 0 && Idx < E; ++Idx)
if (PN->getIncomingBlock(Idx) == OrigBB) {
- PN->removeIncomingValue(Idx);
+ Indices.push_back(Idx);
LocalNumMergedCases--;
}
+  // Remove the incoming values in reverse order so that the indices
+  // recorded for entries not yet removed remain valid.
+ for (auto III = Indices.rbegin(), IIE = Indices.rend(); III != IIE; ++III)
+ PN->removeIncomingValue(*III);
}
}
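
The reverse iteration added above matters because `PHINode::removeIncomingValue` shifts every later operand down by one, so ascending removal would leave the remaining recorded indices pointing at the wrong entries. A minimal sketch of the failure the patch avoids (hypothetical PHI with doomed entries at indices 1 and 3):

    // Ascending removal: erasing index 1 moves the entry at 3 down to 2,
    // so removeIncomingValue(3) then deletes the wrong (or no) entry.
    // Descending removal keeps every not-yet-removed index stable:
    for (auto It = Indices.rbegin(), End = Indices.rend(); It != End; ++It)
      PN->removeIncomingValue(*It);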
diff --git a/lib/Transforms/Utils/PromoteMemoryToRegister.cpp b/lib/Transforms/Utils/PromoteMemoryToRegister.cpp
index dabadb7..4b34b19 100644
--- a/lib/Transforms/Utils/PromoteMemoryToRegister.cpp
+++ b/lib/Transforms/Utils/PromoteMemoryToRegister.cpp
@@ -45,6 +45,7 @@
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Metadata.h"
+#include "llvm/IR/Module.h"
#include "llvm/Transforms/Utils/Local.h"
#include <algorithm>
#include <queue>
@@ -667,6 +668,8 @@ void PromoteMem2Reg::run() {
A->eraseFromParent();
}
+ const DataLayout &DL = F.getParent()->getDataLayout();
+
// Remove alloca's dbg.declare instrinsics from the function.
for (unsigned i = 0, e = AllocaDbgDeclares.size(); i != e; ++i)
if (DbgDeclareInst *DDI = AllocaDbgDeclares[i])
@@ -691,7 +694,7 @@ void PromoteMem2Reg::run() {
PHINode *PN = I->second;
// If this PHI node merges one value and/or undefs, get the value.
- if (Value *V = SimplifyInstruction(PN, nullptr, nullptr, &DT, AC)) {
+ if (Value *V = SimplifyInstruction(PN, DL, nullptr, &DT, AC)) {
if (AST && PN->getType()->isPointerTy())
AST->deleteValue(PN);
PN->replaceAllUsesWith(V);
diff --git a/lib/Transforms/Utils/SSAUpdater.cpp b/lib/Transforms/Utils/SSAUpdater.cpp
index c057b06..955ce30 100644
--- a/lib/Transforms/Utils/SSAUpdater.cpp
+++ b/lib/Transforms/Utils/SSAUpdater.cpp
@@ -19,6 +19,7 @@
#include "llvm/IR/Constants.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/Module.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
@@ -155,7 +156,8 @@ Value *SSAUpdater::GetValueInMiddleOfBlock(BasicBlock *BB) {
// See if the PHI node can be merged to a single value. This can happen in
// loop cases when we get a PHI of itself and one other value.
- if (Value *V = SimplifyInstruction(InsertedPHI)) {
+ if (Value *V =
+ SimplifyInstruction(InsertedPHI, BB->getModule()->getDataLayout())) {
InsertedPHI->eraseFromParent();
return V;
}
diff --git a/lib/Transforms/Utils/SimplifyCFG.cpp b/lib/Transforms/Utils/SimplifyCFG.cpp
index 3248a83..c7c0ca6 100644
--- a/lib/Transforms/Utils/SimplifyCFG.cpp
+++ b/lib/Transforms/Utils/SimplifyCFG.cpp
@@ -110,8 +110,8 @@ namespace {
class SimplifyCFGOpt {
const TargetTransformInfo &TTI;
+ const DataLayout &DL;
unsigned BonusInstThreshold;
- const DataLayout *const DL;
AssumptionCache *AC;
Value *isValueEqualityComparison(TerminatorInst *TI);
BasicBlock *GetValueEqualityComparisonCases(TerminatorInst *TI,
@@ -131,9 +131,9 @@ class SimplifyCFGOpt {
bool SimplifyCondBranch(BranchInst *BI, IRBuilder <>&Builder);
public:
- SimplifyCFGOpt(const TargetTransformInfo &TTI, unsigned BonusInstThreshold,
- const DataLayout *DL, AssumptionCache *AC)
- : TTI(TTI), BonusInstThreshold(BonusInstThreshold), DL(DL), AC(AC) {}
+ SimplifyCFGOpt(const TargetTransformInfo &TTI, const DataLayout &DL,
+ unsigned BonusInstThreshold, AssumptionCache *AC)
+ : TTI(TTI), DL(DL), BonusInstThreshold(BonusInstThreshold), AC(AC) {}
bool run(BasicBlock *BB);
};
}
@@ -223,9 +223,9 @@ static void AddPredecessorToBlock(BasicBlock *Succ, BasicBlock *NewPred,
/// given instruction, which is assumed to be safe to speculate. TCC_Free means
/// cheap, TCC_Basic means less cheap, and TCC_Expensive means prohibitively
/// expensive.
-static unsigned ComputeSpeculationCost(const User *I, const DataLayout *DL,
+static unsigned ComputeSpeculationCost(const User *I,
const TargetTransformInfo &TTI) {
- assert(isSafeToSpeculativelyExecute(I, DL) &&
+ assert(isSafeToSpeculativelyExecute(I) &&
"Instruction is not safe to speculatively execute!");
return TTI.getUserCost(I);
}
@@ -249,7 +249,6 @@ static unsigned ComputeSpeculationCost(const User *I, const DataLayout *DL,
static bool DominatesMergePoint(Value *V, BasicBlock *BB,
SmallPtrSetImpl<Instruction*> *AggressiveInsts,
unsigned &CostRemaining,
- const DataLayout *DL,
const TargetTransformInfo &TTI) {
Instruction *I = dyn_cast<Instruction>(V);
if (!I) {
@@ -283,10 +282,10 @@ static bool DominatesMergePoint(Value *V, BasicBlock *BB,
// Okay, it looks like the instruction IS in the "condition". Check to
// see if it's a cheap instruction to unconditionally compute, and if it
// only uses stuff defined outside of the condition. If so, hoist it out.
- if (!isSafeToSpeculativelyExecute(I, DL))
+ if (!isSafeToSpeculativelyExecute(I))
return false;
- unsigned Cost = ComputeSpeculationCost(I, DL, TTI);
+ unsigned Cost = ComputeSpeculationCost(I, TTI);
if (Cost > CostRemaining)
return false;
@@ -296,7 +295,7 @@ static bool DominatesMergePoint(Value *V, BasicBlock *BB,
// Okay, we can only really hoist these out if their operands do
// not take us over the cost threshold.
for (User::op_iterator i = I->op_begin(), e = I->op_end(); i != e; ++i)
- if (!DominatesMergePoint(*i, BB, AggressiveInsts, CostRemaining, DL, TTI))
+ if (!DominatesMergePoint(*i, BB, AggressiveInsts, CostRemaining, TTI))
return false;
// Okay, it's safe to do this! Remember this instruction.
AggressiveInsts->insert(I);
@@ -305,15 +304,15 @@ static bool DominatesMergePoint(Value *V, BasicBlock *BB,
/// GetConstantInt - Extract ConstantInt from value, looking through IntToPtr
/// and PointerNullValue. Return NULL if value is not a constant int.
-static ConstantInt *GetConstantInt(Value *V, const DataLayout *DL) {
+static ConstantInt *GetConstantInt(Value *V, const DataLayout &DL) {
// Normal constant int.
ConstantInt *CI = dyn_cast<ConstantInt>(V);
- if (CI || !DL || !isa<Constant>(V) || !V->getType()->isPointerTy())
+ if (CI || !isa<Constant>(V) || !V->getType()->isPointerTy())
return CI;
// This is some kind of pointer constant. Turn it into a pointer-sized
// ConstantInt if possible.
- IntegerType *PtrTy = cast<IntegerType>(DL->getIntPtrType(V->getType()));
+ IntegerType *PtrTy = cast<IntegerType>(DL.getIntPtrType(V->getType()));
// Null pointer means 0, see SelectionDAGBuilder::getValue(const Value*).
if (isa<ConstantPointerNull>(V))
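
For the pointer path here, the switch needs an integer key, and `DL.getIntPtrType` names the pointer-sized integer type for the constant. A short sketch (illustrative; `PtrVal` is an assumed name):

    // A null pointer becomes the zero of the pointer-sized integer type,
    // matching the SelectionDAGBuilder convention cited above.
    llvm::IntegerType *IntPtrTy =
        llvm::cast<llvm::IntegerType>(DL.getIntPtrType(PtrVal->getType()));
    llvm::ConstantInt *Zero = llvm::ConstantInt::get(IntPtrTy, 0);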
@@ -346,16 +345,16 @@ namespace {
/// while for a chain of '&&' it will build the set elements that make the test
/// fail.
struct ConstantComparesGatherer {
-
+ const DataLayout &DL;
Value *CompValue; /// Value found for the switch comparison
Value *Extra; /// Extra clause to be checked before the switch
SmallVector<ConstantInt *, 8> Vals; /// Set of integers to match in switch
unsigned UsedICmps; /// Number of comparisons matched in the and/or chain
/// Construct and compute the result for the comparison instruction Cond
- ConstantComparesGatherer(Instruction *Cond, const DataLayout *DL)
- : CompValue(nullptr), Extra(nullptr), UsedICmps(0) {
- gather(Cond, DL);
+ ConstantComparesGatherer(Instruction *Cond, const DataLayout &DL)
+ : DL(DL), CompValue(nullptr), Extra(nullptr), UsedICmps(0) {
+ gather(Cond);
}
/// Prevent copy
@@ -380,7 +379,7 @@ private:
/// against is placed in CompValue.
/// If CompValue is already set, the function is expected to fail if a match
/// is found but the value compared to is different.
- bool matchInstruction(Instruction *I, const DataLayout *DL, bool isEQ) {
+ bool matchInstruction(Instruction *I, bool isEQ) {
// If this is an icmp against a constant, handle this as one of the cases.
ICmpInst *ICI;
ConstantInt *C;
@@ -422,8 +421,8 @@ private:
}
// If we have "x ult 3", for example, then we can add 0,1,2 to the set.
- ConstantRange Span = ConstantRange::makeICmpRegion(ICI->getPredicate(),
- C->getValue());
+ ConstantRange Span = ConstantRange::makeAllowedICmpRegion(
+ ICI->getPredicate(), C->getValue());
// Shift the range if the compare is fed by an add. This is the range
// compare idiom as emitted by instcombine.
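
`makeAllowedICmpRegion` (the rename picked up above) returns the set of LHS values for which the predicate may hold. A minimal sketch of the "x ult 3" case from the comment, relying on the same implicit APInt-to-ConstantRange conversion the patch itself uses:

    // For `icmp ult %x, 3` the allowed region is [0, 3), i.e. {0, 1, 2};
    // the gatherer then emits one switch case per member of a small range.
    llvm::ConstantRange Span = llvm::ConstantRange::makeAllowedICmpRegion(
        llvm::CmpInst::ICMP_ULT, llvm::APInt(32, 3));
    // Span.getLower() == 0 and Span.getUpper() == 3.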
@@ -462,7 +461,7 @@ private:
/// the value being compared, and stick the list constants into the Vals
/// vector.
/// One "Extra" case is allowed to differ from the other.
- void gather(Value *V, const DataLayout *DL) {
+ void gather(Value *V) {
Instruction *I = dyn_cast<Instruction>(V);
bool isEQ = (I->getOpcode() == Instruction::Or);
@@ -484,7 +483,7 @@ private:
}
// Try to match the current instruction
- if (matchInstruction(I, DL, isEQ))
+ if (matchInstruction(I, isEQ))
// Match succeed, continue the loop
continue;
}
@@ -532,15 +531,16 @@ Value *SimplifyCFGOpt::isValueEqualityComparison(TerminatorInst *TI) {
CV = SI->getCondition();
} else if (BranchInst *BI = dyn_cast<BranchInst>(TI))
if (BI->isConditional() && BI->getCondition()->hasOneUse())
- if (ICmpInst *ICI = dyn_cast<ICmpInst>(BI->getCondition()))
+ if (ICmpInst *ICI = dyn_cast<ICmpInst>(BI->getCondition())) {
if (ICI->isEquality() && GetConstantInt(ICI->getOperand(1), DL))
CV = ICI->getOperand(0);
+ }
// Unwrap any lossless ptrtoint cast.
- if (DL && CV) {
+ if (CV) {
if (PtrToIntInst *PTII = dyn_cast<PtrToIntInst>(CV)) {
Value *Ptr = PTII->getPointerOperand();
- if (PTII->getType() == DL->getIntPtrType(Ptr->getType()))
+ if (PTII->getType() == DL.getIntPtrType(Ptr->getType()))
CV = Ptr;
}
}
@@ -981,8 +981,7 @@ bool SimplifyCFGOpt::FoldValueComparisonIntoPredecessors(TerminatorInst *TI,
Builder.SetInsertPoint(PTI);
// Convert pointer to int before we switch.
if (CV->getType()->isPointerTy()) {
- assert(DL && "Cannot switch on pointer without DataLayout");
- CV = Builder.CreatePtrToInt(CV, DL->getIntPtrType(CV->getType()),
+ CV = Builder.CreatePtrToInt(CV, DL.getIntPtrType(CV->getType()),
"magicptr");
}
@@ -1053,7 +1052,7 @@ static bool passingValueIsAlwaysUndefined(Value *V, Instruction *I);
/// HoistThenElseCodeToIf - Given a conditional branch that goes to BB1 and
/// BB2, hoist any common code in the two blocks up into the branch block. The
/// caller of this function guarantees that BI's block dominates BB1 and BB2.
-static bool HoistThenElseCodeToIf(BranchInst *BI, const DataLayout *DL,
+static bool HoistThenElseCodeToIf(BranchInst *BI,
const TargetTransformInfo &TTI) {
// This does very trivial matching, with limited scanning, to find identical
// instructions in the two blocks. In particular, we don't want to get into
@@ -1145,9 +1144,9 @@ HoistTerminator:
passingValueIsAlwaysUndefined(BB2V, PN))
return Changed;
- if (isa<ConstantExpr>(BB1V) && !isSafeToSpeculativelyExecute(BB1V, DL))
+ if (isa<ConstantExpr>(BB1V) && !isSafeToSpeculativelyExecute(BB1V))
return Changed;
- if (isa<ConstantExpr>(BB2V) && !isSafeToSpeculativelyExecute(BB2V, DL))
+ if (isa<ConstantExpr>(BB2V) && !isSafeToSpeculativelyExecute(BB2V))
return Changed;
}
}
@@ -1467,7 +1466,6 @@ static Value *isSafeToSpeculateStore(Instruction *I, BasicBlock *BrBB,
///
/// \returns true if the conditional block is removed.
static bool SpeculativelyExecuteBB(BranchInst *BI, BasicBlock *ThenBB,
- const DataLayout *DL,
const TargetTransformInfo &TTI) {
// Be conservative for now. FP select instruction can often be expensive.
Value *BrCond = BI->getCondition();
@@ -1511,14 +1509,13 @@ static bool SpeculativelyExecuteBB(BranchInst *BI, BasicBlock *ThenBB,
return false;
// Don't hoist the instruction if it's unsafe or expensive.
- if (!isSafeToSpeculativelyExecute(I, DL) &&
- !(HoistCondStores &&
- (SpeculatedStoreValue = isSafeToSpeculateStore(I, BB, ThenBB,
- EndBB))))
+ if (!isSafeToSpeculativelyExecute(I) &&
+ !(HoistCondStores && (SpeculatedStoreValue = isSafeToSpeculateStore(
+ I, BB, ThenBB, EndBB))))
return false;
if (!SpeculatedStoreValue &&
- ComputeSpeculationCost(I, DL, TTI) > PHINodeFoldingThreshold *
- TargetTransformInfo::TCC_Basic)
+ ComputeSpeculationCost(I, TTI) >
+ PHINodeFoldingThreshold * TargetTransformInfo::TCC_Basic)
return false;
// Store the store speculation candidate.
@@ -1574,11 +1571,11 @@ static bool SpeculativelyExecuteBB(BranchInst *BI, BasicBlock *ThenBB,
if (!OrigCE && !ThenCE)
continue; // Known safe and cheap.
- if ((ThenCE && !isSafeToSpeculativelyExecute(ThenCE, DL)) ||
- (OrigCE && !isSafeToSpeculativelyExecute(OrigCE, DL)))
+ if ((ThenCE && !isSafeToSpeculativelyExecute(ThenCE)) ||
+ (OrigCE && !isSafeToSpeculativelyExecute(OrigCE)))
return false;
- unsigned OrigCost = OrigCE ? ComputeSpeculationCost(OrigCE, DL, TTI) : 0;
- unsigned ThenCost = ThenCE ? ComputeSpeculationCost(ThenCE, DL, TTI) : 0;
+ unsigned OrigCost = OrigCE ? ComputeSpeculationCost(OrigCE, TTI) : 0;
+ unsigned ThenCost = ThenCE ? ComputeSpeculationCost(ThenCE, TTI) : 0;
unsigned MaxCost = 2 * PHINodeFoldingThreshold *
TargetTransformInfo::TCC_Basic;
if (OrigCost + ThenCost > MaxCost)
@@ -1688,7 +1685,7 @@ static bool BlockIsSimpleEnoughToThreadThrough(BasicBlock *BB) {
/// that is defined in the same block as the branch and if any PHI entries are
/// constants, thread edges corresponding to that entry to be branches to their
/// ultimate destination.
-static bool FoldCondBranchOnPHI(BranchInst *BI, const DataLayout *DL) {
+static bool FoldCondBranchOnPHI(BranchInst *BI, const DataLayout &DL) {
BasicBlock *BB = BI->getParent();
PHINode *PN = dyn_cast<PHINode>(BI->getCondition());
// NOTE: we currently cannot transform this case if the PHI node is used
@@ -1786,8 +1783,8 @@ static bool FoldCondBranchOnPHI(BranchInst *BI, const DataLayout *DL) {
/// FoldTwoEntryPHINode - Given a BB that starts with the specified two-entry
/// PHI node, see if we can eliminate it.
-static bool FoldTwoEntryPHINode(PHINode *PN, const DataLayout *DL,
- const TargetTransformInfo &TTI) {
+static bool FoldTwoEntryPHINode(PHINode *PN, const TargetTransformInfo &TTI,
+ const DataLayout &DL) {
// Ok, this is a two entry PHI node. Check to see if this is a simple "if
// statement", which has a very simple dominance structure. Basically, we
// are trying to find the condition that is being branched on, which
@@ -1830,9 +1827,9 @@ static bool FoldTwoEntryPHINode(PHINode *PN, const DataLayout *DL,
}
if (!DominatesMergePoint(PN->getIncomingValue(0), BB, &AggressiveInsts,
- MaxCostVal0, DL, TTI) ||
+ MaxCostVal0, TTI) ||
!DominatesMergePoint(PN->getIncomingValue(1), BB, &AggressiveInsts,
- MaxCostVal1, DL, TTI))
+ MaxCostVal1, TTI))
return false;
}
@@ -2052,8 +2049,7 @@ static bool checkCSEInPredecessor(Instruction *Inst, BasicBlock *PB) {
/// FoldBranchToCommonDest - If this basic block is simple enough, and if a
/// predecessor branches to us and one of our successors, fold the block into
/// the predecessor and use logical operations to pick the right destination.
-bool llvm::FoldBranchToCommonDest(BranchInst *BI, const DataLayout *DL,
- unsigned BonusInstThreshold) {
+bool llvm::FoldBranchToCommonDest(BranchInst *BI, unsigned BonusInstThreshold) {
BasicBlock *BB = BI->getParent();
Instruction *Cond = nullptr;
@@ -2109,7 +2105,7 @@ bool llvm::FoldBranchToCommonDest(BranchInst *BI, const DataLayout *DL,
// Ignore dbg intrinsics.
if (isa<DbgInfoIntrinsic>(I))
continue;
- if (!I->hasOneUse() || !isSafeToSpeculativelyExecute(I, DL))
+ if (!I->hasOneUse() || !isSafeToSpeculativelyExecute(I))
return false;
// I has only one use and can be executed unconditionally.
Instruction *User = dyn_cast<Instruction>(I->user_back());
@@ -2702,8 +2698,9 @@ static bool SimplifyIndirectBrOnSelect(IndirectBrInst *IBI, SelectInst *SI) {
/// We prefer to split the edge to 'end' so that there is a true/false entry to
/// the PHI, merging the third icmp into the switch.
static bool TryToSimplifyUncondBranchWithICmpInIt(
- ICmpInst *ICI, IRBuilder<> &Builder, const TargetTransformInfo &TTI,
- unsigned BonusInstThreshold, const DataLayout *DL, AssumptionCache *AC) {
+ ICmpInst *ICI, IRBuilder<> &Builder, const DataLayout &DL,
+ const TargetTransformInfo &TTI, unsigned BonusInstThreshold,
+ AssumptionCache *AC) {
BasicBlock *BB = ICI->getParent();
// If the block has any PHIs in it or the icmp has multiple uses, it is too
@@ -2736,7 +2733,7 @@ static bool TryToSimplifyUncondBranchWithICmpInIt(
ICI->eraseFromParent();
}
// BB is now empty, so it is likely to simplify away.
- return SimplifyCFG(BB, TTI, BonusInstThreshold, DL, AC) | true;
+ return SimplifyCFG(BB, TTI, BonusInstThreshold, AC) | true;
}
// Ok, the block is reachable from the default dest. If the constant we're
@@ -2752,7 +2749,7 @@ static bool TryToSimplifyUncondBranchWithICmpInIt(
ICI->replaceAllUsesWith(V);
ICI->eraseFromParent();
// BB is now empty, so it is likely to simplify away.
- return SimplifyCFG(BB, TTI, BonusInstThreshold, DL, AC) | true;
+ return SimplifyCFG(BB, TTI, BonusInstThreshold, AC) | true;
}
// The use of the icmp has to be in the 'end' block, by the only PHI node in
@@ -2808,8 +2805,8 @@ static bool TryToSimplifyUncondBranchWithICmpInIt(
/// SimplifyBranchOnICmpChain - The specified branch is a conditional branch.
/// Check to see if it is branching on an or/and chain of icmp instructions, and
/// fold it into a switch instruction if so.
-static bool SimplifyBranchOnICmpChain(BranchInst *BI, const DataLayout *DL,
- IRBuilder<> &Builder) {
+static bool SimplifyBranchOnICmpChain(BranchInst *BI, IRBuilder<> &Builder,
+ const DataLayout &DL) {
Instruction *Cond = dyn_cast<Instruction>(BI->getCondition());
if (!Cond) return false;
@@ -2884,10 +2881,8 @@ static bool SimplifyBranchOnICmpChain(BranchInst *BI, const DataLayout *DL,
Builder.SetInsertPoint(BI);
// Convert pointer to int before we switch.
if (CompVal->getType()->isPointerTy()) {
- assert(DL && "Cannot switch on pointer without DataLayout");
- CompVal = Builder.CreatePtrToInt(CompVal,
- DL->getIntPtrType(CompVal->getType()),
- "magicptr");
+ CompVal = Builder.CreatePtrToInt(
+ CompVal, DL.getIntPtrType(CompVal->getType()), "magicptr");
}
// Create the new switch instruction now.
@@ -3246,8 +3241,8 @@ static bool TurnSwitchRangeIntoICmp(SwitchInst *SI, IRBuilder<> &Builder) {
/// EliminateDeadSwitchCases - Compute masked bits for the condition of a switch
/// and use it to remove dead cases.
-static bool EliminateDeadSwitchCases(SwitchInst *SI, const DataLayout *DL,
- AssumptionCache *AC) {
+static bool EliminateDeadSwitchCases(SwitchInst *SI, AssumptionCache *AC,
+ const DataLayout &DL) {
Value *Cond = SI->getCondition();
unsigned Bits = Cond->getType()->getIntegerBitWidth();
APInt KnownZero(Bits, 0), KnownOne(Bits, 0);
@@ -3398,9 +3393,8 @@ static Constant *LookupConstant(Value *V,
/// constant or can be replaced by constants from the ConstantPool. Returns the
/// resulting constant on success, 0 otherwise.
static Constant *
-ConstantFold(Instruction *I,
- const SmallDenseMap<Value *, Constant *> &ConstantPool,
- const DataLayout *DL) {
+ConstantFold(Instruction *I, const DataLayout &DL,
+ const SmallDenseMap<Value *, Constant *> &ConstantPool) {
if (SelectInst *Select = dyn_cast<SelectInst>(I)) {
Constant *A = LookupConstant(Select->getCondition(), ConstantPool);
if (!A)
@@ -3420,9 +3414,10 @@ ConstantFold(Instruction *I,
return nullptr;
}
- if (CmpInst *Cmp = dyn_cast<CmpInst>(I))
+ if (CmpInst *Cmp = dyn_cast<CmpInst>(I)) {
return ConstantFoldCompareInstOperands(Cmp->getPredicate(), COps[0],
COps[1], DL);
+ }
return ConstantFoldInstOperands(I->getOpcode(), I->getType(), COps, DL);
}
@@ -3432,12 +3427,10 @@ ConstantFold(Instruction *I,
/// destinations CaseDest corresponding to value CaseVal (0 for the default
/// case), of a switch instruction SI.
static bool
-GetCaseResults(SwitchInst *SI,
- ConstantInt *CaseVal,
- BasicBlock *CaseDest,
+GetCaseResults(SwitchInst *SI, ConstantInt *CaseVal, BasicBlock *CaseDest,
BasicBlock **CommonDest,
- SmallVectorImpl<std::pair<PHINode *, Constant *> > &Res,
- const DataLayout *DL) {
+ SmallVectorImpl<std::pair<PHINode *, Constant *>> &Res,
+ const DataLayout &DL) {
// The block from which we enter the common destination.
BasicBlock *Pred = SI->getParent();
@@ -3456,7 +3449,7 @@ GetCaseResults(SwitchInst *SI,
} else if (isa<DbgInfoIntrinsic>(I)) {
// Skip debug intrinsic.
continue;
- } else if (Constant *C = ConstantFold(I, ConstantPool, DL)) {
+ } else if (Constant *C = ConstantFold(I, DL, ConstantPool)) {
// Instruction is side-effect free and constant.
// If the instruction has uses outside this block or a phi node slot for
@@ -3527,11 +3520,11 @@ static void MapCaseToResult(ConstantInt *CaseVal,
// results for the PHI node of the common destination block for a switch
// instruction. Returns false if multiple PHI nodes have been found or if
// there is not a common destination block for the switch.
-static bool InitializeUniqueCases(
- SwitchInst *SI, const DataLayout *DL, PHINode *&PHI,
- BasicBlock *&CommonDest,
- SwitchCaseResultVectorTy &UniqueResults,
- Constant *&DefaultResult) {
+static bool InitializeUniqueCases(SwitchInst *SI, PHINode *&PHI,
+ BasicBlock *&CommonDest,
+ SwitchCaseResultVectorTy &UniqueResults,
+ Constant *&DefaultResult,
+ const DataLayout &DL) {
for (auto &I : SI->cases()) {
ConstantInt *CaseVal = I.getCaseValue();
@@ -3638,15 +3631,15 @@ static void RemoveSwitchAfterSelectConversion(SwitchInst *SI, PHINode *PHI,
/// phi nodes in a common successor block with only two different
/// constant values, replace the switch with select.
static bool SwitchToSelect(SwitchInst *SI, IRBuilder<> &Builder,
- const DataLayout *DL, AssumptionCache *AC) {
+ AssumptionCache *AC, const DataLayout &DL) {
Value *const Cond = SI->getCondition();
PHINode *PHI = nullptr;
BasicBlock *CommonDest = nullptr;
Constant *DefaultResult;
SwitchCaseResultVectorTy UniqueResults;
// Collect all the cases that will deliver the same value from the switch.
- if (!InitializeUniqueCases(SI, DL, PHI, CommonDest, UniqueResults,
- DefaultResult))
+ if (!InitializeUniqueCases(SI, PHI, CommonDest, UniqueResults, DefaultResult,
+ DL))
return false;
// Selects choose between maximum two values.
if (UniqueResults.size() != 2)
@@ -3673,12 +3666,10 @@ namespace {
/// SwitchLookupTable - Create a lookup table to use as a switch replacement
/// with the contents of Values, using DefaultValue to fill any holes in the
/// table.
- SwitchLookupTable(Module &M,
- uint64_t TableSize,
- ConstantInt *Offset,
- const SmallVectorImpl<std::pair<ConstantInt*, Constant*> >& Values,
- Constant *DefaultValue,
- const DataLayout *DL);
+ SwitchLookupTable(
+ Module &M, uint64_t TableSize, ConstantInt *Offset,
+ const SmallVectorImpl<std::pair<ConstantInt *, Constant *>> &Values,
+ Constant *DefaultValue, const DataLayout &DL);
/// BuildLookup - Build instructions with Builder to retrieve the value at
/// the position given by Index in the lookup table.
@@ -3686,8 +3677,7 @@ namespace {
/// WouldFitInRegister - Return true if a table with TableSize elements of
/// type ElementType would fit in a target-legal register.
- static bool WouldFitInRegister(const DataLayout *DL,
- uint64_t TableSize,
+ static bool WouldFitInRegister(const DataLayout &DL, uint64_t TableSize,
const Type *ElementType);
private:
@@ -3729,12 +3719,10 @@ namespace {
};
}
-SwitchLookupTable::SwitchLookupTable(Module &M,
- uint64_t TableSize,
- ConstantInt *Offset,
- const SmallVectorImpl<std::pair<ConstantInt*, Constant*> >& Values,
- Constant *DefaultValue,
- const DataLayout *DL)
+SwitchLookupTable::SwitchLookupTable(
+ Module &M, uint64_t TableSize, ConstantInt *Offset,
+ const SmallVectorImpl<std::pair<ConstantInt *, Constant *>> &Values,
+ Constant *DefaultValue, const DataLayout &DL)
: SingleValue(nullptr), BitMap(nullptr), BitMapElementTy(nullptr),
LinearOffset(nullptr), LinearMultiplier(nullptr), Array(nullptr) {
assert(Values.size() && "Can't build lookup table without values!");
@@ -3904,11 +3892,9 @@ Value *SwitchLookupTable::BuildLookup(Value *Index, IRBuilder<> &Builder) {
llvm_unreachable("Unknown lookup table kind!");
}
-bool SwitchLookupTable::WouldFitInRegister(const DataLayout *DL,
+bool SwitchLookupTable::WouldFitInRegister(const DataLayout &DL,
uint64_t TableSize,
const Type *ElementType) {
- if (!DL)
- return false;
const IntegerType *IT = dyn_cast<IntegerType>(ElementType);
if (!IT)
return false;
@@ -3918,17 +3904,16 @@ bool SwitchLookupTable::WouldFitInRegister(const DataLayout *DL,
// Avoid overflow, fitsInLegalInteger uses unsigned int for the width.
if (TableSize >= UINT_MAX/IT->getBitWidth())
return false;
- return DL->fitsInLegalInteger(TableSize * IT->getBitWidth());
+ return DL.fitsInLegalInteger(TableSize * IT->getBitWidth());
}
/// ShouldBuildLookupTable - Determine whether a lookup table should be built
/// for this switch, based on the number of cases, size of the table and the
/// types of the results.
-static bool ShouldBuildLookupTable(SwitchInst *SI,
- uint64_t TableSize,
- const TargetTransformInfo &TTI,
- const DataLayout *DL,
- const SmallDenseMap<PHINode*, Type*>& ResultTypes) {
+static bool
+ShouldBuildLookupTable(SwitchInst *SI, uint64_t TableSize,
+ const TargetTransformInfo &TTI, const DataLayout &DL,
+ const SmallDenseMap<PHINode *, Type *> &ResultTypes) {
if (SI->getNumCases() > TableSize || TableSize >= UINT64_MAX / 10)
return false; // TableSize overflowed, or mul below might overflow.
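
Concretely, the two size guards compose as in this worked example (numbers are hypothetical): a table of 40 i8 entries needs 40 * 8 = 320 bits, so on a target whose widest legal integer is 64 bits the bitmap encoding is rejected, while the `UINT64_MAX / 10` bound above separately rules out table sizes whose later cost multiplication could overflow.

    // Sketch of the register check with concrete numbers:
    uint64_t TableSize = 40, ElemBits = 8;                    // 40 entries of i8
    bool Fits = DL.fitsInLegalInteger(TableSize * ElemBits);  // 320 bits: false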
@@ -4051,10 +4036,9 @@ static void reuseTableCompare(User *PhiUser, BasicBlock *PhiBlock,
/// SwitchToLookupTable - If the switch is only used to initialize one or more
/// phi nodes in a common successor block with different constant values,
/// replace the switch with lookup tables.
-static bool SwitchToLookupTable(SwitchInst *SI,
- IRBuilder<> &Builder,
- const TargetTransformInfo &TTI,
- const DataLayout* DL) {
+static bool SwitchToLookupTable(SwitchInst *SI, IRBuilder<> &Builder,
+ const DataLayout &DL,
+ const TargetTransformInfo &TTI) {
assert(SI->getNumCases() > 1 && "Degenerate switch?");
// Only build lookup table when we have a target that supports it.
@@ -4125,14 +4109,14 @@ static bool SwitchToLookupTable(SwitchInst *SI,
// or a bitmask that fits in a register.
SmallVector<std::pair<PHINode*, Constant*>, 4> DefaultResultsList;
bool HasDefaultResults = GetCaseResults(SI, nullptr, SI->getDefaultDest(),
- &CommonDest, DefaultResultsList, DL);
+ &CommonDest, DefaultResultsList, DL);
bool NeedMask = (TableHasHoles && !HasDefaultResults);
if (NeedMask) {
// As an extra penalty for the validity test we require more cases.
if (SI->getNumCases() < 4) // FIXME: Find best threshold value (benchmark).
return false;
- if (!(DL && DL->fitsInLegalInteger(TableSize)))
+ if (!DL.fitsInLegalInteger(TableSize))
return false;
}
@@ -4290,12 +4274,12 @@ bool SimplifyCFGOpt::SimplifySwitch(SwitchInst *SI, IRBuilder<> &Builder) {
// see if that predecessor totally determines the outcome of this switch.
if (BasicBlock *OnlyPred = BB->getSinglePredecessor())
if (SimplifyEqualityComparisonWithOnlyPredecessor(SI, OnlyPred, Builder))
- return SimplifyCFG(BB, TTI, BonusInstThreshold, DL, AC) | true;
+ return SimplifyCFG(BB, TTI, BonusInstThreshold, AC) | true;
Value *Cond = SI->getCondition();
if (SelectInst *Select = dyn_cast<SelectInst>(Cond))
if (SimplifySwitchOnSelect(SI, Select))
- return SimplifyCFG(BB, TTI, BonusInstThreshold, DL, AC) | true;
+ return SimplifyCFG(BB, TTI, BonusInstThreshold, AC) | true;
// If the block only contains the switch, see if we can fold the block
// away into any preds.
@@ -4305,25 +4289,25 @@ bool SimplifyCFGOpt::SimplifySwitch(SwitchInst *SI, IRBuilder<> &Builder) {
++BBI;
if (SI == &*BBI)
if (FoldValueComparisonIntoPredecessors(SI, Builder))
- return SimplifyCFG(BB, TTI, BonusInstThreshold, DL, AC) | true;
+ return SimplifyCFG(BB, TTI, BonusInstThreshold, AC) | true;
}
// Try to transform the switch into an icmp and a branch.
if (TurnSwitchRangeIntoICmp(SI, Builder))
- return SimplifyCFG(BB, TTI, BonusInstThreshold, DL, AC) | true;
+ return SimplifyCFG(BB, TTI, BonusInstThreshold, AC) | true;
// Remove unreachable cases.
- if (EliminateDeadSwitchCases(SI, DL, AC))
- return SimplifyCFG(BB, TTI, BonusInstThreshold, DL, AC) | true;
+ if (EliminateDeadSwitchCases(SI, AC, DL))
+ return SimplifyCFG(BB, TTI, BonusInstThreshold, AC) | true;
- if (SwitchToSelect(SI, Builder, DL, AC))
- return SimplifyCFG(BB, TTI, BonusInstThreshold, DL, AC) | true;
+ if (SwitchToSelect(SI, Builder, AC, DL))
+ return SimplifyCFG(BB, TTI, BonusInstThreshold, AC) | true;
if (ForwardSwitchConditionToPHI(SI))
- return SimplifyCFG(BB, TTI, BonusInstThreshold, DL, AC) | true;
+ return SimplifyCFG(BB, TTI, BonusInstThreshold, AC) | true;
- if (SwitchToLookupTable(SI, Builder, TTI, DL))
- return SimplifyCFG(BB, TTI, BonusInstThreshold, DL, AC) | true;
+ if (SwitchToLookupTable(SI, Builder, DL, TTI))
+ return SimplifyCFG(BB, TTI, BonusInstThreshold, AC) | true;
return false;
}
@@ -4360,11 +4344,87 @@ bool SimplifyCFGOpt::SimplifyIndirectBr(IndirectBrInst *IBI) {
if (SelectInst *SI = dyn_cast<SelectInst>(IBI->getAddress())) {
if (SimplifyIndirectBrOnSelect(IBI, SI))
- return SimplifyCFG(BB, TTI, BonusInstThreshold, DL, AC) | true;
+ return SimplifyCFG(BB, TTI, BonusInstThreshold, AC) | true;
}
return Changed;
}
+/// Given a block with only a single landing pad and an unconditional branch,
+/// try to find another basic block with which this one can be merged. This
+/// handles cases where we have multiple invokes with unique landing pads, but
+/// a shared handler.
+///
+/// We specifically choose not to worry about merging non-empty blocks
+/// here. That is a PRE/scheduling problem and is best solved elsewhere. In
+/// practice, the optimizer produces empty landing pad blocks quite frequently
+/// when dealing with exception-dense code (see instcombine, gvn, and if-else
+/// sinking in this file).
+///
+/// This is primarily a code size optimization. We need to avoid performing
+/// any transform which might inhibit optimization (such as our ability to
+/// specialize a particular handler via tail commoning). We do this by not
+/// merging any blocks which require us to introduce a phi. Since the same
+/// values are flowing through both blocks, we don't lose any ability to
+/// specialize. If anything, we make such specialization more likely.
+///
+/// TODO - This transformation could remove entries from a phi in the target
+/// block when the inputs in the phi are the same for the two blocks being
+/// merged. In some cases, this could result in removal of the PHI entirely.
+static bool TryToMergeLandingPad(LandingPadInst *LPad, BranchInst *BI,
+ BasicBlock *BB) {
+ auto Succ = BB->getUniqueSuccessor();
+ assert(Succ);
+ // If there's a phi in the successor block, we'd likely have to introduce
+ // a phi into the merged landing pad block.
+ if (isa<PHINode>(*Succ->begin()))
+ return false;
+
+ for (BasicBlock *OtherPred : predecessors(Succ)) {
+ if (BB == OtherPred)
+ continue;
+ BasicBlock::iterator I = OtherPred->begin();
+ LandingPadInst *LPad2 = dyn_cast<LandingPadInst>(I);
+ if (!LPad2 || !LPad2->isIdenticalTo(LPad))
+ continue;
+ for (++I; isa<DbgInfoIntrinsic>(I); ++I) {}
+ BranchInst *BI2 = dyn_cast<BranchInst>(I);
+ if (!BI2 || !BI2->isIdenticalTo(BI))
+ continue;
+
+ // We've found an identical block. Update our predecessors to take that
+ // path instead and make ourselves dead.
+ SmallSet<BasicBlock *, 16> Preds;
+ Preds.insert(pred_begin(BB), pred_end(BB));
+ for (BasicBlock *Pred : Preds) {
+ InvokeInst *II = cast<InvokeInst>(Pred->getTerminator());
+ assert(II->getNormalDest() != BB &&
+ II->getUnwindDest() == BB && "unexpected successor");
+ II->setUnwindDest(OtherPred);
+ }
+
+ // The debug info in OtherPred doesn't cover the merged control flow that
+ // used to go through BB. We need to delete it or update it.
+ for (auto I = OtherPred->begin(), E = OtherPred->end();
+ I != E;) {
+ Instruction &Inst = *I; I++;
+ if (isa<DbgInfoIntrinsic>(Inst))
+ Inst.eraseFromParent();
+ }
+
+ SmallSet<BasicBlock *, 16> Succs;
+ Succs.insert(succ_begin(BB), succ_end(BB));
+ for (BasicBlock *Succ : Succs) {
+ Succ->removePredecessor(BB);
+ }
+
+ IRBuilder<> Builder(BI);
+ Builder.CreateUnreachable();
+ BI->eraseFromParent();
+ return true;
+ }
+ return false;
+}
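The shape of the edit is easier to see on a toy graph than through the LLVM API. A sketch with hypothetical types (Payload stands in for the canonicalized landing-pad/branch pair, Unwind for an invoke's unwind edge); this illustrates the idea and is not the pass's code:

    #include <string>
    #include <vector>

    struct Block {
      std::string Payload;     // hypothetical: landing pad + branch, canonicalized
      Block *Unwind = nullptr; // hypothetical: an invoke's unwind destination
    };

    // BBPreds: invokes unwinding into BB. SuccPreds: predecessors of BB's
    // unique successor, i.e. the shared handler's other landing pad blocks.
    static bool tryToMergeToy(Block *BB, std::vector<Block *> &BBPreds,
                              const std::vector<Block *> &SuccPreds) {
      for (Block *Twin : SuccPreds) {
        if (Twin == BB || Twin->Payload != BB->Payload)
          continue;
        for (Block *Pred : BBPreds)
          Pred->Unwind = Twin; // retarget every invoke at the identical block
        return true; // BB is dead; the real code replaces it with unreachable
      }
      return false;
    }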
+
bool SimplifyCFGOpt::SimplifyUncondBranch(BranchInst *BI, IRBuilder<> &Builder){
BasicBlock *BB = BI->getParent();
@@ -4384,17 +4444,26 @@ bool SimplifyCFGOpt::SimplifyUncondBranch(BranchInst *BI, IRBuilder<> &Builder){
for (++I; isa<DbgInfoIntrinsic>(I); ++I)
;
if (I->isTerminator() &&
- TryToSimplifyUncondBranchWithICmpInIt(ICI, Builder, TTI,
- BonusInstThreshold, DL, AC))
+ TryToSimplifyUncondBranchWithICmpInIt(ICI, Builder, DL, TTI,
+ BonusInstThreshold, AC))
return true;
}
+ // See if we can merge an empty landing pad block with another which is
+ // equivalent.
+ if (LandingPadInst *LPad = dyn_cast<LandingPadInst>(I)) {
+ for (++I; isa<DbgInfoIntrinsic>(I); ++I) {}
+ if (I->isTerminator() &&
+ TryToMergeLandingPad(LPad, BI, BB))
+ return true;
+ }
+
// If this basic block is ONLY a compare and a branch, and if a predecessor
// branches to us and our successor, fold the comparison into the
// predecessor and use logical operations to update the incoming value
// for PHI nodes in common successor.
- if (FoldBranchToCommonDest(BI, DL, BonusInstThreshold))
- return SimplifyCFG(BB, TTI, BonusInstThreshold, DL, AC) | true;
+ if (FoldBranchToCommonDest(BI, BonusInstThreshold))
+ return SimplifyCFG(BB, TTI, BonusInstThreshold, AC) | true;
return false;
}
@@ -4409,7 +4478,7 @@ bool SimplifyCFGOpt::SimplifyCondBranch(BranchInst *BI, IRBuilder<> &Builder) {
// switch.
if (BasicBlock *OnlyPred = BB->getSinglePredecessor())
if (SimplifyEqualityComparisonWithOnlyPredecessor(BI, OnlyPred, Builder))
- return SimplifyCFG(BB, TTI, BonusInstThreshold, DL, AC) | true;
+ return SimplifyCFG(BB, TTI, BonusInstThreshold, AC) | true;
// This block must be empty, except for the setcond inst, if it exists.
// Ignore dbg intrinsics.
@@ -4419,26 +4488,26 @@ bool SimplifyCFGOpt::SimplifyCondBranch(BranchInst *BI, IRBuilder<> &Builder) {
++I;
if (&*I == BI) {
if (FoldValueComparisonIntoPredecessors(BI, Builder))
- return SimplifyCFG(BB, TTI, BonusInstThreshold, DL, AC) | true;
+ return SimplifyCFG(BB, TTI, BonusInstThreshold, AC) | true;
} else if (&*I == cast<Instruction>(BI->getCondition())){
++I;
// Ignore dbg intrinsics.
while (isa<DbgInfoIntrinsic>(I))
++I;
if (&*I == BI && FoldValueComparisonIntoPredecessors(BI, Builder))
- return SimplifyCFG(BB, TTI, BonusInstThreshold, DL, AC) | true;
+ return SimplifyCFG(BB, TTI, BonusInstThreshold, AC) | true;
}
}
// Try to turn "br (X == 0 | X == 1), T, F" into a switch instruction.
- if (SimplifyBranchOnICmpChain(BI, DL, Builder))
+ if (SimplifyBranchOnICmpChain(BI, Builder, DL))
return true;
// If this basic block is ONLY a compare and a branch, and if a predecessor
// branches to us and one of our successors, fold the comparison into the
// predecessor and use logical operations to pick the right destination.
- if (FoldBranchToCommonDest(BI, DL, BonusInstThreshold))
- return SimplifyCFG(BB, TTI, BonusInstThreshold, DL, AC) | true;
+ if (FoldBranchToCommonDest(BI, BonusInstThreshold))
+ return SimplifyCFG(BB, TTI, BonusInstThreshold, AC) | true;
// We have a conditional branch to two blocks that are only reachable
// from BI. We know that the condbr dominates the two blocks, so see if
@@ -4446,16 +4515,16 @@ bool SimplifyCFGOpt::SimplifyCondBranch(BranchInst *BI, IRBuilder<> &Builder) {
// can hoist it up to the branching block.
if (BI->getSuccessor(0)->getSinglePredecessor()) {
if (BI->getSuccessor(1)->getSinglePredecessor()) {
- if (HoistThenElseCodeToIf(BI, DL, TTI))
- return SimplifyCFG(BB, TTI, BonusInstThreshold, DL, AC) | true;
+ if (HoistThenElseCodeToIf(BI, TTI))
+ return SimplifyCFG(BB, TTI, BonusInstThreshold, AC) | true;
} else {
// If Successor #1 has multiple preds, we may be able to conditionally
// execute Successor #0 if it branches to Successor #1.
TerminatorInst *Succ0TI = BI->getSuccessor(0)->getTerminator();
if (Succ0TI->getNumSuccessors() == 1 &&
Succ0TI->getSuccessor(0) == BI->getSuccessor(1))
- if (SpeculativelyExecuteBB(BI, BI->getSuccessor(0), DL, TTI))
- return SimplifyCFG(BB, TTI, BonusInstThreshold, DL, AC) | true;
+ if (SpeculativelyExecuteBB(BI, BI->getSuccessor(0), TTI))
+ return SimplifyCFG(BB, TTI, BonusInstThreshold, AC) | true;
}
} else if (BI->getSuccessor(1)->getSinglePredecessor()) {
// If Successor #0 has multiple preds, we may be able to conditionally
@@ -4463,8 +4532,8 @@ bool SimplifyCFGOpt::SimplifyCondBranch(BranchInst *BI, IRBuilder<> &Builder) {
TerminatorInst *Succ1TI = BI->getSuccessor(1)->getTerminator();
if (Succ1TI->getNumSuccessors() == 1 &&
Succ1TI->getSuccessor(0) == BI->getSuccessor(0))
- if (SpeculativelyExecuteBB(BI, BI->getSuccessor(1), DL, TTI))
- return SimplifyCFG(BB, TTI, BonusInstThreshold, DL, AC) | true;
+ if (SpeculativelyExecuteBB(BI, BI->getSuccessor(1), TTI))
+ return SimplifyCFG(BB, TTI, BonusInstThreshold, AC) | true;
}
// If this is a branch on a phi node in the current block, thread control
@@ -4472,14 +4541,14 @@ bool SimplifyCFGOpt::SimplifyCondBranch(BranchInst *BI, IRBuilder<> &Builder) {
if (PHINode *PN = dyn_cast<PHINode>(BI->getCondition()))
if (PN->getParent() == BI->getParent())
if (FoldCondBranchOnPHI(BI, DL))
- return SimplifyCFG(BB, TTI, BonusInstThreshold, DL, AC) | true;
+ return SimplifyCFG(BB, TTI, BonusInstThreshold, AC) | true;
// Scan predecessor blocks for conditional branches.
for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI)
if (BranchInst *PBI = dyn_cast<BranchInst>((*PI)->getTerminator()))
if (PBI != BI && PBI->isConditional())
if (SimplifyCondBranchToCondBranch(PBI, BI))
- return SimplifyCFG(BB, TTI, BonusInstThreshold, DL, AC) | true;
+ return SimplifyCFG(BB, TTI, BonusInstThreshold, AC) | true;
return false;
}
@@ -4591,7 +4660,7 @@ bool SimplifyCFGOpt::run(BasicBlock *BB) {
// eliminate it, do so now.
if (PHINode *PN = dyn_cast<PHINode>(BB->begin()))
if (PN->getNumIncomingValues() == 2)
- Changed |= FoldTwoEntryPHINode(PN, DL, TTI);
+ Changed |= FoldTwoEntryPHINode(PN, TTI, DL);
Builder.SetInsertPoint(BB->getTerminator());
if (BranchInst *BI = dyn_cast<BranchInst>(BB->getTerminator())) {
@@ -4623,7 +4692,7 @@ bool SimplifyCFGOpt::run(BasicBlock *BB) {
/// of the CFG. It returns true if a modification was made.
///
bool llvm::SimplifyCFG(BasicBlock *BB, const TargetTransformInfo &TTI,
- unsigned BonusInstThreshold, const DataLayout *DL,
- AssumptionCache *AC) {
- return SimplifyCFGOpt(TTI, BonusInstThreshold, DL, AC).run(BB);
+ unsigned BonusInstThreshold, AssumptionCache *AC) {
+ return SimplifyCFGOpt(TTI, BB->getModule()->getDataLayout(),
+ BonusInstThreshold, AC).run(BB);
}
diff --git a/lib/Transforms/Utils/SimplifyIndVar.cpp b/lib/Transforms/Utils/SimplifyIndVar.cpp
index 6a5d885..8bfc5fb 100644
--- a/lib/Transforms/Utils/SimplifyIndVar.cpp
+++ b/lib/Transforms/Utils/SimplifyIndVar.cpp
@@ -270,95 +270,57 @@ bool SimplifyIndvar::eliminateIVUser(Instruction *UseInst,
bool SimplifyIndvar::strengthenOverflowingOperation(BinaryOperator *BO,
Value *IVOperand) {
- // Currently we only handle instructions of the form "add <indvar> <value>"
- unsigned Op = BO->getOpcode();
- if (Op != Instruction::Add)
+ // Fastpath: we don't have any work to do if `BO` is `nuw` and `nsw`.
+ if (BO->hasNoUnsignedWrap() && BO->hasNoSignedWrap())
return false;
- // If BO is already both nuw and nsw then there is nothing left to do
- if (BO->hasNoUnsignedWrap() && BO->hasNoSignedWrap())
+ const SCEV *(ScalarEvolution::*GetExprForBO)(const SCEV *, const SCEV *,
+ SCEV::NoWrapFlags);
+
+ switch (BO->getOpcode()) {
+ default:
return false;
- IntegerType *IT = cast<IntegerType>(IVOperand->getType());
- Value *OtherOperand = nullptr;
- if (BO->getOperand(0) == IVOperand) {
- OtherOperand = BO->getOperand(1);
- } else {
- assert(BO->getOperand(1) == IVOperand && "only other use!");
- OtherOperand = BO->getOperand(0);
+ case Instruction::Add:
+ GetExprForBO = &ScalarEvolution::getAddExpr;
+ break;
+
+ case Instruction::Sub:
+ GetExprForBO = &ScalarEvolution::getMinusSCEV;
+ break;
+
+ case Instruction::Mul:
+ GetExprForBO = &ScalarEvolution::getMulExpr;
+ break;
}
- bool Changed = false;
- const SCEV *OtherOpSCEV = SE->getSCEV(OtherOperand);
- if (OtherOpSCEV == SE->getCouldNotCompute())
- return false;
+ unsigned BitWidth = cast<IntegerType>(BO->getType())->getBitWidth();
+ Type *WideTy = IntegerType::get(BO->getContext(), BitWidth * 2);
+ const SCEV *LHS = SE->getSCEV(BO->getOperand(0));
+ const SCEV *RHS = SE->getSCEV(BO->getOperand(1));
- const SCEV *IVOpSCEV = SE->getSCEV(IVOperand);
- const SCEV *ZeroSCEV = SE->getConstant(IVOpSCEV->getType(), 0);
+ bool Changed = false;
- if (!BO->hasNoSignedWrap()) {
- // Upgrade the add to an "add nsw" if we can prove that it will never
- // sign-overflow or sign-underflow.
-
- const SCEV *SignedMax =
- SE->getConstant(APInt::getSignedMaxValue(IT->getBitWidth()));
- const SCEV *SignedMin =
- SE->getConstant(APInt::getSignedMinValue(IT->getBitWidth()));
-
- // The addition "IVOperand + OtherOp" does not sign-overflow if the result
- // is sign-representable in 2's complement in the given bit-width.
- //
- // If OtherOp is SLT 0, then for an IVOperand in [SignedMin - OtherOp,
- // SignedMax], "IVOperand + OtherOp" is in [SignedMin, SignedMax + OtherOp].
- // Everything in [SignedMin, SignedMax + OtherOp] is representable since
- // SignedMax + OtherOp is at least -1.
- //
- // If OtherOp is SGE 0, then for an IVOperand in [SignedMin, SignedMax -
- // OtherOp], "IVOperand + OtherOp" is in [SignedMin + OtherOp, SignedMax].
- // Everything in [SignedMin + OtherOp, SignedMax] is representable since
- // SignedMin + OtherOp is at most -1.
- //
- // It follows that for all values of IVOperand in [SignedMin - smin(0,
- // OtherOp), SignedMax - smax(0, OtherOp)] the result of the add is
- // representable (i.e. there is no sign-overflow).
-
- const SCEV *UpperDelta = SE->getSMaxExpr(ZeroSCEV, OtherOpSCEV);
- const SCEV *UpperLimit = SE->getMinusSCEV(SignedMax, UpperDelta);
-
- bool NeverSignedOverflows =
- SE->isKnownPredicate(ICmpInst::ICMP_SLE, IVOpSCEV, UpperLimit);
-
- if (NeverSignedOverflows) {
- const SCEV *LowerDelta = SE->getSMinExpr(ZeroSCEV, OtherOpSCEV);
- const SCEV *LowerLimit = SE->getMinusSCEV(SignedMin, LowerDelta);
-
- bool NeverSignedUnderflows =
- SE->isKnownPredicate(ICmpInst::ICMP_SGE, IVOpSCEV, LowerLimit);
- if (NeverSignedUnderflows) {
- BO->setHasNoSignedWrap(true);
- Changed = true;
- }
+ if (!BO->hasNoUnsignedWrap()) {
+ const SCEV *ExtendAfterOp = SE->getZeroExtendExpr(SE->getSCEV(BO), WideTy);
+ const SCEV *OpAfterExtend = (SE->*GetExprForBO)(
+ SE->getZeroExtendExpr(LHS, WideTy), SE->getZeroExtendExpr(RHS, WideTy),
+ SCEV::FlagAnyWrap);
+ if (ExtendAfterOp == OpAfterExtend) {
+ BO->setHasNoUnsignedWrap();
+ SE->forgetValue(BO);
+ Changed = true;
}
}
- if (!BO->hasNoUnsignedWrap()) {
- // Upgrade the add computing "IVOperand + OtherOp" to an "add nuw" if we can
- // prove that it will never unsigned-overflow (i.e. the result will always
- // be representable in the given bit-width).
- //
- // "IVOperand + OtherOp" is unsigned-representable in 2's complement iff it
- // does not produce a carry. "IVOperand + OtherOp" produces no carry iff
- // IVOperand ULE (UnsignedMax - OtherOp).
-
- const SCEV *UnsignedMax =
- SE->getConstant(APInt::getMaxValue(IT->getBitWidth()));
- const SCEV *UpperLimit = SE->getMinusSCEV(UnsignedMax, OtherOpSCEV);
-
- bool NeverUnsignedOverflows =
- SE->isKnownPredicate(ICmpInst::ICMP_ULE, IVOpSCEV, UpperLimit);
-
- if (NeverUnsignedOverflows) {
- BO->setHasNoUnsignedWrap(true);
+ if (!BO->hasNoSignedWrap()) {
+ const SCEV *ExtendAfterOp = SE->getSignExtendExpr(SE->getSCEV(BO), WideTy);
+ const SCEV *OpAfterExtend = (SE->*GetExprForBO)(
+ SE->getSignExtendExpr(LHS, WideTy), SE->getSignExtendExpr(RHS, WideTy),
+ SCEV::FlagAnyWrap);
+ if (ExtendAfterOp == OpAfterExtend) {
+ BO->setHasNoSignedWrap();
+ SE->forgetValue(BO);
Changed = true;
}
}
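The proof obligation both blocks above discharge is a plain integer identity: an N-bit operation cannot wrap exactly when performing it in 2N bits on extended operands equals extending the N-bit result. A minimal sketch for the unsigned add case (the nsw case is the same with sign extension and int8_t):

    #include <cstdint>

    // True iff the 8-bit add A + B does not unsigned-wrap, checked the way
    // the SCEV code above does: "extend then add" vs. "add then extend".
    static bool addIsNUW(uint8_t A, uint8_t B) {
      uint16_t OpAfterExtend = uint16_t(A) + uint16_t(B); // add in the wide type
      uint16_t ExtendAfterOp = uint16_t(uint8_t(A + B));  // wrap, then widen
      return OpAfterExtend == ExtendAfterOp;
    }

    // addIsNUW(200, 55) -> true  (255 is representable in 8 bits)
    // addIsNUW(200, 56) -> false (256 wraps to 0)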
diff --git a/lib/Transforms/Utils/SimplifyInstructions.cpp b/lib/Transforms/Utils/SimplifyInstructions.cpp
index 55a4455..c499c87 100644
--- a/lib/Transforms/Utils/SimplifyInstructions.cpp
+++ b/lib/Transforms/Utils/SimplifyInstructions.cpp
@@ -51,8 +51,7 @@ namespace {
const DominatorTreeWrapperPass *DTWP =
getAnalysisIfAvailable<DominatorTreeWrapperPass>();
const DominatorTree *DT = DTWP ? &DTWP->getDomTree() : nullptr;
- DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>();
- const DataLayout *DL = DLP ? &DLP->getDataLayout() : nullptr;
+ const DataLayout &DL = F.getParent()->getDataLayout();
const TargetLibraryInfo *TLI =
&getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();
AssumptionCache *AC =
diff --git a/lib/Transforms/Utils/SimplifyLibCalls.cpp b/lib/Transforms/Utils/SimplifyLibCalls.cpp
index fb1d83f..5867d65 100644
--- a/lib/Transforms/Utils/SimplifyLibCalls.cpp
+++ b/lib/Transforms/Utils/SimplifyLibCalls.cpp
@@ -120,12 +120,12 @@ static bool hasUnaryFloatFn(const TargetLibraryInfo *TLI, Type *Ty,
/// string/memory copying library function \p Func.
/// Acceptable functions are st[rp][n]?cpy, memmove, memcpy, and memset.
/// Their fortified (_chk) counterparts are also accepted.
-static bool checkStringCopyLibFuncSignature(Function *F, LibFunc::Func Func,
- const DataLayout *DL) {
+static bool checkStringCopyLibFuncSignature(Function *F, LibFunc::Func Func) {
+ const DataLayout &DL = F->getParent()->getDataLayout();
FunctionType *FT = F->getFunctionType();
LLVMContext &Context = F->getContext();
Type *PCharTy = Type::getInt8PtrTy(Context);
- Type *SizeTTy = DL ? DL->getIntPtrType(Context) : nullptr;
+ Type *SizeTTy = DL.getIntPtrType(Context);
unsigned NumParams = FT->getNumParams();
// All string libfuncs return the same type as the first parameter.
@@ -208,10 +208,6 @@ Value *LibCallSimplifier::optimizeStrCat(CallInst *CI, IRBuilder<> &B) {
if (Len == 0)
return Dst;
- // These optimizations require DataLayout.
- if (!DL)
- return nullptr;
-
return emitStrLenMemCpy(Src, Dst, Len, B);
}
@@ -230,9 +226,9 @@ Value *LibCallSimplifier::emitStrLenMemCpy(Value *Src, Value *Dst, uint64_t Len,
// We have enough information to now generate the memcpy call to do the
// concatenation for us. Make a memcpy to copy the nul byte with align = 1.
- B.CreateMemCpy(
- CpyDst, Src,
- ConstantInt::get(DL->getIntPtrType(Src->getContext()), Len + 1), 1);
+ B.CreateMemCpy(CpyDst, Src,
+ ConstantInt::get(DL.getIntPtrType(Src->getContext()), Len + 1),
+ 1);
return Dst;
}
@@ -269,10 +265,6 @@ Value *LibCallSimplifier::optimizeStrNCat(CallInst *CI, IRBuilder<> &B) {
if (SrcLen == 0 || Len == 0)
return Dst;
- // These optimizations require DataLayout.
- if (!DL)
- return nullptr;
-
// We don't optimize this case
if (Len < SrcLen)
return nullptr;
@@ -297,24 +289,20 @@ Value *LibCallSimplifier::optimizeStrChr(CallInst *CI, IRBuilder<> &B) {
// of the input string and turn this into memchr.
ConstantInt *CharC = dyn_cast<ConstantInt>(CI->getArgOperand(1));
if (!CharC) {
- // These optimizations require DataLayout.
- if (!DL)
- return nullptr;
-
uint64_t Len = GetStringLength(SrcStr);
if (Len == 0 || !FT->getParamType(1)->isIntegerTy(32)) // memchr needs i32.
return nullptr;
- return EmitMemChr(
- SrcStr, CI->getArgOperand(1), // include nul.
- ConstantInt::get(DL->getIntPtrType(CI->getContext()), Len), B, DL, TLI);
+ return EmitMemChr(SrcStr, CI->getArgOperand(1), // include nul.
+ ConstantInt::get(DL.getIntPtrType(CI->getContext()), Len),
+ B, DL, TLI);
}
// Otherwise, the character is a constant, see if the first argument is
// a string literal. If so, we can constant fold.
StringRef Str;
if (!getConstantStringInfo(SrcStr, Str)) {
- if (DL && CharC->isZero()) // strchr(p, 0) -> p + strlen(p)
+ if (CharC->isZero()) // strchr(p, 0) -> p + strlen(p)
return B.CreateGEP(SrcStr, EmitStrLen(SrcStr, B, DL, TLI), "strchr");
return nullptr;
}
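The CharC->isZero() fold rests on a libc identity that a two-line program can confirm:

    #include <cassert>
    #include <cstring>

    int main() {
      const char *P = "abc";
      // strchr(p, '\0') points at p's terminator, i.e. p + strlen(p), which
      // is exactly the GEP the simplifier emits.
      assert(strchr(P, '\0') == P + strlen(P));
    }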
@@ -350,8 +338,8 @@ Value *LibCallSimplifier::optimizeStrRChr(CallInst *CI, IRBuilder<> &B) {
StringRef Str;
if (!getConstantStringInfo(SrcStr, Str)) {
// strrchr(s, 0) -> strchr(s, 0)
- if (DL && CharC->isZero())
- return EmitStrChr(SrcStr, '\0', B, DL, TLI);
+ if (CharC->isZero())
+ return EmitStrChr(SrcStr, '\0', B, TLI);
return nullptr;
}
@@ -398,12 +386,8 @@ Value *LibCallSimplifier::optimizeStrCmp(CallInst *CI, IRBuilder<> &B) {
uint64_t Len1 = GetStringLength(Str1P);
uint64_t Len2 = GetStringLength(Str2P);
if (Len1 && Len2) {
- // These optimizations require DataLayout.
- if (!DL)
- return nullptr;
-
return EmitMemCmp(Str1P, Str2P,
- ConstantInt::get(DL->getIntPtrType(CI->getContext()),
+ ConstantInt::get(DL.getIntPtrType(CI->getContext()),
std::min(Len1, Len2)),
B, DL, TLI);
}
@@ -435,7 +419,7 @@ Value *LibCallSimplifier::optimizeStrNCmp(CallInst *CI, IRBuilder<> &B) {
if (Length == 0) // strncmp(x,y,0) -> 0
return ConstantInt::get(CI->getType(), 0);
- if (DL && Length == 1) // strncmp(x,y,1) -> memcmp(x,y,1)
+ if (Length == 1) // strncmp(x,y,1) -> memcmp(x,y,1)
return EmitMemCmp(Str1P, Str2P, CI->getArgOperand(2), B, DL, TLI);
StringRef Str1, Str2;
@@ -462,17 +446,13 @@ Value *LibCallSimplifier::optimizeStrNCmp(CallInst *CI, IRBuilder<> &B) {
Value *LibCallSimplifier::optimizeStrCpy(CallInst *CI, IRBuilder<> &B) {
Function *Callee = CI->getCalledFunction();
- if (!checkStringCopyLibFuncSignature(Callee, LibFunc::strcpy, DL))
+ if (!checkStringCopyLibFuncSignature(Callee, LibFunc::strcpy))
return nullptr;
Value *Dst = CI->getArgOperand(0), *Src = CI->getArgOperand(1);
if (Dst == Src) // strcpy(x,x) -> x
return Src;
- // These optimizations require DataLayout.
- if (!DL)
- return nullptr;
-
// See if we can get the length of the input string.
uint64_t Len = GetStringLength(Src);
if (Len == 0)
@@ -481,7 +461,7 @@ Value *LibCallSimplifier::optimizeStrCpy(CallInst *CI, IRBuilder<> &B) {
// We have enough information to now generate the memcpy call to do the
// copy for us. Make a memcpy to copy the nul byte with align = 1.
B.CreateMemCpy(Dst, Src,
- ConstantInt::get(DL->getIntPtrType(CI->getContext()), Len), 1);
+ ConstantInt::get(DL.getIntPtrType(CI->getContext()), Len), 1);
return Dst;
}
@@ -490,11 +470,7 @@ Value *LibCallSimplifier::optimizeStpCpy(CallInst *CI, IRBuilder<> &B) {
// Verify the "stpcpy" function prototype.
FunctionType *FT = Callee->getFunctionType();
- if (!checkStringCopyLibFuncSignature(Callee, LibFunc::stpcpy, DL))
- return nullptr;
-
- // These optimizations require DataLayout.
- if (!DL)
+ if (!checkStringCopyLibFuncSignature(Callee, LibFunc::stpcpy))
return nullptr;
Value *Dst = CI->getArgOperand(0), *Src = CI->getArgOperand(1);
@@ -509,9 +485,9 @@ Value *LibCallSimplifier::optimizeStpCpy(CallInst *CI, IRBuilder<> &B) {
return nullptr;
Type *PT = FT->getParamType(0);
- Value *LenV = ConstantInt::get(DL->getIntPtrType(PT), Len);
+ Value *LenV = ConstantInt::get(DL.getIntPtrType(PT), Len);
Value *DstEnd =
- B.CreateGEP(Dst, ConstantInt::get(DL->getIntPtrType(PT), Len - 1));
+ B.CreateGEP(Dst, ConstantInt::get(DL.getIntPtrType(PT), Len - 1));
// We have enough information to now generate the memcpy call to do the
// copy for us. Make a memcpy to copy the nul byte with align = 1.
@@ -523,7 +499,7 @@ Value *LibCallSimplifier::optimizeStrNCpy(CallInst *CI, IRBuilder<> &B) {
Function *Callee = CI->getCalledFunction();
FunctionType *FT = Callee->getFunctionType();
- if (!checkStringCopyLibFuncSignature(Callee, LibFunc::strncpy, DL))
+ if (!checkStringCopyLibFuncSignature(Callee, LibFunc::strncpy))
return nullptr;
Value *Dst = CI->getArgOperand(0);
@@ -551,17 +527,13 @@ Value *LibCallSimplifier::optimizeStrNCpy(CallInst *CI, IRBuilder<> &B) {
if (Len == 0)
return Dst; // strncpy(x, y, 0) -> x
- // These optimizations require DataLayout.
- if (!DL)
- return nullptr;
-
// Let strncpy handle the zero padding
if (Len > SrcLen + 1)
return nullptr;
Type *PT = FT->getParamType(0);
// strncpy(x, s, c) -> memcpy(x, s, c, 1) [s and c are constant]
- B.CreateMemCpy(Dst, Src, ConstantInt::get(DL->getIntPtrType(PT), Len), 1);
+ B.CreateMemCpy(Dst, Src, ConstantInt::get(DL.getIntPtrType(PT), Len), 1);
return Dst;
}
@@ -629,8 +601,8 @@ Value *LibCallSimplifier::optimizeStrPBrk(CallInst *CI, IRBuilder<> &B) {
}
// strpbrk(s, "a") -> strchr(s, 'a')
- if (DL && HasS2 && S2.size() == 1)
- return EmitStrChr(CI->getArgOperand(0), S2[0], B, DL, TLI);
+ if (HasS2 && S2.size() == 1)
+ return EmitStrChr(CI->getArgOperand(0), S2[0], B, TLI);
return nullptr;
}
@@ -706,7 +678,7 @@ Value *LibCallSimplifier::optimizeStrCSpn(CallInst *CI, IRBuilder<> &B) {
}
// strcspn(s, "") -> strlen(s)
- if (DL && HasS2 && S2.empty())
+ if (HasS2 && S2.empty())
return EmitStrLen(CI->getArgOperand(0), B, DL, TLI);
return nullptr;
@@ -725,7 +697,7 @@ Value *LibCallSimplifier::optimizeStrStr(CallInst *CI, IRBuilder<> &B) {
return B.CreateBitCast(CI->getArgOperand(0), CI->getType());
// fold strstr(a, b) == a -> strncmp(a, b, strlen(b)) == 0
- if (DL && isOnlyUsedInEqualityComparison(CI, CI->getArgOperand(0))) {
+ if (isOnlyUsedInEqualityComparison(CI, CI->getArgOperand(0))) {
Value *StrLen = EmitStrLen(CI->getArgOperand(1), B, DL, TLI);
if (!StrLen)
return nullptr;
@@ -767,12 +739,98 @@ Value *LibCallSimplifier::optimizeStrStr(CallInst *CI, IRBuilder<> &B) {
// fold strstr(x, "y") -> strchr(x, 'y').
if (HasStr2 && ToFindStr.size() == 1) {
- Value *StrChr = EmitStrChr(CI->getArgOperand(0), ToFindStr[0], B, DL, TLI);
+ Value *StrChr = EmitStrChr(CI->getArgOperand(0), ToFindStr[0], B, TLI);
return StrChr ? B.CreateBitCast(StrChr, CI->getType()) : nullptr;
}
return nullptr;
}
+Value *LibCallSimplifier::optimizeMemChr(CallInst *CI, IRBuilder<> &B) {
+ Function *Callee = CI->getCalledFunction();
+ FunctionType *FT = Callee->getFunctionType();
+ if (FT->getNumParams() != 3 || !FT->getParamType(0)->isPointerTy() ||
+ !FT->getParamType(1)->isIntegerTy(32) ||
+ !FT->getParamType(2)->isIntegerTy() ||
+ !FT->getReturnType()->isPointerTy())
+ return nullptr;
+
+ Value *SrcStr = CI->getArgOperand(0);
+ ConstantInt *CharC = dyn_cast<ConstantInt>(CI->getArgOperand(1));
+ ConstantInt *LenC = dyn_cast<ConstantInt>(CI->getArgOperand(2));
+
+ // memchr(x, y, 0) -> null
+ if (LenC && LenC->isNullValue())
+ return Constant::getNullValue(CI->getType());
+
+ // From now on we need at least constant length and string.
+ StringRef Str;
+ if (!LenC || !getConstantStringInfo(SrcStr, Str, 0, /*TrimAtNul=*/false))
+ return nullptr;
+
+ // Truncate the string to LenC. If Str is smaller than LenC we will still only
+ // scan the string, as reading past the end of it is undefined and we can just
+ // return null if we don't find the char.
+ Str = Str.substr(0, LenC->getZExtValue());
+
+ // If the char is variable but the input str and length are not, we can turn
+ // this memchr call into a simple bit field test. Of course this only works
+ // when the return value is only checked against null.
+ //
+ // It would be really nice to reuse switch lowering here but we can't change
+ // the CFG at this point.
+ //
+ // memchr("\r\n", C, 2) != nullptr -> (C & ((1 << '\r') | (1 << '\n'))) != 0
+ // after bounds check.
+ if (!CharC && !Str.empty() && isOnlyUsedInZeroEqualityComparison(CI)) {
+ unsigned char Max =
+ *std::max_element(reinterpret_cast<const unsigned char *>(Str.begin()),
+ reinterpret_cast<const unsigned char *>(Str.end()));
+
+ // Make sure the bit field we're about to create fits in a register on the
+ // target.
+ // FIXME: On a 64-bit architecture this prevents us from using the
+ // interesting range of alphabetic ASCII chars. We could do better by
+ // emitting two bitfields or shifting the range by 64 if no lower chars
+ // are used.
+ if (!DL.fitsInLegalInteger(Max + 1))
+ return nullptr;
+
+ // For the bit field use a power-of-2 type with at least 8 bits to avoid
+ // creating unnecessary illegal types.
+ unsigned char Width = NextPowerOf2(std::max((unsigned char)7, Max));
+
+ // Now build the bit field.
+ APInt Bitfield(Width, 0);
+ for (char C : Str)
+ Bitfield.setBit((unsigned char)C);
+ Value *BitfieldC = B.getInt(Bitfield);
+
+ // First check that the bit field access is within bounds.
+ Value *C = B.CreateZExtOrTrunc(CI->getArgOperand(1), BitfieldC->getType());
+ Value *Bounds = B.CreateICmp(ICmpInst::ICMP_ULT, C, B.getIntN(Width, Width),
+ "memchr.bounds");
+
+ // Create code that checks if the given bit is set in the field.
+ Value *Shl = B.CreateShl(B.getIntN(Width, 1ULL), C);
+ Value *Bits = B.CreateIsNotNull(B.CreateAnd(Shl, BitfieldC), "memchr.bits");
+
+ // Finally merge both checks and cast to pointer type. The inttoptr
+ // implicitly zexts the i1 to intptr type.
+ return B.CreateIntToPtr(B.CreateAnd(Bounds, Bits, "memchr"), CI->getType());
+ }
+
+ // Check if all arguments are constants. If so, we can constant fold.
+ if (!CharC)
+ return nullptr;
+
+ // Compute the offset.
+ size_t I = Str.find(CharC->getSExtValue() & 0xFF);
+ if (I == StringRef::npos) // Didn't find the char. memchr returns null.
+ return Constant::getNullValue(CI->getType());
+
+ // memchr(s+n,c,l) -> gep(s+n+i,c)
+ return B.CreateGEP(SrcStr, B.getInt64(I), "memchr");
+}
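A standalone sketch of the bit-field rewrite for the worked case in the comment above, memchr("\r\n", C, 2) != nullptr, with a 64-bit field hard-coded (the pass sizes the field from the largest character and asks DataLayout whether that width is legal):

    #include <cstdint>

    static bool memchrHits(uint8_t C) {
      // One set bit per character of the constant string "\r\n".
      const uint64_t Bitfield = (1ULL << '\r') | (1ULL << '\n');
      if (C >= 64) // bounds check: chars outside the field can never hit
        return false;
      return (Bitfield >> C) & 1; // memchr.bits: test the char's bit
    }

    // memchrHits('\n') -> true; memchrHits('x') -> false; any C >= 64 is
    // false, matching memchr returning null for chars not in the string.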
+
Value *LibCallSimplifier::optimizeMemCmp(CallInst *CI, IRBuilder<> &B) {
Function *Callee = CI->getCalledFunction();
FunctionType *FT = Callee->getFunctionType();
@@ -827,11 +885,8 @@ Value *LibCallSimplifier::optimizeMemCmp(CallInst *CI, IRBuilder<> &B) {
Value *LibCallSimplifier::optimizeMemCpy(CallInst *CI, IRBuilder<> &B) {
Function *Callee = CI->getCalledFunction();
- // These optimizations require DataLayout.
- if (!DL)
- return nullptr;
- if (!checkStringCopyLibFuncSignature(Callee, LibFunc::memcpy, DL))
+ if (!checkStringCopyLibFuncSignature(Callee, LibFunc::memcpy))
return nullptr;
// memcpy(x, y, n) -> llvm.memcpy(x, y, n, 1)
@@ -842,11 +897,8 @@ Value *LibCallSimplifier::optimizeMemCpy(CallInst *CI, IRBuilder<> &B) {
Value *LibCallSimplifier::optimizeMemMove(CallInst *CI, IRBuilder<> &B) {
Function *Callee = CI->getCalledFunction();
- // These optimizations require DataLayout.
- if (!DL)
- return nullptr;
- if (!checkStringCopyLibFuncSignature(Callee, LibFunc::memmove, DL))
+ if (!checkStringCopyLibFuncSignature(Callee, LibFunc::memmove))
return nullptr;
// memmove(x, y, n) -> llvm.memmove(x, y, n, 1)
@@ -857,11 +909,8 @@ Value *LibCallSimplifier::optimizeMemMove(CallInst *CI, IRBuilder<> &B) {
Value *LibCallSimplifier::optimizeMemSet(CallInst *CI, IRBuilder<> &B) {
Function *Callee = CI->getCalledFunction();
- // These optimizations require DataLayout.
- if (!DL)
- return nullptr;
- if (!checkStringCopyLibFuncSignature(Callee, LibFunc::memset, DL))
+ if (!checkStringCopyLibFuncSignature(Callee, LibFunc::memset))
return nullptr;
// memset(p, v, n) -> llvm.memset(p, v, n, 1)
@@ -1521,7 +1570,7 @@ Value *LibCallSimplifier::optimizePrintFString(CallInst *CI, IRBuilder<> &B) {
// printf("x") -> putchar('x'), even for '%'.
if (FormatStr.size() == 1) {
- Value *Res = EmitPutChar(B.getInt32(FormatStr[0]), B, DL, TLI);
+ Value *Res = EmitPutChar(B.getInt32(FormatStr[0]), B, TLI);
if (CI->use_empty() || !Res)
return Res;
return B.CreateIntCast(Res, CI->getType(), true);
@@ -1534,7 +1583,7 @@ Value *LibCallSimplifier::optimizePrintFString(CallInst *CI, IRBuilder<> &B) {
// pass to be run after this pass, to merge duplicate strings.
FormatStr = FormatStr.drop_back();
Value *GV = B.CreateGlobalString(FormatStr, "str");
- Value *NewCI = EmitPutS(GV, B, DL, TLI);
+ Value *NewCI = EmitPutS(GV, B, TLI);
return (CI->use_empty() || !NewCI)
? NewCI
: ConstantInt::get(CI->getType(), FormatStr.size() + 1);
@@ -1544,7 +1593,7 @@ Value *LibCallSimplifier::optimizePrintFString(CallInst *CI, IRBuilder<> &B) {
// printf("%c", chr) --> putchar(chr)
if (FormatStr == "%c" && CI->getNumArgOperands() > 1 &&
CI->getArgOperand(1)->getType()->isIntegerTy()) {
- Value *Res = EmitPutChar(CI->getArgOperand(1), B, DL, TLI);
+ Value *Res = EmitPutChar(CI->getArgOperand(1), B, TLI);
if (CI->use_empty() || !Res)
return Res;
@@ -1554,7 +1603,7 @@ Value *LibCallSimplifier::optimizePrintFString(CallInst *CI, IRBuilder<> &B) {
// printf("%s\n", str) --> puts(str)
if (FormatStr == "%s\n" && CI->getNumArgOperands() > 1 &&
CI->getArgOperand(1)->getType()->isPointerTy()) {
- return EmitPutS(CI->getArgOperand(1), B, DL, TLI);
+ return EmitPutS(CI->getArgOperand(1), B, TLI);
}
return nullptr;
}
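Each printf fold above is backed by an observable libc equivalence; the differing return types are what the int casts in the code reconcile. A quick runnable illustration:

    #include <cstdio>

    int main() {
      printf("%c", 'x');    // and its folded form:
      putchar('x');
      printf("%s\n", "hi"); // and its folded form:
      puts("hi");
    }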
@@ -1600,16 +1649,11 @@ Value *LibCallSimplifier::optimizeSPrintFString(CallInst *CI, IRBuilder<> &B) {
if (FormatStr[i] == '%')
return nullptr; // we found a format specifier, bail out.
- // These optimizations require DataLayout.
- if (!DL)
- return nullptr;
-
// sprintf(str, fmt) -> llvm.memcpy(str, fmt, strlen(fmt)+1, 1)
- B.CreateMemCpy(
- CI->getArgOperand(0), CI->getArgOperand(1),
- ConstantInt::get(DL->getIntPtrType(CI->getContext()),
- FormatStr.size() + 1),
- 1); // Copy the null byte.
+ B.CreateMemCpy(CI->getArgOperand(0), CI->getArgOperand(1),
+ ConstantInt::get(DL.getIntPtrType(CI->getContext()),
+ FormatStr.size() + 1),
+ 1); // Copy the null byte.
return ConstantInt::get(CI->getType(), FormatStr.size());
}
@@ -1634,10 +1678,6 @@ Value *LibCallSimplifier::optimizeSPrintFString(CallInst *CI, IRBuilder<> &B) {
}
if (FormatStr[1] == 's') {
- // These optimizations require DataLayout.
- if (!DL)
- return nullptr;
-
// sprintf(dest, "%s", str) -> llvm.memcpy(dest, str, strlen(str)+1, 1)
if (!CI->getArgOperand(2)->getType()->isPointerTy())
return nullptr;
@@ -1702,13 +1742,9 @@ Value *LibCallSimplifier::optimizeFPrintFString(CallInst *CI, IRBuilder<> &B) {
if (FormatStr[i] == '%') // Could handle %% -> % if we cared.
return nullptr; // We found a format specifier.
- // These optimizations require DataLayout.
- if (!DL)
- return nullptr;
-
return EmitFWrite(
CI->getArgOperand(1),
- ConstantInt::get(DL->getIntPtrType(CI->getContext()), FormatStr.size()),
+ ConstantInt::get(DL.getIntPtrType(CI->getContext()), FormatStr.size()),
CI->getArgOperand(0), B, DL, TLI);
}
@@ -1723,14 +1759,14 @@ Value *LibCallSimplifier::optimizeFPrintFString(CallInst *CI, IRBuilder<> &B) {
// fprintf(F, "%c", chr) --> fputc(chr, F)
if (!CI->getArgOperand(2)->getType()->isIntegerTy())
return nullptr;
- return EmitFPutC(CI->getArgOperand(2), CI->getArgOperand(0), B, DL, TLI);
+ return EmitFPutC(CI->getArgOperand(2), CI->getArgOperand(0), B, TLI);
}
if (FormatStr[1] == 's') {
// fprintf(F, "%s", str) --> fputs(str, F)
if (!CI->getArgOperand(2)->getType()->isPointerTy())
return nullptr;
- return EmitFPutS(CI->getArgOperand(2), CI->getArgOperand(0), B, DL, TLI);
+ return EmitFPutS(CI->getArgOperand(2), CI->getArgOperand(0), B, TLI);
}
return nullptr;
}
@@ -1790,7 +1826,7 @@ Value *LibCallSimplifier::optimizeFWrite(CallInst *CI, IRBuilder<> &B) {
// This optimisation is only valid if the return value is unused.
if (Bytes == 1 && CI->use_empty()) { // fwrite(S,1,1,F) -> fputc(S[0],F)
Value *Char = B.CreateLoad(CastToCStr(CI->getArgOperand(0), B), "char");
- Value *NewCI = EmitFPutC(Char, CI->getArgOperand(3), B, DL, TLI);
+ Value *NewCI = EmitFPutC(Char, CI->getArgOperand(3), B, TLI);
return NewCI ? ConstantInt::get(CI->getType(), 1) : nullptr;
}
@@ -1802,10 +1838,6 @@ Value *LibCallSimplifier::optimizeFPuts(CallInst *CI, IRBuilder<> &B) {
Function *Callee = CI->getCalledFunction();
- // These optimizations require DataLayout.
- if (!DL)
- return nullptr;
-
// Require two pointers. Also, we can't optimize if return value is used.
FunctionType *FT = Callee->getFunctionType();
if (FT->getNumParams() != 2 || !FT->getParamType(0)->isPointerTy() ||
@@ -1820,7 +1852,7 @@ Value *LibCallSimplifier::optimizeFPuts(CallInst *CI, IRBuilder<> &B) {
// Known to have no uses (see above).
return EmitFWrite(
CI->getArgOperand(0),
- ConstantInt::get(DL->getIntPtrType(CI->getContext()), Len - 1),
+ ConstantInt::get(DL.getIntPtrType(CI->getContext()), Len - 1),
CI->getArgOperand(1), B, DL, TLI);
}
@@ -1839,7 +1871,7 @@ Value *LibCallSimplifier::optimizePuts(CallInst *CI, IRBuilder<> &B) {
if (Str.empty() && CI->use_empty()) {
// puts("") -> putchar('\n')
- Value *Res = EmitPutChar(B.getInt32('\n'), B, DL, TLI);
+ Value *Res = EmitPutChar(B.getInt32('\n'), B, TLI);
if (CI->use_empty() || !Res)
return Res;
return B.CreateIntCast(Res, CI->getType(), true);
@@ -1906,6 +1938,8 @@ Value *LibCallSimplifier::optimizeStringMemoryLibCall(CallInst *CI,
return optimizeStrCSpn(CI, Builder);
case LibFunc::strstr:
return optimizeStrStr(CI, Builder);
+ case LibFunc::memchr:
+ return optimizeMemChr(CI, Builder);
case LibFunc::memcmp:
return optimizeMemCmp(CI, Builder);
case LibFunc::memcpy:
@@ -2089,9 +2123,9 @@ Value *LibCallSimplifier::optimizeCall(CallInst *CI) {
}
LibCallSimplifier::LibCallSimplifier(
- const DataLayout *DL, const TargetLibraryInfo *TLI,
+ const DataLayout &DL, const TargetLibraryInfo *TLI,
function_ref<void(Instruction *, Value *)> Replacer)
- : FortifiedSimplifier(DL, TLI), DL(DL), TLI(TLI), UnsafeFPShrink(false),
+ : FortifiedSimplifier(TLI), DL(DL), TLI(TLI), UnsafeFPShrink(false),
Replacer(Replacer) {}
void LibCallSimplifier::replaceAllUsesWith(Instruction *I, Value *With) {
@@ -2187,7 +2221,7 @@ bool FortifiedLibCallSimplifier::isFortifiedCallFoldable(CallInst *CI,
Value *FortifiedLibCallSimplifier::optimizeMemCpyChk(CallInst *CI, IRBuilder<> &B) {
Function *Callee = CI->getCalledFunction();
- if (!checkStringCopyLibFuncSignature(Callee, LibFunc::memcpy_chk, DL))
+ if (!checkStringCopyLibFuncSignature(Callee, LibFunc::memcpy_chk))
return nullptr;
if (isFortifiedCallFoldable(CI, 3, 2, false)) {
@@ -2201,7 +2235,7 @@ Value *FortifiedLibCallSimplifier::optimizeMemCpyChk(CallInst *CI, IRBuilder<> &
Value *FortifiedLibCallSimplifier::optimizeMemMoveChk(CallInst *CI, IRBuilder<> &B) {
Function *Callee = CI->getCalledFunction();
- if (!checkStringCopyLibFuncSignature(Callee, LibFunc::memmove_chk, DL))
+ if (!checkStringCopyLibFuncSignature(Callee, LibFunc::memmove_chk))
return nullptr;
if (isFortifiedCallFoldable(CI, 3, 2, false)) {
@@ -2215,7 +2249,7 @@ Value *FortifiedLibCallSimplifier::optimizeMemMoveChk(CallInst *CI, IRBuilder<>
Value *FortifiedLibCallSimplifier::optimizeMemSetChk(CallInst *CI, IRBuilder<> &B) {
Function *Callee = CI->getCalledFunction();
- if (!checkStringCopyLibFuncSignature(Callee, LibFunc::memset_chk, DL))
+ if (!checkStringCopyLibFuncSignature(Callee, LibFunc::memset_chk))
return nullptr;
if (isFortifiedCallFoldable(CI, 3, 2, false)) {
@@ -2231,8 +2265,9 @@ Value *FortifiedLibCallSimplifier::optimizeStrpCpyChk(CallInst *CI,
LibFunc::Func Func) {
Function *Callee = CI->getCalledFunction();
StringRef Name = Callee->getName();
+ const DataLayout &DL = CI->getModule()->getDataLayout();
- if (!checkStringCopyLibFuncSignature(Callee, Func, DL))
+ if (!checkStringCopyLibFuncSignature(Callee, Func))
return nullptr;
Value *Dst = CI->getArgOperand(0), *Src = CI->getArgOperand(1),
@@ -2250,7 +2285,7 @@ Value *FortifiedLibCallSimplifier::optimizeStrpCpyChk(CallInst *CI,
// TODO: It might be nice to get a maximum length out of the possible
// string lengths for varying.
if (isFortifiedCallFoldable(CI, 2, 1, true)) {
- Value *Ret = EmitStrCpy(Dst, Src, B, DL, TLI, Name.substr(2, 6));
+ Value *Ret = EmitStrCpy(Dst, Src, B, TLI, Name.substr(2, 6));
return Ret;
} else if (!OnlyLowerUnknownSize) {
// Maybe we can still fold __st[rp]cpy_chk to __memcpy_chk.
@@ -2258,11 +2293,7 @@ Value *FortifiedLibCallSimplifier::optimizeStrpCpyChk(CallInst *CI,
if (Len == 0)
return nullptr;
- // This optimization requires DataLayout.
- if (!DL)
- return nullptr;
-
- Type *SizeTTy = DL->getIntPtrType(CI->getContext());
+ Type *SizeTTy = DL.getIntPtrType(CI->getContext());
Value *LenV = ConstantInt::get(SizeTTy, Len);
Value *Ret = EmitMemCpyChk(Dst, Src, LenV, ObjSize, B, DL, TLI);
// If the function was an __stpcpy_chk, and we were able to fold it into
@@ -2280,12 +2311,11 @@ Value *FortifiedLibCallSimplifier::optimizeStrpNCpyChk(CallInst *CI,
Function *Callee = CI->getCalledFunction();
StringRef Name = Callee->getName();
- if (!checkStringCopyLibFuncSignature(Callee, Func, DL))
+ if (!checkStringCopyLibFuncSignature(Callee, Func))
return nullptr;
if (isFortifiedCallFoldable(CI, 3, 2, false)) {
- Value *Ret =
- EmitStrNCpy(CI->getArgOperand(0), CI->getArgOperand(1),
- CI->getArgOperand(2), B, DL, TLI, Name.substr(2, 7));
+ Value *Ret = EmitStrNCpy(CI->getArgOperand(0), CI->getArgOperand(1),
+ CI->getArgOperand(2), B, TLI, Name.substr(2, 7));
return Ret;
}
return nullptr;
@@ -2328,8 +2358,6 @@ Value *FortifiedLibCallSimplifier::optimizeCall(CallInst *CI) {
return nullptr;
}
-FortifiedLibCallSimplifier::
-FortifiedLibCallSimplifier(const DataLayout *DL, const TargetLibraryInfo *TLI,
- bool OnlyLowerUnknownSize)
- : DL(DL), TLI(TLI), OnlyLowerUnknownSize(OnlyLowerUnknownSize) {
-}
+FortifiedLibCallSimplifier::FortifiedLibCallSimplifier(
+ const TargetLibraryInfo *TLI, bool OnlyLowerUnknownSize)
+ : TLI(TLI), OnlyLowerUnknownSize(OnlyLowerUnknownSize) {}
diff --git a/lib/Transforms/Utils/SymbolRewriter.cpp b/lib/Transforms/Utils/SymbolRewriter.cpp
index b343cc4..a2a54da 100644
--- a/lib/Transforms/Utils/SymbolRewriter.cpp
+++ b/lib/Transforms/Utils/SymbolRewriter.cpp
@@ -60,6 +60,7 @@
#define DEBUG_TYPE "symbol-rewriter"
#include "llvm/CodeGen/Passes.h"
#include "llvm/Pass.h"
+#include "llvm/ADT/SmallString.h"
#include "llvm/IR/LegacyPassManager.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
@@ -72,15 +73,15 @@
#include "llvm/Transforms/Utils/SymbolRewriter.h"
using namespace llvm;
+using namespace SymbolRewriter;
static cl::list<std::string> RewriteMapFiles("rewrite-map-file",
cl::desc("Symbol Rewrite Map"),
cl::value_desc("filename"));
-namespace llvm {
-namespace SymbolRewriter {
-void rewriteComdat(Module &M, GlobalObject *GO, const std::string &Source,
- const std::string &Target) {
+static void rewriteComdat(Module &M, GlobalObject *GO,
+ const std::string &Source,
+ const std::string &Target) {
if (Comdat *CD = GO->getComdat()) {
auto &Comdats = M.getComdatSymbolTable();
@@ -92,6 +93,7 @@ void rewriteComdat(Module &M, GlobalObject *GO, const std::string &Source,
}
}
+namespace {
template <RewriteDescriptor::Type DT, typename ValueType,
ValueType *(llvm::Module::*Get)(StringRef) const>
class ExplicitRewriteDescriptor : public RewriteDescriptor {
@@ -226,6 +228,7 @@ typedef PatternRewriteDescriptor<RewriteDescriptor::Type::NamedAlias,
&llvm::Module::getNamedAlias,
&llvm::Module::aliases>
PatternRewriteNamedAliasDescriptor;
+} // namespace
bool RewriteMapParser::parse(const std::string &MapFile,
RewriteDescriptorList *DL) {
@@ -489,8 +492,6 @@ parseRewriteGlobalAliasDescriptor(yaml::Stream &YS, yaml::ScalarNode *K,
return true;
}
-}
-}
namespace {
class RewriteSymbols : public ModulePass {
diff --git a/lib/Transforms/Utils/ValueMapper.cpp b/lib/Transforms/Utils/ValueMapper.cpp
index 49c0902..54c7688 100644
--- a/lib/Transforms/Utils/ValueMapper.cpp
+++ b/lib/Transforms/Utils/ValueMapper.cpp
@@ -291,14 +291,18 @@ static Metadata *MapMetadataImpl(const Metadata *MD,
return nullptr;
}
+ // Note: this cast precedes the Flags check so we always get its associated
+ // assertion.
const MDNode *Node = cast<MDNode>(MD);
- assert(Node->isResolved() && "Unexpected unresolved node");
// If this is a module-level metadata and we know that nothing at the
// module level is changing, then use an identity mapping.
if (Flags & RF_NoModuleLevelChanges)
return mapToSelf(VM, MD);
+ // Require resolved nodes whenever metadata might be remapped.
+ assert(Node->isResolved() && "Unexpected unresolved node");
+
if (Node->isDistinct())
return mapDistinctNode(Node, Cycles, VM, Flags, TypeMapper, Materializer);
diff --git a/lib/Transforms/Vectorize/BBVectorize.cpp b/lib/Transforms/Vectorize/BBVectorize.cpp
index 525c050..29fb01f 100644
--- a/lib/Transforms/Vectorize/BBVectorize.cpp
+++ b/lib/Transforms/Vectorize/BBVectorize.cpp
@@ -39,6 +39,7 @@
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Metadata.h"
+#include "llvm/IR/Module.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/ValueHandle.h"
#include "llvm/Pass.h"
@@ -206,8 +207,6 @@ namespace {
AA = &P->getAnalysis<AliasAnalysis>();
DT = &P->getAnalysis<DominatorTreeWrapperPass>().getDomTree();
SE = &P->getAnalysis<ScalarEvolution>();
- DataLayoutPass *DLP = P->getAnalysisIfAvailable<DataLayoutPass>();
- DL = DLP ? &DLP->getDataLayout() : nullptr;
TTI = IgnoreTargetInfo
? nullptr
: &P->getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
@@ -222,7 +221,6 @@ namespace {
AliasAnalysis *AA;
DominatorTree *DT;
ScalarEvolution *SE;
- const DataLayout *DL;
const TargetTransformInfo *TTI;
// FIXME: const correct?
@@ -442,8 +440,6 @@ namespace {
AA = &getAnalysis<AliasAnalysis>();
DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
SE = &getAnalysis<ScalarEvolution>();
- DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>();
- DL = DLP ? &DLP->getDataLayout() : nullptr;
TTI = IgnoreTargetInfo
? nullptr
: &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(
@@ -642,19 +638,19 @@ namespace {
dyn_cast<SCEVConstant>(OffsetSCEV)) {
ConstantInt *IntOff = ConstOffSCEV->getValue();
int64_t Offset = IntOff->getSExtValue();
-
+ const DataLayout &DL = I->getModule()->getDataLayout();
Type *VTy = IPtr->getType()->getPointerElementType();
- int64_t VTyTSS = (int64_t) DL->getTypeStoreSize(VTy);
+ int64_t VTyTSS = (int64_t)DL.getTypeStoreSize(VTy);
Type *VTy2 = JPtr->getType()->getPointerElementType();
if (VTy != VTy2 && Offset < 0) {
- int64_t VTy2TSS = (int64_t) DL->getTypeStoreSize(VTy2);
+ int64_t VTy2TSS = (int64_t)DL.getTypeStoreSize(VTy2);
OffsetInElmts = Offset/VTy2TSS;
- return (abs64(Offset) % VTy2TSS) == 0;
+ return (std::abs(Offset) % VTy2TSS) == 0;
}
OffsetInElmts = Offset/VTyTSS;
- return (abs64(Offset) % VTyTSS) == 0;
+ return (std::abs(Offset) % VTyTSS) == 0;
}
return false;
@@ -846,7 +842,7 @@ namespace {
// It is important to cleanup here so that future iterations of this
// function have less work to do.
- (void) SimplifyInstructionsInBlock(&BB, DL, AA->getTargetLibraryInfo());
+ (void)SimplifyInstructionsInBlock(&BB, AA->getTargetLibraryInfo());
return true;
}
@@ -900,10 +896,6 @@ namespace {
return false;
}
- // We can't vectorize memory operations without target data
- if (!DL && IsSimpleLoadStore)
- return false;
-
Type *T1, *T2;
getInstructionTypes(I, T1, T2);
@@ -938,9 +930,8 @@ namespace {
if (T2->isX86_FP80Ty() || T2->isPPC_FP128Ty() || T2->isX86_MMXTy())
return false;
- if ((!Config.VectorizePointers || !DL) &&
- (T1->getScalarType()->isPointerTy() ||
- T2->getScalarType()->isPointerTy()))
+ if (!Config.VectorizePointers && (T1->getScalarType()->isPointerTy() ||
+ T2->getScalarType()->isPointerTy()))
return false;
if (!TTI && (T1->getPrimitiveSizeInBits() >= Config.VectorBits ||
@@ -985,8 +976,8 @@ namespace {
unsigned IAlignment, JAlignment, IAddressSpace, JAddressSpace;
int64_t OffsetInElmts = 0;
if (getPairPtrInfo(I, J, IPtr, JPtr, IAlignment, JAlignment,
- IAddressSpace, JAddressSpace,
- OffsetInElmts) && abs64(OffsetInElmts) == 1) {
+ IAddressSpace, JAddressSpace, OffsetInElmts) &&
+ std::abs(OffsetInElmts) == 1) {
FixedOrder = (int) OffsetInElmts;
unsigned BottomAlignment = IAlignment;
if (OffsetInElmts < 0) BottomAlignment = JAlignment;
@@ -1001,8 +992,8 @@ namespace {
// An aligned load or store is possible only if the instruction
// with the lower offset has an alignment suitable for the
// vector type.
-
- unsigned VecAlignment = DL->getPrefTypeAlignment(VType);
+ const DataLayout &DL = I->getModule()->getDataLayout();
+ unsigned VecAlignment = DL.getPrefTypeAlignment(VType);
if (BottomAlignment < VecAlignment)
return false;
}
diff --git a/lib/Transforms/Vectorize/LoopVectorize.cpp b/lib/Transforms/Vectorize/LoopVectorize.cpp
index 6142306..b7d0ae4 100644
--- a/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -218,6 +218,15 @@ public:
R.getInstr()) {}
};
+/// A helper function for converting Scalar types to vector types.
+/// If the incoming type is void, we return void. If the VF is 1, we return
+/// the scalar type.
+static Type* ToVectorTy(Type *Scalar, unsigned VF) {
+ if (Scalar->isVoidTy() || VF == 1)
+ return Scalar;
+ return VectorType::get(Scalar, VF);
+}
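A hypothetical standalone check of the helper's behaviour (the helper is copied locally here, since the one above is file-static):

    #include "llvm/IR/DerivedTypes.h"
    #include "llvm/IR/LLVMContext.h"
    using namespace llvm;

    static Type *ToVectorTy(Type *Scalar, unsigned VF) {
      if (Scalar->isVoidTy() || VF == 1)
        return Scalar;
      return VectorType::get(Scalar, VF);
    }

    int main() {
      LLVMContext Ctx;
      Type *I32 = Type::getInt32Ty(Ctx);
      // VF = 4 widens i32 to <4 x i32>; VF = 1 (and void) pass through.
      return (ToVectorTy(I32, 4)->isVectorTy() && ToVectorTy(I32, 1) == I32)
                 ? 0 : 1;
    }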
+
/// InnerLoopVectorizer vectorizes loops which contain only one basic
/// block to a specified vectorization factor (VF).
/// This class performs the widening of scalars into vectors, or multiple
@@ -235,13 +244,13 @@ public:
class InnerLoopVectorizer {
public:
InnerLoopVectorizer(Loop *OrigLoop, ScalarEvolution *SE, LoopInfo *LI,
- DominatorTree *DT, const DataLayout *DL,
- const TargetLibraryInfo *TLI, unsigned VecWidth,
+ DominatorTree *DT, const TargetLibraryInfo *TLI,
+ const TargetTransformInfo *TTI, unsigned VecWidth,
unsigned UnrollFactor)
- : OrigLoop(OrigLoop), SE(SE), LI(LI), DT(DT), DL(DL), TLI(TLI),
+ : OrigLoop(OrigLoop), SE(SE), LI(LI), DT(DT), TLI(TLI), TTI(TTI),
VF(VecWidth), UF(UnrollFactor), Builder(SE->getContext()),
Induction(nullptr), OldInduction(nullptr), WidenMap(UnrollFactor),
- Legal(nullptr) {}
+ Legal(nullptr), AddedSafetyChecks(false) {}
// Perform the actual loop widening (vectorization).
void vectorize(LoopVectorizationLegality *L) {
@@ -255,6 +264,11 @@ public:
updateAnalysis();
}
+ // Return true if any runtime check is added.
+ bool IsSafetyChecksAdded() {
+ return AddedSafetyChecks;
+ }
+
virtual ~InnerLoopVectorizer() {}
protected:
@@ -389,10 +403,10 @@ protected:
DominatorTree *DT;
/// Alias Analysis.
AliasAnalysis *AA;
- /// Data Layout.
- const DataLayout *DL;
/// Target Library Info.
const TargetLibraryInfo *TLI;
+ /// Target Transform Info.
+ const TargetTransformInfo *TTI;
/// The vectorization SIMD factor to use. Each vector will have this many
/// vector elements.
@@ -434,14 +448,17 @@ protected:
EdgeMaskCache MaskCache;
LoopVectorizationLegality *Legal;
+
+ // Record whether runtime check is added.
+ bool AddedSafetyChecks;
};
class InnerLoopUnroller : public InnerLoopVectorizer {
public:
InnerLoopUnroller(Loop *OrigLoop, ScalarEvolution *SE, LoopInfo *LI,
- DominatorTree *DT, const DataLayout *DL,
- const TargetLibraryInfo *TLI, unsigned UnrollFactor) :
- InnerLoopVectorizer(OrigLoop, SE, LI, DT, DL, TLI, 1, UnrollFactor) { }
+ DominatorTree *DT, const TargetLibraryInfo *TLI,
+ const TargetTransformInfo *TTI, unsigned UnrollFactor)
+ : InnerLoopVectorizer(OrigLoop, SE, LI, DT, TLI, TTI, 1, UnrollFactor) {}
private:
void scalarizeInstruction(Instruction *Instr,
@@ -488,7 +505,7 @@ static std::string getDebugLocString(const Loop *L) {
raw_string_ostream OS(Result);
const DebugLoc LoopDbgLoc = L->getStartLoc();
if (!LoopDbgLoc.isUnknown())
- LoopDbgLoc.print(L->getHeader()->getContext(), OS);
+ LoopDbgLoc.print(OS);
else
// Just print the module name.
OS << L->getHeader()->getParent()->getParent()->getModuleIdentifier();
@@ -543,14 +560,13 @@ static void propagateMetadata(SmallVectorImpl<Value *> &To, const Instruction *F
/// induction variable and the different reduction variables.
class LoopVectorizationLegality {
public:
- LoopVectorizationLegality(Loop *L, ScalarEvolution *SE, const DataLayout *DL,
- DominatorTree *DT, TargetLibraryInfo *TLI,
- AliasAnalysis *AA, Function *F,
- const TargetTransformInfo *TTI,
+ LoopVectorizationLegality(Loop *L, ScalarEvolution *SE, DominatorTree *DT,
+ TargetLibraryInfo *TLI, AliasAnalysis *AA,
+ Function *F, const TargetTransformInfo *TTI,
LoopAccessAnalysis *LAA)
- : NumPredStores(0), TheLoop(L), SE(SE), DL(DL),
- TLI(TLI), TheFunction(F), TTI(TTI), DT(DT), LAA(LAA), LAI(nullptr),
- Induction(nullptr), WidestIndTy(nullptr), HasFunNoNaNAttr(false) {}
+ : NumPredStores(0), TheLoop(L), SE(SE), TLI(TLI), TheFunction(F),
+ TTI(TTI), DT(DT), LAA(LAA), LAI(nullptr), Induction(nullptr),
+ WidestIndTy(nullptr), HasFunNoNaNAttr(false) {}
/// This enum represents the kinds of reductions that we support.
enum ReductionKind {
@@ -842,8 +858,6 @@ private:
Loop *TheLoop;
/// Scev analysis.
ScalarEvolution *SE;
- /// DataLayout analysis.
- const DataLayout *DL;
/// Target Library Info.
TargetLibraryInfo *TLI;
/// Parent function
@@ -884,7 +898,7 @@ private:
ValueToValueMap Strides;
SmallPtrSet<Value *, 8> StrideSet;
-
+
/// While vectorizing these instructions we have to generate a
/// call to the appropriate masked intrinsic
SmallPtrSet<const Instruction*, 8> MaskedOp;
@@ -902,10 +916,9 @@ public:
LoopVectorizationCostModel(Loop *L, ScalarEvolution *SE, LoopInfo *LI,
LoopVectorizationLegality *Legal,
const TargetTransformInfo &TTI,
- const DataLayout *DL, const TargetLibraryInfo *TLI,
- AssumptionCache *AC, const Function *F,
- const LoopVectorizeHints *Hints)
- : TheLoop(L), SE(SE), LI(LI), Legal(Legal), TTI(TTI), DL(DL), TLI(TLI),
+ const TargetLibraryInfo *TLI, AssumptionCache *AC,
+ const Function *F, const LoopVectorizeHints *Hints)
+ : TheLoop(L), SE(SE), LI(LI), Legal(Legal), TTI(TTI), TLI(TLI),
TheFunction(F), Hints(Hints) {
CodeMetrics::collectEphemeralValues(L, AC, EphValues);
}
@@ -958,11 +971,6 @@ private:
/// width. Vector width of one means scalar.
unsigned getInstructionCost(Instruction *I, unsigned VF);
- /// A helper function for converting Scalar types to vector types.
- /// If the incoming type is void, we return void. If the VF is 1, we return
- /// the scalar type.
- static Type* ToVectorTy(Type *Scalar, unsigned VF);
-
/// Returns whether the instruction is a load or store and will be a emitted
/// as a vector operation.
bool isConsecutiveLoadOrStore(Instruction *I);
@@ -988,8 +996,6 @@ private:
LoopVectorizationLegality *Legal;
/// Vector target information.
const TargetTransformInfo &TTI;
- /// Target data layout information.
- const DataLayout *DL;
/// Target Library Info.
const TargetLibraryInfo *TLI;
const Function *TheFunction;
@@ -1254,7 +1260,6 @@ struct LoopVectorize : public FunctionPass {
}
ScalarEvolution *SE;
- const DataLayout *DL;
LoopInfo *LI;
TargetTransformInfo *TTI;
DominatorTree *DT;
@@ -1270,8 +1275,6 @@ struct LoopVectorize : public FunctionPass {
bool runOnFunction(Function &F) override {
SE = &getAnalysis<ScalarEvolution>();
- DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>();
- DL = DLP ? &DLP->getDataLayout() : nullptr;
LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
TTI = &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
@@ -1292,12 +1295,6 @@ struct LoopVectorize : public FunctionPass {
if (!TTI->getNumberOfRegisters(true))
return false;
- if (!DL) {
- DEBUG(dbgs() << "\nLV: Not vectorizing " << F.getName()
- << ": Missing data layout\n");
- return false;
- }
-
// Build up a worklist of inner-loops to vectorize. This is necessary as
// the act of vectorizing or partially unrolling a loop creates new loops
// and can invalidate iterators across the loops.
@@ -1317,6 +1314,40 @@ struct LoopVectorize : public FunctionPass {
return Changed;
}
+ static void AddRuntimeUnrollDisableMetaData(Loop *L) {
+ SmallVector<Metadata *, 4> MDs;
+ // Reserve first location for self reference to the LoopID metadata node.
+ MDs.push_back(nullptr);
+ bool IsUnrollMetadata = false;
+ MDNode *LoopID = L->getLoopID();
+ if (LoopID) {
+ // First find existing loop unrolling disable metadata.
+ for (unsigned i = 1, ie = LoopID->getNumOperands(); i < ie; ++i) {
+ MDNode *MD = dyn_cast<MDNode>(LoopID->getOperand(i));
+ if (MD) {
+ const MDString *S = dyn_cast<MDString>(MD->getOperand(0));
+ IsUnrollMetadata =
+ S && S->getString().startswith("llvm.loop.unroll.disable");
+ }
+ MDs.push_back(LoopID->getOperand(i));
+ }
+ }
+
+ if (!IsUnrollMetadata) {
+ // Add runtime unroll disable metadata.
+ LLVMContext &Context = L->getHeader()->getContext();
+ SmallVector<Metadata *, 1> DisableOperands;
+ DisableOperands.push_back(
+ MDString::get(Context, "llvm.loop.unroll.runtime.disable"));
+ MDNode *DisableNode = MDNode::get(Context, DisableOperands);
+ MDs.push_back(DisableNode);
+ MDNode *NewLoopID = MDNode::get(Context, MDs);
+ // Set operand 0 to refer to the loop id itself.
+ NewLoopID->replaceOperandWith(0, NewLoopID);
+ L->setLoopID(NewLoopID);
+ }
+ }
+
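For reference, a hedged sketch of the metadata shape this helper leaves on a loop that had no prior loop ID (IR names illustrative, not part of the patch):

// Latch branch of the scalar loop after AddRuntimeUnrollDisableMetaData(L):
//   br i1 %exitcond, label %exit, label %header, !llvm.loop !0
//   !0 = distinct !{!0, !1}   ; operand 0 refers back to the node itself
//   !1 = !{!"llvm.loop.unroll.runtime.disable"}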
bool processLoop(Loop *L) {
assert(L->empty() && "Only process inner loops.");
@@ -1391,7 +1422,7 @@ struct LoopVectorize : public FunctionPass {
}
// Check if it is legal to vectorize the loop.
- LoopVectorizationLegality LVL(L, SE, DL, DT, TLI, AA, F, TTI, LAA);
+ LoopVectorizationLegality LVL(L, SE, DT, TLI, AA, F, TTI, LAA);
if (!LVL.canVectorize()) {
DEBUG(dbgs() << "LV: Not vectorizing: Cannot prove legality.\n");
emitMissedWarning(F, L, Hints);
@@ -1399,8 +1430,7 @@ struct LoopVectorize : public FunctionPass {
}
// Use the cost model.
- LoopVectorizationCostModel CM(L, SE, LI, &LVL, *TTI, DL, TLI, AC, F,
- &Hints);
+ LoopVectorizationCostModel CM(L, SE, LI, &LVL, *TTI, TLI, AC, F, &Hints);
// Check the function attributes to find out if this function should be
// optimized for size.
@@ -1464,14 +1494,20 @@ struct LoopVectorize : public FunctionPass {
// We decided not to vectorize, but we may want to unroll.
- InnerLoopUnroller Unroller(L, SE, LI, DT, DL, TLI, UF);
+ InnerLoopUnroller Unroller(L, SE, LI, DT, TLI, TTI, UF);
Unroller.vectorize(&LVL);
} else {
// If we decided that it is *legal* to vectorize the loop then do it.
- InnerLoopVectorizer LB(L, SE, LI, DT, DL, TLI, VF.Width, UF);
+ InnerLoopVectorizer LB(L, SE, LI, DT, TLI, TTI, VF.Width, UF);
LB.vectorize(&LVL);
++LoopsVectorized;
+ // Add metadata to disable runtime unrolling of the scalar loop when there
+ // is no runtime check for strides and memory: in that case the scalar loop
+ // is rarely executed and not worth unrolling.
+ if (!LB.IsSafetyChecksAdded())
+ AddRuntimeUnrollDisableMetaData(L);
+
// Report the vectorization decision.
emitOptimizationRemark(
F->getContext(), DEBUG_TYPE, *F, L->getStartLoc(),
@@ -1561,10 +1597,10 @@ Value *InnerLoopVectorizer::getStepVector(Value *Val, int StartIdx,
/// \brief Find the operand of the GEP that should be checked for consecutive
/// stores. This ignores trailing indices that have no effect on the final
/// pointer.
-static unsigned getGEPInductionOperand(const DataLayout *DL,
- const GetElementPtrInst *Gep) {
+static unsigned getGEPInductionOperand(const GetElementPtrInst *Gep) {
+ const DataLayout &DL = Gep->getModule()->getDataLayout();
unsigned LastOperand = Gep->getNumOperands() - 1;
- unsigned GEPAllocSize = DL->getTypeAllocSize(
+ unsigned GEPAllocSize = DL.getTypeAllocSize(
cast<PointerType>(Gep->getType()->getScalarType())->getElementType());
// Walk backwards and try to peel off zeros.
@@ -1575,7 +1611,7 @@ static unsigned getGEPInductionOperand(const DataLayout *DL,
// If it's a type with the same allocation size as the result of the GEP we
// can peel off the zero index.
- if (DL->getTypeAllocSize(*GEPTI) != GEPAllocSize)
+ if (DL.getTypeAllocSize(*GEPTI) != GEPAllocSize)
break;
--LastOperand;
}
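A hedged illustration of the peeling rule above (types and value names invented):

// For a trailing zero that cannot change the stride, e.g. a one-field struct:
//   %p = getelementptr { float }, { float }* %A, i64 %i, i32 0
// the alloc size of { float } equals that of the result element (4 bytes), so
// the 'i32 0' peels off and operand 1 (%i) is returned as the induction
// operand. For %A[%i][0] over [16 x i32] the sizes differ (64 vs. 4 bytes),
// so the zero is kept and the last operand is returned instead.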
@@ -1621,7 +1657,7 @@ int LoopVectorizationLegality::isConsecutivePtr(Value *Ptr) {
return II.getConsecutiveDirection();
}
- unsigned InductionOperand = getGEPInductionOperand(DL, Gep);
+ unsigned InductionOperand = getGEPInductionOperand(Gep);
// Check that all of the gep indices are uniform except for our induction
// operand.
@@ -1714,11 +1750,12 @@ void InnerLoopVectorizer::vectorizeMemoryInstruction(Instruction *Instr) {
unsigned Alignment = LI ? LI->getAlignment() : SI->getAlignment();
// An alignment of 0 means target abi alignment. We need to use the scalar's
// target abi alignment in such a case.
+ const DataLayout &DL = Instr->getModule()->getDataLayout();
if (!Alignment)
- Alignment = DL->getABITypeAlignment(ScalarDataTy);
+ Alignment = DL.getABITypeAlignment(ScalarDataTy);
unsigned AddressSpace = Ptr->getType()->getPointerAddressSpace();
- unsigned ScalarAllocatedSize = DL->getTypeAllocSize(ScalarDataTy);
- unsigned VectorElementSize = DL->getTypeStoreSize(DataTy)/VF;
+ unsigned ScalarAllocatedSize = DL.getTypeAllocSize(ScalarDataTy);
+ unsigned VectorElementSize = DL.getTypeStoreSize(DataTy) / VF;
if (SI && Legal->blockNeedsPredication(SI->getParent()) &&
!Legal->isMaskRequired(SI))
@@ -1759,7 +1796,7 @@ void InnerLoopVectorizer::vectorizeMemoryInstruction(Instruction *Instr) {
// The last index does not have to be the induction. It can be
// consecutive and be a function of the index. For example A[I+1];
unsigned NumOperands = Gep->getNumOperands();
- unsigned InductionOperand = getGEPInductionOperand(DL, Gep);
+ unsigned InductionOperand = getGEPInductionOperand(Gep);
// Create the new GEP with the new induction variable.
GetElementPtrInst *Gep2 = cast<GetElementPtrInst>(Gep->clone());
@@ -2080,9 +2117,11 @@ void InnerLoopVectorizer::createEmptyLoop() {
ExitCount = SE->getAddExpr(BackedgeTakeCount,
SE->getConstant(BackedgeTakeCount->getType(), 1));
+ const DataLayout &DL = OldBasicBlock->getModule()->getDataLayout();
+
// Expand the trip count and place the new instructions in the preheader.
// Notice that the pre-header does not change, only the loop body.
- SCEVExpander Exp(*SE, "induction");
+ SCEVExpander Exp(*SE, DL, "induction");
// We need to test whether the backedge-taken count is uint##_max. Adding one
// to it will cause overflow and an incorrect loop trip count in the vector
@@ -2218,6 +2257,7 @@ void InnerLoopVectorizer::createEmptyLoop() {
std::tie(FirstCheckInst, StrideCheck) =
addStrideCheck(LastBypassBlock->getTerminator());
if (StrideCheck) {
+ AddedSafetyChecks = true;
// Create a new block containing the stride check.
BasicBlock *CheckBlock =
LastBypassBlock->splitBasicBlock(FirstCheckInst, "vector.stridecheck");
@@ -2242,6 +2282,7 @@ void InnerLoopVectorizer::createEmptyLoop() {
std::tie(FirstCheckInst, MemRuntimeCheck) =
Legal->getLAI()->addRuntimeCheck(LastBypassBlock->getTerminator());
if (MemRuntimeCheck) {
+ AddedSafetyChecks = true;
// Create a new block containing the memory check.
BasicBlock *CheckBlock =
LastBypassBlock->splitBasicBlock(FirstCheckInst, "vector.memcheck");
@@ -2480,10 +2521,9 @@ getReductionBinOp(LoopVectorizationLegality::ReductionKind Kind) {
}
}
-Value *createMinMaxOp(IRBuilder<> &Builder,
- LoopVectorizationLegality::MinMaxReductionKind RK,
- Value *Left,
- Value *Right) {
+static Value *createMinMaxOp(IRBuilder<> &Builder,
+ LoopVectorizationLegality::MinMaxReductionKind RK,
+ Value *Left, Value *Right) {
CmpInst::Predicate P = CmpInst::ICMP_NE;
switch (RK) {
default:
@@ -2594,6 +2634,95 @@ static Value *addFastMathFlag(Value *V) {
return V;
}
+/// Estimate the overhead of scalarizing a value. Insert and Extract are set if
+/// the result needs to be inserted and/or extracted from vectors.
+static unsigned getScalarizationOverhead(Type *Ty, bool Insert, bool Extract,
+ const TargetTransformInfo &TTI) {
+ if (Ty->isVoidTy())
+ return 0;
+
+ assert(Ty->isVectorTy() && "Can only scalarize vectors");
+ unsigned Cost = 0;
+
+ for (int i = 0, e = Ty->getVectorNumElements(); i < e; ++i) {
+ if (Insert)
+ Cost += TTI.getVectorInstrCost(Instruction::InsertElement, Ty, i);
+ if (Extract)
+ Cost += TTI.getVectorInstrCost(Instruction::ExtractElement, Ty, i);
+ }
+
+ return Cost;
+}
+
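A worked instance under assumed per-element TTI costs (illustrative only):

// Ty = <4 x float>, Insert = true, Extract = false, and an assumed cost of 1
// per insertelement:
//   Cost = sum over i of getVectorInstrCost(InsertElement, <4 x float>, i)
//        = 4
// A void Ty returns 0, and a non-vector Ty trips the assert.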
+// Estimate cost of a call instruction CI if it were vectorized with factor VF.
+// Return the cost of the instruction, including scalarization overhead if it's
+// needed. The flag NeedToScalarize shows whether the call needs to be
+// scalarized: either no vector version is available, or it is too expensive.
+static unsigned getVectorCallCost(CallInst *CI, unsigned VF,
+ const TargetTransformInfo &TTI,
+ const TargetLibraryInfo *TLI,
+ bool &NeedToScalarize) {
+ Function *F = CI->getCalledFunction();
+ StringRef FnName = CI->getCalledFunction()->getName();
+ Type *ScalarRetTy = CI->getType();
+ SmallVector<Type *, 4> Tys, ScalarTys;
+ for (auto &ArgOp : CI->arg_operands())
+ ScalarTys.push_back(ArgOp->getType());
+
+ // Estimate cost of scalarized vector call. The source operands are assumed
+ // to be vectors, so we need to extract individual elements from there,
+ // execute VF scalar calls, and then gather the result into the vector return
+ // value.
+ unsigned ScalarCallCost = TTI.getCallInstrCost(F, ScalarRetTy, ScalarTys);
+ if (VF == 1)
+ return ScalarCallCost;
+
+ // Compute corresponding vector type for return value and arguments.
+ Type *RetTy = ToVectorTy(ScalarRetTy, VF);
+ for (unsigned i = 0, ie = ScalarTys.size(); i != ie; ++i)
+ Tys.push_back(ToVectorTy(ScalarTys[i], VF));
+
+ // Compute costs of unpacking argument values for the scalar calls and
+ // packing the return values to a vector.
+ unsigned ScalarizationCost =
+ getScalarizationOverhead(RetTy, true, false, TTI);
+ for (unsigned i = 0, ie = Tys.size(); i != ie; ++i)
+ ScalarizationCost += getScalarizationOverhead(Tys[i], false, true, TTI);
+
+ unsigned Cost = ScalarCallCost * VF + ScalarizationCost;
+
+ // If we can't emit a vector call for this function, then the currently found
+ // cost is the cost we need to return.
+ NeedToScalarize = true;
+ if (!TLI || !TLI->isFunctionVectorizable(FnName, VF) || CI->isNoBuiltin())
+ return Cost;
+
+ // If the corresponding vector cost is cheaper, return its cost.
+ unsigned VectorCallCost = TTI.getCallInstrCost(nullptr, RetTy, Tys);
+ if (VectorCallCost < Cost) {
+ NeedToScalarize = false;
+ return VectorCallCost;
+ }
+ return Cost;
+}
+
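A worked comparison with assumed costs (all numbers invented for illustration):

// VF = 4, scalar call cost 10, packing/unpacking overhead 6:
//   Cost = 10 * 4 + 6 = 46          // scalarized estimate
// If TLI->isFunctionVectorizable(FnName, 4) and TTI prices the vector call
// at, say, 12, the function returns 12 and clears NeedToScalarize; otherwise
// it returns 46 with NeedToScalarize == true.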
+// Estimate cost of an intrinsic call instruction CI if it were vectorized with
+// factor VF. Return the cost of the instruction, including scalarization
+// overhead if it's needed.
+static unsigned getVectorIntrinsicCost(CallInst *CI, unsigned VF,
+ const TargetTransformInfo &TTI,
+ const TargetLibraryInfo *TLI) {
+ Intrinsic::ID ID = getIntrinsicIDForCall(CI, TLI);
+ assert(ID && "Expected intrinsic call!");
+
+ Type *RetTy = ToVectorTy(CI->getType(), VF);
+ SmallVector<Type *, 4> Tys;
+ for (unsigned i = 0, ie = CI->getNumArgOperands(); i != ie; ++i)
+ Tys.push_back(ToVectorTy(CI->getArgOperand(i)->getType(), VF));
+
+ return TTI.getIntrinsicInstrCost(ID, RetTy, Tys);
+}
+
void InnerLoopVectorizer::vectorizeLoop() {
//===------------------------------------------------===//
//
@@ -3181,37 +3310,71 @@ void InnerLoopVectorizer::vectorizeBlockInLoop(BasicBlock *BB, PhiVector *PV) {
Module *M = BB->getParent()->getParent();
CallInst *CI = cast<CallInst>(it);
+
+ StringRef FnName = CI->getCalledFunction()->getName();
+ Function *F = CI->getCalledFunction();
+ Type *RetTy = ToVectorTy(CI->getType(), VF);
+ SmallVector<Type *, 4> Tys;
+ for (unsigned i = 0, ie = CI->getNumArgOperands(); i != ie; ++i)
+ Tys.push_back(ToVectorTy(CI->getArgOperand(i)->getType(), VF));
+
Intrinsic::ID ID = getIntrinsicIDForCall(CI, TLI);
- assert(ID && "Not an intrinsic call!");
- switch (ID) {
- case Intrinsic::assume:
- case Intrinsic::lifetime_end:
- case Intrinsic::lifetime_start:
+ if (ID &&
+ (ID == Intrinsic::assume || ID == Intrinsic::lifetime_end ||
+ ID == Intrinsic::lifetime_start)) {
scalarizeInstruction(it);
break;
- default:
- bool HasScalarOpd = hasVectorInstrinsicScalarOpd(ID, 1);
- for (unsigned Part = 0; Part < UF; ++Part) {
- SmallVector<Value *, 4> Args;
- for (unsigned i = 0, ie = CI->getNumArgOperands(); i != ie; ++i) {
- if (HasScalarOpd && i == 1) {
- Args.push_back(CI->getArgOperand(i));
- continue;
- }
- VectorParts &Arg = getVectorValue(CI->getArgOperand(i));
- Args.push_back(Arg[Part]);
- }
- Type *Tys[] = {CI->getType()};
- if (VF > 1)
- Tys[0] = VectorType::get(CI->getType()->getScalarType(), VF);
+ }
+ // The flag shows whether we emit an intrinsic or an ordinary call for the
+ // vectorized version of the instruction: is the intrinsic call more
+ // profitable than the library call?
+ bool NeedToScalarize;
+ unsigned CallCost = getVectorCallCost(CI, VF, *TTI, TLI, NeedToScalarize);
+ bool UseVectorIntrinsic =
+ ID && getVectorIntrinsicCost(CI, VF, *TTI, TLI) <= CallCost;
+ if (!UseVectorIntrinsic && NeedToScalarize) {
+ scalarizeInstruction(it);
+ break;
+ }
- Function *F = Intrinsic::getDeclaration(M, ID, Tys);
- Entry[Part] = Builder.CreateCall(F, Args);
+ for (unsigned Part = 0; Part < UF; ++Part) {
+ SmallVector<Value *, 4> Args;
+ for (unsigned i = 0, ie = CI->getNumArgOperands(); i != ie; ++i) {
+ Value *Arg = CI->getArgOperand(i);
+ // Some intrinsics have a scalar argument - don't replace it with a
+ // vector.
+ if (!UseVectorIntrinsic || !hasVectorInstrinsicScalarOpd(ID, i)) {
+ VectorParts &VectorArg = getVectorValue(CI->getArgOperand(i));
+ Arg = VectorArg[Part];
+ }
+ Args.push_back(Arg);
}
- propagateMetadata(Entry, it);
- break;
+ Function *VectorF;
+ if (UseVectorIntrinsic) {
+ // Use vector version of the intrinsic.
+ Type *TysForDecl[] = {CI->getType()};
+ if (VF > 1)
+ TysForDecl[0] = VectorType::get(CI->getType()->getScalarType(), VF);
+ VectorF = Intrinsic::getDeclaration(M, ID, TysForDecl);
+ } else {
+ // Use vector version of the library call.
+ StringRef VFnName = TLI->getVectorizedFunction(FnName, VF);
+ assert(!VFnName.empty() && "Vector function name is empty.");
+ VectorF = M->getFunction(VFnName);
+ if (!VectorF) {
+ // Generate a declaration
+ FunctionType *FTy = FunctionType::get(RetTy, Tys, false);
+ VectorF =
+ Function::Create(FTy, Function::ExternalLinkage, VFnName, M);
+ VectorF->copyAttributesFrom(F);
+ }
+ }
+ assert(VectorF && "Can't create vector function.");
+ Entry[Part] = Builder.CreateCall(VectorF, Args);
}
+
+ propagateMetadata(Entry, it);
break;
}
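A hedged end-to-end view of the decision above; the 'sinf' mapping and the vector-variant name are assumptions, not entries from a real TLI table:

// A call to sinf at VF = 4:
//  - getIntrinsicIDForCall may map it to Intrinsic::sin, priced by
//    getVectorIntrinsicCost;
//  - getVectorCallCost prices the scalarized form and, if TLI knows a vector
//    variant (say "vsinf4", a made-up name), the library-call form.
// The cheaper of intrinsic and call is emitted; if neither beats VF scalar
// calls plus packing, the call is scalarized.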
@@ -3463,6 +3626,7 @@ bool LoopVectorizationLegality::canVectorizeInstrs() {
// Look for the attribute signaling the absence of NaNs.
Function &F = *Header->getParent();
+ const DataLayout &DL = F.getParent()->getDataLayout();
if (F.hasFnAttribute("no-nans-fp-math"))
HasFunNoNaNAttr =
F.getFnAttribute("no-nans-fp-math").getValueAsString() == "true";
@@ -3518,9 +3682,9 @@ bool LoopVectorizationLegality::canVectorizeInstrs() {
if (IK_NoInduction != IK) {
// Get the widest type.
if (!WidestIndTy)
- WidestIndTy = convertPointerToIntegerType(*DL, PhiTy);
+ WidestIndTy = convertPointerToIntegerType(DL, PhiTy);
else
- WidestIndTy = getWiderType(*DL, PhiTy, WidestIndTy);
+ WidestIndTy = getWiderType(DL, PhiTy, WidestIndTy);
// Int inductions are special because we only allow one IV.
if (IK == IK_IntInduction && StepValue->isOne()) {
@@ -3591,13 +3755,17 @@ bool LoopVectorizationLegality::canVectorizeInstrs() {
return false;
}// end of PHI handling
- // We still don't handle functions. However, we can ignore dbg intrinsic
- // calls and we do handle certain intrinsic and libm functions.
+ // We handle calls that:
+ // * Are debug info intrinsics.
+ // * Have a mapping to an IR intrinsic.
+ // * Have a vector version available.
CallInst *CI = dyn_cast<CallInst>(it);
- if (CI && !getIntrinsicIDForCall(CI, TLI) && !isa<DbgInfoIntrinsic>(CI)) {
+ if (CI && !getIntrinsicIDForCall(CI, TLI) && !isa<DbgInfoIntrinsic>(CI) &&
+ !(CI->getCalledFunction() && TLI &&
+ TLI->isFunctionVectorizable(CI->getCalledFunction()->getName()))) {
emitAnalysis(VectorizationReport(it) <<
"call instruction cannot be vectorized");
- DEBUG(dbgs() << "LV: Found a call site.\n");
+ DEBUG(dbgs() << "LV: Found a non-intrinsic, non-libfunc callsite.\n");
return false;
}
@@ -3665,13 +3833,12 @@ bool LoopVectorizationLegality::canVectorizeInstrs() {
///\brief Strip off GEPs whose indices, except for the last one, are loop
/// invariant, and return the induction operand of the GEP pointer.
-static Value *stripGetElementPtr(Value *Ptr, ScalarEvolution *SE,
- const DataLayout *DL, Loop *Lp) {
+static Value *stripGetElementPtr(Value *Ptr, ScalarEvolution *SE, Loop *Lp) {
GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Ptr);
if (!GEP)
return Ptr;
- unsigned InductionOperand = getGEPInductionOperand(DL, GEP);
+ unsigned InductionOperand = getGEPInductionOperand(GEP);
// Check that all of the gep indices are uniform except for our induction
// operand.
@@ -3700,8 +3867,7 @@ static Value *getUniqueCastUse(Value *Ptr, Loop *Lp, Type *Ty) {
///\brief Get the stride of a pointer access in a loop.
/// Looks for symbolic strides "a[i*stride]". Returns the symbolic stride as a
/// pointer to the Value, or null otherwise.
-static Value *getStrideFromPointer(Value *Ptr, ScalarEvolution *SE,
- const DataLayout *DL, Loop *Lp) {
+static Value *getStrideFromPointer(Value *Ptr, ScalarEvolution *SE, Loop *Lp) {
const PointerType *PtrTy = dyn_cast<PointerType>(Ptr->getType());
if (!PtrTy || PtrTy->isAggregateType())
return nullptr;
@@ -3714,7 +3880,7 @@ static Value *getStrideFromPointer(Value *Ptr, ScalarEvolution *SE,
// The size of the pointer access.
int64_t PtrAccessSize = 1;
- Ptr = stripGetElementPtr(Ptr, SE, DL, Lp);
+ Ptr = stripGetElementPtr(Ptr, SE, Lp);
const SCEV *V = SE->getSCEV(Ptr);
if (Ptr != OrigPtr)
@@ -3733,7 +3899,8 @@ static Value *getStrideFromPointer(Value *Ptr, ScalarEvolution *SE,
// Strip off the size of access multiplication if we are still analyzing the
// pointer.
if (OrigPtr == Ptr) {
- DL->getTypeAllocSize(PtrTy->getElementType());
+ const DataLayout &DL = Lp->getHeader()->getModule()->getDataLayout();
+ DL.getTypeAllocSize(PtrTy->getElementType());
if (const SCEVMulExpr *M = dyn_cast<SCEVMulExpr>(V)) {
if (M->getOperand(0)->getSCEVType() != scConstant)
return nullptr;
@@ -3785,7 +3952,7 @@ void LoopVectorizationLegality::collectStridedAccess(Value *MemAccess) {
else
return;
- Value *Stride = getStrideFromPointer(Ptr, SE, DL, TheLoop);
+ Value *Stride = getStrideFromPointer(Ptr, SE, TheLoop);
if (!Stride)
return;
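The shape of access this targets, sketched in C (names invented):

// void f(float *a, long stride, long n) {
//   for (long i = 0; i < n; ++i)
//     a[i * stride] += 1.0f;   // symbolic stride: unit-stride only at runtime
// }
// collectStridedAccess records 'stride' in Strides/StrideSet so the vector
// loop can be guarded by a runtime 'stride == 1' check (addStrideCheck).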
@@ -3837,7 +4004,19 @@ bool LoopVectorizationLegality::canVectorizeMemory() {
auto &OptionalReport = LAI->getReport();
if (OptionalReport)
emitAnalysis(VectorizationReport(*OptionalReport));
- return LAI->canVectorizeMemory();
+ if (!LAI->canVectorizeMemory())
+ return false;
+
+ if (LAI->getNumRuntimePointerChecks() >
+ VectorizerParams::RuntimeMemoryCheckThreshold) {
+ emitAnalysis(VectorizationReport()
+ << LAI->getNumRuntimePointerChecks() << " exceeds limit of "
+ << VectorizerParams::RuntimeMemoryCheckThreshold
+ << " dependent memory operations checked at runtime");
+ DEBUG(dbgs() << "LV: Too many memory checks needed.\n");
+ return false;
+ }
+ return true;
}
static bool hasMultipleUsesOf(Instruction *I,
@@ -4163,7 +4342,8 @@ LoopVectorizationLegality::isInductionVariable(PHINode *Phi,
if (!PointerElementType->isSized())
return IK_NoInduction;
- int64_t Size = static_cast<int64_t>(DL->getTypeAllocSize(PointerElementType));
+ const DataLayout &DL = Phi->getModule()->getDataLayout();
+ int64_t Size = static_cast<int64_t>(DL.getTypeAllocSize(PointerElementType));
int64_t CVSize = CV->getSExtValue();
if (CVSize % Size)
return IK_NoInduction;
@@ -4375,6 +4555,7 @@ LoopVectorizationCostModel::selectVectorizationFactor(bool OptForSize) {
unsigned LoopVectorizationCostModel::getWidestType() {
unsigned MaxWidth = 8;
+ const DataLayout &DL = TheFunction->getParent()->getDataLayout();
// For each block.
for (Loop::block_iterator bb = TheLoop->block_begin(),
@@ -4409,7 +4590,7 @@ unsigned LoopVectorizationCostModel::getWidestType() {
continue;
MaxWidth = std::max(MaxWidth,
- (unsigned)DL->getTypeSizeInBits(T->getScalarType()));
+ (unsigned)DL.getTypeSizeInBits(T->getScalarType()));
}
}
@@ -4561,6 +4742,14 @@ LoopVectorizationCostModel::selectUnrollFactor(bool OptForSize,
return SmallUF;
}
+ // Unroll if this is a large loop (small loops are already dealt with by this
+ // point) that could benefit from interleaved unrolling.
+ bool HasReductions = (Legal->getReductionVars()->size() > 0);
+ if (TTI.enableAggressiveInterleaving(HasReductions)) {
+ DEBUG(dbgs() << "LV: Unrolling to expose ILP.\n");
+ return UF;
+ }
+
DEBUG(dbgs() << "LV: Not Unrolling.\n");
return 1;
}
@@ -4898,8 +5087,9 @@ LoopVectorizationCostModel::getInstructionCost(Instruction *I, unsigned VF) {
// Scalarized loads/stores.
int ConsecutiveStride = Legal->isConsecutivePtr(Ptr);
bool Reverse = ConsecutiveStride < 0;
- unsigned ScalarAllocatedSize = DL->getTypeAllocSize(ValTy);
- unsigned VectorElementSize = DL->getTypeStoreSize(VectorTy)/VF;
+ const DataLayout &DL = I->getModule()->getDataLayout();
+ unsigned ScalarAllocatedSize = DL.getTypeAllocSize(ValTy);
+ unsigned VectorElementSize = DL.getTypeStoreSize(VectorTy) / VF;
if (!ConsecutiveStride || ScalarAllocatedSize != VectorElementSize) {
bool IsComplexComputation =
isLikelyComplexAddressComputation(Ptr, Legal, SE, TheLoop);
@@ -4960,14 +5150,12 @@ LoopVectorizationCostModel::getInstructionCost(Instruction *I, unsigned VF) {
return TTI.getCastInstrCost(I->getOpcode(), VectorTy, SrcVecTy);
}
case Instruction::Call: {
+ bool NeedToScalarize;
CallInst *CI = cast<CallInst>(I);
- Intrinsic::ID ID = getIntrinsicIDForCall(CI, TLI);
- assert(ID && "Not an intrinsic call!");
- Type *RetTy = ToVectorTy(CI->getType(), VF);
- SmallVector<Type*, 4> Tys;
- for (unsigned i = 0, ie = CI->getNumArgOperands(); i != ie; ++i)
- Tys.push_back(ToVectorTy(CI->getArgOperand(i)->getType(), VF));
- return TTI.getIntrinsicInstrCost(ID, RetTy, Tys);
+ unsigned CallCost = getVectorCallCost(CI, VF, TTI, TLI, NeedToScalarize);
+ if (getIntrinsicIDForCall(CI, TLI))
+ return std::min(CallCost, getVectorIntrinsicCost(CI, VF, TTI, TLI));
+ return CallCost;
}
default: {
// We are scalarizing the instruction. Return the cost of the scalar
@@ -4994,12 +5182,6 @@ LoopVectorizationCostModel::getInstructionCost(Instruction *I, unsigned VF) {
}// end of switch.
}
-Type* LoopVectorizationCostModel::ToVectorTy(Type *Scalar, unsigned VF) {
- if (Scalar->isVoidTy() || VF == 1)
- return Scalar;
- return VectorType::get(Scalar, VF);
-}
-
char LoopVectorize::ID = 0;
static const char lv_name[] = "Loop Vectorization";
INITIALIZE_PASS_BEGIN(LoopVectorize, LV_NAME, lv_name, false, false)
diff --git a/lib/Transforms/Vectorize/SLPVectorizer.cpp b/lib/Transforms/Vectorize/SLPVectorizer.cpp
index baf9741..8fc4cc1 100644
--- a/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -17,9 +17,9 @@
//===----------------------------------------------------------------------===//
#include "llvm/Transforms/Vectorize.h"
#include "llvm/ADT/MapVector.h"
+#include "llvm/ADT/Optional.h"
#include "llvm/ADT/PostOrderIterator.h"
#include "llvm/ADT/SetVector.h"
-#include "llvm/ADT/Optional.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/AssumptionCache.h"
@@ -342,11 +342,11 @@ public:
typedef SmallPtrSet<Value *, 16> ValueSet;
typedef SmallVector<StoreInst *, 8> StoreList;
- BoUpSLP(Function *Func, ScalarEvolution *Se, const DataLayout *Dl,
- TargetTransformInfo *Tti, TargetLibraryInfo *TLi, AliasAnalysis *Aa,
- LoopInfo *Li, DominatorTree *Dt, AssumptionCache *AC)
+ BoUpSLP(Function *Func, ScalarEvolution *Se, TargetTransformInfo *Tti,
+ TargetLibraryInfo *TLi, AliasAnalysis *Aa, LoopInfo *Li,
+ DominatorTree *Dt, AssumptionCache *AC)
: NumLoadsWantToKeepOrder(0), NumLoadsWantToChangeOrder(0), F(Func),
- SE(Se), DL(Dl), TTI(Tti), TLI(TLi), AA(Aa), LI(Li), DT(Dt),
+ SE(Se), TTI(Tti), TLI(TLi), AA(Aa), LI(Li), DT(Dt),
Builder(Se->getContext()) {
CodeMetrics::collectEphemeralValues(F, AC, EphValues);
}
@@ -383,7 +383,7 @@ public:
}
/// \returns true if the memory operations A and B are consecutive.
- bool isConsecutiveAccess(Value *A, Value *B);
+ bool isConsecutiveAccess(Value *A, Value *B, const DataLayout &DL);
/// \brief Perform LICM and CSE on the newly generated gather sequences.
void optimizeGatherSequence();
@@ -877,7 +877,6 @@ private:
// Analysis and block reference.
Function *F;
ScalarEvolution *SE;
- const DataLayout *DL;
TargetTransformInfo *TTI;
TargetLibraryInfo *TLI;
AliasAnalysis *AA;
@@ -1130,8 +1129,9 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth) {
DEBUG(dbgs() << "SLP: Gathering non-simple loads.\n");
return;
}
- if (!isConsecutiveAccess(VL[i], VL[i + 1])) {
- if (VL.size() == 2 && isConsecutiveAccess(VL[1], VL[0])) {
+ const DataLayout &DL = F->getParent()->getDataLayout();
+ if (!isConsecutiveAccess(VL[i], VL[i + 1], DL)) {
+ if (VL.size() == 2 && isConsecutiveAccess(VL[1], VL[0], DL)) {
++NumLoadsWantToChangeOrder;
}
BS.cancelScheduling(VL);
@@ -1300,9 +1300,10 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth) {
return;
}
case Instruction::Store: {
+ const DataLayout &DL = F->getParent()->getDataLayout();
// Check if the stores are consecutive or if we need to swizzle them.
for (unsigned i = 0, e = VL.size() - 1; i < e; ++i)
- if (!isConsecutiveAccess(VL[i], VL[i + 1])) {
+ if (!isConsecutiveAccess(VL[i], VL[i + 1], DL)) {
BS.cancelScheduling(VL);
newTreeEntry(VL, false);
DEBUG(dbgs() << "SLP: Non-consecutive store.\n");
@@ -1789,7 +1790,7 @@ unsigned BoUpSLP::getAddressSpaceOperand(Value *I) {
return -1;
}
-bool BoUpSLP::isConsecutiveAccess(Value *A, Value *B) {
+bool BoUpSLP::isConsecutiveAccess(Value *A, Value *B, const DataLayout &DL) {
Value *PtrA = getPointerOperand(A);
Value *PtrB = getPointerOperand(B);
unsigned ASA = getAddressSpaceOperand(A);
@@ -1803,13 +1804,13 @@ bool BoUpSLP::isConsecutiveAccess(Value *A, Value *B) {
if (PtrA == PtrB || PtrA->getType() != PtrB->getType())
return false;
- unsigned PtrBitWidth = DL->getPointerSizeInBits(ASA);
+ unsigned PtrBitWidth = DL.getPointerSizeInBits(ASA);
Type *Ty = cast<PointerType>(PtrA->getType())->getElementType();
- APInt Size(PtrBitWidth, DL->getTypeStoreSize(Ty));
+ APInt Size(PtrBitWidth, DL.getTypeStoreSize(Ty));
APInt OffsetA(PtrBitWidth, 0), OffsetB(PtrBitWidth, 0);
- PtrA = PtrA->stripAndAccumulateInBoundsConstantOffsets(*DL, OffsetA);
- PtrB = PtrB->stripAndAccumulateInBoundsConstantOffsets(*DL, OffsetB);
+ PtrA = PtrA->stripAndAccumulateInBoundsConstantOffsets(DL, OffsetA);
+ PtrB = PtrB->stripAndAccumulateInBoundsConstantOffsets(DL, OffsetB);
APInt OffsetDelta = OffsetB - OffsetA;
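A worked instance of the offset arithmetic (constant indices, i32 accesses):

// PtrA = getelementptr i32, i32* %a, i64 0   -> base %a, OffsetA = 0
// PtrB = getelementptr i32, i32* %a, i64 1   -> base %a, OffsetB = 4
// Size = store size of i32 = 4, so OffsetDelta = 4 == Size: consecutive.
// Non-constant offsets are handled by the SCEV-based fallback that follows.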
@@ -1842,6 +1843,7 @@ bool BoUpSLP::isConsecutiveAccess(Value *A, Value *B) {
void BoUpSLP::reorderAltShuffleOperands(ArrayRef<Value *> VL,
SmallVectorImpl<Value *> &Left,
SmallVectorImpl<Value *> &Right) {
+ const DataLayout &DL = F->getParent()->getDataLayout();
// Push left and right operands of binary operation into Left and Right
for (unsigned i = 0, e = VL.size(); i < e; ++i) {
@@ -1856,10 +1858,10 @@ void BoUpSLP::reorderAltShuffleOperands(ArrayRef<Value *> VL,
if (LoadInst *L1 = dyn_cast<LoadInst>(Right[j + 1])) {
Instruction *VL1 = cast<Instruction>(VL[j]);
Instruction *VL2 = cast<Instruction>(VL[j + 1]);
- if (isConsecutiveAccess(L, L1) && VL1->isCommutative()) {
+ if (isConsecutiveAccess(L, L1, DL) && VL1->isCommutative()) {
std::swap(Left[j], Right[j]);
continue;
- } else if (isConsecutiveAccess(L, L1) && VL2->isCommutative()) {
+ } else if (isConsecutiveAccess(L, L1, DL) && VL2->isCommutative()) {
std::swap(Left[j + 1], Right[j + 1]);
continue;
}
@@ -1870,10 +1872,10 @@ void BoUpSLP::reorderAltShuffleOperands(ArrayRef<Value *> VL,
if (LoadInst *L1 = dyn_cast<LoadInst>(Left[j + 1])) {
Instruction *VL1 = cast<Instruction>(VL[j]);
Instruction *VL2 = cast<Instruction>(VL[j + 1]);
- if (isConsecutiveAccess(L, L1) && VL1->isCommutative()) {
+ if (isConsecutiveAccess(L, L1, DL) && VL1->isCommutative()) {
std::swap(Left[j], Right[j]);
continue;
- } else if (isConsecutiveAccess(L, L1) && VL2->isCommutative()) {
+ } else if (isConsecutiveAccess(L, L1, DL) && VL2->isCommutative()) {
std::swap(Left[j + 1], Right[j + 1]);
continue;
}
@@ -1983,6 +1985,8 @@ void BoUpSLP::reorderInputsAccordingToOpcode(ArrayRef<Value *> VL,
Right = OrigRight;
}
+ const DataLayout &DL = F->getParent()->getDataLayout();
+
// Finally check if we can get longer vectorizable chain by reordering
// without breaking the good operand order detected above.
// E.g. If we have something like-
@@ -2001,7 +2005,7 @@ void BoUpSLP::reorderInputsAccordingToOpcode(ArrayRef<Value *> VL,
for (unsigned j = 0; j < VL.size() - 1; ++j) {
if (LoadInst *L = dyn_cast<LoadInst>(Left[j])) {
if (LoadInst *L1 = dyn_cast<LoadInst>(Right[j + 1])) {
- if (isConsecutiveAccess(L, L1)) {
+ if (isConsecutiveAccess(L, L1, DL)) {
std::swap(Left[j + 1], Right[j + 1]);
continue;
}
@@ -2009,7 +2013,7 @@ void BoUpSLP::reorderInputsAccordingToOpcode(ArrayRef<Value *> VL,
}
if (LoadInst *L = dyn_cast<LoadInst>(Right[j])) {
if (LoadInst *L1 = dyn_cast<LoadInst>(Left[j + 1])) {
- if (isConsecutiveAccess(L, L1)) {
+ if (isConsecutiveAccess(L, L1, DL)) {
std::swap(Left[j + 1], Right[j + 1]);
continue;
}
@@ -2105,6 +2109,7 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
return Gather(E->Scalars, VecTy);
}
+ const DataLayout &DL = F->getParent()->getDataLayout();
unsigned Opcode = getSameOpcode(E->Scalars);
switch (Opcode) {
@@ -2301,8 +2306,9 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
unsigned Alignment = LI->getAlignment();
LI = Builder.CreateLoad(VecPtr);
- if (!Alignment)
- Alignment = DL->getABITypeAlignment(ScalarLoadTy);
+ if (!Alignment) {
+ Alignment = DL.getABITypeAlignment(ScalarLoadTy);
+ }
LI->setAlignment(Alignment);
E->VectorizedValue = LI;
++NumVectorInstructions;
@@ -2331,8 +2337,9 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
ExternalUses.push_back(
ExternalUser(SI->getPointerOperand(), cast<User>(VecPtr), 0));
- if (!Alignment)
- Alignment = DL->getABITypeAlignment(SI->getValueOperand()->getType());
+ if (!Alignment) {
+ Alignment = DL.getABITypeAlignment(SI->getValueOperand()->getType());
+ }
S->setAlignment(Alignment);
E->VectorizedValue = S;
++NumVectorInstructions;
@@ -2358,7 +2365,8 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
OpVecs.push_back(OpVec);
}
- Value *V = Builder.CreateGEP(Op0, OpVecs);
+ Value *V = Builder.CreateGEP(
+ cast<GetElementPtrInst>(VL0)->getSourceElementType(), Op0, OpVecs);
E->VectorizedValue = V;
++NumVectorInstructions;
@@ -3051,7 +3059,6 @@ struct SLPVectorizer : public FunctionPass {
}
ScalarEvolution *SE;
- const DataLayout *DL;
TargetTransformInfo *TTI;
TargetLibraryInfo *TLI;
AliasAnalysis *AA;
@@ -3064,8 +3071,6 @@ struct SLPVectorizer : public FunctionPass {
return false;
SE = &getAnalysis<ScalarEvolution>();
- DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>();
- DL = DLP ? &DLP->getDataLayout() : nullptr;
TTI = &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
auto *TLIP = getAnalysisIfAvailable<TargetLibraryInfoWrapperPass>();
TLI = TLIP ? &TLIP->getTLI() : nullptr;
@@ -3082,11 +3087,6 @@ struct SLPVectorizer : public FunctionPass {
if (!TTI->getNumberOfRegisters(true))
return false;
- // Must have DataLayout. We can't require it because some tests run w/o
- // triple.
- if (!DL)
- return false;
-
// Don't vectorize when the attribute NoImplicitFloat is used.
if (F.hasFnAttribute(Attribute::NoImplicitFloat))
return false;
@@ -3095,7 +3095,7 @@ struct SLPVectorizer : public FunctionPass {
// Use the bottom up slp vectorizer to construct chains that start with
// store instructions.
- BoUpSLP R(&F, SE, DL, TTI, TLI, AA, LI, DT, AC);
+ BoUpSLP R(&F, SE, TTI, TLI, AA, LI, DT, AC);
// A general note: the vectorizer must use BoUpSLP::eraseInstruction() to
// delete instructions.
@@ -3178,15 +3178,11 @@ private:
/// the WeakVH array.
/// Vectorization of part of the VL array may cause later values in the VL array
/// to become invalid. We track when this has happened in the WeakVH array.
-static bool hasValueBeenRAUWed(ArrayRef<Value *> &VL,
- SmallVectorImpl<WeakVH> &VH,
- unsigned SliceBegin,
- unsigned SliceSize) {
- for (unsigned i = SliceBegin; i < SliceBegin + SliceSize; ++i)
- if (VH[i] != VL[i])
- return true;
-
- return false;
+static bool hasValueBeenRAUWed(ArrayRef<Value *> VL, ArrayRef<WeakVH> VH,
+ unsigned SliceBegin, unsigned SliceSize) {
+ VL = VL.slice(SliceBegin, SliceSize);
+ VH = VH.slice(SliceBegin, SliceSize);
+ return !std::equal(VL.begin(), VL.end(), VH.begin());
}
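A minimal sketch of what the WeakVH comparison catches (values invented):

// VL = { %a, %b, %c } snapshotted into VH = { WeakVH(%a), WeakVH(%b), ... }.
// If vectorizing an earlier slice erases %b, its WeakVH nulls itself, so
// std::equal over a slice containing %b returns false and that slice is
// reported as RAUWed/deleted and skipped.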
bool SLPVectorizer::vectorizeStoreChain(ArrayRef<Value *> Chain,
@@ -3195,7 +3191,8 @@ bool SLPVectorizer::vectorizeStoreChain(ArrayRef<Value *> Chain,
DEBUG(dbgs() << "SLP: Analyzing a store chain of length " << ChainLen
<< "\n");
Type *StoreTy = cast<StoreInst>(Chain[0])->getValueOperand()->getType();
- unsigned Sz = DL->getTypeSizeInBits(StoreTy);
+ auto &DL = cast<StoreInst>(Chain[0])->getModule()->getDataLayout();
+ unsigned Sz = DL.getTypeSizeInBits(StoreTy);
unsigned VF = MinVecRegSize / Sz;
if (!isPowerOf2_32(Sz) || VF < 2)
@@ -3238,8 +3235,8 @@ bool SLPVectorizer::vectorizeStoreChain(ArrayRef<Value *> Chain,
bool SLPVectorizer::vectorizeStores(ArrayRef<StoreInst *> Stores,
int costThreshold, BoUpSLP &R) {
- SetVector<Value *> Heads, Tails;
- SmallDenseMap<Value *, Value *> ConsecutiveChain;
+ SetVector<StoreInst *> Heads, Tails;
+ SmallDenseMap<StoreInst *, StoreInst *> ConsecutiveChain;
// We may run into multiple chains that merge into a single chain. We mark the
// stores that we vectorized so that we don't visit the same store twice.
@@ -3252,8 +3249,8 @@ bool SLPVectorizer::vectorizeStores(ArrayRef<StoreInst *> Stores,
for (unsigned j = 0; j < e; ++j) {
if (i == j)
continue;
-
- if (R.isConsecutiveAccess(Stores[i], Stores[j])) {
+ const DataLayout &DL = Stores[i]->getModule()->getDataLayout();
+ if (R.isConsecutiveAccess(Stores[i], Stores[j], DL)) {
Tails.insert(Stores[j]);
Heads.insert(Stores[i]);
ConsecutiveChain[Stores[i]] = Stores[j];
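A small worked example of the chain bookkeeping (stores invented):

// Consecutive stores S0 -> S1 -> S2 (to a[0], a[1], a[2]) produce:
//   Heads = { S0, S1 }, Tails = { S1, S2 }
//   ConsecutiveChain = { S0 -> S1, S1 -> S2 }
// The follow-up loop starts only from heads that are not tails (S0) and
// walks ConsecutiveChain to collect the whole chain { S0, S1, S2 }.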
@@ -3262,7 +3259,7 @@ bool SLPVectorizer::vectorizeStores(ArrayRef<StoreInst *> Stores,
}
// For stores that start but don't end a link in the chain:
- for (SetVector<Value *>::iterator it = Heads.begin(), e = Heads.end();
+ for (SetVector<StoreInst *>::iterator it = Heads.begin(), e = Heads.end();
it != e; ++it) {
if (Tails.count(*it))
continue;
@@ -3270,7 +3267,7 @@ bool SLPVectorizer::vectorizeStores(ArrayRef<StoreInst *> Stores,
// We found a store instr that starts a chain. Now follow the chain and try
// to vectorize it.
BoUpSLP::ValueList Operands;
- Value *I = *it;
+ StoreInst *I = *it;
// Collect the chain into a list.
while (Tails.count(I) || Heads.count(I)) {
if (VectorizedStores.count(I))
@@ -3295,6 +3292,7 @@ bool SLPVectorizer::vectorizeStores(ArrayRef<StoreInst *> Stores,
unsigned SLPVectorizer::collectStores(BasicBlock *BB, BoUpSLP &R) {
unsigned count = 0;
StoreRefs.clear();
+ const DataLayout &DL = BB->getModule()->getDataLayout();
for (BasicBlock::iterator it = BB->begin(), e = BB->end(); it != e; ++it) {
StoreInst *SI = dyn_cast<StoreInst>(it);
if (!SI)
@@ -3340,9 +3338,10 @@ bool SLPVectorizer::tryToVectorizeList(ArrayRef<Value *> VL, BoUpSLP &R,
return false;
unsigned Opcode0 = I0->getOpcode();
+ const DataLayout &DL = I0->getModule()->getDataLayout();
Type *Ty0 = I0->getType();
- unsigned Sz = DL->getTypeSizeInBits(Ty0);
+ unsigned Sz = DL.getTypeSizeInBits(Ty0);
unsigned VF = MinVecRegSize / Sz;
for (int i = 0, e = VL.size(); i < e; ++i) {
@@ -3544,8 +3543,7 @@ public:
ReducedValueOpcode(0), ReduxWidth(0), IsPairwiseReduction(false) {}
/// \brief Try to find a reduction tree.
- bool matchAssociativeReduction(PHINode *Phi, BinaryOperator *B,
- const DataLayout *DL) {
+ bool matchAssociativeReduction(PHINode *Phi, BinaryOperator *B) {
assert((!Phi ||
std::find(Phi->op_begin(), Phi->op_end(), B) != Phi->op_end()) &&
"Thi phi needs to use the binary operator");
@@ -3570,9 +3568,10 @@ public:
if (!isValidElementType(Ty))
return false;
+ const DataLayout &DL = B->getModule()->getDataLayout();
ReductionOpcode = B->getOpcode();
ReducedValueOpcode = 0;
- ReduxWidth = MinVecRegSize / DL->getTypeSizeInBits(Ty);
+ ReduxWidth = MinVecRegSize / DL.getTypeSizeInBits(Ty);
ReductionRoot = B;
ReductionPHI = Phi;
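A quick instance of the width computation, assuming the usual 128-bit MinVecRegSize:

// Reducing over float: DL.getTypeSizeInBits(Ty) == 32, so
//   ReduxWidth = 128 / 32 = 4
// i.e. the reduction tree is matched and vectorized four lanes at a time.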
@@ -3882,8 +3881,7 @@ bool SLPVectorizer::vectorizeChainsInBlock(BasicBlock *BB, BoUpSLP &R) {
// Try to match and vectorize a horizontal reduction.
HorizontalReduction HorRdx;
- if (ShouldVectorizeHor &&
- HorRdx.matchAssociativeReduction(P, BI, DL) &&
+ if (ShouldVectorizeHor && HorRdx.matchAssociativeReduction(P, BI) &&
HorRdx.tryToReduce(R, TTI)) {
Changed = true;
it = BB->begin();
@@ -3913,7 +3911,7 @@ bool SLPVectorizer::vectorizeChainsInBlock(BasicBlock *BB, BoUpSLP &R) {
if (BinaryOperator *BinOp =
dyn_cast<BinaryOperator>(SI->getValueOperand())) {
HorizontalReduction HorRdx;
- if (((HorRdx.matchAssociativeReduction(nullptr, BinOp, DL) &&
+ if (((HorRdx.matchAssociativeReduction(nullptr, BinOp) &&
HorRdx.tryToReduce(R, TTI)) ||
tryToVectorize(BinOp, R))) {
Changed = true;